Fix a couple of incorrect link detections ('e.g.' and 'foo.jpg') (close #13)

zio/stable
Paul Frazee 2022-11-28 10:22:08 -06:00
parent 8723b51693
commit bcb1ad98de
4 changed files with 51 additions and 4 deletions

View File

@ -31,6 +31,11 @@ describe('extractEntities', () => {
'start middle end.com/foo/bar?baz=bux#hash',
'newline1.com\nnewline2.com',
'not.. a..url ..here',
'e.g.',
'something-cool.jpg',
'website.com.jpg',
'e.g./foo',
'website.com.jpg/foo',
]
interface Output {
type: string
@ -80,6 +85,11 @@ describe('extractEntities', () => {
{type: 'link', value: 'newline2.com', noScheme: true},
],
[],
[],
[],
[],
[],
[],
]
it('correctly handles a set of text inputs', () => {
for (let i = 0; i < inputs.length; i++) {
@ -145,6 +155,12 @@ describe('detectLinkables', () => {
'start middle end.com/foo/bar?baz=bux#hash',
'newline1.com\nnewline2.com',
'not.. a..url ..here',
'e.g.',
'e.g. real.com fake.notreal',
'something-cool.jpg',
'website.com.jpg',
'e.g./foo',
'website.com.jpg/foo',
]
const outputs = [
['no linkable'],
@ -171,6 +187,12 @@ describe('detectLinkables', () => {
['start middle ', {link: 'end.com/foo/bar?baz=bux#hash'}],
[{link: 'newline1.com'}, '\n', {link: 'newline2.com'}],
['not.. a..url ..here'],
['e.g.'],
['e.g. ', {link: 'real.com'}, ' fake.notreal'],
['something-cool.jpg'],
['website.com.jpg'],
['e.g./foo'],
['website.com.jpg/foo'],
]
it('correctly handles a set of text inputs', () => {
for (let i = 0; i < inputs.length; i++) {

View File

@ -46,7 +46,8 @@
"react-native-svg": "^12.4.0",
"react-native-tab-view": "^3.3.0",
"react-native-url-polyfill": "^1.3.0",
"react-native-web": "^0.17.7"
"react-native-web": "^0.17.7",
"tlds": "^1.234.0"
},
"devDependencies": {
"@babel/core": "^7.12.9",
@ -74,7 +75,9 @@
},
"jest": {
"preset": "react-native",
"setupFiles": ["./jest.js"],
"setupFiles": [
"./jest.js"
],
"moduleFileExtensions": [
"ts",
"tsx",

View File

@ -1,6 +1,7 @@
import {AtUri} from '../third-party/uri'
import {Entity} from '../third-party/api/src/client/types/app/bsky/feed/post'
import {PROD_SERVICE} from '../state'
import TLDs from 'tlds'
export const MAX_DISPLAY_NAME = 64
export const MAX_DESCRIPTION = 256
@ -57,6 +58,14 @@ export function ago(date: number | string | Date): string {
}
}
/**
 * Reports whether `str` ends with a dot followed by a known top-level domain.
 *
 * This is a pure suffix check against the entries of the imported `TLDs`
 * list; the rest of the string is not validated. The link detectors use it to
 * discard regex matches that merely look like hostnames.
 *
 * The comparison is case-insensitive on the input side: `str` is lowercased
 * before matching (assumes the `TLDs` entries are themselves lowercase —
 * confirm against the `tlds` package).
 *
 * @param str Candidate domain, e.g. the `domain` capture group of a link regex.
 * @returns `true` when `str` ends in `.<tld>` for some entry of `TLDs`.
 */
export function isValidDomain(str: string): boolean {
  // Domain names are case-insensitive, and detectLinkables matches with the
  // `i` flag, so normalize before comparing against the TLD list.
  const candidate = str.toLowerCase()
  // A bare TLD with no leading dot (e.g. "com") never matches, because the
  // suffix being tested always includes the dot.
  return TLDs.some(tld => candidate.endsWith(`.${tld}`))
}
export function extractEntities(
text: string,
knownHandles?: Set<string>,
@ -85,10 +94,14 @@ export function extractEntities(
{
// links
const re =
/(^|\s)((https?:\/\/[\S]+)|([a-z][a-z0-9]*(\.[a-z0-9]+)+[\S]*))(\b)/dg
/(^|\s)((https?:\/\/[\S]+)|((?<domain>[a-z][a-z0-9]*(\.[a-z0-9]+)+)[\S]*))(\b)/dg
while ((match = re.exec(text))) {
let value = match[2]
if (!value.startsWith('http')) {
const domain = match.groups?.domain
if (!domain || !isValidDomain(domain)) {
continue
}
value = `https://${value}`
}
ents.push({
@ -110,7 +123,7 @@ interface DetectedLink {
type DetectedLinkable = string | DetectedLink
export function detectLinkables(text: string): DetectedLinkable[] {
const re =
/((^|\s)@[a-z0-9\.-]*)|((^|\s)https?:\/\/[\S]+)|((^|\s)[a-z][a-z0-9]*(\.[a-z0-9]+)+[\S]*)/gi
/((^|\s)@[a-z0-9\.-]*)|((^|\s)https?:\/\/[\S]+)|((^|\s)(?<domain>[a-z][a-z0-9]*(\.[a-z0-9]+)+)[\S]*)/gi
const segments = []
let match
let start = 0
@ -118,6 +131,10 @@ export function detectLinkables(text: string): DetectedLinkable[] {
let matchIndex = match.index
let matchValue = match[0]
if (match.groups?.domain && !isValidDomain(match.groups?.domain)) {
continue
}
if (/\s/.test(matchValue)) {
// HACK
// skip the starting space

View File

@ -11708,6 +11708,11 @@ thunky@^1.0.2:
resolved "https://registry.yarnpkg.com/thunky/-/thunky-1.1.0.tgz#5abaf714a9405db0504732bbccd2cedd9ef9537d"
integrity sha512-eHY7nBftgThBqOyHGVN+l8gF0BucP09fMo0oO/Lb0w1OF80dJv+lDVpXG60WMQvkcxAkNybKsrEIE3ZtKGmPrA==
tlds@^1.234.0:
version "1.234.0"
resolved "https://registry.yarnpkg.com/tlds/-/tlds-1.234.0.tgz#f61fe73f6e85c51f8503181f47dcfbd18c6910db"
integrity sha512-TNDfeyDIC+oroH44bMbWC+Jn/2qNrfRvDK2EXt1icOXYG5NMqoRyUosADrukfb4D8lJ3S1waaBWSvQro0erdng==
tmpl@1.0.5:
version "1.0.5"
resolved "https://registry.yarnpkg.com/tmpl/-/tmpl-1.0.5.tgz#8683e0b902bb9c20c4f726e3c0b69f36518c07cc"