Fix a couple incorrect link detections ('e.g.' and 'foo.jpg') (close #13)
parent
8723b51693
commit
bcb1ad98de
|
@ -31,6 +31,11 @@ describe('extractEntities', () => {
|
||||||
'start middle end.com/foo/bar?baz=bux#hash',
|
'start middle end.com/foo/bar?baz=bux#hash',
|
||||||
'newline1.com\nnewline2.com',
|
'newline1.com\nnewline2.com',
|
||||||
'not.. a..url ..here',
|
'not.. a..url ..here',
|
||||||
|
'e.g.',
|
||||||
|
'something-cool.jpg',
|
||||||
|
'website.com.jpg',
|
||||||
|
'e.g./foo',
|
||||||
|
'website.com.jpg/foo',
|
||||||
]
|
]
|
||||||
interface Output {
|
interface Output {
|
||||||
type: string
|
type: string
|
||||||
|
@ -80,6 +85,11 @@ describe('extractEntities', () => {
|
||||||
{type: 'link', value: 'newline2.com', noScheme: true},
|
{type: 'link', value: 'newline2.com', noScheme: true},
|
||||||
],
|
],
|
||||||
[],
|
[],
|
||||||
|
[],
|
||||||
|
[],
|
||||||
|
[],
|
||||||
|
[],
|
||||||
|
[],
|
||||||
]
|
]
|
||||||
it('correctly handles a set of text inputs', () => {
|
it('correctly handles a set of text inputs', () => {
|
||||||
for (let i = 0; i < inputs.length; i++) {
|
for (let i = 0; i < inputs.length; i++) {
|
||||||
|
@ -145,6 +155,12 @@ describe('detectLinkables', () => {
|
||||||
'start middle end.com/foo/bar?baz=bux#hash',
|
'start middle end.com/foo/bar?baz=bux#hash',
|
||||||
'newline1.com\nnewline2.com',
|
'newline1.com\nnewline2.com',
|
||||||
'not.. a..url ..here',
|
'not.. a..url ..here',
|
||||||
|
'e.g.',
|
||||||
|
'e.g. real.com fake.notreal',
|
||||||
|
'something-cool.jpg',
|
||||||
|
'website.com.jpg',
|
||||||
|
'e.g./foo',
|
||||||
|
'website.com.jpg/foo',
|
||||||
]
|
]
|
||||||
const outputs = [
|
const outputs = [
|
||||||
['no linkable'],
|
['no linkable'],
|
||||||
|
@ -171,6 +187,12 @@ describe('detectLinkables', () => {
|
||||||
['start middle ', {link: 'end.com/foo/bar?baz=bux#hash'}],
|
['start middle ', {link: 'end.com/foo/bar?baz=bux#hash'}],
|
||||||
[{link: 'newline1.com'}, '\n', {link: 'newline2.com'}],
|
[{link: 'newline1.com'}, '\n', {link: 'newline2.com'}],
|
||||||
['not.. a..url ..here'],
|
['not.. a..url ..here'],
|
||||||
|
['e.g.'],
|
||||||
|
['e.g. ', {link: 'real.com'}, ' fake.notreal'],
|
||||||
|
['something-cool.jpg'],
|
||||||
|
['website.com.jpg'],
|
||||||
|
['e.g./foo'],
|
||||||
|
['website.com.jpg/foo'],
|
||||||
]
|
]
|
||||||
it('correctly handles a set of text inputs', () => {
|
it('correctly handles a set of text inputs', () => {
|
||||||
for (let i = 0; i < inputs.length; i++) {
|
for (let i = 0; i < inputs.length; i++) {
|
||||||
|
|
|
@ -46,7 +46,8 @@
|
||||||
"react-native-svg": "^12.4.0",
|
"react-native-svg": "^12.4.0",
|
||||||
"react-native-tab-view": "^3.3.0",
|
"react-native-tab-view": "^3.3.0",
|
||||||
"react-native-url-polyfill": "^1.3.0",
|
"react-native-url-polyfill": "^1.3.0",
|
||||||
"react-native-web": "^0.17.7"
|
"react-native-web": "^0.17.7",
|
||||||
|
"tlds": "^1.234.0"
|
||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
"@babel/core": "^7.12.9",
|
"@babel/core": "^7.12.9",
|
||||||
|
@ -74,7 +75,9 @@
|
||||||
},
|
},
|
||||||
"jest": {
|
"jest": {
|
||||||
"preset": "react-native",
|
"preset": "react-native",
|
||||||
"setupFiles": ["./jest.js"],
|
"setupFiles": [
|
||||||
|
"./jest.js"
|
||||||
|
],
|
||||||
"moduleFileExtensions": [
|
"moduleFileExtensions": [
|
||||||
"ts",
|
"ts",
|
||||||
"tsx",
|
"tsx",
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
import {AtUri} from '../third-party/uri'
|
import {AtUri} from '../third-party/uri'
|
||||||
import {Entity} from '../third-party/api/src/client/types/app/bsky/feed/post'
|
import {Entity} from '../third-party/api/src/client/types/app/bsky/feed/post'
|
||||||
import {PROD_SERVICE} from '../state'
|
import {PROD_SERVICE} from '../state'
|
||||||
|
import TLDs from 'tlds'
|
||||||
|
|
||||||
export const MAX_DISPLAY_NAME = 64
|
export const MAX_DISPLAY_NAME = 64
|
||||||
export const MAX_DESCRIPTION = 256
|
export const MAX_DESCRIPTION = 256
|
||||||
|
@ -57,6 +58,14 @@ export function ago(date: number | string | Date): string {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Reports whether `str` ends with a dot followed by one of the top-level
 * domains listed in the `tlds` package (for example `example.com`).
 *
 * The comparison is case-insensitive: `detectLinkables` matches candidate
 * domains with a case-insensitive regex, so mixed-case input such as
 * `Example.COM` can reach this function and must still be accepted.
 *
 * @param str - Candidate domain name, without scheme or path.
 * @returns `true` when `str` ends in `.<tld>` for some known TLD.
 */
export function isValidDomain(str: string): boolean {
  // NOTE(review): assumes every entry of `TLDs` is lowercase — confirm against
  // the `tlds` package before relying on this for new list sources.
  const candidate = str.toLowerCase()
  // A suffix test on ".<tld>" requires both the separating dot and the TLD to
  // sit at the very end of the string, so "website.com.jpg" is not accepted
  // merely because it contains ".com".
  return TLDs.some(tld => candidate.endsWith(`.${tld}`))
}
|
||||||
|
|
||||||
export function extractEntities(
|
export function extractEntities(
|
||||||
text: string,
|
text: string,
|
||||||
knownHandles?: Set<string>,
|
knownHandles?: Set<string>,
|
||||||
|
@ -85,10 +94,14 @@ export function extractEntities(
|
||||||
{
|
{
|
||||||
// links
|
// links
|
||||||
const re =
|
const re =
|
||||||
/(^|\s)((https?:\/\/[\S]+)|([a-z][a-z0-9]*(\.[a-z0-9]+)+[\S]*))(\b)/dg
|
/(^|\s)((https?:\/\/[\S]+)|((?<domain>[a-z][a-z0-9]*(\.[a-z0-9]+)+)[\S]*))(\b)/dg
|
||||||
while ((match = re.exec(text))) {
|
while ((match = re.exec(text))) {
|
||||||
let value = match[2]
|
let value = match[2]
|
||||||
if (!value.startsWith('http')) {
|
if (!value.startsWith('http')) {
|
||||||
|
const domain = match.groups?.domain
|
||||||
|
if (!domain || !isValidDomain(domain)) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
value = `https://${value}`
|
value = `https://${value}`
|
||||||
}
|
}
|
||||||
ents.push({
|
ents.push({
|
||||||
|
@ -110,7 +123,7 @@ interface DetectedLink {
|
||||||
type DetectedLinkable = string | DetectedLink
|
type DetectedLinkable = string | DetectedLink
|
||||||
export function detectLinkables(text: string): DetectedLinkable[] {
|
export function detectLinkables(text: string): DetectedLinkable[] {
|
||||||
const re =
|
const re =
|
||||||
/((^|\s)@[a-z0-9\.-]*)|((^|\s)https?:\/\/[\S]+)|((^|\s)[a-z][a-z0-9]*(\.[a-z0-9]+)+[\S]*)/gi
|
/((^|\s)@[a-z0-9\.-]*)|((^|\s)https?:\/\/[\S]+)|((^|\s)(?<domain>[a-z][a-z0-9]*(\.[a-z0-9]+)+)[\S]*)/gi
|
||||||
const segments = []
|
const segments = []
|
||||||
let match
|
let match
|
||||||
let start = 0
|
let start = 0
|
||||||
|
@ -118,6 +131,10 @@ export function detectLinkables(text: string): DetectedLinkable[] {
|
||||||
let matchIndex = match.index
|
let matchIndex = match.index
|
||||||
let matchValue = match[0]
|
let matchValue = match[0]
|
||||||
|
|
||||||
|
if (match.groups?.domain && !isValidDomain(match.groups?.domain)) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
if (/\s/.test(matchValue)) {
|
if (/\s/.test(matchValue)) {
|
||||||
// HACK
|
// HACK
|
||||||
// skip the starting space
|
// skip the starting space
|
||||||
|
|
|
@ -11708,6 +11708,11 @@ thunky@^1.0.2:
|
||||||
resolved "https://registry.yarnpkg.com/thunky/-/thunky-1.1.0.tgz#5abaf714a9405db0504732bbccd2cedd9ef9537d"
|
resolved "https://registry.yarnpkg.com/thunky/-/thunky-1.1.0.tgz#5abaf714a9405db0504732bbccd2cedd9ef9537d"
|
||||||
integrity sha512-eHY7nBftgThBqOyHGVN+l8gF0BucP09fMo0oO/Lb0w1OF80dJv+lDVpXG60WMQvkcxAkNybKsrEIE3ZtKGmPrA==
|
integrity sha512-eHY7nBftgThBqOyHGVN+l8gF0BucP09fMo0oO/Lb0w1OF80dJv+lDVpXG60WMQvkcxAkNybKsrEIE3ZtKGmPrA==
|
||||||
|
|
||||||
|
tlds@^1.234.0:
|
||||||
|
version "1.234.0"
|
||||||
|
resolved "https://registry.yarnpkg.com/tlds/-/tlds-1.234.0.tgz#f61fe73f6e85c51f8503181f47dcfbd18c6910db"
|
||||||
|
integrity sha512-TNDfeyDIC+oroH44bMbWC+Jn/2qNrfRvDK2EXt1icOXYG5NMqoRyUosADrukfb4D8lJ3S1waaBWSvQro0erdng==
|
||||||
|
|
||||||
tmpl@1.0.5:
|
tmpl@1.0.5:
|
||||||
version "1.0.5"
|
version "1.0.5"
|
||||||
resolved "https://registry.yarnpkg.com/tmpl/-/tmpl-1.0.5.tgz#8683e0b902bb9c20c4f726e3c0b69f36518c07cc"
|
resolved "https://registry.yarnpkg.com/tmpl/-/tmpl-1.0.5.tgz#8683e0b902bb9c20c4f726e3c0b69f36518c07cc"
|
||||||
|
|
Loading…
Reference in New Issue