diff --git a/__tests__/string-utils.ts b/__tests__/string-utils.ts index c677b44d..fc7a8f27 100644 --- a/__tests__/string-utils.ts +++ b/__tests__/string-utils.ts @@ -31,6 +31,11 @@ describe('extractEntities', () => { 'start middle end.com/foo/bar?baz=bux#hash', 'newline1.com\nnewline2.com', 'not.. a..url ..here', + 'e.g.', + 'something-cool.jpg', + 'website.com.jpg', + 'e.g./foo', + 'website.com.jpg/foo', ] interface Output { type: string @@ -80,6 +85,11 @@ describe('extractEntities', () => { {type: 'link', value: 'newline2.com', noScheme: true}, ], [], + [], + [], + [], + [], + [], ] it('correctly handles a set of text inputs', () => { for (let i = 0; i < inputs.length; i++) { @@ -145,6 +155,12 @@ describe('detectLinkables', () => { 'start middle end.com/foo/bar?baz=bux#hash', 'newline1.com\nnewline2.com', 'not.. a..url ..here', + 'e.g.', + 'e.g. real.com fake.notreal', + 'something-cool.jpg', + 'website.com.jpg', + 'e.g./foo', + 'website.com.jpg/foo', ] const outputs = [ ['no linkable'], @@ -171,6 +187,12 @@ describe('detectLinkables', () => { ['start middle ', {link: 'end.com/foo/bar?baz=bux#hash'}], [{link: 'newline1.com'}, '\n', {link: 'newline2.com'}], ['not.. a..url ..here'], + ['e.g.'], + ['e.g. 
', {link: 'real.com'}, ' fake.notreal'], + ['something-cool.jpg'], + ['website.com.jpg'], + ['e.g./foo'], + ['website.com.jpg/foo'], ] it('correctly handles a set of text inputs', () => { for (let i = 0; i < inputs.length; i++) { diff --git a/package.json b/package.json index f500c0e8..6d54102c 100644 --- a/package.json +++ b/package.json @@ -46,7 +46,8 @@ "react-native-svg": "^12.4.0", "react-native-tab-view": "^3.3.0", "react-native-url-polyfill": "^1.3.0", - "react-native-web": "^0.17.7" + "react-native-web": "^0.17.7", + "tlds": "^1.234.0" }, "devDependencies": { "@babel/core": "^7.12.9", @@ -74,7 +75,9 @@ }, "jest": { "preset": "react-native", - "setupFiles": ["./jest.js"], + "setupFiles": [ + "./jest.js" + ], "moduleFileExtensions": [ "ts", "tsx", diff --git a/src/lib/strings.ts b/src/lib/strings.ts index 032eec56..fb9d15b2 100644 --- a/src/lib/strings.ts +++ b/src/lib/strings.ts @@ -1,6 +1,7 @@ import {AtUri} from '../third-party/uri' import {Entity} from '../third-party/api/src/client/types/app/bsky/feed/post' import {PROD_SERVICE} from '../state' +import TLDs from 'tlds' export const MAX_DISPLAY_NAME = 64 export const MAX_DESCRIPTION = 256 @@ -57,6 +58,14 @@ export function ago(date: number | string | Date): string { } } +export function isValidDomain(str: string): boolean { + return !!TLDs.find(tld => { + let i = str.lastIndexOf(tld) + if (i === -1) return false + return str.charAt(i - 1) === '.' 
&& i === str.length - tld.length + }) +} + export function extractEntities( text: string, knownHandles?: Set<string>, @@ -85,10 +94,14 @@ export function extractEntities( { // links const re = - /(^|\s)((https?:\/\/[\S]+)|([a-z][a-z0-9]*(\.[a-z0-9]+)+[\S]*))(\b)/dg + /(^|\s)((https?:\/\/[\S]+)|((?<domain>[a-z][a-z0-9]*(\.[a-z0-9]+)+)[\S]*))(\b)/dg while ((match = re.exec(text))) { let value = match[2] if (!value.startsWith('http')) { + const domain = match.groups?.domain + if (!domain || !isValidDomain(domain)) { + continue + } value = `https://${value}` } ents.push({ @@ -110,7 +123,7 @@ interface DetectedLink { type DetectedLinkable = string | DetectedLink export function detectLinkables(text: string): DetectedLinkable[] { const re = - /((^|\s)@[a-z0-9\.-]*)|((^|\s)https?:\/\/[\S]+)|((^|\s)[a-z][a-z0-9]*(\.[a-z0-9]+)+[\S]*)/gi + /((^|\s)@[a-z0-9\.-]*)|((^|\s)https?:\/\/[\S]+)|((^|\s)(?<domain>[a-z][a-z0-9]*(\.[a-z0-9]+)+)[\S]*)/gi const segments = [] let match let start = 0 @@ -118,6 +131,10 @@ export function detectLinkables(text: string): DetectedLinkable[] { let matchIndex = match.index let matchValue = match[0] + if (match.groups?.domain && !isValidDomain(match.groups?.domain)) { + continue + } + if (/\s/.test(matchValue)) { // HACK // skip the starting space diff --git a/yarn.lock b/yarn.lock index ce2d532d..21f33f82 100644 --- a/yarn.lock +++ b/yarn.lock @@ -11708,6 +11708,11 @@ thunky@^1.0.2: resolved "https://registry.yarnpkg.com/thunky/-/thunky-1.1.0.tgz#5abaf714a9405db0504732bbccd2cedd9ef9537d" integrity sha512-eHY7nBftgThBqOyHGVN+l8gF0BucP09fMo0oO/Lb0w1OF80dJv+lDVpXG60WMQvkcxAkNybKsrEIE3ZtKGmPrA== +tlds@^1.234.0: + version "1.234.0" + resolved "https://registry.yarnpkg.com/tlds/-/tlds-1.234.0.tgz#f61fe73f6e85c51f8503181f47dcfbd18c6910db" + integrity sha512-TNDfeyDIC+oroH44bMbWC+Jn/2qNrfRvDK2EXt1icOXYG5NMqoRyUosADrukfb4D8lJ3S1waaBWSvQro0erdng== + tmpl@1.0.5: version "1.0.5" resolved "https://registry.yarnpkg.com/tmpl/-/tmpl-1.0.5.tgz#8683e0b902bb9c20c4f726e3c0b69f36518c07cc"