Fix: don't detect double dots as URLs

This commit is contained in:
Paul Frazee 2022-11-23 13:15:38 -06:00
parent 67906db720
commit 0840c3f8f7
2 changed files with 6 additions and 2 deletions

View file

@ -26,6 +26,7 @@ describe('extractEntities', () => {
'start.com/foo/bar?baz=bux#hash middle end', 'start.com/foo/bar?baz=bux#hash middle end',
'start middle end.com/foo/bar?baz=bux#hash', 'start middle end.com/foo/bar?baz=bux#hash',
'newline1.com\nnewline2.com', 'newline1.com\nnewline2.com',
'not.. a..url ..here',
] ]
interface Output { interface Output {
type: string type: string
@ -74,6 +75,7 @@ describe('extractEntities', () => {
{type: 'link', value: 'newline1.com', noScheme: true}, {type: 'link', value: 'newline1.com', noScheme: true},
{type: 'link', value: 'newline2.com', noScheme: true}, {type: 'link', value: 'newline2.com', noScheme: true},
], ],
[],
] ]
it('correctly handles a set of text inputs', () => { it('correctly handles a set of text inputs', () => {
for (let i = 0; i < inputs.length; i++) { for (let i = 0; i < inputs.length; i++) {
@ -138,6 +140,7 @@ describe('detectLinkables', () => {
'start.com/foo/bar?baz=bux#hash middle end', 'start.com/foo/bar?baz=bux#hash middle end',
'start middle end.com/foo/bar?baz=bux#hash', 'start middle end.com/foo/bar?baz=bux#hash',
'newline1.com\nnewline2.com', 'newline1.com\nnewline2.com',
'not.. a..url ..here',
] ]
const outputs = [ const outputs = [
['no linkable'], ['no linkable'],
@ -163,6 +166,7 @@ describe('detectLinkables', () => {
[{link: 'start.com/foo/bar?baz=bux#hash'}, ' middle end'], [{link: 'start.com/foo/bar?baz=bux#hash'}, ' middle end'],
['start middle ', {link: 'end.com/foo/bar?baz=bux#hash'}], ['start middle ', {link: 'end.com/foo/bar?baz=bux#hash'}],
[{link: 'newline1.com'}, '\n', {link: 'newline2.com'}], [{link: 'newline1.com'}, '\n', {link: 'newline2.com'}],
['not.. a..url ..here'],
] ]
it('correctly handles a set of text inputs', () => { it('correctly handles a set of text inputs', () => {
for (let i = 0; i < inputs.length; i++) { for (let i = 0; i < inputs.length; i++) {

View file

@ -83,7 +83,7 @@ export function extractEntities(
{ {
// links // links
const re = const re =
/(^|\s)((https?:\/\/[\S]+)|([a-z][a-z0-9]*\.[a-z0-9\.]+[\S]*))(\b)/dg /(^|\s)((https?:\/\/[\S]+)|([a-z][a-z0-9]*(\.[a-z0-9]+)+[\S]*))(\b)/dg
while ((match = re.exec(text))) { while ((match = re.exec(text))) {
let value = match[2] let value = match[2]
if (!value.startsWith('http')) { if (!value.startsWith('http')) {
@ -108,7 +108,7 @@ interface DetectedLink {
type DetectedLinkable = string | DetectedLink type DetectedLinkable = string | DetectedLink
export function detectLinkables(text: string): DetectedLinkable[] { export function detectLinkables(text: string): DetectedLinkable[] {
const re = const re =
/((^|\s)@[a-z0-9\.-]*)|((^|\s)https?:\/\/[\S]+)|((^|\s)[a-z][a-z0-9]*\.[a-z0-9\.]+[\S]*)/gi /((^|\s)@[a-z0-9\.-]*)|((^|\s)https?:\/\/[\S]+)|((^|\s)[a-z][a-z0-9]*(\.[a-z0-9]+)+[\S]*)/gi
const segments = [] const segments = []
let match let match
let start = 0 let start = 0