bsky-app/__tests__/string-utils.ts

320 lines
11 KiB
TypeScript
Raw Normal View History

import {
extractEntities,
detectLinkables,
extractHtmlMeta,
} from '../src/lib/strings'
2022-10-04 17:15:35 +02:00
describe('extractEntities', () => {
const knownHandles = new Set(['handle.com', 'full123.test-of-chars'])
2022-10-04 17:15:35 +02:00
const inputs = [
'no mention',
'@handle.com middle end',
'start @handle.com end',
'start middle @handle.com',
'@handle.com @handle.com @handle.com',
'@full123.test-of-chars',
'not@right',
'@handle.com!@#$chars',
'@handle.com\n@handle.com',
'parenthetical (@handle.com)',
'start https://middle.com end',
'start https://middle.com/foo/bar end',
'start https://middle.com/foo/bar?baz=bux end',
'start https://middle.com/foo/bar?baz=bux#hash end',
'https://start.com/foo/bar?baz=bux#hash middle end',
'start middle https://end.com/foo/bar?baz=bux#hash',
'https://newline1.com\nhttps://newline2.com',
'start middle.com end',
'start middle.com/foo/bar end',
'start middle.com/foo/bar?baz=bux end',
'start middle.com/foo/bar?baz=bux#hash end',
'start.com/foo/bar?baz=bux#hash middle end',
'start middle end.com/foo/bar?baz=bux#hash',
'newline1.com\nnewline2.com',
2022-11-23 20:15:38 +01:00
'not.. a..url ..here',
'e.g.',
'something-cool.jpg',
'website.com.jpg',
'e.g./foo',
'website.com.jpg/foo',
'Classic article https://socket3.wordpress.com/2018/02/03/designing-windows-95s-user-interface/',
'Classic article https://socket3.wordpress.com/2018/02/03/designing-windows-95s-user-interface/ ',
'https://foo.com https://bar.com/whatever https://baz.com',
'punctuation https://foo.com, https://bar.com/whatever; https://baz.com.',
'parenthentical (https://foo.com)',
'except for https://foo.com/thing_(cool)',
]
interface Output {
type: string
value: string
noScheme?: boolean
}
const outputs: Output[][] = [
[],
[{type: 'mention', value: 'handle.com'}],
[{type: 'mention', value: 'handle.com'}],
[{type: 'mention', value: 'handle.com'}],
[
{type: 'mention', value: 'handle.com'},
{type: 'mention', value: 'handle.com'},
{type: 'mention', value: 'handle.com'},
],
[
{
type: 'mention',
value: 'full123.test-of-chars',
},
],
[],
[{type: 'mention', value: 'handle.com'}],
[
{type: 'mention', value: 'handle.com'},
{type: 'mention', value: 'handle.com'},
],
[{type: 'mention', value: 'handle.com'}],
[{type: 'link', value: 'https://middle.com'}],
[{type: 'link', value: 'https://middle.com/foo/bar'}],
[{type: 'link', value: 'https://middle.com/foo/bar?baz=bux'}],
[{type: 'link', value: 'https://middle.com/foo/bar?baz=bux#hash'}],
[{type: 'link', value: 'https://start.com/foo/bar?baz=bux#hash'}],
[{type: 'link', value: 'https://end.com/foo/bar?baz=bux#hash'}],
[
{type: 'link', value: 'https://newline1.com'},
{type: 'link', value: 'https://newline2.com'},
],
[{type: 'link', value: 'middle.com', noScheme: true}],
[{type: 'link', value: 'middle.com/foo/bar', noScheme: true}],
[{type: 'link', value: 'middle.com/foo/bar?baz=bux', noScheme: true}],
[{type: 'link', value: 'middle.com/foo/bar?baz=bux#hash', noScheme: true}],
[{type: 'link', value: 'start.com/foo/bar?baz=bux#hash', noScheme: true}],
[{type: 'link', value: 'end.com/foo/bar?baz=bux#hash', noScheme: true}],
[
{type: 'link', value: 'newline1.com', noScheme: true},
{type: 'link', value: 'newline2.com', noScheme: true},
],
2022-11-23 20:15:38 +01:00
[],
[],
[],
[],
[],
[],
[
{
type: 'link',
value:
'https://socket3.wordpress.com/2018/02/03/designing-windows-95s-user-interface/',
},
],
[
{
type: 'link',
value:
'https://socket3.wordpress.com/2018/02/03/designing-windows-95s-user-interface/',
},
],
[
{type: 'link', value: 'https://foo.com'},
{type: 'link', value: 'https://bar.com/whatever'},
{type: 'link', value: 'https://baz.com'},
],
[
{type: 'link', value: 'https://foo.com'},
{type: 'link', value: 'https://bar.com/whatever'},
{type: 'link', value: 'https://baz.com'},
],
[{type: 'link', value: 'https://foo.com'}],
[{type: 'link', value: 'https://foo.com/thing_(cool)'}],
]
it('correctly handles a set of text inputs', () => {
for (let i = 0; i < inputs.length; i++) {
const input = inputs[i]
const result = extractEntities(input, knownHandles)
if (!outputs[i].length) {
expect(result).toBeFalsy()
} else if (outputs[i].length && !result) {
expect(result).toBeTruthy()
} else if (result) {
expect(result.length).toBe(outputs[i].length)
for (let j = 0; j < outputs[i].length; j++) {
expect(result[j].type).toEqual(outputs[i][j].type)
if (outputs[i][j].noScheme) {
expect(result[j].value).toEqual(`https://${outputs[i][j].value}`)
} else {
expect(result[j].value).toEqual(outputs[i][j].value)
}
if (outputs[i]?.[j].type === 'mention') {
expect(
input.slice(result[j].index.start, result[j].index.end),
).toBe(`@${result[j].value}`)
} else {
if (!outputs[i]?.[j].noScheme) {
expect(
input.slice(result[j].index.start, result[j].index.end),
).toBe(result[j].value)
} else {
expect(
input.slice(result[j].index.start, result[j].index.end),
).toBe(result[j].value.slice('https://'.length))
}
}
}
}
}
})
})
describe('detectLinkables', () => {
const inputs = [
'no linkable',
2022-10-04 17:15:35 +02:00
'@start middle end',
'start @middle end',
'start middle @end',
'@start @middle @end',
'@full123.test-of-chars',
'not@right',
'@bad!@#$chars',
'@newline1\n@newline2',
'parenthetical (@handle)',
'start https://middle.com end',
'start https://middle.com/foo/bar end',
'start https://middle.com/foo/bar?baz=bux end',
'start https://middle.com/foo/bar?baz=bux#hash end',
'https://start.com/foo/bar?baz=bux#hash middle end',
'start middle https://end.com/foo/bar?baz=bux#hash',
'https://newline1.com\nhttps://newline2.com',
'start middle.com end',
'start middle.com/foo/bar end',
'start middle.com/foo/bar?baz=bux end',
'start middle.com/foo/bar?baz=bux#hash end',
'start.com/foo/bar?baz=bux#hash middle end',
'start middle end.com/foo/bar?baz=bux#hash',
'newline1.com\nnewline2.com',
2022-11-23 20:15:38 +01:00
'not.. a..url ..here',
'e.g.',
'e.g. real.com fake.notreal',
'something-cool.jpg',
'website.com.jpg',
'e.g./foo',
'website.com.jpg/foo',
'Classic article https://socket3.wordpress.com/2018/02/03/designing-windows-95s-user-interface/',
'Classic article https://socket3.wordpress.com/2018/02/03/designing-windows-95s-user-interface/ ',
'https://foo.com https://bar.com/whatever https://baz.com',
'punctuation https://foo.com, https://bar.com/whatever; https://baz.com.',
'parenthentical (https://foo.com)',
'except for https://foo.com/thing_(cool)',
2022-10-04 17:15:35 +02:00
]
const outputs = [
['no linkable'],
[{link: '@start'}, ' middle end'],
['start ', {link: '@middle'}, ' end'],
['start middle ', {link: '@end'}],
[{link: '@start'}, ' ', {link: '@middle'}, ' ', {link: '@end'}],
[{link: '@full123.test-of-chars'}],
['not@right'],
[{link: '@bad'}, '!@#$chars'],
[{link: '@newline1'}, '\n', {link: '@newline2'}],
['parenthetical (', {link: '@handle'}, ')'],
['start ', {link: 'https://middle.com'}, ' end'],
['start ', {link: 'https://middle.com/foo/bar'}, ' end'],
['start ', {link: 'https://middle.com/foo/bar?baz=bux'}, ' end'],
['start ', {link: 'https://middle.com/foo/bar?baz=bux#hash'}, ' end'],
[{link: 'https://start.com/foo/bar?baz=bux#hash'}, ' middle end'],
['start middle ', {link: 'https://end.com/foo/bar?baz=bux#hash'}],
[{link: 'https://newline1.com'}, '\n', {link: 'https://newline2.com'}],
['start ', {link: 'middle.com'}, ' end'],
['start ', {link: 'middle.com/foo/bar'}, ' end'],
['start ', {link: 'middle.com/foo/bar?baz=bux'}, ' end'],
['start ', {link: 'middle.com/foo/bar?baz=bux#hash'}, ' end'],
[{link: 'start.com/foo/bar?baz=bux#hash'}, ' middle end'],
['start middle ', {link: 'end.com/foo/bar?baz=bux#hash'}],
[{link: 'newline1.com'}, '\n', {link: 'newline2.com'}],
2022-11-23 20:15:38 +01:00
['not.. a..url ..here'],
['e.g.'],
['e.g. ', {link: 'real.com'}, ' fake.notreal'],
['something-cool.jpg'],
['website.com.jpg'],
['e.g./foo'],
['website.com.jpg/foo'],
[
'Classic article ',
{
link: 'https://socket3.wordpress.com/2018/02/03/designing-windows-95s-user-interface/',
},
],
[
'Classic article ',
{
link: 'https://socket3.wordpress.com/2018/02/03/designing-windows-95s-user-interface/',
},
' ',
],
[
{link: 'https://foo.com'},
' ',
{link: 'https://bar.com/whatever'},
' ',
{link: 'https://baz.com'},
],
[
'punctuation ',
{link: 'https://foo.com'},
', ',
{link: 'https://bar.com/whatever'},
'; ',
{link: 'https://baz.com'},
'.',
],
['parenthentical (', {link: 'https://foo.com'}, ')'],
['except for ', {link: 'https://foo.com/thing_(cool)'}],
2022-10-04 17:15:35 +02:00
]
it('correctly handles a set of text inputs', () => {
for (let i = 0; i < inputs.length; i++) {
const input = inputs[i]
const output = detectLinkables(input)
2022-10-04 17:15:35 +02:00
expect(output).toEqual(outputs[i])
}
})
})
describe('extractHtmlMeta', () => {
const inputs = [
'',
'nothing',
'<title>title</title>',
'<title> aSd!@#AC </title>',
'<title>\n title\n </title>',
'<meta name="title" content="meta title">',
'<meta name="description" content="meta description">',
'<meta property="og:title" content="og title">',
'<meta property="og:description" content="og description">',
'<meta property="og:image" content="https://ogimage.com/foo.png">',
'<meta property="twitter:title" content="twitter title">',
'<meta property="twitter:description" content="twitter description">',
'<meta property="twitter:image" content="https://twitterimage.com/foo.png">',
'<meta\n name="title"\n content="meta title"\n>',
]
const outputs = [
{},
{},
{title: 'title'},
{title: 'aSd!@#AC'},
{title: 'title'},
{title: 'meta title'},
{description: 'meta description'},
{title: 'og title'},
{description: 'og description'},
{image: 'https://ogimage.com/foo.png'},
{title: 'twitter title'},
{description: 'twitter description'},
{image: 'https://twitterimage.com/foo.png'},
{title: 'meta title'},
]
it('correctly handles a set of text inputs', () => {
for (let i = 0; i < inputs.length; i++) {
const input = inputs[i]
const output = extractHtmlMeta(input)
expect(output).toEqual(outputs[i])
}
})
})