Implement a link metadata fetching util function
parent
eb106a9758
commit
89638dbd18
|
@ -0,0 +1,118 @@
|
|||
import {LikelyType, getLinkMeta} from '../src/lib/link-meta'
|
||||
|
||||
const exampleComHtml = `<!doctype html>
|
||||
<html>
|
||||
<head>
|
||||
<title>Example Domain</title>
|
||||
<meta name="description" content="An example website">
|
||||
|
||||
<meta charset="utf-8" />
|
||||
<meta http-equiv="Content-type" content="text/html; charset=utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
||||
<style type="text/css">
|
||||
body {
|
||||
background-color: #f0f0f2;
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
font-family: -apple-system, system-ui, BlinkMacSystemFont, "Segoe UI", "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif;
|
||||
|
||||
}
|
||||
div {
|
||||
width: 600px;
|
||||
margin: 5em auto;
|
||||
padding: 2em;
|
||||
background-color: #fdfdff;
|
||||
border-radius: 0.5em;
|
||||
box-shadow: 2px 3px 7px 2px rgba(0,0,0,0.02);
|
||||
}
|
||||
a:link, a:visited {
|
||||
color: #38488f;
|
||||
text-decoration: none;
|
||||
}
|
||||
@media (max-width: 700px) {
|
||||
div {
|
||||
margin: 0 auto;
|
||||
width: auto;
|
||||
}
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<div>
|
||||
<h1>Example Domain</h1>
|
||||
<p>This domain is for use in illustrative examples in documents. You may use this
|
||||
domain in literature without prior coordination or asking for permission.</p>
|
||||
<p><a href="https://www.iana.org/domains/example">More information...</a></p>
|
||||
</div>
|
||||
</body>
|
||||
</html>`
|
||||
|
||||
describe('getLinkMeta', () => {
  // Each row pairs the URL handed to getLinkMeta with the exact object it is
  // expected to resolve to. Keeping input and expectation side by side avoids
  // the index drift that two parallel arrays invite.
  const cases: Array<[string, Record<string, unknown>]> = [
    ['', {error: 'Invalid URL', likelyType: LikelyType.Other, url: ''}],
    [
      'httpbadurl',
      {error: 'Invalid URL', likelyType: LikelyType.Other, url: 'httpbadurl'},
    ],
    [
      'https://example.com',
      {
        likelyType: LikelyType.HTML,
        url: 'https://example.com',
        title: 'Example Domain',
        description: 'An example website',
      },
    ],
    [
      'https://example.com/index.html',
      {
        likelyType: LikelyType.HTML,
        url: 'https://example.com/index.html',
        title: 'Example Domain',
        description: 'An example website',
      },
    ],
    [
      'https://example.com/image.png',
      {likelyType: LikelyType.Image, url: 'https://example.com/image.png'},
    ],
    [
      'https://example.com/video.avi',
      {likelyType: LikelyType.Video, url: 'https://example.com/video.avi'},
    ],
    [
      'https://example.com/audio.ogg',
      {likelyType: LikelyType.Audio, url: 'https://example.com/audio.ogg'},
    ],
    [
      'https://example.com/javascript.js',
      {likelyType: LikelyType.Other, url: 'https://example.com/javascript.js'},
    ],
  ]

  // Remember whatever fetch the environment provided so the mock installed
  // below does not leak into other test suites.
  const originalFetch = global.fetch

  beforeEach(() => {
    // Every case gets the same canned 200 response carrying the example.com
    // HTML. `text()` resolves a Promise, mirroring the real Response API.
    global.fetch = jest.fn().mockResolvedValueOnce({
      ok: true,
      status: 200,
      text: () => Promise.resolve(exampleComHtml),
    })
  })

  afterEach(() => {
    global.fetch = originalFetch
  })

  it.each(cases)('resolves the expected metadata for %j', async (input, expected) => {
    const output = await getLinkMeta(input)
    expect(output).toEqual(expected)
  })
})
|
|
@ -1,17 +1,21 @@
|
|||
import {extractEntities, detectLinkables} from '../src/lib/strings'
|
||||
import {
|
||||
extractEntities,
|
||||
detectLinkables,
|
||||
extractHtmlMeta,
|
||||
} from '../src/lib/strings'
|
||||
|
||||
describe('extractEntities', () => {
|
||||
const knownHandles = new Set(['handle', 'full123.test-of-chars'])
|
||||
const knownHandles = new Set(['handle.com', 'full123.test-of-chars'])
|
||||
const inputs = [
|
||||
'no mention',
|
||||
'@handle middle end',
|
||||
'start @handle end',
|
||||
'start middle @handle',
|
||||
'@handle @handle @handle',
|
||||
'@handle.com middle end',
|
||||
'start @handle.com end',
|
||||
'start middle @handle.com',
|
||||
'@handle.com @handle.com @handle.com',
|
||||
'@full123.test-of-chars',
|
||||
'not@right',
|
||||
'@handle!@#$chars',
|
||||
'@handle\n@handle',
|
||||
'@handle.com!@#$chars',
|
||||
'@handle.com\n@handle.com',
|
||||
'start https://middle.com end',
|
||||
'start https://middle.com/foo/bar end',
|
||||
'start https://middle.com/foo/bar?baz=bux end',
|
||||
|
@ -35,13 +39,13 @@ describe('extractEntities', () => {
|
|||
}
|
||||
const outputs: Output[][] = [
|
||||
[],
|
||||
[{type: 'mention', value: 'handle'}],
|
||||
[{type: 'mention', value: 'handle'}],
|
||||
[{type: 'mention', value: 'handle'}],
|
||||
[{type: 'mention', value: 'handle.com'}],
|
||||
[{type: 'mention', value: 'handle.com'}],
|
||||
[{type: 'mention', value: 'handle.com'}],
|
||||
[
|
||||
{type: 'mention', value: 'handle'},
|
||||
{type: 'mention', value: 'handle'},
|
||||
{type: 'mention', value: 'handle'},
|
||||
{type: 'mention', value: 'handle.com'},
|
||||
{type: 'mention', value: 'handle.com'},
|
||||
{type: 'mention', value: 'handle.com'},
|
||||
],
|
||||
[
|
||||
{
|
||||
|
@ -50,10 +54,10 @@ describe('extractEntities', () => {
|
|||
},
|
||||
],
|
||||
[],
|
||||
[{type: 'mention', value: 'handle'}],
|
||||
[{type: 'mention', value: 'handle.com'}],
|
||||
[
|
||||
{type: 'mention', value: 'handle'},
|
||||
{type: 'mention', value: 'handle'},
|
||||
{type: 'mention', value: 'handle.com'},
|
||||
{type: 'mention', value: 'handle.com'},
|
||||
],
|
||||
[{type: 'link', value: 'https://middle.com'}],
|
||||
[{type: 'link', value: 'https://middle.com/foo/bar'}],
|
||||
|
@ -176,3 +180,45 @@ describe('detectLinkables', () => {
|
|||
}
|
||||
})
|
||||
})
|
||||
|
||||
describe('extractHtmlMeta', () => {
  // [html snippet, expected extraction result] pairs, checked in order.
  const cases: Array<[string, Record<string, string>]> = [
    ['', {}],
    ['nothing', {}],
    ['<title>title</title>', {title: 'title'}],
    ['<title> aSd!@#AC </title>', {title: 'aSd!@#AC'}],
    ['<title>\n title\n </title>', {title: 'title'}],
    ['<meta name="title" content="meta title">', {title: 'meta title'}],
    [
      '<meta name="description" content="meta description">',
      {description: 'meta description'},
    ],
    ['<meta property="og:title" content="og title">', {title: 'og title'}],
    [
      '<meta property="og:description" content="og description">',
      {description: 'og description'},
    ],
    [
      '<meta property="og:image" content="https://ogimage.com/foo.png">',
      {image: 'https://ogimage.com/foo.png'},
    ],
    [
      '<meta property="twitter:title" content="twitter title">',
      {title: 'twitter title'},
    ],
    [
      '<meta property="twitter:description" content="twitter description">',
      {description: 'twitter description'},
    ],
    [
      '<meta property="twitter:image" content="https://twitterimage.com/foo.png">',
      {image: 'https://twitterimage.com/foo.png'},
    ],
    // Attributes spread over several lines must still be picked up.
    ['<meta\n name="title"\n content="meta title"\n>', {title: 'meta title'}],
  ]

  it('correctly handles a set of text inputs', () => {
    for (const [html, expected] of cases) {
      expect(extractHtmlMeta(html)).toEqual(expected)
    }
  })
})
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -220,3 +220,54 @@ export function convertBskyAppUrlIfNeeded(url: string): string {
|
|||
}
|
||||
return url
|
||||
}
|
||||
|
||||
// Matches the document <title>, tolerating attributes on the opening tag
// (e.g. `<title data-rh="true">`). Capture group 1 is the raw title text.
const htmlTitleRegex = /<title(?:\s[^>]*)?>([^<]+)<\/title>/i

/**
 * Best-effort extraction of link-preview metadata from a raw HTML string.
 *
 * This is a regex scan, not an HTML parser. It looks at the `<title>` element
 * and at `<meta>` tags that carry a `name` or `property` attribute together
 * with a `content` attribute. Attribute names and meta keys are matched
 * case-insensitively, and both double- and single-quoted values are accepted.
 *
 * Recognised keys:
 * - `title`, `og:title`, `twitter:title` -> `title`
 * - `description`, `og:description`, `twitter:description` -> `description`
 * - `og:image`, `twitter:image` -> `image`
 *
 * When several sources supply the same field, the one appearing last in the
 * document wins; any matching `<meta>` tag overrides the `<title>` element.
 * Values are trimmed, and values that are empty after trimming are ignored.
 *
 * @param html Raw HTML source to scan.
 * @returns An object containing only the fields that were found.
 */
export function extractHtmlMeta(html: string): Record<string, string> {
  const res: Record<string, string> = {}

  const titleMatch = htmlTitleRegex.exec(html)
  const docTitle = titleMatch?.[1].trim()
  if (docTitle) {
    res.title = docTitle
  }

  // Both regexes are global and therefore stateful, so they are created per
  // call. Each exec() loop below runs until it returns null, which resets
  // lastIndex to 0 and makes attrRe safe to reuse for the next tag.
  const metaRe = /<meta\s([^>]+)>/gis
  // The lookbehind stops `data-name="..."` and similar from being read as
  // `name`. Group 2 holds a double-quoted value, group 3 a single-quoted one.
  const attrRe =
    /(?<![\w:-])(name|property|content)\s*=\s*(?:"([^"]+)"|'([^']+)')/gi

  let metaMatch: RegExpExecArray | null
  while ((metaMatch = metaRe.exec(html)) !== null) {
    let propName: string | undefined
    let propValue: string | undefined

    let attrMatch: RegExpExecArray | null
    while ((attrMatch = attrRe.exec(metaMatch[1])) !== null) {
      const attrValue = attrMatch[2] ?? attrMatch[3]
      // The regex is case-insensitive, so normalise before comparing;
      // otherwise `CONTENT="..."` would be taken for the property name.
      if (attrMatch[1].toLowerCase() === 'content') {
        propValue = attrValue
      } else {
        propName = attrValue
      }
    }

    const key = propName?.trim().toLowerCase()
    const value = propValue?.trim()
    if (!key || !value) {
      continue
    }

    switch (key) {
      case 'title':
      case 'og:title':
      case 'twitter:title':
        res.title = value
        break
      case 'description':
      case 'og:description':
      case 'twitter:description':
        res.description = value
        break
      case 'og:image':
      case 'twitter:image':
        res.image = value
        break
    }
  }

  return res
}
|
||||
|
|
Loading…
Reference in New Issue