Fixes youtube embed issues (#50)

* fixes youtube embed

* move extractMetaHtml test to its own file

* tests cleanup

* Add fallback for YouTube metadata

* lint

* Check for youtube in the url domain

* use hostname instead of full url to check for link domain

* checks only for domain
zio/stable
Aryan Goharzad 2023-01-19 13:53:11 -05:00 committed by GitHub
parent 9230d52ff5
commit f10a8308d9
12 changed files with 245 additions and 149 deletions

View File

@ -0,0 +1,47 @@
/**
 * Static HTML fixture for tests: a small, complete HTML document whose
 * `<title>` is "Example Domain" and whose `description` meta tag is
 * "An example website". It also carries unrelated meta tags (charset,
 * http-equiv, viewport) and an inline stylesheet.
 */
export const exampleComHtml = `<!doctype html>
<html>
<head>
<title>Example Domain</title>
<meta name="description" content="An example website">
<meta charset="utf-8" />
<meta http-equiv="Content-type" content="text/html; charset=utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<style type="text/css">
body {
background-color: #f0f0f2;
margin: 0;
padding: 0;
font-family: -apple-system, system-ui, BlinkMacSystemFont, "Segoe UI", "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif;
}
div {
width: 600px;
margin: 5em auto;
padding: 2em;
background-color: #fdfdff;
border-radius: 0.5em;
box-shadow: 2px 3px 7px 2px rgba(0,0,0,0.02);
}
a:link, a:visited {
color: #38488f;
text-decoration: none;
}
@media (max-width: 700px) {
div {
margin: 0 auto;
width: auto;
}
}
</style>
</head>
<body>
<div>
<h1>Example Domain</h1>
<p>This domain is for use in illustrative examples in documents. You may use this
domain in literature without prior coordination or asking for permission.</p>
<p><a href="https://www.iana.org/domains/example">More information...</a></p>
</div>
</body>
</html>`

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,70 @@
import {extractHtmlMeta} from '../../src/lib/extractHtmlMeta'
import {exampleComHtml} from './__mocks__/exampleComHtml'
import {youtubeHTML} from './__mocks__/youtubeHtml'
describe('extractHtmlMeta', () => {
  // Each case is a pair of [html input, expected extracted metadata].
  // The explicit tuple type keeps `input` a string inside `it.each`.
  const cases: Array<[string, Record<string, string>]> = [
    ['', {}],
    ['nothing', {}],
    ['<title>title</title>', {title: 'title'}],
    ['<title> aSd!@#AC </title>', {title: 'aSd!@#AC'}],
    ['<title>\n title\n </title>', {title: 'title'}],
    ['<meta name="title" content="meta title">', {title: 'meta title'}],
    [
      '<meta name="description" content="meta description">',
      {description: 'meta description'},
    ],
    ['<meta property="og:title" content="og title">', {title: 'og title'}],
    [
      '<meta property="og:description" content="og description">',
      {description: 'og description'},
    ],
    [
      '<meta property="og:image" content="https://ogimage.com/foo.png">',
      {image: 'https://ogimage.com/foo.png'},
    ],
    [
      '<meta property="twitter:title" content="twitter title">',
      {title: 'twitter title'},
    ],
    [
      '<meta property="twitter:description" content="twitter description">',
      {description: 'twitter description'},
    ],
    [
      '<meta property="twitter:image" content="https://twitterimage.com/foo.png">',
      {image: 'https://twitterimage.com/foo.png'},
    ],
    ['<meta\n name="title"\n content="meta title"\n>', {title: 'meta title'}],
  ]

  it.each(cases)(
    'given the html tag %p, returns %p',
    (input, expectedResult) => {
      // extractHtmlMeta takes an options object, not a bare HTML string.
      const output = extractHtmlMeta({html: input})
      expect(output).toEqual(expectedResult)
    },
  )

  it('extracts title and description from a generic HTML page', () => {
    const input = exampleComHtml
    const expectedOutput = {
      title: 'Example Domain',
      description: 'An example website',
    }
    const output = extractHtmlMeta({html: input, hostname: 'example.com'})
    expect(output).toEqual(expectedOutput)
  })

  it('extracts title and description from a generic youtube page', () => {
    const input = youtubeHTML
    const expectedOutput = {
      title: 'HD Video (1080p) with Relaxing Music of Native American Shamans',
      description:
        'Stunning HD Video ( 1080p ) of Patagonian Nature with Relaxing Native American Shamanic Music. HD footage used from ',
      image: 'https://i.ytimg.com/vi/x6UITRjhijI/sddefault.jpg',
    }
    // The YouTube-specific fallback only runs for a YouTube hostname.
    const output = extractHtmlMeta({html: input, hostname: 'youtube.com'})
    expect(output).toEqual(expectedOutput)
  })
})

View File

@ -1,54 +1,7 @@
import {LikelyType, getLinkMeta, getLikelyType} from '../../src/lib/link-meta' import {LikelyType, getLinkMeta, getLikelyType} from '../../src/lib/link-meta'
import {exampleComHtml} from './__mocks__/exampleComHtml'
import {mockedRootStore} from '../../__mocks__/state-mock' import {mockedRootStore} from '../../__mocks__/state-mock'
const exampleComHtml = `<!doctype html>
<html>
<head>
<title>Example Domain</title>
<meta name="description" content="An example website">
<meta charset="utf-8" />
<meta http-equiv="Content-type" content="text/html; charset=utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<style type="text/css">
body {
background-color: #f0f0f2;
margin: 0;
padding: 0;
font-family: -apple-system, system-ui, BlinkMacSystemFont, "Segoe UI", "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif;
}
div {
width: 600px;
margin: 5em auto;
padding: 2em;
background-color: #fdfdff;
border-radius: 0.5em;
box-shadow: 2px 3px 7px 2px rgba(0,0,0,0.02);
}
a:link, a:visited {
color: #38488f;
text-decoration: none;
}
@media (max-width: 700px) {
div {
margin: 0 auto;
width: auto;
}
}
</style>
</head>
<body>
<div>
<h1>Example Domain</h1>
<p>This domain is for use in illustrative examples in documents. You may use this
domain in literature without prior coordination or asking for permission.</p>
<p><a href="https://www.iana.org/domains/example">More information...</a></p>
</div>
</body>
</html>`
describe('getLinkMeta', () => { describe('getLinkMeta', () => {
const inputs = [ const inputs = [
'', '',

View File

@ -1,7 +1,6 @@
import { import {
extractEntities, extractEntities,
detectLinkables, detectLinkables,
extractHtmlMeta,
pluralize, pluralize,
makeRecordUri, makeRecordUri,
ago, ago,
@ -286,48 +285,6 @@ describe('detectLinkables', () => {
}) })
}) })
describe('extractHtmlMeta', () => {
const inputs = [
'',
'nothing',
'<title>title</title>',
'<title> aSd!@#AC </title>',
'<title>\n title\n </title>',
'<meta name="title" content="meta title">',
'<meta name="description" content="meta description">',
'<meta property="og:title" content="og title">',
'<meta property="og:description" content="og description">',
'<meta property="og:image" content="https://ogimage.com/foo.png">',
'<meta property="twitter:title" content="twitter title">',
'<meta property="twitter:description" content="twitter description">',
'<meta property="twitter:image" content="https://twitterimage.com/foo.png">',
'<meta\n name="title"\n content="meta title"\n>',
]
const outputs = [
{},
{},
{title: 'title'},
{title: 'aSd!@#AC'},
{title: 'title'},
{title: 'meta title'},
{description: 'meta description'},
{title: 'og title'},
{description: 'og description'},
{image: 'https://ogimage.com/foo.png'},
{title: 'twitter title'},
{description: 'twitter description'},
{image: 'https://twitterimage.com/foo.png'},
{title: 'meta title'},
]
it('correctly handles a set of text inputs', () => {
for (let i = 0; i < inputs.length; i++) {
const input = inputs[i]
const output = extractHtmlMeta(input)
expect(output).toEqual(outputs[i])
}
})
})
describe('pluralize', () => { describe('pluralize', () => {
const inputs: [number, string, string?][] = [ const inputs: [number, string, string?][] = [
[1, 'follower'], [1, 'follower'],

View File

@ -565,13 +565,13 @@ EXTERNAL SOURCES:
:path: "../node_modules/react-native/ReactCommon/yoga" :path: "../node_modules/react-native/ReactCommon/yoga"
SPEC CHECKSUMS: SPEC CHECKSUMS:
boost: a7c83b31436843459a1961bfd74b96033dc77234 boost: 57d2868c099736d80fcd648bf211b4431e51a558
BVLinearGradient: 34a999fda29036898a09c6a6b728b0b4189e1a44 BVLinearGradient: 34a999fda29036898a09c6a6b728b0b4189e1a44
DoubleConversion: 831926d9b8bf8166fd87886c4abab286c2422662 DoubleConversion: 5189b271737e1565bdce30deb4a08d647e3f5f54
FBLazyVector: 61839cba7a48c570b7ac3e1cd8a4d0948382202f FBLazyVector: 61839cba7a48c570b7ac3e1cd8a4d0948382202f
FBReactNativeSpec: 5a14398ccf5e27c1ca2d7109eb920594ce93c10d FBReactNativeSpec: 5a14398ccf5e27c1ca2d7109eb920594ce93c10d
fmt: ff9d55029c625d3757ed641535fd4a75fedc7ce9 fmt: ff9d55029c625d3757ed641535fd4a75fedc7ce9
glog: 476ee3e89abb49e07f822b48323c51c57124b572 glog: 04b94705f318337d7ead9e6d17c019bd9b1f6b1b
hermes-engine: f6e715aa6c8bd38de6c13bc85e07b0a337edaa89 hermes-engine: f6e715aa6c8bd38de6c13bc85e07b0a337edaa89
libevent: 4049cae6c81cdb3654a443be001fb9bdceff7913 libevent: 4049cae6c81cdb3654a443be001fb9bdceff7913
RCT-Folly: 424b8c9a7a0b9ab2886ffe9c3b041ef628fd4fb1 RCT-Folly: 424b8c9a7a0b9ab2886ffe9c3b041ef628fd4fb1

View File

@ -9,6 +9,7 @@
"start": "react-native start", "start": "react-native start",
"clean-cache": "rm -rf node_modules/.cache/babel-loader/*", "clean-cache": "rm -rf node_modules/.cache/babel-loader/*",
"test": "jest", "test": "jest",
"test-watch": "jest --watchAll",
"test-ci": "jest --ci --forceExit --reporters=default --reporters=jest-junit", "test-ci": "jest --ci --forceExit --reporters=default --reporters=jest-junit",
"test-coverage": "jest --coverage", "test-coverage": "jest --coverage",
"lint": "eslint . --ext .js,.jsx,.ts,.tsx" "lint": "eslint . --ext .js,.jsx,.ts,.tsx"
@ -114,6 +115,7 @@
"transformIgnorePatterns": [ "transformIgnorePatterns": [
"node_modules/(?!(jest-)?react-native|react-clone-referenced-element|@react-native-community|rollbar-react-native|@fortawesome|@react-native|@react-navigation)" "node_modules/(?!(jest-)?react-native|react-clone-referenced-element|@react-native-community|rollbar-react-native|@fortawesome|@react-native|@react-navigation)"
], ],
"modulePathIgnorePatterns": ["__tests__\/.*\/__mocks__"],
"coveragePathIgnorePatterns": [ "coveragePathIgnorePatterns": [
"<rootDir>/node_modules/", "<rootDir>/node_modules/",
"<rootDir>/src/platform", "<rootDir>/src/platform",

View File

@ -0,0 +1,65 @@
import {extractYoutubeMeta} from './extractYoutubeMeta'
/** Arguments accepted by `extractHtmlMeta`. */
interface ExtractHtmlMetaInput {
  /** Raw HTML text to scan for `<title>` and `<meta>` tags. */
  html: string
  /**
   * Hostname the HTML was fetched from. It is only used to decide whether the
   * YouTube-specific fallback runs; when omitted, that step is skipped.
   */
  hostname?: string
}

/**
 * Extracts link-preview metadata from an HTML string using regular
 * expressions (no DOM parsing).
 *
 * Sources, in order of application (later ones overwrite earlier ones):
 * 1. The `<title>` element, which may carry attributes.
 * 2. Every `<meta>` tag that has both a `name`/`property` attribute and a
 *    `content` attribute. `title`, `og:title` and `twitter:title` set
 *    `title`; the matching `*description` keys set `description`; `og:image`
 *    and `twitter:image` set `image`. Attribute names and meta keys are
 *    matched case-insensitively, and values may be double- or single-quoted.
 * 3. For YouTube hostnames, whatever `extractYoutubeMeta` returns.
 *
 * @param input - The HTML to scan and, optionally, the hostname it came from.
 * @returns An object holding any of `title`, `description` and `image` that
 *   were found; keys that were not found are absent. Values are trimmed but
 *   not HTML-entity decoded.
 */
export const extractHtmlMeta = ({
  html,
  hostname,
}: ExtractHtmlMetaInput): Record<string, string> => {
  let res: Record<string, string> = {}

  // Allow attributes on the tag, e.g. `<title data-rh="true">`.
  const titleMatch = /<title(?:\s[^>]*)?>([^<]+)<\/title>/i.exec(html)
  if (titleMatch) {
    res.title = titleMatch[1].trim()
  }

  const metaRe = /<meta\s([^>]+)>/gi
  let metaMatch: RegExpExecArray | null
  while ((metaMatch = metaRe.exec(html)) !== null) {
    let propName: string | undefined
    let propValue: string | undefined

    // Group 2 holds a double-quoted value, group 3 a single-quoted one.
    const attrRe = /(name|property|content)\s*=\s*(?:"([^"]+)"|'([^']+)')/gi
    let attrMatch: RegExpExecArray | null
    while ((attrMatch = attrRe.exec(metaMatch[1])) !== null) {
      const attrValue = attrMatch[2] ?? attrMatch[3]
      // The regex is case-insensitive, so normalise before comparing;
      // otherwise `CONTENT="..."` would be mistaken for the property name.
      if (attrMatch[1].toLowerCase() === 'content') {
        propValue = attrValue
      } else {
        propName = attrValue
      }
    }

    if (!propName || !propValue) {
      continue
    }

    switch (propName.trim().toLowerCase()) {
      case 'title':
      case 'og:title':
      case 'twitter:title':
        res.title = propValue.trim()
        break
      case 'description':
      case 'og:description':
      case 'twitter:description':
        res.description = propValue.trim()
        break
      case 'og:image':
      case 'twitter:image':
        res.image = propValue.trim()
        break
    }
  }

  // Match `youtube.<tld>` / `youtu.be` only at a domain-label boundary so
  // that look-alike hosts such as `notyoutube.com` do not trigger the fallback.
  const isYoutubeUrl =
    hostname !== undefined &&
    (/(^|\.)youtube\./i.test(hostname) || /(^|\.)youtu\.be\.?$/i.test(hostname))

  if (isYoutubeUrl) {
    // Workaround for YouTube not having a title in the meta tags: merge in the
    // values parsed from the page's embedded video details. Any key returned
    // there overrides the value found above.
    res = {...res, ...extractYoutubeMeta(html)}
  }

  return res
}

View File

@ -0,0 +1,26 @@
/**
 * Extracts `title`, `description` and `image` from the `"videoDetails"` JSON
 * text embedded in a YouTube page's HTML.
 *
 * The lookups are regex-based rather than a JSON parse of the whole blob.
 * Each pattern requires `"videoDetails":` and the wanted key to be on the
 * same line, and because the `.*` in between is greedy, the last matching
 * key on that line wins.
 *
 * @param html - Raw HTML of the page.
 * @returns An object with whichever of `title`, `description` and `image`
 *   could be found; keys that were not found are absent.
 */
export const extractYoutubeMeta = (html: string): Record<string, string> => {
  const res: Record<string, string> = {}

  // `(?:[^"\\]|\\[\s\S])*` is the body of a JSON string: it steps over escaped
  // characters such as `\"` instead of stopping at them.
  const youtubeTitleRegex =
    /"videoDetails":.*"title":"((?:[^"\\]|\\[\s\S])*)"/i
  const youtubeDescriptionRegex =
    /"videoDetails":.*"shortDescription":"((?:[^"\\]|\\[\s\S])*)"/i
  // `[^"]*` keeps the capture inside a single `"url"` value.
  const youtubeThumbnailRegex = /"videoDetails":.*"url":"([^"]*)default\.jpg/i

  // The captured text is JSON-escaped (`\"`, `\n`, `\u0026`, ...), not
  // percent-encoded, so decode it as a JSON string literal. `decodeURI` would
  // throw a URIError on a plain `%`. If the text is not valid JSON (e.g. it
  // holds raw control characters), fall back to unescaping `\n` only.
  const unescapeJsonString = (raw: string): string => {
    try {
      const parsed: unknown = JSON.parse(`"${raw}"`)
      return typeof parsed === 'string' ? parsed : raw
    } catch {
      return raw.replace(/\\n/g, '\n')
    }
  }

  const youtubeTitleMatch = youtubeTitleRegex.exec(html)
  if (youtubeTitleMatch) {
    res.title = unescapeJsonString(youtubeTitleMatch[1])
  }

  const youtubeDescriptionMatch = youtubeDescriptionRegex.exec(html)
  if (youtubeDescriptionMatch) {
    res.description = unescapeJsonString(youtubeDescriptionMatch[1])
  }

  const youtubeThumbnailMatch = youtubeThumbnailRegex.exec(html)
  if (youtubeThumbnailMatch) {
    // Drop anything after `default.jpg` (such as a query string) by rebuilding
    // the URL from the captured prefix.
    res.image = youtubeThumbnailMatch[1] + 'default.jpg'
  }

  return res
}

View File

@ -1,7 +1,8 @@
import he from 'he' import he from 'he'
import {extractHtmlMeta, isBskyAppUrl} from './strings' import {isBskyAppUrl} from './strings'
import {RootStoreModel} from '../state' import {RootStoreModel} from '../state'
import {extractBskyMeta} from './extractBskyMeta' import {extractBskyMeta} from './extractBskyMeta'
import {extractHtmlMeta} from './extractHtmlMeta'
export enum LikelyType { export enum LikelyType {
HTML, HTML,
@ -59,7 +60,10 @@ export async function getLinkMeta(
}) })
const httpResBody = await httpRes.text() const httpResBody = await httpRes.text()
clearTimeout(to) clearTimeout(to)
const httpResMeta = extractHtmlMeta(httpResBody) const httpResMeta = extractHtmlMeta({
html: httpResBody,
hostname: urlp?.hostname,
})
meta.title = httpResMeta.title ? he.decode(httpResMeta.title) : undefined meta.title = httpResMeta.title ? he.decode(httpResMeta.title) : undefined
meta.description = httpResMeta.description meta.description = httpResMeta.description
? he.decode(httpResMeta.description) ? he.decode(httpResMeta.description)

View File

@ -265,54 +265,3 @@ export function convertBskyAppUrlIfNeeded(url: string): string {
} }
return url return url
} }
const htmlTitleRegex = /<title>([^<]+)<\/title>/i
export function extractHtmlMeta(html: string): Record<string, string> {
const res: Record<string, string> = {}
{
const match = htmlTitleRegex.exec(html)
if (match) {
res.title = match[1].trim()
}
}
{
let metaMatch
let propMatch
const metaRe = /<meta[\s]([^>]+)>/gis
while ((metaMatch = metaRe.exec(html))) {
let propName
let propValue
const propRe = /(name|property|content)="([^"]+)"/gis
while ((propMatch = propRe.exec(metaMatch[1]))) {
if (propMatch[1] === 'content') {
propValue = propMatch[2]
} else {
propName = propMatch[2]
}
}
if (!propName || !propValue) {
continue
}
switch (propName?.trim()) {
case 'title':
case 'og:title':
case 'twitter:title':
res.title = propValue?.trim()
break
case 'description':
case 'og:description':
case 'twitter:description':
res.description = propValue?.trim()
break
case 'og:image':
case 'twitter:image':
res.image = propValue?.trim()
break
}
}
}
return res
}

View File

@ -39,7 +39,9 @@ export const FeedItem = observer(function ({
const itemTitle = `Post by ${item.post.author.handle}` const itemTitle = `Post by ${item.post.author.handle}`
const authorHref = `/profile/${item.post.author.handle}` const authorHref = `/profile/${item.post.author.handle}`
const replyAuthorDid = useMemo(() => { const replyAuthorDid = useMemo(() => {
if (!record?.reply) return '' if (!record?.reply) {
return ''
}
const urip = new AtUri(record.reply.parent?.uri || record.reply.root.uri) const urip = new AtUri(record.reply.parent?.uri || record.reply.root.uri)
return urip.hostname return urip.hostname
}, [record?.reply]) }, [record?.reply])
@ -196,7 +198,9 @@ export const FeedItem = observer(function ({
) : ( ) : (
<View style={{height: 5}} /> <View style={{height: 5}} />
)} )}
{item.post.embed ? (
<PostEmbeds embed={item.post.embed} style={styles.embed} /> <PostEmbeds embed={item.post.embed} style={styles.embed} />
) : null}
<PostCtrls <PostCtrls
style={styles.ctrls} style={styles.ctrls}
itemHref={itemHref} itemHref={itemHref}