Fixes youtube embed issues (#50)
* fixes youtube embed
* move extractHtmlMeta test to its own file
* tests cleanup
* Add fallback for youtube meta data
* lint
* Check for youtube in the url domain
* use hostname instead of full url to check for link domain
* checks only for domain

zio/stable
parent
9230d52ff5
commit
f10a8308d9
|
@ -0,0 +1,47 @@
|
|||
export const exampleComHtml = `<!doctype html>
|
||||
<html>
|
||||
<head>
|
||||
<title>Example Domain</title>
|
||||
<meta name="description" content="An example website">
|
||||
|
||||
<meta charset="utf-8" />
|
||||
<meta http-equiv="Content-type" content="text/html; charset=utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
||||
<style type="text/css">
|
||||
body {
|
||||
background-color: #f0f0f2;
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
font-family: -apple-system, system-ui, BlinkMacSystemFont, "Segoe UI", "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif;
|
||||
|
||||
}
|
||||
div {
|
||||
width: 600px;
|
||||
margin: 5em auto;
|
||||
padding: 2em;
|
||||
background-color: #fdfdff;
|
||||
border-radius: 0.5em;
|
||||
box-shadow: 2px 3px 7px 2px rgba(0,0,0,0.02);
|
||||
}
|
||||
a:link, a:visited {
|
||||
color: #38488f;
|
||||
text-decoration: none;
|
||||
}
|
||||
@media (max-width: 700px) {
|
||||
div {
|
||||
margin: 0 auto;
|
||||
width: auto;
|
||||
}
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<div>
|
||||
<h1>Example Domain</h1>
|
||||
<p>This domain is for use in illustrative examples in documents. You may use this
|
||||
domain in literature without prior coordination or asking for permission.</p>
|
||||
<p><a href="https://www.iana.org/domains/example">More information...</a></p>
|
||||
</div>
|
||||
</body>
|
||||
</html>`
|
File diff suppressed because one or more lines are too long
|
@ -0,0 +1,70 @@
|
|||
import {extractHtmlMeta} from '../../src/lib/extractHtmlMeta'
|
||||
import {exampleComHtml} from './__mocks__/exampleComHtml'
|
||||
import {youtubeHTML} from './__mocks__/youtubeHtml'
|
||||
|
||||
// Unit tests for extractHtmlMeta.
//
// extractHtmlMeta takes an options object ({html, hostname}) rather than a
// bare string, so every call below wraps the markup in {html: ...}. Passing the
// string directly would destructure `html` as undefined and yield {}.
describe('extractHtmlMeta', () => {
  // [html snippet, expected metadata] pairs covering <title>, plain meta,
  // OpenGraph and Twitter tags, plus whitespace/newline variations.
  const cases = [
    ['', {}],
    ['nothing', {}],
    ['<title>title</title>', {title: 'title'}],
    ['<title> aSd!@#AC </title>', {title: 'aSd!@#AC'}],
    ['<title>\n title\n </title>', {title: 'title'}],
    ['<meta name="title" content="meta title">', {title: 'meta title'}],
    [
      '<meta name="description" content="meta description">',
      {description: 'meta description'},
    ],
    ['<meta property="og:title" content="og title">', {title: 'og title'}],
    [
      '<meta property="og:description" content="og description">',
      {description: 'og description'},
    ],
    [
      '<meta property="og:image" content="https://ogimage.com/foo.png">',
      {image: 'https://ogimage.com/foo.png'},
    ],
    [
      '<meta property="twitter:title" content="twitter title">',
      {title: 'twitter title'},
    ],
    [
      '<meta property="twitter:description" content="twitter description">',
      {description: 'twitter description'},
    ],
    [
      '<meta property="twitter:image" content="https://twitterimage.com/foo.png">',
      {image: 'https://twitterimage.com/foo.png'},
    ],
    ['<meta\n name="title"\n content="meta title"\n>', {title: 'meta title'}],
  ]

  it.each(cases)(
    'given the html tag %p, returns %p',
    (input, expectedResult) => {
      const output = extractHtmlMeta({html: input as string})
      expect(output).toEqual(expectedResult)
    },
  )

  it('extracts title and description from a generic HTML page', () => {
    const input = exampleComHtml
    const expectedOutput = {
      title: 'Example Domain',
      description: 'An example website',
    }
    const output = extractHtmlMeta({html: input})
    expect(output).toEqual(expectedOutput)
  })

  it('extracts title and description from a generic youtube page', () => {
    const input = youtubeHTML
    const expectedOutput = {
      title: 'HD Video (1080p) with Relaxing Music of Native American Shamans',
      description:
        'Stunning HD Video ( 1080p ) of Patagonian Nature with Relaxing Native American Shamanic Music. HD footage used from ',
      image: 'https://i.ytimg.com/vi/x6UITRjhijI/sddefault.jpg',
    }
    // The YouTube fallback only runs when the hostname identifies YouTube,
    // so the hostname must be supplied alongside the markup.
    const output = extractHtmlMeta({html: input, hostname: 'youtube.com'})
    expect(output).toEqual(expectedOutput)
  })
})
|
|
@ -1,54 +1,7 @@
|
|||
import {LikelyType, getLinkMeta, getLikelyType} from '../../src/lib/link-meta'
|
||||
import {exampleComHtml} from './__mocks__/exampleComHtml'
|
||||
import {mockedRootStore} from '../../__mocks__/state-mock'
|
||||
|
||||
const exampleComHtml = `<!doctype html>
|
||||
<html>
|
||||
<head>
|
||||
<title>Example Domain</title>
|
||||
<meta name="description" content="An example website">
|
||||
|
||||
<meta charset="utf-8" />
|
||||
<meta http-equiv="Content-type" content="text/html; charset=utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
||||
<style type="text/css">
|
||||
body {
|
||||
background-color: #f0f0f2;
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
font-family: -apple-system, system-ui, BlinkMacSystemFont, "Segoe UI", "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif;
|
||||
|
||||
}
|
||||
div {
|
||||
width: 600px;
|
||||
margin: 5em auto;
|
||||
padding: 2em;
|
||||
background-color: #fdfdff;
|
||||
border-radius: 0.5em;
|
||||
box-shadow: 2px 3px 7px 2px rgba(0,0,0,0.02);
|
||||
}
|
||||
a:link, a:visited {
|
||||
color: #38488f;
|
||||
text-decoration: none;
|
||||
}
|
||||
@media (max-width: 700px) {
|
||||
div {
|
||||
margin: 0 auto;
|
||||
width: auto;
|
||||
}
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<div>
|
||||
<h1>Example Domain</h1>
|
||||
<p>This domain is for use in illustrative examples in documents. You may use this
|
||||
domain in literature without prior coordination or asking for permission.</p>
|
||||
<p><a href="https://www.iana.org/domains/example">More information...</a></p>
|
||||
</div>
|
||||
</body>
|
||||
</html>`
|
||||
|
||||
describe('getLinkMeta', () => {
|
||||
const inputs = [
|
||||
'',
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
import {
|
||||
extractEntities,
|
||||
detectLinkables,
|
||||
extractHtmlMeta,
|
||||
pluralize,
|
||||
makeRecordUri,
|
||||
ago,
|
||||
|
@ -286,48 +285,6 @@ describe('detectLinkables', () => {
|
|||
})
|
||||
})
|
||||
|
||||
describe('extractHtmlMeta', () => {
|
||||
const inputs = [
|
||||
'',
|
||||
'nothing',
|
||||
'<title>title</title>',
|
||||
'<title> aSd!@#AC </title>',
|
||||
'<title>\n title\n </title>',
|
||||
'<meta name="title" content="meta title">',
|
||||
'<meta name="description" content="meta description">',
|
||||
'<meta property="og:title" content="og title">',
|
||||
'<meta property="og:description" content="og description">',
|
||||
'<meta property="og:image" content="https://ogimage.com/foo.png">',
|
||||
'<meta property="twitter:title" content="twitter title">',
|
||||
'<meta property="twitter:description" content="twitter description">',
|
||||
'<meta property="twitter:image" content="https://twitterimage.com/foo.png">',
|
||||
'<meta\n name="title"\n content="meta title"\n>',
|
||||
]
|
||||
const outputs = [
|
||||
{},
|
||||
{},
|
||||
{title: 'title'},
|
||||
{title: 'aSd!@#AC'},
|
||||
{title: 'title'},
|
||||
{title: 'meta title'},
|
||||
{description: 'meta description'},
|
||||
{title: 'og title'},
|
||||
{description: 'og description'},
|
||||
{image: 'https://ogimage.com/foo.png'},
|
||||
{title: 'twitter title'},
|
||||
{description: 'twitter description'},
|
||||
{image: 'https://twitterimage.com/foo.png'},
|
||||
{title: 'meta title'},
|
||||
]
|
||||
it('correctly handles a set of text inputs', () => {
|
||||
for (let i = 0; i < inputs.length; i++) {
|
||||
const input = inputs[i]
|
||||
const output = extractHtmlMeta(input)
|
||||
expect(output).toEqual(outputs[i])
|
||||
}
|
||||
})
|
||||
})
|
||||
|
||||
describe('pluralize', () => {
|
||||
const inputs: [number, string, string?][] = [
|
||||
[1, 'follower'],
|
||||
|
|
|
@ -565,13 +565,13 @@ EXTERNAL SOURCES:
|
|||
:path: "../node_modules/react-native/ReactCommon/yoga"
|
||||
|
||||
SPEC CHECKSUMS:
|
||||
boost: a7c83b31436843459a1961bfd74b96033dc77234
|
||||
boost: 57d2868c099736d80fcd648bf211b4431e51a558
|
||||
BVLinearGradient: 34a999fda29036898a09c6a6b728b0b4189e1a44
|
||||
DoubleConversion: 831926d9b8bf8166fd87886c4abab286c2422662
|
||||
DoubleConversion: 5189b271737e1565bdce30deb4a08d647e3f5f54
|
||||
FBLazyVector: 61839cba7a48c570b7ac3e1cd8a4d0948382202f
|
||||
FBReactNativeSpec: 5a14398ccf5e27c1ca2d7109eb920594ce93c10d
|
||||
fmt: ff9d55029c625d3757ed641535fd4a75fedc7ce9
|
||||
glog: 476ee3e89abb49e07f822b48323c51c57124b572
|
||||
glog: 04b94705f318337d7ead9e6d17c019bd9b1f6b1b
|
||||
hermes-engine: f6e715aa6c8bd38de6c13bc85e07b0a337edaa89
|
||||
libevent: 4049cae6c81cdb3654a443be001fb9bdceff7913
|
||||
RCT-Folly: 424b8c9a7a0b9ab2886ffe9c3b041ef628fd4fb1
|
||||
|
|
|
@ -9,6 +9,7 @@
|
|||
"start": "react-native start",
|
||||
"clean-cache": "rm -rf node_modules/.cache/babel-loader/*",
|
||||
"test": "jest",
|
||||
"test-watch": "jest --watchAll",
|
||||
"test-ci": "jest --ci --forceExit --reporters=default --reporters=jest-junit",
|
||||
"test-coverage": "jest --coverage",
|
||||
"lint": "eslint . --ext .js,.jsx,.ts,.tsx"
|
||||
|
@ -114,6 +115,7 @@
|
|||
"transformIgnorePatterns": [
|
||||
"node_modules/(?!(jest-)?react-native|react-clone-referenced-element|@react-native-community|rollbar-react-native|@fortawesome|@react-native|@react-navigation)"
|
||||
],
|
||||
"modulePathIgnorePatterns": ["__tests__\/.*\/__mocks__"],
|
||||
"coveragePathIgnorePatterns": [
|
||||
"<rootDir>/node_modules/",
|
||||
"<rootDir>/src/platform",
|
||||
|
|
|
@ -0,0 +1,65 @@
|
|||
import {extractYoutubeMeta} from './extractYoutubeMeta'
|
||||
|
||||
/**
 * Input for extractHtmlMeta.
 *
 * html     - raw markup to scan.
 * hostname - optional hostname of the page the markup came from; used only to
 *            decide whether the YouTube-specific fallback should run.
 */
interface ExtractHtmlMetaInput {
  html: string
  hostname?: string
}

/**
 * Extracts link-preview metadata from raw HTML using regular expressions.
 *
 * The result may contain any of `title`, `description` and `image`:
 *  - `title` is seeded from the <title> element, then overwritten by any
 *    `title`, `og:title` or `twitter:title` meta tag.
 *  - `description` comes from `description`, `og:description` or
 *    `twitter:description` meta tags.
 *  - `image` comes from `og:image` or `twitter:image` meta tags.
 * When several matching meta tags are present, the last one in document order
 * wins. If `hostname` contains `youtube.` or `youtu.be`, the values returned by
 * extractYoutubeMeta are merged on top of the result.
 *
 * @param input - the markup and optional hostname, see ExtractHtmlMetaInput
 * @returns a record holding only the keys that were found (possibly empty)
 */
export const extractHtmlMeta = ({
  html,
  hostname,
}: ExtractHtmlMetaInput): Record<string, string> => {
  const htmlTitleRegex = /<title>([^<]+)<\/title>/i

  let res: Record<string, string> = {}

  const match = htmlTitleRegex.exec(html)

  if (match) {
    res.title = match[1].trim()
  }

  let metaMatch
  let propMatch
  const metaRe = /<meta[\s]([^>]+)>/gis
  while ((metaMatch = metaRe.exec(html))) {
    let propName
    let propValue
    // A fresh regex per tag so the stateful `lastIndex` of the /g flag always
    // starts at 0 for each tag's attribute string.
    const propRe = /(name|property|content)="([^"]+)"/gis
    while ((propMatch = propRe.exec(metaMatch[1]))) {
      // The regex is case-insensitive, so the captured attribute name may be
      // `CONTENT`/`Content`; normalise it before comparing, otherwise the
      // content value would be mistaken for the property name.
      if (propMatch[1].toLowerCase() === 'content') {
        propValue = propMatch[2]
      } else {
        propName = propMatch[2]
      }
    }
    // Skip tags that do not carry both an identifying attribute and a value.
    if (!propName || !propValue) {
      continue
    }
    switch (propName?.trim()) {
      case 'title':
      case 'og:title':
      case 'twitter:title':
        res.title = propValue?.trim()
        break
      case 'description':
      case 'og:description':
      case 'twitter:description':
        res.description = propValue?.trim()
        break
      case 'og:image':
      case 'twitter:image':
        res.image = propValue?.trim()
        break
    }
  }

  const isYoutubeUrl =
    hostname?.includes('youtube.') || hostname?.includes('youtu.be')
  if (isYoutubeUrl) {
    // Workaround for Youtube not having a title in the meta tags
    res = {...res, ...extractYoutubeMeta(html)}
  }

  return res
}
|
|
@ -0,0 +1,26 @@
|
|||
/**
 * Decodes URI escape sequences in `value`, returning `value` unchanged when it
 * is not a well-formed URI component sequence (for example a bare `%`, as in
 * "100% Pure"), since decodeURI throws URIError in that case.
 */
const safeDecodeURI = (value: string): string => {
  try {
    return decodeURI(value)
  } catch {
    return value
  }
}

/**
 * Extracts title, description and thumbnail from the `"videoDetails"` data
 * embedded in a YouTube page's HTML.
 *
 * Each regex requires `"videoDetails":` and the wanted key to be on the same
 * line (no `s` flag, so `.` does not cross newlines). Because `.*` is greedy,
 * the last occurrence of the key on that line is the one captured.
 *
 * @param html - raw markup of the YouTube page
 * @returns a record with whichever of `title`, `description` and `image` were
 *          found; literal `\n` sequences in the description become newlines
 */
export const extractYoutubeMeta = (html: string): Record<string, string> => {
  const res: Record<string, string> = {}
  const youtubeTitleRegex = /"videoDetails":.*"title":"([^"]*)"/i
  const youtubeDescriptionRegex =
    /"videoDetails":.*"shortDescription":"([^"]*)"/i
  const youtubeThumbnailRegex = /"videoDetails":.*"url":"(.*)(default\.jpg)/i

  const youtubeTitleMatch = youtubeTitleRegex.exec(html)
  const youtubeDescriptionMatch = youtubeDescriptionRegex.exec(html)
  const youtubeThumbnailMatch = youtubeThumbnailRegex.exec(html)

  // A successful match always carries its capture groups, so a null check is
  // sufficient before reading index 1.
  if (youtubeTitleMatch) {
    res.title = safeDecodeURI(youtubeTitleMatch[1])
  }
  if (youtubeDescriptionMatch) {
    res.description = safeDecodeURI(youtubeDescriptionMatch[1]).replace(
      /\\n/g,
      '\n',
    )
  }
  if (youtubeThumbnailMatch) {
    // Group 1 stops just before `default.jpg`, so re-append it to rebuild the
    // full thumbnail URL.
    res.image = `${youtubeThumbnailMatch[1]}default.jpg`
  }

  return res
}
|
|
@ -1,7 +1,8 @@
|
|||
import he from 'he'
|
||||
import {extractHtmlMeta, isBskyAppUrl} from './strings'
|
||||
import {isBskyAppUrl} from './strings'
|
||||
import {RootStoreModel} from '../state'
|
||||
import {extractBskyMeta} from './extractBskyMeta'
|
||||
import {extractHtmlMeta} from './extractHtmlMeta'
|
||||
|
||||
export enum LikelyType {
|
||||
HTML,
|
||||
|
@ -59,7 +60,10 @@ export async function getLinkMeta(
|
|||
})
|
||||
const httpResBody = await httpRes.text()
|
||||
clearTimeout(to)
|
||||
const httpResMeta = extractHtmlMeta(httpResBody)
|
||||
const httpResMeta = extractHtmlMeta({
|
||||
html: httpResBody,
|
||||
hostname: urlp?.hostname,
|
||||
})
|
||||
meta.title = httpResMeta.title ? he.decode(httpResMeta.title) : undefined
|
||||
meta.description = httpResMeta.description
|
||||
? he.decode(httpResMeta.description)
|
||||
|
|
|
@ -265,54 +265,3 @@ export function convertBskyAppUrlIfNeeded(url: string): string {
|
|||
}
|
||||
return url
|
||||
}
|
||||
|
||||
const htmlTitleRegex = /<title>([^<]+)<\/title>/i
|
||||
export function extractHtmlMeta(html: string): Record<string, string> {
|
||||
const res: Record<string, string> = {}
|
||||
|
||||
{
|
||||
const match = htmlTitleRegex.exec(html)
|
||||
if (match) {
|
||||
res.title = match[1].trim()
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
let metaMatch
|
||||
let propMatch
|
||||
const metaRe = /<meta[\s]([^>]+)>/gis
|
||||
while ((metaMatch = metaRe.exec(html))) {
|
||||
let propName
|
||||
let propValue
|
||||
const propRe = /(name|property|content)="([^"]+)"/gis
|
||||
while ((propMatch = propRe.exec(metaMatch[1]))) {
|
||||
if (propMatch[1] === 'content') {
|
||||
propValue = propMatch[2]
|
||||
} else {
|
||||
propName = propMatch[2]
|
||||
}
|
||||
}
|
||||
if (!propName || !propValue) {
|
||||
continue
|
||||
}
|
||||
switch (propName?.trim()) {
|
||||
case 'title':
|
||||
case 'og:title':
|
||||
case 'twitter:title':
|
||||
res.title = propValue?.trim()
|
||||
break
|
||||
case 'description':
|
||||
case 'og:description':
|
||||
case 'twitter:description':
|
||||
res.description = propValue?.trim()
|
||||
break
|
||||
case 'og:image':
|
||||
case 'twitter:image':
|
||||
res.image = propValue?.trim()
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return res
|
||||
}
|
||||
|
|
|
@ -39,7 +39,9 @@ export const FeedItem = observer(function ({
|
|||
const itemTitle = `Post by ${item.post.author.handle}`
|
||||
const authorHref = `/profile/${item.post.author.handle}`
|
||||
const replyAuthorDid = useMemo(() => {
|
||||
if (!record?.reply) return ''
|
||||
if (!record?.reply) {
|
||||
return ''
|
||||
}
|
||||
const urip = new AtUri(record.reply.parent?.uri || record.reply.root.uri)
|
||||
return urip.hostname
|
||||
}, [record?.reply])
|
||||
|
@ -196,7 +198,9 @@ export const FeedItem = observer(function ({
|
|||
) : (
|
||||
<View style={{height: 5}} />
|
||||
)}
|
||||
<PostEmbeds embed={item.post.embed} style={styles.embed} />
|
||||
{item.post.embed ? (
|
||||
<PostEmbeds embed={item.post.embed} style={styles.embed} />
|
||||
) : null}
|
||||
<PostCtrls
|
||||
style={styles.ctrls}
|
||||
itemHref={itemHref}
|
||||
|
|
Loading…
Reference in New Issue