Fixes youtube embed issues (#50)

* fixes youtube embed

* move extractMetaHtml test to its own file

* tests cleanup

* Add fallback for YouTube metadata

* lint

* Check for youtube in the url domain

* use hostname instead of full url to check for link domain

* checks only for domain
This commit is contained in:
Aryan Goharzad 2023-01-19 13:53:11 -05:00 committed by GitHub
parent 9230d52ff5
commit f10a8308d9
12 changed files with 245 additions and 149 deletions

View file

@ -0,0 +1,65 @@
import {extractYoutubeMeta} from './extractYoutubeMeta'
/** Arguments accepted by `extractHtmlMeta`. */
interface ExtractHtmlMetaInput {
  /** Raw HTML text to scan for `<title>` and `<meta>` tags. */
  html: string
  /** Hostname of the page the HTML came from; only used to detect YouTube pages. */
  hostname?: string
}

/**
 * Extracts link-preview metadata (`title`, `description`, `image`) from HTML.
 *
 * The `<title>` element seeds `title`; every `<meta>` tag carrying a
 * `name`/`property` plus a `content` attribute may then overwrite a field, so
 * the last matching tag in the document wins. When `hostname` contains
 * `youtube.` or `youtu.be`, the fields returned by `extractYoutubeMeta` are
 * merged on top of whatever the tags produced.
 *
 * Attribute names and meta names are matched case-insensitively, and both
 * double- and single-quoted attribute values are accepted.
 *
 * @param input.html - HTML text to scan.
 * @param input.hostname - Optional hostname of the page.
 * @returns An object holding whichever of `title`, `description` and `image`
 * were found; values are trimmed but not HTML-entity decoded.
 */
export const extractHtmlMeta = ({
  html,
  hostname,
}: ExtractHtmlMetaInput): Record<string, string> => {
  let res: Record<string, string> = {}

  const titleMatch = /<title>([^<]+)<\/title>/i.exec(html)
  if (titleMatch) {
    res.title = titleMatch[1].trim()
  }

  const metaRe = /<meta[\s]([^>]+)>/gis
  // The attribute must start the tag body or follow whitespace so that
  // `data-name="..."` is not mistaken for `name`. Group 2 holds a
  // double-quoted value, group 3 a single-quoted one.
  const attrRe =
    /(?:^|\s)(name|property|content)\s*=\s*(?:"([^"]+)"|'([^']+)')/gi
  let metaMatch: RegExpExecArray | null
  while ((metaMatch = metaRe.exec(html))) {
    let propName: string | undefined
    let propValue: string | undefined
    let attrMatch: RegExpExecArray | null
    // The regex is shared between tags, so restart it for each tag body.
    attrRe.lastIndex = 0
    while ((attrMatch = attrRe.exec(metaMatch[1]))) {
      const attrValue = attrMatch[2] ?? attrMatch[3]
      // The regex is case-insensitive, so normalise before comparing;
      // otherwise `CONTENT="..."` would be stored as the property name.
      if (attrMatch[1].toLowerCase() === 'content') {
        propValue = attrValue
      } else {
        propName = attrValue
      }
    }
    if (!propName || !propValue) {
      continue
    }
    // Lower-cased so that e.g. `name="Description"` is recognised.
    switch (propName.trim().toLowerCase()) {
      case 'title':
      case 'og:title':
      case 'twitter:title':
        res.title = propValue.trim()
        break
      case 'description':
      case 'og:description':
      case 'twitter:description':
        res.description = propValue.trim()
        break
      case 'og:image':
      case 'twitter:image':
        res.image = propValue.trim()
        break
    }
  }

  const isYoutubeUrl =
    hostname?.includes('youtube.') || hostname?.includes('youtu.be')
  if (isYoutubeUrl) {
    // Workaround for YouTube not having a title in the meta tags: values
    // found by extractYoutubeMeta take precedence over the tag values.
    res = {...res, ...extractYoutubeMeta(html)}
  }
  return res
}

View file

@ -0,0 +1,26 @@
/**
 * Decodes the body of a JSON string literal (the text between the quotes),
 * resolving escapes such as `\"`, `\n` and `\u0026`. Returns the input
 * unchanged when it is not a valid JSON string body.
 */
const decodeJsonStringBody = (raw: string): string => {
  try {
    const decoded: unknown = JSON.parse(`"${raw}"`)
    return typeof decoded === 'string' ? decoded : raw
  } catch {
    return raw
  }
}

/**
 * Pulls `title`, `description` and `image` out of the `"videoDetails"` data
 * embedded in a YouTube page's HTML.
 *
 * The values are read with regular expressions rather than by parsing the
 * whole blob. Title and description are treated as JSON string literals, so
 * escaped quotes do not cut the value short and a literal `%` cannot raise a
 * `URIError`.
 * NOTE(review): this assumes the blob is plain JSON text (not percent-encoded)
 * — confirm against a real page fixture.
 *
 * @param html - HTML text of the page.
 * @returns An object holding whichever of `title`, `description` and `image`
 * could be found; empty when the page has no `"videoDetails"` data.
 */
export const extractYoutubeMeta = (html: string): Record<string, string> => {
  const res: Record<string, string> = {}
  // `(?:[^"\\]|\\.)*` consumes escape sequences as a unit, so `\"` inside the
  // value does not terminate the capture.
  const youtubeTitleRegex = /"videoDetails":.*"title":"((?:[^"\\]|\\.)*)"/i
  const youtubeDescriptionRegex =
    /"videoDetails":.*"shortDescription":"((?:[^"\\]|\\.)*)"/i
  // Both `.*` groups are greedy, so this settles on the last `default.jpg`
  // URL that follows `"videoDetails":` on the same line.
  const youtubeThumbnailRegex = /"videoDetails":.*"url":"(.*)(default\.jpg)/i

  const youtubeTitleMatch = youtubeTitleRegex.exec(html)
  const youtubeDescriptionMatch = youtubeDescriptionRegex.exec(html)
  const youtubeThumbnailMatch = youtubeThumbnailRegex.exec(html)

  if (youtubeTitleMatch) {
    res.title = decodeJsonStringBody(youtubeTitleMatch[1])
  }
  if (youtubeDescriptionMatch) {
    res.description = decodeJsonStringBody(youtubeDescriptionMatch[1])
  }
  if (youtubeThumbnailMatch) {
    // Re-append the suffix so anything after `default.jpg` (for example a
    // query string) is dropped from the image URL.
    res.image = youtubeThumbnailMatch[1] + 'default.jpg'
  }
  return res
}

View file

@ -1,7 +1,8 @@
import he from 'he'
import {extractHtmlMeta, isBskyAppUrl} from './strings'
import {isBskyAppUrl} from './strings'
import {RootStoreModel} from '../state'
import {extractBskyMeta} from './extractBskyMeta'
import {extractHtmlMeta} from './extractHtmlMeta'
export enum LikelyType {
HTML,
@ -59,7 +60,10 @@ export async function getLinkMeta(
})
const httpResBody = await httpRes.text()
clearTimeout(to)
const httpResMeta = extractHtmlMeta(httpResBody)
const httpResMeta = extractHtmlMeta({
html: httpResBody,
hostname: urlp?.hostname,
})
meta.title = httpResMeta.title ? he.decode(httpResMeta.title) : undefined
meta.description = httpResMeta.description
? he.decode(httpResMeta.description)

View file

@ -265,54 +265,3 @@ export function convertBskyAppUrlIfNeeded(url: string): string {
}
return url
}
/** Matches the text of the document's `<title>` element. */
const htmlTitleRegex = /<title>([^<]+)<\/title>/i

/**
 * Extracts link-preview metadata (`title`, `description`, `image`) from HTML.
 *
 * The `<title>` element seeds `title`; each `<meta>` tag that has both a
 * `name`/`property` and a `content` attribute may then overwrite a field, so
 * the last matching tag in the document wins. Attribute names and meta names
 * are compared case-insensitively, and attribute values may be wrapped in
 * double or single quotes.
 *
 * @param html - HTML text to scan.
 * @returns An object holding whichever of `title`, `description` and `image`
 * were found; values are trimmed but not HTML-entity decoded.
 */
export function extractHtmlMeta(html: string): Record<string, string> {
  const res: Record<string, string> = {}

  const titleMatch = htmlTitleRegex.exec(html)
  if (titleMatch) {
    res.title = titleMatch[1].trim()
  }

  const metaRe = /<meta[\s]([^>]+)>/gis
  // Require start-of-body or whitespace before the attribute so that
  // `data-name="..."` is not read as `name`. Group 2 is a double-quoted
  // value, group 3 a single-quoted one.
  const attrRe =
    /(?:^|\s)(name|property|content)\s*=\s*(?:"([^"]+)"|'([^']+)')/gi
  let metaMatch: RegExpExecArray | null
  while ((metaMatch = metaRe.exec(html))) {
    let propName: string | undefined
    let propValue: string | undefined
    let attrMatch: RegExpExecArray | null
    // The regex is reused across tags; reset its scan position each time.
    attrRe.lastIndex = 0
    while ((attrMatch = attrRe.exec(metaMatch[1]))) {
      const attrValue = attrMatch[2] ?? attrMatch[3]
      // Matching is case-insensitive, so normalise before comparing;
      // otherwise `CONTENT="..."` would be taken for the property name.
      if (attrMatch[1].toLowerCase() === 'content') {
        propValue = attrValue
      } else {
        propName = attrValue
      }
    }
    if (!propName || !propValue) {
      continue
    }
    // Lower-cased so that e.g. `name="Description"` is recognised.
    switch (propName.trim().toLowerCase()) {
      case 'title':
      case 'og:title':
      case 'twitter:title':
        res.title = propValue.trim()
        break
      case 'description':
      case 'og:description':
      case 'twitter:description':
        res.description = propValue.trim()
        break
      case 'og:image':
      case 'twitter:image':
        res.image = propValue.trim()
        break
    }
  }
  return res
}