Fixes YouTube embed issues (#50)
* Fixes YouTube embed
* Move extractHtmlMeta test to its own file
* Tests cleanup
* Add fallback for YouTube metadata
* Lint
* Check for YouTube in the URL domain
* Use hostname instead of full URL to check for link domain
* Checks only for domain
This commit is contained in:
parent
9230d52ff5
commit
f10a8308d9
12 changed files with 245 additions and 149 deletions
65
src/lib/extractHtmlMeta.ts
Normal file
65
src/lib/extractHtmlMeta.ts
Normal file
|
@ -0,0 +1,65 @@
|
|||
import {extractYoutubeMeta} from './extractYoutubeMeta'
|
||||
|
||||
/** Input for `extractHtmlMeta`. */
interface ExtractHtmlMetaInput {
  /** Raw HTML of the fetched page. */
  html: string
  /**
   * Hostname of the page URL. When it contains `youtube.` or `youtu.be`, the
   * YouTube-specific fallback extraction is merged over the generic result.
   */
  hostname?: string
}

/**
 * Extracts link-preview metadata (`title`, `description`, `image`) from HTML.
 *
 * The `<title>` element provides the initial title. Every `<meta>` tag is then
 * scanned for a `name`/`property` + `content` pair; recognised names
 * (`title`, `og:title`, `twitter:title`, `description`, `og:description`,
 * `twitter:description`, `og:image`, `twitter:image`) overwrite the matching
 * field, so the last matching tag in document order wins.
 *
 * Attribute names and meta names are matched case-insensitively, and attribute
 * values may be wrapped in either double or single quotes.
 *
 * @param input.html Raw HTML to scan.
 * @param input.hostname Optional hostname used to enable the YouTube fallback.
 * @returns An object holding only the fields that were found, trimmed.
 */
export const extractHtmlMeta = ({
  html,
  hostname,
}: ExtractHtmlMetaInput): Record<string, string> => {
  let res: Record<string, string> = {}

  const titleMatch = /<title>([^<]+)<\/title>/i.exec(html)
  if (titleMatch) {
    res.title = titleMatch[1].trim()
  }

  const metaRe = /<meta[\s]([^>]+)>/gis
  // Group 2 holds a double-quoted value, group 3 a single-quoted one.
  const attrRe = /(name|property|content)\s*=\s*(?:"([^"]*)"|'([^']*)')/gi

  let metaMatch: RegExpExecArray | null
  while ((metaMatch = metaRe.exec(html))) {
    let propName: string | undefined
    let propValue: string | undefined

    // The regex is shared across tags, so restart it for each attribute list.
    attrRe.lastIndex = 0
    let attrMatch: RegExpExecArray | null
    while ((attrMatch = attrRe.exec(metaMatch[1]))) {
      const attrValue = attrMatch[2] ?? attrMatch[3]
      // The match is case-insensitive, so normalise before comparing;
      // otherwise `CONTENT="…"` would be mistaken for the meta name.
      if (attrMatch[1].toLowerCase() === 'content') {
        propValue = attrValue
      } else {
        propName = attrValue
      }
    }

    if (!propName || !propValue) {
      continue
    }

    switch (propName.trim().toLowerCase()) {
      case 'title':
      case 'og:title':
      case 'twitter:title':
        res.title = propValue.trim()
        break
      case 'description':
      case 'og:description':
      case 'twitter:description':
        res.description = propValue.trim()
        break
      case 'og:image':
      case 'twitter:image':
        res.image = propValue.trim()
        break
    }
  }

  const isYoutubeUrl =
    hostname?.includes('youtube.') || hostname?.includes('youtu.be')
  if (isYoutubeUrl) {
    // Workaround for Youtube not having a title in the meta tags
    res = {...res, ...extractYoutubeMeta(html)}
  }

  return res
}
|
26
src/lib/extractYoutubeMeta.ts
Normal file
26
src/lib/extractYoutubeMeta.ts
Normal file
|
@ -0,0 +1,26 @@
|
|||
/**
 * Decodes the raw body of a JSON string literal (the text between the quotes),
 * resolving escapes such as `\n`, `\"` and `\u0026`. If the text is not a
 * well-formed JSON string body, only `\n` sequences are translated.
 */
const decodeJsonStringBody = (raw: string): string => {
  try {
    const parsed: unknown = JSON.parse(`"${raw}"`)
    if (typeof parsed === 'string') {
      return parsed
    }
  } catch {
    // Malformed escape sequence; fall through to the minimal handling below.
  }
  return raw.replace(/\\n/g, '\n')
}

/**
 * `decodeURI` that returns its input unchanged instead of throwing a
 * `URIError` when the text contains a `%` that is not a valid escape
 * (e.g. "100% real").
 */
const safeDecodeURI = (text: string): string => {
  try {
    return decodeURI(text)
  } catch {
    return text
  }
}

/**
 * Extracts `title`, `description` and `image` from the `"videoDetails"` JSON
 * that is embedded in the HTML passed in.
 *
 * The values are located with regular expressions rather than by parsing the
 * whole JSON document. Captured strings have their JSON escapes decoded and
 * are then passed through a non-throwing `decodeURI`.
 *
 * @param html Raw HTML of the page.
 * @returns An object holding only the fields that were found; empty when the
 *   HTML has no `"videoDetails"` data.
 */
export const extractYoutubeMeta = (html: string): Record<string, string> => {
  const res: Record<string, string> = {}

  // `(?:[^"\\]|\\.)*` matches a JSON string body including escaped quotes, so
  // a value such as `He said \"hi\"` is not cut off at the first `\"`.
  const youtubeTitleRegex = /"videoDetails":.*"title":"((?:[^"\\]|\\.)*)"/i
  const youtubeDescriptionRegex =
    /"videoDetails":.*"shortDescription":"((?:[^"\\]|\\.)*)"/i
  // `[^"]*` keeps the capture inside a single JSON string value.
  const youtubeThumbnailRegex = /"videoDetails":.*"url":"([^"]*)default\.jpg/i

  const youtubeTitleMatch = youtubeTitleRegex.exec(html)
  const youtubeDescriptionMatch = youtubeDescriptionRegex.exec(html)
  const youtubeThumbnailMatch = youtubeThumbnailRegex.exec(html)

  if (youtubeTitleMatch) {
    res.title = safeDecodeURI(decodeJsonStringBody(youtubeTitleMatch[1]))
  }
  if (youtubeDescriptionMatch) {
    res.description = safeDecodeURI(
      decodeJsonStringBody(youtubeDescriptionMatch[1]),
    )
  }
  if (youtubeThumbnailMatch) {
    // Rebuild the URL up to `default.jpg`, dropping any trailing query string.
    res.image = youtubeThumbnailMatch[1] + 'default.jpg'
  }

  return res
}
|
|
@ -1,7 +1,8 @@
|
|||
import he from 'he'
|
||||
import {extractHtmlMeta, isBskyAppUrl} from './strings'
|
||||
import {isBskyAppUrl} from './strings'
|
||||
import {RootStoreModel} from '../state'
|
||||
import {extractBskyMeta} from './extractBskyMeta'
|
||||
import {extractHtmlMeta} from './extractHtmlMeta'
|
||||
|
||||
export enum LikelyType {
|
||||
HTML,
|
||||
|
@ -59,7 +60,10 @@ export async function getLinkMeta(
|
|||
})
|
||||
const httpResBody = await httpRes.text()
|
||||
clearTimeout(to)
|
||||
const httpResMeta = extractHtmlMeta(httpResBody)
|
||||
const httpResMeta = extractHtmlMeta({
|
||||
html: httpResBody,
|
||||
hostname: urlp?.hostname,
|
||||
})
|
||||
meta.title = httpResMeta.title ? he.decode(httpResMeta.title) : undefined
|
||||
meta.description = httpResMeta.description
|
||||
? he.decode(httpResMeta.description)
|
||||
|
|
|
@ -265,54 +265,3 @@ export function convertBskyAppUrlIfNeeded(url: string): string {
|
|||
}
|
||||
return url
|
||||
}
|
||||
|
||||
const htmlTitleRegex = /<title>([^<]+)<\/title>/i

/**
 * Extracts link-preview metadata (`title`, `description`, `image`) from HTML.
 *
 * The `<title>` element provides the initial title. Every `<meta>` tag is then
 * scanned for a `name`/`property` + `content` pair; recognised names
 * (`title`, `og:title`, `twitter:title`, `description`, `og:description`,
 * `twitter:description`, `og:image`, `twitter:image`) overwrite the matching
 * field, so the last matching tag in document order wins.
 *
 * Attribute names and meta names are matched case-insensitively, and attribute
 * values may be wrapped in either double or single quotes.
 *
 * @param html Raw HTML to scan.
 * @returns An object holding only the fields that were found, trimmed.
 */
export function extractHtmlMeta(html: string): Record<string, string> {
  const res: Record<string, string> = {}

  const titleMatch = htmlTitleRegex.exec(html)
  if (titleMatch) {
    res.title = titleMatch[1].trim()
  }

  const metaRe = /<meta[\s]([^>]+)>/gis
  // Group 2 holds a double-quoted value, group 3 a single-quoted one.
  const attrRe = /(name|property|content)\s*=\s*(?:"([^"]*)"|'([^']*)')/gi

  let metaMatch: RegExpExecArray | null
  while ((metaMatch = metaRe.exec(html))) {
    let propName: string | undefined
    let propValue: string | undefined

    // The regex is shared across tags, so restart it for each attribute list.
    attrRe.lastIndex = 0
    let attrMatch: RegExpExecArray | null
    while ((attrMatch = attrRe.exec(metaMatch[1]))) {
      const attrValue = attrMatch[2] ?? attrMatch[3]
      // The match is case-insensitive, so normalise before comparing;
      // otherwise `CONTENT="…"` would be mistaken for the meta name.
      if (attrMatch[1].toLowerCase() === 'content') {
        propValue = attrValue
      } else {
        propName = attrValue
      }
    }

    if (!propName || !propValue) {
      continue
    }

    switch (propName.trim().toLowerCase()) {
      case 'title':
      case 'og:title':
      case 'twitter:title':
        res.title = propValue.trim()
        break
      case 'description':
      case 'og:description':
      case 'twitter:description':
        res.description = propValue.trim()
        break
      case 'og:image':
      case 'twitter:image':
        res.image = propValue.trim()
        break
    }
  }

  return res
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue