Fixes youtube embed issues (#50)

* fixes youtube embed * move extractHtmlMeta test to its own file * tests cleanup * Add fallback for youtube metadata * lint * Check for youtube in the url domain * use hostname instead of full url to check for link domain * checks only for domain
2023-01-19 13:53:11 -05:00 · 2023-01-19 13:53:11 -05:00 · f10a8308d9
commit f10a8308d9
parent 9230d52ff5
12 changed files with 245 additions and 149 deletions
--- a/tests/lib/mocks/exampleComHtml.ts
+++ b/tests/lib/mocks/exampleComHtml.ts
@ -0,0 +1,47 @@
 /**
  * Static HTML fixture: a small page with a `<title>` and a
  * `<meta name="description">` tag, and no Open Graph or Twitter card tags.
  */
 export const exampleComHtml = `<!doctype html>
 <html>
 <head>
    <title>Example Domain</title>
    <meta name="description" content="An example website">
    <meta charset="utf-8" />
    <meta http-equiv="Content-type" content="text/html; charset=utf-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1" />
    <style type="text/css">
    body {
        background-color: #f0f0f2;
        margin: 0;
        padding: 0;
        font-family: -apple-system, system-ui, BlinkMacSystemFont, "Segoe UI", "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif;
    }
    div {
        width: 600px;
        margin: 5em auto;
        padding: 2em;
        background-color: #fdfdff;
        border-radius: 0.5em;
        box-shadow: 2px 3px 7px 2px rgba(0,0,0,0.02);
    }
    a:link, a:visited {
        color: #38488f;
        text-decoration: none;
    }
    @media (max-width: 700px) {
        div {
            margin: 0 auto;
            width: auto;
        }
    }
    </style>
 </head>
 <body>
 <div>
    <h1>Example Domain</h1>
    <p>This domain is for use in illustrative examples in documents. You may use this
    domain in literature without prior coordination or asking for permission.</p>
    <p><a href="https://www.iana.org/domains/example">More information...</a></p>
 </div>
 </body>
 </html>`
--- a/tests/lib/mocks/youtubeHtml.ts
+++ b/tests/lib/mocks/youtubeHtml.ts
--- a/tests/lib/extractMetaHtml.test.ts
+++ b/tests/lib/extractMetaHtml.test.ts
@ -0,0 +1,70 @@
 import {extractHtmlMeta} from '../../src/lib/extractHtmlMeta'
 import {exampleComHtml} from './__mocks__/exampleComHtml'
 import {youtubeHTML} from './__mocks__/youtubeHtml'
 // Unit tests for extractHtmlMeta. The function takes an options object
 // ({html, hostname}), so every call below passes the HTML via `html`.
 describe('extractHtmlMeta', () => {
  // Each case pairs an HTML snippet with the metadata expected from it.
  const cases: [string, Record<string, string>][] = [
    ['', {}],
    ['nothing', {}],
    ['<title>title</title>', {title: 'title'}],
    ['<title> aSd!@#AC </title>', {title: 'aSd!@#AC'}],
    ['<title>\n  title\n  </title>', {title: 'title'}],
    ['<meta name="title" content="meta title">', {title: 'meta title'}],
    [
      '<meta name="description" content="meta description">',
      {description: 'meta description'},
    ],
    ['<meta property="og:title" content="og title">', {title: 'og title'}],
    [
      '<meta property="og:description" content="og description">',
      {description: 'og description'},
    ],
    [
      '<meta property="og:image" content="https://ogimage.com/foo.png">',
      {image: 'https://ogimage.com/foo.png'},
    ],
    [
      '<meta property="twitter:title" content="twitter title">',
      {title: 'twitter title'},
    ],
    [
      '<meta property="twitter:description" content="twitter description">',
      {description: 'twitter description'},
    ],
    [
      '<meta property="twitter:image" content="https://twitterimage.com/foo.png">',
      {image: 'https://twitterimage.com/foo.png'},
    ],
    ['<meta\n  name="title"\n  content="meta title"\n>', {title: 'meta title'}],
  ]
  it.each(cases)(
    'given the html tag %p, returns %p',
    (input, expectedResult) => {
      // No hostname: only the generic <title>/<meta> extraction is exercised.
      const output = extractHtmlMeta({html: input})
      expect(output).toEqual(expectedResult)
    },
  )
  it('extracts title and description from a generic HTML page', () => {
    const input = exampleComHtml
    const expectedOutput = {
      title: 'Example Domain',
      description: 'An example website',
    }
    const output = extractHtmlMeta({html: input, hostname: 'example.com'})
    expect(output).toEqual(expectedOutput)
  })
  it('extracts title and description from a generic youtube page', () => {
    const input = youtubeHTML
    const expectedOutput = {
      title: 'HD Video (1080p) with Relaxing Music of Native American Shamans',
      description:
        'Stunning HD Video ( 1080p ) of Patagonian Nature with Relaxing Native American Shamanic Music. HD footage used from ',
      image: 'https://i.ytimg.com/vi/x6UITRjhijI/sddefault.jpg',
    }
    // The YouTube fallback only runs when a YouTube hostname is supplied.
    const output = extractHtmlMeta({html: input, hostname: 'youtube.com'})
    expect(output).toEqual(expectedOutput)
  })
 })
--- a/tests/lib/link-meta.test.ts
+++ b/tests/lib/link-meta.test.ts
@ -1,54 +1,7 @@
 import {LikelyType, getLinkMeta, getLikelyType} from '../../src/lib/link-meta'
 import {exampleComHtml} from './__mocks__/exampleComHtml'
 import {mockedRootStore} from '../../__mocks__/state-mock'
 // Sample HTML document: has a <title> and a <meta name="description"> tag,
 // and no Open Graph or Twitter card tags.
 const exampleComHtml = `<!doctype html>
 <html>
 <head>
    <title>Example Domain</title>
    <meta name="description" content="An example website">
    <meta charset="utf-8" />
    <meta http-equiv="Content-type" content="text/html; charset=utf-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1" />
    <style type="text/css">
    body {
        background-color: #f0f0f2;
        margin: 0;
        padding: 0;
        font-family: -apple-system, system-ui, BlinkMacSystemFont, "Segoe UI", "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif;
    }
    div {
        width: 600px;
        margin: 5em auto;
        padding: 2em;
        background-color: #fdfdff;
        border-radius: 0.5em;
        box-shadow: 2px 3px 7px 2px rgba(0,0,0,0.02);
    }
    a:link, a:visited {
        color: #38488f;
        text-decoration: none;
    }
    @media (max-width: 700px) {
        div {
            margin: 0 auto;
            width: auto;
        }
    }
    </style>
 </head>
 <body>
 <div>
    <h1>Example Domain</h1>
    <p>This domain is for use in illustrative examples in documents. You may use this
    domain in literature without prior coordination or asking for permission.</p>
    <p><a href="https://www.iana.org/domains/example">More information...</a></p>
 </div>
 </body>
 </html>`
 describe('getLinkMeta', () => {
  const inputs = [
    '',
--- a/tests/lib/string.test.ts
+++ b/tests/lib/string.test.ts
@ -1,7 +1,6 @@
 import {
  extractEntities,
  detectLinkables,
  extractHtmlMeta,
  pluralize,
  makeRecordUri,
  ago,
@ -286,48 +285,6 @@ describe('detectLinkables', () => {
  })
 })
 // Table-driven check of extractHtmlMeta against small HTML snippets.
 describe('extractHtmlMeta', () => {
  // Each fixture pairs an HTML snippet with the metadata expected from it.
  const fixtures: [string, Record<string, string>][] = [
    ['', {}],
    ['nothing', {}],
    ['<title>title</title>', {title: 'title'}],
    ['<title> aSd!@#AC </title>', {title: 'aSd!@#AC'}],
    ['<title>\n  title\n  </title>', {title: 'title'}],
    ['<meta name="title" content="meta title">', {title: 'meta title'}],
    [
      '<meta name="description" content="meta description">',
      {description: 'meta description'},
    ],
    ['<meta property="og:title" content="og title">', {title: 'og title'}],
    [
      '<meta property="og:description" content="og description">',
      {description: 'og description'},
    ],
    [
      '<meta property="og:image" content="https://ogimage.com/foo.png">',
      {image: 'https://ogimage.com/foo.png'},
    ],
    [
      '<meta property="twitter:title" content="twitter title">',
      {title: 'twitter title'},
    ],
    [
      '<meta property="twitter:description" content="twitter description">',
      {description: 'twitter description'},
    ],
    [
      '<meta property="twitter:image" content="https://twitterimage.com/foo.png">',
      {image: 'https://twitterimage.com/foo.png'},
    ],
    ['<meta\n  name="title"\n  content="meta title"\n>', {title: 'meta title'}],
  ]
  it('correctly handles a set of text inputs', () => {
    for (const [html, expected] of fixtures) {
      expect(extractHtmlMeta(html)).toEqual(expected)
    }
  })
 })
 describe('pluralize', () => {
  const inputs: [number, string, string?][] = [
    [1, 'follower'],
--- a/ios/Podfile.lock
+++ b/ios/Podfile.lock
@ -565,13 +565,13 @@ EXTERNAL SOURCES:
    :path: "../node_modules/react-native/ReactCommon/yoga"
 SPEC CHECKSUMS:
-  boost: a7c83b31436843459a1961bfd74b96033dc77234
+  boost: 57d2868c099736d80fcd648bf211b4431e51a558
  BVLinearGradient: 34a999fda29036898a09c6a6b728b0b4189e1a44
-  DoubleConversion: 831926d9b8bf8166fd87886c4abab286c2422662
+  DoubleConversion: 5189b271737e1565bdce30deb4a08d647e3f5f54
  FBLazyVector: 61839cba7a48c570b7ac3e1cd8a4d0948382202f
  FBReactNativeSpec: 5a14398ccf5e27c1ca2d7109eb920594ce93c10d
  fmt: ff9d55029c625d3757ed641535fd4a75fedc7ce9
-  glog: 476ee3e89abb49e07f822b48323c51c57124b572
+  glog: 04b94705f318337d7ead9e6d17c019bd9b1f6b1b
  hermes-engine: f6e715aa6c8bd38de6c13bc85e07b0a337edaa89
  libevent: 4049cae6c81cdb3654a443be001fb9bdceff7913
  RCT-Folly: 424b8c9a7a0b9ab2886ffe9c3b041ef628fd4fb1
--- a/package.json
+++ b/package.json
@ -9,6 +9,7 @@
    "start": "react-native start",
    "clean-cache": "rm -rf node_modules/.cache/babel-loader/*",
    "test": "jest",
    "test-watch": "jest --watchAll",
    "test-ci": "jest --ci --forceExit --reporters=default --reporters=jest-junit",
    "test-coverage": "jest --coverage",
    "lint": "eslint . --ext .js,.jsx,.ts,.tsx"
@ -114,6 +115,7 @@
    "transformIgnorePatterns": [
      "node_modules/(?!(jest-)?react-native|react-clone-referenced-element|@react-native-community|rollbar-react-native|@fortawesome|@react-native|@react-navigation)"
    ],
    "modulePathIgnorePatterns": ["__tests__\/.*\/__mocks__"],
    "coveragePathIgnorePatterns": [
      "<rootDir>/node_modules/",
      "<rootDir>/src/platform",
--- a/src/lib/extractHtmlMeta.ts
+++ b/src/lib/extractHtmlMeta.ts
@ -0,0 +1,65 @@
 import {extractYoutubeMeta} from './extractYoutubeMeta'
 /** Input accepted by `extractHtmlMeta`. */
 interface ExtractHtmlMetaInput {
  /** Raw HTML document to scan. */
  html: string
  /** Hostname the HTML came from; enables host-specific fallbacks. */
  hostname?: string
 }
 /**
  * Reports whether `hostname` is a YouTube domain.
  *
  * Accepts `youtu.be` and `youtube.<tld>` (e.g. `youtube.com`, `youtube.de`,
  * `youtube.co.uk`), with or without subdomains. The match is anchored on
  * label boundaries, so hosts that merely contain the text - such as
  * `notyoutube.com` or `youtube.evil.example` - are rejected.
  */
 const isYoutubeHostname = (hostname?: string): boolean => {
  if (!hostname) {
    return false
  }
  // Lower-case and drop the optional trailing dot of a fully-qualified name.
  const host = hostname.toLowerCase().replace(/\.$/, '')
  return /(^|\.)(youtu\.be|youtube\.(com|[a-z]{2}|com?\.[a-z]{2}))$/.test(host)
 }
 /**
  * Extracts link-preview metadata (`title`, `description`, `image`) from HTML.
  *
  * The `<title>` element seeds `title`. Every `<meta>` tag carrying both a
  * `name`/`property` and a `content` attribute is then applied in document
  * order, so later tags override earlier values. Recognised keys are
  * `title`, `description`, and their `og:` / `twitter:` variants, plus
  * `og:image` / `twitter:image`.
  *
  * When `hostname` is a YouTube domain, fields returned by
  * `extractYoutubeMeta` take precedence over the generic ones.
  *
  * @param input.html - HTML document to scan.
  * @param input.hostname - Hostname of the page, if known.
  * @returns An object containing only the fields that were found.
  */
 export const extractHtmlMeta = ({
  html,
  hostname,
 }: ExtractHtmlMetaInput): Record<string, string> => {
  const res: Record<string, string> = {}
  const titleMatch = /<title>([^<]+)<\/title>/i.exec(html)
  if (titleMatch) {
    res.title = titleMatch[1].trim()
  }
  const metaRe = /<meta[\s]([^>]+)>/gis
  // Shared across tags: the inner loop always runs until exec() returns null,
  // which resets lastIndex to 0 before the next tag is scanned.
  const propRe = /(name|property|content)="([^"]+)"/gis
  let metaMatch: RegExpExecArray | null
  while ((metaMatch = metaRe.exec(html))) {
    let propName: string | undefined
    let propValue: string | undefined
    let propMatch: RegExpExecArray | null
    while ((propMatch = propRe.exec(metaMatch[1]))) {
      // Attribute names are matched case-insensitively, so compare likewise;
      // otherwise `CONTENT="..."` would be mistaken for the name attribute.
      if (propMatch[1].toLowerCase() === 'content') {
        propValue = propMatch[2]
      } else {
        propName = propMatch[2]
      }
    }
    if (!propName || !propValue) {
      continue
    }
    const value = propValue.trim()
    switch (propName.trim()) {
      case 'title':
      case 'og:title':
      case 'twitter:title':
        res.title = value
        break
      case 'description':
      case 'og:description':
      case 'twitter:description':
        res.description = value
        break
      case 'og:image':
      case 'twitter:image':
        res.image = value
        break
    }
  }
  if (isYoutubeHostname(hostname)) {
    // Workaround for Youtube not having a title in the meta tags
    Object.assign(res, extractYoutubeMeta(html))
  }
  return res
 }
--- a/src/lib/extractYoutubeMeta.ts
+++ b/src/lib/extractYoutubeMeta.ts
@ -0,0 +1,26 @@
 /**
  * Decodes percent-escapes in `value` with `decodeURI`, returning the input
  * unchanged when it contains a malformed sequence (e.g. a literal `%` as in
  * "100% real"), for which `decodeURI` throws `URIError`.
  */
 const safeDecodeURI = (value: string): string => {
  try {
    return decodeURI(value)
  } catch {
    return value
  }
 }
 /**
  * Extracts `title`, `description` and `image` from the `"videoDetails"` JSON
  * embedded in a YouTube watch page.
  *
  * Each field is located independently with a regular expression, so a
  * missing field simply leaves the corresponding key unset.
  *
  * @param html - HTML of the YouTube page.
  * @returns An object containing only the fields that were found.
  */
 export const extractYoutubeMeta = (html: string): Record<string, string> => {
  const res: Record<string, string> = {}
  const youtubeTitleRegex = /"videoDetails":.*"title":"([^"]*)"/i
  const youtubeDescriptionRegex =
    /"videoDetails":.*"shortDescription":"([^"]*)"/i
  const youtubeThumbnailRegex = /"videoDetails":.*"url":"(.*)(default\.jpg)/i
  const youtubeTitleMatch = youtubeTitleRegex.exec(html)
  const youtubeDescriptionMatch = youtubeDescriptionRegex.exec(html)
  const youtubeThumbnailMatch = youtubeThumbnailRegex.exec(html)
  if (youtubeTitleMatch) {
    res.title = safeDecodeURI(youtubeTitleMatch[1])
  }
  if (youtubeDescriptionMatch) {
    // The JSON source encodes line breaks as the two characters `\n`.
    res.description = safeDecodeURI(youtubeDescriptionMatch[1]).replace(
      /\\n/g,
      '\n',
    )
  }
  if (youtubeThumbnailMatch) {
    // Group 1 is the URL up to (not including) the "default.jpg" suffix.
    res.image = youtubeThumbnailMatch[1] + 'default.jpg'
  }
  return res
 }
--- a/src/lib/link-meta.ts
+++ b/src/lib/link-meta.ts
@ -1,7 +1,8 @@
 import he from 'he'
-import {extractHtmlMeta, isBskyAppUrl} from './strings'
+import {isBskyAppUrl} from './strings'
 import {RootStoreModel} from '../state'
 import {extractBskyMeta} from './extractBskyMeta'
 import {extractHtmlMeta} from './extractHtmlMeta'
 export enum LikelyType {
  HTML,
@ -59,7 +60,10 @@ export async function getLinkMeta(
    })
    const httpResBody = await httpRes.text()
    clearTimeout(to)
-    const httpResMeta = extractHtmlMeta(httpResBody)
+    const httpResMeta = extractHtmlMeta({
      html: httpResBody,
      hostname: urlp?.hostname,
    })
    meta.title = httpResMeta.title ? he.decode(httpResMeta.title) : undefined
    meta.description = httpResMeta.description
      ? he.decode(httpResMeta.description)
--- a/src/lib/strings.ts
+++ b/src/lib/strings.ts
@ -265,54 +265,3 @@ export function convertBskyAppUrlIfNeeded(url: string): string {
  }
  return url
 }
 const htmlTitleRegex = /<title>([^<]+)<\/title>/i
 /**
  * Pulls preview metadata (`title`, `description`, `image`) out of an HTML
  * string.
  *
  * The `<title>` element is read first; `<meta>` tags that carry both a
  * `name`/`property` and a `content` attribute are then applied in document
  * order, later tags overriding earlier values.
  *
  * @param html - HTML document to scan.
  * @returns An object containing only the fields that were found.
  */
 export function extractHtmlMeta(html: string): Record<string, string> {
  const found: Record<string, string> = {}
  const titleHit = htmlTitleRegex.exec(html)
  if (titleHit) {
    found.title = titleHit[1].trim()
  }
  // Maps each recognised meta name/property to the result field it fills.
  const fieldFor = new Map<string, string>([
    ['title', 'title'],
    ['og:title', 'title'],
    ['twitter:title', 'title'],
    ['description', 'description'],
    ['og:description', 'description'],
    ['twitter:description', 'description'],
    ['og:image', 'image'],
    ['twitter:image', 'image'],
  ])
  for (const tag of html.matchAll(/<meta[\s]([^>]+)>/gis)) {
    let key: string | undefined
    let content: string | undefined
    const attrs = tag[1].matchAll(/(name|property|content)="([^"]+)"/gis)
    for (const attr of attrs) {
      if (attr[1] === 'content') {
        content = attr[2]
      } else {
        key = attr[2]
      }
    }
    if (!key || !content) {
      continue
    }
    const field = fieldFor.get(key.trim())
    if (field !== undefined) {
      found[field] = content.trim()
    }
  }
  return found
 }
--- a/src/view/com/posts/FeedItem.tsx
+++ b/src/view/com/posts/FeedItem.tsx
@ -39,7 +39,9 @@ export const FeedItem = observer(function ({
  const itemTitle = `Post by ${item.post.author.handle}`
  const authorHref = `/profile/${item.post.author.handle}`
  const replyAuthorDid = useMemo(() => {
-    if (!record?.reply) return ''
+    if (!record?.reply) {
      return ''
    }
    const urip = new AtUri(record.reply.parent?.uri || record.reply.root.uri)
    return urip.hostname
  }, [record?.reply])
@ -196,7 +198,9 @@ export const FeedItem = observer(function ({
            ) : (
              <View style={{height: 5}} />
            )}
            {item.post.embed ? (
              <PostEmbeds embed={item.post.embed} style={styles.embed} />
            ) : null}
            <PostCtrls
              style={styles.ctrls}
              itemHref={itemHref}