Fixes embed links for twitter and tiktok (#74)

2023-01-20 13:54:30 -05:00 · 2023-01-20 13:54:30 -05:00 · 2fce1637b4
commit 2fce1637b4
parent d4b9ef3b0a
5 changed files with 79 additions and 2 deletions
--- a/tests/lib/__mocks__/tiktokHtml.ts
+++ b/tests/lib/__mocks__/tiktokHtml.ts
--- a/tests/lib/extractHtmlMeta.test.ts
+++ b/tests/lib/extractHtmlMeta.test.ts
@ -1,6 +1,7 @@
 import {extractHtmlMeta} from '../../src/lib/extractHtmlMeta'
 import {exampleComHtml} from './__mocks__/exampleComHtml'
 import {youtubeHTML} from './__mocks__/youtubeHtml'
+import {tiktokHtml} from './__mocks__/tiktokHtml'

 describe('extractHtmlMeta', () => {
  const cases = [
@ -56,6 +57,18 @@ describe('extractHtmlMeta', () => {
    expect(output).toEqual(expectedOutput)
  })

+  // Runs the extractor over the TikTok fixture and pins the exact title and description it yields.
+  it('extracts title and description from a Tiktok HTML page', () => {
+    const actual = extractHtmlMeta({html: tiktokHtml, hostname: 'tiktok.com'})
+
+    expect(actual).toEqual({
+      title:
+        'Coca-Cola and Mentos! Super Reaction! #cocacola #mentos #reaction #bal... | TikTok',
+      description:
+        '5.5M Likes, 20.8K Comments. TikTok video from Power Vision Tests (@_powervision_): &quot;Coca-Cola and Mentos! Super Reaction! #cocacola #mentos #reaction #balloon #sciencemoment #scienceexperiment #experiment #test #amazing #pvexp&quot;.  оригинальный звук - Power Vision Tests.',
+    })
+  })
+
  it('extracts title and description from a generic youtube page', () => {
    const input = youtubeHTML
    const expectedOutput = {
@ -67,4 +80,37 @@ describe('extractHtmlMeta', () => {
    const output = extractHtmlMeta({html: input, hostname: 'youtube.com'})
    expect(output).toEqual(expectedOutput)
  })
+
+  // For twitter hostnames the result is built from the pathname only, so an
+  // empty document is passed to satisfy the required `html` field.
+  it('extracts username from the url a twitter profile page', () => {
+    const expectedOutput = {
+      title: '@bluesky on Twitter',
+    }
+    const output = extractHtmlMeta({
+      html: '',
+      hostname: 'twitter.com',
+      pathname: '/bluesky',
+    })
+    expect(output).toEqual(expectedOutput)
+  })
+
+  // For twitter hostnames the result is built from the pathname only, so an
+  // empty document is passed to satisfy the required `html` field.
+  it('extracts username from the url a tweet', () => {
+    const expectedOutput = {
+      title: 'Tweet by @bluesky',
+    }
+    const output = extractHtmlMeta({
+      html: '',
+      hostname: 'twitter.com',
+      pathname: '/bluesky/status/1582437529969917953',
+    })
+    expect(output).toEqual(expectedOutput)
+  })
+
+  // A one-letter first path segment must fall back to the generic title.
+  // An empty document is passed to satisfy the required `html` field.
+  it("does not extract username from the url when it's not a tweet or profile page", () => {
+    const expectedOutput = {
+      title: 'Twitter',
+    }
+    const output = extractHtmlMeta({
+      html: '',
+      hostname: 'twitter.com',
+      pathname: '/i/articles/follows/-1675653703?time_window=24',
+    })
+    expect(output).toEqual(expectedOutput)
+  })
 })
--- a/src/lib/extractHtmlMeta.ts
+++ b/src/lib/extractHtmlMeta.ts
@ -1,15 +1,18 @@
+import {extractTwitterMeta} from './extractTwitterMeta'
 import {extractYoutubeMeta} from './extractYoutubeMeta'

 interface ExtractHtmlMetaInput {
  html: string
  hostname?: string
+  pathname?: string
 }

 export const extractHtmlMeta = ({
  html,
  hostname,
+  pathname,
 }: ExtractHtmlMetaInput): Record<string, string> => {
-  const htmlTitleRegex = /<title>([^<]+)<\/title>/i
+  const htmlTitleRegex = /<title.*>([^<]+)<\/title>/i

  let res: Record<string, string> = {}

@ -56,9 +59,12 @@ export const extractHtmlMeta = ({

  const isYoutubeUrl =
    hostname?.includes('youtube.') || hostname?.includes('youtu.be')
+  const isTwitterUrl = hostname?.includes('twitter.')
+  // Workaround for some websites not having a title or description in the meta tags in the initial serve
  if (isYoutubeUrl) {
-    // Workaround for Youtube not having a title in the meta tags
    res = {...res, ...extractYoutubeMeta(html)}
+  } else if (isTwitterUrl) {
+    res = {...extractTwitterMeta({pathname})}
  }

  return res
--- a/src/lib/extractTwitterMeta.ts
+++ b/src/lib/extractTwitterMeta.ts
@ -0,0 +1,20 @@
+// Top-level twitter.com routes that are pages, not account handles (not exhaustive).
+const TWITTER_RESERVED_PATHS = new Set([
+  'explore',
+  'hashtag',
+  'home',
+  'intent',
+  'login',
+  'messages',
+  'notifications',
+  'search',
+  'settings',
+  'share',
+  'signup',
+])
+
+// Handles are ASCII letters, digits and underscores, at most 15 characters.
+// One-character segments are rejected because twitter uses them for internal
+// routes such as twitter.com/i/articles/follows/-1675653703.
+const TWITTER_HANDLE_REGEX = /^\w{2,15}$/
+
+/**
+ * Derives a link-preview title for a Twitter URL from its pathname alone.
+ *
+ * @param pathname - Path portion of the URL, e.g. `/bluesky` or
+ *   `/bluesky/status/1582437529969917953`. May be omitted.
+ * @returns An object with a single `title` key:
+ *   - `Tweet by @<handle>` for `/<handle>/status/<id>` paths,
+ *   - `@<handle> on Twitter` for any other path that starts with a handle,
+ *   - `Twitter` when no handle can be identified.
+ */
+export const extractTwitterMeta = ({
+  pathname,
+}: {
+  pathname?: string
+}): Record<string, string> => {
+  const res = {title: 'Twitter'}
+  // Dropping empty segments makes leading, trailing and doubled slashes harmless.
+  const [username, section, tweetId] = (pathname ?? '')
+    .split('/')
+    .filter(Boolean)
+
+  if (
+    username === undefined ||
+    !TWITTER_HANDLE_REGEX.test(username) ||
+    TWITTER_RESERVED_PATHS.has(username.toLowerCase())
+  ) {
+    return res
+  }
+
+  // Only `/<handle>/status/<id>` is a tweet; other sub-pages (followers,
+  // likes, ...) still belong to the profile.
+  const isTweet = section === 'status' && tweetId !== undefined
+  res.title = isTweet ? `Tweet by @${username}` : `@${username} on Twitter`
+
+  return res
+}
--- a/src/lib/link-meta.ts
+++ b/src/lib/link-meta.ts
@ -63,6 +63,7 @@ export async function getLinkMeta(
    const httpResMeta = extractHtmlMeta({
      html: httpResBody,
      hostname: urlp?.hostname,
+      pathname: urlp?.pathname,
    })
    meta.title = httpResMeta.title ? he.decode(httpResMeta.title) : undefined
    meta.description = httpResMeta.description