From 2fce1637b4ae01667da8ceafaa07a6266ab88450 Mon Sep 17 00:00:00 2001 From: Aryan Goharzad Date: Fri, 20 Jan 2023 13:54:30 -0500 Subject: [PATCH] Fixes embed links for twitter and tiktok (#74) --- __tests__/lib/__mocks__/tiktokHtml.ts | 4 ++ ...taHtml.test.ts => extractHtmlMeta.test.ts} | 46 +++++++++++++++++++ src/lib/extractHtmlMeta.ts | 10 +++- src/lib/extractTwitterMeta.ts | 20 ++++++++ src/lib/link-meta.ts | 1 + 5 files changed, 79 insertions(+), 2 deletions(-) create mode 100644 __tests__/lib/__mocks__/tiktokHtml.ts rename __tests__/lib/{extractMetaHtml.test.ts => extractHtmlMeta.test.ts} (60%) create mode 100644 src/lib/extractTwitterMeta.ts diff --git a/__tests__/lib/__mocks__/tiktokHtml.ts b/__tests__/lib/__mocks__/tiktokHtml.ts new file mode 100644 index 00000000..fa3d1128 --- /dev/null +++ b/__tests__/lib/__mocks__/tiktokHtml.ts @@ -0,0 +1,4 @@ +export const tiktokHtml = ` +Coca-Cola and Mentos! Super Reaction! #cocacola #mentos #reaction #bal... | TikTok
Upload

For You

Log in to follow creators, like videos, and view comments.

Suggested accounts

© 2023 TikTok
Coca-Cola and Mentos! Super Reaction! #cocacola #mentos #reaction #balloon #sciencemoment #scienceexperiment #experiment #test #amazing #pvexp
00:00/00:00
Coca-Cola and Mentos! Super Reaction! #cocacola #mentos #reaction #balloon #sciencemoment #scienceexperiment #experiment #test #amazing #pvexp
_powervision_
Power Vision Tests · 2019-10-19

Related videos

Get TikTok App
+` diff --git a/__tests__/lib/extractMetaHtml.test.ts b/__tests__/lib/extractHtmlMeta.test.ts similarity index 60% rename from __tests__/lib/extractMetaHtml.test.ts rename to __tests__/lib/extractHtmlMeta.test.ts index 10020f3a..c3308407 100644 --- a/__tests__/lib/extractMetaHtml.test.ts +++ b/__tests__/lib/extractHtmlMeta.test.ts @@ -1,6 +1,7 @@ import {extractHtmlMeta} from '../../src/lib/extractHtmlMeta' import {exampleComHtml} from './__mocks__/exampleComHtml' import {youtubeHTML} from './__mocks__/youtubeHtml' +import {tiktokHtml} from './__mocks__/tiktokHtml' describe('extractHtmlMeta', () => { const cases = [ @@ -56,6 +57,18 @@ describe('extractHtmlMeta', () => { expect(output).toEqual(expectedOutput) }) + it('extracts title and description from a Tiktok HTML page', () => { + const input = tiktokHtml + const expectedOutput = { + title: + 'Coca-Cola and Mentos! Super Reaction! #cocacola #mentos #reaction #bal... | TikTok', + description: + '5.5M Likes, 20.8K Comments. TikTok video from Power Vision Tests (@_powervision_): "Coca-Cola and Mentos! Super Reaction! #cocacola #mentos #reaction #balloon #sciencemoment #scienceexperiment #experiment #test #amazing #pvexp". 
оригинальный звук - Power Vision Tests.', + } + const output = extractHtmlMeta({html: input, hostname: 'tiktok.com'}) + expect(output).toEqual(expectedOutput) + }) + it('extracts title and description from a generic youtube page', () => { const input = youtubeHTML const expectedOutput = { @@ -67,4 +80,37 @@ describe('extractHtmlMeta', () => { const output = extractHtmlMeta({html: input, hostname: 'youtube.com'}) expect(output).toEqual(expectedOutput) }) + + it('extracts username from the url a twitter profile page', () => { + const expectedOutput = { + title: '@bluesky on Twitter', + } + const output = extractHtmlMeta({ + hostname: 'twitter.com', + pathname: '/bluesky', + }) + expect(output).toEqual(expectedOutput) + }) + + it('extracts username from the url a tweet', () => { + const expectedOutput = { + title: 'Tweet by @bluesky', + } + const output = extractHtmlMeta({ + hostname: 'twitter.com', + pathname: '/bluesky/status/1582437529969917953', + }) + expect(output).toEqual(expectedOutput) + }) + + it("does not extract username from the url when it's not a tweet or profile page", () => { + const expectedOutput = { + title: 'Twitter', + } + const output = extractHtmlMeta({ + hostname: 'twitter.com', + pathname: '/i/articles/follows/-1675653703?time_window=24', + }) + expect(output).toEqual(expectedOutput) + }) }) diff --git a/src/lib/extractHtmlMeta.ts b/src/lib/extractHtmlMeta.ts index 2517262b..038ca81c 100644 --- a/src/lib/extractHtmlMeta.ts +++ b/src/lib/extractHtmlMeta.ts @@ -1,15 +1,18 @@ +import {extractTwitterMeta} from './extractTwitterMeta' import {extractYoutubeMeta} from './extractYoutubeMeta' interface ExtractHtmlMetaInput { html: string hostname?: string + pathname?: string } export const extractHtmlMeta = ({ html, hostname, + pathname, }: ExtractHtmlMetaInput): Record => { - const htmlTitleRegex = /([^<]+)<\/title>/i + const htmlTitleRegex = /<title.*>([^<]+)<\/title>/i let res: Record<string, string> = {} @@ -56,9 +59,12 @@ export const 
extractHtmlMeta = ({ const isYoutubeUrl = hostname?.includes('youtube.') || hostname?.includes('youtu.be') + const isTwitterUrl = hostname?.includes('twitter.') + // Workaround for some websites not having a title or description in the meta tags in the initial serve if (isYoutubeUrl) { - // Workaround for Youtube not having a title in the meta tags res = {...res, ...extractYoutubeMeta(html)} + } else if (isTwitterUrl) { + res = {...extractTwitterMeta({pathname})} } return res diff --git a/src/lib/extractTwitterMeta.ts b/src/lib/extractTwitterMeta.ts new file mode 100644 index 00000000..d785903c --- /dev/null +++ b/src/lib/extractTwitterMeta.ts @@ -0,0 +1,20 @@ +export const extractTwitterMeta = ({ + pathname, +}: { + pathname: string +}): Record<string, string> => { + const res = {title: 'Twitter'} + const parsedPathname = pathname.split('/') + if (parsedPathname.length <= 1 || parsedPathname[1].length <= 1) { + // Excluding one-letter usernames, as they're reserved by Twitter for paths like twitter.com/i/articles/follows/-1675653703 + return res + } + const username = parsedPathname?.[1] + const isUserProfile = parsedPathname?.length === 2 + + res.title = isUserProfile + ? `@${username} on Twitter` + : `Tweet by @${username}` + + return res +} diff --git a/src/lib/link-meta.ts b/src/lib/link-meta.ts index 7e0964c1..2826e969 100644 --- a/src/lib/link-meta.ts +++ b/src/lib/link-meta.ts @@ -63,6 +63,7 @@ export async function getLinkMeta( const httpResMeta = extractHtmlMeta({ html: httpResBody, hostname: urlp?.hostname, + pathname: urlp?.pathname, }) meta.title = httpResMeta.title ? he.decode(httpResMeta.title) : undefined meta.description = httpResMeta.description