Add tags and mute words (#2968)

* Add bare minimum hashtags support (#2804) * Add bare minimum hashtags support As atproto/api already parses hashtags, this is as simple as hooking it up like link segments. This is "bare minimum" because: - Opening hashtag "#foo" is actually just a search for "foo" right now to work around #2491. - There is no integration in the composer. This hasn't stopped people from using hashtags already, and can be added later. - This change itself only had to hook things up - thank you for having already put the hashtag parsing in place. * Remove workaround for hash search not working now that it's fixed * Add RichTextTag and TagMenu * Sketch * Remove hackfix * Some cleanup * Sketch web * Mobile design * Mobile handling of tags search * Web only * Fix navigation woes * Use new callback * Hook it up * Integrate muted tags * Fix dropdown styles * Type error * Use close callback * Fix styles * Cleanup, install latest sdk * Quick muted words screen * Targets * Dir structure * Icons, list view * Move to dialog * Add removal confirmation * Swap copy * Improve checkboxees * Update matching, add tests * Moderate embeds * Create global dialogs concept again to prevent flashing * Add access from moderation screen * Highlight tags on native * Add web highlighting * Add close to web modal * Adjust close color * Rename toggles and adjust logic * Icon update * Load states * Improve regex * Improve regex * Improve regex * Revert link test * Hyphenated words * Improve matching * Enhance * Some tweaks * Muted words modal changes * Handle invalid handles, handle long tags * Remove main regex * Better test * Space/punct check drop to includes * Lowercase post text before comparison * Add better real world test case --------- Co-authored-by: Kisaragi Hiu <mail@kisaragi-hiu.com>
2024-02-26 22:33:48 -06:00 · 2024-02-26 22:33:48 -06:00 · 58aaad704a
commit 58aaad704a
parent c8582924e2
49 changed files with 1983 additions and 39 deletions
--- a/src/lib/tests/moderatePost_wrapped.test.ts
+++ b/src/lib/tests/moderatePost_wrapped.test.ts
@ -0,0 +1,578 @@
+import {describe, it, expect} from '@jest/globals'
+import {RichText} from '@atproto/api'
+
+import {hasMutedWord} from '../moderatePost_wrapped'
+
+describe(`hasMutedWord`, () => {
+  describe(`tags`, () => {
+    /**
+     * Builds a post from `text`, detects its facets, and reports whether
+     * `hasMutedWord` flags it for `words` given the post's `outlineTags`.
+     */
+    const isMuted = (
+      text: string,
+      words: Parameters<typeof hasMutedWord>[0],
+      outlineTags: string[],
+    ) => {
+      const post = new RichText({text})
+      post.detectFacetsWithoutResolution()
+      return hasMutedWord(words, post.text, post.facets, outlineTags)
+    }
+
+    it(`match: outline tag`, () => {
+      const muted = isMuted(
+        `This is a post #inlineTag`,
+        [{value: 'outlineTag', targets: ['tag']}],
+        ['outlineTag'],
+      )
+
+      expect(muted).toBe(true)
+    })
+
+    it(`match: inline tag`, () => {
+      const muted = isMuted(
+        `This is a post #inlineTag`,
+        [{value: 'inlineTag', targets: ['tag']}],
+        ['outlineTag'],
+      )
+
+      expect(muted).toBe(true)
+    })
+
+    it(`match: content target matches inline tag`, () => {
+      const muted = isMuted(
+        `This is a post #inlineTag`,
+        [{value: 'inlineTag', targets: ['content']}],
+        ['outlineTag'],
+      )
+
+      expect(muted).toBe(true)
+    })
+
+    it(`no match: only tag targets`, () => {
+      const muted = isMuted(
+        `This is a post`,
+        [{value: 'inlineTag', targets: ['tag']}],
+        [],
+      )
+
+      expect(muted).toBe(false)
+    })
+  })
+
+  describe(`early exits`, () => {
+    /**
+     * Builds a post from `text`, detects its facets, and checks it against
+     * `words` with no outline tags.
+     */
+    const isMuted = (
+      text: string,
+      words: Parameters<typeof hasMutedWord>[0],
+    ) => {
+      const post = new RichText({text})
+      post.detectFacetsWithoutResolution()
+      return hasMutedWord(words, post.text, post.facets, [])
+    }
+
+    /**
+     * @see https://bsky.app/profile/mukuuji.bsky.social/post/3klji4fvsdk2c
+     */
+    it(`match: single character 希`, () => {
+      const muted = isMuted(`改善希望です`, [
+        {value: '希', targets: ['content']},
+      ])
+
+      expect(muted).toBe(true)
+    })
+
+    it(`no match: long muted word, short post`, () => {
+      const muted = isMuted(`hey`, [{value: 'politics', targets: ['content']}])
+
+      expect(muted).toBe(false)
+    })
+
+    it(`match: exact text`, () => {
+      const muted = isMuted(`javascript`, [
+        {value: 'javascript', targets: ['content']},
+      ])
+
+      expect(muted).toBe(true)
+    })
+  })
+
+  describe(`general content`, () => {
+    /**
+     * Builds a post from `text`, detects its facets, and checks it against
+     * `words` with no outline tags.
+     */
+    const isMuted = (
+      text: string,
+      words: Parameters<typeof hasMutedWord>[0],
+    ) => {
+      const post = new RichText({text})
+      post.detectFacetsWithoutResolution()
+      return hasMutedWord(words, post.text, post.facets, [])
+    }
+
+    it(`match: word within post`, () => {
+      const muted = isMuted(`This is a post about javascript`, [
+        {value: 'javascript', targets: ['content']},
+      ])
+
+      expect(muted).toBe(true)
+    })
+
+    it(`no match: partial word`, () => {
+      const muted = isMuted(`Use your brain, Eric`, [
+        {value: 'ai', targets: ['content']},
+      ])
+
+      expect(muted).toBe(false)
+    })
+
+    it(`match: multiline`, () => {
+      const muted = isMuted(`Use your\n\tbrain, Eric`, [
+        {value: 'brain', targets: ['content']},
+      ])
+
+      expect(muted).toBe(true)
+    })
+
+    it(`match: :)`, () => {
+      const muted = isMuted(`So happy :)`, [
+        {value: `:)`, targets: ['content']},
+      ])
+
+      expect(muted).toBe(true)
+    })
+  })
+
+  describe(`punctuation semi-fuzzy`, () => {
+    describe(`yay!`, () => {
+      const post = new RichText({text: `We're federating, yay!`})
+      post.detectFacetsWithoutResolution()
+
+      // Runs the given muted words against the shared post above.
+      const isMuted = (words: Parameters<typeof hasMutedWord>[0]) =>
+        hasMutedWord(words, post.text, post.facets, [])
+
+      it(`match: yay!`, () => {
+        expect(isMuted([{value: 'yay!', targets: ['content']}])).toBe(true)
+      })
+
+      it(`match: yay`, () => {
+        expect(isMuted([{value: 'yay', targets: ['content']}])).toBe(true)
+      })
+    })
+
+    describe(`y!ppee!!`, () => {
+      const post = new RichText({text: `We're federating, y!ppee!!`})
+      post.detectFacetsWithoutResolution()
+
+      // Runs the given muted words against the shared post above.
+      const isMuted = (words: Parameters<typeof hasMutedWord>[0]) =>
+        hasMutedWord(words, post.text, post.facets, [])
+
+      it(`match: y!ppee`, () => {
+        expect(isMuted([{value: 'y!ppee', targets: ['content']}])).toBe(true)
+      })
+
+      // Single exclamation point while the source has two: this still matches
+      // because a muted phrase containing punctuation is looked up as a
+      // substring of the post text.
+      it(`match: y!ppee!`, () => {
+        expect(isMuted([{value: 'y!ppee!', targets: ['content']}])).toBe(true)
+      })
+    })
+
+    describe(`Why so S@assy?`, () => {
+      const post = new RichText({text: `Why so S@assy?`})
+      post.detectFacetsWithoutResolution()
+
+      // Runs the given muted words against the shared post above.
+      const isMuted = (words: Parameters<typeof hasMutedWord>[0]) =>
+        hasMutedWord(words, post.text, post.facets, [])
+
+      it(`match: S@assy`, () => {
+        expect(isMuted([{value: 'S@assy', targets: ['content']}])).toBe(true)
+      })
+
+      it(`match: s@assy`, () => {
+        expect(isMuted([{value: 's@assy', targets: ['content']}])).toBe(true)
+      })
+    })
+
+    describe(`New York Times`, () => {
+      const post = new RichText({text: `New York Times`})
+      post.detectFacetsWithoutResolution()
+
+      // The muted phrase is lowercase while the post is title case.
+      it(`match: new york times`, () => {
+        const muted = hasMutedWord(
+          [{value: 'new york times', targets: ['content']}],
+          post.text,
+          post.facets,
+          [],
+        )
+
+        expect(muted).toBe(true)
+      })
+    })
+
+    describe(`!command`, () => {
+      const post = new RichText({text: `Idk maybe a bot !command`})
+      post.detectFacetsWithoutResolution()
+
+      // Runs the given muted words against `source`, which defaults to the
+      // shared post above.
+      const isMuted = (
+        words: Parameters<typeof hasMutedWord>[0],
+        source: RichText = post,
+      ) => hasMutedWord(words, source.text, source.facets, [])
+
+      it(`match: !command`, () => {
+        expect(isMuted([{value: `!command`, targets: ['content']}])).toBe(true)
+      })
+
+      it(`match: command`, () => {
+        expect(isMuted([{value: `command`, targets: ['content']}])).toBe(true)
+      })
+
+      it(`no match: !command`, () => {
+        // this post has the bare word, without the leading `!`
+        const plain = new RichText({text: `Idk maybe a bot command`})
+        plain.detectFacetsWithoutResolution()
+
+        const muted = isMuted(
+          [{value: `!command`, targets: ['content']}],
+          plain,
+        )
+
+        expect(muted).toBe(false)
+      })
+    })
+
+    describe(`e/acc`, () => {
+      const post = new RichText({text: `I'm e/acc pilled`})
+      post.detectFacetsWithoutResolution()
+
+      // Runs the given muted words against the shared post above.
+      const isMuted = (words: Parameters<typeof hasMutedWord>[0]) =>
+        hasMutedWord(words, post.text, post.facets, [])
+
+      it(`match: e/acc`, () => {
+        expect(isMuted([{value: `e/acc`, targets: ['content']}])).toBe(true)
+      })
+
+      it(`match: acc`, () => {
+        expect(isMuted([{value: `acc`, targets: ['content']}])).toBe(true)
+      })
+    })
+
+    describe(`super-bad`, () => {
+      const post = new RichText({text: `I'm super-bad`})
+      post.detectFacetsWithoutResolution()
+
+      // Runs the given muted words against the shared post above.
+      const isMuted = (words: Parameters<typeof hasMutedWord>[0]) =>
+        hasMutedWord(words, post.text, post.facets, [])
+
+      it(`match: super-bad`, () => {
+        expect(isMuted([{value: `super-bad`, targets: ['content']}])).toBe(true)
+      })
+
+      it(`match: super`, () => {
+        expect(isMuted([{value: `super`, targets: ['content']}])).toBe(true)
+      })
+
+      it(`match: super bad`, () => {
+        expect(isMuted([{value: `super bad`, targets: ['content']}])).toBe(true)
+      })
+
+      // A hyphenated word is not matched by its contiguous spelling.
+      it(`no match: superbad`, () => {
+        expect(isMuted([{value: `superbad`, targets: ['content']}])).toBe(false)
+      })
+    })
+
+    describe(`idk_what_this_would_be`, () => {
+      const post = new RichText({
+        text: `Weird post with idk_what_this_would_be`,
+      })
+      post.detectFacetsWithoutResolution()
+
+      // Runs the given muted words against the shared post above.
+      const isMuted = (words: Parameters<typeof hasMutedWord>[0]) =>
+        hasMutedWord(words, post.text, post.facets, [])
+
+      it(`match: idk what this would be`, () => {
+        const muted = isMuted([
+          {value: `idk what this would be`, targets: ['content']},
+        ])
+
+        expect(muted).toBe(true)
+      })
+
+      it(`no match: idk what this would be for`, () => {
+        // extra word
+        const muted = isMuted([
+          {value: `idk what this would be for`, targets: ['content']},
+        ])
+
+        expect(muted).toBe(false)
+      })
+
+      it(`match: idk`, () => {
+        // a single part of the underscore-separated word
+        expect(isMuted([{value: `idk`, targets: ['content']}])).toBe(true)
+      })
+
+      // An underscore-separated word is not matched by its contiguous spelling.
+      it(`no match: idkwhatthiswouldbe`, () => {
+        const muted = isMuted([
+          {value: `idkwhatthiswouldbe`, targets: ['content']},
+        ])
+
+        expect(muted).toBe(false)
+      })
+    })
+
+    describe(`parentheses`, () => {
+      const post = new RichText({text: `Post with context(iykyk)`})
+      post.detectFacetsWithoutResolution()
+
+      // Runs the given muted words against the shared post above.
+      const isMuted = (words: Parameters<typeof hasMutedWord>[0]) =>
+        hasMutedWord(words, post.text, post.facets, [])
+
+      it(`match: context(iykyk)`, () => {
+        const muted = isMuted([
+          {value: `context(iykyk)`, targets: ['content']},
+        ])
+
+        expect(muted).toBe(true)
+      })
+
+      it(`match: context`, () => {
+        expect(isMuted([{value: `context`, targets: ['content']}])).toBe(true)
+      })
+
+      it(`match: iykyk`, () => {
+        expect(isMuted([{value: `iykyk`, targets: ['content']}])).toBe(true)
+      })
+
+      it(`match: (iykyk)`, () => {
+        expect(isMuted([{value: `(iykyk)`, targets: ['content']}])).toBe(true)
+      })
+    })
+
+    describe(`🦋`, () => {
+      const post = new RichText({text: `Post with 🦋`})
+      post.detectFacetsWithoutResolution()
+
+      // Emoji used as the muted word.
+      it(`match: 🦋`, () => {
+        const muted = hasMutedWord(
+          [{value: `🦋`, targets: ['content']}],
+          post.text,
+          post.facets,
+          [],
+        )
+
+        expect(muted).toBe(true)
+      })
+    })
+  })
+
+  describe(`phrases`, () => {
+    describe(`I like turtles, or how I learned to stop worrying and love the internet.`, () => {
+      const post = new RichText({
+        text: `I like turtles, or how I learned to stop worrying and love the internet.`,
+      })
+      post.detectFacetsWithoutResolution()
+
+      // Runs the given muted phrases against the shared post above.
+      const isMuted = (words: Parameters<typeof hasMutedWord>[0]) =>
+        hasMutedWord(words, post.text, post.facets, [])
+
+      it(`match: stop worrying`, () => {
+        const muted = isMuted([
+          {value: 'stop worrying', targets: ['content']},
+        ])
+
+        expect(muted).toBe(true)
+      })
+
+      it(`match: turtles, or how`, () => {
+        const muted = isMuted([
+          {value: 'turtles, or how', targets: ['content']},
+        ])
+
+        expect(muted).toBe(true)
+      })
+    })
+  })
+})
--- a/src/lib/moderatePost_wrapped.ts
+++ b/src/lib/moderatePost_wrapped.ts
@ -2,18 +2,122 @@ import {
  AppBskyEmbedRecord,
  AppBskyEmbedRecordWithMedia,
  moderatePost,
+  AppBskyActorDefs,
+  AppBskyFeedPost,
+  AppBskyRichtextFacet,
+  AppBskyEmbedImages,
 } from '@atproto/api'

 type ModeratePost = typeof moderatePost
+// Second argument of `moderatePost`, extended with app-level filters.
 type Options = Parameters<ModeratePost>[1] & {
+  // URIs compared against the post's own URI and its embedded records' URIs
  hiddenPosts?: string[]
+  // entries handed to `hasMutedWord` for the post and its embeds
+  mutedWords?: AppBskyActorDefs.MutedWord[]
+}
+
+// Patterns used by `hasMutedWord`. Every pattern carries the `g` flag, so it
+// keeps `lastIndex` state when used with `RegExp.prototype.test` or `exec`.
+const REGEX = {
+  // a run of Unicode punctuation at the very start or very end of a string
+  LEADING_TRAILING_PUNCTUATION: /(?:^\p{P}+|\p{P}+$)/gu,
+  // regex metacharacters and whitespace
+  // NOTE(review): not referenced in the visible part of this file
+  ESCAPE: /[[\]{}()*+?.\\^$|\s]/g,
+  // a run of `/`, `-`, en dash, em dash, parentheses, square brackets or `_`
+  SEPARATORS: /[\/\-\–\—\(\)\[\]\_]+/g,
+  // whitespace; the lazy `+?` consumes a single character per match
+  WORD_BOUNDARY: /[\s\n\t\r\f\v]+?/g,
+}
+
+/**
+ * Reports whether a post matches any of the given muted words.
+ *
+ * Comparison is case-insensitive. Every muted word is compared against the
+ * post's tags regardless of its targets; the text checks only run for entries
+ * whose targets include `content`.
+ *
+ * @param mutedWords  Muted word entries to test.
+ * @param text        Text to scan (post text or image alt text).
+ * @param facets      Rich text facets; their tag features count as tags.
+ * @param outlineTags Tags supplied separately from the text.
+ * @returns `true` as soon as one muted word matches, otherwise `false`.
+ */
+export function hasMutedWord(
+  mutedWords: AppBskyActorDefs.MutedWord[],
+  text: string,
+  facets?: AppBskyRichtextFacet.Main[],
+  outlineTags?: string[],
+) {
+  const tags = (outlineTags || []).map(tag => tag.toLowerCase())
+  for (const facet of facets || []) {
+    // A facet can hold several features and the tag is not guaranteed to be
+    // the first one, so inspect each feature rather than `features[0].tag`
+    // (which is `undefined`, and throws on `toLowerCase`, for other features).
+    for (const feature of facet.features) {
+      if (
+        AppBskyRichtextFacet.isTag(feature) &&
+        typeof feature.tag === 'string'
+      ) {
+        tags.push(feature.tag.toLowerCase())
+      }
+    }
+  }
+
+  // does not depend on the muted word, so lowercase the text only once
+  const postText = text.toLowerCase()
+
+  for (const mute of mutedWords) {
+    const mutedWord = mute.value.toLowerCase()
+
+    // `content` applies to tags as well
+    if (tags.includes(mutedWord)) return true
+    // rest of the checks are for `content` only
+    if (!mute.targets.includes('content')) continue
+    // single character, has to use includes
+    if (mutedWord.length === 1 && postText.includes(mutedWord)) return true
+    // too long
+    if (mutedWord.length > postText.length) continue
+    // exact match
+    if (mutedWord === postText) return true
+    // any muted phrase with space or punctuation
+    if (/(?:\s|\p{P})+?/u.test(mutedWord) && postText.includes(mutedWord))
+      return true
+
+    // check individual character groups
+    const words = postText.split(REGEX.WORD_BOUNDARY)
+    for (const word of words) {
+      if (word === mutedWord) return true
+
+      // compare word without leading/trailing punctuation, but allow internal
+      // punctuation (such as `s@ssy`)
+      const wordTrimmedPunctuation = word.replace(
+        REGEX.LEADING_TRAILING_PUNCTUATION,
+        '',
+      )
+
+      if (mutedWord === wordTrimmedPunctuation) return true
+      if (mutedWord.length > wordTrimmedPunctuation.length) continue
+
+      // handle hyphenated, slash separated words, etc.
+      // `REGEX.SEPARATORS` is global, so `.test()` would carry `lastIndex`
+      // from one call to the next; `search()` always scans from the start.
+      if (wordTrimmedPunctuation.search(REGEX.SEPARATORS) !== -1) {
+        // check against full normalized phrase
+        const wordNormalizedSeparators = wordTrimmedPunctuation.replace(
+          REGEX.SEPARATORS,
+          ' ',
+        )
+        const mutedWordNormalizedSeparators = mutedWord.replace(
+          REGEX.SEPARATORS,
+          ' ',
+        )
+        // hyphenated (or other sep) to spaced words
+        if (wordNormalizedSeparators === mutedWordNormalizedSeparators)
+          return true
+
+        // Matching a separated word against its contiguous spelling (e.g.
+        // `super-cool` against `supercool`) is intentionally not done.
+
+        // then individual parts of separated phrases/words
+        const wordParts = wordTrimmedPunctuation.split(REGEX.SEPARATORS)
+        for (const wp of wordParts) {
+          // still retain internal punctuation
+          if (wp === mutedWord) return true
+        }
+      }
+    }
+  }
+
+  return false
 }

 export function moderatePost_wrapped(
  subject: Parameters<ModeratePost>[0],
  opts: Options,
 ) {
-  const {hiddenPosts = [], ...options} = opts
+  const {hiddenPosts = [], mutedWords = [], ...options} = opts
  const moderations = moderatePost(subject, options)

  if (hiddenPosts.includes(subject.uri)) {
@ -29,15 +133,65 @@ export function moderatePost_wrapped(
    }
  }

+  if (AppBskyFeedPost.isRecord(subject.record)) {
+    let muted = hasMutedWord(
+      mutedWords,
+      subject.record.text,
+      subject.record.facets || [],
+      subject.record.tags || [],
+    )
+
+    if (
+      subject.record.embed &&
+      AppBskyEmbedImages.isMain(subject.record.embed)
+    ) {
+      for (const image of subject.record.embed.images) {
+        muted = muted || hasMutedWord(mutedWords, image.alt, [], [])
+      }
+    }
+
+    if (muted) {
+      moderations.content.filter = true
+      moderations.content.blur = true
+      if (!moderations.content.cause) {
+        moderations.content.cause = {
+          // @ts-ignore Temporary extension to the moderation system -prf
+          type: 'muted-word',
+          source: {type: 'user'},
+          priority: 1,
+        }
+      }
+    }
+  }
+
  if (subject.embed) {
    let embedHidden = false
    if (AppBskyEmbedRecord.isViewRecord(subject.embed.record)) {
      embedHidden = hiddenPosts.includes(subject.embed.record.uri)
+
+      if (AppBskyFeedPost.isRecord(subject.embed.record.value)) {
+        embedHidden =
+          embedHidden ||
+          hasMutedWord(
+            mutedWords,
+            subject.embed.record.value.text,
+            subject.embed.record.value.facets,
+            subject.embed.record.value.tags,
+          )
+
+        if (AppBskyEmbedImages.isMain(subject.embed.record.value.embed)) {
+          for (const image of subject.embed.record.value.embed.images) {
+            embedHidden =
+              embedHidden || hasMutedWord(mutedWords, image.alt, [], [])
+          }
+        }
+      }
    }
    if (
      AppBskyEmbedRecordWithMedia.isView(subject.embed) &&
      AppBskyEmbedRecord.isViewRecord(subject.embed.record.record)
    ) {
+      // TODO what
      embedHidden = hiddenPosts.includes(subject.embed.record.record.uri)
    }
    if (embedHidden) {
--- a/src/lib/moderation.ts
+++ b/src/lib/moderation.ts
@ -67,6 +67,13 @@ export function describeModerationCause(
      description: 'You have hidden this post',
    }
  }
+  // @ts-ignore Temporary extension to the moderation system -prf
+  if (cause.type === 'muted-word') {
+    return {
+      name: 'Post hidden by muted word',
+      description: `You've chosen to hide a word or tag within this post.`,
+    }
+  }
  return cause.labelDef.strings[context].en
 }

--- a/src/lib/routes/links.ts
+++ b/src/lib/routes/links.ts
@ -25,3 +25,13 @@ export function makeCustomFeedLink(
 export function makeListLink(did: string, rkey: string, ...segments: string[]) {
  return [`/profile`, did, 'lists', rkey, ...segments].join('/')
 }
+
+/**
+ * Builds the search route for a tag.
+ *
+ * @param tag Text to search for. It is URI-encoded into the `q` parameter
+ *   as-is, so a leading `#` is only present if the caller includes it.
+ * @returns A `/search?q=…` path.
+ */
+export function makeTagLink(tag: string) {
+  return `/search?q=${encodeURIComponent(tag)}`
+}
+
+/**
+ * Builds the search route for a query, optionally scoped to one author.
+ *
+ * When `props.from` is truthy, ` from:<from>` is appended to the query before
+ * the whole string is URI-encoded into the `q` parameter.
+ */
+export function makeSearchLink(props: {query: string; from?: 'me' | string}) {
+  const {query, from} = props
+  const fullQuery = from ? query + ` from:${from}` : query
+  return `/search?q=${encodeURIComponent(fullQuery)}`
+}
--- a/src/lib/routes/types.ts
+++ b/src/lib/routes/types.ts
@ -33,6 +33,7 @@ export type CommonNavigatorParams = {
  PreferencesFollowingFeed: undefined
  PreferencesThreads: undefined
  PreferencesExternalEmbeds: undefined
+  Search: {q?: string}
 }

 export type BottomTabNavigatorParams = CommonNavigatorParams & {