Improve muted word matching (#3018)

* Use named params

* Add language exception matching
zio/stable
Eric Bailey 2024-02-28 10:38:31 -06:00 committed by GitHub
parent 5cb45f9c16
commit 0c3d55db6f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 313 additions and 248 deletions

View File

@ -11,12 +11,12 @@ describe(`hasMutedWord`, () => {
})
rt.detectFacetsWithoutResolution()
const match = hasMutedWord(
[{value: 'outlineTag', targets: ['tag']}],
rt.text,
rt.facets,
['outlineTag'],
)
const match = hasMutedWord({
mutedWords: [{value: 'outlineTag', targets: ['tag']}],
text: rt.text,
facets: rt.facets,
outlineTags: ['outlineTag'],
})
expect(match).toBe(true)
})
@ -27,12 +27,12 @@ describe(`hasMutedWord`, () => {
})
rt.detectFacetsWithoutResolution()
const match = hasMutedWord(
[{value: 'inlineTag', targets: ['tag']}],
rt.text,
rt.facets,
['outlineTag'],
)
const match = hasMutedWord({
mutedWords: [{value: 'inlineTag', targets: ['tag']}],
text: rt.text,
facets: rt.facets,
outlineTags: ['outlineTag'],
})
expect(match).toBe(true)
})
@ -43,12 +43,12 @@ describe(`hasMutedWord`, () => {
})
rt.detectFacetsWithoutResolution()
const match = hasMutedWord(
[{value: 'inlineTag', targets: ['content']}],
rt.text,
rt.facets,
['outlineTag'],
)
const match = hasMutedWord({
mutedWords: [{value: 'inlineTag', targets: ['content']}],
text: rt.text,
facets: rt.facets,
outlineTags: ['outlineTag'],
})
expect(match).toBe(true)
})
@ -59,12 +59,12 @@ describe(`hasMutedWord`, () => {
})
rt.detectFacetsWithoutResolution()
const match = hasMutedWord(
[{value: 'inlineTag', targets: ['tag']}],
rt.text,
rt.facets,
[],
)
const match = hasMutedWord({
mutedWords: [{value: 'inlineTag', targets: ['tag']}],
text: rt.text,
facets: rt.facets,
outlineTags: [],
})
expect(match).toBe(false)
})
@ -80,12 +80,12 @@ describe(`hasMutedWord`, () => {
})
rt.detectFacetsWithoutResolution()
const match = hasMutedWord(
[{value: '希', targets: ['content']}],
rt.text,
rt.facets,
[],
)
const match = hasMutedWord({
mutedWords: [{value: '希', targets: ['content']}],
text: rt.text,
facets: rt.facets,
outlineTags: [],
})
expect(match).toBe(true)
})
@ -96,12 +96,12 @@ describe(`hasMutedWord`, () => {
})
rt.detectFacetsWithoutResolution()
const match = hasMutedWord(
[{value: 'politics', targets: ['content']}],
rt.text,
rt.facets,
[],
)
const match = hasMutedWord({
mutedWords: [{value: 'politics', targets: ['content']}],
text: rt.text,
facets: rt.facets,
outlineTags: [],
})
expect(match).toBe(false)
})
@ -112,12 +112,12 @@ describe(`hasMutedWord`, () => {
})
rt.detectFacetsWithoutResolution()
const match = hasMutedWord(
[{value: 'javascript', targets: ['content']}],
rt.text,
rt.facets,
[],
)
const match = hasMutedWord({
mutedWords: [{value: 'javascript', targets: ['content']}],
text: rt.text,
facets: rt.facets,
outlineTags: [],
})
expect(match).toBe(true)
})
@ -130,12 +130,12 @@ describe(`hasMutedWord`, () => {
})
rt.detectFacetsWithoutResolution()
const match = hasMutedWord(
[{value: 'javascript', targets: ['content']}],
rt.text,
rt.facets,
[],
)
const match = hasMutedWord({
mutedWords: [{value: 'javascript', targets: ['content']}],
text: rt.text,
facets: rt.facets,
outlineTags: [],
})
expect(match).toBe(true)
})
@ -146,12 +146,12 @@ describe(`hasMutedWord`, () => {
})
rt.detectFacetsWithoutResolution()
const match = hasMutedWord(
[{value: 'ai', targets: ['content']}],
rt.text,
rt.facets,
[],
)
const match = hasMutedWord({
mutedWords: [{value: 'ai', targets: ['content']}],
text: rt.text,
facets: rt.facets,
outlineTags: [],
})
expect(match).toBe(false)
})
@ -162,12 +162,12 @@ describe(`hasMutedWord`, () => {
})
rt.detectFacetsWithoutResolution()
const match = hasMutedWord(
[{value: 'brain', targets: ['content']}],
rt.text,
rt.facets,
[],
)
const match = hasMutedWord({
mutedWords: [{value: 'brain', targets: ['content']}],
text: rt.text,
facets: rt.facets,
outlineTags: [],
})
expect(match).toBe(true)
})
@ -178,12 +178,12 @@ describe(`hasMutedWord`, () => {
})
rt.detectFacetsWithoutResolution()
const match = hasMutedWord(
[{value: `:)`, targets: ['content']}],
rt.text,
rt.facets,
[],
)
const match = hasMutedWord({
mutedWords: [{value: `:)`, targets: ['content']}],
text: rt.text,
facets: rt.facets,
outlineTags: [],
})
expect(match).toBe(true)
})
@ -197,23 +197,23 @@ describe(`hasMutedWord`, () => {
rt.detectFacetsWithoutResolution()
it(`match: yay!`, () => {
const match = hasMutedWord(
[{value: 'yay!', targets: ['content']}],
rt.text,
rt.facets,
[],
)
const match = hasMutedWord({
mutedWords: [{value: 'yay!', targets: ['content']}],
text: rt.text,
facets: rt.facets,
outlineTags: [],
})
expect(match).toBe(true)
})
it(`match: yay`, () => {
const match = hasMutedWord(
[{value: 'yay', targets: ['content']}],
rt.text,
rt.facets,
[],
)
const match = hasMutedWord({
mutedWords: [{value: 'yay', targets: ['content']}],
text: rt.text,
facets: rt.facets,
outlineTags: [],
})
expect(match).toBe(true)
})
@ -226,24 +226,24 @@ describe(`hasMutedWord`, () => {
rt.detectFacetsWithoutResolution()
it(`match: y!ppee`, () => {
const match = hasMutedWord(
[{value: 'y!ppee', targets: ['content']}],
rt.text,
rt.facets,
[],
)
const match = hasMutedWord({
mutedWords: [{value: 'y!ppee', targets: ['content']}],
text: rt.text,
facets: rt.facets,
outlineTags: [],
})
expect(match).toBe(true)
})
// single exclamation point, source has double
it(`no match: y!ppee!`, () => {
const match = hasMutedWord(
[{value: 'y!ppee!', targets: ['content']}],
rt.text,
rt.facets,
[],
)
const match = hasMutedWord({
mutedWords: [{value: 'y!ppee!', targets: ['content']}],
text: rt.text,
facets: rt.facets,
outlineTags: [],
})
expect(match).toBe(true)
})
@ -256,23 +256,23 @@ describe(`hasMutedWord`, () => {
rt.detectFacetsWithoutResolution()
it(`match: S@assy`, () => {
const match = hasMutedWord(
[{value: 'S@assy', targets: ['content']}],
rt.text,
rt.facets,
[],
)
const match = hasMutedWord({
mutedWords: [{value: 'S@assy', targets: ['content']}],
text: rt.text,
facets: rt.facets,
outlineTags: [],
})
expect(match).toBe(true)
})
it(`match: s@assy`, () => {
const match = hasMutedWord(
[{value: 's@assy', targets: ['content']}],
rt.text,
rt.facets,
[],
)
const match = hasMutedWord({
mutedWords: [{value: 's@assy', targets: ['content']}],
text: rt.text,
facets: rt.facets,
outlineTags: [],
})
expect(match).toBe(true)
})
@ -286,12 +286,12 @@ describe(`hasMutedWord`, () => {
// case insensitive
it(`match: new york times`, () => {
const match = hasMutedWord(
[{value: 'new york times', targets: ['content']}],
rt.text,
rt.facets,
[],
)
const match = hasMutedWord({
mutedWords: [{value: 'new york times', targets: ['content']}],
text: rt.text,
facets: rt.facets,
outlineTags: [],
})
expect(match).toBe(true)
})
@ -304,23 +304,23 @@ describe(`hasMutedWord`, () => {
rt.detectFacetsWithoutResolution()
it(`match: !command`, () => {
const match = hasMutedWord(
[{value: `!command`, targets: ['content']}],
rt.text,
rt.facets,
[],
)
const match = hasMutedWord({
mutedWords: [{value: `!command`, targets: ['content']}],
text: rt.text,
facets: rt.facets,
outlineTags: [],
})
expect(match).toBe(true)
})
it(`match: command`, () => {
const match = hasMutedWord(
[{value: `command`, targets: ['content']}],
rt.text,
rt.facets,
[],
)
const match = hasMutedWord({
mutedWords: [{value: `command`, targets: ['content']}],
text: rt.text,
facets: rt.facets,
outlineTags: [],
})
expect(match).toBe(true)
})
@ -331,12 +331,12 @@ describe(`hasMutedWord`, () => {
})
rt.detectFacetsWithoutResolution()
const match = hasMutedWord(
[{value: `!command`, targets: ['content']}],
rt.text,
rt.facets,
[],
)
const match = hasMutedWord({
mutedWords: [{value: `!command`, targets: ['content']}],
text: rt.text,
facets: rt.facets,
outlineTags: [],
})
expect(match).toBe(false)
})
@ -349,23 +349,23 @@ describe(`hasMutedWord`, () => {
rt.detectFacetsWithoutResolution()
it(`match: e/acc`, () => {
const match = hasMutedWord(
[{value: `e/acc`, targets: ['content']}],
rt.text,
rt.facets,
[],
)
const match = hasMutedWord({
mutedWords: [{value: `e/acc`, targets: ['content']}],
text: rt.text,
facets: rt.facets,
outlineTags: [],
})
expect(match).toBe(true)
})
it(`match: acc`, () => {
const match = hasMutedWord(
[{value: `acc`, targets: ['content']}],
rt.text,
rt.facets,
[],
)
const match = hasMutedWord({
mutedWords: [{value: `acc`, targets: ['content']}],
text: rt.text,
facets: rt.facets,
outlineTags: [],
})
expect(match).toBe(true)
})
@ -378,45 +378,45 @@ describe(`hasMutedWord`, () => {
rt.detectFacetsWithoutResolution()
it(`match: super-bad`, () => {
const match = hasMutedWord(
[{value: `super-bad`, targets: ['content']}],
rt.text,
rt.facets,
[],
)
const match = hasMutedWord({
mutedWords: [{value: `super-bad`, targets: ['content']}],
text: rt.text,
facets: rt.facets,
outlineTags: [],
})
expect(match).toBe(true)
})
it(`match: super`, () => {
const match = hasMutedWord(
[{value: `super`, targets: ['content']}],
rt.text,
rt.facets,
[],
)
const match = hasMutedWord({
mutedWords: [{value: `super`, targets: ['content']}],
text: rt.text,
facets: rt.facets,
outlineTags: [],
})
expect(match).toBe(true)
})
it(`match: super bad`, () => {
const match = hasMutedWord(
[{value: `super bad`, targets: ['content']}],
rt.text,
rt.facets,
[],
)
const match = hasMutedWord({
mutedWords: [{value: `super bad`, targets: ['content']}],
text: rt.text,
facets: rt.facets,
outlineTags: [],
})
expect(match).toBe(true)
})
it(`match: superbad`, () => {
const match = hasMutedWord(
[{value: `superbad`, targets: ['content']}],
rt.text,
rt.facets,
[],
)
const match = hasMutedWord({
mutedWords: [{value: `superbad`, targets: ['content']}],
text: rt.text,
facets: rt.facets,
outlineTags: [],
})
expect(match).toBe(false)
})
@ -429,47 +429,49 @@ describe(`hasMutedWord`, () => {
rt.detectFacetsWithoutResolution()
it(`match: idk what this would be`, () => {
const match = hasMutedWord(
[{value: `idk what this would be`, targets: ['content']}],
rt.text,
rt.facets,
[],
)
const match = hasMutedWord({
mutedWords: [{value: `idk what this would be`, targets: ['content']}],
text: rt.text,
facets: rt.facets,
outlineTags: [],
})
expect(match).toBe(true)
})
it(`no match: idk what this would be for`, () => {
// extra word
const match = hasMutedWord(
[{value: `idk what this would be for`, targets: ['content']}],
rt.text,
rt.facets,
[],
)
const match = hasMutedWord({
mutedWords: [
{value: `idk what this would be for`, targets: ['content']},
],
text: rt.text,
facets: rt.facets,
outlineTags: [],
})
expect(match).toBe(false)
})
it(`match: idk`, () => {
// single word taken from the phrase
const match = hasMutedWord(
[{value: `idk`, targets: ['content']}],
rt.text,
rt.facets,
[],
)
const match = hasMutedWord({
mutedWords: [{value: `idk`, targets: ['content']}],
text: rt.text,
facets: rt.facets,
outlineTags: [],
})
expect(match).toBe(true)
})
it(`match: idkwhatthiswouldbe`, () => {
const match = hasMutedWord(
[{value: `idkwhatthiswouldbe`, targets: ['content']}],
rt.text,
rt.facets,
[],
)
const match = hasMutedWord({
mutedWords: [{value: `idkwhatthiswouldbe`, targets: ['content']}],
text: rt.text,
facets: rt.facets,
outlineTags: [],
})
expect(match).toBe(false)
})
@ -482,45 +484,45 @@ describe(`hasMutedWord`, () => {
rt.detectFacetsWithoutResolution()
it(`match: context(iykyk)`, () => {
const match = hasMutedWord(
[{value: `context(iykyk)`, targets: ['content']}],
rt.text,
rt.facets,
[],
)
const match = hasMutedWord({
mutedWords: [{value: `context(iykyk)`, targets: ['content']}],
text: rt.text,
facets: rt.facets,
outlineTags: [],
})
expect(match).toBe(true)
})
it(`match: context`, () => {
const match = hasMutedWord(
[{value: `context`, targets: ['content']}],
rt.text,
rt.facets,
[],
)
const match = hasMutedWord({
mutedWords: [{value: `context`, targets: ['content']}],
text: rt.text,
facets: rt.facets,
outlineTags: [],
})
expect(match).toBe(true)
})
it(`match: iykyk`, () => {
const match = hasMutedWord(
[{value: `iykyk`, targets: ['content']}],
rt.text,
rt.facets,
[],
)
const match = hasMutedWord({
mutedWords: [{value: `iykyk`, targets: ['content']}],
text: rt.text,
facets: rt.facets,
outlineTags: [],
})
expect(match).toBe(true)
})
it(`match: (iykyk)`, () => {
const match = hasMutedWord(
[{value: `(iykyk)`, targets: ['content']}],
rt.text,
rt.facets,
[],
)
const match = hasMutedWord({
mutedWords: [{value: `(iykyk)`, targets: ['content']}],
text: rt.text,
facets: rt.facets,
outlineTags: [],
})
expect(match).toBe(true)
})
@ -533,12 +535,12 @@ describe(`hasMutedWord`, () => {
rt.detectFacetsWithoutResolution()
it(`match: 🦋`, () => {
const match = hasMutedWord(
[{value: `🦋`, targets: ['content']}],
rt.text,
rt.facets,
[],
)
const match = hasMutedWord({
mutedWords: [{value: `🦋`, targets: ['content']}],
text: rt.text,
facets: rt.facets,
outlineTags: [],
})
expect(match).toBe(true)
})
@ -553,23 +555,46 @@ describe(`hasMutedWord`, () => {
rt.detectFacetsWithoutResolution()
it(`match: stop worrying`, () => {
const match = hasMutedWord(
[{value: 'stop worrying', targets: ['content']}],
rt.text,
rt.facets,
[],
)
const match = hasMutedWord({
mutedWords: [{value: 'stop worrying', targets: ['content']}],
text: rt.text,
facets: rt.facets,
outlineTags: [],
})
expect(match).toBe(true)
})
it(`match: turtles, or how`, () => {
const match = hasMutedWord(
[{value: 'turtles, or how', targets: ['content']}],
rt.text,
rt.facets,
[],
)
const match = hasMutedWord({
mutedWords: [{value: 'turtles, or how', targets: ['content']}],
text: rt.text,
facets: rt.facets,
outlineTags: [],
})
expect(match).toBe(true)
})
})
})
describe(`languages without spaces`, () => {
// I love turtles, or how I learned to stop worrying and love the internet
describe(`私はカメが好きです、またはどのようにして心配するのをやめてインターネットを愛するようになったのか`, () => {
const rt = new RichText({
text: `私はカメが好きです、またはどのようにして心配するのをやめてインターネットを愛するようになったのか`,
})
rt.detectFacetsWithoutResolution()
// internet
it(`match: インターネット`, () => {
const match = hasMutedWord({
mutedWords: [{value: 'インターネット', targets: ['content']}],
text: rt.text,
facets: rt.facets,
outlineTags: [],
languages: ['ja'],
})
expect(match).toBe(true)
})

View File

@ -21,12 +21,34 @@ const REGEX = {
WORD_BOUNDARY: /[\s\n\t\r\f\v]+?/g,
}
export function hasMutedWord(
mutedWords: AppBskyActorDefs.MutedWord[],
text: string,
facets?: AppBskyRichtextFacet.Main[],
outlineTags?: string[],
) {
/**
* List of 2-letter lang codes for languages that either don't use spaces, or
* don't use spaces in a way conducive to word-based filtering.
*
* For these, we use a simple `String.includes` to check for a match.
*
* NOTE: `hasMutedWord` only tests the first entry of its `languages` argument
* against this list, and it does so with an exact `Array.includes` comparison.
* A post whose first language is not listed here, or is written as a
* region-qualified tag (e.g. `ja-JP`), is therefore not treated as an
* exception.
*/
const LANGUAGE_EXCEPTIONS = [
'ja', // Japanese
'zh', // Chinese
'ko', // Korean
'th', // Thai
'vi', // Vietnamese
]
export function hasMutedWord({
mutedWords,
text,
facets,
outlineTags,
languages,
}: {
mutedWords: AppBskyActorDefs.MutedWord[]
text: string
facets?: AppBskyRichtextFacet.Main[]
outlineTags?: string[]
languages?: string[]
}) {
const exception = LANGUAGE_EXCEPTIONS.includes(languages?.[0] || '')
const tags = ([] as string[])
.concat(outlineTags || [])
.concat(
@ -48,8 +70,9 @@ export function hasMutedWord(
if (tags.includes(mutedWord)) return true
// rest of the checks are for `content` only
if (!mute.targets.includes('content')) continue
// single character, has to use includes
if (mutedWord.length === 1 && postText.includes(mutedWord)) return true
// single character or other exception, has to use includes
if ((mutedWord.length === 1 || exception) && postText.includes(mutedWord))
return true
// too long
if (mutedWord.length > postText.length) continue
// exact match
@ -134,19 +157,28 @@ export function moderatePost_wrapped(
}
if (AppBskyFeedPost.isRecord(subject.record)) {
let muted = hasMutedWord(
let muted = hasMutedWord({
mutedWords,
subject.record.text,
subject.record.facets || [],
subject.record.tags || [],
)
text: subject.record.text,
facets: subject.record.facets || [],
outlineTags: subject.record.tags || [],
languages: subject.record.langs,
})
if (
subject.record.embed &&
AppBskyEmbedImages.isMain(subject.record.embed)
) {
for (const image of subject.record.embed.images) {
muted = muted || hasMutedWord(mutedWords, image.alt, [], [])
muted =
muted ||
hasMutedWord({
mutedWords,
text: image.alt,
facets: [],
outlineTags: [],
languages: subject.record.langs,
})
}
}
@ -172,17 +204,25 @@ export function moderatePost_wrapped(
if (AppBskyFeedPost.isRecord(subject.embed.record.value)) {
embedHidden =
embedHidden ||
hasMutedWord(
hasMutedWord({
mutedWords,
subject.embed.record.value.text,
subject.embed.record.value.facets,
subject.embed.record.value.tags,
)
text: subject.embed.record.value.text,
facets: subject.embed.record.value.facets,
outlineTags: subject.embed.record.value.tags,
languages: subject.embed.record.value.langs,
})
if (AppBskyEmbedImages.isMain(subject.embed.record.value.embed)) {
for (const image of subject.embed.record.value.embed.images) {
embedHidden =
embedHidden || hasMutedWord(mutedWords, image.alt, [], [])
embedHidden ||
hasMutedWord({
mutedWords,
text: image.alt,
facets: [],
outlineTags: [],
languages: subject.embed.record.value.langs,
})
}
}
}