Reduce the odds of false positives in the language suggester (close #2598) (#2700)

zio/stable
Paul Frazee 2024-01-30 20:16:49 -08:00 committed by GitHub
parent 42fe0e1609
commit 31bf286fdc
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 23 additions and 10 deletions

View File

@ -23,7 +23,9 @@ const onIdle = globalThis.requestIdleCallback || (cb => setTimeout(cb, 1))
const cancelIdle = globalThis.cancelIdleCallback || clearTimeout const cancelIdle = globalThis.cancelIdleCallback || clearTimeout
export function SuggestedLanguage({text}: {text: string}) { export function SuggestedLanguage({text}: {text: string}) {
const [suggestedLanguage, setSuggestedLanguage] = useState<string>() const [suggestedLanguage, setSuggestedLanguage] = useState<
string | undefined
>()
const langPrefs = useLanguagePrefs() const langPrefs = useLanguagePrefs()
const setLangPrefs = useLanguagePrefsApi() const setLangPrefs = useLanguagePrefsApi()
const pal = usePalette('default') const pal = usePalette('default')
@ -40,14 +42,7 @@ export function SuggestedLanguage({text}: {text: string}) {
} }
const idle = onIdle(() => { const idle = onIdle(() => {
// Only select languages that have a high confidence and convert to code2 setSuggestedLanguage(guessLanguage(textTrimmed))
const result = lande(textTrimmed).filter(
([lang, value]) => value >= 0.97 && code3ToCode2Strict(lang),
)
setSuggestedLanguage(
result.length > 0 ? code3ToCode2Strict(result[0][0]) : undefined,
)
}) })
return () => cancelIdle(idle) return () => cancelIdle(idle)
@ -99,3 +94,22 @@ const styles = StyleSheet.create({
marginBottom: 10, marginBottom: 10,
}, },
}) })
/**
 * Guess the language of `text` with the lande language model.
 *
 * A suggestion is only made when the model is unambiguous:
 *  - exactly one candidate may score at or above the noise floor, and
 *  - that single candidate must score at or above the confidence threshold.
 *
 * Both thresholds are magic numbers chosen from debugging sessions against
 * some test strings.
 *
 * @param text the text to run through the model
 * @returns the winning language passed through `code3ToCode2Strict`, or
 * `undefined` when the model is not certain enough
 */
function guessLanguage(text: string): string | undefined {
  // any score at or above this counts as a competing candidate
  const NOISE_FLOOR = 0.0002
  // the lone candidate has to reach this score to be suggested
  const MIN_CONFIDENCE = 0.97

  const contenders = lande(text).filter(entry => entry[1] >= NOISE_FLOOR)

  // zero or several contenders means the model isn't certain enough,
  // and so does a single contender that falls short of the threshold
  const isConfident =
    contenders.length === 1 && contenders[0][1] >= MIN_CONFIDENCE
  if (!isConfident) {
    return undefined
  }

  return code3ToCode2Strict(contenders[0][0])
}

View File

@ -316,7 +316,6 @@ function getImageFromUri(
const type = item.type const type = item.type
if (type === 'text/plain') { if (type === 'text/plain') {
console.log('hit')
item.getAsString(async itemString => { item.getAsString(async itemString => {
if (isUriImage(itemString)) { if (isUriImage(itemString)) {
const response = await fetch(itemString) const response = await fetch(itemString)