gearheads
/
mastodon
Archived
2
0
Fork 0

Exclude URLs from text analysis (#11759)

The added regex removes URLs, including those without http or even www
(such as mysite.com), from the toot's body, so that only the real text
of the toot is analyzed for RTL detection.
gh/stable
Mostafa Ahangarha 2019-09-05 01:00:49 +04:30 committed by Eugen Rochko
parent bdca8da8eb
commit e974d4923f
1 changed files with 1 additions and 0 deletions

View File

@ -20,6 +20,7 @@ export function isRtl(text) {
text = text.replace(/(?:^|[^\/\w])@([a-z0-9_]+(@[a-z0-9\.\-]+)?)/ig, ''); text = text.replace(/(?:^|[^\/\w])@([a-z0-9_]+(@[a-z0-9\.\-]+)?)/ig, '');
text = text.replace(/(?:^|[^\/\w])#([\S]+)/ig, ''); text = text.replace(/(?:^|[^\/\w])#([\S]+)/ig, '');
text = text.replace(/\s+/g, ''); text = text.replace(/\s+/g, '');
text = text.replace(/(\w\S+\.\w{2,}\S*)/g, '');
const matches = text.match(rtlChars); const matches = text.match(rtlChars);