From 3f2e31800ed9c74fc1b7bc4b475542f0256fefbd Mon Sep 17 00:00:00 2001 From: Christian Schmidt Date: Wed, 8 Mar 2023 19:56:41 +0100 Subject: [PATCH] Unescape HTML entities (#24019) --- app/lib/plain_text_formatter.rb | 6 ++- spec/lib/plain_text_formatter_spec.rb | 61 ++++++++++++++++++++++++--- 2 files changed, 61 insertions(+), 6 deletions(-) diff --git a/app/lib/plain_text_formatter.rb b/app/lib/plain_text_formatter.rb index 08aa29696..6fa2bc5d2 100644 --- a/app/lib/plain_text_formatter.rb +++ b/app/lib/plain_text_formatter.rb @@ -18,7 +18,7 @@ class PlainTextFormatter if local? text else - strip_tags(insert_newlines).chomp + html_entities.decode(strip_tags(insert_newlines)).chomp end end @@ -27,4 +27,8 @@ class PlainTextFormatter def insert_newlines text.gsub(NEWLINE_TAGS_RE) { |match| "#{match}\n" } end + + def html_entities + HTMLEntities.new + end end diff --git a/spec/lib/plain_text_formatter_spec.rb b/spec/lib/plain_text_formatter_spec.rb index c3d0ee630..81e4ae286 100644 --- a/spec/lib/plain_text_formatter_spec.rb +++ b/spec/lib/plain_text_formatter_spec.rb @@ -4,7 +4,7 @@ RSpec.describe PlainTextFormatter do describe '#to_s' do subject { described_class.new(status.text, status.local?).to_s } - context 'given a post with local status' do + context 'when status is local' do let(:status) { Fabricate(:status, text: '

<p>a text by a nerd who uses an HTML tag in text</p>

', uri: nil) } it 'returns the raw text' do @@ -12,12 +12,63 @@ RSpec.describe PlainTextFormatter do end end - context 'given a post with remote status' do + context 'when status is remote' do let(:remote_account) { Fabricate(:account, domain: 'remote.test', username: 'bob', url: 'https://remote.test/') } - let(:status) { Fabricate(:status, account: remote_account, text: '

<p>Hello</p>

') } - it 'returns tag-stripped text' do - is_expected.to eq 'Hello' + context 'when text contains inline HTML tags' do + let(:status) { Fabricate(:status, account: remote_account, text: 'Lorem ipsum') } + + it 'strips the tags' do + expect(subject).to eq 'Lorem ipsum' + end + end + + context 'when text contains

tags' do + let(:status) { Fabricate(:status, account: remote_account, text: '

<p>Lorem</p><p>ipsum</p>

') } + + it 'inserts a newline' do + expect(subject).to eq "Lorem\nipsum" + end + end + + context 'when text contains a single
tag' do + let(:status) { Fabricate(:status, account: remote_account, text: 'Lorem
ipsum') } + + it 'inserts a newline' do + expect(subject).to eq "Lorem\nipsum" + end + end + + context 'when text contains consecutive
tag' do + let(:status) { Fabricate(:status, account: remote_account, text: 'Lorem


ipsum') } + + it 'inserts a single newline' do + expect(subject).to eq "Lorem\nipsum" + end + end + + context 'when text contains HTML entity' do + let(:status) { Fabricate(:status, account: remote_account, text: 'Lorem & ipsum ❤') } + + it 'unescapes the entity' do + expect(subject).to eq 'Lorem & ipsum ❤' + end + end + + context 'when text contains ipsum') } + + it 'strips the tag and its contents' do + expect(subject).to eq 'Lorem ipsum' + end + end + + context 'when text contains an HTML comment tags' do + let(:status) { Fabricate(:status, account: remote_account, text: 'Lorem ipsum') } + + it 'strips the comment' do + expect(subject).to eq 'Lorem ipsum' + end end end end