From cefa526c6d3a45df2d0fcb7643ced828e2e87dea Mon Sep 17 00:00:00 2001
From: Eugen Rochko
Date: Sat, 26 Mar 2022 02:53:34 +0100
Subject: [PATCH] Refactor formatter (#17828)
* Refactor formatter
* Move custom emoji pre-rendering logic to view helpers
* Move more methods out of Formatter
* Fix code style issues
* Remove Formatter
* Add inline poll options to RSS feeds
* Remove unused helper method
* Fix code style issues
* Various fixes and improvements
* Fix test
---
app/chewy/statuses_index.rb | 2 +-
app/controllers/api/web/embeds_controller.rb | 2 +-
app/helpers/accounts_helper.rb | 6 +-
app/helpers/admin/trends/statuses_helper.rb | 5 +-
app/helpers/application_helper.rb | 4 +
app/helpers/formatting_helper.rb | 19 +
app/helpers/routing_helper.rb | 3 +-
app/helpers/statuses_helper.rb | 14 -
app/lib/activitypub/activity/create.rb | 4 +-
app/lib/emoji_formatter.rb | 98 +++
app/lib/extractor.rb | 82 ++-
app/lib/feed_manager.rb | 3 +-
app/lib/formatter.rb | 294 --------
app/lib/html_aware_formatter.rb | 38 ++
app/lib/plain_text_formatter.rb | 30 +
app/lib/rss/serializer.rb | 23 +-
app/lib/text_formatter.rb | 158 +++++
app/mailers/application_mailer.rb | 1 +
.../activitypub/actor_serializer.rb | 7 +-
.../activitypub/note_serializer.rb | 6 +-
app/serializers/rest/account_serializer.rb | 7 +-
.../rest/announcement_serializer.rb | 4 +-
.../rest/status_edit_serializer.rb | 4 +-
app/serializers/rest/status_serializer.rb | 4 +-
app/services/fetch_link_card_service.rb | 2 +-
app/views/accounts/_bio.html.haml | 6 +-
app/views/admin/accounts/show.html.haml | 6 +-
app/views/admin/reports/_status.html.haml | 6 +-
app/views/admin/reports/show.html.haml | 2 +-
app/views/directories/index.html.haml | 2 +-
app/views/disputes/strikes/show.html.haml | 2 +-
.../notification_mailer/_status.html.haml | 4 +-
.../notification_mailer/_status.text.erb | 2 +-
app/views/notification_mailer/digest.text.erb | 2 +-
app/views/statuses/_detailed_status.html.haml | 5 +-
app/views/statuses/_poll.html.haml | 4 +-
app/views/statuses/_simple_status.html.haml | 5 +-
app/views/user_mailer/warning.html.haml | 2 +-
config/initializers/twitter_regex.rb | 26 -
spec/lib/emoji_formatter_spec.rb | 55 ++
spec/lib/formatter_spec.rb | 626 ------------------
spec/lib/html_aware_formatter.rb | 44 ++
spec/lib/plain_text_formatter_spec.rb | 24 +
spec/lib/text_formatter_spec.rb | 313 +++++++++
44 files changed, 932 insertions(+), 1024 deletions(-)
create mode 100644 app/helpers/formatting_helper.rb
create mode 100644 app/lib/emoji_formatter.rb
delete mode 100644 app/lib/formatter.rb
create mode 100644 app/lib/html_aware_formatter.rb
create mode 100644 app/lib/plain_text_formatter.rb
create mode 100644 app/lib/text_formatter.rb
create mode 100644 spec/lib/emoji_formatter_spec.rb
delete mode 100644 spec/lib/formatter_spec.rb
create mode 100644 spec/lib/html_aware_formatter.rb
create mode 100644 spec/lib/plain_text_formatter_spec.rb
create mode 100644 spec/lib/text_formatter_spec.rb
diff --git a/app/chewy/statuses_index.rb b/app/chewy/statuses_index.rb
index 65cbb6fcd..d119f7cac 100644
--- a/app/chewy/statuses_index.rb
+++ b/app/chewy/statuses_index.rb
@@ -57,7 +57,7 @@ class StatusesIndex < Chewy::Index
field :id, type: 'long'
field :account_id, type: 'long'
- field :text, type: 'text', value: ->(status) { [status.spoiler_text, Formatter.instance.plaintext(status)].concat(status.ordered_media_attachments.map(&:description)).concat(status.preloadable_poll ? status.preloadable_poll.options : []).join("\n\n") } do
+ field :text, type: 'text', value: ->(status) { [status.spoiler_text, PlainTextFormatter.new(status.text, status.local?).to_s].concat(status.ordered_media_attachments.map(&:description)).concat(status.preloadable_poll ? status.preloadable_poll.options : []).join("\n\n") } do
field :stemmed, type: 'text', analyzer: 'content'
end
diff --git a/app/controllers/api/web/embeds_controller.rb b/app/controllers/api/web/embeds_controller.rb
index 741ba910f..58f6345e6 100644
--- a/app/controllers/api/web/embeds_controller.rb
+++ b/app/controllers/api/web/embeds_controller.rb
@@ -15,7 +15,7 @@ class Api::Web::EmbedsController < Api::Web::BaseController
return not_found if oembed.nil?
begin
- oembed[:html] = Formatter.instance.sanitize(oembed[:html], Sanitize::Config::MASTODON_OEMBED)
+ oembed[:html] = Sanitize.fragment(oembed[:html], Sanitize::Config::MASTODON_OEMBED)
rescue ArgumentError
return not_found
end
diff --git a/app/helpers/accounts_helper.rb b/app/helpers/accounts_helper.rb
index a33961724..557f60f26 100644
--- a/app/helpers/accounts_helper.rb
+++ b/app/helpers/accounts_helper.rb
@@ -2,10 +2,12 @@
module AccountsHelper
def display_name(account, **options)
+ str = account.display_name.presence || account.username
+
if options[:custom_emojify]
- Formatter.instance.format_display_name(account, **options)
+ prerender_custom_emojis(h(str), account.emojis)
else
- account.display_name.presence || account.username
+ str
end
end
diff --git a/app/helpers/admin/trends/statuses_helper.rb b/app/helpers/admin/trends/statuses_helper.rb
index d16e3dd12..214c1e2a6 100644
--- a/app/helpers/admin/trends/statuses_helper.rb
+++ b/app/helpers/admin/trends/statuses_helper.rb
@@ -12,9 +12,6 @@ module Admin::Trends::StatusesHelper
return '' if text.blank?
- html = Formatter.instance.send(:encode, text)
- html = Formatter.instance.send(:encode_custom_emojis, html, status.emojis, prefers_autoplay?)
-
- html.html_safe # rubocop:disable Rails/OutputSafety
+ prerender_custom_emojis(h(text), status.emojis)
end
end
diff --git a/app/helpers/application_helper.rb b/app/helpers/application_helper.rb
index e997570b5..651a98a85 100644
--- a/app/helpers/application_helper.rb
+++ b/app/helpers/application_helper.rb
@@ -239,4 +239,8 @@ module ApplicationHelper
end
end.values
end
+
+ def prerender_custom_emojis(html, custom_emojis)
+ EmojiFormatter.new(html, custom_emojis, animate: prefers_autoplay?).to_s
+ end
end
diff --git a/app/helpers/formatting_helper.rb b/app/helpers/formatting_helper.rb
new file mode 100644
index 000000000..66e9e1e91
--- /dev/null
+++ b/app/helpers/formatting_helper.rb
@@ -0,0 +1,19 @@
+# frozen_string_literal: true
+
+module FormattingHelper
+ def html_aware_format(text, local, options = {})
+ HtmlAwareFormatter.new(text, local, options).to_s
+ end
+
+ def linkify(text, options = {})
+ TextFormatter.new(text, options).to_s
+ end
+
+ def extract_plain_text(text, local)
+ PlainTextFormatter.new(text, local).to_s
+ end
+
+ def status_content_format(status)
+ html_aware_format(status.text, status.local?, preloaded_accounts: [status.account] + (status.respond_to?(:active_mentions) ? status.active_mentions.map(&:account) : []))
+ end
+end
diff --git a/app/helpers/routing_helper.rb b/app/helpers/routing_helper.rb
index fb24a1b28..f95f46a56 100644
--- a/app/helpers/routing_helper.rb
+++ b/app/helpers/routing_helper.rb
@@ -2,6 +2,7 @@
module RoutingHelper
extend ActiveSupport::Concern
+
include Rails.application.routes.url_helpers
include ActionView::Helpers::AssetTagHelper
include Webpacker::Helper
@@ -22,8 +23,6 @@ module RoutingHelper
full_asset_url(asset_pack_path(source, **options))
end
- private
-
def use_storage?
Rails.configuration.x.use_s3 || Rails.configuration.x.use_swift
end
diff --git a/app/helpers/statuses_helper.rb b/app/helpers/statuses_helper.rb
index d328f89b7..e92b4c839 100644
--- a/app/helpers/statuses_helper.rb
+++ b/app/helpers/statuses_helper.rb
@@ -113,20 +113,6 @@ module StatusesHelper
end
end
- private
-
- def simplified_text(text)
- text.dup.tap do |new_text|
- URI.extract(new_text).each do |url|
- new_text.gsub!(url, '')
- end
-
- new_text.gsub!(Account::MENTION_RE, '')
- new_text.gsub!(Tag::HASHTAG_RE, '')
- new_text.gsub!(/\s+/, '')
- end
- end
-
def embedded_view?
params[:controller] == EMBEDDED_CONTROLLER && params[:action] == EMBEDDED_ACTION
end
diff --git a/app/lib/activitypub/activity/create.rb b/app/lib/activitypub/activity/create.rb
index ea8d146d4..f4f98e29c 100644
--- a/app/lib/activitypub/activity/create.rb
+++ b/app/lib/activitypub/activity/create.rb
@@ -1,6 +1,8 @@
# frozen_string_literal: true
class ActivityPub::Activity::Create < ActivityPub::Activity
+ include FormattingHelper
+
def perform
dereference_object!
@@ -367,7 +369,7 @@ class ActivityPub::Activity::Create < ActivityPub::Activity
end
def converted_text
- Formatter.instance.linkify([@status_parser.title.presence, @status_parser.spoiler_text.presence, @status_parser.url || @status_parser.uri].compact.join("\n\n"))
+ linkify([@status_parser.title.presence, @status_parser.spoiler_text.presence, @status_parser.url || @status_parser.uri].compact.join("\n\n"))
end
def unsupported_media_type?(mime_type)
diff --git a/app/lib/emoji_formatter.rb b/app/lib/emoji_formatter.rb
new file mode 100644
index 000000000..f808f3a22
--- /dev/null
+++ b/app/lib/emoji_formatter.rb
@@ -0,0 +1,98 @@
+# frozen_string_literal: true
+
+class EmojiFormatter
+ include RoutingHelper
+
+ DISALLOWED_BOUNDING_REGEX = /[[:alnum:]:]/.freeze
+
+ attr_reader :html, :custom_emojis, :options
+
+ # @param [ActiveSupport::SafeBuffer] html
+ # @param [Array] custom_emojis
+ # @param [Hash] options
+ # @option options [Boolean] :animate
+ def initialize(html, custom_emojis, options = {})
+ raise ArgumentError unless html.html_safe?
+
+ @html = html
+ @custom_emojis = custom_emojis
+ @options = options
+ end
+
+ def to_s
+ return html if custom_emojis.empty? || html.blank?
+
+ i = -1
+ tag_open_index = nil
+ inside_shortname = false
+ shortname_start_index = -1
+ invisible_depth = 0
+ last_index = 0
+ result = ''.dup
+
+ while i + 1 < html.size
+ i += 1
+
+ if invisible_depth.zero? && inside_shortname && html[i] == ':'
+ inside_shortname = false
+ shortcode = html[shortname_start_index + 1..i - 1]
+ char_after = html[i + 1]
+
+ next unless (char_after.nil? || !DISALLOWED_BOUNDING_REGEX.match?(char_after)) && (emoji = emoji_map[shortcode])
+
+ result << html[last_index..shortname_start_index - 1] if shortname_start_index.positive?
+ result << image_for_emoji(shortcode, emoji)
+ last_index = i + 1
+ elsif tag_open_index && html[i] == '>'
+ tag = html[tag_open_index..i]
+ tag_open_index = nil
+
+ if invisible_depth.positive?
+ invisible_depth += count_tag_nesting(tag)
+ elsif tag == '<span class="invisible">'
+ invisible_depth = 1
+ end
+ elsif html[i] == '<'
+ tag_open_index = i
+ inside_shortname = false
+ elsif !tag_open_index && html[i] == ':' && (i.zero? || !DISALLOWED_BOUNDING_REGEX.match?(html[i - 1]))
+ inside_shortname = true
+ shortname_start_index = i
+ end
+ end
+
+ result << html[last_index..-1]
+
+ result.html_safe # rubocop:disable Rails/OutputSafety
+ end
+
+ private
+
+ def emoji_map
+ @emoji_map ||= custom_emojis.each_with_object({}) { |e, h| h[e.shortcode] = [full_asset_url(e.image.url), full_asset_url(e.image.url(:static))] }
+ end
+
+ def count_tag_nesting(tag)
+ if tag[1] == '/'
+ -1
+ elsif tag[-2] == '/'
+ 0
+ else
+ 1
+ end
+ end
+
+ def image_for_emoji(shortcode, emoji)
+ original_url, static_url = emoji
+
+ if animate?
+ image_tag(original_url, draggable: false, class: 'emojione', alt: ":#{shortcode}:", title: ":#{shortcode}:")
+ else
+ image_tag(original_url, draggable: false, class: 'emojione custom-emoji', alt: ":#{shortcode}:", title: ":#{shortcode}:", data: { original: original_url, static: static_url })
+ end
+ end
+
+ def animate?
+ @options[:animate]
+ end
+end
diff --git a/app/lib/extractor.rb b/app/lib/extractor.rb
index 8020aa916..ef9407864 100644
--- a/app/lib/extractor.rb
+++ b/app/lib/extractor.rb
@@ -5,18 +5,34 @@ module Extractor
module_function
- # :yields: username, list_slug, start, end
+ def extract_entities_with_indices(text, options = {}, &block)
+ entities = begin
+ extract_urls_with_indices(text, options) +
+ extract_hashtags_with_indices(text, check_url_overlap: false) +
+ extract_mentions_or_lists_with_indices(text) +
+ extract_extra_uris_with_indices(text)
+ end
+
+ return [] if entities.empty?
+
+ entities = remove_overlapping_entities(entities)
+ entities.each(&block) if block_given?
+ entities
+ end
+
def extract_mentions_or_lists_with_indices(text)
- return [] unless Twitter::TwitterText::Regex[:at_signs].match?(text)
+ return [] unless text && Twitter::TwitterText::Regex[:at_signs].match?(text)
possible_entries = []
- text.to_s.scan(Account::MENTION_RE) do |screen_name, _|
+ text.scan(Account::MENTION_RE) do |screen_name, _|
match_data = $LAST_MATCH_INFO
- after = $'
+ after = $'
+
unless Twitter::TwitterText::Regex[:end_mention_match].match?(after)
start_position = match_data.char_begin(1) - 1
- end_position = match_data.char_end(1)
+ end_position = match_data.char_end(1)
+
possible_entries << {
screen_name: screen_name,
indices: [start_position, end_position],
@@ -29,36 +45,70 @@ module Extractor
yield mention[:screen_name], mention[:indices].first, mention[:indices].last
end
end
+
possible_entries
end
- def extract_hashtags_with_indices(text, **)
- return [] unless /#/.match?(text)
+ def extract_hashtags_with_indices(text, _options = {})
+ return [] unless text&.index('#')
+
+ possible_entries = []
- tags = []
text.scan(Tag::HASHTAG_RE) do |hash_text, _|
- match_data = $LAST_MATCH_INFO
+ match_data = $LAST_MATCH_INFO
start_position = match_data.char_begin(1) - 1
- end_position = match_data.char_end(1)
- after = $'
+ end_position = match_data.char_end(1)
+ after = $'
+
if %r{\A://}.match?(after)
hash_text.match(/(.+)(https?\Z)/) do |matched|
- hash_text = matched[1]
+ hash_text = matched[1]
end_position -= matched[2].codepoint_length
end
end
- tags << {
+ possible_entries << {
hashtag: hash_text,
indices: [start_position, end_position],
}
end
- tags.each { |tag| yield tag[:hashtag], tag[:indices].first, tag[:indices].last } if block_given?
- tags
+ if block_given?
+ possible_entries.each do |tag|
+ yield tag[:hashtag], tag[:indices].first, tag[:indices].last
+ end
+ end
+
+ possible_entries
end
def extract_cashtags_with_indices(_text)
- [] # always returns empty array
+ []
+ end
+
+ def extract_extra_uris_with_indices(text)
+ return [] unless text&.index(':')
+
+ possible_entries = []
+
+ text.scan(Twitter::TwitterText::Regex[:valid_extended_uri]) do
+ valid_uri_match_data = $LAST_MATCH_INFO
+
+ start_position = valid_uri_match_data.char_begin(3)
+ end_position = valid_uri_match_data.char_end(3)
+
+ possible_entries << {
+ url: valid_uri_match_data[3],
+ indices: [start_position, end_position],
+ }
+ end
+
+ if block_given?
+ possible_entries.each do |url|
+ yield url[:url], url[:indices].first, url[:indices].last
+ end
+ end
+
+ possible_entries
end
end
diff --git a/app/lib/feed_manager.rb b/app/lib/feed_manager.rb
index 46a55c7a4..53d1390d4 100644
--- a/app/lib/feed_manager.rb
+++ b/app/lib/feed_manager.rb
@@ -5,6 +5,7 @@ require 'singleton'
class FeedManager
include Singleton
include Redisable
+ include FormattingHelper
# Maximum number of items stored in a single feed
MAX_ITEMS = 400
@@ -445,7 +446,7 @@ class FeedManager
status = status.reblog if status.reblog?
combined_text = [
- Formatter.instance.plaintext(status),
+ extract_plain_text(status.text, status.local?),
status.spoiler_text,
status.preloadable_poll ? status.preloadable_poll.options.join("\n\n") : nil,
status.ordered_media_attachments.map(&:description).join("\n\n"),
diff --git a/app/lib/formatter.rb b/app/lib/formatter.rb
deleted file mode 100644
index b6a13163d..000000000
--- a/app/lib/formatter.rb
+++ /dev/null
@@ -1,294 +0,0 @@
-# frozen_string_literal: true
-
-require 'singleton'
-
-class Formatter
- include Singleton
- include RoutingHelper
-
- include ActionView::Helpers::TextHelper
-
- def format(status, **options)
- if status.respond_to?(:reblog?) && status.reblog?
- prepend_reblog = status.reblog.account.acct
- status = status.proper
- else
- prepend_reblog = false
- end
-
- raw_content = status.text
-
- if options[:inline_poll_options] && status.preloadable_poll
- raw_content = raw_content + "\n\n" + status.preloadable_poll.options.map { |title| "[ ] #{title}" }.join("\n")
- end
-
- return '' if raw_content.blank?
-
- unless status.local?
- html = reformat(raw_content)
- html = encode_custom_emojis(html, status.emojis, options[:autoplay]) if options[:custom_emojify]
- return html.html_safe # rubocop:disable Rails/OutputSafety
- end
-
- linkable_accounts = status.respond_to?(:active_mentions) ? status.active_mentions.map(&:account) : []
- linkable_accounts << status.account
-
- html = raw_content
- html = "RT @#{prepend_reblog} #{html}" if prepend_reblog
- html = encode_and_link_urls(html, linkable_accounts)
- html = encode_custom_emojis(html, status.emojis, options[:autoplay]) if options[:custom_emojify]
- html = simple_format(html, {}, sanitize: false)
- html = html.delete("\n")
-
- html.html_safe # rubocop:disable Rails/OutputSafety
- end
-
- def reformat(html)
- sanitize(html, Sanitize::Config::MASTODON_STRICT)
- rescue ArgumentError
- ''
- end
-
- def plaintext(status)
- return status.text if status.local?
-
- text = status.text.gsub(/(<br>|<br\/>|<\/p>)+/) { |match| "#{match}\n" }
- strip_tags(text)
- end
-
- def simplified_format(account, **options)
- return '' if account.note.blank?
-
- html = account.local? ? linkify(account.note) : reformat(account.note)
- html = encode_custom_emojis(html, account.emojis, options[:autoplay]) if options[:custom_emojify]
- html.html_safe # rubocop:disable Rails/OutputSafety
- end
-
- def sanitize(html, config)
- Sanitize.fragment(html, config)
- end
-
- def format_spoiler(status, **options)
- html = encode(status.spoiler_text)
- html = encode_custom_emojis(html, status.emojis, options[:autoplay])
- html.html_safe # rubocop:disable Rails/OutputSafety
- end
-
- def format_poll_option(status, option, **options)
- html = encode(option.title)
- html = encode_custom_emojis(html, status.emojis, options[:autoplay])
- html.html_safe # rubocop:disable Rails/OutputSafety
- end
-
- def format_display_name(account, **options)
- html = encode(account.display_name.presence || account.username)
- html = encode_custom_emojis(html, account.emojis, options[:autoplay]) if options[:custom_emojify]
- html.html_safe # rubocop:disable Rails/OutputSafety
- end
-
- def format_field(account, str, **options)
- html = account.local? ? encode_and_link_urls(str, me: true, with_domain: true) : reformat(str)
- html = encode_custom_emojis(html, account.emojis, options[:autoplay]) if options[:custom_emojify]
- html.html_safe # rubocop:disable Rails/OutputSafety
- end
-
- def linkify(text)
- html = encode_and_link_urls(text)
- html = simple_format(html, {}, sanitize: false)
- html = html.delete("\n")
-
- html.html_safe # rubocop:disable Rails/OutputSafety
- end
-
- private
-
- def html_entities
- @html_entities ||= HTMLEntities.new
- end
-
- def encode(html)
- html_entities.encode(html)
- end
-
- def encode_and_link_urls(html, accounts = nil, options = {})
- entities = utf8_friendly_extractor(html, extract_url_without_protocol: false)
-
- if accounts.is_a?(Hash)
- options = accounts
- accounts = nil
- end
-
- rewrite(html.dup, entities) do |entity|
- if entity[:url]
- link_to_url(entity, options)
- elsif entity[:hashtag]
- link_to_hashtag(entity)
- elsif entity[:screen_name]
- link_to_mention(entity, accounts, options)
- end
- end
- end
-
- def count_tag_nesting(tag)
- if tag[1] == '/' then -1
- elsif tag[-2] == '/' then 0
- else 1
- end
- end
-
- # rubocop:disable Metrics/BlockNesting
- def encode_custom_emojis(html, emojis, animate = false)
- return html if emojis.empty?
-
- emoji_map = emojis.each_with_object({}) { |e, h| h[e.shortcode] = [full_asset_url(e.image.url), full_asset_url(e.image.url(:static))] }
-
- i = -1
- tag_open_index = nil
- inside_shortname = false
- shortname_start_index = -1
- invisible_depth = 0
-
- while i + 1 < html.size
- i += 1
-
- if invisible_depth.zero? && inside_shortname && html[i] == ':'
- shortcode = html[shortname_start_index + 1..i - 1]
- emoji = emoji_map[shortcode]
-
- if emoji
- original_url, static_url = emoji
- replacement = begin
- if animate
- image_tag(original_url, draggable: false, class: 'emojione', alt: ":#{shortcode}:", title: ":#{shortcode}:")
- else
- image_tag(original_url, draggable: false, class: 'emojione custom-emoji', alt: ":#{shortcode}:", title: ":#{shortcode}:", data: { original: original_url, static: static_url })
- end
- end
- before_html = shortname_start_index.positive? ? html[0..shortname_start_index - 1] : ''
- html = before_html + replacement + html[i + 1..-1]
- i += replacement.size - (shortcode.size + 2) - 1
- else
- i -= 1
- end
-
- inside_shortname = false
- elsif tag_open_index && html[i] == '>'
- tag = html[tag_open_index..i]
- tag_open_index = nil
- if invisible_depth.positive?
- invisible_depth += count_tag_nesting(tag)
- elsif tag == '<span class="invisible">'
- invisible_depth = 1
- end
- elsif html[i] == '<'
- tag_open_index = i
- inside_shortname = false
- elsif !tag_open_index && html[i] == ':'
- inside_shortname = true
- shortname_start_index = i
- end
- end
-
- html
- end
- # rubocop:enable Metrics/BlockNesting
-
- def rewrite(text, entities)
- text = text.to_s
-
- # Sort by start index
- entities = entities.sort_by do |entity|
- indices = entity.respond_to?(:indices) ? entity.indices : entity[:indices]
- indices.first
- end
-
- result = []
-
- last_index = entities.reduce(0) do |index, entity|
- indices = entity.respond_to?(:indices) ? entity.indices : entity[:indices]
- result << encode(text[index...indices.first])
- result << yield(entity)
- indices.last
- end
-
- result << encode(text[last_index..-1])
-
- result.flatten.join
- end
-
- def utf8_friendly_extractor(text, options = {})
- # Note: I couldn't obtain list_slug with @user/list-name format
- # for mention so this requires additional check
- special = Extractor.extract_urls_with_indices(text, options)
- standard = Extractor.extract_entities_with_indices(text, options)
- extra = Extractor.extract_extra_uris_with_indices(text, options)
-
- Extractor.remove_overlapping_entities(special + standard + extra)
- end
-
- def link_to_url(entity, options = {})
- url = Addressable::URI.parse(entity[:url])
- html_attrs = { target: '_blank', rel: 'nofollow noopener noreferrer' }
-
- html_attrs[:rel] = "me #{html_attrs[:rel]}" if options[:me]
-
- Twitter::TwitterText::Autolink.send(:link_to_text, entity, link_html(entity[:url]), url, html_attrs)
- rescue Addressable::URI::InvalidURIError, IDN::Idna::IdnaError
- encode(entity[:url])
- end
-
- def link_to_mention(entity, linkable_accounts, options = {})
- acct = entity[:screen_name]
-
- return link_to_account(acct, options) unless linkable_accounts
-
- same_username_hits = 0
- account = nil
- username, domain = acct.split('@')
- domain = nil if TagManager.instance.local_domain?(domain)
-
- linkable_accounts.each do |item|
- same_username = item.username.casecmp(username).zero?
- same_domain = item.domain.nil? ? domain.nil? : item.domain.casecmp(domain)&.zero?
-
- if same_username && !same_domain
- same_username_hits += 1
- elsif same_username && same_domain
- account = item
- end
- end
-
- account ? mention_html(account, with_domain: same_username_hits.positive? || options[:with_domain]) : "@#{encode(acct)}"
- end
-
- def link_to_account(acct, options = {})
- username, domain = acct.split('@')
-
- domain = nil if TagManager.instance.local_domain?(domain)
- account = EntityCache.instance.mention(username, domain)
-
- account ? mention_html(account, with_domain: options[:with_domain]) : "@#{encode(acct)}"
- end
-
- def link_to_hashtag(entity)
- hashtag_html(entity[:hashtag])
- end
-
- def link_html(url)
- url = Addressable::URI.parse(url).to_s
- prefix = url.match(/\A(https?:\/\/(www\.)?|xmpp:)/).to_s
- text = url[prefix.length, 30]
- suffix = url[prefix.length + 30..-1]
- cutoff = url[prefix.length..-1].length > 30
-
- "#{encode(prefix)}#{encode(text)}#{encode(suffix)}"
- end
-
- def hashtag_html(tag)
- "##{encode(tag)}"
- end
-
- def mention_html(account, with_domain: false)
- "@#{encode(with_domain ? account.pretty_acct : account.username)}"
- end
-end
diff --git a/app/lib/html_aware_formatter.rb b/app/lib/html_aware_formatter.rb
new file mode 100644
index 000000000..64edba09b
--- /dev/null
+++ b/app/lib/html_aware_formatter.rb
@@ -0,0 +1,38 @@
+# frozen_string_literal: true
+
+class HtmlAwareFormatter
+ attr_reader :text, :local, :options
+
+ alias local? local
+
+ # @param [String] text
+ # @param [Boolean] local
+ # @param [Hash] options
+ def initialize(text, local, options = {})
+ @text = text
+ @local = local
+ @options = options
+ end
+
+ def to_s
+ return ''.html_safe if text.blank?
+
+ if local?
+ linkify
+ else
+ reformat.html_safe # rubocop:disable Rails/OutputSafety
+ end
+ rescue ArgumentError
+ ''.html_safe
+ end
+
+ private
+
+ def reformat
+ Sanitize.fragment(text, Sanitize::Config::MASTODON_STRICT)
+ end
+
+ def linkify
+ TextFormatter.new(text, options).to_s
+ end
+end
diff --git a/app/lib/plain_text_formatter.rb b/app/lib/plain_text_formatter.rb
new file mode 100644
index 000000000..08aa29696
--- /dev/null
+++ b/app/lib/plain_text_formatter.rb
@@ -0,0 +1,30 @@
+# frozen_string_literal: true
+
+class PlainTextFormatter
+ include ActionView::Helpers::TextHelper
+
+ NEWLINE_TAGS_RE = /(<br>|<br\/>|<\/p>)+/.freeze
+
+ attr_reader :text, :local
+
+ alias local? local
+
+ def initialize(text, local)
+ @text = text
+ @local = local
+ end
+
+ def to_s
+ if local?
+ text
+ else
+ strip_tags(insert_newlines).chomp
+ end
+ end
+
+ private
+
+ def insert_newlines
+ text.gsub(NEWLINE_TAGS_RE) { |match| "#{match}\n" }
+ end
+end
diff --git a/app/lib/rss/serializer.rb b/app/lib/rss/serializer.rb
index 7e3ed1f17..d44e94221 100644
--- a/app/lib/rss/serializer.rb
+++ b/app/lib/rss/serializer.rb
@@ -1,6 +1,8 @@
# frozen_string_literal: true
class RSS::Serializer
+ include FormattingHelper
+
private
def render_statuses(builder, statuses)
@@ -9,7 +11,7 @@ class RSS::Serializer
item.title(status_title(status))
.link(ActivityPub::TagManager.instance.url_for(status))
.pub_date(status.created_at)
- .description(status.spoiler_text.presence || Formatter.instance.format(status, inline_poll_options: true).to_str)
+ .description(status_description(status))
status.ordered_media_attachments.each do |media|
item.enclosure(full_asset_url(media.file.url(:original, false)), media.file.content_type, media.file.size)
@@ -19,9 +21,8 @@ class RSS::Serializer
end
def status_title(status)
- return "#{status.account.acct} deleted status" if status.destroyed?
-
preview = status.proper.spoiler_text.presence || status.proper.text
+
if preview.length > 30 || preview[0, 30].include?("\n")
preview = preview[0, 30]
preview = preview[0, preview.index("\n").presence || 30] + '…'
@@ -35,4 +36,20 @@ class RSS::Serializer
"#{status.account.acct}: #{preview}"
end
end
+
+ def status_description(status)
+ if status.proper.spoiler_text?
+ status.proper.spoiler_text
+ else
+ html = status_content_format(status.proper).to_str
+ after_html = ''
+
+ if status.proper.preloadable_poll
+ poll_options_html = status.proper.preloadable_poll.options.map { |o| "[ ] #{o}" }.join('<br />')
+ after_html = "<p>#{poll_options_html}</p>"
+ end
+
+ "#{html}#{after_html}"
+ end
+ end
end
diff --git a/app/lib/text_formatter.rb b/app/lib/text_formatter.rb
new file mode 100644
index 000000000..48e2fc233
--- /dev/null
+++ b/app/lib/text_formatter.rb
@@ -0,0 +1,158 @@
+# frozen_string_literal: true
+
+class TextFormatter
+ include ActionView::Helpers::TextHelper
+ include ERB::Util
+ include RoutingHelper
+
+ URL_PREFIX_REGEX = /\A(https?:\/\/(www\.)?|xmpp:)/.freeze
+
+ DEFAULT_REL = %w(nofollow noopener noreferrer).freeze
+
+ DEFAULT_OPTIONS = {
+ multiline: true,
+ }.freeze
+
+ attr_reader :text, :options
+
+ # @param [String] text
+ # @param [Hash] options
+ # @option options [Boolean] :multiline
+ # @option options [Boolean] :with_domains
+ # @option options [Boolean] :with_rel_me
+ # @option options [Array] :preloaded_accounts
+ def initialize(text, options = {})
+ @text = text
+ @options = DEFAULT_OPTIONS.merge(options)
+ end
+
+ def entities
+ @entities ||= Extractor.extract_entities_with_indices(text, extract_url_without_protocol: false)
+ end
+
+ def to_s
+ return ''.html_safe if text.blank?
+
+ html = rewrite do |entity|
+ if entity[:url]
+ link_to_url(entity)
+ elsif entity[:hashtag]
+ link_to_hashtag(entity)
+ elsif entity[:screen_name]
+ link_to_mention(entity)
+ end
+ end
+
+ html = simple_format(html, {}, sanitize: false).delete("\n") if multiline?
+
+ html.html_safe # rubocop:disable Rails/OutputSafety
+ end
+
+ private
+
+ def rewrite
+ entities.sort_by! do |entity|
+ entity[:indices].first
+ end
+
+ result = ''.dup
+
+ last_index = entities.reduce(0) do |index, entity|
+ indices = entity[:indices]
+ result << h(text[index...indices.first])
+ result << yield(entity)
+ indices.last
+ end
+
+ result << h(text[last_index..-1])
+
+ result
+ end
+
+ def link_to_url(entity)
+ url = Addressable::URI.parse(entity[:url]).to_s
+ rel = with_rel_me? ? (DEFAULT_REL + %w(me)) : DEFAULT_REL
+
+ prefix = url.match(URL_PREFIX_REGEX).to_s
+ display_url = url[prefix.length, 30]
+ suffix = url[prefix.length + 30..-1]
+ cutoff = url[prefix.length..-1].length > 30
+
+ <<~HTML.squish
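+ <a href="#{h(url)}" target="_blank" rel="#{rel.join(' ')}"><span class="invisible">#{h(prefix)}</span><span class="#{cutoff ? 'ellipsis' : ''}">#{h(display_url)}</span><span class="invisible">#{h(suffix)}</span></a>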
+ HTML
+ rescue Addressable::URI::InvalidURIError, IDN::Idna::IdnaError
+ h(entity[:url])
+ end
+
+ def link_to_hashtag(entity)
+ hashtag = entity[:hashtag]
+ url = tag_url(hashtag)
+
+ <<~HTML.squish
+ <a href="#{h(url)}" class="mention hashtag" rel="tag">#<span>#{h(hashtag)}</span></a>
+ HTML
+ end
+
+ def link_to_mention(entity)
+ username, domain = entity[:screen_name].split('@')
+ domain = nil if local_domain?(domain)
+ account = nil
+
+ if preloaded_accounts?
+ same_username_hits = 0
+
+ preloaded_accounts.each do |other_account|
+ same_username = other_account.username.casecmp(username).zero?
+ same_domain = other_account.domain.nil? ? domain.nil? : other_account.domain.casecmp(domain)&.zero?
+
+ if same_username && !same_domain
+ same_username_hits += 1
+ elsif same_username && same_domain
+ account = other_account
+ end
+ end
+ else
+ account = entity_cache.mention(username, domain)
+ end
+
+ return "@#{h(entity[:screen_name])}" if account.nil?
+
+ url = ActivityPub::TagManager.instance.url_for(account)
+ display_username = same_username_hits&.positive? || with_domains? ? account.pretty_acct : account.username
+
+ <<~HTML.squish
+ <span class="h-card"><a href="#{h(url)}" class="u-url mention">@<span>#{h(display_username)}</span></a></span>
+ HTML
+ end
+
+ def entity_cache
+ @entity_cache ||= EntityCache.instance
+ end
+
+ def tag_manager
+ @tag_manager ||= TagManager.instance
+ end
+
+ delegate :local_domain?, to: :tag_manager
+
+ def multiline?
+ options[:multiline]
+ end
+
+ def with_domains?
+ options[:with_domains]
+ end
+
+ def with_rel_me?
+ options[:with_rel_me]
+ end
+
+ def preloaded_accounts
+ options[:preloaded_accounts]
+ end
+
+ def preloaded_accounts?
+ preloaded_accounts.present?
+ end
+end
diff --git a/app/mailers/application_mailer.rb b/app/mailers/application_mailer.rb
index cc585c3b7..a37682eca 100644
--- a/app/mailers/application_mailer.rb
+++ b/app/mailers/application_mailer.rb
@@ -5,6 +5,7 @@ class ApplicationMailer < ActionMailer::Base
helper :application
helper :instance
+ helper :formatting
protected
diff --git a/app/serializers/activitypub/actor_serializer.rb b/app/serializers/activitypub/actor_serializer.rb
index 48707aa16..bd1648348 100644
--- a/app/serializers/activitypub/actor_serializer.rb
+++ b/app/serializers/activitypub/actor_serializer.rb
@@ -2,6 +2,7 @@
class ActivityPub::ActorSerializer < ActivityPub::Serializer
include RoutingHelper
+ include FormattingHelper
context :security
@@ -102,7 +103,7 @@ class ActivityPub::ActorSerializer < ActivityPub::Serializer
end
def summary
- object.suspended? ? '' : Formatter.instance.simplified_format(object)
+ object.suspended? ? '' : html_aware_format(object.note, object.local?)
end
def icon
@@ -185,6 +186,8 @@ class ActivityPub::ActorSerializer < ActivityPub::Serializer
end
class Account::FieldSerializer < ActivityPub::Serializer
+ include FormattingHelper
+
attributes :type, :name, :value
def type
@@ -192,7 +195,7 @@ class ActivityPub::ActorSerializer < ActivityPub::Serializer
end
def value
- Formatter.instance.format_field(object.account, object.value)
+ html_aware_format(object.value, object.account.local?, with_rel_me: true, with_domains: true, multiline: false)
end
end
diff --git a/app/serializers/activitypub/note_serializer.rb b/app/serializers/activitypub/note_serializer.rb
index 7be2e2647..27e058199 100644
--- a/app/serializers/activitypub/note_serializer.rb
+++ b/app/serializers/activitypub/note_serializer.rb
@@ -1,6 +1,8 @@
# frozen_string_literal: true
class ActivityPub::NoteSerializer < ActivityPub::Serializer
+ include FormattingHelper
+
context_extensions :atom_uri, :conversation, :sensitive, :voters_count
attributes :id, :type, :summary,
@@ -39,11 +41,11 @@ class ActivityPub::NoteSerializer < ActivityPub::Serializer
end
def content
- Formatter.instance.format(object)
+ status_content_format(object)
end
def content_map
- { object.language => Formatter.instance.format(object) }
+ { object.language => content }
end
def replies
diff --git a/app/serializers/rest/account_serializer.rb b/app/serializers/rest/account_serializer.rb
index a78ec4507..2f67e06b2 100644
--- a/app/serializers/rest/account_serializer.rb
+++ b/app/serializers/rest/account_serializer.rb
@@ -2,6 +2,7 @@
class REST::AccountSerializer < ActiveModel::Serializer
include RoutingHelper
+ include FormattingHelper
attributes :id, :username, :acct, :display_name, :locked, :bot, :discoverable, :group, :created_at,
:note, :url, :avatar, :avatar_static, :header, :header_static,
@@ -14,10 +15,12 @@ class REST::AccountSerializer < ActiveModel::Serializer
attribute :suspended, if: :suspended?
class FieldSerializer < ActiveModel::Serializer
+ include FormattingHelper
+
attributes :name, :value, :verified_at
def value
- Formatter.instance.format_field(object.account, object.value)
+ html_aware_format(object.value, object.account.local?, with_rel_me: true, with_domains: true, multiline: false)
end
end
@@ -32,7 +35,7 @@ class REST::AccountSerializer < ActiveModel::Serializer
end
def note
- object.suspended? ? '' : Formatter.instance.simplified_format(object)
+ object.suspended? ? '' : html_aware_format(object.note, object.local?)
end
def url
diff --git a/app/serializers/rest/announcement_serializer.rb b/app/serializers/rest/announcement_serializer.rb
index 9343b97d2..23b2fa514 100644
--- a/app/serializers/rest/announcement_serializer.rb
+++ b/app/serializers/rest/announcement_serializer.rb
@@ -1,6 +1,8 @@
# frozen_string_literal: true
class REST::AnnouncementSerializer < ActiveModel::Serializer
+ include FormattingHelper
+
attributes :id, :content, :starts_at, :ends_at, :all_day,
:published_at, :updated_at
@@ -25,7 +27,7 @@ class REST::AnnouncementSerializer < ActiveModel::Serializer
end
def content
- Formatter.instance.linkify(object.text)
+ linkify(object.text)
end
def reactions
diff --git a/app/serializers/rest/status_edit_serializer.rb b/app/serializers/rest/status_edit_serializer.rb
index 05ccd5e94..f7a48797d 100644
--- a/app/serializers/rest/status_edit_serializer.rb
+++ b/app/serializers/rest/status_edit_serializer.rb
@@ -1,6 +1,8 @@
# frozen_string_literal: true
class REST::StatusEditSerializer < ActiveModel::Serializer
+ include FormattingHelper
+
has_one :account, serializer: REST::AccountSerializer
attributes :content, :spoiler_text, :sensitive, :created_at
@@ -11,7 +13,7 @@ class REST::StatusEditSerializer < ActiveModel::Serializer
attribute :poll, if: -> { object.poll_options.present? }
def content
- Formatter.instance.format(object)
+ status_content_format(object)
end
def poll
diff --git a/app/serializers/rest/status_serializer.rb b/app/serializers/rest/status_serializer.rb
index 7c3dd673e..32c4e405e 100644
--- a/app/serializers/rest/status_serializer.rb
+++ b/app/serializers/rest/status_serializer.rb
@@ -1,6 +1,8 @@
# frozen_string_literal: true
class REST::StatusSerializer < ActiveModel::Serializer
+ include FormattingHelper
+
attributes :id, :created_at, :in_reply_to_id, :in_reply_to_account_id,
:sensitive, :spoiler_text, :visibility, :language,
:uri, :url, :replies_count, :reblogs_count,
@@ -71,7 +73,7 @@ class REST::StatusSerializer < ActiveModel::Serializer
end
def content
- Formatter.instance.format(object)
+ status_content_format(object)
end
def url
diff --git a/app/services/fetch_link_card_service.rb b/app/services/fetch_link_card_service.rb
index 239ab9b93..9c8b5ea20 100644
--- a/app/services/fetch_link_card_service.rb
+++ b/app/services/fetch_link_card_service.rb
@@ -134,7 +134,7 @@ class FetchLinkCardService < BaseService
when 'video'
@card.width = embed[:width].presence || 0
@card.height = embed[:height].presence || 0
- @card.html = Formatter.instance.sanitize(embed[:html], Sanitize::Config::MASTODON_OEMBED)
+ @card.html = Sanitize.fragment(embed[:html], Sanitize::Config::MASTODON_OEMBED)
@card.image_remote_url = (url + embed[:thumbnail_url]).to_s if embed[:thumbnail_url].present?
when 'rich'
# Most providers rely on } }
-
- it 'does not include the HTML in the URL' do
- is_expected.to include '"http://example.com/blahblahblahblah/a"'
- end
-
- it 'escapes the HTML' do
- is_expected.to include '<script>alert("Hello")</script>'
- end
- end
-
- context 'given text containing HTML code (script tag)' do
- let(:text) { '' }
-
- it 'escapes the HTML' do
- is_expected.to include '<script>alert("Hello")</script>
'
- end
- end
-
- context 'given text containing HTML (XSS attack)' do
- let(:text) { %q{} }
-
- it 'escapes the HTML' do
- is_expected.to include '<img src="javascript:alert('XSS');">
'
- end
- end
-
- context 'given an invalid URL' do
- let(:text) { 'http://www\.google\.com' }
-
- it 'outputs the raw URL' do
- is_expected.to eq 'http://www\.google\.com
'
- end
- end
-
- context 'given text containing a hashtag' do
- let(:text) { '#hashtag' }
-
- it 'creates a hashtag link' do
- is_expected.to include '/tags/hashtag" class="mention hashtag" rel="tag">#hashtag'
- end
- end
-
- context 'given text containing a hashtag with Unicode chars' do
- let(:text) { '#hashtagタグ' }
-
- it 'creates a hashtag link' do
- is_expected.to include '/tags/hashtag%E3%82%BF%E3%82%B0" class="mention hashtag" rel="tag">#hashtagタグ'
- end
- end
-
- context 'given a stand-alone xmpp: URI' do
- let(:text) { 'xmpp:user@instance.com' }
-
- it 'matches the full URI' do
- is_expected.to include 'href="xmpp:user@instance.com"'
- end
- end
-
- context 'given a an xmpp: URI with a query-string' do
- let(:text) { 'please join xmpp:muc@instance.com?join right now' }
-
- it 'matches the full URI' do
- is_expected.to include 'href="xmpp:muc@instance.com?join"'
- end
- end
-
- context 'given text containing a magnet: URI' do
- let(:text) { 'wikipedia gives this example of a magnet uri: magnet:?xt=urn:btih:c12fe1c06bba254a9dc9f519b335aa7c1367a88a' }
-
- it 'matches the full URI' do
- is_expected.to include 'href="magnet:?xt=urn:btih:c12fe1c06bba254a9dc9f519b335aa7c1367a88a"'
- end
- end
- end
-
- describe '#format_spoiler' do
- subject { Formatter.instance.format_spoiler(status) }
-
- context 'given a post containing plain text' do
- let(:status) { Fabricate(:status, text: 'text', spoiler_text: 'Secret!', uri: nil) }
-
- it 'Returns the spoiler text' do
- is_expected.to eq 'Secret!'
- end
- end
-
- context 'given a post with an emoji shortcode at the start' do
- let!(:emoji) { Fabricate(:custom_emoji) }
- let(:status) { Fabricate(:status, text: 'text', spoiler_text: ':coolcat: Secret!', uri: nil) }
- let(:text) { ':coolcat: Beep boop' }
-
- it 'converts the shortcode to an image tag' do
- is_expected.to match(/@alice Hello world'
- end
- end
-
- context 'given a post containing plain text' do
- let(:status) { Fabricate(:status, text: 'text', uri: nil) }
-
- it 'paragraphizes the text' do
- is_expected.to eq 'text
'
- end
- end
-
- context 'given a post containing line feeds' do
- let(:status) { Fabricate(:status, text: "line\nfeed", uri: nil) }
-
- it 'removes line feeds' do
- is_expected.not_to include "\n"
- end
- end
-
- context 'given a post containing linkable mentions' do
- let(:status) { Fabricate(:status, mentions: [ Fabricate(:mention, account: local_account) ], text: '@alice') }
-
- it 'creates a mention link' do
- is_expected.to include '@alice'
- end
- end
-
- context 'given a post containing unlinkable mentions' do
- let(:status) { Fabricate(:status, text: '@alice', uri: nil) }
-
- it 'does not create a mention link' do
- is_expected.to include '@alice'
- end
- end
-
- context do
- subject do
- status = Fabricate(:status, text: text, uri: nil)
- Formatter.instance.format(status)
- end
-
- include_examples 'encode and link URLs'
- end
-
- context 'given a post with custom_emojify option' do
- let!(:emoji) { Fabricate(:custom_emoji) }
- let(:status) { Fabricate(:status, account: local_account, text: text) }
-
- subject { Formatter.instance.format(status, custom_emojify: true) }
-
- context 'given a post with an emoji shortcode at the start' do
- let(:text) { ':coolcat: Beep boop' }
-
- it 'converts the shortcode to an image tag' do
- is_expected.to match(/:coolcat: Beep boop
' }
-
- it 'converts the shortcode to an image tag' do
- is_expected.to match(/
Beep :coolcat: boop
' }
-
- it 'converts the shortcode to an image tag' do
- is_expected.to match(/Beep :coolcat::coolcat:
' }
-
- it 'does not touch the shortcodes' do
- is_expected.to match(/:coolcat::coolcat:<\/p>/)
- end
- end
-
- context 'given a post with an emoji shortcode at the end' do
- let(:text) { '
Beep boop
:coolcat:
' }
-
- it 'converts the shortcode to an image tag' do
- is_expected.to match(/
alert("Hello")' }
-
- it 'strips the scripts' do
- is_expected.to_not include ''
- end
- end
-
- context 'given a post containing malicious classes' do
- let(:text) { 'Show more' }
-
- it 'strips the malicious classes' do
- is_expected.to_not include 'status__content__spoiler-link'
- end
- end
- end
-
- describe '#plaintext' do
- subject { Formatter.instance.plaintext(status) }
-
- context 'given a post with local status' do
- let(:status) { Fabricate(:status, text: 'a text by a nerd who uses an HTML tag in text
', uri: nil) }
-
- it 'returns the raw text' do
- is_expected.to eq 'a text by a nerd who uses an HTML tag in text
'
- end
- end
-
- context 'given a post with remote status' do
- let(:status) { Fabricate(:status, account: remote_account, text: '') }
-
- it 'returns tag-stripped text' do
- is_expected.to eq ''
- end
- end
- end
-
- describe '#simplified_format' do
- subject { Formatter.instance.simplified_format(account) }
-
- context 'given a post with local status' do
- let(:account) { Fabricate(:account, domain: nil, note: text) }
-
- context 'given a post containing linkable mentions for local accounts' do
- let(:text) { '@alice' }
-
- before { local_account }
-
- it 'creates a mention link' do
- is_expected.to eq '@alice
'
- end
- end
-
- context 'given a post containing linkable mentions for remote accounts' do
- let(:text) { '@bob@remote.test' }
-
- before { remote_account }
-
- it 'creates a mention link' do
- is_expected.to eq '@bob
'
- end
- end
-
- context 'given a post containing unlinkable mentions' do
- let(:text) { '@alice' }
-
- it 'does not create a mention link' do
- is_expected.to eq '@alice
'
- end
- end
-
- context 'given a post with custom_emojify option' do
- let!(:emoji) { Fabricate(:custom_emoji) }
-
- before { account.note = text }
- subject { Formatter.instance.simplified_format(account, custom_emojify: true) }
-
- context 'given a post with an emoji shortcode at the start' do
- let(:text) { ':coolcat: Beep boop' }
-
- it 'converts the shortcode to an image tag' do
- is_expected.to match(/alert("Hello")' }
- let(:account) { Fabricate(:account, domain: 'remote', note: text) }
-
- it 'reformats' do
- is_expected.to_not include ''
- end
-
- context 'with custom_emojify option' do
- let!(:emoji) { Fabricate(:custom_emoji, domain: remote_account.domain) }
-
- before { remote_account.note = text }
-
- subject { Formatter.instance.simplified_format(remote_account, custom_emojify: true) }
-
- context 'given a post with an emoji shortcode at the start' do
- let(:text) { '
:coolcat: Beep boop
' }
-
- it 'converts shortcode to image tag' do
- is_expected.to match(/
Beep :coolcat: boop
' }
-
- it 'converts shortcode to image tag' do
- is_expected.to match(/Beep :coolcat::coolcat:' }
-
- it 'does not touch the shortcodes' do
- is_expected.to match(/:coolcat::coolcat:<\/p>/)
- end
- end
-
- context 'given a post with an emoji shortcode at the end' do
- let(:text) { '
Beep boop
:coolcat:
' }
-
- it 'converts shortcode to image tag' do
- is_expected.to match(/
alert("Hello")' }
-
- subject { Formatter.instance.sanitize(html, Sanitize::Config::MASTODON_STRICT) }
-
- it 'sanitizes' do
- is_expected.to eq ''
- end
- end
-end
diff --git a/spec/lib/html_aware_formatter_spec.rb b/spec/lib/html_aware_formatter_spec.rb
new file mode 100644
index 000000000..18d23abf5
--- /dev/null
+++ b/spec/lib/html_aware_formatter_spec.rb
@@ -0,0 +1,44 @@
+require 'rails_helper'
+
+RSpec.describe HtmlAwareFormatter do
+ describe '#to_s' do
+ subject { described_class.new(text, local).to_s }
+
+ context 'when local' do
+ let(:local) { true }
+ let(:text) { 'Foo bar' }
+
+ it 'returns formatted text' do
+ is_expected.to eq '<p>Foo bar</p>'
+ end
+ end
+
+ context 'when remote' do
+ let(:local) { false }
+
+ context 'given plain text' do
+ let(:text) { 'Beep boop' }
+
+ it 'keeps the plain text' do
+ is_expected.to include 'Beep boop'
+ end
+ end
+
+ context 'given text containing script tags' do
+ let(:text) { '<script>alert("Hello")</script>' }
+
+ it 'strips the scripts' do
+ is_expected.to_not include '<script>alert("Hello")</script>'
+ end
+ end
+
+ context 'given text containing malicious classes' do
+ let(:text) { '<span class="mention  status__content__spoiler-link">Show more</span>' }
+
+ it 'strips the malicious classes' do
+ is_expected.to_not include 'status__content__spoiler-link'
+ end
+ end
+ end
+ end
+end
diff --git a/spec/lib/plain_text_formatter_spec.rb b/spec/lib/plain_text_formatter_spec.rb
new file mode 100644
index 000000000..c3d0ee630
--- /dev/null
+++ b/spec/lib/plain_text_formatter_spec.rb
@@ -0,0 +1,24 @@
+require 'rails_helper'
+
+RSpec.describe PlainTextFormatter do
+ describe '#to_s' do
+ subject { described_class.new(status.text, status.local?).to_s }
+
+ context 'given a post with local status' do
+ let(:status) { Fabricate(:status, text: '<p>a text by a nerd who uses an HTML tag in text</p>', uri: nil) }
+
+ it 'returns the raw text' do
+ is_expected.to eq '<p>a text by a nerd who uses an HTML tag in text</p>'
+ end
+ end
+
+ context 'given a post with remote status' do
+ let(:remote_account) { Fabricate(:account, domain: 'remote.test', username: 'bob', url: 'https://remote.test/') }
+ let(:status) { Fabricate(:status, account: remote_account, text: '<p>Hello</p><script>alert("Hello")</script>') }
+
+ it 'returns tag-stripped text' do
+ is_expected.to eq 'Hello'
+ end
+ end
+ end
+end
diff --git a/spec/lib/text_formatter_spec.rb b/spec/lib/text_formatter_spec.rb
new file mode 100644
index 000000000..52a9d2498
--- /dev/null
+++ b/spec/lib/text_formatter_spec.rb
@@ -0,0 +1,313 @@
+require 'rails_helper'
+
+RSpec.describe TextFormatter do
+ describe '#to_s' do
+ let(:preloaded_accounts) { nil }
+
+ subject { described_class.new(text, preloaded_accounts: preloaded_accounts).to_s }
+
+ context 'given text containing plain text' do
+ let(:text) { 'text' }
+
+ it 'paragraphizes the text' do
+ is_expected.to eq '<p>text</p>'
+ end
+ end
+
+ context 'given text containing line feeds' do
+ let(:text) { "line\nfeed" }
+
+ it 'removes line feeds' do
+ is_expected.not_to include "\n"
+ end
+ end
+
+ context 'given text containing linkable mentions' do
+ let(:preloaded_accounts) { [Fabricate(:account, username: 'alice')] }
+ let(:text) { '@alice' }
+
+ it 'creates a mention link' do
+ is_expected.to include '@alice'
+ end
+ end
+
+ context 'given text containing unlinkable mentions' do
+ let(:preloaded_accounts) { [] }
+ let(:text) { '@alice' }
+
+ it 'does not create a mention link' do
+ is_expected.to include '@alice'
+ end
+ end
+
+ context 'given a stand-alone medium URL' do
+ let(:text) { 'https://hackernoon.com/the-power-to-build-communities-a-response-to-mark-zuckerberg-3f2cac9148a4' }
+
+ it 'matches the full URL' do
+ is_expected.to include 'href="https://hackernoon.com/the-power-to-build-communities-a-response-to-mark-zuckerberg-3f2cac9148a4"'
+ end
+ end
+
+ context 'given a stand-alone google URL' do
+ let(:text) { 'http://google.com' }
+
+ it 'matches the full URL' do
+ is_expected.to include 'href="http://google.com"'
+ end
+ end
+
+ context 'given a stand-alone URL with a newer TLD' do
+ let(:text) { 'http://example.gay' }
+
+ it 'matches the full URL' do
+ is_expected.to include 'href="http://example.gay"'
+ end
+ end
+
+ context 'given a stand-alone IDN URL' do
+ let(:text) { 'https://nic.みんな/' }
+
+ it 'matches the full URL' do
+ is_expected.to include 'href="https://nic.みんな/"'
+ end
+
+ it 'has display URL' do
+ is_expected.to include 'nic.みんな/'
+ end
+ end
+
+ context 'given a URL with a trailing period' do
+ let(:text) { 'http://www.mcmansionhell.com/post/156408871451/50-states-of-mcmansion-hell-scottsdale-arizona. ' }
+
+ it 'matches the full URL but not the period' do
+ is_expected.to include 'href="http://www.mcmansionhell.com/post/156408871451/50-states-of-mcmansion-hell-scottsdale-arizona"'
+ end
+ end
+
+ context 'given a URL enclosed with parentheses' do
+ let(:text) { '(http://google.com/)' }
+
+ it 'matches the full URL but not the parentheses' do
+ is_expected.to include 'href="http://google.com/"'
+ end
+ end
+
+ context 'given a URL with a trailing exclamation point' do
+ let(:text) { 'http://www.google.com!' }
+
+ it 'matches the full URL but not the exclamation point' do
+ is_expected.to include 'href="http://www.google.com"'
+ end
+ end
+
+ context 'given a URL with a trailing single quote' do
+ let(:text) { "http://www.google.com'" }
+
+ it 'matches the full URL but not the single quote' do
+ is_expected.to include 'href="http://www.google.com"'
+ end
+ end
+
+ context 'given a URL with a trailing angle bracket' do
+ let(:text) { 'http://www.google.com>' }
+
+ it 'matches the full URL but not the angle bracket' do
+ is_expected.to include 'href="http://www.google.com"'
+ end
+ end
+
+ context 'given a URL with a query string' do
+ context 'with escaped unicode character' do
+ let(:text) { 'https://www.ruby-toolbox.com/search?utf8=%E2%9C%93&q=autolink' }
+
+ it 'matches the full URL' do
+ is_expected.to include 'href="https://www.ruby-toolbox.com/search?utf8=%E2%9C%93&amp;q=autolink"'
+ end
+ end
+
+ context 'with unicode character' do
+ let(:text) { 'https://www.ruby-toolbox.com/search?utf8=✓&q=autolink' }
+
+ it 'matches the full URL' do
+ is_expected.to include 'href="https://www.ruby-toolbox.com/search?utf8=✓&amp;q=autolink"'
+ end
+ end
+
+ context 'with unicode character at the end' do
+ let(:text) { 'https://www.ruby-toolbox.com/search?utf8=✓' }
+
+ it 'matches the full URL' do
+ is_expected.to include 'href="https://www.ruby-toolbox.com/search?utf8=✓"'
+ end
+ end
+
+ context 'with escaped and not escaped unicode characters' do
+ let(:text) { 'https://www.ruby-toolbox.com/search?utf8=%E2%9C%93&utf81=✓&q=autolink' }
+
+ it 'preserves escaped unicode characters' do
+ is_expected.to include 'href="https://www.ruby-toolbox.com/search?utf8=%E2%9C%93&amp;utf81=✓&amp;q=autolink"'
+ end
+ end
+ end
+
+ context 'given a URL with parentheses in it' do
+ let(:text) { 'https://en.wikipedia.org/wiki/Diaspora_(software)' }
+
+ it 'matches the full URL' do
+ is_expected.to include 'href="https://en.wikipedia.org/wiki/Diaspora_(software)"'
+ end
+ end
+
+ context 'given a URL in quotation marks' do
+ let(:text) { '"https://example.com/"' }
+
+ it 'does not match the quotation marks' do
+ is_expected.to include 'href="https://example.com/"'
+ end
+ end
+
+ context 'given a URL in angle brackets' do
+ let(:text) { '<https://example.com/>' }
+
+ it 'does not match the angle brackets' do
+ is_expected.to include 'href="https://example.com/"'
+ end
+ end
+
+ context 'given a URL with Japanese path string' do
+ let(:text) { 'https://ja.wikipedia.org/wiki/日本' }
+
+ it 'matches the full URL' do
+ is_expected.to include 'href="https://ja.wikipedia.org/wiki/日本"'
+ end
+ end
+
+ context 'given a URL with Korean path string' do
+ let(:text) { 'https://ko.wikipedia.org/wiki/대한민국' }
+
+ it 'matches the full URL' do
+ is_expected.to include 'href="https://ko.wikipedia.org/wiki/대한민국"'
+ end
+ end
+
+ context 'given a URL with a full-width space' do
+ let(:text) { 'https://example.com/　abc123' }
+
+ it 'does not match the full-width space' do
+ is_expected.to include 'href="https://example.com/"'
+ end
+ end
+
+ context 'given a URL in Japanese quotation marks' do
+ let(:text) { '「[https://example.org/」' }
+
+ it 'does not match the quotation marks' do
+ is_expected.to include 'href="https://example.org/"'
+ end
+ end
+
+ context 'given a URL with Simplified Chinese path string' do
+ let(:text) { 'https://baike.baidu.com/item/中华人民共和国' }
+
+ it 'matches the full URL' do
+ is_expected.to include 'href="https://baike.baidu.com/item/中华人民共和国"'
+ end
+ end
+
+ context 'given a URL with Traditional Chinese path string' do
+ let(:text) { 'https://zh.wikipedia.org/wiki/臺灣' }
+
+ it 'matches the full URL' do
+ is_expected.to include 'href="https://zh.wikipedia.org/wiki/臺灣"'
+ end
+ end
+
+ context 'given a URL containing unsafe code (XSS attack, visible part)' do
+ let(:text) { %q{http://example.com/b<del>b</del>} }
+
+ it 'does not include the HTML in the URL' do
+ is_expected.to include '"http://example.com/b"'
+ end
+
+ it 'escapes the HTML' do
+ is_expected.to include '&lt;del&gt;b&lt;/del&gt;'
+ end
+ end
+
+ context 'given a URL containing unsafe code (XSS attack, invisible part)' do
+ let(:text) { %q{http://example.com/blahblahblahblah/a<script>alert("Hello")</script>} }
+
+ it 'does not include the HTML in the URL' do
+ is_expected.to include '"http://example.com/blahblahblahblah/a"'
+ end
+
+ it 'escapes the HTML' do
+ is_expected.to include '&lt;script&gt;alert(&quot;Hello&quot;)&lt;/script&gt;'
+ end
+ end
+
+ context 'given text containing HTML code (script tag)' do
+ let(:text) { '<script>alert("Hello")</script>' }
+
+ it 'escapes the HTML' do
+ is_expected.to include '<p>&lt;script&gt;alert(&quot;Hello&quot;)&lt;/script&gt;</p>'
+ end
+ end
+
+ context 'given text containing HTML (XSS attack)' do
+ let(:text) { %q{<img src="javascript:alert('XSS');">} }
+
+ it 'escapes the HTML' do
+ is_expected.to include '<p>&lt;img src=&quot;javascript:alert(&#39;XSS&#39;);&quot;&gt;</p>'
+ end
+ end
+
+ context 'given an invalid URL' do
+ let(:text) { 'http://www\.google\.com' }
+
+ it 'outputs the raw URL' do
+ is_expected.to eq '<p>http://www\.google\.com</p>'
+ end
+ end
+
+ context 'given text containing a hashtag' do
+ let(:text) { '#hashtag' }
+
+ it 'creates a hashtag link' do
+ is_expected.to include '/tags/hashtag" class="mention hashtag" rel="tag">#hashtag'
+ end
+ end
+
+ context 'given text containing a hashtag with Unicode chars' do
+ let(:text) { '#hashtagタグ' }
+
+ it 'creates a hashtag link' do
+ is_expected.to include '/tags/hashtag%E3%82%BF%E3%82%B0" class="mention hashtag" rel="tag">#hashtagタグ'
+ end
+ end
+
+ context 'given text with a stand-alone xmpp: URI' do
+ let(:text) { 'xmpp:user@instance.com' }
+
+ it 'matches the full URI' do
+ is_expected.to include 'href="xmpp:user@instance.com"'
+ end
+ end
+
+ context 'given text with an xmpp: URI with a query-string' do
+ let(:text) { 'please join xmpp:muc@instance.com?join right now' }
+
+ it 'matches the full URI' do
+ is_expected.to include 'href="xmpp:muc@instance.com?join"'
+ end
+ end
+
+ context 'given text containing a magnet: URI' do
+ let(:text) { 'wikipedia gives this example of a magnet uri: magnet:?xt=urn:btih:c12fe1c06bba254a9dc9f519b335aa7c1367a88a' }
+
+ it 'matches the full URI' do
+ is_expected.to include 'href="magnet:?xt=urn:btih:c12fe1c06bba254a9dc9f519b335aa7c1367a88a"'
+ end
+ end
+ end
+end