Add support for linking XMPP URIs in toots (#12709)
* Fix wrong grouping in Twitter valid_url regex
* Add support for xmpp URIs. Fixes #9776. The difficult part is autolinking, because Twitter-text's extractor does some pretty ad-hoc stuff to find things that “look like” URLs, and XMPP URIs do not really match the assumptions of that lib, so it doesn't sound wise to try to shoehorn it into the existing regex. This is why I used a specific regex (very close to, although slightly more permissive than, the RFC), and a specific scan function (a simplified version of the generalized one from Twitter).
* Remove leading “xmpp:” from auto-linked text
gh/stable
parent
e9ea09d173
commit
ea436b355b
|
@ -245,8 +245,9 @@ class Formatter
|
||||||
end
|
end
|
||||||
|
|
||||||
standard = Extractor.extract_entities_with_indices(text, options)
|
standard = Extractor.extract_entities_with_indices(text, options)
|
||||||
|
xmpp = Extractor.extract_xmpp_uris_with_indices(text, options)
|
||||||
|
|
||||||
Extractor.remove_overlapping_entities(special + standard)
|
Extractor.remove_overlapping_entities(special + standard + xmpp)
|
||||||
end
|
end
|
||||||
|
|
||||||
def link_to_url(entity, options = {})
|
def link_to_url(entity, options = {})
|
||||||
|
@ -284,7 +285,7 @@ class Formatter
|
||||||
|
|
||||||
def link_html(url)
|
def link_html(url)
|
||||||
url = Addressable::URI.parse(url).to_s
|
url = Addressable::URI.parse(url).to_s
|
||||||
prefix = url.match(/\Ahttps?:\/\/(www\.)?/).to_s
|
prefix = url.match(/\A(https?:\/\/(www\.)?|xmpp:)/).to_s
|
||||||
text = url[prefix.length, 30]
|
text = url[prefix.length, 30]
|
||||||
suffix = url[prefix.length + 30..-1]
|
suffix = url[prefix.length + 30..-1]
|
||||||
cutoff = url[prefix.length..-1].length > 30
|
cutoff = url[prefix.length..-1].length > 30
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
|
|
||||||
class Sanitize
|
class Sanitize
|
||||||
module Config
|
module Config
|
||||||
HTTP_PROTOCOLS ||= ['http', 'https', 'dat', 'dweb', 'ipfs', 'ipns', 'ssb', 'gopher', :relative].freeze
|
HTTP_PROTOCOLS ||= ['http', 'https', 'dat', 'dweb', 'ipfs', 'ipns', 'ssb', 'gopher', 'xmpp', :relative].freeze
|
||||||
|
|
||||||
CLASS_WHITELIST_TRANSFORMER = lambda do |env|
|
CLASS_WHITELIST_TRANSFORMER = lambda do |env|
|
||||||
node = env[:node]
|
node = env[:node]
|
||||||
|
|
|
@ -29,7 +29,7 @@ module Twitter
|
||||||
( # $1 total match
|
( # $1 total match
|
||||||
(#{REGEXEN[:valid_url_preceding_chars]}) # $2 Preceding character
|
(#{REGEXEN[:valid_url_preceding_chars]}) # $2 Preceding character
|
||||||
( # $3 URL
|
( # $3 URL
|
||||||
((https?|dat|dweb|ipfs|ipns|ssb|gopher):\/\/)? # $4 Protocol (optional)
|
((?:https?|dat|dweb|ipfs|ipns|ssb|gopher):\/\/)? # $4 Protocol (optional)
|
||||||
(#{REGEXEN[:valid_domain]}) # $5 Domain(s)
|
(#{REGEXEN[:valid_domain]}) # $5 Domain(s)
|
||||||
(?::(#{REGEXEN[:valid_port_number]}))? # $6 Port number (optional)
|
(?::(#{REGEXEN[:valid_port_number]}))? # $6 Port number (optional)
|
||||||
(/#{REGEXEN[:valid_url_path]}*)? # $7 URL Path and anchor
|
(/#{REGEXEN[:valid_url_path]}*)? # $7 URL Path and anchor
|
||||||
|
@ -37,5 +37,54 @@ module Twitter
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
}iox
|
}iox
|
||||||
|
REGEXEN[:validate_nodeid] = /(?:
|
||||||
|
#{REGEXEN[:validate_url_unreserved]}|
|
||||||
|
#{REGEXEN[:validate_url_pct_encoded]}|
|
||||||
|
[!$()*+,;=]
|
||||||
|
)/iox
|
||||||
|
REGEXEN[:validate_resid] = /(?:
|
||||||
|
#{REGEXEN[:validate_url_unreserved]}|
|
||||||
|
#{REGEXEN[:validate_url_pct_encoded]}|
|
||||||
|
#{REGEXEN[:validate_url_sub_delims]}
|
||||||
|
)/iox
|
||||||
|
REGEXEN[:valid_xmpp_uri] = %r{
|
||||||
|
( # $1 total match
|
||||||
|
(#{REGEXEN[:valid_url_preceding_chars]}) # $2 Preceding character
|
||||||
|
( # $3 URL
|
||||||
|
((?:xmpp):) # $4 Protocol
|
||||||
|
(//#{REGEXEN[:validate_nodeid]}+@#{REGEXEN[:valid_domain]}/)? # $5 Authority (optional)
|
||||||
|
(#{REGEXEN[:validate_nodeid]}+@)? # $6 Username in path (optional)
|
||||||
|
(#{REGEXEN[:valid_domain]}) # $7 Domain in path
|
||||||
|
(/#{REGEXEN[:validate_resid]}+)? # $8 Resource in path (optional)
|
||||||
|
(\?#{REGEXEN[:valid_url_query_chars]}*#{REGEXEN[:valid_url_query_ending_chars]})? # $9 Query String
|
||||||
|
)
|
||||||
|
)
|
||||||
|
}iox
|
||||||
|
end
|
||||||
|
|
||||||
|
module Extractor
|
||||||
|
# Extracts a list of all XMPP URIs included in the Tweet <tt>text</tt> along
|
||||||
|
# with the indices. If the <tt>text</tt> is <tt>nil</tt> or contains no
|
||||||
|
# XMPP URIs an empty array will be returned.
|
||||||
|
#
|
||||||
|
# If a block is given then it will be called for each XMPP URI.
|
||||||
|
def extract_xmpp_uris_with_indices(text, options = {}) # :yields: uri, start, end
|
||||||
|
return [] unless text && text.index(":")
|
||||||
|
urls = []
|
||||||
|
|
||||||
|
text.to_s.scan(Twitter::Regex[:valid_xmpp_uri]) do
|
||||||
|
valid_uri_match_data = $~
|
||||||
|
|
||||||
|
start_position = valid_uri_match_data.char_begin(3)
|
||||||
|
end_position = valid_uri_match_data.char_end(3)
|
||||||
|
|
||||||
|
urls << {
|
||||||
|
:url => valid_uri_match_data[3],
|
||||||
|
:indices => [start_position, end_position]
|
||||||
|
}
|
||||||
|
end
|
||||||
|
urls.each{|url| yield url[:url], url[:indices].first, url[:indices].last} if block_given?
|
||||||
|
urls
|
||||||
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -242,6 +242,22 @@ RSpec.describe Formatter do
|
||||||
is_expected.to include '/tags/hashtag%E3%82%BF%E3%82%B0" class="mention hashtag" rel="tag">#<span>hashtagタグ</span></a>'
|
is_expected.to include '/tags/hashtag%E3%82%BF%E3%82%B0" class="mention hashtag" rel="tag">#<span>hashtagタグ</span></a>'
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
context 'given a stand-alone xmpp: URI' do
|
||||||
|
let(:text) { 'xmpp:user@instance.com' }
|
||||||
|
|
||||||
|
it 'matches the full URI' do
|
||||||
|
is_expected.to include 'href="xmpp:user@instance.com"'
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
context 'given a an xmpp: URI with a query-string' do
|
||||||
|
let(:text) { 'please join xmpp:muc@instance.com?join right now' }
|
||||||
|
|
||||||
|
it 'matches the full URI' do
|
||||||
|
is_expected.to include 'href="xmpp:muc@instance.com?join"'
|
||||||
|
end
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
describe '#format_spoiler' do
|
describe '#format_spoiler' do
|
||||||
|
|
Reference in New Issue