Fix unmatched quotes and prefixes causing search to fail (#26701)
parent
872145d1c2
commit
e754083e8a
|
@ -6,10 +6,10 @@ class SearchQueryParser < Parslet::Parser
|
||||||
rule(:colon) { str(':') }
|
rule(:colon) { str(':') }
|
||||||
rule(:space) { match('\s').repeat(1) }
|
rule(:space) { match('\s').repeat(1) }
|
||||||
rule(:operator) { (str('+') | str('-')).as(:operator) }
|
rule(:operator) { (str('+') | str('-')).as(:operator) }
|
||||||
rule(:prefix) { (term >> colon).as(:prefix) }
|
rule(:prefix) { term >> colon }
|
||||||
rule(:shortcode) { (colon >> term >> colon.maybe).as(:shortcode) }
|
rule(:shortcode) { (colon >> term >> colon.maybe).as(:shortcode) }
|
||||||
rule(:phrase) { (quote >> (term >> space.maybe).repeat >> quote).as(:phrase) }
|
rule(:phrase) { (quote >> (term >> space.maybe).repeat >> quote).as(:phrase) }
|
||||||
rule(:clause) { (operator.maybe >> prefix.maybe >> (phrase | term | shortcode)).as(:clause) }
|
rule(:clause) { (operator.maybe >> prefix.maybe.as(:prefix) >> (phrase | term | shortcode)).as(:clause) | prefix.as(:clause) | quote.as(:junk) }
|
||||||
rule(:query) { (clause >> space.maybe).repeat.as(:query) }
|
rule(:query) { (clause >> space.maybe).repeat.as(:query) }
|
||||||
root(:query)
|
root(:query)
|
||||||
end
|
end
|
||||||
|
|
|
@ -1,50 +1,32 @@
|
||||||
# frozen_string_literal: true
|
# frozen_string_literal: true
|
||||||
|
|
||||||
class SearchQueryTransformer < Parslet::Transform
|
class SearchQueryTransformer < Parslet::Transform
|
||||||
|
SUPPORTED_PREFIXES = %w(
|
||||||
|
has
|
||||||
|
is
|
||||||
|
language
|
||||||
|
from
|
||||||
|
before
|
||||||
|
after
|
||||||
|
during
|
||||||
|
).freeze
|
||||||
|
|
||||||
class Query
|
class Query
|
||||||
attr_reader :should_clauses, :must_not_clauses, :must_clauses, :filter_clauses
|
attr_reader :must_not_clauses, :must_clauses, :filter_clauses
|
||||||
|
|
||||||
def initialize(clauses)
|
def initialize(clauses)
|
||||||
grouped = clauses.chunk(&:operator).to_h
|
grouped = clauses.compact.chunk(&:operator).to_h
|
||||||
@should_clauses = grouped.fetch(:should, [])
|
|
||||||
@must_not_clauses = grouped.fetch(:must_not, [])
|
@must_not_clauses = grouped.fetch(:must_not, [])
|
||||||
@must_clauses = grouped.fetch(:must, [])
|
@must_clauses = grouped.fetch(:must, [])
|
||||||
@filter_clauses = grouped.fetch(:filter, [])
|
@filter_clauses = grouped.fetch(:filter, [])
|
||||||
end
|
end
|
||||||
|
|
||||||
def apply(search)
|
def apply(search)
|
||||||
should_clauses.each { |clause| search = search.query.should(clause_to_query(clause)) }
|
must_clauses.each { |clause| search = search.query.must(clause.to_query) }
|
||||||
must_clauses.each { |clause| search = search.query.must(clause_to_query(clause)) }
|
must_not_clauses.each { |clause| search = search.query.must_not(clause.to_query) }
|
||||||
must_not_clauses.each { |clause| search = search.query.must_not(clause_to_query(clause)) }
|
filter_clauses.each { |clause| search = search.filter(**clause.to_query) }
|
||||||
filter_clauses.each { |clause| search = search.filter(**clause_to_filter(clause)) }
|
|
||||||
search.query.minimum_should_match(1)
|
search.query.minimum_should_match(1)
|
||||||
end
|
end
|
||||||
|
|
||||||
private
|
|
||||||
|
|
||||||
def clause_to_query(clause)
|
|
||||||
case clause
|
|
||||||
when TermClause
|
|
||||||
{ multi_match: { type: 'most_fields', query: clause.term, fields: ['text', 'text.stemmed'], operator: 'and' } }
|
|
||||||
when PhraseClause
|
|
||||||
{ match_phrase: { text: { query: clause.phrase } } }
|
|
||||||
else
|
|
||||||
raise "Unexpected clause type: #{clause}"
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
def clause_to_filter(clause)
|
|
||||||
case clause
|
|
||||||
when PrefixClause
|
|
||||||
if clause.negated?
|
|
||||||
{ bool: { must_not: { clause.type => { clause.filter => clause.term } } } }
|
|
||||||
else
|
|
||||||
{ clause.type => { clause.filter => clause.term } }
|
|
||||||
end
|
|
||||||
else
|
|
||||||
raise "Unexpected clause type: #{clause}"
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
|
|
||||||
class Operator
|
class Operator
|
||||||
|
@ -63,31 +45,38 @@ class SearchQueryTransformer < Parslet::Transform
|
||||||
end
|
end
|
||||||
|
|
||||||
class TermClause
|
class TermClause
|
||||||
attr_reader :prefix, :operator, :term
|
attr_reader :operator, :term
|
||||||
|
|
||||||
def initialize(prefix, operator, term)
|
def initialize(operator, term)
|
||||||
@prefix = prefix
|
|
||||||
@operator = Operator.symbol(operator)
|
@operator = Operator.symbol(operator)
|
||||||
@term = term
|
@term = term
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def to_query
|
||||||
|
{ multi_match: { type: 'most_fields', query: @term, fields: ['text', 'text.stemmed'], operator: 'and' } }
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
class PhraseClause
|
class PhraseClause
|
||||||
attr_reader :prefix, :operator, :phrase
|
attr_reader :operator, :phrase
|
||||||
|
|
||||||
def initialize(prefix, operator, phrase)
|
def initialize(operator, phrase)
|
||||||
@prefix = prefix
|
|
||||||
@operator = Operator.symbol(operator)
|
@operator = Operator.symbol(operator)
|
||||||
@phrase = phrase
|
@phrase = phrase
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def to_query
|
||||||
|
{ match_phrase: { text: { query: @phrase } } }
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
class PrefixClause
|
class PrefixClause
|
||||||
attr_reader :type, :filter, :operator, :term
|
attr_reader :operator, :prefix, :term
|
||||||
|
|
||||||
def initialize(prefix, operator, term, options = {})
|
def initialize(prefix, operator, term, options = {})
|
||||||
@negated = operator == '-'
|
@prefix = prefix
|
||||||
@options = options
|
@negated = operator == '-'
|
||||||
|
@options = options
|
||||||
@operator = :filter
|
@operator = :filter
|
||||||
|
|
||||||
case prefix
|
case prefix
|
||||||
|
@ -116,12 +105,16 @@ class SearchQueryTransformer < Parslet::Transform
|
||||||
@type = :range
|
@type = :range
|
||||||
@term = { gte: term, lte: term, time_zone: @options[:current_account]&.user_time_zone || 'UTC' }
|
@term = { gte: term, lte: term, time_zone: @options[:current_account]&.user_time_zone || 'UTC' }
|
||||||
else
|
else
|
||||||
raise Mastodon::SyntaxError
|
raise "Unknown prefix: #{prefix}"
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
def negated?
|
def to_query
|
||||||
@negated
|
if @negated
|
||||||
|
{ bool: { must_not: { @type => { @filter => @term } } } }
|
||||||
|
else
|
||||||
|
{ @type => { @filter => @term } }
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
private
|
private
|
||||||
|
@ -159,18 +152,26 @@ class SearchQueryTransformer < Parslet::Transform
|
||||||
prefix = clause[:prefix][:term].to_s if clause[:prefix]
|
prefix = clause[:prefix][:term].to_s if clause[:prefix]
|
||||||
operator = clause[:operator]&.to_s
|
operator = clause[:operator]&.to_s
|
||||||
|
|
||||||
if clause[:prefix]
|
if clause[:prefix] && SUPPORTED_PREFIXES.include?(prefix)
|
||||||
PrefixClause.new(prefix, operator, clause[:term].to_s, current_account: current_account)
|
PrefixClause.new(prefix, operator, clause[:term].to_s, current_account: current_account)
|
||||||
|
elsif clause[:prefix]
|
||||||
|
TermClause.new(operator, "#{prefix} #{clause[:term]}")
|
||||||
elsif clause[:term]
|
elsif clause[:term]
|
||||||
TermClause.new(prefix, operator, clause[:term].to_s)
|
TermClause.new(operator, clause[:term].to_s)
|
||||||
elsif clause[:shortcode]
|
elsif clause[:shortcode]
|
||||||
TermClause.new(prefix, operator, ":#{clause[:term]}:")
|
TermClause.new(operator, ":#{clause[:term]}:")
|
||||||
elsif clause[:phrase]
|
elsif clause[:phrase]
|
||||||
PhraseClause.new(prefix, operator, clause[:phrase].is_a?(Array) ? clause[:phrase].map { |p| p[:term].to_s }.join(' ') : clause[:phrase].to_s)
|
PhraseClause.new(operator, clause[:phrase].is_a?(Array) ? clause[:phrase].map { |p| p[:term].to_s }.join(' ') : clause[:phrase].to_s)
|
||||||
else
|
else
|
||||||
raise "Unexpected clause type: #{clause}"
|
raise "Unexpected clause type: #{clause}"
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
rule(query: sequence(:clauses)) { Query.new(clauses) }
|
rule(junk: subtree(:junk)) do
|
||||||
|
nil
|
||||||
|
end
|
||||||
|
|
||||||
|
rule(query: sequence(:clauses)) do
|
||||||
|
Query.new(clauses)
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -0,0 +1,98 @@
|
||||||
|
# frozen_string_literal: true
|
||||||
|
|
||||||
|
require 'rails_helper'
|
||||||
|
require 'parslet/rig/rspec'
|
||||||
|
|
||||||
|
describe SearchQueryParser do
|
||||||
|
let(:parser) { described_class.new }
|
||||||
|
|
||||||
|
context 'with term' do
|
||||||
|
it 'consumes "hello"' do
|
||||||
|
expect(parser.term).to parse('hello')
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
context 'with prefix' do
|
||||||
|
it 'consumes "foo:"' do
|
||||||
|
expect(parser.prefix).to parse('foo:')
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
context 'with operator' do
|
||||||
|
it 'consumes "+"' do
|
||||||
|
expect(parser.operator).to parse('+')
|
||||||
|
end
|
||||||
|
|
||||||
|
it 'consumes "-"' do
|
||||||
|
expect(parser.operator).to parse('-')
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
context 'with shortcode' do
|
||||||
|
it 'consumes ":foo:"' do
|
||||||
|
expect(parser.shortcode).to parse(':foo:')
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
context 'with phrase' do
|
||||||
|
it 'consumes "hello world"' do
|
||||||
|
expect(parser.phrase).to parse('"hello world"')
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
context 'with clause' do
|
||||||
|
it 'consumes "foo"' do
|
||||||
|
expect(parser.clause).to parse('foo')
|
||||||
|
end
|
||||||
|
|
||||||
|
it 'consumes "-foo"' do
|
||||||
|
expect(parser.clause).to parse('-foo')
|
||||||
|
end
|
||||||
|
|
||||||
|
it 'consumes "foo:bar"' do
|
||||||
|
expect(parser.clause).to parse('foo:bar')
|
||||||
|
end
|
||||||
|
|
||||||
|
it 'consumes "-foo:bar"' do
|
||||||
|
expect(parser.clause).to parse('-foo:bar')
|
||||||
|
end
|
||||||
|
|
||||||
|
it 'consumes \'foo:"hello world"\'' do
|
||||||
|
expect(parser.clause).to parse('foo:"hello world"')
|
||||||
|
end
|
||||||
|
|
||||||
|
it 'consumes \'-foo:"hello world"\'' do
|
||||||
|
expect(parser.clause).to parse('-foo:"hello world"')
|
||||||
|
end
|
||||||
|
|
||||||
|
it 'consumes "foo:"' do
|
||||||
|
expect(parser.clause).to parse('foo:')
|
||||||
|
end
|
||||||
|
|
||||||
|
it 'consumes \'"\'' do
|
||||||
|
expect(parser.clause).to parse('"')
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
context 'with query' do
|
||||||
|
it 'consumes "hello -world"' do
|
||||||
|
expect(parser.query).to parse('hello -world')
|
||||||
|
end
|
||||||
|
|
||||||
|
it 'consumes \'foo "hello world"\'' do
|
||||||
|
expect(parser.query).to parse('foo "hello world"')
|
||||||
|
end
|
||||||
|
|
||||||
|
it 'consumes "foo:bar hello"' do
|
||||||
|
expect(parser.query).to parse('foo:bar hello')
|
||||||
|
end
|
||||||
|
|
||||||
|
it 'consumes \'"hello" world "\'' do
|
||||||
|
expect(parser.query).to parse('"hello" world "')
|
||||||
|
end
|
||||||
|
|
||||||
|
it 'consumes "foo:bar bar: hello"' do
|
||||||
|
expect(parser.query).to parse('foo:bar bar: hello')
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
|
@ -3,16 +3,57 @@
|
||||||
require 'rails_helper'
|
require 'rails_helper'
|
||||||
|
|
||||||
describe SearchQueryTransformer do
|
describe SearchQueryTransformer do
|
||||||
describe 'initialization' do
|
subject { described_class.new.apply(parser, current_account: nil) }
|
||||||
let(:parser) { SearchQueryParser.new.parse('query') }
|
|
||||||
|
|
||||||
it 'sets attributes' do
|
let(:parser) { SearchQueryParser.new.parse(query) }
|
||||||
transformer = described_class.new.apply(parser)
|
|
||||||
|
|
||||||
expect(transformer.should_clauses.first).to be_nil
|
context 'with "hello world"' do
|
||||||
expect(transformer.must_clauses.first).to be_a(SearchQueryTransformer::TermClause)
|
let(:query) { 'hello world' }
|
||||||
expect(transformer.must_not_clauses.first).to be_nil
|
|
||||||
expect(transformer.filter_clauses.first).to be_nil
|
it 'transforms clauses' do
|
||||||
|
expect(subject.must_clauses.map(&:term)).to match_array %w(hello world)
|
||||||
|
expect(subject.must_not_clauses).to be_empty
|
||||||
|
expect(subject.filter_clauses).to be_empty
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
context 'with "hello -world"' do
|
||||||
|
let(:query) { 'hello -world' }
|
||||||
|
|
||||||
|
it 'transforms clauses' do
|
||||||
|
expect(subject.must_clauses.map(&:term)).to match_array %w(hello)
|
||||||
|
expect(subject.must_not_clauses.map(&:term)).to match_array %w(world)
|
||||||
|
expect(subject.filter_clauses).to be_empty
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
context 'with "hello is:reply"' do
|
||||||
|
let(:query) { 'hello is:reply' }
|
||||||
|
|
||||||
|
it 'transforms clauses' do
|
||||||
|
expect(subject.must_clauses.map(&:term)).to match_array %w(hello)
|
||||||
|
expect(subject.must_not_clauses).to be_empty
|
||||||
|
expect(subject.filter_clauses.map(&:term)).to match_array %w(reply)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
context 'with "foo: bar"' do
|
||||||
|
let(:query) { 'foo: bar' }
|
||||||
|
|
||||||
|
it 'transforms clauses' do
|
||||||
|
expect(subject.must_clauses.map(&:term)).to match_array %w(foo bar)
|
||||||
|
expect(subject.must_not_clauses).to be_empty
|
||||||
|
expect(subject.filter_clauses).to be_empty
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
context 'with "foo:bar"' do
|
||||||
|
let(:query) { 'foo:bar' }
|
||||||
|
|
||||||
|
it 'transforms clauses' do
|
||||||
|
expect(subject.must_clauses.map(&:term)).to contain_exactly('foo bar')
|
||||||
|
expect(subject.must_not_clauses).to be_empty
|
||||||
|
expect(subject.filter_clauses).to be_empty
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
Reference in New Issue