0001-tokenize-query-strings-for-Issue.like-and-Query-sql_.patch

Jens Krämer, 2021-07-07 08:42

Download (3.93 KB)

View differences:

app/models/issue.rb
100 100
    ids.any? ? where(:assigned_to_id => ids) : none
101 101
  end)
102 102
  # Scope that restricts records to those whose subject matches the search
  # string +q+; the body only adds a condition when +q+ is present.
  #
  # NOTE(review): this span is a rendered diff hunk. The bare numbers between
  # the code lines look like old/new diff line numbers, and both +where+ calls
  # below are shown together -- presumably the LOWER(...) LIKE line (and the
  # +q.to_s+ coercion) are the removed side and the tokenized +where+ is the
  # added side. Confirm against the raw patch before relying on this.
  scope :like, (lambda do |q|
103
    q = q.to_s
104 103
    if q.present?
105
      # Single substring match of the whole, LIKE-escaped query on the subject.
      where("LOWER(#{table_name}.subject) LIKE LOWER(?)", "%#{sanitize_sql_like q}%")
104
      # Delegates to Query.tokenized_like_conditions and splats its result
      # (SQL fragment followed by bind values) into +where+.
      where(*::Query.tokenized_like_conditions("#{table_name}.subject", q))
106 105
    end
107 106
  end)
108 107

  
app/models/query.rb
1440 1440
    prefix = suffix = nil
1441 1441
    prefix = '%' if options[:ends_with]
1442 1442
    suffix = '%' if options[:starts_with]
1443
    prefix = suffix = '%' if prefix.nil? && suffix.nil?
1444
    value = queried_class.sanitize_sql_like value
1445
    queried_class.sanitize_sql_for_conditions([
1446
      Redmine::Database.like(db_field, '?', :match => options[:match]),
1447
      "#{prefix}#{value}#{suffix}"
1448
    ])
1443
    if prefix || suffix
1444
      value = queried_class.sanitize_sql_like value
1445
      queried_class.sanitize_sql_for_conditions([
1446
        Redmine::Database.like(db_field, '?', :match => options[:match]),
1447
        "#{prefix}#{value}#{suffix}"
1448
      ])
1449
    else
1450
      queried_class.sanitize_sql_for_conditions(
1451
        ::Query.tokenized_like_conditions(db_field, value, **options)
1452
      )
1453
    end
1454
  end
1455

  
1456
  # Builds a condition array that requires +db_field+ to contain every search
  # token extracted from +value+.
  #
  # db_field - SQL expression (column name) to match against.
  # value    - the search string; it is tokenized by Redmine::Search::Tokenizer.
  # options  - passed through unchanged to Redmine::Database.like.
  #
  # Returns an Array whose first element is the SQL fragment (one LIKE clause
  # per token, joined with " AND ") followed by one "%token%" bind value per
  # clause, suitable for splatting into +where+ or for
  # +sanitize_sql_for_conditions+.
  def self.tokenized_like_conditions(db_field, value, **options)
    tokens = Redmine::Search::Tokenizer.new(value).tokens
    # The tokenizer can return nothing (e.g. a lone one-character query), so
    # fall back to matching the whole value. Coerce it to a String first: the
    # tokenizer does its own +to_s+, but sanitize_sql_like needs a String and
    # would otherwise fail for short non-String values such as the Integer 5.
    tokens = [value.to_s] if tokens.blank?

    sql = tokens.map { Redmine::Database.like(db_field, '?', options) }.join(" AND ")
    values = tokens.map { |token| "%#{sanitize_sql_like(token)}%" }
    [sql, *values]
  end
1450 1464

  
1451 1465
  # Adds a filter for the given custom field
lib/redmine/search.rb
57 57
        @projects = projects
58 58
        @cache = options.delete(:cache)
59 59
        @options = options
60

  
61
        # extract tokens from the question
62
        # eg. hello "bye bye" => ["hello", "bye bye"]
63
        @tokens = @question.scan(%r{((\s|^)"[^"]+"(\s|$)|\S+)}).collect {|m| m.first.gsub(%r{(^\s*"\s*|\s*"\s*$)}, '')}
64
        # tokens must be at least 2 characters long
65
        # but for Chinese characters (Chinese HANZI/Japanese KANJI), tokens can be one character
66
        @tokens = @tokens.uniq.select {|w| w.length > 1 || w =~ /\p{Han}/}
67
        # no more than 5 tokens to search for
68
        @tokens.slice! 5..-1
60
        @tokens = Tokenizer.new(@question).tokens
69 61
      end
70 62

  
71 63
      # Returns the total result count
......
135 127
      end
136 128
    end
137 129

  
130
    # Splits a free-text search question into the list of tokens to search for.
    class Tokenizer
      # Maximum number of tokens returned by #tokens.
      MAX_TOKENS = 5

      # Matches either a double-quoted phrase delimited by whitespace or a line
      # boundary, or a run of non-whitespace characters. The closing delimiter
      # is a lookahead so the whitespace after one quoted phrase is still
      # available as the opening delimiter of a directly following one;
      # consuming it made '"bye bye" "hello world"' split the second phrase
      # into separate words.
      TOKEN_PATTERN = %r{((\s|^)"[^"]+"(?=\s|$)|\S+)}.freeze

      # Matches a quote, with any surrounding whitespace, at either end of a
      # raw token.
      QUOTE_PATTERN = %r{(^\s*"\s*|\s*"\s*$)}.freeze

      # question - the raw search string; anything responding to +to_s+
      #            (nil becomes the empty string).
      def initialize(question)
        @question = question.to_s
      end

      # Extracts the tokens from the question,
      # eg. hello "bye bye" => ["hello", "bye bye"]
      #
      # Duplicates are removed. Tokens must be at least 2 characters long,
      # except that a single Han character (Chinese HANZI/Japanese KANJI) is
      # kept. At most MAX_TOKENS tokens are returned, in order of appearance.
      #
      # Returns an Array of Strings (empty when nothing qualifies).
      def tokens
        raw_tokens = @question.scan(TOKEN_PATTERN).map { |m| m.first.gsub(QUOTE_PATTERN, '') }
        raw_tokens.uniq.select { |w| w.length > 1 || w.match?(/\p{Han}/) }.first(MAX_TOKENS)
      end
    end
145

  
138 146
    module Controller
139 147
      def self.included(base)
140 148
        base.extend(ClassMethods)
141
-