fix-tokenization-for-phrases-with-non-ascii-characte.patch

Jens Krämer, 2015-09-13 05:53

Download (1.39 KB)

View differences:

lib/redmine/search.rb
58 58

  
59 59
        # extract tokens from the question
60 60
        # eg. hello "bye bye" => ["hello", "bye bye"]
61
        @tokens = @question.scan(%r{((\s|^)"[\s\w]+"(\s|$)|\S+)}).collect {|m| m.first.gsub(%r{(^\s*"\s*|\s*"\s*$)}, '')}
61
        @tokens = @question.scan(%r{((\s|^)"[^"]+"(\s|$)|\S+)}).collect {|m| m.first.gsub(%r{(^\s*"\s*|\s*"\s*$)}, '')}
62 62
        # tokens must be at least 2 characters long
63 63
        @tokens = @tokens.uniq.select {|w| w.length > 1 }
64 64
        # no more than 5 tokens to search for
test/unit/search_test.rb
190 190
    Redmine::Database.reset
191 191
  end
192 192

  
193
  def test_fetcher_should_handle_accents_in_phrases
194
    f = Redmine::Search::Fetcher.new('No special chars "in a phrase"', User.anonymous, %w(issues), Project.all)
195
    assert_equal ['No', 'special', 'chars', 'in a phrase'], f.tokens
196

  
197
    f = Redmine::Search::Fetcher.new('Special chars "in a phrase Öö"', User.anonymous, %w(issues), Project.all)
198
    assert_equal ['Special', 'chars', 'in a phrase Öö'], f.tokens
199
  end
200

  
193 201
  private
194 202

  
195 203
  def remove_permission(role, permission)