support-ideographic-space-separator.patch

Go MAEDA, 2022-11-02 07:59

Download (1.43 KB)

View differences:

lib/redmine/search.rb
135 135
      def tokens
136 136
        # extract tokens from the question
137 137
        # eg. hello "bye bye" => ["hello", "bye bye"]
138
        tokens = @question.scan(%r{((\s|^)"[^"]+"(\s|$)|\S+)}).collect {|m| m.first.gsub(%r{(^\s*"\s*|\s*"\s*$)}, '')}
138
        tokens = @question.scan(%r{(([[:space:]]|^)"[^"]+"([[:space:]]|$)|[[:^space:]]+)}).collect {|m| m.first.gsub(%r{(^[[:space:]]*"[[:space:]]*|[[:space:]]*"[[:space:]]*$)}, '')}
139 139
        # tokens must be at least 2 characters long
140 140
        # but for Chinese characters (Chinese HANZI/Japanese KANJI), tokens can be one character
141 141
        # no more than 5 tokens to search for
test/unit/lib/redmine/search_test.rb
24 24
    value = "hello \"bye bye\""
25 25
    assert_equal ["hello", "bye bye"], Redmine::Search::Tokenizer.new(value).tokens
26 26
  end
27

  
28
  def test_tokenize_should_consider_ideographic_space_as_separator
29
    value = "全角　スペース"  # The space character is U+3000, not U+0020
30
    assert_equal ["全角", "スペース"], Redmine::Search::Tokenizer.new(value).tokens
31
  end
27 32
end