Defect #41464 » fix-guess_encoding.patch
| app/models/import.rb | ||
|---|---|---|
| 69 | 69 |
encoding = lu(user, :general_csv_encoding) |
| 70 | 70 |
if file_exists? |
| 71 | 71 |
begin |
| 72 |
content = File.read(filepath, 256)
|
|
| 72 |
content = File.read(filepath, 4.kilobytes)
|
|
| 73 | 73 | |
| 74 | 74 |
separator = [',', ';'].max_by {|sep| content.count(sep)}
|
| 75 | 75 |
wrapper = ['"', "'"].max_by {|quote_char| content.count(quote_char)}
|
| lib/redmine/codeset_util.rb | ||
|---|---|---|
| 96 | 96 |
return if str.nil? |
| 97 | 97 | |
| 98 | 98 |
str = str.dup |
| 99 |
# Truncate the data after the last LF character (when its index is at |
|
| 100 |
# least 64) so that the data cannot end with a partial multibyte |
|
| 101 |
# character, which could make `String#valid_encoding?` return false. |
|
| 102 |
last_lf_index = str.rindex("\n")
|
|
| 103 |
str = str[..last_lf_index] if last_lf_index.to_i >= 64 |
|
| 104 | ||
| 99 | 105 |
encodings = Setting.repositories_encodings.split(',').collect(&:strip)
|
| 100 | 106 |
encodings = encodings.presence || ['UTF-8'] |
| 101 | 107 | |
| test/unit/lib/redmine/codeset_util_test.rb | ||
|---|---|---|
| 118 | 118 |
assert_nil Redmine::CodesetUtil.guess_encoding(str) |
| 119 | 119 |
end |
| 120 | 120 |
end |
| 121 | ||
| 122 |
def test_guess_encoding_handles_trailing_partial_multibyte_character |
|
| 123 |
str = <<~STR |
|
| 124 |
いろはにほへと ちりぬるを |
|
| 125 |
わかよたれそ つねならむ |
|
| 126 |
うゐのおくやま けふこえて |
|
| 127 |
あさきゆめみし ゑひもせす |
|
| 128 |
色は匂へど 散りぬるを |
|
| 129 |
我が世誰ぞ 常ならむ |
|
| 130 |
有為の奥山 今日越えて |
|
| 131 |
浅き夢見し 酔ひもせず |
|
| 132 |
STR |
|
| 133 | ||
| 134 |
# UTF-8 string truncated at an incomplete character boundary |
|
| 135 |
# str.byteslice(0, 256) => "いろは...\n浅き夢見\xE3\x81" |
|
| 136 |
# "\xE3\x81" is the first two bytes of "し" ("\xE3\x81\x97")
|
|
| 137 |
str_with_partial_char = str.byteslice(0, 256) |
|
| 138 |
assert_not str_with_partial_char.valid_encoding? |
|
| 139 |
assert_equal 'UTF-8', Redmine::CodesetUtil.guess_encoding(str_with_partial_char) |
|
| 140 |
end |
|
| 121 | 141 |
end |