Project

General

Profile

Defect #16859 » encodings.rb

Andrey Ni, 2015-01-30 08:45

 
1
# encoding: utf-8
2

    
3
module Mail
4
  # Error class meant for attempts to decode an unknown encoding type.
  # (Nothing in the code visible here raises it.)
  class UnknownEncodingType < StandardError #:nodoc:
  end
7

    
8
  module Encodings
9

    
10
    include Mail::Patterns
11
    extend  Mail::Utilities
12

    
13
    # Registry of transfer encodings: keys are the names normalised by
    # Encodings.get_name, values are whatever was passed to Encodings.register.
    @transfer_encodings = {}
14

    
15
    # Registers +cls+ as the handler for the transfer encoding called +name+.
    #
    # The name is normalised with get_name first, so "Base64", :base64 and
    # "BASE64" all address the same registry slot. Registering the same name
    # twice replaces the earlier entry.
    #
    # Example:
    #
    #  Encodings.register "base64", Mail::Encodings::Base64
    def Encodings.register(name, cls)
      @transfer_encodings.store(get_name(name), cls)
    end
23

    
24
    # Tells whether a transfer encoding has been registered under the given
    # name. The name is normalised with get_name before the lookup.
    #
    # Example:
    #
    #  Encodings.defined?(:base64) #=> true
    def Encodings.defined?( str )
      @transfer_encodings.key?(get_name(str))
    end
32

    
33
    # Looks up the class registered for an encoding name (normalised through
    # get_name). Returns nil when nothing has been registered under that name.
    #
    # Only encodings that were added with Encodings.register can be found
    # here, which is what allows further encodings to be plugged in later.
    #
    # Example:
    #
    #  Encodings.get_encoding(:base64) #=> Mail::Encodings::Base64
    def Encodings.get_encoding( str )
      key = get_name(str)
      @transfer_encodings[key]
    end
44

    
45
    # Returns an Array with every class currently held in the registry.
    def Encodings.get_all
      registry = @transfer_encodings
      registry.values
    end
48

    
49
    # Normalises an encoding name into the form used as registry key:
    # converted to a String, dashes replaced by underscores, lower-cased.
    #
    #  Encodings.get_name("Quoted-Printable") #=> "quoted_printable"
    def Encodings.get_name(enc)
      enc.to_s.tr("-", "_").downcase
    end
52

    
53
    # Prepares a parameter value for use in a header field.
    #
    # * A string that is not ASCII-only is handed to RubyVer.param_encode.
    # * An ASCII-only string matching TOKEN_UNSAFE is returned wrapped in
    #   double quotes.
    # * Any other ASCII-only string is returned unchanged.
    #
    # According to the original documentation the language tag used by the
    # encoded form can be set through Mail::Configuration (that code is not
    # visible here):
    #
    #  Mail.defaults do
    #    param_encode_language 'jp'
    #  end
    #
    # and the character set is either $KCODE (Ruby < 1.9) or the encoding of
    # the string passed in.
    def Encodings.param_encode(str)
      return RubyVer.param_encode(str) unless str.ascii_only?

      if str =~ TOKEN_UNSAFE
        %Q{"#{str}"}
      else
        str
      end
    end
76

    
77
    # Decodes a URI-escaped parameter value. This is a thin wrapper: all of
    # the work is delegated to RubyVer.param_decode.
    #
    # Examples (behaviour provided by RubyVer.param_decode):
    #
    #  Mail::Encodings.param_decode("This%20is%20fun", 'us-ascii') #=> "This is fun"
    #
    #  str = Mail::Encodings.param_decode("This%20is%20fun", 'iso-8859-1')
    #  str.encoding #=> 'ISO-8859-1'      ## Only on Ruby 1.9
    #  str #=> "This is fun"
    def Encodings.param_decode(str, encoding)
      RubyVer.param_decode(str, encoding)
    end
89

    
90
    # Decodes or encodes a header value in the
    # =?<encoding>?[QB]?<string>?= format.
    #
    # Pass :decode as +output_type+ to decode +str+ with value_decode; any
    # other value (conventionally :encode) encodes it.
    #
    # When encoding, ASCII-only strings are returned untouched; everything
    # else is always emitted as Base64 ("B") encoded-words, using the charset
    # reported by find_encoding (the string's own encoding, or $KCODE on
    # Ruby < 1.9).
    def Encodings.decode_encode(str, output_type)
      return Encodings.value_decode(str) if output_type == :decode
      return str if str.ascii_only?

      Encodings.b_value_encode(str, find_encoding(str))
    end
110

    
111
    # Decodes every encoded-word (=?<encoding>?[QB]?<string>?=) found in
    # +str+, choosing Base64 or Quoted-Printable decoding from the B/Q marker
    # of each word. Text outside of encoded-words is passed through as is.
    #
    # Returns +str+ itself when it contains no encoded-word at all, otherwise
    # a new String assembled from the decoded pieces.
    def Encodings.value_decode(str)
      # Fast path: without any encoded-word there is nothing to decode
      return str unless str =~ /\=\?[^?]+\?[QB]\?[^?]+?\?\=/xmi

      lines = collapse_adjacent_encodings(str)

      # Split on white-space with a capture group so the white-space itself is kept
      pieces = lines.map do |line|
        line.split(/([ \t])/).map do |text|
          if text.index('=?')
            # Search for occurrences of quoted strings or plain strings
            text.scan(/(                                 # Group around entire regex to include it in matches
                        \=\?[^?]+\?([QB])\?[^?]+?\?\=    # Quoted String with subgroup for encoding method
                        |                                # or
                        .+?(?=\=\?|$)                    # Plain String
                      )/xmi).map do |string, method|
              case method
              when 'b', 'B' then b_value_decode(string)
              when 'q', 'Q' then q_value_decode(string)
              else string # plain text: no encoding marker was captured
              end
            end
          else
            text
          end
        end
      end

      pieces.flatten.join("")
    end
146

    
147
    # Takes an encoded string of the format =?<encoding>?[QB]?<string>?=,
    # decodes it with value_decode and converts the result to +to_encoding+.
    #
    # * When +to_encoding+ is nil/false or names UTF-8 ("utf8", "UTF-8", ...)
    #   the decoded text is returned without conversion.
    # * Otherwise the text is converted with String#encode (Ruby >= 1.9) or
    #   Iconv (older Rubies).
    #
    # If the conversion fails -- the text cannot be represented in the target
    # charset, or the charset is unknown (e.g. X-UNKNOWN) -- there is not much
    # that can be done, so the unconverted text is returned instead.
    #
    # Each branch rescues its own error classes: Iconv is never loaded on
    # Ruby >= 1.9, so naming Iconv::* in a shared rescue clause would itself
    # raise NameError as soon as String#encode failed.
    def Encodings.unquote_and_convert_to(str, to_encoding)
      output = value_decode( str ).to_s # output is already converted to UTF-8

      return output unless to_encoding
      return output if 'utf8' == to_encoding.to_s.downcase.gsub("-", "")

      if RUBY_VERSION >= '1.9'
        begin
          output.encode(to_encoding)
        rescue EncodingError
          # Covers undefined conversions, invalid byte sequences and unknown
          # converters alike.
          output
        end
      else
        require 'iconv'
        begin
          Iconv.iconv(to_encoding, 'UTF-8', output).first
        rescue Iconv::IllegalSequence, Iconv::InvalidEncoding, Errno::EINVAL
          output
        end
      end
    end
174

    
175
    # Encodes the non US-ASCII parts of an address (or of a list of
    # addresses) for use in a header.
    #
    # * An Array is processed element by element (nil entries are dropped)
    #   and the results are joined with ", ".
    # * A single address is passed to encode_non_usascii.
    # * nil (or false) yields nil.
    #
    # +charset+ defaults to 'utf-8'.
    def Encodings.address_encode(address, charset = 'utf-8')
      unless address.is_a?(Array)
        # find any word boundary that is not ascii and encode it
        return address ? encode_non_usascii(address, charset) : nil
      end

      # recurse for each element of the list
      encoded = address.compact.map { |item| Encodings.address_encode(item, charset) }
      encoded.join(", ")
    end
184

    
185
    # Base64-encodes every part of +address+ that contains non US-ASCII
    # characters and returns the resulting String.
    #
    # +address+ is returned untouched when it is ASCII-only or when
    # +charset+ is nil.
    def Encodings.encode_non_usascii(address, charset)
      return address if address.ascii_only? || charset.nil?

      us_ascii = %Q{\x00-\x7f}
      # First pass: double-quoted sections containing non US-ASCII characters
      # are unquoted and encoded as a whole.
      remaining = address.gsub(/(".*?[^#{us_ascii}].*?")/) do |quoted|
        Encodings.b_value_encode(unquote(quoted), charset)
      end

      # Second pass: whatever is left is handled word by word.
      words = remaining.split(/\s/)
      encoded = map_with_index(words) do |word, idx|
        next word if word.ascii_only?

        # When the preceding word is non-ASCII as well, the separating space
        # is put inside this encoded word (presumably because white-space
        # between adjacent encoded-words is dropped on decoding -- see
        # RFC 2047; confirm).
        follows_non_ascii = idx > 0 && words[idx - 1] && !words[idx - 1].ascii_only?
        word = " #{word}" if follows_non_ascii
        Encodings.b_value_encode(word, charset)
      end

      encoded.join(' ')
    end
204

    
205
    # Base64-encodes a string and wraps it so that it can be used as a field
    # value, i.e. in the =?<charset>?B?<string>?= format. The encoded payload
    # and the charset label both come from RubyVer.b_value_encode; every line
    # of the payload becomes its own encoded-word and the words are joined
    # with a single space.
    #
    # ASCII-only input is returned unchanged.
    #
    # Example:
    #
    #  Encodings.b_value_encode('This is あ string', 'UTF-8')
    #  #=> "=?UTF-8?B?VGhpcyBpcyDjgYIgc3RyaW5n?="
    def Encodings.b_value_encode(encoded_str, encoding = nil)
      return encoded_str if encoded_str.to_s.ascii_only?

      payload, charset = RubyVer.b_value_encode(encoded_str, encoding)
      words = map_lines(payload) { |chunk| "=?#{charset}?B?#{chunk.chomp}?=" }
      words.join(" ")
    end
219

    
220
    # Quoted-Printable-encodes a string and wraps it so that it can be used
    # as a field value, i.e. in the =?<charset>?Q?<string>?= format. The
    # encoded payload and the charset label both come from
    # RubyVer.q_value_encode; every line of the payload becomes its own
    # encoded-word (spaces written as "_") and the words are joined with a
    # single space.
    #
    # ASCII-only input is returned unchanged.
    #
    # Example:
    #
    #  Encodings.q_value_encode('This is あ string', 'UTF-8')
    #  #=> "=?UTF-8?Q?This_is_=E3=81=82_string?="
    def Encodings.q_value_encode(encoded_str, encoding = nil)
      return encoded_str if encoded_str.to_s.ascii_only?

      payload, charset = RubyVer.q_value_encode(encoded_str, encoding)
      # Drop soft line breaks: the string has already been limited to the length we want
      payload.gsub!("=\r\n", '')
      words = map_lines(payload) { |chunk| "=?#{charset}?Q?#{chunk.chomp.gsub(/ /, '_')}?=" }
      words.join(" ")
    end
235

    
236
    # NOTE(review): a bare `private` only changes the default visibility of
    # instance methods defined after it. The methods below are defined as
    # `def Encodings.xxx` (singleton methods), so this call does not make
    # them private.
    private
237

    
238
    # Decodes a Base64 encoded-word such as
    # "=?UTF-8?B?VGhpcyBpcyDjgYIgc3RyaW5n?=". Thin wrapper that delegates to
    # RubyVer.b_value_decode.
    #
    # Example:
    #
    #  Encodings.b_value_decode("=?UTF-8?B?VGhpcyBpcyDjgYIgc3RyaW5n?=")
    #  #=> 'This is あ string'
    def Encodings.b_value_decode(str)
      RubyVer.b_value_decode(str)
    end
247

    
248
    # Decodes a Quoted-Printable encoded-word such as
    # "=?UTF-8?Q?This_is_=E3=81=82_string?=". Thin wrapper that delegates to
    # RubyVer.q_value_decode.
    #
    # Example:
    #
    #  Encodings.q_value_decode("=?UTF-8?Q?This_is_=E3=81=82_string?=")
    #  #=> 'This is あ string'
    def Encodings.q_value_decode(str)
      RubyVer.q_value_decode(str)
    end
257

    
258
    # Extracts the charset label (the <encoding> part) from a string holding
    # an encoded-word of the form =?<encoding>?[QB]?<string>?=.
    #
    # Returns nil when the string contains no such encoded-word.
    def Encodings.split_encoding_from_string( str )
      found = str.match(/\=\?([^?]+)?\?[QB]\?(.+)?\?\=/mi)
      found[1] if found
    end
266

    
267
    # Reports the character set to label +str+ with: the string's own
    # encoding on Ruby >= 1.9, the global $KCODE on older Rubies.
    def Encodings.find_encoding(str)
      if RUBY_VERSION >= '1.9'
        str.encoding
      else
        $KCODE
      end
    end
270

    
271
    # Extracts the encoding marker (Q or B, in whatever letter case it was
    # written) from a string holding an encoded-word of the form
    # =?<encoding>?[QB]?<string>?=.
    #
    # Returns nil when the string contains no such encoded-word.
    def Encodings.split_value_encoding_from_string(str)
      found = str.match(/\=\?[^?]+?\?([QB])\?(.+)?\?\=/mi)
      found[1] if found
    end
280

    
281
    # Splits +str+ at every boundary between two adjacent encoded-words
    # ("?=" followed by optional white-space and "=?"), dropping that
    # white-space, and joins neighbouring pieces that use the same encoding
    # marker (Q or B) back into a single entry.
    #
    # String is expected to be of the format =?<encoding>?[QB]?<string>?=
    #
    # Returns an Array of Strings.
    def Encodings.collapse_adjacent_encodings(str)
      lines = str.split(/(\?=)\s*(=\?)/).each_slice(2).map(&:join)
      results = []
      previous_encoding = nil

      lines.each do |line|
        encoding = split_value_encoding_from_string(line)

        # Merge only when there is an earlier piece to merge with. Without
        # that check a first piece that carries no encoded-word (encoding is
        # nil, just like the initial previous_encoding) would end up calling
        # nil + line and raise NoMethodError.
        if encoding == previous_encoding && !results.empty?
          line = results.pop + line
          # Stripping the inner "?==?<charset>?[QB]?" delimiters is left
          # disabled in this revision; the joined encoded-words stay intact:
          # line.gsub!(/\?\=\=\?.+?\?[QqBb]\?/m, '')
        end

        previous_encoding = encoding
        results << line
      end

      results
    end
304
  end
305
end
(11-11/11)