encode.rb

Drew Keller, 2011-08-08 22:23

Download (13.5 KB)

 
1
#--
2
# = COPYRIGHT:
3
#
4
#   Copyright (c) 1998-2003 Minero Aoki <aamine@loveruby.net>
5
#
6
#   Permission is hereby granted, free of charge, to any person obtaining
7
#   a copy of this software and associated documentation files (the
8
#   "Software"), to deal in the Software without restriction, including
9
#   without limitation the rights to use, copy, modify, merge, publish,
10
#   distribute, sublicense, and/or sell copies of the Software, and to
11
#   permit persons to whom the Software is furnished to do so, subject to
12
#   the following conditions:
13
#
14
#   The above copyright notice and this permission notice shall be
15
#   included in all copies or substantial portions of the Software.
16
#
17
#   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18
#   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19
#   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20
#   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
21
#   LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
22
#   OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
23
#   WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24
#
25
#   Note: Originally licensed under LGPL v2+. Using MIT license for Rails
26
#   with permission of Minero Aoki.
27
#++
28
#:stopdoc:
29
require 'nkf'
30
require 'tmail/base64'
31
require 'tmail/stringio'
32
require 'tmail/utils'
33
#:startdoc:
34

    
35

    
36
module TMail
37
  
38
  #:stopdoc:
39
  # Module-level KCODE setting, readable and writable as TMail.KCODE.
  # Decoder.decode and Encoder#initialize look it up to pick their NKF
  # options; it starts out as 'NONE'.
  class << self
    attr_reader :KCODE
    attr_writer :KCODE
  end
  self.KCODE = 'NONE'
43

    
44
  module StrategyInterface

    # Normalizes +obj+ into a destination the strategy classes can write to:
    #
    # * nil                -> a new StringOutput
    # * String             -> a StringOutput constructed from that string
    # * IO or StringOutput -> returned unchanged
    #
    # Any other object raises TypeError.
    def create_dest( obj )
      return StringOutput.new      if obj.nil?
      return StringOutput.new(obj) if String === obj
      return obj                   if IO === obj || StringOutput === obj

      raise TypeError, 'cannot handle this type of object for dest'
    end
    module_function :create_dest

    #:startdoc:
    # Returns the TMail object encoded and ready to be sent via SMTP etc.
    # Call this before packaging up your email so that every value that
    # needs escaping is escaped and the lines are wrapped.
    #
    # It is also a good idea to call this before you marshal or serialize
    # a TMail object.
    #
    # The work is done by running an Encoder over this object (see
    # accept_strategy); +eol+ and +charset+ are handed to the Encoder and
    # +dest+ is where the output is written.
    #
    # For Example:
    #
    #  email = TMail::Load(my_email_file)
    #  email_to_send = email.encoded
    def encoded( eol = "\r\n", charset = 'j', dest = nil )
      accept_strategy(Encoder, eol, charset, dest)
    end

    # Returns the TMail object decoded and ready to be used by you, your
    # program etc.  Also available as to_s.
    #
    # The work is done by running a Decoder over this object (see
    # accept_strategy); +eol+ and +charset+ are handed to the Decoder and
    # +dest+ is where the output is written.
    #
    # For Example:
    #
    #  email = TMail::Load(my_email_file)
    #  readable_email = email.decoded
    def decoded( eol = "\n", charset = 'e', dest = nil )
      accept_strategy(Decoder, eol, charset, dest)
    end

    alias to_s decoded

    # Builds a +klass+ strategy writing to +dest+ (an empty String when no
    # destination is given), passes it to #accept -- which the including
    # class must provide -- and returns +dest+.
    def accept_strategy( klass, eol, charset, dest = nil ) #:nodoc:
      dest ||= ''
      strategy = klass.new(create_dest(dest), charset, eol)
      accept strategy
      dest
    end

  end
103

    
104
  #:stopdoc:
105

    
106
  ###
107
  ### MIME B encoding decoder
108
  ###
109

    
110
  # Output strategy that writes header and body pieces to a destination,
  # passing text through NKF so that MIME encoded-words are decoded.
  class Decoder

    include TextUtils

    # Pattern source for one MIME encoded-word (B or Q) in one of the three
    # Japanese charsets recognised here.
    encoded = '=\?(?:iso-2022-jp|euc-jp|shift_jis)\?[QB]\?[a-z0-9+/=]+\?='
    # One encoded-word, optionally followed by further encoded-words that
    # are separated from it only by whitespace.
    ENCODED_WORDS = /#{encoded}(?:\s+#{encoded})*/i
    #SPACER       = "\t"

    # TMail.KCODE value -> flag letter appended to the NKF option string.
    OUTPUT_ENCODING = {
      'EUC'  => 'e',
      'SJIS' => 's',
    }

    # Returns a copy of +str+ in which every run of encoded-words has been
    # replaced by NKF's conversion of it.  +encoding+ is the flag letter
    # appended to '-mS'; when nil it is looked up from TMail.KCODE and
    # falls back to 'j'.
    def self.decode( str, encoding = nil )
      flag = encoding || OUTPUT_ENCODING[TMail.KCODE] || 'j'
      nkf_option = '-mS' + flag
      str.gsub(ENCODED_WORDS) {|words| NKF.nkf(nkf_option, words) }
    end

    # dest     - anything StrategyInterface.create_dest accepts.
    # encoding - kept only when it starts with 'e', 'j' or 's' (first
    #            character is stored); otherwise @encoding stays nil.
    # eol      - string written by #puts at the end of each line.
    def initialize( dest, encoding = nil, eol = "\n" )
      @f = StrategyInterface.create_dest(dest)
      @encoding = nil
      @encoding = encoding[0, 1] if /\A[ejs]/ === encoding
      @eol = eol
    end

    # Instance-level shortcut for Decoder.decode using this decoder's
    # encoding flag.
    def decode( str )
      self.class.decode(str, @encoding)
    end
    private :decode

    # Nothing is buffered by this strategy, so there is nothing to finish.
    def terminate
    end

    # Writes a whole header line, decoded.
    def header_line( str )
      @f << decode(str)
    end

    # Writes the field name followed by ': ' exactly as given.
    def header_name( nm )
      @f << nm << ': '
    end

    # Writes a header body, decoded.
    def header_body( str )
      @f << decode(str)
    end

    # Writes a single space.
    def space
      @f << ' '
    end

    alias spc space

    # Writes linear whitespace unchanged.
    def lwsp( str )
      @f << str
    end

    # Writes a structural token unchanged.
    def meta( str )
      @f << str
    end

    #def puts_meta( str )
    #  @f << str
    #end

    # Writes free text, decoded.
    def text( str )
      @f << decode(str)
    end

    # Writes a phrase: decoded first, then passed through quote_phrase.
    def phrase( str )
      @f << quote_phrase(decode(str))
    end

    # Writes a "key=value" parameter, double-quoting the value when it is
    # not token-safe.  A nil value is skipped, matching Encoder#kv_pair;
    # without the guard the nil would be appended to the destination.
    def kv_pair( k, v )
      return if v.nil?
      value = token_safe?(v) ? v : dquote(v)
      @f << k << '=' << value
    end

    # Writes +str+ (when given) followed by the configured end-of-line.
    def puts( str = nil )
      str && (@f << str)
      @f << @eol
    end

    # Writes +str+ unchanged.
    def write( str )
      @f << str
    end

  end
196

    
197

    
198
  ###
199
  ### MIME B-encoding encoder
200
  ###
201

    
202
  #
203
  # FIXME: This class can handle only (euc-jp/shift_jis -> iso-2022-jp).
204
  #
205
  class Encoder
206

    
207
    include TextUtils
208

    
209
    # Debug switch checked by do_encode and concat_E before they print their
    # trace output.  Defaults to false unless a BENCODE_DEBUG constant is
    # already visible at this point.
    BENCODE_DEBUG = false unless defined?(BENCODE_DEBUG)
210

    
211
    # Convenience entry point: runs +str+ through a fresh Encoder as a
    # header body, finishes the encoder and returns what was written
    # (the destination's #string).
    def Encoder.encode( str )
      encoder = new
      encoder.header_body(str)
      encoder.terminate
      encoder.dest.string
    end
217

    
218
    # Whitespace placed at the start of a folded continuation line.
    SPACER = "\t"
    # Line budget that restsize measures against.
    MAX_LINE_LEN = 78
    # Line budget that mazsize / fold_the_string measure against.
    RFC_2822_MAX_LENGTH = 998

    # NKF option string for each TMail.KCODE value; nil means the text is
    # passed through normalize_encoding untouched.
    OPTIONS = {
      'EUC' => '-Ej -m0', 'SJIS' => '-Sj -m0',
      'UTF8' => nil, # FIXME
      'NONE' => nil
    }
228

    
229
    # dest - anything StrategyInterface.create_dest accepts (nil included).
    # eol  - line terminator written by #puts and when folding.
    #
    # +encoding+ and +limit+ are accepted but not used in this method; the
    # NKF options come from TMail.KCODE via OPTIONS.
    def initialize( dest = nil, encoding = nil, eol = "\r\n", limit = nil )
      @f = StrategyInterface.create_dest(dest)
      @eol = eol
      @opt = OPTIONS[TMail.KCODE]
      @preserve_quotes = true
      @folded = false
      reset
    end
237

    
238
    # Sets the preserve-quotes flag (true after #initialize).
    # The previous body only read @preserve_quotes and discarded +bool+,
    # so the flag could never be changed.
    def preserve_quotes=( bool )
      @preserve_quotes = bool
    end

    # Current value of the preserve-quotes flag.
    def preserve_quotes
      @preserve_quotes
    end
245

    
246
    # Converts +str+ with NKF using the options chosen in #initialize.
    # When no options are configured (@opt is nil/false) the very same
    # string object is returned.
    def normalize_encoding( str )
      return str unless @opt
      NKF.nkf(@opt, str)
    end
252

    
253
    # Clears the pending text and whitespace buffers and the current line
    # length counter.
    def reset
      @curlen = 0
      @lwsp = ''
      @text = ''
    end
258

    
259
    # Finishes the current output: queuing an empty whitespace run makes
    # add_lwsp flush whatever is pending, then the buffers are reset.
    def terminate
      add_lwsp('')
      reset
    end
263

    
264
    # The destination object this encoder writes to.
    def dest()
      @f
    end
267

    
268
    # Writes +str+ (when given) straight to the destination, bypassing the
    # fold buffers, followed by the configured end-of-line.
    def puts( str = nil )
      str && (@f << str)
      @f << @eol
    end
272

    
273
    # Writes +str+ straight to the destination, bypassing the fold buffers.
    def write( str )
      @f.<<(str)
    end
276

    
277
    #
278
    # add
279
    #
280

    
281
    # Adds a complete header line; it is scanned as given, without going
    # through normalize_encoding.
    def header_line( line )
      scanadd(line)
    end
284

    
285
    # Adds a header field name with each '-'-separated part capitalized
    # (e.g. "content-type" becomes "Content-Type"), followed by ':' and a
    # pending single space.
    def header_name( name )
      canonical = name.split(/-/).map {|part| part.capitalize }.join('-')
      add_text(canonical)
      add_text(':')
      add_lwsp(' ')
    end
290

    
291
    # Adds a header body: normalized first, then scanned into segments.
    def header_body( str )
      normalized = normalize_encoding(str)
      scanadd(normalized)
    end
294

    
295
    # Queues a single space as linear whitespace.
    def space()
      add_lwsp(' ')
    end
298

    
299
    alias spc space
300

    
301
    # Queues linear whitespace, dropping everything from the last run of
    # CR/LF characters to the end of +str+.
    def lwsp( str )
      trimmed = str.sub(/[\r\n]+[^\r\n]*\z/, '')
      add_lwsp(trimmed)
    end
304

    
305
    # Adds a structural token to the pending text unchanged.
    def meta( str )
      add_text(str)
    end
308

    
309
    #def puts_meta( str )
310
    #  add_text str + @eol + SPACER
311
    #end
312

    
313
    # Adds free text: normalized first, then scanned into segments.
    def text( str )
      normalized = normalize_encoding(str)
      scanadd(normalized)
    end
316

    
317
    # Adds a phrase.  After normalization, a phrase matching CONTROL_CHAR
    # is scanned (so it can be encoded); any other phrase is added as text
    # after quote_phrase.
    def phrase( str )
      normalized = normalize_encoding(str)
      return scanadd(normalized) if CONTROL_CHAR === normalized

      add_text quote_phrase(normalized)
    end
325

    
326
    # FIXME: implement line folding
327
    #
328
    # Adds a "key=value" parameter; a nil value adds nothing.
    #
    # After normalization the value is rendered as:
    # * k=v                          when token_safe?(v)
    # * k=<quote_token(v)>           when v does not match CONTROL_CHAR
    # * k*=iso-2022-jp'ja'<encoded>  otherwise (RFC2231 encoding)
    def kv_pair( k, v )
      return if v.nil?

      value = normalize_encoding(v)
      rendered =
        if token_safe?(value)
          k + '=' + value
        elsif CONTROL_CHAR === value
          # apply RFC2231 encoding
          k + '*=' + "iso-2022-jp'ja'" + encode_value(value)
        else
          k + '=' + quote_token(value)
        end
      add_text rendered
    end
341

    
342
    # Percent-encodes every TOKEN_UNSAFE match in +str+ as %xx (lower-case
    # hex), one escape per byte of the match.
    #
    # The bytes are taken with unpack so the result is the same on every
    # Ruby version: indexing the match with [0] yields an Integer only on
    # Ruby 1.8, and on later versions the resulting one-character String
    # cannot be formatted with %x.
    def encode_value( str )
      str.gsub(TOKEN_UNSAFE) do |unsafe|
        unsafe.unpack('C*').map {|byte| '%%%02x' % byte }.join
      end
    end
345

    
346
    private
347

    
348
    # Splits +str+ into typed segments and hands them to do_encode.
    #
    # Segment types (one character per segment in +types+):
    # * 'a' - a run without ESC or whitespace ('j' instead when +force+)
    # * 's' - a run of tab / CR / LF / space
    # * 'j' - the text following a three-byte escape sequence other than
    #         "\e(B", up to the next ESC; the escape itself is dropped
    #
    # Raises RuntimeError when the remaining input matches none of these.
    #
    # Scanning is byte-wise, so on Rubies with string encodings a binary
    # *copy* of +str+ is scanned; the caller's string is neither retagged
    # nor required to be unfrozen.  Each segment is tagged with the
    # original encoding again before it is stored.
    def scanadd( str, force = false )
      types = ''
      strs = []
      enc = nil
      if str.respond_to?(:encoding)
        enc = str.encoding
        str = str.dup.force_encoding(Encoding::ASCII_8BIT)
      end
      retag = lambda {|piece| enc ? piece.force_encoding(enc) : piece }

      until str.empty?
        if m = /\A[^\e\t\r\n ]+/.match(str)
          types << (force ? 'j' : 'a')
          strs.push retag.call(m[0])
        elsif m = /\A[\t\r\n ]+/.match(str)
          types << 's'
          strs.push retag.call(m[0])
        elsif m = /\A\e../.match(str)
          esc = m[0]
          # Step over the escape sequence right away; m may become nil below.
          str = m.post_match
          if esc != "\e(B" && (m = /\A[^\e]+/.match(str))
            types << 'j'
            strs.push retag.call(m[0])
          end
        else
          raise 'TMail FATAL: encoder scan fail'
        end
        str = m.post_match unless m.nil?
      end

      do_encode types, strs
    end
391

    
392
    # Walks the typed segments produced by scanadd and dispatches them:
    # runs that must be encoded go to concat_E, everything else to
    # concat_A_S.  +strs+ is consumed (sliced) in place.
    #
    # result  : (A|E)(S(A|E))*
    # E       : W(SW)*
    # W       : (J|A)+ but must contain J  # (J|A)*J(J|A)*
    # A       : <<A character string not to be encoded>>
    # J       : <<A character string to be encoded>>
    # S       : <<LWSP>>
    #
    # An encoding unit is `E'.
    # Input (parameter `types') is  (J|A)(J|A|S)*(J|A)
    def do_encode( types, strs )
      if BENCODE_DEBUG
        # NOTE: a bare `puts' here resolves to Encoder#puts, i.e. it writes
        # to the destination.
        puts
        puts '-- do_encode ------------'
        puts types.split(//).join(' ')
        p strs
      end

      encodable_run = /[ja]*j[ja]*(?:s[ja]*j[ja]*)*/

      until (m = encodable_run.match(types)).nil?
        plain = m.pre_match
        concat_A_S(plain, strs[0, plain.size]) unless plain.empty?
        concat_E(m[0], strs[m.begin(0), m[0].size])
        consumed = m.end(0)
        types = m.post_match
        strs.slice!(0, consumed)
      end
      concat_A_S(types, strs)
    end
422

    
423
    def concat_A_S( types, strs )
424
      if RUBY_VERSION < '1.9'
425
        a = ?a; s = ?s
426
      else
427
        a = 'a'.ord; s = 's'.ord
428
      end
429
      i = 0
430
      types.each_byte do |t|
431
        case t
432
        when a then add_text strs[i]
433
        when s then add_lwsp strs[i]
434
        else
435
          raise "TMail FATAL: unknown flag: #{t.chr}"
436
        end
437
        i += 1
438
      end
439
    end
440

    
441
    # Segment type flag -> name of the extract_* method concat_E calls for
    # it.  Keys are character literals so they match what String#[] returns
    # for a type flag on the running Ruby.
    METHOD_ID = Hash[
      ?j, :extract_J,
      ?e, :extract_E,
      ?a, :extract_A,
      ?s, :extract_S
    ]
447

    
448
    # Adds a run of segments as one or more encoded-words.
    #
    # Each segment is consumed piece by piece through the extract_* method
    # registered for its type in METHOD_ID.  Pieces accumulate in a pending
    # chunk; when an extractor returns nothing (no room left on the line)
    # the chunk is emitted with add_with_encode, the line is flushed and
    # folded, and extraction is retried with an empty chunk.  A second
    # failure raises RuntimeError.
    def concat_E( types, strs )
      if BENCODE_DEBUG
        # NOTE: a bare `puts' here resolves to Encoder#puts, i.e. it writes
        # to the destination.
        puts '---- concat_E'
        puts "types=#{types.split(//).join(' ')}"
        puts "strs =#{strs.inspect}"
      end

      flush() unless @text.empty?

      pending = ''
      strs.each_with_index do |segment, idx|
        extractor = METHOD_ID[types[idx]]
        until segment.empty?
          piece = __send__(extractor, pending.size, segment)
          unless piece
            add_with_encode pending unless pending.empty?
            flush
            pending = ''
            fold
            piece = __send__(extractor, 0, segment)
            raise 'TMail FATAL: extract fail' unless piece
          end
          pending << piece
        end
      end
      add_with_encode pending unless pending.empty?
    end
474

    
475
    # Removes as many leading bytes from +str+ as still fit on the line and
    # returns them wrapped in the "\e$B" ... "\e(B" escape sequences.
    # The byte budget is max_bytes minus 6 (the two escapes), rounded down
    # to an even number; nil is returned when nothing fits.
    #
    # On Rubies with string encodings +str+ is switched to binary so the
    # slice counts bytes, and the result is tagged with the encoding +str+
    # had on entry.
    def extract_J( chunksize, str )
      budget = max_bytes(chunksize, str.size) - 6
      budget -= 1 unless budget % 2 == 0
      return nil if budget <= 0
      return "\e$B#{str.slice!(0, budget)}\e(B" unless str.respond_to?(:encoding)

      original = str.encoding
      str.force_encoding(Encoding::ASCII_8BIT)
      "\e$B#{str.slice!(0, budget)}\e(B".force_encoding(original)
    end
487

    
488
    # Removes and returns as much of the front of +str+ as max_bytes still
    # allows, or nil when there is no room left.
    def extract_A( chunksize, str )
      room = max_bytes(chunksize, str.size)
      room <= 0 ? nil : str.slice!(0, room)
    end
493

    
494
    alias extract_S extract_A
495

    
496
    # Number of raw bytes that can still be added to the current chunk:
    # the room left on the line minus the encoded-word wrapper, scaled by
    # 3/4 (integer arithmetic), minus what the chunk already holds.
    # +ssize+ is accepted but not used.
    def max_bytes( chunksize, ssize )
      wrapper_size = '=?iso-2022-jp?B??='.size
      available = restsize() - wrapper_size
      available / 4 * 3 - chunksize
    end
499

    
500
    #
501
    # free length buffer
502
    #
503

    
504
    # Appends +str+ to the pending (not yet written) text.
    def add_text( str )
      @text.<<(str)
      # puts '---- text -------------------------------------'
      # puts "+ #{str.inspect}"
      # puts "txt >>>#{@text.inspect}<<<"
    end
510

    
511
    # Appends +str+ to the pending text as an iso-2022-jp "B" encoded-word
    # whose payload is Base64.encode(str).
    def add_with_encode( str )
      encoded_word = "=?iso-2022-jp?B?#{Base64.encode(str)}?="
      @text << encoded_word
    end
514

    
515
    # Queues +lwsp+ as the whitespace that will precede the next text.
    # Before that, the line is folded if no room is left, and whatever is
    # pending is flushed (passing along whether a header fold happened).
    def add_lwsp( lwsp )
      # puts '---- lwsp -------------------------------------'
      # puts "+ #{lwsp.inspect}"
      fold unless restsize() > 0
      flush(@folded)
      @lwsp = lwsp
    end
522

    
523
    # Writes the pending whitespace and text to the destination and empties
    # both buffers.  The line length counter grows by what was written,
    # unless +folded+ is set, in which case it restarts at zero.
    def flush(folded = false)
      # puts '---- flush ----'
      # puts "spc >>>#{@lwsp.inspect}<<<"
      # puts "txt >>>#{@text.inspect}<<<"
      @f << @lwsp << @text
      @curlen = folded ? 0 : @curlen + (@lwsp.size + @text.size)
      @text = ''
      @lwsp = ''
    end
536

    
537
    # Starts a new output line.
    #
    # When the output written so far contains a line ending in ':' the
    # pending text is wrapped by fold_header and @folded is set; otherwise
    # an end-of-line is written and the continuation starts with SPACER.
    # Either way the line length counter restarts at zero.
    #
    # Requires a destination that responds to #string.
    # NOTE(review): ^ and $ are line anchors, so any earlier line ending in
    # ':' satisfies the test, not just the last one -- confirm intended.
    def fold
      # puts '---- fold ----'
      if @f.string =~ /^.*?:$/
        fold_header
        @folded = true
      else
        @f << @eol
        @lwsp = SPACER
      end
      @curlen = 0
    end
548

    
549
    # Called because the line is too long, so the pending text must wrap.
    #
    # fold_the_string is applied repeatedly, each call moving one piece of
    # @text into @wrapped_text, until only whitespace (or nothing) is left.
    # @text is then rebuilt from the pieces, joined by the end-of-line
    # followed by SPACER.
    #
    # The loop test uses core String methods only (String#blank? is not
    # part of core Ruby) and mirrors the strip! that fold_the_string
    # performs at the start of every pass.
    def fold_header
      @wrapped_text = []
      until @text.strip.empty?
        fold_the_string
      end
      @text = @wrapped_text.join("#{@eol}#{SPACER}")
    end
563

    
564
    # Moves one piece from the front of @text into @wrapped_text.
    #
    # @text is stripped first.  The piece normally ends at the first
    # whitespace character (or at the end of the text when there is none).
    # If a piece of that size would not fit within RFC_2822_MAX_LENGTH,
    # the text is cut destructively at the limit instead.
    #
    # NOTE(review): when @curlen + @lwsp.length already reaches the limit
    # the cut position is zero or negative -- confirm callers cannot get
    # here in that state.
    def fold_the_string
      @text.strip!
      break_at = (@text =~ /\s/) || @text.length
      if mazsize(break_at) > 0
        @wrapped_text << @text.slice!(0...break_at)
      else
        cut = RFC_2822_MAX_LENGTH - @curlen - @lwsp.length
        @wrapped_text << @text.slice!(0...cut)
      end
    end
579

    
580
    # Room left on the current line under MAX_LINE_LEN, counting what has
    # been written plus the pending whitespace and text.
    def restsize
      used = @curlen + @lwsp.size + @text.size
      MAX_LINE_LEN - used
    end
583

    
584
    # Room left under RFC_2822_MAX_LENGTH (per RFC2822 a line may be at
    # most 998 chars) if the line were extended by the pending whitespace
    # plus +whitespace_location+ characters of text.
    def mazsize(whitespace_location)
      used = @curlen + @lwsp.size + whitespace_location
      RFC_2822_MAX_LENGTH - used
    end
588

    
589
  end
590
  #:startdoc:
591
end    # module TMail