# csharp.rb — C# scanner file for CodeRay.
# C# scanner - Dmitry Popov, 2013-05-10 12:45

module CodeRay
module Scanners
  
  # Scanner for C#.
  # 
  # Registers itself for the language +:csharp+ and the file extension +cs+.
  # Aliases: cs, csharp (NOTE(review): only +:csharp+ is registered in this
  # file; the +cs+ alias is presumably mapped elsewhere -- confirm).
  # 
  # The scanner is a small state machine (see #scan_tokens) with the states
  # +:initial+, +:string+, +:verbatim_string+, +:char+ and
  # +:class_name_expected+.
  class CSharp < Scanner
    
    register_for :csharp
    file_extension 'cs'
    title 'C#'
    
    #-- https://learn.microsoft.com/dotnet/csharp/language-reference/keywords/
    KEYWORDS = [
      'as', 'break', 'case', 'catch', 'class', 'const', 'continue', 'default', 
      'delegate', 'do', 'else', 'enum', 'event', 'finally', 'get', 'for', 'foreach',
      'goto', 'if', 'in', 'interface', 'is', 'lock', 'namespace', 'new', 
      'operator', 'out', 'params', 'readonly', 'ref', 'return', 'set', 'sizeof',
      'stackalloc', 'struct', 'switch', 'throw', 'try', 'typeof', 'using', 'var',
      'while', 'yield'
    ]  # :nodoc:
    
    PREDEFINED_TYPES = [
      'bool', 'byte', 'char', 'decimal', 'double', 'float', 'int', 'long',
      'object', 'sbyte', 'short', 'string', 'uint', 'ulong', 'ushort'
    ]  # :nodoc:
    PREDEFINED_CONSTANTS = [
      'false', 'null', 'true'
    ]  # :nodoc:
    PREDEFINED_VARIABLES = [
      'base', 'this'
    ]  # :nodoc:
    DIRECTIVES = [
      'abstract', 'checked', 'explicit', 'extern', 'fixed', 'implicit', 'internal',
      'override', 'private', 'protected', 'public', 'sealed', 'static', 'unchecked',
      'unsafe', 'virtual', 'void', 'volatile'
    ]  # :nodoc:
    
    # Maps an identifier to its token kind; anything not listed is :ident.
    IDENT_KIND = WordList.new(:ident).
      add(KEYWORDS, :keyword).
      add(PREDEFINED_TYPES, :predefined_type).
      add(PREDEFINED_VARIABLES, :local_variable).
      add(DIRECTIVES, :directive).
      add(PREDEFINED_CONSTANTS, :predefined_constant)  # :nodoc:
    
    # What may follow a backslash inside a regular string or char literal.
    ESCAPE = / [rbfntv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x  # :nodoc:
    UNICODE_ESCAPE =  / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x  # :nodoc:
    
    # Optional suffix of an integer literal: U, L, UL or LU in any case.
    INTEGER_SUFFIX = / [uU][lL]? | [lL][uU]? /x  # :nodoc:
    
  protected
    
    # Tokenizes the whole input and feeds the tokens to +encoder+.
    # 
    # encoder:: receives +text_token+, +begin_group+ and +end_group+ calls.
    # options:: accepted for interface compatibility; not read here.
    # 
    # Returns +encoder+.
    # 
    # Local state:
    # state::           current state of the state machine.
    # label_expected::  truthy when an identifier followed by a single ':'
    #                   should be reported as a :label.
    # case_expected::   true right after the keywords 'case' / 'default', so
    #                   that the following ':' re-enables label detection.
    # in_preproc_line:: true while inside a preprocessor line; labels are not
    #                   detected there and +label_expected+ is restored at the
    #                   end of the line.
    def scan_tokens encoder, options
      
      state = :initial
      label_expected = true
      case_expected = false
      label_expected_before_preproc_line = nil
      in_preproc_line = false
      
      until eos?
        
        case state
        
        when :initial
          
          if match = scan(/ \s+ | \\\n /x)
            # A real newline (not a backslash continuation) ends a
            # preprocessor line.
            if in_preproc_line && match != "\\\n" && match.index(?\n)
              in_preproc_line = false
              label_expected = label_expected_before_preproc_line
            end
            encoder.text_token match, :space
          
          elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
            encoder.text_token match, :comment
          
          elsif match = scan(/ \# \s* if \s* 0 /x)
            # Everything up to the matching-looking #elif/#else/#endif (or
            # the end of input) is shown as a comment.
            match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos?
            encoder.text_token match, :comment
          
          elsif match = scan(/ [-+*=<>?:;,!&^|()\[\]{}~%]+ | \/=? | \.(?!\d) /x)
            label_expected = match =~ /[;\{\}]/
            if case_expected
              label_expected = true if match == ':'
              case_expected = false
            end
            encoder.text_token match, :operator
          
          elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
            kind = IDENT_KIND[match]
            if kind == :ident && label_expected && !in_preproc_line && scan(/:(?!:)/)
              kind = :label
              match << matched
            else
              label_expected = false
              if kind == :keyword
                case match
                when 'class', 'interface', 'struct'
                  state = :class_name_expected
                when 'case', 'default'
                  case_expected = true
                end
              end
            end
            encoder.text_token match, kind
          
          elsif match = scan(/\$/)
            encoder.text_token match, :ident
          
          elsif match = scan(/@"/)
            # Verbatim string: backslashes are literal, "" is an escaped quote.
            encoder.begin_group :string
            state = :verbatim_string
            encoder.text_token match, :delimiter
          
          elsif match = scan(/"/)
            encoder.begin_group :string
            state = :string
            encoder.text_token match, :delimiter
          
          elsif match = scan(/'/)
            encoder.begin_group :char
            state = :char
            encoder.text_token match, :delimiter
          
          elsif match = scan(/#[ \t]*(\w*)/)
            encoder.text_token match, :preprocessor
            in_preproc_line = true
            label_expected_before_preproc_line = label_expected
          
          elsif match = scan(/\d+[dDfFmM]|\d*\.\d+(?:[eE][+-]?\d+)?[dDfFmM]?|\d+[eE][+-]?\d+[dDfFmM]?/)
            label_expected = false
            encoder.text_token match, :float
          
          elsif match = scan(/ (?: 0[xX][0-9A-Fa-f]+ | [0-9]+ ) (?: #{INTEGER_SUFFIX} )? /ox)
            label_expected = false
            encoder.text_token match, :integer
          
          else
            encoder.text_token getch, :error
          
          end
        
        when :string
          if match = scan(/[^\\"]+/)
            encoder.text_token match, :content
          elsif match = scan(/"/)
            encoder.text_token match, :delimiter
            encoder.end_group :string
            state = :initial
            label_expected = false
          elsif match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
            encoder.text_token match, :char
          elsif match = scan(/ \\ | $ /x)
            # Backslash not followed by a known escape: give up on the string.
            encoder.end_group :string
            encoder.text_token match, :error
            state = :initial
            label_expected = false
          else
            raise_inspect "else case \" reached; %p not handled." % peek(1), encoder
          end
        
        when :verbatim_string
          if match = scan(/[^"]+/)
            encoder.text_token match, :content
          elsif match = scan(/""/)
            # Doubled quote is the only escape sequence in a verbatim string.
            encoder.text_token match, :char
          elsif match = scan(/"/)
            encoder.text_token match, :delimiter
            encoder.end_group :string
            state = :initial
            label_expected = false
          else
            raise_inspect "else case @\" reached; %p not handled." % peek(1), encoder
          end
        
        when :char
          if match = scan(/[^\\']+/)
            encoder.text_token match, :content
          elsif match = scan(/'/)
            encoder.text_token match, :delimiter
            encoder.end_group :char
            state = :initial
            label_expected = false
          elsif match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
            encoder.text_token match, :char
          elsif match = scan(/ \\ | $ /x)
            # Backslash not followed by a known escape: give up on the literal.
            encoder.end_group :char
            encoder.text_token match, :error
            state = :initial
            label_expected = false
          else
            raise_inspect "else case ' reached; %p not handled." % peek(1), encoder
          end
        
        when :class_name_expected
          if match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
            encoder.text_token match, :class
            state = :initial
          
          elsif match = scan(/\s+/)
            encoder.text_token match, :space
          
          else
            encoder.text_token getch, :error
            state = :initial
          
          end
          
        else
          raise_inspect 'Unknown state', encoder
        
        end
      
      end
      
      # Close the group left open by a literal that is unterminated at the
      # end of the input, so begin_group/end_group calls stay balanced.
      case state
      when :string, :verbatim_string
        encoder.end_group :string
      when :char
        encoder.end_group :char
      end
      
      encoder
    end
    
  end
  
end
end
(3-3/5)