A
download regexcode.cs
Language: C#
Copyright: (c) 2006 Microsoft Corporation. All rights reserved.
LOC: 293
Project Info
Shared Source Common Language Infrastructure(sscli20)
Server: Shared Source Common Language Infrastructure
Type: filesystem
...em\text\regularexpressions\
   compiledregexrunner.cs
   ...edregexrunnerfactory.cs
   regex.cs
   regexboyermoore.cs
   regexcapture.cs
   regexcapturecollection.cs
   regexcharclass.cs
   regexcode.cs
   regexcompilationinfo.cs
   regexcompiler.cs
   regexfcd.cs
   regexgroup.cs
   regexgroupcollection.cs
   regexinterpreter.cs
   regexmatch.cs
   regexmatchcollection.cs
   regexnode.cs
   regexoptions.cs
   regexparser.cs
   regexreplacement.cs
   regexrunner.cs
   regexrunnerfactory.cs
   regextree.cs
   regexwriter.cs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
//------------------------------------------------------------------------------
// <copyright file="RegexCode.cs" company="Microsoft">
//     
//      Copyright (c) 2006 Microsoft Corporation.  All rights reserved.
//     
//      The use and distribution terms for this software are contained in the file
//      named license.txt, which can be found in the root of this distribution.
//      By using this software in any fashion, you are agreeing to be bound by the
//      terms of this license.
//     
//      You must not remove this notice, or any other, from this software.
//     
// </copyright>                                                                
//------------------------------------------------------------------------------

// This RegexCode class is internal to the regular expression package.
// It provides operator constants for use by the Builder and the Machine.

// Implementation notes:
//
// Regexps are built into RegexCodes, which contain an operation array,
// a string table, and some constants.
//
// Each operation is one of the codes below, followed by the integer
// operands specified for each op.
//
// Strings and sets are indices into a string table.


namespace System.Text.RegularExpressions {

    using System.Collections;
    using System.Diagnostics;
    using System.Globalization;

    internal sealed class RegexCode {
        // Opcode table.
        //
        // Each constant below is the numeric value of one operation as it is
        // stored in the _codes array. The trailing comment on every line has
        // three columns: whether the operation has a leftward ("lef") and/or a
        // backtracking ("back") form, the operands that follow the opcode in
        // _codes, and an example of the pattern construct (or a short
        // description). OpcodeSize() must stay in step with this table.

        // the following primitive operations come directly from the parser

        // lef/back operands        description

        internal const int Onerep         = 0;    // lef,back char,min,max    a {n}
        internal const int Notonerep      = 1;    // lef,back char,min,max    .{n}
        internal const int Setrep         = 2;    // lef,back set,min,max     [\d]{n}

        internal const int Oneloop        = 3;    // lef,back char,min,max    a {,n}
        internal const int Notoneloop     = 4;    // lef,back char,min,max    .{,n}
        internal const int Setloop        = 5;    // lef,back set,min,max     [\d]{,n}

        internal const int Onelazy        = 6;    // lef,back char,min,max    a {,n}?
        internal const int Notonelazy     = 7;    // lef,back char,min,max    .{,n}?
        internal const int Setlazy        = 8;    // lef,back set,min,max     [\d]{,n}?

        internal const int One            = 9;    // lef      char            a
        internal const int Notone         = 10;   // lef      char            [^a]
        internal const int Set            = 11;   // lef      set             [a-z\s]  \w \s \d

        internal const int Multi          = 12;   // lef      string          abcd
        internal const int Ref            = 13;   // lef      group           \#

        internal const int Bol            = 14;   //                          ^
        internal const int Eol            = 15;   //                          $
        internal const int Boundary       = 16;   //                          \b
        internal const int Nonboundary    = 17;   //                          \B
        internal const int Beginning      = 18;   //                          \A
        internal const int Start          = 19;   //                          \G
        internal const int EndZ           = 20;   //                          \Z
        internal const int End            = 21;   //                          \z

        internal const int Nothing        = 22;   //                          Reject!

        // primitive control structures

        internal const int Lazybranch     = 23;   // back     jump            straight first
        internal const int Branchmark     = 24;   // back     jump            branch first for loop
        internal const int Lazybranchmark = 25;   // back     jump            straight first for loop
        internal const int Nullcount      = 26;   // back     val             set counter, null mark
        internal const int Setcount       = 27;   // back     val             set counter, make mark
        internal const int Branchcount    = 28;   // back     jump,limit      branch++ if zero<=c<limit
        internal const int Lazybranchcount= 29;   // back     jump,limit      same, but straight first
        internal const int Nullmark       = 30;   // back                     save position
        internal const int Setmark        = 31;   // back                     save position
        internal const int Capturemark    = 32;   // back     group           define group
        internal const int Getmark        = 33;   // back                     recall position
        internal const int Setjump        = 34;   // back                     save backtrack state
        internal const int Backjump       = 35;   //                          zap back to saved state
        internal const int Forejump       = 36;   //                          zap backtracking state
        internal const int Testref        = 37;   //                          backtrack if ref undefined
        internal const int Goto           = 38;   //          jump            just go

        internal const int Prune          = 39;   //                          prune it baby
        internal const int Stop           = 40;   //                          done!

        internal const int ECMABoundary   = 41;   //                          \b
        internal const int NonECMABoundary= 42;   //                          \B

        // modifiers for alternate modes
        //
        // The base opcode lives in the low six bits (Mask == 63); each flag
        // below is a distinct higher bit that is combined with the base opcode
        // and stripped again with "& Mask" before the opcode is switched on.

        internal const int Mask           = 63;   // Mask to get unmodified ordinary operator
        internal const int Rtl            = 64;   // bit to indicate that we're reverse scanning.
        internal const int Back           = 128;  // bit to indicate that we're backtracking.
        internal const int Back2          = 256;  // bit to indicate that we're backtracking on a second branch.
        internal const int Ci             = 512;  // bit to indicate that we're case-insensitive.

        // the code
        //
        // Everything below is assigned once by the constructor from the
        // arguments it is given.

        internal int[]           _codes;                 // the code: opcodes, each followed by its integer operands
        internal String[]        _strings;               // the string/set table (fixed-size copy of the constructor's list)
        // not used! internal int[]           _sparseIndex;           // a list of the groups that are used
        internal int             _trackcount;            // how many instructions use backtracking
        internal Hashtable       _caps;                  // mapping of user group numbers -> impl group slots
        internal int             _capsize;               // number of impl group slots
        internal RegexPrefix     _fcPrefix;              // the set of candidate first characters (may be null)
        internal RegexBoyerMoore _bmPrefix;              // the fixed prefix string as a Boyer-Moore machine (may be null)
        internal int             _anchors;               // the set of zero-length start anchors (RegexFCD.Bol, etc)
        internal bool         _rightToLeft;           // true if right to left

        // optimizations

        // constructor

        // Stores the supplied program pieces in the corresponding fields.
        // The string/set list is copied into a fixed-size String[] so that
        // operands in the code can index it directly; every other argument is
        // stored by reference/value as given.
        internal RegexCode(int [] codes, ArrayList stringlist, int trackcount,
                           Hashtable caps, int capsize,
                           RegexBoyerMoore bmPrefix, RegexPrefix fcPrefix, 
                           int anchors, bool rightToLeft) {
            // Snapshot the string table first.
            int tableLength = stringlist.Count;
            String[] table = new String[tableLength];
            stringlist.CopyTo(0, table, 0, tableLength);

            // The program itself.
            _codes = codes;
            _strings = table;
            _trackcount = trackcount;

            // Capture-group bookkeeping.
            _caps = caps;
            _capsize = capsize;

            // Prefix / anchor information and scan direction.
            _bmPrefix = bmPrefix;
            _fcPrefix = fcPrefix;
            _anchors = anchors;
            _rightToLeft = rightToLeft;
        }

        // Returns true when the base opcode of Op (modifier bits are masked
        // off first) is one of the operations listed below, i.e. one that is
        // counted as a backtracking instruction; returns false for every
        // other value.
        internal static bool OpcodeBacktracks(int Op) {
            switch (Op & Mask) {
                // greedy and lazy single-char / set loops
                case Oneloop:      case Notoneloop:      case Setloop:
                case Onelazy:      case Notonelazy:      case Setlazy:

                // branches and counters
                case Lazybranch:   case Branchmark:      case Lazybranchmark:
                case Nullcount:    case Setcount:
                case Branchcount:  case Lazybranchcount:

                // marks and captures
                case Setmark:      case Capturemark:     case Getmark:

                // jumps
                case Setjump:      case Backjump:        case Forejump:
                case Goto:
                    return true;
            }

            return false;
        }

        // Returns the number of ints an operation occupies in the code array:
        // the opcode itself plus its operands (1, 2 or 3). Modifier bits are
        // masked off before the lookup. Throws the exception produced by
        // MakeException for a base opcode that is not listed here.
        internal static int OpcodeSize(int Opcode) {
            int baseOp = Opcode & Mask;

            switch (baseOp) {
                // opcode only
                case Nothing:
                case Bol:           case Eol:
                case Boundary:      case Nonboundary:
                case ECMABoundary:  case NonECMABoundary:
                case Beginning:     case Start:
                case EndZ:          case End:
                case Nullmark:      case Setmark:       case Getmark:
                case Setjump:       case Backjump:      case Forejump:
                case Stop:
                    return 1;

                // opcode + one operand
                case One:           case Notone:        case Set:
                case Multi:
                case Ref:           case Testref:
                case Goto:
                case Nullcount:     case Setcount:
                case Lazybranch:    case Branchmark:    case Lazybranchmark:
                case Prune:
                    return 2;

                // opcode + two operands
                case Capturemark:
                case Branchcount:   case Lazybranchcount:
                case Onerep:        case Notonerep:     case Setrep:
                case Oneloop:       case Notoneloop:    case Setloop:
                case Onelazy:       case Notonelazy:    case Setlazy:
                    return 3;

                default:
                    // The message reports the masked opcode value.
                    throw MakeException(SR.GetString(SR.UnexpectedOpcode, baseOp.ToString(CultureInfo.CurrentCulture)));
            }
        }

        // Wraps the given message in a new ArgumentException and returns it;
        // the caller is responsible for throwing it.
        internal static ArgumentException MakeException(String message) {
            ArgumentException error = new ArgumentException(message);
            return error;
        }

        // Debug only code below

#if DBG
        // Printable names of the opcodes, used by OperatorDescription().
        // The array is indexed by the base opcode value (Opcode & Mask), so
        // the entries must appear in exactly the same order as the opcode
        // constants (Onerep == 0 ... NonECMABoundary == 42); any opcode added
        // to the table needs a matching entry here.
        internal static String[] CodeStr = new String[]
        {
            "Onerep", "Notonerep", "Setrep",
            "Oneloop", "Notoneloop", "Setloop",
            "Onelazy", "Notonelazy", "Setlazy",
            "One", "Notone", "Set",
            "Multi", "Ref",
            "Bol", "Eol", "Boundary", "Nonboundary", "Beginning", "Start", "EndZ", "End",
            "Nothing",
            "Lazybranch", "Branchmark", "Lazybranchmark",
            "Nullcount", "Setcount", "Branchcount", "Lazybranchcount",
            "Nullmark", "Setmark", "Capturemark", "Getmark",
            "Setjump", "Backjump", "Forejump", "Testref", "Goto",
            "Prune", "Stop",
            "ECMABoundary", "NonECMABoundary",
        };

        // Formats an opcode as its name from CodeStr followed by one suffix
        // per modifier bit that is set, always in the order
        // -Ci, -Rtl, -Back, -Back2.
        internal static String OperatorDescription(int Opcode) {
            StringBuilder text = new StringBuilder(CodeStr[Opcode & Mask]);

            if ((Opcode & Ci) != 0)
                text.Append("-Ci");

            if ((Opcode & Rtl) != 0)
                text.Append("-Rtl");

            if ((Opcode & Back) != 0)
                text.Append("-Back");

            if ((Opcode & Back2) != 0)
                text.Append("-Back2");

            return text.ToString();
        }

        // Produces a one-line, human-readable rendering of the instruction
        // that starts at the given offset in _codes:
        //   <offset as 6 digits> <'*' if OpcodeBacktracks, else ' '><name+modifiers>(<operands>)
        // The first operand is labelled according to the kind of opcode; a
        // second operand (repeat count or branch limit) is appended for the
        // opcodes that have one, with Int32.MaxValue shown as "inf".
        internal String OpcodeDescription(int offset) {
            int rawOp = _codes[offset];
            int baseOp = rawOp & Mask;
            StringBuilder text = new StringBuilder();

            // Prefix: address, backtracking marker, operator name.
            text.AppendFormat("{0:D6} ", offset);
            text.Append(OpcodeBacktracks(baseOp) ? '*' : ' ');
            text.Append(OperatorDescription(rawOp));
            text.Append('(');

            // First operand.
            switch (baseOp) {
                case One:      case Notone:
                case Onerep:   case Notonerep:
                case Oneloop:  case Notoneloop:
                case Onelazy:  case Notonelazy:
                    text.Append("Ch = ");
                    text.Append(RegexCharClass.CharDescription((char)_codes[offset + 1]));
                    break;

                case Set:      case Setrep:
                case Setloop:  case Setlazy:
                    text.Append("Set = ");
                    text.Append(RegexCharClass.SetDescription(_strings[_codes[offset + 1]]));
                    break;

                case Multi:
                    text.Append("String = ");
                    text.Append(_strings[_codes[offset + 1]]);
                    break;

                case Ref:
                case Testref:
                    text.Append("Index = ");
                    text.Append(_codes[offset + 1]);
                    break;

                case Capturemark: {
                    text.Append("Index = ");
                    text.Append(_codes[offset + 1]);

                    // A second operand of -1 means there is nothing to show.
                    int unindex = _codes[offset + 2];
                    if (unindex != -1) {
                        text.Append(", Unindex = ");
                        text.Append(unindex);
                    }
                    break;
                }

                case Nullcount:
                case Setcount:
                    text.Append("Value = ");
                    text.Append(_codes[offset + 1]);
                    break;

                case Goto:
                case Lazybranch:   case Branchmark:       case Lazybranchmark:
                case Branchcount:  case Lazybranchcount:
                    text.Append("Addr = ");
                    text.Append(_codes[offset + 1]);
                    break;
            }

            // Second operand: pick the label, then print the bound once.
            String boundLabel = null;

            switch (baseOp) {
                case Onerep:   case Notonerep:   case Setrep:
                case Oneloop:  case Notoneloop:  case Setloop:
                case Onelazy:  case Notonelazy:  case Setlazy:
                    boundLabel = ", Rep = ";
                    break;

                case Branchcount:
                case Lazybranchcount:
                    boundLabel = ", Limit = ";
                    break;
            }

            if (boundLabel != null) {
                int bound = _codes[offset + 2];

                text.Append(boundLabel);
                if (bound == Int32.MaxValue)
                    text.Append("inf");
                else
                    text.Append(bound);
            }

            text.Append(")");

            return text.ToString();
        }

        // Writes a description of this RegexCode to the debug output: scan
        // direction, first-character set, literal prefix, anchors, the
        // Boyer-Moore dump when a prefix machine is present, and then one line
        // per instruction, stepping through _codes by OpcodeSize.
        internal void Dump() {
            String direction = _rightToLeft ? "right-to-left" : "left-to-right";
            Debug.WriteLine("Direction:  " + direction);

            String firstChars;
            if (_fcPrefix == null)
                firstChars = "n/a";
            else
                firstChars = RegexCharClass.SetDescription(_fcPrefix.Prefix);
            Debug.WriteLine("Firstchars: " + firstChars);

            String prefix;
            if (_bmPrefix == null)
                prefix = "n/a";
            else
                prefix = Regex.Escape(_bmPrefix.ToString());
            Debug.WriteLine("Prefix:     " + prefix);

            Debug.WriteLine("Anchors:    " + RegexFCD.AnchorDescription(_anchors));
            Debug.WriteLine("");

            if (_bmPrefix != null) {
                Debug.WriteLine("BoyerMoore:");
                Debug.WriteLine(_bmPrefix.Dump("    "));
            }

            // Instructions are variable-length, so advance by each one's size.
            int pos = 0;
            while (pos < _codes.Length) {
                Debug.WriteLine(OpcodeDescription(pos));
                pos += OpcodeSize(_codes[pos]);
            }

            Debug.WriteLine("");
        }
#endif

    }
}

About Koders | Resources | Downloads | Support | Black Duck | Terms of Service | DMCA | Privacy Policy | Contact Us