ddparser.gram source code

1 /*
2    Copyright 2002-2004 John Plevyak, All Rights Reserved
3  */
4 module ddparser.gram;
5 
6 import std.bitmanip;
7 import ddparser.util;
8 import ddparser.dparse_tables;
9 import ddparser.lr;
10 import ddparser.lex;
11 import ddparser.dparse;
12 import ddparser.parse;
13 import std.stdio;
14 import std.json;
15 import std.conv;
16 import ddparser.serialize;
17 import std.string;
18 import std.ascii;
19 import std.algorithm;
20 import std.array;
21 
22 enum EOF_SENTINAL = "\377";
23 enum NO_PROD            =0xFFFFFFFF;
24 
25 
26 alias Item = Elem;
27 
28 struct Code {
29     string code;
30     D_ReductionCode _f;
31 
32     int line;
33     void serialize(Serializer s)
34     {
35         //s.map(line, "line");
36         //if (line != -1)
37         //    s.mapCString(code, "code");
38     }
39 
40     @property void f(D_ReductionCode c)
41     {
42         _f = c;
43         line = -1;
44         code = "";
45     }
46 
47     @property D_ReductionCode f()
48     {
49         if (line == -1) return _f;
50         return null;
51     }
52 }
53 
54 struct Goto {
55     Elem    *elem;
56     State   *state;
57 }
58 
59 enum ActionKind {
60     ACTION_ACCEPT, ACTION_SHIFT, ACTION_REDUCE, ACTION_SHIFT_TRAILING
61 }
62 
63 struct Action {
64     ActionKind      kind;
65     Term        *term;
66     Rule        *rule;
67     State       *state;
68     uint            index;
69     size_t[2] temp_key = [size_t.max];
70 }
71 
72 alias VecAction = Vec!(Action*);
73 
74 struct Hint {
75     uint        depth;
76     State   *state;
77     Rule    *rule;
78 }
79 alias VecHint = Vec!(Hint*);
80 
81 alias VecScanStateTransition = Vec!(ScanStateTransition*);
82 
83 struct Scanner {
84     ScanState*[] states;
85     VecScanStateTransition  transitions;
86 }
87 
88 struct State {
89     uint        index;
90     uint64  hash;
91     Item*[] items;
92     bool[Item*] items_hash;
93     Goto*[] gotos;
94     VecAction   shift_actions;
95     VecAction   reduce_actions;
96     VecHint right_epsilon_hints;
97     VecHint error_recovery_hints;
98     Scanner scanner;
99     mixin(bitfields!(
100                 bool, "accept", 1,
101                 uint, "scanner_code", 1,
102                 uint, "goto_on_token", 1,
103                 uint, "scan_kind", 3,
104                 uint, "trailing_context", 1,
105                 uint, "", 25
106                 ));
107     int     goto_table_offset;
108     State   *same_shifts;
109     State  *reduces_to;
110     Rule   *reduces_with;
111     Rule   *reduces_to_then_with;
112 }
113 
114 enum ASSOC_LEFT     =0x0001;
115 enum ASSOC_RIGHT    =0x0002;
116 enum ASSOC_NARY     =0x0004;
117 enum ASSOC_UNARY    =0x0008;
118 enum ASSOC_BINARY   =0x0010;
119 
120 enum AssocKind {
121     ASSOC_NONE      = 0,
122     ASSOC_NARY_LEFT     = (ASSOC_NARY|ASSOC_LEFT),
123     ASSOC_NARY_RIGHT    = (ASSOC_NARY|ASSOC_RIGHT),
124     ASSOC_UNARY_LEFT    = (ASSOC_UNARY|ASSOC_LEFT),
125     ASSOC_UNARY_RIGHT   = (ASSOC_UNARY|ASSOC_RIGHT),
126     ASSOC_BINARY_LEFT   = (ASSOC_BINARY|ASSOC_LEFT),
127     ASSOC_BINARY_RIGHT  = (ASSOC_BINARY|ASSOC_RIGHT),
128     ASSOC_NO        = 0x0020
129 }
130 
131 @nogc @safe nothrow pure
132 {
133     bool IS_RIGHT_ASSOC(AssocKind _x) { return cast(bool)(_x & ASSOC_RIGHT); }
134     bool IS_LEFT_ASSOC(AssocKind _x) { return cast(bool)(_x & ASSOC_LEFT); }
135     bool IS_NARY_ASSOC(AssocKind _x) { return cast(bool)(_x & ASSOC_NARY); }
136     bool IS_BINARY_ASSOC(AssocKind _x) { return cast(bool)(_x & ASSOC_BINARY); }
137     bool IS_UNARY_ASSOC(AssocKind _x) { return cast(bool)(_x & ASSOC_UNARY); }
138     bool IS_UNARY_BINARY_ASSOC(AssocKind _x) { return IS_UNARY_ASSOC(_x) || IS_BINARY_ASSOC(_x); }
139     bool IS_BINARY_NARY_ASSOC(AssocKind _x) { return IS_NARY_ASSOC(_x) || IS_BINARY_ASSOC(_x); }
140 
141     /* not valid for NARY */
142     bool IS_EXPECT_RIGHT_ASSOC(AssocKind _x) { return _x && _x != AssocKind.ASSOC_UNARY_LEFT; }
143     bool IS_EXPECT_LEFT_ASSOC(AssocKind _x) { return _x && _x != AssocKind.ASSOC_UNARY_RIGHT; }
144 }
145 
146 struct Rule {
147     uint            index;
148     Production  *prod;
149     int         op_priority;
150     AssocKind       op_assoc;
151     int         rule_priority;
152     AssocKind       rule_assoc;
153     Elem*[]     elems;
154     Elem        *end;
155     Code            speculative_code;
156     Code            final_code;
157     Vec!(Code*)     pass_code;
158     int         action_index;
159     Rule        *same_reduction;
160 
161     void serialize(Serializer s)
162     {
163         s.map(index, "index");
164         s.map(prod, "prod");
165         s.map(op_priority, "op_priority");
166         s.map(op_assoc, "op_assoc");
167         s.map(rule_priority, "rule_priority");
168         s.map(rule_assoc, "rule_assoc");
169         s.map(elems, "elems");
170         s.map(end, "end");
171         s.map(speculative_code, "speculative_code");
172         s.map(final_code, "final_code");
173         s.map(pass_code, "pass_code");
174         s.map(action_index, "action_index");
175         s.map(same_reduction, "same_reduction");
176     }
177 }
178 
179 enum TermKind {
180     TERM_STRING, TERM_REGEX, TERM_CODE, TERM_TOKEN
181 }
182 struct Term {
183     TermKind        kind;
184     uint            index;
185     int         term_priority;
186     string      term_name;
187     AssocKind       op_assoc;
188     int         op_priority;
189     string string_;
190     mixin(bitfields!(
191                 uint, "scan_kind", 3,
192                 uint, "ignore_case", 1,
193                 uint, "trailing_context", 1,
194                 uint, "", 27
195                 ));
196     Production  *regex_production;
197 
198     void serialize(Serializer s)
199     {
200         s.map(kind, "kind");
201         s.map(index, "index");
202         s.map(term_priority, "term_priority");
203         //s.map(term_name, "term_name");
204         s.map(op_assoc, "op_assoc");
205         s.map(op_priority, "op_priority");
206         //s.map(string_, "string_");
207 
208         mixin(fieldMap!("scan_kind", s));
209         mixin(fieldMap!("ignore_case", s));
210         mixin(fieldMap!("trailing_context", s));
211         s.map(regex_production, "regex_production");
212     }
213 
214 }
215 
216 enum DeclarationKind {
217     DECLARE_TOKENIZE, DECLARE_LONGEST_MATCH, DECLARE_ALL_MATCHES,
218     DECLARE_SET_OP_PRIORITY, DECLARE_STATES_FOR_ALL_NTERMS,
219     DECLARE_STATE_FOR, DECLARE_WHITESPACE, DECLARE_SAVE_PARSE_TREE, DECLARE_NUM
220 }
221 
222 alias DECLARE_NUM = DeclarationKind.DECLARE_NUM;
223 
224 struct Declaration {
225     Elem *  elem;
226     DeclarationKind    kind;
227     uint        index;
228 
229     void serialize(Serializer s)
230     {
231         s.map(elem, "elem");
232         s.map(kind, "kind");
233         s.map(index, "index");
234     }
235 }
236 
237 enum InternalKind {
238     INTERNAL_NOT, INTERNAL_HIDDEN, INTERNAL_CONDITIONAL, INTERNAL_STAR, INTERNAL_PLUS
239 }
240 
241 struct Production {
242     string          name;
243     Rule*[]         rules;
244     uint            index;
245     mixin(bitfields!(
246                 uint, "regex", 1,
247                 uint, "in_regex", 1,
248                 uint, "internal", 3,    /* production used for EBNF */
249                 uint, "live", 1,
250                 uint, "", 26
251                 ));
252     Rule            *nullable;  /* shortest rule for epsilon reduction */
253     Production*[DECLARE_NUM] declaration_group;
254     Declaration*[DECLARE_NUM] last_declaration;
255     State           *state; /* state for independent parsing of this productions*/
256     Elem        *elem;  /* base elem for the item set of the above state */
257     Term        *regex_term;    /* regex production terminal */
258     Production  *next;
259 
260     void serialize(Serializer s)
261     {
262         //s.map(name, "name");
263         s.map(rules, "rules");
264         s.map(index, "index");
265         mixin(fieldMap!("regex", s));
266         mixin(fieldMap!("in_regex", s));
267         mixin(fieldMap!("internal", s));
268         mixin(fieldMap!("live", s));
269         s.map(nullable, "nullable");
270         s.map(declaration_group, "declaration_group");
271         s.map(last_declaration, "last_declaration");
272         s.map(state, "state");
273         s.map(elem, "elem");
274         s.map(regex_term, "regex_term");
275         s.map(next, "next");
276     }
277 }
278 
279 
280 
281 enum ElemKind {
282     ELEM_NTERM, ELEM_TERM, ELEM_UNRESOLVED, ELEM_END
283 }
284 
285 bool elemTermOrNtermEqual(in Elem a, in Elem b)
286 {
287     if (a.kind != b.kind) return false;
288     if (__ctfe)
289     {
290           if (a.kind == ElemKind.ELEM_NTERM) return a.e.nterm == b.e.nterm;
291           else if (a.kind == ElemKind.ELEM_TERM) return a.e.term == b.e.term;
292           else return a.e.unresolved == b.e.unresolved;
293     }
294     return a.e.term_or_nterm == b.e.term_or_nterm;
295 }
296 
297 struct Elem {
298     ElemKind    kind;
299     uint        index;
300     Rule        *rule;
301     union E {
302         Production  *nterm;
303         Term    *term;
304         void    *term_or_nterm;
305         string unresolved;
306     }
307     E e;
308 
309     @property inout(Term)* term() inout @trusted
310     {
311         assert(kind == ElemKind.ELEM_TERM);
312         return e.term;
313     }
314 
315     @property void term(Term* t) @trusted
316     {
317         e.term = t;
318         kind = ElemKind.ELEM_TERM;
319     }
320 
321     @property inout(Production)* nterm() inout @trusted
322     {
323         assert(kind == ElemKind.ELEM_NTERM);
324         return e.nterm;
325     }
326 
327     @property void nterm(Production* p)
328     {
329         kind = ElemKind.ELEM_NTERM;
330         e.nterm = p;
331     }
332 
333     string toString()
334     {
335         if (kind == ElemKind.ELEM_NTERM) return "NTERM: " ~ nterm.name;
336         if (kind == ElemKind.ELEM_TERM) return "TERM: " ~ term.term_name;
337         if (kind == ElemKind.ELEM_UNRESOLVED) return "UNRES: " ~ e.unresolved;
338         return "END";
339     }
340 
341     void serialize(Serializer s)
342     {
343         s.map(kind, "kind");
344         s.map(index, "index");
345         s.map(rule, "rule");
346 
347         if (kind == ElemKind.ELEM_NTERM)
348             s.map(e.nterm, "nterm");
349         else if (kind == ElemKind.ELEM_TERM)
350             s.map(e.term, "term");
351         /* else if (kind == ElemKind.ELEM_UNRESOLVED) */
352         /*     s.map(e.unresolved, "unresolved"); */
353     }
354 }
355 
356 struct Grammar {
357     Production*[]   productions;
358     Term*[]         terminals;
359     State*[]        states;
360     Action*[]       actions;
361     Code            scanner;
362     /* Code         *code; */
363     /* int          ncode; */
364     Declaration*[]  declarations; 
365     Vec!(D_Pass *)      passes;
366     string          default_white_space;
367     /* grammar construction options */
368     int         set_op_priority_from_rule;
369     int         right_recursive_BNF;
370     int         states_for_whitespace;
371     int         states_for_all_nterms;
372     int         tokenizer;
373     int         longest_match;
374     int         save_parse_tree;
375     /* grammar writing options */
376     int         scanner_blocks;
377     int         scanner_block_size;
378     /* temporary variables for grammar construction */
379     Production *    p;
380     Rule *      r;
381     Elem *      e;
382     int         action_index;
383     int         action_count;
384     int         pass_index;
385     int         rule_index;
386 
387     void serialize(Serializer s)
388     {
389         /* s.mapCArray(code, "code", ncode); */
390         s.map(scanner, "scanner");
391         s.map(productions, "productions");
392         s.map(terminals, "terminals");
393         s.map(states, "states");
394         s.map(actions, "actions");
395         s.map(declarations, "declarations");
396         s.map(passes, "passes");
397         //s.mapCString(default_white_space, "default_white_space");
398 
399 
400         s.map(set_op_priority_from_rule, "set_op_priority_from_rule");
401         s.map(right_recursive_BNF, "right_recursive_BNF");
402         s.map(states_for_whitespace, "states_for_whitespace");
403         s.map(states_for_all_nterms, "states_for_all_nterms");
404         s.map(tokenizer, "tokenizer");
405         s.map(longest_match, "longest_match");
406         s.map(save_parse_tree, "save_parse_tree");
407 
408         s.map(scanner_blocks, "scanner_blocks");
409         s.map(scanner_block_size, "scanner_block_size");
410     }
411 }
412 
413 alias D_Grammar = Grammar;
414 
415 /*
416    Copyright 2002-2004 John Plevyak, All Rights Reserved
417  */
418 
419 
420 immutable string[] action_types = [ "ACCEPT", "SHIFT", "REDUCE" ];
421 
422 
423 Production* new_production(Grammar *g, string name) @safe {
424     Production *p  = lookup_production(g, name);
425     if (p) {
426         return p;
427     }
428     p = new Production();
429     g.productions ~= p;
430     p.name = name;
431     return p;
432 }
433 
434 Rule *
435 new_rule(Grammar *g, Production *p) @safe {
436     Rule *r = new Rule();
437     r.prod = p;
438     r.end = new Elem();
439     r.end.kind = ElemKind.ELEM_END;
440     r.end.rule = r;
441     r.action_index = g.action_index;
442     return r;
443 }
444 
445 private Elem *
446 new_elem_term(Term *t, Rule *r) @safe {
447     Elem *e = new Elem();
448     e.term = t;
449     e.rule = r;
450     r.elems ~= e;
451     return e;
452 }
453 
454 Elem *
455 new_elem_nterm(Production *p, Rule *r) @safe {
456     Elem *e = new Elem();
457     e.kind = ElemKind.ELEM_NTERM;
458     e.e.nterm = p;
459     e.rule = r;
460     return e;
461 }
462 
463 private Elem *
464 new_term_string(Grammar *g, string s, Rule *r) @safe
465 {
466     Term *t = new Term();
467     t.string_ = s;
468     g.terminals ~= t;
469     return new_elem_term(t, r);
470 }
471 
472 string escape_string_for_regex(const char[] s) @safe
473 {
474     auto result = appender!string();
475     result.reserve(s.length * 2);
476     foreach(c; s)
477     {
478         switch (c)
479         {
480             case '(':
481             case ')':
482             case '[':
483             case ']':
484             case '-':
485             case '^':
486             case '*':
487             case '?':
488             case '+':
489                 result ~= '\\';
490                 goto default;
491             default:
492                 result ~= c;
493         }
494     }
495 
496     return result.data;
497 }
498 
499 private string unescapeTermString(const(char)[] termString, TermKind kind) @safe
500 {
501     string result;
502 
503     uint start = 0;
504     int length;
505     uint base = 0;
506 
507 
508     for (uint i = 0; i < termString.length; ++i) {
509         if (termString[i] == '\\') {
510             switch (termString[i + 1]) {
511                 case '\\':
512                     if (kind == TermKind.TERM_STRING)
513                     { result ~= '\\'; i++; break; }
514                     else
515                         goto default;
516                 case 'b': result ~= '\b'; i++; break;
517                 case 'f': result ~= '\f'; i++; break;
518                 case 'n': result ~= '\n'; i++; break;
519                 case 'r': result ~= '\r'; i++; break;
520                 case 't': result ~= '\t'; i++; break;
521                 case 'v': result ~= '\v'; i++; break;
522                 case 'a': result ~= '\a'; i++; break;
523                 case 'c': assert(false); //return;
524                 case '\"':
525                           if (kind == TermKind.TERM_REGEX)
526                           { result ~= '\"'; i++; break; }
527                           else
528                               goto default;
529                 case '\'':
530                           if (kind == TermKind.TERM_STRING)
531                           { result ~= '\''; i++; break; }
532                           else
533                               goto default;
534                 case 'x':
535                           length = 0;
536                           if (termString[i + 2].isHexDigit()) {
537                               base = 16;
538                               start = i + 2;
539                               length++;
540                               if (termString[i + 3].isHexDigit())
541                                   length++;
542                           }
543                           i += length + 1;
544                           goto Lncont;
545                 case 'd':
546                           length = 0;
547                           if (termString[i + 2].isDigit()) {
548                               base = 10;
549                               start = i + 2;
550                               length++;
551                               if ((termString[i + 3]).isDigit()) {
552                                   length++;
553                                   if (termString[i + 4].isDigit() && ((termString[i + 2] < '2') || ((termString[i + 2] == '2') && ((termString[i + 3] < '5') ||
554                                                       ((termString[i + 3] == '5') && (termString[i + 4] < '6'))))))
555                                       length++;
556                               }
557                           }
558                           i += length + 1;
559                           goto Lncont;
560                 case '0': case '1': case '2': case '3':
561                 case '4': case '5': case '6': case '7':
562                           length = 1;
563                           base = 8;
564                           start = i + 1;
565                           if (termString[i + 2].isDigit() && (termString[i + 2] != '8') && (termString[i + 2] != '9')) {
566                               length++;
567                               if (termString[i + 3].isDigit() && (termString[i + 3] != '8') && (termString[i + 3] != '9')) {
568                                   length++;
569                               }
570                           }
571                           i += length;
572                           /* fall through */
573 Lncont:
574                           if (length > 0) {
575                               result ~= cast(char)termString[start .. start + length].to!ubyte(base);
576                               if (i < termString.length)
577                                   break;
578                               d_fail("encountered an escaped null while processing '%s'", termString);
579                           } else
580                               goto next;
581                           break;
582                 default:
583                           result ~= termString[i];
584                           result ~= termString[i + 1];
585                           i ++;
586                           break;
587             }
588         }
589         else
590         {
591             result ~= termString[i];
592         }
593 next:;
594     }
595 
596     if (!result.length)
597         d_fail("empty string after unescape '%s'", termString);
598 
599     return result;
600 }
601 
602 private void
603 unescape_term_string(Term *t) {
604     t.string_ = unescapeTermString(t.string_, t.kind);
605 }
606 
607 Elem * new_string(Grammar *g, string s, Rule *r)
608 {
609     Elem *x = new_term_string(g, s[1 .. $ - 1], r);
610     x.e.term.kind = (s[0] == '"') ? TermKind.TERM_REGEX : TermKind.TERM_STRING;
611     unescape_term_string(x.e.term);
612     return x;
613 }
614 
615 Elem *
616 new_utf8_char(Grammar *g, const(char) *s, const(char) *e, Rule *r) {
617     char[4] utf8_code;
618     ulong utf32_code, base, len = 0;
619     for (utf32_code=0, base=1; e>=s+3; base*=16) {
620         e--;
621         if (*e >= '0' && *e <= '9')
622             utf32_code += base * (*e - '0');
623         else if (*e >= 'a' && *e <= 'f')
624             utf32_code += base * (*e - 'a' + 10);
625         else if (*e >= 'A' && *e <= 'F')
626             utf32_code += base * (*e - 'A' + 10);
627     }
628     if (utf32_code < 0x80) {
629         utf8_code[0] = cast(char)utf32_code;
630         len = 1;
631     } else if (utf32_code < 0x800) {
632         utf8_code[0] = 0xc0 | ((utf32_code >> 6) & 0x1f);
633         utf8_code[1] = 0x80 | (utf32_code & 0x3f);
634         len = 2;
635     } else if (utf32_code < 0x10000) {
636         utf8_code[0] = 0xe0 | ((utf32_code >> 12) & 0x0f);
637         utf8_code[1] = 0x80 | ((utf32_code >> 6) & 0x3f);
638         utf8_code[2] = 0x80 | (utf32_code & 0x3f);
639         len = 3;
640     } else if (utf32_code < 0x02000000) {
641         utf8_code[0] = 0xf0 | ((utf32_code >> 18) & 0x07);
642         utf8_code[1] = 0x80 | ((utf32_code >> 12) & 0x3f);
643         utf8_code[2] = 0x80 | ((utf32_code >> 6) & 0x3f);
644         utf8_code[3] = 0x80 | (utf32_code & 0x3f);
645         len = 4;
646     } else {
647         d_fail("UTF32 Unicode value U+%8X too large for valid UTF-8 encoding (cf. Unicode Spec 4.0, section 3.9)", utf32_code);
648     }
649     Elem *x = new_term_string(g, utf8_code[0 .. len].idup, r);
650     x.e.term.kind = TermKind.TERM_STRING;
651     return x;
652 }
653 
654 Elem *
655 new_ident(string s, Rule *r) @safe
656 {
657     Elem *x = new Elem();
658     x.kind = ElemKind.ELEM_UNRESOLVED;
659     x.e.unresolved = s;
660     x.rule = r;
661     if (r)
662         r.elems ~= x;
663     return x;
664 }
665 
666 void
667 new_token(Grammar *g, string s) @safe {
668     Term *t = new Term();
669     t.string_ = s;
670     g.terminals ~= t;
671     t.kind = TermKind.TERM_TOKEN;
672 }
673 
674 Elem *
675 new_code(Grammar *g, string s, Rule *r) {
676     Elem *x = new_term_string(g, s, r);
677     x.e.term.kind = TermKind.TERM_CODE;
678     return x;
679 }
680 
681 Elem *
682 dup_elem(Elem *e, Rule *r) @safe {
683     Elem *ee = new Elem();
684     *ee = *e;
685     ee.rule = r;
686     return ee;
687 }
688 
689 void
690 new_declaration(Grammar *g, Elem *e, DeclarationKind kind) {
691     Declaration *d = new Declaration();
692     d.elem = e;
693     d.kind = kind;
694     d.index = cast(uint)g.declarations.length;
695     g.declarations ~= d;
696 }
697 
698 void
699 add_declaration(Grammar *g, string s, DeclarationKind kind, uint line) {
700     if (s.length == 0) {
701         switch (kind) {
702             case DeclarationKind.DECLARE_SET_OP_PRIORITY: g.set_op_priority_from_rule = 1; return;
703             case DeclarationKind.DECLARE_STATES_FOR_ALL_NTERMS: g.states_for_all_nterms = 1; return;
704             case DeclarationKind.DECLARE_LONGEST_MATCH: g.longest_match = 1; break;
705             case DeclarationKind.DECLARE_ALL_MATCHES: g.longest_match = 0; break;
706             case DeclarationKind.DECLARE_TOKENIZE: g.tokenizer = 1; break;
707             case DeclarationKind.DECLARE_SAVE_PARSE_TREE: g.save_parse_tree = 1; return;
708             default: d_fail("declare expects argument, line %d", line);
709         }
710     }
711     switch (kind) {
712         case DeclarationKind.DECLARE_WHITESPACE: g.default_white_space = s; return;
713         case DeclarationKind.DECLARE_SET_OP_PRIORITY:
714                                                  d_fail("declare does not expect argument, line %d", line);
715                                                  break;
716         default:
717                                                  new_declaration(g, new_ident(s, null), kind);
718                                                  break;
719     }
720 }
721 
722 D_Pass *
723 find_pass(Grammar *g, const(char)[] name) {
724     name = name.stripLeft();
725     foreach (p; g.passes)
726         if (p.name == name)
727             return p;
728     return null;
729 }
730 
731 void
732 add_pass(Grammar *g, string name, uint kind, uint line) {
733     if (find_pass(g, name))
734         d_fail("duplicate pass '%s' line %d", name, line);
735     else {
736         D_Pass *p = new D_Pass();
737         p.name = name;
738         p.kind = kind;
739         p.index = g.pass_index++;
740         vec_add(&g.passes, p);
741     }
742 }
743 
744     void
745 add_pass_code(Grammar *g, Rule *r, string name,
746         string code, uint pass_line, uint code_line)
747 {
748     D_Pass *p = find_pass(g, name);
749     if (!p)
750         d_fail("unknown pass '%s' line %d", name, pass_line);
751     while (r.pass_code.length <= p.index) vec_add(&r.pass_code, null);
752     r.pass_code[p.index] = new Code();
753     r.pass_code[p.index].code = code;
754     r.pass_code[p.index].line = code_line;
755 }
756 
757 
758 Production *
759 new_internal_production(Grammar *g, Production *p) @safe {
760     string n = p ? p.name : " _synthetic";
761     string name = n ~ "__" ~ g.productions.length.to!string();
762     Production *pp = new_production(g, name);
763     pp.internal = InternalKind.INTERNAL_HIDDEN;
764     pp.regex = p ? p.regex : 0;
765     if (p) {
766         bool found = false;
767         Production *tp = null;
768         for (int i = 0; i < g.productions.length; i++) {
769             if (found) {
770                 Production *ttp = g.productions[i];
771                 g.productions[i] = tp;
772                 tp = ttp;
773             } else if (p == g.productions[i]) {
774                 found = true;
775                 tp = g.productions[i+1];
776                 g.productions[i+1] = pp;
777                 i++;
778             }
779         }
780     }
781     return pp;
782 }
783 
784 void
785 conditional_EBNF(Grammar *g) {
786     Production *pp = new_internal_production(g, g.p);
787     pp.internal = InternalKind.INTERNAL_CONDITIONAL;
788     Rule *rr = new_rule(g, pp);
789     rr.elems ~= g.r.elems[$-1];
790     g.r.elems[$-1].rule = rr;
791     rr.elems[$-1].rule = rr;
792     pp.rules ~= rr;
793     pp.rules ~= new_rule(g, pp);
794     g.r.elems[$-1] = new_elem_nterm(pp, g.r);
795 }
796 
797 void
798 star_EBNF(Grammar *g) {
799     Production *pp = new_internal_production(g, g.p);
800     pp.internal = InternalKind.INTERNAL_STAR;
801     Rule *rr = new_rule(g, pp);
802     if (!g.right_recursive_BNF) {
803         rr.elems ~= new_elem_nterm(pp, rr);
804         rr.elems ~= g.r.elems[$-1];
805         g.r.elems[$-1] = new_elem_nterm(pp, g.r);
806         rr.elems[$-1].rule = rr;
807     } else {
808         rr.elems ~= g.r.elems[$-1];
809         g.r.elems[$-1] = new_elem_nterm(pp, g.r);
810         rr.elems[$-1].rule = rr;
811         rr.elems ~= new_elem_nterm(pp, rr);
812     }
813     pp.rules ~= rr;
814     pp.rules ~= new_rule(g, pp);
815 }
816 
817 void
818 plus_EBNF(Grammar *g) {
819     Production *pp = new_internal_production(g, g.p);
820     pp.internal = InternalKind.INTERNAL_PLUS;
821     Rule *rr = new_rule(g, pp);
822     Elem *elem = g.r.elems[$-1];
823     if (!g.right_recursive_BNF) {
824         rr.elems ~= new_elem_nterm(pp, rr);
825         rr.elems ~= dup_elem(elem, rr);
826         g.r.elems[$-1] = new_elem_nterm(pp, g.r);
827         if (g.r.rule_priority) {
828             rr.rule_priority = g.r.rule_priority;
829             rr.rule_assoc = AssocKind.ASSOC_NARY_LEFT;
830         }
831     } else {
832         rr.elems ~= dup_elem(elem, rr);
833         g.r.elems[$-1] = new_elem_nterm(pp, g.r);
834         rr.elems ~= new_elem_nterm(pp, rr);
835         if (g.r.rule_priority) {
836             rr.rule_priority = g.r.rule_priority;
837             rr.rule_assoc = AssocKind.ASSOC_NARY_RIGHT;
838         }
839     }
840     pp.rules ~= rr;
841     rr = new_rule(g, pp);
842     rr.elems ~= elem;
843     elem.rule = rr;
844     pp.rules ~= rr;
845 }
846 
847 void
848 rep_EBNF(Grammar *g, int min, int max) {
849     if (max < min) max = min;
850 
851     Production *pp = new_internal_production(g, g.p);
852     Elem *elem = g.r.elems[$-1];
853     for (int i = min; i <= max; i++) {
854         Rule *rr = new_rule(g, pp);
855         for (int j = 0; j < i; j++)
856             rr.elems ~= dup_elem(elem, rr);
857         pp.rules ~= rr;
858     }
859     g.r.elems[$-1] = new_elem_nterm(pp, g.r);
860     FREE(elem);
861 }
862 
863 void
864 initialize_productions(Grammar *g) @safe {
865     Production *pp = new_production(g, "0 Start");
866     pp.internal = InternalKind.INTERNAL_HIDDEN;
867 }
868 
869 void
870 finish_productions(Grammar *g) {
871     Production *pp = g.productions[0];
872     Rule *rr = new_rule(g, pp);
873     rr.elems ~= new_elem_nterm(null, rr);
874     pp.rules ~= rr;
875     rr.elems[0].e.nterm = g.productions[1];
876 }
877 
878 Production* lookup_production(Grammar* g, const(char)[] name) @safe
879 {
880     foreach(p; g.productions)
881         if (p.name == name) return p;
882     return null;
883 }
884 
885 private Term *
886 lookup_token(Grammar *g, const(char)[] name) @safe {
887     foreach(t; g.terminals)
888         if (t.kind == TermKind.TERM_TOKEN && t.string_ == name)
889             return t;
890     return null;
891 }
892 
893 private Term *
894 unique_term(Grammar *g, Term *t) @safe {
895     foreach (i; g.terminals)
896         if (t.kind == i.kind &&
897                 t.term_priority == i.term_priority &&
898                 t.term_name == i.term_name &&
899                 (!g.set_op_priority_from_rule ||
900                  (t.op_assoc == i.op_assoc &&
901                   t.op_priority == i.op_priority)) &&
902                 t.string_ == i.string_)
903             return i;
904     return t;
905 }
906 
907 private void
908 compute_nullable(Grammar *g) {
909     /* ensure that the trivial case is the first cause */
910     foreach(p; g.productions) {
911         foreach (r; p.rules)
912             if (!r.elems.length) {
913                 p.nullable = r;
914                 break;
915             }
916     }
917 
918     bool changed = true;
919     /* transitive closure */
920     while (changed) {
921         changed = false;
922         foreach (p; g.productions) {
923             if (!p.nullable)
924                 foreach (r; p.rules) {
925                     foreach (e; r.elems) {
926                         if (e.kind != ElemKind.ELEM_NTERM || !e.e.nterm.nullable)
927                             goto Lnot_nullable;
928                     }
929                     changed = true;
930                     p.nullable = r;
931                     break;
932                 }
933 Lnot_nullable:;
934         }
935     }
936 }
937 
938 /*
939    verify and cleanup the grammar datastructures
940    - resolve non-terminals
941    - set element indexes
942  */
943 private void
944 resolve_grammar(Grammar *g) {
945     Production *p, pp;
946     Elem *e;
947     Term *last_term, t;
948 
949     g.rule_index = 0;
950     for (int i = 0; i < g.productions.length; i++) {
951         p = g.productions[i];
952         if (p != lookup_production(g, p.name))
953             d_fail("duplicate production '%s'", p.name);
954         p.index = i;
955         foreach (r; p.rules) {
956             r.index = g.rule_index++;
957             last_term = null;
958             for (int k = 0; k < r.elems.length; k++) {
959                 e = r.elems[k];
960                 e.index = k;
961                 if (e.kind == ElemKind.ELEM_UNRESOLVED) {
962                     pp = lookup_production(g, e.e.unresolved);
963                     if (pp) {
964                         e.e.unresolved = null;
965                         e.kind = ElemKind.ELEM_NTERM;
966                         e.e.nterm = pp;
967                     } else
968                     {
969                         t = lookup_token(g, e.e.unresolved);
970                         if (t) {
971                             e.e.unresolved = null;
972                             e.kind = ElemKind.ELEM_TERM;
973                             e.e.term = t;
974                         } else {
975                             d_fail("unresolved identifier: '%s'", e.e.unresolved);
976                         }
977                     }
978                 }
979                 if (e.kind == ElemKind.ELEM_TERM)
980                     last_term = e.e.term;
981             }
982             r.end.index = cast(uint)r.elems.length;
983             if (g.set_op_priority_from_rule) {
984                 if (last_term && r.rule_assoc) {
985                     last_term.op_assoc = r.rule_assoc;
986                     last_term.op_priority = r.rule_priority;
987                 }
988             }
989         }
990     }
991     for (int i = 0; i < g.terminals.length; i++)
992         g.terminals[i].index = i;
993     compute_nullable(g);
994 }
995 
996 private void
997 merge_identical_terminals(Grammar *g) {
998     foreach (p; g.productions) {
999         foreach (r; p.rules) {
1000             foreach (e; r.elems) {
1001                 if (e.kind == ElemKind.ELEM_TERM)
1002                     e.e.term = unique_term(g, e.e.term);
1003             }
1004         }
1005     }
1006 }
1007 
1008 void
1009 print_term(Term *t) {
1010     string s = t.string_ ? escape_string(t.string_) : null;
1011     if (t.term_name)
1012         logf("term_name(\"%s\") ", t.term_name);
1013     else if (t.kind == TermKind.TERM_STRING) {
1014         if (!t.string_.length)
1015             logf("<EOF> ");
1016         else
1017             logf("string(\"%s\") ", s);
1018     } else if (t.kind == TermKind.TERM_REGEX) {
1019         logf("regex(\"%s\") ", s);
1020     } else if (t.kind == TermKind.TERM_CODE)
1021         logf("code(\"%s\") ", s);
1022     else if (t.kind == TermKind.TERM_TOKEN)
1023         logf("token(\"%s\") ", s);
1024     else
1025         d_fail("unknown token kind");
1026 }
1027 
1028 void
1029 print_elem(Elem *ee) {
1030     if (ee.kind == ElemKind.ELEM_TERM)
1031         print_term(ee.e.term);
1032     else if (ee.kind == ElemKind.ELEM_UNRESOLVED)
1033         logf("%s ", ee.e.unresolved);
1034     else
1035         logf("%s ", ee.e.nterm.name);
1036 }
1037 
1038 private string
1039 assoc_str(AssocKind e) {
1040     return [
1041         AssocKind.ASSOC_NONE : "none",
1042         AssocKind.ASSOC_NARY_LEFT: "left" ,
1043         AssocKind.ASSOC_NARY_RIGHT: "right" ,
1044         AssocKind.ASSOC_UNARY_LEFT: "unary_left" ,
1045         AssocKind.ASSOC_UNARY_RIGHT: "unary_right" ,
1046         AssocKind.ASSOC_BINARY_LEFT: "binary_left" ,
1047         AssocKind.ASSOC_BINARY_RIGHT: "binary_right" ,
1048         AssocKind.ASSOC_NO: "noassoc"][e];
1049 }
1050 
1051 void
1052 print_rule(Rule *r) {
1053     int k;
1054 
1055     logf("%s: ", r.prod.name);
1056     for (k = 0; k < r.elems.length; k++)
1057         print_elem(r.elems[k]);
1058     if (r.speculative_code.code)
1059         logf("SPECULATIVE_CODE\n%s\nEND CODE\n", r.speculative_code.code);
1060     if (r.final_code.code)
1061         logf("FINAL_CODE\n%s\nEND CODE\n", r.final_code.code);
1062 }
1063 
1064 void
1065 print_grammar(Grammar *g) {
1066     uint i, j, k;
1067     Production *pp;
1068     Rule *rr;
1069 
1070     if (!g.productions.length)
1071         return;
1072     logf("PRODUCTIONS\n\n");
1073     for (i = 0; i < g.productions.length; i++) {
1074         pp = g.productions[i];
1075         logf("%s (%d)\n", pp.name, i);
1076         for (j = 0; j < pp.rules.length; j++) {
1077             rr = pp.rules[j];
1078             if (!j)
1079                 logf("\t: ");
1080             else
1081                 logf("\t| ");
1082             for (k = 0; k < rr.elems.length; k++)
1083                 print_elem(rr.elems[k]);
1084             if (rr.op_priority)
1085                 logf("op %d ", rr.op_priority);
1086             if (rr.op_assoc)
1087                 logf("$%s ", assoc_str(rr.op_assoc));
1088             if (rr.rule_priority)
1089                 logf("rule %d ", rr.rule_priority);
1090             if (rr.rule_assoc)
1091                 logf("$%s ", assoc_str(rr.rule_assoc));
1092             if (rr.speculative_code.code)
1093                 logf("%s ", rr.speculative_code.code);
1094             if (rr.final_code.code)
1095                 logf("%s ", rr.final_code.code);
1096             logf("\n");
1097         }
1098         logf("\t;\n");
1099         logf("\n");
1100     }
1101     logf("TERMINALS\n\n");
1102     for (i = 0; i < g.terminals.length; i++) {
1103         logf("\t");
1104         print_term(g.terminals[i]);
1105         logf("(%d)\n", i + g.productions.length);
1106     }
1107     logf("\n");
1108 }
1109 
1110 private void
1111 print_item(Item *i) {
1112     int j, end = 1;
1113 
1114     logf("\t%s: ", i.rule.prod.name);
1115     for (j = 0; j < i.rule.elems.length; j++) {
1116         Elem *e = i.rule.elems[j];
1117         if (i == e) {
1118             logf(". ");
1119             end = 0;
1120         }
1121         print_elem(e);
1122     }
1123     if (end)
1124         logf(". ");
1125     logf("\n");
1126 }
1127 
1128 private void
1129 print_conflict(string kind, int *conflict) {
1130     if (!*conflict) {
1131         logf("  CONFLICT (before precedence and associativity)\n");
1132         *conflict = 1;
1133     }
1134     logf("\t%s conflict ", kind);
1135     logf("\n");
1136 }
1137 
1138 private void
1139 print_state(State *s) {
1140     int j, conflict = 0;
1141 
1142     writefln("STATE %d (%d ITEMS)%s", s.index, s.items.length,
1143             s.accept ? " ACCEPT" : "");
1144     for (j = 0; j < s.items.length; j++)
1145         print_item(s.items[j]);
1146     if (s.gotos.length)
1147         logf("  GOTO\n");
1148     for (j = 0; j < s.gotos.length; j++) {
1149         logf("\t");
1150         print_elem(s.gotos[j].elem);
1151         logf(" : %d\n", s.gotos[j].state.index);
1152     }
1153     logf("  ACTION\n");
1154     for (j = 0; j < s.reduce_actions.length; j++) {
1155         Action *a = s.reduce_actions[j];
1156         writefln("\t%s\t", action_types[a.kind]);
1157         print_rule(a.rule);
1158         logf("\n");
1159     }
1160     for (j = 0; j < s.shift_actions.length; j++) {
1161         Action *a = s.shift_actions[j];
1162         writefln("\t%s\t", action_types[a.kind]);
1163         if (a.kind == ActionKind.ACTION_SHIFT) {
1164             print_term(a.term);
1165             logf("%d", a.state.index);
1166         }
1167         logf("\n");
1168     }
1169     if (s.reduce_actions.length > 1)
1170         print_conflict("reduce/reduce", &conflict);
1171     if (s.reduce_actions.length && s.shift_actions.length)
1172         print_conflict("shift/reduce", &conflict);
1173     logf("\n");
1174 }
1175 
1176 void
1177 print_states(Grammar *g) {
1178     int i;
1179 
1180     for (i = 0; i < g.states.length; i++)
1181         print_state(g.states[i]);
1182 }
1183 
1184 private bool
1185 state_for_declaration(Grammar *g, int iproduction) {
1186     foreach (d; g.declarations)
1187         if (d.kind == DeclarationKind.DECLARE_STATE_FOR &&
1188                 d.elem.e.nterm.index == iproduction)
1189             return true;
1190     return false;
1191 }
1192 
1193 private void
1194 make_elems_for_productions(Grammar *g) {
1195     int i, j, k, l;
1196     Rule *rr;
1197     Production *ppp;
1198 
1199     Production *pp = g.productions[0];
1200     for (i = 0; i < g.productions.length; i++)
1201         if (!g.productions[i].internal) {
1202             if (g.states_for_all_nterms ||
1203                     state_for_declaration(g, i)) {
1204                 /* try to find an existing elem */
1205                 for (j = 0; j < g.productions.length; j++)
1206                     for (k = 0; k < g.productions[j].rules.length; k++) {
1207                         rr = g.productions[j].rules[k];
1208                         for (l = 0; l < rr.elems.length; l++)
1209                             if (rr.elems[l].e.nterm == g.productions[i]) {
1210                                 g.productions[i].elem = rr.elems[l];
1211                                 break;
1212                             }
1213                     }
1214                 if (j >= g.productions.length) { /* not found */
1215                     g.productions[i].elem =
1216                         new_elem_nterm(g.productions[i], new_rule(g, pp));
1217                     g.productions[i].elem.rule.index = g.rule_index++; /* fake */
1218                 }
1219             }
1220         }
1221     if (!g.states_for_all_nterms &&
1222             g.states_for_whitespace)
1223     {
1224         ppp = lookup_production(g, "whitespace");
1225         if (ppp)
1226         {
1227             ppp.elem = new_elem_nterm(ppp, new_rule(g, pp));
1228             ppp.elem.rule.index = g.rule_index++; /* fake */
1229         }
1230     }
1231 }
1232 
1233 private void
1234 convert_regex_production_one(Grammar *g, Production *p) {
1235     size_t l;
1236     Rule *rr;
1237 
1238     if (p.regex_term) /* already done */
1239         return;
1240 
1241     bool circular = false;
1242 
1243     if (p.in_regex)
1244         d_fail("circular regex production '%s'", p.name);
1245     p.in_regex = 1;
1246     foreach (r; p.rules) {
1247         if (r.final_code.code || (r.speculative_code.code && p.rules.length > 1))
1248             d_fail("final and/or multi-rule code not permitted in regex productions '%s'", p.name);
1249         foreach (e; r.elems) {
1250             if (e.kind == ElemKind.ELEM_NTERM) {
1251                 if (!e.e.nterm.regex)
1252                     d_fail("regex production '%s' cannot invoke non-regex production '%s'",
1253                             p.name, e.e.nterm.name);
1254                 Production *pp = e.e.nterm;
1255                 for (l = 0; l < pp.rules.length; l++)
1256                     if (pp.rules[l].speculative_code.code || pp.rules[l].final_code.code)
1257                         d_fail("code not permitted in rule %d of regex productions '%s'", l, p.name);
1258                 if (p != pp) {
1259                     convert_regex_production_one(g, pp);
1260                 } else {
1261                     circular = true;
1262                 }
1263             } else { /* e.kind == ElemKind.ELEM_TERM */
1264                 if (e.e.term.kind == TermKind.TERM_CODE || e.e.term.kind == TermKind.TERM_TOKEN)
1265                     d_fail("regex production '%s' cannot include scanners or tokens");
1266             }
1267         }
1268     }
1269     string buffer;
1270     Term *t = new Term();
1271     t.kind = TermKind.TERM_REGEX;
1272     t.index = cast(uint)g.terminals.length;
1273     t.regex_production = p;
1274     g.terminals ~= t;
1275     p.regex_term = t;
1276     p.regex_term.term_name = p.name;
1277     //Elem *e;
1278     if (circular) { /* attempt to match to regex operators */
1279         if (p.rules.length != 2)
1280             Lfail: d_fail("unable to resolve circular regex production: '%s'", p.name);
1281         l = p.rules[0].elems.length + p.rules[1].elems.length;
1282         if (l == 2 || l == 3) {
1283             if (p.rules[0].elems.length != 2 && p.rules[1].elems.length != 2)
1284                 goto Lfail;
1285             Rule *r = p.rules[0].elems.length == 2 ? p.rules[0] : p.rules[1];
1286             rr = p.rules[0] == r ? p.rules[1] : p.rules[0];
1287             if (r.elems[0].e.nterm != p && r.elems[1].e.nterm != p)
1288                 goto Lfail;
1289             Elem *e = r.elems[0].e.nterm == p ? r.elems[1] : r.elems[1];
1290             if (rr.elems.length && e.e.term_or_nterm != rr.elems[0].e.term_or_nterm)
1291                 goto Lfail;
1292             t = e.kind == ElemKind.ELEM_TERM ? e.e.term : e.e.nterm.regex_term;
1293             buffer ~= '(';
1294             if (t.kind == TermKind.TERM_STRING)
1295                 buffer ~= escape_string_for_regex(t.string_);
1296             else
1297                 buffer ~= t.string_;
1298             buffer ~= ')';
1299             if (l == 2)
1300             {   buffer ~= '*'; }
1301             else
1302             {   buffer ~= '+'; }
1303             p.regex_term.string_ = buffer;
1304         } else
1305             goto Lfail;
1306     } else { /* handle the base case, p = (r | r'), r = (e e') */
1307         if (p.rules.length > 1)
1308         {  buffer ~= '('; }
1309         for (int j = 0; j < p.rules.length; j++) {
1310             Rule *r = p.rules[j];
1311             if (r.elems.length > 1)
1312             { buffer ~= '('; }
1313             foreach (e; r.elems) {
1314                 t = e.kind == ElemKind.ELEM_TERM ? e.e.term : e.e.nterm.regex_term;
1315                 if (t.kind == TermKind.TERM_STRING)
1316                     buffer ~= escape_string_for_regex(t.string_);
1317                 else
1318                     buffer ~= t.string_;
1319             }
1320             if (r.elems.length > 1)
1321             { buffer ~= ')'; }
1322             if (j != p.rules.length - 1)
1323             { buffer ~= '|'; }
1324         }
1325         if (p.rules.length > 1)
1326         {  buffer ~= ')'; }
1327         p.regex_term.string_ = buffer;
1328     }
1329     p.in_regex = 0;
1330 }
1331 
1332 private void
1333 convert_regex_productions(Grammar *g) {
1334     foreach (p; g.productions) {
1335         if (!p.regex)
1336             continue;
1337         convert_regex_production_one(g, p);
1338     }
1339     foreach (p; g.productions) {
1340         foreach (r; p.rules) {
1341             foreach (e; r.elems) {
1342                 if (e.kind == ElemKind.ELEM_NTERM && e.e.nterm.regex_term) {
1343                     e.e.term = e.e.nterm.regex_term;
1344                     e.kind = ElemKind.ELEM_TERM;
1345                 }
1346             }
1347         }
1348     }
1349 }
1350 
1351 private void
1352 check_default_actions(Grammar *g) {
1353     Production *pdefault;
1354 
1355     pdefault = lookup_production(g, "_");
1356     if (pdefault && pdefault.rules.length > 1)
1357         d_fail("number of rules in default action != 1");
1358 }
1359 
1360 struct EqState {
1361     State   *eq;
1362     Rule    *diff_rule;
1363     State   *diff_state;
1364 }
1365 
1366 void
1367 build_eq(Grammar *g) {
1368     int i, j, k, changed = 1;
1369     State *s, ss;
1370     EqState *e, ee;
1371 
1372     EqState[] eq = new EqState[g.states.length];
1373     while (changed) {
1374         changed = 0;
1375         for (i = 0; i < g.states.length; i++) {
1376             s = g.states[i];
1377             e = &eq[s.index];
1378             for (j = i + 1; j < g.states.length; j++) {
1379                 ss = g.states[j];
1380                 ee = &eq[ss.index];
1381                 if (e.eq || ee.eq)
1382                     continue;
1383                 if (s.same_shifts != ss.same_shifts && ss.same_shifts != s)
1384                     continue;
1385                 /* check gotos */
1386                 if (s.gotos.length != ss.gotos.length)
1387                     continue;
1388                 for (k = 0; k < s.gotos.length; k++) {
1389                     if (elem_symbol(g, s.gotos[k].elem) != elem_symbol(g, ss.gotos[k].elem))
1390                         goto Lcontinue;
1391                     if (s.gotos[k].state != ss.gotos[k].state) {
1392                         EqState *ge = &eq[s.gotos[k].state.index];
1393                         EqState *gee = &eq[ss.gotos[k].state.index];
1394                         if (ge.eq != ss.gotos[k].state && gee.eq != s.gotos[k].state)
1395                             goto Lcontinue;
1396                         if ((ee.diff_state && ee.diff_state != eq[ss.gotos[k].state.index].eq) ||
1397                                 (e.diff_state && e.diff_state != eq[s.gotos[k].state.index].eq))
1398                             goto Lcontinue;
1399                         /* allow one different state */
1400                         ee.diff_state = ss.gotos[k].state;
1401                         e.diff_state = s.gotos[k].state;
1402                     }
1403                 }
1404                 /* check reductions */
1405                 if (s.reduce_actions.length != ss.reduce_actions.length)
1406                     continue;
1407                 for (k = 0; k < s.reduce_actions.length; k++) {
1408                     if (s.reduce_actions[k].rule == ss.reduce_actions[k].rule)
1409                         continue;
1410                     if (s.reduce_actions[k].rule.prod !=
1411                             ss.reduce_actions[k].rule.prod)
1412                         goto Lcontinue;
1413                     if (s.reduce_actions[k].rule.elems.length !=
1414                             ss.reduce_actions[k].rule.elems.length) {
1415                         if ((ee.diff_rule && ee.diff_rule != ss.reduce_actions[k].rule) ||
1416                                 (e.diff_rule && e.diff_rule != s.reduce_actions[k].rule))
1417                             goto Lcontinue;
1418                         /* allow one different rule */
1419                         ee.diff_rule = ss.reduce_actions[k].rule;
1420                         e.diff_rule = s.reduce_actions[k].rule;
1421                     }
1422                 }
1423                 ee.eq = s;
1424                 changed = 1;
1425 Lcontinue:;
1426             }
1427         }
1428     }
1429     for (i = 0; i < g.states.length; i++) {
1430         s = g.states[i];
1431         e = &eq[s.index];
1432         if (e.eq) {
1433             if (!__ctfe && d_verbose_level > 2) {
1434                 logf("eq %d %d ", s.index, e.eq.index);
1435                 if (e.diff_state)
1436                     logf("diff state (%d %d) ",
1437                             e.diff_state.index,
1438                             eq[e.eq.index].diff_state.index);
1439                 if (e.diff_rule) {
1440                     logf("diff rule ");
1441                     logf("[ ");
1442                     print_rule(e.diff_rule);
1443                     logf("][ ");
1444                     print_rule(eq[e.eq.index].diff_rule);
1445                     logf("]");
1446                 }
1447                 logf("\n");
1448             }
1449         }
1450     }
1451     for (i = 0; i < g.states.length; i++) {
1452         s = g.states[i];
1453         e = &eq[s.index];
1454         if (e.eq && e.diff_state) {
1455             if (eq[e.diff_state.index].diff_rule &&
1456                     eq[e.diff_state.index].diff_rule.elems.length == 2)
1457             {
1458                 s.reduces_to = e.eq;
1459                 s.reduces_with = eq[e.eq.index].diff_rule;
1460                 s.reduces_to_then_with = e.diff_rule;
1461             } else if (eq[eq[e.eq.index].diff_state.index].diff_rule &&
1462                     eq[eq[e.eq.index].diff_state.index].diff_rule.elems.length == 2)
1463             {
1464                 e.eq.reduces_to = s;
1465                 s.reduces_with = e.diff_rule;
1466                 s.reduces_to_then_with = eq[e.eq.index].diff_rule;
1467             }
1468         }
1469     }
1470     for (i = 0; i < g.states.length; i++) {
1471         s = g.states[i];
1472         if (s.reduces_to)
1473             if (d_verbose_level)
1474                 logf("reduces_to %d %d\n", s.index, s.reduces_to.index);
1475     }
1476 }
1477 
1478 Grammar *
1479 new_D_Grammar() {
1480     return new Grammar();
1481 }
1482 
1483 private void
1484 free_rule(Rule *r) {
1485     int i;
1486     FREE(r.end);
1487     vec_free(&r.pass_code);
1488     FREE(r);
1489 }
1490 
1491 void
1492 free_D_Grammar(Grammar *g) {
1493     int i, j, k;
1494 
1495     for (i = 0; i < g.productions.length; i++) {
1496         Production *p = g.productions[i];
1497         for (j = 0; j < p.rules.length; j++) {
1498             Rule *r = p.rules[j];
1499             if (r == g.r)
1500                 g.r = null;
1501             for (k = 0; k < r.elems.length; k++) {
1502                 Elem *e = r.elems[k];
1503                 if (e == p.elem)
1504                     p.elem = null;
1505                 FREE(e);
1506             }
1507             if (r.end == p.elem)
1508                 p.elem = null;
1509             free_rule(r);
1510         }
1511         if (p.elem) {
1512             free_rule(p.elem.rule);
1513             FREE(p.elem);
1514         }
1515         FREE(p);
1516     }
1517     for (i = 0; i < g.actions.length; i++)
1518         free_Action(g.actions[i]);
1519     for (i = 0; i < g.states.length; i++) {
1520         State *s = g.states[i];
1521         for (j = 0; j < s.gotos.length; j++) {
1522             FREE(s.gotos[j].elem);
1523             FREE(s.gotos[j]);
1524         }
1525         for (j = 0; j < s.right_epsilon_hints.length; j++)
1526             FREE(s.right_epsilon_hints[j]);
1527         vec_free(&s.right_epsilon_hints);
1528         for (j = 0; j < s.error_recovery_hints.length; j++)
1529             FREE(s.error_recovery_hints[j]);
1530         vec_free(&s.error_recovery_hints);
1531         if (!s.same_shifts) {
1532             for (j = 0; j < s.scanner.states.length; j++) {
1533                 vec_free(&s.scanner.states[j].accepts);
1534                 vec_free(&s.scanner.states[j].live);
1535                 FREE(s.scanner.states[j]);
1536             }
1537             for (j = 0; j < s.scanner.transitions.length; j++)
1538                 if (s.scanner.transitions[j]) {
1539                     vec_free(&s.scanner.transitions[j].live_diff);
1540                     vec_free(&s.scanner.transitions[j].accepts_diff);
1541                     FREE(s.scanner.transitions[j]);
1542                 }
1543             vec_free(&s.scanner.transitions);
1544         }
1545         FREE(s);
1546     }
1547     for (i = 0; i < g.declarations.length; i++) {
1548         FREE(g.declarations[i].elem);
1549         FREE(g.declarations[i]);
1550     }
1551     for (i = 0; i < g.passes.length; i++) {
1552         FREE(g.passes[i]);
1553     }
1554     vec_free(&g.passes);
1555     FREE(g);
1556 }
1557 
1558 private int
1559 scanner_declaration(Declaration *d) {
1560     switch (d.kind) {
1561         case DeclarationKind.DECLARE_TOKENIZE:
1562         case DeclarationKind.DECLARE_LONGEST_MATCH:
1563         case DeclarationKind.DECLARE_ALL_MATCHES:
1564             return 1;
1565         default:
1566             return 0;
1567     }
1568 }
1569 
1570 private void
1571 set_declaration_group(Production *p, Production *root, Declaration *d) {
1572     if (p.declaration_group[d.kind] == root)
1573         return;
1574     if (d.kind == DeclarationKind.DECLARE_TOKENIZE && p.declaration_group[d.kind]) {
1575         d_fail("shared tokenize subtrees");
1576         return;
1577     }
1578     p.declaration_group[d.kind] = root;
1579     p.last_declaration[d.kind] = d;
1580     foreach (r; p.rules) {
1581         foreach (e; r.elems)
1582             if (e.kind == ElemKind.ELEM_NTERM)
1583                 set_declaration_group(e.e.nterm, root, d);
1584     }
1585 }
1586 
1587 private void
1588 propogate_declarations(Grammar *g) {
1589     Production *start = g.productions[0];
1590 
1591     /* global defaults */
1592     if (g.tokenizer)
1593         new_declaration(g, new_elem_nterm(g.productions[0], null), DeclarationKind.DECLARE_TOKENIZE);
1594     if (g.longest_match)
1595         new_declaration(g, new_elem_nterm(g.productions[0], null), DeclarationKind.DECLARE_LONGEST_MATCH);
1596     /* resolve declarations */
1597     foreach (d; g.declarations) {
1598         Elem *e = d.elem;
1599         if (e.kind == ElemKind.ELEM_UNRESOLVED) {
1600             Production *p;
1601             if (e.e.unresolved.length == 0)
1602                 p = g.productions[0];
1603             else
1604             {
1605                 p = lookup_production(g, e.e.unresolved);
1606                 if (!p)
1607                     d_fail("unresolved declaration '%s'", e.e.unresolved);
1608             }
1609             e.e.unresolved = null;
1610             e.kind = ElemKind.ELEM_NTERM;
1611             e.e.nterm = p;
1612         }
1613     }
1614     /* build declaration groups (covering a production subtrees) */
1615     foreach (d; g.declarations) {
1616         if (scanner_declaration(d)) {
1617             Production *p = d.elem.e.nterm;
1618             if (p == start) {
1619                 foreach (pp; g.productions) {
1620                     pp.declaration_group[d.kind] = start;
1621                     pp.last_declaration[d.kind] = d;
1622                 }
1623             } else
1624                 set_declaration_group(p, p, d);
1625         }
1626     }
1627     /* set terminal scan_kind */
1628     foreach (p; g.productions) {
1629         foreach (r; p.rules) {
1630             foreach (e; r.elems) {
1631                 if (e.kind == ElemKind.ELEM_TERM) {
1632                     if (!p.declaration_group[DeclarationKind.DECLARE_LONGEST_MATCH] &&
1633                             !p.declaration_group[DeclarationKind.DECLARE_ALL_MATCHES])
1634                         e.e.term.scan_kind = D_SCAN_DEFAULT;
1635                     else if (p.declaration_group[DeclarationKind.DECLARE_LONGEST_MATCH] &&
1636                             !p.declaration_group[DeclarationKind.DECLARE_ALL_MATCHES])
1637                         e.e.term.scan_kind = D_SCAN_LONGEST;
1638                     else if (!p.declaration_group[DeclarationKind.DECLARE_LONGEST_MATCH] &&
1639                             p.declaration_group[DeclarationKind.DECLARE_ALL_MATCHES])
1640                         e.e.term.scan_kind = D_SCAN_ALL;
1641                     else {
1642                         if (p.last_declaration[DeclarationKind.DECLARE_LONGEST_MATCH].index >
1643                                 p.last_declaration[DeclarationKind.DECLARE_ALL_MATCHES].index)
1644                             e.e.term.scan_kind = D_SCAN_LONGEST;
1645                         else
1646                             e.e.term.scan_kind = D_SCAN_ALL;
1647                     }
1648                 }
1649             }
1650         }
1651     }
1652 }
1653 
1654 private void
1655 merge_shift_actions(State *to, State *from) {
1656     foreach (f; from.shift_actions) {
1657         foreach (t; to.shift_actions)
1658             if (f.term == t.term)
1659                 goto Lnext;
1660         to.shift_actions ~= f;
1661 Lnext:;
1662     }
1663 }
1664 
1665 private void
1666 compute_declaration_states(Grammar *g, Production *p, Declaration *d) {
1667     State *base_s = null;
1668     int scanner = scanner_declaration(d);
1669 
1670     foreach (s; g.states) {
1671         if (d.kind == DeclarationKind.DECLARE_TOKENIZE) {
1672             if (!base_s)
1673                 base_s = s;
1674             else {
1675                 s.same_shifts = base_s;
1676                 merge_shift_actions(base_s, s);
1677             }
1678         }
1679         if (scanner) {
1680             foreach (k; s.items)
1681                 if (k.kind == ElemKind.ELEM_TERM)
1682                     switch (k.e.term.scan_kind) {
1683                         case D_SCAN_LONGEST:
1684                             if (s.scan_kind == D_SCAN_RESERVED ||
1685                                     s.scan_kind == D_SCAN_LONGEST)
1686                                 s.scan_kind = D_SCAN_LONGEST;
1687                             else
1688                                 s.scan_kind = D_SCAN_MIXED;
1689                             break;
1690                         case D_SCAN_ALL:
1691                             if (s.scan_kind == D_SCAN_RESERVED ||
1692                                     s.scan_kind == D_SCAN_ALL)
1693                                 s.scan_kind = D_SCAN_ALL;
1694                             else
1695                                 s.scan_kind = D_SCAN_MIXED;
1696                             break;
1697                         default:
1698                             break;
1699                     }
1700         }
1701     }
1702 }
1703 
1704 private void
1705 map_declarations_to_states(Grammar *g) {
1706     foreach (s; g.states) {
1707         s.scan_kind = D_SCAN_RESERVED;
1708     }
1709     /* map groups to sets of states */
1710     foreach (d; g.declarations)
1711         if (scanner_declaration(d))
1712             compute_declaration_states(g, d.elem.e.nterm, d);
1713     foreach (s; g.states) {
1714         if (s.scan_kind == D_SCAN_RESERVED)
1715             s.scan_kind = D_SCAN_DEFAULT; /* set the default */
1716     }
1717 }
1718 
1719 int
1720 build_grammar(Grammar *g) {
1721     resolve_grammar(g);
1722     convert_regex_productions(g);
1723     propogate_declarations(g);
1724     merge_identical_terminals(g);
1725     make_elems_for_productions(g);
1726     check_default_actions(g);
1727     build_LR_tables(g);
1728     map_declarations_to_states(g);
1729     if (!__ctfe && d_verbose_level) {
1730         logf("%d productions %d terminals %d states %d declarations\n",
1731                 g.productions.length, g.terminals.length, g.states.length,
1732                 g.declarations.length);
1733     }
1734     if (!__ctfe && d_verbose_level > 1) {
1735         print_grammar(g);
1736         print_states(g);
1737     }
1738     build_scanners(g);
1739     build_eq(g);
1740     return 0;
1741 }
1742 
1743 
1744 /*   Wlodek Bzyl, <matwb@univ.gda.pl>
1745  */
1746 
1747 private void
1748 print_term_escaped(Term *t, int double_escaped) {
1749     string s;
1750     if (t.term_name) {
1751         logf("%s ", t.term_name);
1752     } else if (t.kind == TermKind.TERM_STRING) {
1753         s = t.string_ ? escape_string_single_quote(t.string_) : null;
1754         if (!t.string_)
1755             logf("<EOF> ");
1756         else {
1757             logf("'%s' ", double_escaped ? escape_string_single_quote(s) : s);
1758             if (t.ignore_case)
1759                 logf("/i ");
1760             if (t.term_priority)
1761                 writefln("%sterm %d ", double_escaped?"#":"$", t.term_priority);
1762         }
1763     } else if (t.kind == TermKind.TERM_REGEX) {
1764         s = t.string_ ? escape_string(t.string_) : null;
1765         //char *s = t.string_; // ? escape_string(t.string_) : null;
1766         string quote = double_escaped ? "\\\"" : "\"";
1767         logf("%s%s%s ", quote, double_escaped ? escape_string(s) : s, quote);
1768         if (t.ignore_case)
1769             logf("/i ");
1770         if (t.term_priority)
1771             writefln("%sterm %d ", double_escaped?"#":"$", t.term_priority);
1772     } else if (t.kind == TermKind.TERM_CODE) {
1773         s = t.string_ ? escape_string(t.string_) : null;
1774         logf("code(\"%s\") ", s);
1775     } else if (t.kind == TermKind.TERM_TOKEN) {
1776         s = t.string_ ? escape_string(t.string_) : null;
1777         logf("%s ", s);
1778     } else
1779         d_fail("unknown token kind");
1780 }
1781 
1782 /* print_elem changed to call print_term_escaped */
1783 private void
1784 print_element_escaped(Elem *ee, int double_escaped) {
1785     if (ee.kind == ElemKind.ELEM_TERM)
1786         print_term_escaped(ee.e.term, double_escaped);
1787     else if (ee.kind == ElemKind.ELEM_UNRESOLVED)
1788         logf("%s ", ee.e.unresolved);
1789     else
1790         logf("%s ", ee.e.nterm.name);
1791 }
1792 
1793 private void
1794 print_production(Production *p) {
1795     uint j, k;
1796     Rule *r;
1797     string[] opening = [ ""        , "\n\t  [ logf(\""    , "\n\t  { logf(\"" ];
1798     string[] closing = [ "\n"      , "\\n\"); ]\n"          , "\\n\"); }\n" ];
1799     string[] middle  = [ "\n\t:   ", "  <-  "               , "  <=  " ];
1800     string[] assoc   = [ "$"       , "#"                    , "#" ];
1801     string speculative_final_closing = "\\n\"); ]";  /* closing[1] without final newline */
1802     string next_or_rule = "\t|   ";
1803     //  char *regex_production = "  ::=  ";
1804 
1805     uint variant = 0;
1806 
1807     for (j = 0; j < p.rules.length; j++) {
1808 Lmore:
1809         r = p.rules[j];
1810         if (!j) {
1811             //      if (p.regex) {
1812             //  logf("%s%s%s", opening[variant], p.name, regex_production);
1813             //      } else {
1814             writefln("%s%s%s", opening[variant], p.name, middle[variant]);
1815             //      }
1816         } else {
1817             if (variant==0)
1818                 writefln("%s", next_or_rule);
1819             else
1820                 writefln("%s%s%s", opening[variant], p.name, middle[variant]);
1821         }
1822 
1823         for (k = 0; k < r.elems.length; k++)
1824             print_element_escaped(r.elems[k], variant);
1825 
1826         if (r.op_assoc)
1827             writefln(" %s%s ", assoc[variant], assoc_str(r.op_assoc));
1828         if (r.op_priority)
1829             writefln("%d ", r.op_priority);
1830         if (r.rule_assoc)
1831             writefln(" %s%s ", assoc[variant], assoc_str(r.rule_assoc));
1832         if (r.rule_priority)
1833             writefln("%d ", r.rule_priority);
1834 
1835         if ((d_rdebug_grammar_level == 1 && variant == 0) ||
1836                 (d_rdebug_grammar_level == 3 && variant == 0)) {
1837             variant=1;
1838             goto Lmore;
1839         }
1840         if ((d_rdebug_grammar_level == 2 && variant == 0) ||
1841                 (d_rdebug_grammar_level == 3 && variant == 1)) {
1842             if (variant==1)
1843                 writefln("%s", speculative_final_closing);
1844             variant=2;
1845             goto Lmore;
1846         }
1847 
1848         writefln("%s", closing[variant]);
1849         variant = 0;
1850     }
1851     logf("\t;\n");
1852     logf("\n");
1853 }
1854 
1855 private void
1856 print_productions(Grammar *g) {
1857     uint i;
1858     if (!g.productions.length) {
1859         logf("/*\n  There were no productions in the grammar\n*/\n");
1860         return;
1861     }
1862     for (i = 1; i < g.productions.length; i++)
1863         print_production(g.productions[i]);
1864 }
1865 
1866 private void print_declare(string s, string n) {
1867     while(n.length && (n[0].isWhite() || n[0].isDigit())) n = n[1 .. $];
1868     logf(s, n);
1869 }
1870 
1871 private void
1872 print_declarations(Grammar *g) {
1873     int i;
1874 
1875     if (g.tokenizer)
1876         logf("${declare tokenize}\n");
1877     for (i = 0; i < g.declarations.length; i++) {
1878         Declaration *dd = g.declarations[i];
1879         Elem *ee = dd.elem;
1880         switch (dd.kind) {
1881             case DeclarationKind.DECLARE_LONGEST_MATCH:
1882                 if (g.longest_match)
1883                     logf("${declare longest_match}\n");
1884                 else
1885                     print_declare("${declare longest_match %s}\n", ee.e.nterm.name);
1886                 break;
1887             case DeclarationKind.DECLARE_ALL_MATCHES:
1888                 if (!g.longest_match)
1889                     logf("${declare all_matches}\n");
1890                 else
1891                     print_declare("${declare all_matches %s}\n", ee.e.nterm.name);
1892                 break;
1893             default:
1894                 logf("\n/*\nDeclaration.kind: %d", dd.kind);
1895                 logf("\nElem.kind:        %d\n*/\n", ee.kind);
1896         }
1897     }
1898     if (g.set_op_priority_from_rule)
1899         logf("${declare set_op_priority_from_rule}\n");
1900     if (g.states_for_all_nterms)
1901         logf("${declare all_subparsers}\n");
1902     /* todo: DeclarationKind.DECLARE_STATE_FOR */
1903     if (g.default_white_space)
1904         logf("${declare whitespace %s}\n", g.default_white_space);
1905     if (g.save_parse_tree)
1906         logf("${declare save_parse_tree}\n");
1907     /* todo: DECLARE_NUM */
1908 
1909     if (g.scanner.code)
1910         logf("${scanner %s}\n", g.scanner.code);
1911 
1912     { int token_exists = 0;
1913         for (i = 0; i < g.terminals.length; i++) {
1914             Term *t = g.terminals[i];
1915             if (t.kind == TermKind.TERM_TOKEN) {
1916                 writefln("%s %s", token_exists?"":"${token", t.string_);
1917                 token_exists = 1;
1918             }
1919         }
1920         if (token_exists)
1921             logf("}\n");
1922     }
1923 
1924     logf("\n");
1925 }
1926 
1927 void
1928 print_rdebug_grammar(Grammar *g) {
1929     logf("/*\n  Generated by Make DParser\n");
1930     logf("  Available at http://dparser.sf.net\n*/\n\n");
1931 
1932     print_declarations(g);
1933     print_productions(g);
1934 }
1935