1 module ddparser.dparser;
2 
3 import std.string;
4 import std.algorithm;
5 import std.stdio;
6 import core.memory;
7 import std.conv;
8 import std.typecons;
9 import std.variant;
10 import std.exception;
11 import ddparser.gram;
12 import ddparser.dparse_tables;
13 import ddparser.dparse;
14 import ddparser.gramgram;
15 import ddparser.parse;
16 import ddparser.write_tables;
17 
18 alias D_Grammar = ddparser.gram.Grammar;
19 alias _Parser = ddparser.parse.Parser;
20 
21 void hexDump(const ubyte* ptr, uint len)
22 {
23     for(uint i = 0; i < len; ++i)
24     {
25         writef("%X", ptr[i]);
26     }
27 }
28 
29 private auto uniqueTypeValue(string f = __FILE__, int i = __LINE__)()
30 {
31     enum Value { value }
32     return Value.value;
33 }
34 
35 class Grammar
36 {
37     alias Action = void delegate(Parser.Context c);
38 
39     enum propagate = uniqueTypeValue();
40     enum spec = uniqueTypeValue();
41 
42     static struct Production
43     {
44         string g;
45         bool spec = false;
46         Action a;
47     }
48 
49     Grammar opBinary(string s : "<<")(string g)
50     {
51         productions ~= Production(g);
52         return this;
53     }
54 
55     Grammar opBinary(string s : "<<")(Action a)
56     {
57         productions[$-1].a = a;
58         return this;
59     }
60 
61     Grammar opBinary(string s : "<<")(typeof(propagate))
62     {
63         return this << (c)
64         {
65             enforce(c.length == 1);
66             c.result = c[0];
67         };
68     }
69 
70     Grammar opBinary(string s : "<<")(typeof(spec))
71     {
72         productions[$-1].spec = true;
73         return this;
74     }
75 
76     Action actionAtIndex(uint i)
77     {
78         return productions[i].a;
79     }
80 
81     final @property void rootSymbol(string symbol)
82     {
83         uint start = firstIndexOfProductionForSymbol(symbol);
84         if (start == 0) return; // The needed production is already first
85 
86         uint end = endOfProductionStartingAtIndex(start);
87 
88         // Move productions from start to end to the beginning of array
89         productions = productions[start .. end] ~ productions[0 .. start] ~ productions[end .. $];
90     }
91 
92     final @property string rootSymbol() const
93     {
94         assert(productions.length, "Grammar is empty");
95         auto s = productions[0].g;
96         auto indexOfColon = s.indexOf(":");
97         assert(indexOfColon != -1, "Malformed production");
98         return s[0 .. indexOfColon].strip();
99     }
100 
101     override string toString() const
102     {
103         string result;
104         foreach(i, p; productions)
105         {
106             if (i)
107             {
108                 if (!p.g.strip().startsWith("|")) result ~= ";";
109                 result ~= "\n";
110             }
111             result ~= p.g;
112             result ~= " ${action}";
113         }
114         return result ~ ";";
115     }
116 
117     private final uint endOfProductionStartingAtIndex(uint index) const
118     {
119         foreach(i, p; productions[index + 1 .. $])
120         {
121             if (!p.g.strip.startsWith("|")) return cast(uint)i + index;
122         }
123         return cast(uint)productions.length;
124     }
125 
126     private final uint firstIndexOfProductionForSymbol(string sym) const
127     {
128         string searchString = sym ~ ":";
129         foreach(i, p; productions)
130         {
131             if (p.g.strip().startsWith(searchString)) return cast(uint)i;
132         }
133         assert(false, "Production " ~ sym ~ " not found");
134     }
135 
136     private static D_Grammar* grammarWithString(string grammarString)
137     {
138         D_Grammar *g = new_D_Grammar();
139 
140         g.set_op_priority_from_rule = 0;
141         g.right_recursive_BNF = 0;
142         g.states_for_whitespace = 1;
143         g.states_for_all_nterms = 1;
144         g.tokenizer = 0;
145         g.longest_match = 1;
146         g.scanner_blocks = 4;
147         g.scanner_block_size = 0;
148 
149         // TODO: Can't handle syntax error here =(
150         if (parse_grammar(g, null, grammarString) < 0) return null;
151 
152         if (g.productions.length < 2) throw new Exception("Too few productions");
153 
154         if (build_grammar(g) < 0) return null;
155         return g;
156     }
157 
158     private @property final D_ParserTables* tables()
159     {
160         if (!_tables)
161         {
162             _tables = createTablesFromGrammar(
163                     grammarWithString(toString()), &Parser.spec_code, &Parser.final_code);
164         }
165         return _tables;
166     }
167 
168     Production[] productions;
169     private D_ParserTables* _tables;
170 }
171 
172 struct Location
173 {
174     ulong offset;
175     uint line;
176     uint column;
177     uint length;
178 }
179 
180 abstract class ParseNode
181 {
182     string identifier;
183     Variant value;
184     Location location;
185     abstract @property string stringValue() const;
186     abstract override string toString() const;
187     @property bool isTerminal() const { return true; }
188     @property bool isEmpty() const { return false; }
189 }
190 
191 class TerminalNode : ParseNode
192 {
193     override @property string stringValue() const
194     {
195         return value.get!(const string)();
196     }
197 
198     override string toString() const
199     {
200         return "'" ~ stringValue ~ "'";
201     }
202 }
203 
204 class NonTerminalNode : ParseNode
205 {
206     ParseNode[] children;
207     override @property string stringValue() const
208     {
209         return toString();
210     }
211 
212     override string toString() const
213     {
214         return identifier ~ children.to!string();
215     }
216 
217     override @property bool isTerminal() const { return false; }
218     override @property bool isEmpty() const { return children.length == 0; }
219 }
220 
221 extern(C) void BREAK()
222 {
223 
224 }
225 
226 class Parser
227 {
228     alias AmbiguityHandler = ParseNode delegate(ParseNode[]);
229 
230     this()
231     {
232         context = new Context();
233         context.parser = this;
234     }
235 
236     ~this()
237     {
238         //if (binaryTables) free_BinaryTables(binaryTables);
239         //if (parser) free_D_Parser(parser);
240     }
241 
242     bool setGrammar(Grammar g)
243     {
244         binaryTables = g.tables;
245         if (binaryTables)
246         {
247             grammar = g;
248             createParser();
249         }
250         return binaryTables != null;
251     }
252 
253     ParseNode parse(string s)
254     {
255         D_ParseNode * node = dparse(parser, s);
256         ParseNode result;
257         bool ok = false;
258         if (node && !parser.syntax_errors)
259         {
260             result = cast(ParseNode)node.user;
261             ok = true;
262         }
263 
264         if (node)
265         {
266             free_D_ParseNode(parser, node);
267         }
268 
269         if (!ok && !errorRecoveryEnabled)
270         {
271             syntaxError();
272         }
273 
274         return result;
275     }
276 
277     static class Context
278     {
279         Parser parser;
280         Object userInfo;
281         bool isFinal;
282 
283         @property size_t length() pure @safe nothrow
284         {
285             return children.length;
286         }
287 
288         auto opDollar() pure @safe nothrow
289         {
290             return length;
291         }
292 
293         auto opIndex(size_t index) pure
294         {
295             index < children.length || assert(false, "Out of bounds: "/* ~ this.toString()*/);
296             return children[index];
297         }
298 
299         @property Location location() const
300         {
301             return result.location;
302         }
303 
304         ParseNode[] children;
305         ParseNode result;
306 
307         override string toString() const
308         {
309             return result.toString();
310         }
311     }
312 
313     final @property Location location() const
314     {
315         return Location(parser.loc.s - inputPtr, parser.loc.line, parser.loc.col);
316     }
317 
318     @property bool useGreedinessForDisambiguation() const
319     {
320         if (parser) return !parser.dont_use_greediness_for_disambiguation;
321         return _useGreedinessForDisambiguation;
322     }
323 
324     @property void useGreedinessForDisambiguation(bool flag)
325     {
326         _useGreedinessForDisambiguation = flag;
327         if (parser) parser.dont_use_greediness_for_disambiguation = !flag;
328     }
329 
330     @property bool useHeightForDisambiguation() const
331     {
332         if (parser) return !parser.dont_use_height_for_disambiguation;
333         return _useHeightForDisambiguation;
334     }
335 
336     @property void useHeightForDisambiguation(bool flag)
337     {
338         _useHeightForDisambiguation = flag;
339         if (parser) parser.dont_use_height_for_disambiguation = !flag;
340     }
341 
342     @property bool errorRecoveryEnabled() const
343     {
344         if (parser) return !!parser.error_recovery;
345         return _errorRecoveryEnabled;
346     }
347 
348     @property void errorRecoveryEnabled(bool flag)
349     {
350         _errorRecoveryEnabled = flag;
351         if (parser) parser.error_recovery = !!flag;
352     }
353 
354     AmbiguityHandler ambiguityHandler;
355     void delegate(Parser) syntaxErrorHandler;
356 
357 private:
358     void createParser()
359     {
360         parser = new_D_Parser(binaryTables, D_ParseNode_User.sizeof);
361         parser.initial_globals = cast(void*)this;
362         parser.save_parse_tree = 1;
363         parser.free_node_fn = &free_node;
364 //        parser.dont_fixup_internal_productions = 0;
365         parser.commit_actions_interval = 0;
366         parser.ambiguity_fn = &ambigfn;
367         parser.syntax_error_fn = &syntaxErrorFn;
368         parser.dont_use_greediness_for_disambiguation = !_useGreedinessForDisambiguation;
369         parser.dont_use_height_for_disambiguation = !_useHeightForDisambiguation;
370         parser.error_recovery = _errorRecoveryEnabled;
371     }
372 
373     static D_ParseNode * ambigfn(D_Parser* p, int n, D_ParseNode **v)
374     {
375         if (p.initial_globals)
376         {
377             return (cast(Parser)p.initial_globals).ambiguity(p, v[0 .. n]);
378         }
379         return null;
380     }
381 
382     final int tryResolvingAmbiguityByPreferringStringOverRegex(D_ParseNode*[] nodes)
383     {
384         int resolvingIndex = -1;
385 
386         foreach(i, pn; nodes)
387         {
388             bool metString = false;
389             bool metRegex = false;
390             auto tempNode = pn;
391             do
392             {
393                 auto sym = symbolForNode(tempNode);
394                 metString = sym.kind == D_SymbolKind.D_SYMBOL_STRING;
395                 metRegex = sym.kind == D_SymbolKind.D_SYMBOL_REGEX;
396                 if (metString || metRegex) break;
397                 if (d_get_number_of_children(tempNode) != 1)
398                     return -1;
399                 tempNode = d_get_child(tempNode, 0);
400             } while(true);
401 
402             if (metString)
403             {
404                 if (resolvingIndex == -1)
405                     resolvingIndex = cast(int)i;
406                 else
407                     return -1;
408             }
409             else if (!metRegex)
410                 return -1;
411         }
412         return resolvingIndex;
413     }
414 
415     final D_ParseNode* ambiguity(D_Parser* p, D_ParseNode*[] v)
416     {   
417         writeln("AMBIGUITY: ");
418 
419         foreach (i, n; v)
420         {
421             writeln("NODE ", i);
422             printNode(n);
423         }
424 
425         int res = tryResolvingAmbiguityByPreferringStringOverRegex(v);
426         if (res != -1) return v[res];
427 
428 
429 //        throw new Exception("AMBIG!!");
430 
431         return null;
432     }
433 
434     static void free_node(D_ParseNode *d)
435     {
436         if (d.user)
437         {
438         }
439     }
440 
441     static void syntaxErrorFn(D_Parser* p)
442     {
443         assert(p.initial_globals);
444         (cast(Parser)p.initial_globals).syntaxError();
445     }
446 
447     void syntaxError()
448     {
449         if (syntaxErrorHandler) syntaxErrorHandler(this);
450         else
451         {
452             throw new Exception("Syntax error: " ~ location.line.to!string() ~ ":" ~ location.column.to!string());
453         }
454     }
455 
456     static int spec_code(void *new_ps, void **children, int n_children, int pn_offset, D_Parser *parser)
457     {
458         if (parser.initial_globals)
459         {
460             return (cast(Parser)parser.initial_globals).action(new_ps, children, n_children, pn_offset, true);
461         }
462         return 0;
463     }
464 
465     static int final_code(void *new_ps, void **children, int n_children, int pn_offset, D_Parser *parser)
466     {
467         if (parser.initial_globals)
468         {
469             return (cast(Parser)parser.initial_globals).action(new_ps, children, n_children, pn_offset, false);
470         }
471         return 0;
472     }
473 
474     final void appendNodeResults(ref ParseNode[] output, D_ParseNode* node)
475     {
476         ParseNode u = cast(ParseNode)node.user;
477 
478         if (u is null)
479         {
480             auto childSym = symbolForNode(node);
481 
482             if (childSym.kind == D_SymbolKind.D_SYMBOL_REGEX || childSym.kind == D_SymbolKind.D_SYMBOL_STRING)
483             {
484                 string symbolName = symbolNameForSymbol(childSym);
485                 if (logLevel) writeln("CH ", symbolName);
486                 string value = node.matchedString;
487                 auto term = new TerminalNode();
488                 copyLocationFromDParseNodeToParseNode(node, term);
489                 term.location.length = cast(uint)value.length;
490                 term.identifier = symbolName;
491                 term.value = value;
492                 node.user = cast(void*)term;
493                 output ~= term;
494             }
495             else if (childSym.kind == D_SymbolKind.D_SYMBOL_INTERNAL || childSym.kind == D_SymbolKind.D_SYMBOL_EBNF)
496             {
497                 int count = d_get_number_of_children(node);
498                 if (logLevel) writeln("CH INT {");
499                 for (int i = 0; i < count; ++i)
500                     appendNodeResults(output, d_get_child(node, i));
501                 if (logLevel) writeln("}");
502             }
503             else if (logLevel)
504             {
505                 writeln("CH UNKNOWN ", childSym.kind);
506             }
507         }
508         else
509         {
510             output ~= u;
511         }
512     }
513 
514     final int action(void *new_ps, void **children, int n_children, int pn_offset, bool speculative)
515     {
516         uint actionIndex = (cast(PNode*)new_ps).reduction.action_index;
517 
518         if (!speculative || grammar.productions[actionIndex].spec)
519         {
520             context.isFinal = !speculative;
521             auto sym = symbolForNode(D_PN(new_ps, pn_offset));
522             ParseNode[] args;
523         //    writeln(speculative ? "SPEC" : "FINAL", " DEB ", symbolNameForSymbol(sym), " ", sym.kind);
524         //    if(symbolNameForSymbol(sym) == "FUNC_ARG"){ logLevel = 1; printNode(D_PN(new_ps, pn_offset)); }
525             foreach (child; children[0 .. n_children])
526             {
527                 appendNodeResults(args, D_PN(child, pn_offset));
528             }
529             logLevel = 0;
530             doReduceAction(D_PN(new_ps, pn_offset), (cast(PNode*)new_ps).reduction.action_index, args);
531         }
532 
533         return 0;
534     }
535 
536     final void doReduceAction(D_ParseNode* newNode, uint actionIndex, ParseNode[] args)
537     {
538         free_node(newNode);
539         auto action = grammar.actionAtIndex(actionIndex);
540         context.result = _defaultAction(args, newNode);
541         context.children = args;
542         try
543         {
544             if (action) action(context);
545         }
546         catch(Exception e)
547         {
548             writeln("Exception while reducing: ", context);
549             throw e;
550         }
551         newNode.user = cast(void*)context.result;
552     }
553 
554     static string symbolNameForSymbol(const D_Symbol* s) @trusted nothrow pure
555     {
556         return s.name;
557     }
558 
559     final const(D_Symbol)* symbolForNode(D_ParseNode* n) const @trusted nothrow pure
560     {
561         return &binaryTables.symbols[n.symbol];
562     }
563 
564     final ParseNode _defaultAction(ParseNode[] args, D_ParseNode* newNode) @safe nothrow pure
565     {
566 //        if (args.length == 1) return args[0];
567         auto result = new NonTerminalNode();
568         result.children = args;
569         result.identifier = symbolNameForSymbol(symbolForNode(newNode));
570         copyLocationFromDParseNodeToParseNode(newNode, result);
571         if (args.length)
572         {
573             result.location.length = cast(uint)(args[$-1].location.offset - result.location.offset + args[$-1].location.length);
574         }
575         return result;
576     }
577 
578     final printNode(D_ParseNode* node) @safe nothrow
579     {
580         //print_parsetree(*binaryTables, node);
581     }
582 
583     final @property const(char)* inputPtr() const pure nothrow @trusted
584     {
585         assert(parser);
586         return parser.start;
587     }
588 
589     final void copyLocationFromDParseNodeToParseNode(D_ParseNode* from, ParseNode to) pure nothrow @safe const
590     {
591         to.location.offset = from.start_loc.s - inputPtr;
592         to.location.line = from.start_loc.line;
593         to.location.column = from.start_loc.col;
594     }
595 
596     ParseNode[] allNodes; // This array is needed not to let GC collect the objects which are referenced from
597     // somewhere in C code
598 
599     Context context;
600     D_ParserTables * binaryTables;
601     D_Parser* parser;
602     Grammar grammar;
603     uint logLevel;
604     bool _useGreedinessForDisambiguation = true;
605     bool _useHeightForDisambiguation = true;
606     bool _errorRecoveryEnabled = true;
607 }