1 module ddparser.dparser; 2 3 import std.string; 4 import std.algorithm; 5 import std.stdio; 6 import core.memory; 7 import std.conv; 8 import std.typecons; 9 import std.variant; 10 import std.exception; 11 import ddparser.gram; 12 import ddparser.dparse_tables; 13 import ddparser.dparse; 14 import ddparser.gramgram; 15 import ddparser.parse; 16 import ddparser.write_tables; 17 18 alias D_Grammar = ddparser.gram.Grammar; 19 alias _Parser = ddparser.parse.Parser; 20 21 void hexDump(const ubyte* ptr, uint len) 22 { 23 for(uint i = 0; i < len; ++i) 24 { 25 writef("%X", ptr[i]); 26 } 27 } 28 29 private auto uniqueTypeValue(string f = __FILE__, int i = __LINE__)() 30 { 31 enum Value { value } 32 return Value.value; 33 } 34 35 class Grammar 36 { 37 alias Action = void delegate(Parser.Context c); 38 39 enum propagate = uniqueTypeValue(); 40 enum spec = uniqueTypeValue(); 41 42 static struct Production 43 { 44 string g; 45 bool spec = false; 46 Action a; 47 } 48 49 Grammar opBinary(string s : "<<")(string g) 50 { 51 productions ~= Production(g); 52 return this; 53 } 54 55 Grammar opBinary(string s : "<<")(Action a) 56 { 57 productions[$-1].a = a; 58 return this; 59 } 60 61 Grammar opBinary(string s : "<<")(typeof(propagate)) 62 { 63 return this << (c) 64 { 65 enforce(c.length == 1); 66 c.result = c[0]; 67 }; 68 } 69 70 Grammar opBinary(string s : "<<")(typeof(spec)) 71 { 72 productions[$-1].spec = true; 73 return this; 74 } 75 76 Action actionAtIndex(uint i) 77 { 78 return productions[i].a; 79 } 80 81 final @property void rootSymbol(string symbol) 82 { 83 uint start = firstIndexOfProductionForSymbol(symbol); 84 if (start == 0) return; // The needed production is already first 85 86 uint end = endOfProductionStartingAtIndex(start); 87 88 // Move productions from start to end to the beginning of array 89 productions = productions[start .. end] ~ productions[0 .. start] ~ productions[end .. $]; 90 } 91 92 final @property string rootSymbol() const 93 { 94 assert(productions.length, "Grammar is empty"); 95 auto s = productions[0].g; 96 auto indexOfColon = s.indexOf(":"); 97 assert(indexOfColon != -1, "Malformed production"); 98 return s[0 .. indexOfColon].strip(); 99 } 100 101 override string toString() const 102 { 103 string result; 104 foreach(i, p; productions) 105 { 106 if (i) 107 { 108 if (!p.g.strip().startsWith("|")) result ~= ";"; 109 result ~= "\n"; 110 } 111 result ~= p.g; 112 result ~= " ${action}"; 113 } 114 return result ~ ";"; 115 } 116 117 private final uint endOfProductionStartingAtIndex(uint index) const 118 { 119 foreach(i, p; productions[index + 1 .. $]) 120 { 121 if (!p.g.strip.startsWith("|")) return cast(uint)i + index; 122 } 123 return cast(uint)productions.length; 124 } 125 126 private final uint firstIndexOfProductionForSymbol(string sym) const 127 { 128 string searchString = sym ~ ":"; 129 foreach(i, p; productions) 130 { 131 if (p.g.strip().startsWith(searchString)) return cast(uint)i; 132 } 133 assert(false, "Production " ~ sym ~ " not found"); 134 } 135 136 private static D_Grammar* grammarWithString(string grammarString) 137 { 138 D_Grammar *g = new_D_Grammar(); 139 140 g.set_op_priority_from_rule = 0; 141 g.right_recursive_BNF = 0; 142 g.states_for_whitespace = 1; 143 g.states_for_all_nterms = 1; 144 g.tokenizer = 0; 145 g.longest_match = 1; 146 g.scanner_blocks = 4; 147 g.scanner_block_size = 0; 148 149 // TODO: Can't handle syntax error here =( 150 if (parse_grammar(g, null, grammarString) < 0) return null; 151 152 if (g.productions.length < 2) throw new Exception("Too few productions"); 153 154 if (build_grammar(g) < 0) return null; 155 return g; 156 } 157 158 private @property final D_ParserTables* tables() 159 { 160 if (!_tables) 161 { 162 _tables = createTablesFromGrammar( 163 grammarWithString(toString()), &Parser.spec_code, &Parser.final_code); 164 } 165 return _tables; 166 } 167 168 Production[] productions; 169 private D_ParserTables* _tables; 170 } 171 172 struct Location 173 { 174 ulong offset; 175 uint line; 176 uint column; 177 uint length; 178 } 179 180 abstract class ParseNode 181 { 182 string identifier; 183 Variant value; 184 Location location; 185 abstract @property string stringValue() const; 186 abstract override string toString() const; 187 @property bool isTerminal() const { return true; } 188 @property bool isEmpty() const { return false; } 189 } 190 191 class TerminalNode : ParseNode 192 { 193 override @property string stringValue() const 194 { 195 return value.get!(const string)(); 196 } 197 198 override string toString() const 199 { 200 return "'" ~ stringValue ~ "'"; 201 } 202 } 203 204 class NonTerminalNode : ParseNode 205 { 206 ParseNode[] children; 207 override @property string stringValue() const 208 { 209 return toString(); 210 } 211 212 override string toString() const 213 { 214 return identifier ~ children.to!string(); 215 } 216 217 override @property bool isTerminal() const { return false; } 218 override @property bool isEmpty() const { return children.length == 0; } 219 } 220 221 extern(C) void BREAK() 222 { 223 224 } 225 226 class Parser 227 { 228 alias AmbiguityHandler = ParseNode delegate(ParseNode[]); 229 230 this() 231 { 232 context = new Context(); 233 context.parser = this; 234 } 235 236 ~this() 237 { 238 //if (binaryTables) free_BinaryTables(binaryTables); 239 //if (parser) free_D_Parser(parser); 240 } 241 242 bool setGrammar(Grammar g) 243 { 244 binaryTables = g.tables; 245 if (binaryTables) 246 { 247 grammar = g; 248 createParser(); 249 } 250 return binaryTables != null; 251 } 252 253 ParseNode parse(string s) 254 { 255 D_ParseNode * node = dparse(parser, s); 256 ParseNode result; 257 bool ok = false; 258 if (node && !parser.syntax_errors) 259 { 260 result = cast(ParseNode)node.user; 261 ok = true; 262 } 263 264 if (node) 265 { 266 free_D_ParseNode(parser, node); 267 } 268 269 if (!ok && !errorRecoveryEnabled) 270 { 271 syntaxError(); 272 } 273 274 return result; 275 } 276 277 static class Context 278 { 279 Parser parser; 280 Object userInfo; 281 bool isFinal; 282 283 @property size_t length() pure @safe nothrow 284 { 285 return children.length; 286 } 287 288 auto opDollar() pure @safe nothrow 289 { 290 return length; 291 } 292 293 auto opIndex(size_t index) pure 294 { 295 index < children.length || assert(false, "Out of bounds: "/* ~ this.toString()*/); 296 return children[index]; 297 } 298 299 @property Location location() const 300 { 301 return result.location; 302 } 303 304 ParseNode[] children; 305 ParseNode result; 306 307 override string toString() const 308 { 309 return result.toString(); 310 } 311 } 312 313 final @property Location location() const 314 { 315 return Location(parser.loc.s - inputPtr, parser.loc.line, parser.loc.col); 316 } 317 318 @property bool useGreedinessForDisambiguation() const 319 { 320 if (parser) return !parser.dont_use_greediness_for_disambiguation; 321 return _useGreedinessForDisambiguation; 322 } 323 324 @property void useGreedinessForDisambiguation(bool flag) 325 { 326 _useGreedinessForDisambiguation = flag; 327 if (parser) parser.dont_use_greediness_for_disambiguation = !flag; 328 } 329 330 @property bool useHeightForDisambiguation() const 331 { 332 if (parser) return !parser.dont_use_height_for_disambiguation; 333 return _useHeightForDisambiguation; 334 } 335 336 @property void useHeightForDisambiguation(bool flag) 337 { 338 _useHeightForDisambiguation = flag; 339 if (parser) parser.dont_use_height_for_disambiguation = !flag; 340 } 341 342 @property bool errorRecoveryEnabled() const 343 { 344 if (parser) return !!parser.error_recovery; 345 return _errorRecoveryEnabled; 346 } 347 348 @property void errorRecoveryEnabled(bool flag) 349 { 350 _errorRecoveryEnabled = flag; 351 if (parser) parser.error_recovery = !!flag; 352 } 353 354 AmbiguityHandler ambiguityHandler; 355 void delegate(Parser) syntaxErrorHandler; 356 357 private: 358 void createParser() 359 { 360 parser = new_D_Parser(binaryTables, D_ParseNode_User.sizeof); 361 parser.initial_globals = cast(void*)this; 362 parser.save_parse_tree = 1; 363 parser.free_node_fn = &free_node; 364 // parser.dont_fixup_internal_productions = 0; 365 parser.commit_actions_interval = 0; 366 parser.ambiguity_fn = &ambigfn; 367 parser.syntax_error_fn = &syntaxErrorFn; 368 parser.dont_use_greediness_for_disambiguation = !_useGreedinessForDisambiguation; 369 parser.dont_use_height_for_disambiguation = !_useHeightForDisambiguation; 370 parser.error_recovery = _errorRecoveryEnabled; 371 } 372 373 static D_ParseNode * ambigfn(D_Parser* p, int n, D_ParseNode **v) 374 { 375 if (p.initial_globals) 376 { 377 return (cast(Parser)p.initial_globals).ambiguity(p, v[0 .. n]); 378 } 379 return null; 380 } 381 382 final int tryResolvingAmbiguityByPreferringStringOverRegex(D_ParseNode*[] nodes) 383 { 384 int resolvingIndex = -1; 385 386 foreach(i, pn; nodes) 387 { 388 bool metString = false; 389 bool metRegex = false; 390 auto tempNode = pn; 391 do 392 { 393 auto sym = symbolForNode(tempNode); 394 metString = sym.kind == D_SymbolKind.D_SYMBOL_STRING; 395 metRegex = sym.kind == D_SymbolKind.D_SYMBOL_REGEX; 396 if (metString || metRegex) break; 397 if (d_get_number_of_children(tempNode) != 1) 398 return -1; 399 tempNode = d_get_child(tempNode, 0); 400 } while(true); 401 402 if (metString) 403 { 404 if (resolvingIndex == -1) 405 resolvingIndex = cast(int)i; 406 else 407 return -1; 408 } 409 else if (!metRegex) 410 return -1; 411 } 412 return resolvingIndex; 413 } 414 415 final D_ParseNode* ambiguity(D_Parser* p, D_ParseNode*[] v) 416 { 417 writeln("AMBIGUITY: "); 418 419 foreach (i, n; v) 420 { 421 writeln("NODE ", i); 422 printNode(n); 423 } 424 425 int res = tryResolvingAmbiguityByPreferringStringOverRegex(v); 426 if (res != -1) return v[res]; 427 428 429 // throw new Exception("AMBIG!!"); 430 431 return null; 432 } 433 434 static void free_node(D_ParseNode *d) 435 { 436 if (d.user) 437 { 438 } 439 } 440 441 static void syntaxErrorFn(D_Parser* p) 442 { 443 assert(p.initial_globals); 444 (cast(Parser)p.initial_globals).syntaxError(); 445 } 446 447 void syntaxError() 448 { 449 if (syntaxErrorHandler) syntaxErrorHandler(this); 450 else 451 { 452 throw new Exception("Syntax error: " ~ location.line.to!string() ~ ":" ~ location.column.to!string()); 453 } 454 } 455 456 static int spec_code(void *new_ps, void **children, int n_children, int pn_offset, D_Parser *parser) 457 { 458 if (parser.initial_globals) 459 { 460 return (cast(Parser)parser.initial_globals).action(new_ps, children, n_children, pn_offset, true); 461 } 462 return 0; 463 } 464 465 static int final_code(void *new_ps, void **children, int n_children, int pn_offset, D_Parser *parser) 466 { 467 if (parser.initial_globals) 468 { 469 return (cast(Parser)parser.initial_globals).action(new_ps, children, n_children, pn_offset, false); 470 } 471 return 0; 472 } 473 474 final void appendNodeResults(ref ParseNode[] output, D_ParseNode* node) 475 { 476 ParseNode u = cast(ParseNode)node.user; 477 478 if (u is null) 479 { 480 auto childSym = symbolForNode(node); 481 482 if (childSym.kind == D_SymbolKind.D_SYMBOL_REGEX || childSym.kind == D_SymbolKind.D_SYMBOL_STRING) 483 { 484 string symbolName = symbolNameForSymbol(childSym); 485 if (logLevel) writeln("CH ", symbolName); 486 string value = node.matchedString; 487 auto term = new TerminalNode(); 488 copyLocationFromDParseNodeToParseNode(node, term); 489 term.location.length = cast(uint)value.length; 490 term.identifier = symbolName; 491 term.value = value; 492 node.user = cast(void*)term; 493 output ~= term; 494 } 495 else if (childSym.kind == D_SymbolKind.D_SYMBOL_INTERNAL || childSym.kind == D_SymbolKind.D_SYMBOL_EBNF) 496 { 497 int count = d_get_number_of_children(node); 498 if (logLevel) writeln("CH INT {"); 499 for (int i = 0; i < count; ++i) 500 appendNodeResults(output, d_get_child(node, i)); 501 if (logLevel) writeln("}"); 502 } 503 else if (logLevel) 504 { 505 writeln("CH UNKNOWN ", childSym.kind); 506 } 507 } 508 else 509 { 510 output ~= u; 511 } 512 } 513 514 final int action(void *new_ps, void **children, int n_children, int pn_offset, bool speculative) 515 { 516 uint actionIndex = (cast(PNode*)new_ps).reduction.action_index; 517 518 if (!speculative || grammar.productions[actionIndex].spec) 519 { 520 context.isFinal = !speculative; 521 auto sym = symbolForNode(D_PN(new_ps, pn_offset)); 522 ParseNode[] args; 523 // writeln(speculative ? "SPEC" : "FINAL", " DEB ", symbolNameForSymbol(sym), " ", sym.kind); 524 // if(symbolNameForSymbol(sym) == "FUNC_ARG"){ logLevel = 1; printNode(D_PN(new_ps, pn_offset)); } 525 foreach (child; children[0 .. n_children]) 526 { 527 appendNodeResults(args, D_PN(child, pn_offset)); 528 } 529 logLevel = 0; 530 doReduceAction(D_PN(new_ps, pn_offset), (cast(PNode*)new_ps).reduction.action_index, args); 531 } 532 533 return 0; 534 } 535 536 final void doReduceAction(D_ParseNode* newNode, uint actionIndex, ParseNode[] args) 537 { 538 free_node(newNode); 539 auto action = grammar.actionAtIndex(actionIndex); 540 context.result = _defaultAction(args, newNode); 541 context.children = args; 542 try 543 { 544 if (action) action(context); 545 } 546 catch(Exception e) 547 { 548 writeln("Exception while reducing: ", context); 549 throw e; 550 } 551 newNode.user = cast(void*)context.result; 552 } 553 554 static string symbolNameForSymbol(const D_Symbol* s) @trusted nothrow pure 555 { 556 return s.name; 557 } 558 559 final const(D_Symbol)* symbolForNode(D_ParseNode* n) const @trusted nothrow pure 560 { 561 return &binaryTables.symbols[n.symbol]; 562 } 563 564 final ParseNode _defaultAction(ParseNode[] args, D_ParseNode* newNode) @safe nothrow pure 565 { 566 // if (args.length == 1) return args[0]; 567 auto result = new NonTerminalNode(); 568 result.children = args; 569 result.identifier = symbolNameForSymbol(symbolForNode(newNode)); 570 copyLocationFromDParseNodeToParseNode(newNode, result); 571 if (args.length) 572 { 573 result.location.length = cast(uint)(args[$-1].location.offset - result.location.offset + args[$-1].location.length); 574 } 575 return result; 576 } 577 578 final printNode(D_ParseNode* node) @safe nothrow 579 { 580 //print_parsetree(*binaryTables, node); 581 } 582 583 final @property const(char)* inputPtr() const pure nothrow @trusted 584 { 585 assert(parser); 586 return parser.start; 587 } 588 589 final void copyLocationFromDParseNodeToParseNode(D_ParseNode* from, ParseNode to) pure nothrow @safe const 590 { 591 to.location.offset = from.start_loc.s - inputPtr; 592 to.location.line = from.start_loc.line; 593 to.location.column = from.start_loc.col; 594 } 595 596 ParseNode[] allNodes; // This array is needed not to let GC collect the objects which are referenced from 597 // somewhere in C code 598 599 Context context; 600 D_ParserTables * binaryTables; 601 D_Parser* parser; 602 Grammar grammar; 603 uint logLevel; 604 bool _useGreedinessForDisambiguation = true; 605 bool _useHeightForDisambiguation = true; 606 bool _errorRecoveryEnabled = true; 607 }