// A simple tool to convert gdextension_interface.h to D
// This tool doesn't support the whole C grammar and is not a C parser by any means,
// it expects a well-formed source code as input.
module makebind;

import std.algorithm;
import std.exception;
import std.string;
import std.array;
import std.stdio;
import std.file;
import std.ascii;
import std.range;

// Any AST-like node such as an enum declaration, enum value, a function decl, etc...
class Node
{

}

// Top of the header, can have zero or more child nodes
class Root : Node
{
    Node[] child;
}

// A brace-delimited region of the source, remembered together with its parent node
class BlockNode : Node
{
    this(Block block, Node parent) { this.blk = block; this.parent = parent; }

    Block blk;
    Node parent;
    Node[] child;
}

// Raw comment text as it appeared in the source (including the comment markers)
class Comment : Node
{
    this(string text, bool multiline) { this.text = text; this.multiline = multiline; }

    string text;
    bool multiline;
}

// Enum declaration containing zero or more members
class EnumDecl : Node
{
    this(string name) { this.name = name; }

    string name;
    EnumMemberDecl[] members;
}

// Single enum member; `value` stays null when the member has no explicit initializer
class EnumMemberDecl : Node
{
    string name;
    string value;
}

// ugh, this one has two responsibilities but ok
class Type
{
    this(string name) { this.name = name; }

    // the type itself
    string name;
    // Function Pointer info
    bool isFunPtr;
    string fptrName; // name of a function pointer part
    string paramName; // name of a parameter
    Type[] params;
    Type ret;
}

// plain typedef for a type
class TypeAliasDecl : Node
{
    this(string name, Type type) { this.name = name; this.targetType = type; }

    Type targetType;
    string name;
}

// Struct declaration containing zero or more members
class StructDecl : Node
{
    this(string name) { this.name = name; }

    string name;
    StructMemberDecl[] members;
}

// Single struct field; for function pointer fields the name comes from the type itself
class StructMemberDecl : Node
{
    string name;
    Type type;
}

// Only most important tokens or constructs, since header doesn't have any expressions or templates
// we don't have to parse whole language.
enum TokType
{
    lparen, rparen, // ( )
    lbracket, rbracket, // [ ]
    lbrace, rbrace, // { }

    newline, // \n or \r\n
    whitespace,
    comma, // ,
    semicolon, // ;
    identifier, // C identifier (can be invalid in D)
    text, // quoted string
    comment, // single-line or multiline comment

    typedef_, // type alias
    enum_, // typedef enum
    struct_, // typedef struct
    block, // nested code block such as code inside braces { }
}

// primitive pseudo parser that operates on raw text
// 1) it counts number of opening and closing braces { } and builds nested structure
// 2) it then reads where "typedef" is encountered
// 3) typedefs are branched to match enum vs struct vs type declarations
// 4) these decls are then translated to D code
class Parser
{
    Root root;
    string _source;
    size_t _offset;

    // other tokens buffer, for example when reading multipart stuff like `const void *`
    // NOTE: nothing in this file reads it back; it only accumulates characters
    // that did not start any recognized construct.
    char[] buf;

    // Parses `source` from scratch; the resulting declarations end up in `root.child`.
    void parse(string source)
    {
        _offset = 0;
        _source = source;
        buf = null;
        root = new Root();
        parseDecls();
    }

    // Scans declarations starting at the current offset.
    // `limit` is the exclusive upper bound for the scan; the default scans to the end
    // of the source. It exists so the body of `extern "C" { ... }` can be parsed
    // without running past its closing brace (which used to parse the tail twice).
    void parseDecls(size_t limit = size_t.max)
    {
        while (_offset < _source.length && _offset < limit)
        {
            const rest = _source[_offset .. $];
            const current = _source[_offset];

            if (current == '/' && (next() == '*' || next() == '/'))
                readComment();
            else if (current == '{')
                readBlock();
            else if (rest.startsWith("typedef") && isWhite(next("typedef".length)))
                readTypedef();
            else if (current == '(')
                readParens();
            else if (rest.startsWith(`extern "C"`)) // this is just to skip this block that spans over whole file
            {
                const open = indexOf(_source, '{', _offset);
                enforce(open != -1, `extern "C" is not followed by an opening brace`);
                _offset = open;
                readBlock();
                auto externc = cast(BlockNode) root.child[$ - 1];
                // descend into the block, but stop at its closing brace
                _offset = externc.blk.start + 1;
                parseDecls(externc.blk.end);
                _offset = externc.blk.end + 1;
            }
            else
            {
                if (!buf.length && isLineBreak(current))
                {
                    // no-op, skip lots of empty lines
                }
                else
                {
                    // keep reading into a buf until next special symbol is encountered
                    buf ~= current;
                }
                _offset++;
            }
        }
    }

    // Looks ahead `inc` characters from the current offset.
    // Returns '\0' instead of failing when the lookahead is past the end of the source.
    dchar next(size_t inc = 1) const
    {
        const index = _offset + inc;
        return index < _source.length ? _source[index] : '\0';
    }

    // Reads a `//` or `/* */` comment starting at the current offset and stores it as a node.
    private void readComment()
    {
        const marker = next();
        size_t end = _source.length; // assume EOF if read fails
        bool multiline = false;
        if (marker == '/')
        {
            // byte index of the line break; the first line break character is kept in the text
            const lineEnd = lineBreakIndex(_source, _offset);
            if (lineEnd < _source.length)
                end = lineEnd + 1;
        }
        else if (marker == '*')
        {
            multiline = true;
            // start after the opening marker so that `/*/` is not taken for a complete comment
            const cend = _source.indexOf("*/", _offset + 2);
            if (cend != -1)
                end = cend + 2; // indexOf returns start position
        }
        else
            return;
        root.child ~= new Comment(_source[_offset .. end], multiline);
        _offset = end; // next symbol after comment break
    }

    // Records the brace block starting at the current offset and skips over it.
    private void readBlock()
    {
        auto block = readNextBlock(_source, _offset);
        _offset = block.end + 1;
        root.child ~= new BlockNode(block, root);
    }

    // Skips a parenthesized region without recording anything.
    private void readParens()
    {
        auto block = readNextBlock!('(', ')')(_source, _offset);
        _offset = block.end + 1;
    }

    // Dispatches a typedef to the enum, struct or plain type alias reader.
    private void readTypedef()
    {
        // we already know this is correct `typedef ` string
        _offset += "typedef ".length;
        const rest = _source[_offset .. $];

        if (startsWithKeyword(rest, "enum"))
        {
            _offset += "enum".length;
            readEnum();
        }
        else if (startsWithKeyword(rest, "struct"))
        {
            _offset += "struct".length;
            readStruct();
        }
        else
        {
            // it's a type then, we are doomed...
            // technically typedef can have multiple aliases to the same type listed after comma,
            // moreover it allows slap pointer to them too, eww
            // we don't do that here though
            size_t consumed;
            const end = _source.indexOf(';', _offset);
            enforce(end != -1, "typedef is not terminated by ';'");
            auto ltype = parseType(_source[_offset .. end], consumed);
            if (!ltype.isFunPtr)
            {
                auto rtype = parseType(_source[_offset + consumed .. end], consumed);
                root.child ~= new TypeAliasDecl(rtype.name, ltype);
            }
            else
                root.child ~= new TypeAliasDecl(ltype.fptrName, ltype); // well, that's crazy
            _offset = end;
        }
    }

    // Reads `{ members } Name;` of a typedef'ed enum.
    private void readEnum()
    {
        // note that enum can have comments inside but we don't care if they are shifted
        const begin = indexOf(_source, '{', _offset);
        const end = indexOf(_source, '}', begin);
        const semicolon = indexOf(_source, ';', end);
        enforce(begin != -1 && end != -1 && semicolon != -1, "malformed enum declaration");

        auto enumDecl = new EnumDecl(_source[end + 1 .. semicolon].strip());
        foreach (m; split(_source[begin + 1 .. end], ','))
        {
            // a trailing comma leaves an empty chunk behind, it is not a member
            if (!m.strip().length)
                continue;

            auto member = new EnumMemberDecl();
            const valueIdx = m.indexOf('=');
            if (valueIdx != -1)
            {
                member.name = m[0 .. valueIdx].strip();
                member.value = m[valueIdx + 1 .. $].strip();
            }
            else
            {
                member.name = m.strip();
            }
            enumDecl.members ~= member;
        }
        root.child ~= enumDecl;
        _offset = semicolon + 1;
    }

    // Reads `{ members } Name;` of a typedef'ed struct.
    private void readStruct()
    {
        // due to possible nesting it is probably safer to use readBlock here but ok
        const begin = indexOf(_source, '{', _offset);
        const end = indexOf(_source, '}', begin);
        const semicolon = indexOf(_source, ';', end);
        enforce(begin != -1 && end != -1 && semicolon != -1, "malformed struct declaration");

        auto structDecl = new StructDecl(_source[end + 1 .. semicolon].strip());
        auto fields = split(_source[begin + 1 .. end].ignoreComments, ';')
            .filter!(chunk => !chunk.all!isWhite);
        foreach (field; fields)
        {
            auto member = new StructMemberDecl();
            auto trimmed = field.strip;
            size_t consumed;
            auto ty = parseType(trimmed, consumed);
            member.type = ty;
            if (ty.isFunPtr)
                member.name = ty.fptrName;
            else
                member.name = trimmed[consumed .. $].strip();
            structDecl.members ~= member;
        }
        root.child ~= structDecl;
        _offset = semicolon + 1;
    }

    // Builds a type from a string; `outpos` receives the position where reading stopped,
    // i.e. where the declared name (if any) begins.
    // string is assumed to be clear of comments,
    // no identifier validation is done, i.e. it will happily return 1int as a type
    //
    // NOTE: the statement order below is load-bearing, the logic is intentionally left as is.
    private Type parseType(string s, out size_t outpos)
    {
        char[] parts;
        size_t pos;
        int funNamePartPos = -1;
        string fptrName;
        char lastChar;
        bool isSep;
        bool isFunName;
        bool isConst; // FIXME: unused
        bool isReadingParams;
        int level; // current level of parenthesis
        Type[] args;
    Louter:
        while (pos < s.length)
        {
            if (s[pos] == ';')
                break;
            if (s[pos..$].startsWith("const "))
            {
                pos += 6;
                parts ~= "const ";
                lastChar = ' ';
                isConst = true;
                isSep = true;
                continue;
            }
            if (s[pos].isWhite || s[pos].isLineBreak)
            {
                isSep = true;
                lastChar = s[pos];
                pos++;
                continue;
            }
            // `(*` right after something already read starts a function pointer name part
            if (lastChar == '(' && s[pos] == '*')
            {
                if (parts.length && pos+2 <= s.length)
                {
                    isFunName = true;
                    funNamePartPos = cast(int) pos-1;
                }
            }
            // when encountered white space can only look for pointers
            // but also there is special case for function pointer name part
            if (isSep && !isConst)
            {
                size_t succ;

                // check fptr name first
                auto lparen = s.indexOf('(', pos);
                if (lparen != -1 && lparen+1 < s.length && s[lparen+1] == '*')
                {
                    isSep = false;
                    goto Lout;
                }

                for (auto i = pos; i < s.length; i++)
                {
                    if ((s[i].isWhite || s[i].isLineBreak) && !parts[$-1].isWhite)
                        continue;
                    else if (s[i] == '*')
                    {
                        succ = i;
                        // parsing has reached end of string and will now jump to exit
                        // this last '*' will be picked up at the end of this function
                        // otherwise it will emit extra '*' for example in fptr parameters
                        if (i == s.length-1)
                            break Louter;
                        else
                            parts ~= '*';
                    }
                    else
                    {
                        if (succ)
                            pos = succ+1; // move to next symbol
                        break Louter;
                    }
                }
            Lout:
                {}
            }
            if (s[pos] == '(')
                level++;
            else if (s[pos] == ')')
            {
                level--;
                parts ~= ')';
                pos++;
                if (pos >= s.length)
                    break;
                if (level < 1 && (!isFunName || isReadingParams))
                {
                    // assume we are done
                    break;
                }
                else
                {
                    if (level < 1 && isFunName)
                    {
                        fptrName = s[funNamePartPos+2..pos-1]; // e.g. (*someFunctionPtr) without ptr and parens
                    }
                    isReadingParams = true;
                }
            }

            if (s[pos] == ',' && !isReadingParams)
            {
                break;
            }
            if (isReadingParams)
            {
                parts ~= '(';
                level++;
                scope(exit) level--;
                pos++;

                // list of comma breaks
                size_t[] commas = getCommaPositions(s[pos..$]);
                foreach(ref c; commas)
                    c += pos;
                // always add one final stop so the last parameter is read too
                commas ~= s.length-1;

                size_t skip;
                for (auto nextcomma = 0; nextcomma < commas.length; nextcomma++)
                {
                    auto paramStr = s[pos..commas[nextcomma]];
                    // clean up a bit... because this algorithm is stupid
                    while(paramStr.length)
                    {
                        if (paramStr[0] == ',' || paramStr[0].isWhite)
                        {
                            paramStr = paramStr[1..$];
                            pos++;
                        }
                        else break;
                    }

                    auto ty = parseType(paramStr, skip);
                    string pname;
                    // read the remaining part as parameter name
                    if (skip < paramStr.length)
                    {
                        pname = paramStr[skip..$];
                    }
                    ty.paramName = pname;
                    args ~= ty;
                    pos += paramStr.length;
                }
                // the closing ')' is appended by the `s[pos] == ')'` branch above
            }

            if (pos >= s.length)
            {
                pos = s.length;
                break;
            }

            lastChar = s[pos];
            parts ~= s[pos];
            pos++;
            isSep = false;
            isConst = false;
        }
        // advance offset position and build type representation
        outpos = pos;
        auto ty = new Type(cast(string)parts);
        if (isFunName)
        {
            ty.isFunPtr = isFunName;
            ty.fptrName = fptrName;
            ty.ret = new Type(s[0..funNamePartPos].strip());
            ty.params = args;
        }
        return ty;
    }

    // Finds the extent of the B...E delimited block that starts at or after `offset`.
    // `start` of the result is `offset` itself, `end` is the index of the matching
    // closing delimiter, or source.length when there is none.
    private Block readNextBlock(dchar B = '{', dchar E = '}')(in string source, size_t offset)
    {
        int level = 0;
        int it = cast(int) offset;
        int start = cast(int) offset;
        for(; it < source.length; it++)
        {
            if (source[it] == B)
                level++;
            if (source[it] == E)
            {
                if (level-1 == 0) {
                    break;
                }
                level--;
            }
        }
        return Block(start, it, source);
    }

    // list of comma positions on zero parenthesis level
    private size_t[] getCommaPositions(string s)
    {
        int level;
        size_t[] commas;
        for(auto i = 0; i < s.length; i++)
        {
            if (s[i] == '(')
                level++;
            else if (s[i] == ')')
                level--;
            else if (s[i] == ',' && level == 0)
                commas ~= i;
        }
        return commas;
    }
}

// Byte index of the first line break at or after `from`, or s.length when there is none.
// Deliberately works on code units so the result can be used for slicing.
private size_t lineBreakIndex(string s, size_t from)
{
    while (from < s.length && !isLineBreak(s[from]))
        from++;
    return from;
}

// True when `s` starts with `keyword` and the keyword is not just a prefix of a longer identifier.
private bool startsWithKeyword(string s, string keyword)
{
    if (!s.startsWith(keyword))
        return false;
    if (s.length == keyword.length)
        return true;
    const following = s[keyword.length];
    return !(isAlphaNum(following) || following == '_');
}

// takes an input string and clears all comments
//
// `//` comments are removed up to (not including) the terminating line break,
// `/* */` comments are removed entirely and nothing is inserted in their place.
// An unterminated block comment swallows the rest of the input.
string ignoreComments(in string source)
{
    char[] result;
    result.reserve(source.length);
    size_t pos;
    while (pos < source.length)
    {
        const isSlash = source[pos] == '/' && pos + 1 < source.length;
        if (isSlash && source[pos + 1] == '/')
        {
            // the line break itself is copied by the next iteration
            pos = lineBreakIndex(source, pos + 2);
            continue;
        }
        if (isSlash && source[pos + 1] == '*')
        {
            // look for the terminator only after the opening marker
            const close = source.indexOf("*/", pos + 2);
            pos = close == -1 ? source.length : close + 2;
            continue;
        }
        result ~= source[pos];
        pos++;
    }
    return assumeUnique(result);
}

// Zero-based line number of byte position `loc`.
// "\r\n" counts as a single line break; `loc` past the end is clamped.
size_t getLineNumber(string source, size_t loc)
{
    const limit = min(loc, source.length);
    size_t count;
    foreach (i; 0 .. limit)
    {
        if (source[i] == '\n')
            count += 1;
        else if (source[i] == '\r' && !(i + 1 < source.length && source[i + 1] == '\n'))
            count += 1;
    }
    return count;
}

bool isLineBreak(dchar d) { return d == '\n' || d == '\r'; }

// range in the parent scope and the inner source text
struct Block
{
    // symbol offsets, i.e. character in array
    int start = -1;
    int end = -1;
    string source;

    @property bool isValid() { return start != -1 && source !is null; }

    // check if two blocks overlap and are not nested
    @property bool isOverlaps(Block other)
    {
        Block a = this;
        Block b = other;
        if (start > other.start)
        {
            a = other;
            b = this;
        }
        // b has to begin strictly inside of a and stick out past its end;
        // disjoint, nested or identical blocks do not overlap
        return a.start < b.start && b.start < a.end && a.end < b.end;
    }
}


// fake preprocessor, discards the listed preprocessor directives up to the end of their line
// (the line break itself is kept)
// in case of godot it is possible to just discard the preprocessor work
string preprocess(string source)
{
    static immutable string[] directives = ["#define", "#ifndef", "#ifdef", "#endif", "#include"];

    auto result = appender!string();
    result.reserve(source.length);
    size_t offset = 0;
    while (offset < source.length)
    {
        bool isDirective = false;
        if (source[offset] == '#')
        {
            foreach (directive; directives)
            {
                if (source.canMatch(offset, directive))
                {
                    isDirective = true;
                    break;
                }
            }
        }

        if (isDirective)
        {
            // also covers a directive on the very last line that has no line break
            offset = lineBreakIndex(source, offset);
            continue;
        }
        result.put(source[offset]);
        offset++;
    }
    return result.data;
}

// True when `what` occurs in `str` exactly at `offset`.
bool canMatch(string str, size_t offset, string what)
{
    return offset <= str.length && str[offset..$].startsWith(what);
}

// Writes the D module with all declarations found in `header` to `outFile`.
void writeBindings(Root header, string outFile)
{
    auto file = File(outFile, "w");
    scope(exit)
        file.close();

    file.writeln("module godot.abi.gdextension_binding;");
    file.writeln();
    file.writeln("import godot.abi.types;");
    file.writeln("import core.stdc.config;");
    file.writeln("public import core.stdc.stddef : wchar_t;");
    file.writeln();
    file.writeln("extern (C):");
    file.writeln();

    foreach(decl; header.child)
    {
        // nodes without a textual form (e.g. BlockNode) yield null and end up as an empty line
        auto s = print(decl);
        file.writeln(s);
    }
}

// Formats the node declaration as a D code, returns null for nodes that have no output
string print(Node n)
{
    if (auto c = cast(Comment) n)
        return print(c);
    if (auto td = cast(TypeAliasDecl) n)
        return print(td);
    if (auto st = cast(StructDecl) n)
        return print(st);
    if (auto e = cast(EnumDecl) n)
        return print(e);
    return null;
}

// Comments are emitted verbatim
string print(Comment c)
{
    return c.text;
}

// C enums become an int alias plus an anonymous enum of that type,
// so the members stay accessible without qualification like in C
string print(EnumDecl decl)
{
    string buf;
    buf ~= "alias " ~ decl.name ~ " = int;\n";
    buf ~= "enum : " ~ decl.name ~ "\n{\n";
    foreach(i, m; decl.members)
    {
        buf ~= " " ~ m.name;
        if (m.value.length)
            buf ~= " = " ~ m.value;
        if (i+1 < decl.members.length)
            buf ~= ",\n";
    }
    buf ~= "\n}\n";
    return buf;
}

string print(StructDecl decl)
{
    string buf;
    buf ~= "struct " ~ decl.name ~ "\n{\n";
    foreach (m; decl.members)
    {
        buf ~= " " ~ print(m.type) ~ " " ~ m.name;
        buf ~= ";\n";
    }
    buf ~= "}\n";
    return buf;
}

string print(TypeAliasDecl decl)
{
    string buf;
    buf ~= "alias " ~ decl.name ~ " = ";
    buf ~= print(decl.targetType);
    buf ~= ";\n";
    return buf;
}

// Formats a type: `const T*` becomes `const(T)*`, function pointers become `Ret function(Args)`
string print(Type type)
{
    string buf;
    if (!type.isFunPtr)
    {
        const constPosition = type.name.indexOf("const ");
        if (constPosition != -1)
        {
            auto tmp = type.name.dup;
            // turn the space after `const` into the opening paren, wherever `const` is
            tmp[constPosition + "const".length] = '(';

            // the closing paren goes before the first pointer or whitespace after the const'ed part
            const nextPtrPart = tmp.indexOf('*', constPosition);
            const nextWsPart = tmp.indexOf(' ', constPosition+"const ".length+1);
            ptrdiff_t stop;
            if (nextPtrPart != -1 && nextWsPart != -1)
                stop = min(nextPtrPart, nextWsPart);
            else
                stop = max(nextPtrPart, nextWsPart);

            if (stop != -1)
                tmp.insertInPlace(stop, ')');
            else
                tmp ~= ')';
            buf ~= tmp.strip();
        }
        else
            buf ~= type.name.strip();
    }
    else
    {
        buf ~= print(type.ret) ~ " function(";
        foreach(i, p; type.params)
        {
            buf ~= print(p);
            if (!p.isFunPtr)
            {
                const pname = p.paramName.strip();
                if (pname.length)
                    buf ~= " " ~ pname;
            }
            if (i+1 < type.params.length)
                buf ~= ", ";
        }
        buf ~= ")";
    }
    return buf;
}


void main(string[] args)
{
    enforce(args.length > 2, format("2 arguments expected: inpath, outpath - %d given", args.length-1));

    auto inFilePath = args[1];
    auto outFilePath = args[2];

    auto headerText = readText(inFilePath);
    auto header = preprocess(headerText);

    auto parser = new Parser();
    parser.parse(header);

    writeBindings(parser.root, outFilePath);
    writeln("Writing file '" ~ outFilePath ~ "' done.");
}