makebind source code

1 // A simple tool to convert gdextension_interface.h to D
2 // This tool doesn't support the whole C grammar and is not a C parser by any means, 
3 // it expects a well-formed source code as input.
4 module makebind;
5 
6 import std.algorithm;
7 import std.exception;
8 import std.string;
9 import std.array;
10 import std.stdio;
11 import std.file;
12 import std.ascii;
13 import std.range;
14 
/// Common base class for every AST-like node produced by the parser
/// (enum declarations, enum members, struct declarations, aliases, comments, ...).
/// It carries no data of its own; it only gives the nodes a shared static type.
class Node
{
}
20 
/// Root of the parsed header.
/// Holds the top-level nodes in the order the parser appended them; may be empty.
class Root : Node
{
    /// Top-level declarations, comments and blocks.
    Node[] child;
}
26 
/// Node that wraps a delimited region of the source (see `Block`).
class BlockNode : Node
{
    /// Params:
    ///   block  = the source range this node stands for
    ///   parent = the node this block was found in
    this(Block block, Node parent)
    {
        this.blk = block;
        this.parent = parent;
    }

    /// Source range covered by this node.
    Block blk;
    /// Node this block belongs to.
    Node parent;
    /// Nodes nested inside this block (nothing in this file fills it yet).
    Node[] child;
}
35 
/// A comment copied verbatim from the source, delimiters included.
class Comment : Node
{
    /// Params:
    ///   text      = raw comment text as it appears in the source
    ///   multiline = true for a `/* ... */` comment, false for a `//` comment
    this(string text, bool multiline)
    {
        this.text = text;
        this.multiline = multiline;
    }

    /// Raw comment text.
    string text;
    /// Whether the comment uses the `/* ... */` form.
    bool multiline;
}
43 
/// Enum declaration containing zero or more members.
class EnumDecl : Node
{
    /// Params:
    ///   name = name of the enum type
    this(string name)
    {
        this.name = name;
    }

    /// Name of the enum type.
    string name;
    /// Members in declaration order.
    EnumMemberDecl[] members;
}
52 
/// A single member of an `EnumDecl`.
class EnumMemberDecl : Node
{
    /// Member identifier.
    string name;
    /// Text to the right of `=`; left unset when the member has no explicit value.
    string value;
}
58 
/// Description of a C type as read from the header.
///
/// The class has two responsibilities: it describes plain types (only `name`
/// is meaningful) and function pointers (`isFunPtr` is set and the
/// function-pointer fields below are filled in).
class Type
{
    /// Params:
    ///   name = textual representation of the type
    this(string name)
    {
        this.name = name;
    }

    /// The type itself, as text.
    string name;

    // --- function pointer info ---

    /// True when this type describes a function pointer.
    bool isFunPtr;
    /// Name taken from the `(*name)` part of a function pointer.
    string fptrName;
    /// Parameter name, when this type is used as a function parameter.
    string paramName;
    /// Parameter types of the function pointer.
    Type[] params;
    /// Return type of the function pointer.
    Type ret;
}
73 
/// Plain `typedef` that introduces a new name for a type.
class TypeAliasDecl : Node
{
    /// Params:
    ///   name = the new name introduced by the typedef
    ///   type = the type the new name refers to
    this(string name, Type type)
    {
        this.name = name;
        this.targetType = type;
    }

    /// The aliased type.
    Type targetType;
    /// The alias name.
    string name;
}
82 
/// Struct declaration containing zero or more members.
class StructDecl : Node
{
    /// Params:
    ///   name = name of the struct type
    this(string name)
    {
        this.name = name;
    }

    /// Name of the struct type.
    string name;
    /// Members in declaration order.
    StructMemberDecl[] members;
}
90 
/// A single field of a `StructDecl`.
class StructMemberDecl : Node
{
    /// Field identifier.
    string name;
    /// Field type (may describe a function pointer).
    Type type;
}
96 
/// Token kinds for the most important constructs only. The header contains no
/// expressions or templates, so the whole language does not have to be covered.
enum TokType
{
    lparen,      /// (
    rparen,      /// )
    lbracket,    /// [
    rbracket,    /// ]
    lbrace,      /// {
    rbrace,      /// }

    newline,     /// \n or \r\n
    whitespace,  /// whitespace
    comma,       /// ,
    semicolon,   /// ;
    identifier,  /// C identifier (can be invalid in D)
    text,        /// quoted string
    comment,     /// single-line or multiline comment

    typedef_,    /// type alias
    enum_,       /// typedef enum
    struct_,     /// typedef struct
    block,       /// nested code block such as code inside braces { }
}
118 
// Primitive pseudo-parser that operates on raw text:
// 1) it counts opening and closing braces { } and builds a nested structure
// 2) it then looks for the places where "typedef" is encountered
// 3) typedefs are branched to match enum vs struct vs type declarations
// 4) these declarations are then translated to D code
124 class Parser
125 {
126     Root root;
127     string _source;
128     size_t _offset;
129 
130     // other tokens buffer, for example when reading multipart stuff like `const void *`
131     char[] buf; 
132 
133     
134 
135     void parse(string source)
136     {
137         _offset = 0;
138         _source = source;
139         root = new Root();
140         parseDecls();
141 
142         //writeln(root.child.filter!(s => cast(Comment)s is null));
143 
144         //foreach (c; root.child)
145         //if (auto com = cast(Comment) c)
146         //    writeln(com.text);
147         //    writeln(c);
148 
149         //foreach (c; root.child)
150         //if (auto e = cast(EnumDecl) c)
151         //{
152         //    writeln(e.name);
153         //    e.child.each!(s => writeln(s.name ~ " " ~ s.value));
154         //}
155 
156         //foreach (c; root.child)
157         //if (auto s = cast(StructDecl) c)
158         //{
159         //    writeln(s.name);
160         //    s.child.each!(m => writeln("    " ~ m.name ~ " : " ~ m.type));
161         //}
162 
163         //foreach (c; root.child)
164         //if (auto d = cast(TypeAliasDecl) c)
165         //{
166         //    writeln("typedef " ~ d.name ~ " = " ~ d.targetType.name);
167         //}
168 
169     }
170 
171     void parseDecls()
172     {
173     Lnext:
174         if (_offset >= _source.length)
175             return;
176 
177         if (_offset > 2626)
178             int x = 0;
179 
180         if (_source[_offset] == '/' && (next=='*' || next=='/'))
181             readComment();
182         else if (_source[_offset] == '{') 
183             readBlock();
184         else if (startsWith(_source[_offset..$], "typedef") && isWhite(_source[_offset+"typedef".length]))
185             readTypedef();
186         else if (_source[_offset] == '(')
187             readParens();
188         else if (startsWith(_source[_offset..$], `extern "C"`)) // this is just to skip this block that spans over whole file
189         {
190             auto pos = indexOf(_source, '{', _offset);
191             _offset = pos;
192             readBlock();
193             BlockNode externc = cast(BlockNode) root.child[$-1];
194             _offset = externc.blk.start + 1;
195             parseDecls();
196             _offset = externc.blk.end + 1;
197         }
198         else 
199         {
200             if (!buf.length && isLineBreak(_source[_offset]))
201             {
202                 // no-op, skip lots of empty lines
203             }
204             else
205             {
206                 // keep reading into a buf until next special symbol is encountered
207                 buf ~= _source[_offset];
208             }
209             _offset++;
210         }
211 
212         goto Lnext;
213     }
214 
215     dchar next(size_t inc = 1) const
216     {
217         return _source[_offset+inc];
218     }
219 
220     private void readComment()
221     {
222         dchar next = _source[_offset+1];
223         size_t end = _source.length; // assume EOF if read fails
224         bool multiline = false;
225         if (next == '/')
226         {
227             multiline = false;
228             auto lineEnd = countUntil!isLineBreak(_source[_offset..$]);
229             if (lineEnd != -1)
230                 end = _offset+lineEnd+1;
231         }
232         else if (next == '*')
233         {
234             multiline = true;
235             auto cend = _source.indexOf("*/", _offset);
236             if (cend != -1)
237                 end = cend+2; // indexOf returns start position
238         }
239         else
240             return;
241         auto content = _source[_offset..end];
242         //if (end < _source.length)
243         //    writefln("pos: %d:%d next: %d :: %s", _offset, end, end+1, _source[end+1..end+5]);
244         root.child ~= new Comment(content, multiline);
245         _offset = end; // next symbol after comment break
246     }
247     
248     private void readBlock()
249     {
250         auto block = readNextBlock(_source, _offset);
251         _offset = block.end + 1;
252         root.child ~= new BlockNode(block, root);
253     }
254 
255     private void readParens()
256     {
257         auto block = readNextBlock!('(', ')')(_source, _offset);
258         _offset = block.end + 1;
259     }
260 
261     private void readTypedef()
262     {
263         // we already know this is correct `typedef ` string
264         _offset += "typedef ".length;
265 
266         if (_source[_offset..$].startsWith("enum"))
267         {
268             _offset += "enum".length;
269             readEnum();
270         }
271         else if (_source[_offset..$].startsWith("struct"))
272         {
273             _offset += "struct".length;
274             readStruct();
275         }
276         else 
277         {
278             // it's a type then, we are doomed...
279             // technically typedef can have multiple aliases to the same type listed after comma, 
280             // moreover it allows slap pointer to them too, eww
281             // we don't do that here though
282             size_t next;
283             size_t end = _source.indexOf(';', _offset);
284             auto ltype = parseType(_source[_offset..end], next);
285             if (!ltype.isFunPtr)
286             {
287                 auto rtype = parseType(_source[_offset+next..end], next);
288                 root.child ~= new TypeAliasDecl(rtype.name, ltype);
289             }
290             else
291                 root.child ~= new TypeAliasDecl(ltype.fptrName, ltype); // well, that's crazy
292             _offset = end;
293         }
294     }
295 
296     private void readEnum()
297     {
298         // note that enum can have comments inside but we don't care if they are shifted
299         auto begin = indexOf(_source, '{', _offset);
300         auto end = indexOf(_source, '}', begin);
301         auto semicolon = indexOf(_source, ';', end);
302         auto name = _source[end+1..semicolon].strip();
303         auto members = split(_source[begin+1..end], ',');
304         auto enumDecl = new EnumDecl(name);
305         foreach (i, m; members)
306         {
307             auto member = new EnumMemberDecl();
308             auto valueIdx = m.indexOf('=');
309             if (valueIdx != -1)
310             {
311                 member.name = m[0..valueIdx].strip();
312                 member.value = m[valueIdx+1..$].strip();
313             }
314             else
315             {
316                 member.name = m.strip();
317             }
318             enumDecl.members ~= member;
319         }
320         root.child ~= enumDecl;
321         _offset = semicolon + 1;
322     }
323 
324     private void readStruct()
325     {
326         const s = _source;
327         // due to possible nesting it is probably safer to use readBlock here but ok
328         auto begin = indexOf(_source, '{', _offset);
329         auto end = indexOf(_source, '}', begin);
330         auto semicolon = indexOf(_source, ';', end);
331         auto name = s[end+1..semicolon].strip();
332         auto members = split(s[begin+1..end].ignoreComments, ';').filter!(s => !s.all!isWhite);
333         auto structDecl = new StructDecl(name);
334         foreach (m; members)
335         {
336             auto member = new StructMemberDecl();
337             auto trimmed = m.strip;
338             size_t next;
339             auto ty = parseType(trimmed, next);
340             member.type = ty;
341             if (ty.isFunPtr)
342                 member.name = ty.fptrName;
343             else
344                 member.name = trimmed[next..$].strip();
345             structDecl.members ~= member;
346         }
347         root.child ~= structDecl;
348         _offset = semicolon + 1;
349     }
350 
    // Builds a Type from the leading part of `s`.
    //
    // The string is assumed to be clear of comments. No identifier validation
    // is done, i.e. it will happily return 1int as a type.
    //
    // Params:
    //   s      = text that starts with a type, e.g. a struct member, a typedef
    //            body or a single function parameter
    //   outpos = receives the index in `s` where scanning stopped; callers
    //            treat the text from there on as the declared name
    //
    // Returns: a new Type whose `name` is the text collected in `parts`.
    //          When a `(*` sequence was recognised, `isFunPtr`, `fptrName`,
    //          `ret` and `params` are filled in as well.
    //          NOTE(review): every path ends in `new Type(...)`, so the result
    //          is never null.
    private Type parseType(string s, out size_t outpos)
    {
        char[] parts;             // collected text of the type
        size_t pos;               // current read position in s
        int funNamePartPos = -1;  // index of the '(' that opens `(*name)`
        string fptrName;
        char lastChar;            // last character looked at, whitespace included
        bool isSep;               // whitespace was seen since the last copied character
        bool isFunName;           // a `(*` sequence was found
        bool isConst; // FIXME: unused
        bool isReadingParams;     // set once the `)` handling below decides a parameter list follows
        int level; // current level of parenthesis
        Type[] args;
    Louter:
        while (pos < s.length)
        {
            if (s[pos] == ';')
                break;
            // the `const ` keyword is copied verbatim, trailing space included
            if (s[pos..$].startsWith("const "))
            {
                //isConst = true;
                pos += 6;
                parts ~= "const ";
                lastChar = ' ';
                isConst = true;
                isSep = true;
                continue;
            }
            // whitespace is not copied into parts; it only raises isSep
            if (s[pos].isWhite || s[pos].isLineBreak)
            {
                isSep = true;
                lastChar = s[pos];
                pos++;
                //parts ~= ' ';
                continue;
            }
            // `(` directly followed by `*` marks the name part of a function pointer
            if (lastChar == '(' && s[pos] == '*')
            {
                if (parts.length && pos+2 <= s.length)
                {
                    isFunName = true;
                    funNamePartPos = cast(int) pos-1;
                }
            }
            // when encountered white space can only look for pointers
            // but also there is special case for function pointer name part
            if (isSep && !isConst) 
            {
                size_t succ; // index of the last '*' found by the loop below

                // check fptr name first
                auto lparen = s.indexOf('(', pos);
                if (lparen != -1 && lparen+1 < s.length && s[lparen+1] == '*')
                {
                    // the first '(' at or after pos is followed by '*':
                    // keep scanning normally
                    isSep = false;
                    goto Lout;
                }

                // otherwise consume `*` characters; the first character that is
                // neither whitespace nor '*' ends the whole type
                for (auto i = pos; i < s.length; i++)
                {
                    if ((s[i].isWhite || s[i].isLineBreak) && !parts[$-1].isWhite)
                        continue;
                    else if (s[i] == '*') 
                    {
                        succ = i;
                        // parsing has reached end of string and will now jump to exit
                        // this last '*' will be picked up at the end of this function
                        // otherwise it will emit extra '*' for example in fptr parameters
                        // NOTE(review): no code after the loop appends that '*' — confirm
                        if (i == s.length-1)
                            break Louter;
                        else
                            parts ~= '*';
                    }
                    else 
                    {
                        if (succ)
                            pos = succ+1; // move to next symbol
                        break Louter;
                    }
                }
                Lout:
            }
            if (s[pos] == '(')
                level++;
            else if (s[pos] == ')')
            {   
                level--;
                parts ~= ')';
                pos++;
                if (pos >= s.length)
                    break;
                if (level < 1 && (!isFunName || isReadingParams))
                {
                    // assume we are done
                    break;
                }
                else
                {
                    if (level < 1 && isFunName)
                    {
                        fptrName = s[funNamePartPos+2..pos-1]; // e.g. (*someFunctionPtr) without ptr and parens
                    }
                    isReadingParams = true;
                }
            }

            if (s[pos] == ',' && !isReadingParams)
            {
                break;
            }
            if (isReadingParams)
            {
                parts ~= '(';
                level++;
                scope(exit) level--;
                pos++;

                // messed up declaration
                // NOTE(review): the statement below has no effect; it looks
                // like a leftover debugger breakpoint hook
                if (s.canFind("GDExtensionInterfaceWorkerThreadPoolAddNativeGroupTask"))
                    int x = 0;

                // list of comma breaks
                size_t[] commas = getCommaPositions(s[pos..$]);
                // getCommaPositions returns indices relative to its argument;
                // make them relative to s
                foreach(ref c; commas)
                    c += pos;
                //if (commas.empty && pos < s.length) // just add one stop for that case...
                {
                    commas ~= s.length-1;
                }
                size_t skip;
                // each slice between two stops is parsed as one parameter
                for (auto nextcomma = 0; nextcomma < commas.length; nextcomma++)
                {
                    auto paramStr = s[pos..commas[nextcomma]];
                    // clean up a bit... because this algorithm is stupid
                    while(paramStr.length)
                    {
                        if (paramStr[0] == ',' || paramStr[0].isWhite) 
                        {
                            paramStr = paramStr[1..$];
                            pos++;
                        }
                        else break;
                    }

                    auto ty = parseType(paramStr, skip);
                    string pname;
                    // read the remaining part as parameter name
                    if (skip < paramStr.length)
                    {
                        pname = paramStr[skip..$];
                    }
                    ty.paramName = pname;
                    args ~= ty;
                    pos += paramStr.length;
                }
                // yeah, that 'if s[pos] == )' got it first...
                //parts ~= ')';

            }

            if (pos >= s.length)
            {
                pos = s.length;
                break;
            }

            // ordinary character: copy it and clear the separator state
            lastChar = s[pos];
            parts ~= s[pos];
            pos++;
            isSep = false;
            isConst = false;
        }
        // advance offset position and build type representation
        outpos = pos;
        auto ty = new Type(cast(string)parts);
        if (isFunName)
        {
            ty.isFunPtr = isFunName;
            ty.fptrName = fptrName;
            // everything before the '(' of `(*name)` is the return type
            ty.ret = new Type(s[0..funNamePartPos].strip());
            ty.params = args;
        }
        return ty;
    }
538 
539     private Block readNextBlock(dchar B = '{', dchar E = '}')(in string source, size_t offset)
540     {
541         int level = 0;
542         int it = cast(int) offset;
543         int start = cast(int) offset;
544         for(; it < source.length; it++)
545         {
546             if (source[it] == B)
547                 level++;
548             if (source[it] == E)
549             {
550                 if (level-1 == 0) {
551                     break;
552                 }
553                 level--;
554             }
555         }
556         return Block(start, it, source);
557     }
558 
559     // list of comma positions on zero parentehisis level
560     private size_t[] getCommaPositions(string s)
561     {
562         int level;
563         size_t[] commas;
564         for(auto i = 0; i < s.length; i++)
565         {
566             if (s[i] == '(')
567                 level++;
568             else if (s[i] == ')')
569                 level--;
570             else if (s[i] == ',' && level == 0)
571                 commas ~= i;
572         }
573         return commas;
574     }
575 }
576 
577 
/// Returns `source` with all C comments removed.
///
/// - A `//` comment is removed up to and including the first line-break
///   character that follows it.
/// - A `/* */` comment is removed including both delimiters.
/// - An unterminated comment swallows the rest of the input.
/// - A `/` that does not start a comment is kept as is.
///
/// String literals are not recognised; comment markers inside them are
/// treated as comments.
string ignoreComments(in string source)
{
    auto result = appender!string();
    size_t pos;
    while (pos < source.length)
    {
        string rest = source[pos .. $];
        if (rest.startsWith("//"))
        {
            const eol = rest.indexOfAny("\r\n");
            pos = eol == -1 ? source.length : pos + eol + 1;
        }
        else if (rest.startsWith("/*"))
        {
            // search behind the opener so that "/*/" does not close itself
            const close = rest.indexOf("*/", 2);
            pos = close == -1 ? source.length : pos + close + 2;
        }
        else
        {
            result.put(source[pos]);
            pos++;
        }
    }
    return result.data;
}
623 
/// Counts the line breaks in `source` that occur before position `loc`,
/// i.e. the zero-based number of the line that contains `loc`.
///
/// `\n`, `\r` and the pair `\r\n` each count as one line break.
///
/// Params:
///   source = text to inspect
///   loc    = offset into `source`; values past the end are clamped
size_t getLineNumber(string source, size_t loc)
{
    const limit = min(loc, source.length);
    size_t count;
    foreach (i; 0 .. limit)
    {
        const c = source[i];
        if (c == '\r')
            count++;
        else if (c == '\n' && (i == 0 || source[i - 1] != '\r'))
            count++; // the '\n' of "\r\n" was already counted at the '\r'
    }
    return count;
}
636 
/// True when `d` is a line feed or a carriage return.
bool isLineBreak(dchar d)
{
    switch (d)
    {
        case '\n':
        case '\r':
            return true;
        default:
            return false;
    }
}
638 
/// Range in the parent scope together with the source text it refers to.
struct Block
{
    // symbol offsets, i.e. character indices into `source`
    int start = -1;
    int end = -1;
    string source;

    /// True when the block has a start offset and a source attached.
    @property bool isValid() { return start != -1 && source; }

    /// Checks whether two blocks partially overlap.
    ///
    /// Returns: true only when one block starts strictly inside the other and
    ///          ends after it. Disjoint blocks, nested blocks and blocks with
    ///          the same start all yield false.
    @property bool isOverlaps(Block other)
    {
        // order the two so that `a` is the one that starts first
        Block a = this;
        Block b = other;
        if (start > other.start)
        {
            a = other;
            b = this;
        }
        const bool startsInside = a.start < b.start && b.start < a.end;
        // b begins inside a; it overlaps (rather than nests) when it ends after a
        return startsInside && a.end < b.end;
    }
}
667 
668 
/// Fake preprocessor: discards the `#define`, `#ifndef`, `#ifdef`, `#endif`
/// and `#include` directives found in `source`.
///
/// A directive is removed from its `#` up to, but not including, the line
/// break that ends it, so line breaks are preserved. A directive on the last
/// line without a trailing line break is removed up to the end of the input.
/// In the case of the Godot header it is possible to simply discard the
/// preprocessor work.
///
/// Params:
///   source = header text; may be empty
/// Returns: the text without the listed directives
string preprocess(string source)
{
    auto result = appender!string();
    result.reserve(source.length);

    size_t offset = 0;
    while (offset < source.length)
    {
        if (source[offset] == '#'
            && source[offset .. $].startsWith("#define", "#ifndef", "#ifdef", "#endif", "#include"))
        {
            // indexOfAny reports a code-unit index, which stays a valid slice
            // position even when the directive contains non-ASCII text
            const eol = source.indexOfAny("\r\n", offset);
            if (eol == -1)
                break; // the directive runs to the end of the input
            offset = eol;
        }
        result.put(source[offset]);
        offset++;
    }
    return result.data;
}
696 
/// Tells whether `what` occurs in `str` exactly at position `offset`.
///
/// Params:
///   str    = text to look in
///   offset = position in `str` where the match must begin
///   what   = text to look for
/// Returns: false when `offset` lies past the end of `str`
bool canMatch(string str, size_t offset, string what)
{
    return offset <= str.length && str[offset .. $].startsWith(what);
}
701 
/// Writes the D binding module for `header` to `outFile`.
///
/// The file starts with a fixed prologue (module declaration, imports and
/// `extern (C):`), followed by one line group per top-level node as
/// formatted by `print`.
///
/// Params:
///   header  = parsed header whose children are written out
///   outFile = path of the file to create or overwrite
void writeBindings(Root header, string outFile)
{
    static immutable string[] prologue = [
        "module godot.abi.gdextension_binding;",
        "",
        "import godot.abi.types;",
        "import core.stdc.config;",
        "public import core.stdc.stddef : wchar_t;",
        "",
        "extern (C):",
        "",
    ];

    auto output = File(outFile, "w");
    scope(exit)
        output.close();

    foreach (line; prologue)
        output.writeln(line);

    foreach (node; header.child)
        output.writeln(print(node));
}
723 
/// Formats a node declaration as D code by dispatching on its dynamic type.
///
/// Returns: the formatted text for comments, type aliases, structs and
///          enums; `null` for any other kind of node.
string print(Node n)
{
    if (auto comment = cast(Comment) n)
        return print(comment);

    if (auto typeAlias = cast(TypeAliasDecl) n)
        return print(typeAlias);

    if (auto structDecl = cast(StructDecl) n)
        return print(structDecl);

    if (auto enumDecl = cast(EnumDecl) n)
        return print(enumDecl);

    return null;
}
737 
/// Formats a comment: its text is emitted unchanged.
string print(Comment c)
{
    string verbatim = c.text;
    return verbatim;
}
742 
/// Formats an enum declaration as an `int` alias followed by an anonymous
/// enum of that alias type, one member per line.
string print(EnumDecl decl)
{
    string text = "alias " ~ decl.name ~ " = int;\n";
    text ~= "enum : " ~ decl.name ~ "\n{\n";

    const total = decl.members.length;
    foreach (idx, member; decl.members)
    {
        text ~= "    " ~ member.name;
        if (member.value)
            text ~= " = " ~ member.value;
        // every member except the last one is followed by a separator
        if (idx + 1 < total)
            text ~= ",\n";
    }

    text ~= "\n}\n";
    return text;
}
759 
/// Formats a struct declaration with one `type name;` line per member.
string print(StructDecl decl)
{
    string text = "struct " ~ decl.name ~ "\n{\n";
    foreach (member; decl.members)
        text ~= "    " ~ print(member.type) ~ " " ~ member.name ~ ";\n";
    text ~= "}\n";
    return text;
}
772 
/// Formats a typedef as a D `alias name = type;` declaration.
string print(TypeAliasDecl decl)
{
    string target = print(decl.targetType);
    return "alias " ~ decl.name ~ " = " ~ target ~ ";\n";
}
781 
/// Formats a type as D code.
///
/// Function pointers become `Ret function(params...)`; parameter names are
/// appended for parameters that are not function pointers themselves.
/// For other types the name is emitted stripped; when it contains `const `,
/// the character right after the first five is replaced by `(` and a `)` is
/// inserted before the next `*` or space (or appended when there is neither).
string print(Type type)
{
    if (type.isFunPtr)
    {
        string sig = print(type.ret) ~ " function(";
        const total = type.params.length;
        foreach (idx, param; type.params)
        {
            sig ~= print(param);
            if (!param.isFunPtr && param.paramName)
                sig ~= " " ~ param.paramName.strip();
            if (idx + 1 < total)
                sig ~= ", ";
        }
        sig ~= ")";
        return sig;
    }

    string plain;
    auto constAt = type.name.indexOf("const ");
    if (constAt == -1)
    {
        plain ~= type.name.strip();
        return plain;
    }

    auto text = type.name.dup;
    text["const".length] = '(';

    auto ptrAt = text.indexOf('*', constAt);
    auto spaceAt = text.indexOf(' ', constAt+"const ".length+1);

    // pick the nearest of the two when both exist, otherwise whichever exists
    // (-1 when neither does)
    size_t closeAt;
    if (ptrAt != -1 && spaceAt != -1)
        closeAt = min(ptrAt, spaceAt);
    else
        closeAt = max(ptrAt, spaceAt);

    if (closeAt != -1)
        text.insertInPlace(closeAt, ')');
    else
        text ~= ')';

    plain ~= text.strip();
    return plain;
}
834 
835 
/// Entry point: reads the C header given as first argument, converts it and
/// writes the D bindings to the path given as second argument.
///
/// Throws: `Exception` when fewer than two arguments are supplied.
void main(string[] args)
{
    enforce(args.length > 2, format("2 arguments expected: inpath, outpath - %d given", args.length-1));

    string inFilePath = args[1];
    string outFilePath = args[2];

    // strip preprocessor directives before handing the text to the parser
    auto header = preprocess(readText(inFilePath));

    auto parser = new Parser();
    parser.parse(header);
    writeBindings(parser.root, outFilePath);

    writeln("Writing file '" ~ outFilePath ~ "' done.");
}