diff options
| author | genotrance <dev@genotrance.com> | 2020-04-26 11:29:27 -0500 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2020-04-26 11:29:27 -0500 |
| commit | 73ef7c4ccdcdc0934280f002af23ba663bf38426 (patch) | |
| tree | facee03b526d1991cdd25f1742e7f9a841cfc899 | |
| parent | 43dd43e3183178e71abd3319290c566cb4dd80a9 (diff) | |
| parent | 89c10c4b25226a88062d01e6bf57a9e9146920c5 (diff) | |
| download | nimterop-73ef7c4ccdcdc0934280f002af23ba663bf38426.tar.gz nimterop-73ef7c4ccdcdc0934280f002af23ba663bf38426.zip | |
Merge pull request #191 from jyapayne/poc_print_ast
[ast2] Enable parsing some C Macro expressions to Nim expressions using treesitter
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | nimterop/ast2.nim | 112 |
| -rw-r--r-- | nimterop/comphelp.nim | 18 |
| -rw-r--r-- | nimterop/exprparser.nim | 587 |
| -rw-r--r-- | nimterop/getters.nim | 43 |
| -rw-r--r-- | nimterop/globals.nim | 16 |
| -rw-r--r-- | nimterop/toast.nim | 42 |
| -rw-r--r-- | nimterop/tshelp.nim | 28 |
| -rw-r--r-- | tests/include/tast2.h | 36 |
| -rw-r--r-- | tests/tast2.nim | 56 |
| -rw-r--r-- | tests/tmath.nim | 6 |
10 files changed, 822 insertions, 122 deletions
diff --git a/nimterop/ast2.nim b/nimterop/ast2.nim index ce45f1d..474ec69 100644 --- a/nimterop/ast2.nim +++ b/nimterop/ast2.nim @@ -1,12 +1,12 @@ import macros, os, sequtils, sets, strformat, strutils, tables, times -import regex +import options as opts -import compiler/[ast, idents, lineinfos, modulegraphs, msgs, options, parser, renderer] +import compiler/[ast, idents, lineinfos, modulegraphs, msgs, options, renderer] import "."/treesitter/api -import "."/[globals, getters] +import "."/[globals, getters, exprparser, comphelp, tshelp] proc getPtrType*(str: string): string = result = case str: @@ -19,55 +19,6 @@ proc getPtrType*(str: string): string = else: str -proc handleError*(conf: ConfigRef, info: TLineInfo, msg: TMsgKind, arg: string) = - # Raise exception in parseString() instead of exiting for errors - if msg < warnMin: - raise newException(Exception, msgKindToString(msg)) - -proc parseString(gState: State, str: string): PNode = - # Parse a string into Nim AST - use custom error handler that raises - # an exception rather than exiting on failure - try: - result = parseString( - str, gState.identCache, gState.config, errorHandler = handleError - ) - except: - decho getCurrentExceptionMsg() - -proc getLit*(gState: State, str: string, expression = false): PNode = - # Used to convert #define literals into const and expressions - # in array sizes - # - # `expression` is true when `str` should be converted into a Nim expression - let - str = str.replace(re"/[/*].*?(?:\*/)?$", "").strip() - - if str.contains(re"^[\-]?[\d]+$"): # decimal - result = newIntNode(nkIntLit, parseInt(str)) - - elif str.contains(re"^[\-]?[\d]*[.]?[\d]+$"): # float - result = newFloatNode(nkFloatLit, parseFloat(str)) - - elif str.contains(re"^0x[\da-fA-F]+$"): # hexadecimal - result = gState.parseString(str) - - elif str.contains(re"^'[[:ascii:]]'$"): # char - result = newNode(nkCharLit) - result.intVal = str[1].int64 - - elif str.contains(re"""^"[[:ascii:]]+"$"""): # char * - result = 
newStrNode(nkStrLit, str[1 .. ^2]) - - else: - let - str = - if expression: gState.getNimExpression(str) - else: str - result = gState.parseString(str) - - if result.isNil: - result = newNode(nkNilLit) - proc getOverrideOrSkip(gState: State, node: TSNode, origname: string, kind: NimSymKind): PNode = # Check if symbol `origname` of `kind` and `origname` has any cOverride defined # and use that if present @@ -90,6 +41,7 @@ proc getOverrideOrSkip(gState: State, node: TSNode, origname: string, kind: NimS result = pnode[0][0] else: gecho &"\n# $1'{origname}' skipped" % skind + gState.skippedSyms.incl origname if gState.debug: gState.skipStr &= &"\n{gState.getNodeVal(node)}" @@ -148,15 +100,36 @@ proc newConstDef(gState: State, node: TSNode, fname = "", fval = ""): PNode = fval else: gState.getNodeVal(node[1]) - valident = - gState.getLit(val) + + var valident = newNode(nkNone) + + withCodeAst(val, gState.mode): + # This section is a hack for determining that the first + # node is a type, which shouldn't be accepted by a const + # def section. Need to replace this with some other mechanism + # to handle type aliases + var maybeTyNode: TSNode + # Take the very first node, which may be 2 levels + # down if there is an error node + if root.len > 0 and root[0].getName() == "ERROR": + maybeTyNode = root[0][0] + elif root.len > 0: + maybeTyNode = root[0] + + if not maybeTyNode.isNil: + let name = maybeTyNode.getName() + case name + of "type_descriptor", "sized_type_specifier": + discard + else: + # Can't do gState.parseCExpression(root) here for some reason? + # get a SEGFAULT if we use root + valident = gState.parseCExpression(val) if name.Bl: # Name skipped or overridden since blank result = gState.getOverrideOrSkip(node, origname, nskConst) - elif valident.kind in {nkCharLit .. nkStrLit} or - (valident.kind == nkStmtList and valident.len > 0 and - valident[0].kind in {nkCharLit .. 
nkStrLit}): + elif valident.kind != nkNone: if gState.addNewIdentifer(name): # const X* = Y # @@ -180,6 +153,7 @@ proc newConstDef(gState: State, node: TSNode, fname = "", fval = ""): PNode = gecho &"# const '{origname}' is duplicate, skipped" else: gecho &"# const '{origname}' has invalid value '{val}'" + gState.skippedSyms.incl origname proc addConst(gState: State, node: TSNode) = # Add a const to the AST @@ -1012,8 +986,8 @@ proc getTypeArray(gState: State, node: TSNode, tident: PNode, name: string): PNo # type name[X] => array[X, type] let # Size of array could be a Nim expression - size = gState.getLit(gState.getNodeVal(cnode[1]), expression = true) - if size.kind != nkNilLit: + size = gState.parseCExpression(gState.getNodeVal(cnode[1])) + if size.kind != nkNone: result = gState.newArrayTree(cnode, result, size) cnode = cnode[0] elif cnode.len == 1: @@ -1417,6 +1391,9 @@ proc addEnum(gState: State, node: TSNode) = # Create const for fields var fnames: HashSet[string] + # Hold all of field information so that we can add all of them + # after the const identifiers has been updated + fieldDeclarations: seq[tuple[fname: string, fval: string, cexpr: Option[TSNode]]] for i in 0 .. enumlist.len - 1: let en = enumlist[i] @@ -1435,20 +1412,25 @@ proc addEnum(gState: State, node: TSNode) = fval = &"({prev} + 1).{name}" if en.len > 1 and en[1].getName() in gEnumVals: - # Explicit value - fval = "(" & gState.getNimExpression(gState.getNodeVal(en[1]), name) & ")." 
& name - - # Cannot use newConstDef() since parseString(fval) adds backticks to and/or - gState.constSection.add gState.parseString(&"const {fname}* = {fval}")[0][0] + fieldDeclarations.add((fname, "", some(en[1]))) + else: + fieldDeclarations.add((fname, fval, none(TSNode))) fnames.incl fname - prev = fname # Add fields to list of consts after processing enum so that we don't cast # enum field to itself gState.constIdentifiers.incl fnames + # parseCExpression requires all const identifiers to be present for the enum + for (fname, fval, cexprNode) in fieldDeclarations: + var fval = fval + if cexprNode.isSome: + fval = "(" & $gState.parseCExpression(gState.getNodeVal(cexprNode.get()), name) & ")." & name + # Cannot use newConstDef() since parseString(fval) adds backticks to and/or + gState.constSection.add gState.parseString(&"const {fname}* = {fval}")[0][0] + # Add other names if node.getName() == "type_definition" and node.len > 1: gState.addTypeTyped(node, ftname = name, offset = offset) diff --git a/nimterop/comphelp.nim b/nimterop/comphelp.nim new file mode 100644 index 0000000..1709f8b --- /dev/null +++ b/nimterop/comphelp.nim @@ -0,0 +1,18 @@ +import compiler/[ast, lineinfos, msgs, options, parser, renderer] + +import "."/[globals, getters] + +proc handleError*(conf: ConfigRef, info: TLineInfo, msg: TMsgKind, arg: string) = + # Raise exception in parseString() instead of exiting for errors + if msg < warnMin: + raise newException(Exception, msgKindToString(msg)) + +proc parseString*(gState: State, str: string): PNode = + # Parse a string into Nim AST - use custom error handler that raises + # an exception rather than exiting on failure + try: + result = parseString( + str, gState.identCache, gState.config, errorHandler = handleError + ) + except: + decho getCurrentExceptionMsg()
\ No newline at end of file diff --git a/nimterop/exprparser.nim b/nimterop/exprparser.nim new file mode 100644 index 0000000..c74f0b6 --- /dev/null +++ b/nimterop/exprparser.nim @@ -0,0 +1,587 @@ +import strformat, strutils, macros, sets + +import regex + +import compiler/[ast, renderer] + +import "."/treesitter/[api, c, cpp] + +import "."/[globals, getters, comphelp, tshelp] + +# This version of exprparser should be able to handle: +# +# All integers + integer like expressions (hex, octal, suffixes) +# All floating point expressions (except for C++'s hex floating point stuff) +# Strings and character literals, including C's escape characters (not sure if this is the same as C++'s escape characters or not) +# Math operators (+, -, /, *) +# Some Unary operators (-, !, ~). ++, --, and & are yet to be implemented +# Any identifiers +# C type descriptors (int, char, etc) +# Boolean values (true, false) +# Shift expressions (containing anything in this list) +# Cast expressions (containing anything in this list) +# Math expressions (containing anything in this list) +# Sizeof expressions (containing anything in this list) +# Cast expressions (containing anything in this list) +# Parentheses expressions (containing anything in this list) +# Expressions containing other expressions +# +# In addition to the above, it should also handle most type coercions, except +# for where Nim can't (such as uint + -int) + +type + ExprParseError* = object of CatchableError + +template val(node: TSNode): string = + gState.currentExpr.getNodeVal(node) + +proc printDebugExpr*(gState: State, node: TSNode) = + if gState.debug: + gecho ("Input => " & node.val).getCommented() + gecho gState.currentExpr.printLisp(node).getCommented() + +proc getExprIdent*(gState: State, identName: string, kind = nskConst, parent = ""): PNode = + ## Gets a cPlugin transformed identifier from `identName` + ## + ## Returns PNode(nkNone) if the identifier is blank + result = newNode(nkNone) + if identName notin 
gState.skippedSyms: + var ident = identName + if ident != "_": + # Process the identifier through cPlugin + ident = gState.getIdentifier(ident, kind, parent) + if kind == nskType: + result = gState.getIdent(ident) + elif ident.nBl and ident in gState.constIdentifiers: + if gState.currentTyCastName.nBl: + ident = ident & "." & gState.currentTyCastName + result = gState.getIdent(ident) + +proc getExprIdent*(gState: State, node: TSNode, kind = nskConst, parent = ""): PNode = + ## Gets a cPlugin transformed identifier from `identName` + ## + ## Returns PNode(nkNone) if the identifier is blank + gState.getExprIdent(node.val, kind, parent) + +proc parseChar(charStr: string): uint8 {.inline.} = + ## Parses a character literal out of a string. This is needed + ## because treesitter gives unescaped characters when parsing + ## strings. + if charStr.len == 1: + return charStr[0].uint8 + + # Handle octal, hex, unicode? + if charStr.startsWith("\\x"): + result = parseHexInt(charStr.replace("\\x", "0x")).uint8 + elif charStr.len == 4: # Octal + result = parseOctInt("0o" & charStr[1 ..< charStr.len]).uint8 + + if result == 0: + case charStr + of "\\0": + result = ord('\0') + of "\\a": + result = 0x07 + of "\\b": + result = 0x08 + of "\\e": + result = 0x1B + of "\\f": + result = 0x0C + of "\\n": + result = '\n'.uint8 + of "\\r": + result = 0x0D + of "\\t": + result = 0x09 + of "\\v": + result = 0x0B + of "\\\\": + result = 0x5C + of "\\'": + result = '\''.uint8 + of "\\\"": + result = '\"'.uint8 + of "\\?": + result = 0x3F + else: + discard + + if result > uint8.high: + result = uint8.high + +proc getCharLit(charStr: string): PNode {.inline.} = + ## Convert a character string into a proper Nim char lit node + result = newNode(nkCharLit) + result.intVal = parseChar(charStr).int64 + +proc getFloatNode(number, suffix: string): PNode {.inline.} = + ## Get a Nim float node from a C float expression + suffix + let floatSuffix = number[number.len-1] + try: + case floatSuffix + of 'l', 
'L': + # TODO: handle long double (128 bits) + # result = newNode(nkFloat128Lit) + result = newFloatNode(nkFloat64Lit, parseFloat(number[0 ..< number.len - 1])) + of 'f', 'F': + result = newFloatNode(nkFloat64Lit, parseFloat(number[0 ..< number.len - 1])) + else: + result = newFloatNode(nkFloatLit, parseFloat(number)) + except ValueError: + raise newException(ExprParseError, &"Could not parse float value \"{number}\".") + +proc getIntNode(number, suffix: string): PNode {.inline.} = + ## Get a Nim int node from a C integer expression + suffix + case suffix + of "u", "U": + result = newNode(nkUintLit) + of "l", "L": + result = newNode(nkInt32Lit) + of "ul", "UL": + result = newNode(nkUint32Lit) + of "ll", "LL": + result = newNode(nkInt64Lit) + of "ull", "ULL": + result = newNode(nkUint64Lit) + else: + result = newNode(nkIntLit) + + # I realize these regex are wasteful on performance, but + # couldn't come up with a better idea. + if number.contains(re"0[xX]"): + result.intVal = parseHexInt(number) + result.flags = {nfBase16} + elif number.contains(re"0[bB]"): + result.intVal = parseBinInt(number) + result.flags = {nfBase2} + elif number.contains(re"0[oO]"): + result.intVal = parseOctInt(number) + result.flags = {nfBase8} + else: + result.intVal = parseInt(number) + +proc getNumNode(number, suffix: string): PNode {.inline.} = + ## Convert a C number to a Nim number PNode + if number.contains("."): + getFloatNode(number, suffix) + else: + getIntNode(number, suffix) + +proc processNumberLiteral(gState: State, node: TSNode): PNode = + ## Parse a number literal from a TSNode. 
Can be a float, hex, long, etc + result = newNode(nkNone) + let nodeVal = node.val + + var match: RegexMatch + const reg = re"(\-)?(0\d+|0[xX][0-9a-fA-F]+|0[bB][01]+|\d+\.\d*[fFlL]?|\d*\.\d+[fFlL]?|\d+)([ulUL]*)" + let found = nodeVal.find(reg, match) + if found: + let + prefix = if match.group(0).len > 0: nodeVal[match.group(0)[0]] else: "" + number = nodeVal[match.group(1)[0]] + suffix = nodeVal[match.group(2)[0]] + + result = getNumNode(number, suffix) + + if result.kind != nkNone and prefix == "-": + result = nkPrefix.newTree( + gState.getIdent("-"), + result + ) + else: + raise newException(ExprParseError, &"Could not find a number in number_literal: \"{nodeVal}\"") + +proc processCharacterLiteral(gState: State, node: TSNode): PNode = + # Input => 'G' + # + # (char_literal 1 1 3 "'G'") + # + # Output => 'G' + # + # nkCharLit("G") + let val = node.val + result = getCharLit(val[1 ..< val.len - 1]) + +proc processStringLiteral(gState: State, node: TSNode): PNode = + # Input => "\n\rfoobar\0\'" + # + # (string_literal 1 1 16 ""\n\rfoobar\0\'"" + # (escape_sequence 1 2 2 "\n") + # (escape_sequence 1 4 2 "\r") + # (escape_sequence 1 12 2 "\0") + # (escape_sequence 1 14 2 "\'") + # ) + # + # Output => "\n\cfoobar\x00\'" + # + # nkStrLit("\x0A\x0Dfoobar\x00\'") + let + nodeVal = node.val + strVal = nodeVal[1 ..< nodeVal.len - 1] + + const + str = "(\\\\x[[:xdigit:]]{2}|\\\\\\d{3}|\\\\0|\\\\a|\\\\b|\\\\e|\\\\f|\\\\n|\\\\r|\\\\t|\\\\v|\\\\\\\\|\\\\'|\\\\\"|[[:ascii:]])" + reg = re(str) + + # Convert the c string escape sequences/etc to Nim chars + var nimStr = newStringOfCap(nodeVal.len) + for m in strVal.findAll(reg): + nimStr.add(parseChar(strVal[m.group(0)[0]]).chr) + + result = newStrNode(nkStrLit, nimStr) + +proc processTSNode(gState: State, node: TSNode, typeofNode: var PNode): PNode + +proc processParenthesizedExpr(gState: State, node: TSNode, typeofNode: var PNode): PNode = + # Input => (a + b) + # + # (parenthesized_expression 1 1 7 + # (math_expression 1 2 5 + 
# (identifier 1 2 1 "a") + # (identifier 1 6 1 "b") + # ) + # ) + # + # Output => (typeof(a)(a + typeof(a)(b))) + # + # nkPar( + # nkCall( + # nkCall( + # nkIdent("typeof"), + # nkIdent("a") + # ), + # nkInfix( + # nkIdent("+"), + # nkIdent("a"), + # nkCall( + # nkCall( + # nkIdent("typeof"), + # nkIdent("a") + # ), + # nkIdent("b") + # ) + # ) + # ) + # ) + result = newNode(nkPar) + for i in 0 ..< node.len(): + result.add(gState.processTSNode(node[i], typeofNode)) + +proc processCastExpression(gState: State, node: TSNode, typeofNode: var PNode): PNode = + # Input => (int)a + # + # (cast_expression 1 1 6 "(int)a" + # (type_descriptor 1 2 3 "int" + # (primitive_type 1 2 3 "int") + # ) + # (identifier 1 6 1 "a") + # ) + # + # Output => cast[cint](a) + # + # nkCast( + # nkIdent("cint"), + # nkIdent("a") + # ) + result = nkCast.newTree( + gState.processTSNode(node[0], typeofNode), + gState.processTSNode(node[1], typeofNode) + ) + +proc getNimUnarySym(csymbol: string): string = + ## Get the Nim equivalent of a unary C symbol + ## + ## TODO: Add ++, --, + case csymbol + of "+", "-": + result = csymbol + of "~", "!": + result = "not" + else: + raise newException(ExprParseError, &"Unsupported unary symbol \"{csymbol}\"") + +proc getNimBinarySym(csymbol: string): string = + case csymbol + of "|", "||": + result = "or" + of "&", "&&": + result = "and" + of "^": + result = "xor" + of "==", "!=", + "+", "-", "/", "*", + ">", "<", ">=", "<=": + result = csymbol + of "%": + result = "mod" + of "<<": + result = "shl" + of ">>": + result = "shr" + else: + raise newException(ExprParseError, &"Unsupported binary symbol \"{csymbol}\"") + +proc processBinaryExpression(gState: State, node: TSNode, typeofNode: var PNode): PNode = + # Node has left and right children ie: (2 + 7) + # + # Input => a == b + # + # (equality_expression 1 1 6 + # (identifier 1 1 1 "a") + # (identifier 1 6 1 "b") + # ) + # + # Output => a == typeof(a)(b) + # + # nkInfix( + # nkIdent("=="), + # nkIdent("a"), + # 
nkCall( + # nkCall( + # nkIdent("typeof"), + # nkIdent("a") + # ), + # nkIdent("b") + # ) + # ) + result = newNode(nkInfix) + + let + left = node[0] + right = node[1] + binarySym = node.tsNodeChild(1).val.strip() + nimSym = getNimBinarySym(binarySym) + + result.add gState.getIdent(nimSym) + let leftNode = gState.processTSNode(left, typeofNode) + + if typeofNode.isNil: + typeofNode = nkCall.newTree( + gState.getIdent("typeof"), + leftNode + ) + + let rightNode = gState.processTSNode(right, typeofNode) + + result.add leftNode + result.add nkCall.newTree( + typeofNode, + rightNode + ) + if binarySym == "/": + # Special case. Nim's operators generally output + # the same type they take in, except for division. + # So we need to emulate C here and cast the whole + # expression to the type of the first arg + result = nkCall.newTree( + typeofNode, + result + ) + +proc processUnaryExpression(gState: State, node: TSNode, typeofNode: var PNode): PNode = + # Input => !a + # + # (logical_expression 1 1 2 "!a" + # (identifier 1 2 1 "a") + # ) + # + # Output => (not a) + # + # nkPar( + # nkPrefix( + # nkIdent("not"), + # nkIdent("a") + # ) + # ) + result = newNode(nkPar) + + let + child = node[0] + unarySym = node.tsNodeChild(0).val.strip() + nimSym = getNimUnarySym(unarySym) + + if nimSym == "-": + # Special case. The minus symbol must be in front of an integer, + # so we have to make a gentle cast here to coerce it to one. 
+ # Might be bad because we are overwriting the type + # There's probably a better way of doing this + if typeofNode.isNil: + typeofNode = gState.getIdent("int64") + + result.add nkPrefix.newTree( + gState.getIdent(unarySym), + nkPar.newTree( + nkCall.newTree( + gState.getIdent("int64"), + gState.processTSNode(child, typeofNode) + ) + ) + ) + else: + result.add nkPrefix.newTree( + gState.getIdent(nimSym), + gState.processTSNode(child, typeofNode) + ) + +proc processUnaryOrBinaryExpression(gState: State, node: TSNode, typeofNode: var PNode): PNode = + ## Processes both unary (-1, ~true, !something) and binary (a + b, c * d) expressions + if node.len > 1: + # Node has left and right children ie: (2 + 7) + result = processBinaryExpression(gState, node, typeofNode) + elif node.len() == 1: + # Node has only one child, ie -(20 + 7) + result = processUnaryExpression(gState, node, typeofNode) + else: + raise newException(ExprParseError, &"Invalid {node.getName()} \"{node.val}\"") + +proc processSizeofExpression(gState: State, node: TSNode, typeofNode: var PNode): PNode = + # Input => sizeof(int) + # + # (sizeof_expression 1 1 11 "sizeof(int)" + # (type_descriptor 1 8 3 "int" + # (primitive_type 1 8 3 "int") + # ) + # ) + # + # Output => sizeof(cint) + # + # nkCall( + # nkIdent("sizeof"), + # nkIdent("cint") + # ) + result = nkCall.newTree( + gState.getIdent("sizeof"), + gState.processTSNode(node[0], typeofNode) + ) + +proc processTSNode(gState: State, node: TSNode, typeofNode: var PNode): PNode = + ## Handle all of the types of expressions here. This proc gets called recursively + ## in the processX procs and will drill down to sub nodes. 
+ result = newNode(nkNone) + let nodeName = node.getName() + + decho "NODE: ", nodeName, ", VAL: ", node.val + + case nodeName + of "number_literal": + # Input -> 0x1234FE, 1231, 123u, 123ul, 123ull, 1.334f + # Output -> 0x1234FE, 1231, 123'u, 123'u32, 123'u64, 1.334 + result = gState.processNumberLiteral(node) + of "string_literal": + # Input -> "foo\0\x42" + # Output -> "foo\0" + result = gState.processStringLiteral(node) + of "char_literal": + # Input -> 'F', '\060' // Octal, '\x5A' // Hex, '\r' // escape sequences + # Output -> 'F', '0', 'Z', '\r' + result = gState.processCharacterLiteral(node) + of "expression_statement", "ERROR", "translation_unit": + # Note that we're parsing partial expressions, so the TSNode might contain + # an ERROR node. If that's the case, they usually contain children with + # partial results, which will contain parsed expressions + # + # Input (top level statement) -> ((1 + 3 - IDENT) - (int)400.0) + # Output -> (1 + typeof(1)(3) - typeof(1)(IDENT) - typeof(1)(cast[int](400.0))) # Type casting in case some args differ + if node.len == 1: + result = gState.processTSNode(node[0], typeofNode) + elif node.len > 1: + var nodes: seq[PNode] + for i in 0 ..< node.len: + let subNode = gState.processTSNode(node[i], typeofNode) + if subNode.kind != nkNone: + nodes.add(subNode) + # Multiple nodes can get tricky. 
Don't support them yet, unless they + # have at most one valid node + if nodes.len > 1: + raise newException(ExprParseError, &"Node type \"{nodeName}\" with val ({node.val}) has more than one non empty node") + if nodes.len == 1: + result = nodes[0] + else: + raise newException(ExprParseError, &"Node type \"{nodeName}\" has no children") + of "parenthesized_expression": + # Input -> (IDENT - OTHERIDENT) + # Output -> (IDENT - typeof(IDENT)(OTHERIDENT)) # Type casting in case OTHERIDENT is a slightly different type (uint vs int) + result = gState.processParenthesizedExpr(node, typeofNode) + of "sizeof_expression": + # Input -> sizeof(char) + # Output -> sizeof(cchar) + result = gState.processSizeofExpression(node, typeofNode) + # binary_expression from the new treesitter upgrade should work here + # once we upgrade + of "math_expression", "logical_expression", "relational_expression", + "bitwise_expression", "equality_expression", "binary_expression", + "shift_expression": + # Input -> a == b, a != b, !a, ~a, a < b, a > b, a <= b, a >= b, a >> b, a << b + # Output -> + # typeof(a)(a == typeof(a)(b)) + # typeof(a)(a != typeof(a)(b)) + # (not a) + # (not a) + # typeof(a)(a < typeof(a)(b)) + # typeof(a)(a > typeof(a)(b)) + # typeof(a)(a <= typeof(a)(b)) + # typeof(a)(a >= typeof(a)(b)) + # a shr typeof(a)(b) + # a shl typeof(a)(b) + result = gState.processUnaryOrBinaryExpression(node, typeofNode) + of "cast_expression": + # Input -> (int) a + # Output -> cast[cint](a) + result = gState.processCastExpression(node, typeofNode) + # Why are these node types named true/false? 
+ of "true", "false": + # Input -> true, false + # Output -> true, false + result = gState.parseString(node.val) + of "type_descriptor", "sized_type_specifier": + # Input -> int, unsigned int, long int, etc + # Output -> cint, cuint, clong, etc + let ty = getType(node.val) + if ty.len > 0: + # If ty is not empty, one of C's builtin types has been found + result = gState.getExprIdent(ty, nskType, parent=node.getName()) + else: + result = gState.getExprIdent(node.val, nskType, parent=node.getName()) + if result.kind == nkNone: + raise newException(ExprParseError, &"Missing type specifier \"{node.val}\"") + of "identifier": + # Input -> IDENT + # Output -> IDENT (if found in sym table, else error) + result = gState.getExprIdent(node, parent=node.getName()) + if result.kind == nkNone: + raise newException(ExprParseError, &"Missing identifier \"{node.val}\"") + of "comment": + discard + else: + raise newException(ExprParseError, &"Unsupported node type \"{nodeName}\" for node \"{node.val}\"") + + decho "NODE RESULT: ", result + +proc parseCExpression*(gState: State, codeRoot: TSNode, name = ""): PNode = + ## Parse a c expression from a root ts node + + # This var is used for keeping track of the type of the first + # symbol used for type casting + var tnode: PNode = nil + result = newNode(nkNone) + try: + result = gState.processTSNode(codeRoot, tnode) + except ExprParseError as e: + decho e.msg + result = newNode(nkNone) + except Exception as e: + decho "UNEXPECTED EXCEPTION: ", e.msg + result = newNode(nkNone) + +proc parseCExpression*(gState: State, code: string, name = ""): PNode = + ## Convert the C string to a nim PNode tree + gState.currentExpr = code + gState.currentTyCastName = name + + withCodeAst(gState.currentExpr, gState.mode): + result = gState.parseCExpression(root, name) + + # Clear the state + gState.currentExpr = "" + gState.currentTyCastName = ""
\ No newline at end of file diff --git a/nimterop/getters.nim b/nimterop/getters.nim index 2d8d9bb..121c8d5 100644 --- a/nimterop/getters.nim +++ b/nimterop/getters.nim @@ -221,16 +221,19 @@ proc len*(node: TSNode): int = result = node.tsNodeNamedChildCount().int proc `[]`*(node: TSNode, i: SomeInteger): TSNode = - if i < node.len: + if i < type(i)(node.len()): result = node.tsNodeNamedChild(i.uint32) proc getName*(node: TSNode): string {.inline.} = if not node.isNil: return $node.tsNodeType() -proc getNodeVal*(gState: State, node: TSNode): string = +proc getNodeVal*(code: var string, node: TSNode): string = if not node.isNil: - return gState.code[node.tsNodeStartByte() .. node.tsNodeEndByte()-1].strip() + return code[node.tsNodeStartByte() .. node.tsNodeEndByte()-1].strip() + +proc getNodeVal*(gState: State, node: TSNode): string = + gState.code.getNodeVal(node) proc getAtom*(node: TSNode): TSNode = if not node.isNil: @@ -349,13 +352,16 @@ proc inChildren*(node: TSNode, ntype: string): bool = result = true break -proc getLineCol*(gState: State, node: TSNode): tuple[line, col: int] = +proc getLineCol*(code: var string, node: TSNode): tuple[line, col: int] = # Get line number and column info for node let point = node.tsNodeStartPoint() result.line = point.row.int + 1 result.col = point.column.int + 1 +proc getLineCol*(gState: State, node: TSNode): tuple[line, col: int] = + getLineCol(gState.code, node) + proc getTSNodeNamedChildCountSansComments*(node: TSNode): int = for i in 0 ..< node.len: if node.getName() != "comment": @@ -374,7 +380,7 @@ proc getPxName*(node: TSNode, offset: int): string = if count == offset and not np.isNil: return np.getName() -proc printLisp*(gState: State, root: TSNode): string = +proc printLisp*(code: var string, root: TSNode): string = var node = root nextnode: TSNode @@ -384,18 +390,18 @@ proc printLisp*(gState: State, root: TSNode): string = if not node.isNil and depth > -1: result &= spaces(depth) let - (line, col) = 
gState.getLineCol(node) + (line, col) = code.getLineCol(node) result &= &"({$node.tsNodeType()} {line} {col} {node.tsNodeEndByte() - node.tsNodeStartByte()}" let - val = gState.getNodeVal(node) + val = code.getNodeVal(node) if "\n" notin val and " " notin val: result &= &" \"{val}\"" else: break - if node.tsNodeNamedChildCount() != 0: + if node.len() != 0: result &= "\n" - nextnode = node.tsNodeNamedChild(0) + nextnode = node[0] depth += 1 else: result &= ")\n" @@ -419,21 +425,24 @@ proc printLisp*(gState: State, root: TSNode): string = if node == root: break +proc printLisp*(gState: State, root: TSNode): string = + printLisp(gState.code, root) + proc getCommented*(str: string): string = "\n# " & str.strip().replace("\n", "\n# ") proc printTree*(gState: State, pnode: PNode, offset = ""): string = - if gState.debug and pnode.kind != nkNone: + if not pnode.isNil and gState.debug and pnode.kind != nkNone: result &= "\n# " & offset & $pnode.kind & "(" case pnode.kind of nkCharLit: - result &= "'" & pnode.intVal.char & "')" + result &= ($pnode.intVal.char).escape & ")" of nkIntLit..nkUInt64Lit: result &= $pnode.intVal & ")" of nkFloatLit..nkFloat128Lit: result &= $pnode.floatVal & ")" of nkStrLit..nkTripleStrLit: - result &= "\"" & pnode.strVal & "\")" + result &= pnode.strVal.escape & ")" of nkSym: result &= $pnode.sym & ")" of nkIdent: @@ -452,13 +461,13 @@ proc printTree*(gState: State, pnode: PNode, offset = ""): string = proc printDebug*(gState: State, node: TSNode) = if gState.debug: - gecho ("Input => " & gState.getNodeVal(node)).getCommented() & "\n" & - gState.printLisp(node).getCommented() + gecho ("Input => " & gState.getNodeVal(node)).getCommented() + gecho gState.printLisp(node).getCommented() proc printDebug*(gState: State, pnode: PNode) = - if gState.debug: - gecho ("Output => " & $pnode).getCommented() & "\n" & - gState.printTree(pnode) + if gState.debug and pnode.kind != nkNone: + gecho ("Output => " & $pnode).getCommented() + gecho 
gState.printTree(pnode) # Compiler shortcuts diff --git a/nimterop/globals.nim b/nimterop/globals.nim index f159124..5db17a3 100644 --- a/nimterop/globals.nim +++ b/nimterop/globals.nim @@ -1,4 +1,4 @@ -import sequtils, sets, tables +import sequtils, sets, tables, strutils import regex @@ -76,6 +76,11 @@ type # All const names for enum casting constIdentifiers*: HashSet[string] + # All symbols that have been skipped due to + # being unwrappable or the user provided + # override is blank + skippedSyms*: HashSet[string] + # Legacy ast fields, remove when ast2 becomes default constStr*, enumStr*, procStr*, typeStr*: string @@ -93,6 +98,9 @@ type currentHeader*, impShort*, sourceFile*: string + # Used for the exprparser.nim module + currentExpr*, currentTyCastName*: string + data*: seq[tuple[name, val: string]] nodeBranch*: seq[string] @@ -113,12 +121,12 @@ when not declared(CIMPORT): export gAtoms, gExpressions, gEnumVals, Kind, Ast, AstTable, State, nBl, Bl # Redirect output to file when required - template gecho*(args: string) {.dirty.} = + template gecho*(args: string) = if gState.outputHandle.isNil: echo args else: gState.outputHandle.writeLine(args) - template decho*(str: untyped): untyped = + template decho*(args: varargs[string, `$`]): untyped = if gState.debug: - gecho str.getCommented() + gecho join(args, "").getCommented()
\ No newline at end of file diff --git a/nimterop/toast.nim b/nimterop/toast.nim index ab44970..98045bf 100644 --- a/nimterop/toast.nim +++ b/nimterop/toast.nim @@ -2,16 +2,11 @@ import os, osproc, strformat, strutils, tables, times import "."/treesitter/[api, c, cpp] -import "."/[ast, ast2, globals, getters, grammar, build] +import "."/[ast, ast2, globals, getters, grammar, build, tshelp] proc process(gState: State, path: string, astTable: AstTable) = doAssert existsFile(path), &"Invalid path {path}" - var parser = tsParserNew() - - defer: - parser.tsParserDelete() - if gState.mode.Bl: gState.mode = getCompilerMode(path) @@ -20,31 +15,16 @@ proc process(gState: State, path: string, astTable: AstTable) = else: gState.code = readFile(path) - doAssert gState.code.nBl, "Empty file or preprocessor error" - - if gState.mode == "c": - doAssert parser.tsParserSetLanguage(treeSitterC()), "Failed to load C parser" - elif gState.mode == "cpp": - doAssert parser.tsParserSetLanguage(treeSitterCpp()), "Failed to load C++ parser" - else: - doAssert false, &"Invalid parser {gState.mode}" - - var - tree = parser.tsParserParseString(nil, gState.code.cstring, gState.code.len.uint32) - root = tree.tsTreeRootNode() - - defer: - tree.tsTreeDelete() - - if gState.past: - gecho gState.printLisp(root) - elif gState.pnim: - if Feature.ast2 in gState.feature: - ast2.parseNim(gState, path, root) - else: - ast.parseNim(gState, path, root, astTable) - elif gState.preprocess: - gecho gState.code + withCodeAst(gState.code, gState.mode): + if gState.past: + gecho gState.printLisp(root) + elif gState.pnim: + if Feature.ast2 in gState.feature: + ast2.parseNim(gState, path, root) + else: + ast.parseNim(gState, path, root, astTable) + elif gState.preprocess: + gecho gState.code # CLI processing with default values proc main( diff --git a/nimterop/tshelp.nim b/nimterop/tshelp.nim new file mode 100644 index 0000000..109321c --- /dev/null +++ b/nimterop/tshelp.nim @@ -0,0 +1,28 @@ +import 
"."/treesitter/[c, cpp] + +template withCodeAst*(code: string, mode: string, body: untyped): untyped = + ## A simple template to inject the TSNode into a body of code + mixin treeSitterC + mixin treeSitterCpp + + var parser = tsParserNew() + defer: + parser.tsParserDelete() + + doAssert code.nBl, "Empty code or preprocessor error" + + if mode == "c": + doAssert parser.tsParserSetLanguage(treeSitterC()), "Failed to load C parser" + elif mode == "cpp": + doAssert parser.tsParserSetLanguage(treeSitterCpp()), "Failed to load C++ parser" + else: + doAssert false, "Invalid parser " & mode + + var + tree = parser.tsParserParseString(nil, code.cstring, code.len.uint32) + root {.inject.} = tree.tsTreeRootNode() + + body + + defer: + tree.tsTreeDelete()
\ No newline at end of file diff --git a/tests/include/tast2.h b/tests/include/tast2.h index bdf8823..b47a801 100644 --- a/tests/include/tast2.h +++ b/tests/include/tast2.h @@ -8,6 +8,42 @@ extern "C" { #define D "hello" #define E 'c' +#define UEXPR (1234u << 1) +#define ULEXPR (1234ul << 2) +#define ULLEXPR (1234ull << 3) +#define LEXPR (1234l << 4) +#define LLEXPR (1234ll << 5) + +#define SHL1 (1u << 1) +#define SHL2 (1u << 2) +#define SHL3 (1u << 3) +#define COERCE 645635634896ull + 35436 +#define COERCE2 645635634896 + 35436ul +#define BINEXPR ~(-(1u << !-1)) ^ (10 >> 1) +#define BOOL true +#define MATHEXPR (1 + 2/3*20 - 100) +#define ANDEXPR (100 & 11000) +#define CASTEXPR (char) 34 +#define AVAL 100 +#define BVAL 200 +#define EQ1 AVAL <= BVAL +#define EQ2 AVAL >= BVAL +#define EQ3 AVAL > BVAL +#define EQ4 AVAL < BVAL +#define EQ5 AVAL != BVAL +#define EQ6 AVAL == BVAL + +#define SIZEOF sizeof(char) +#define REG_STR "regular string" +#define NOTSUPPORTEDSTR "not a " REG_STR + +#define NULLCHAR '\0'/* comments should not break things*/ +#define OCTCHAR '\012' // nor should this comment +#define HEXCHAR '\xFE' +#define TRICKYSTR "\x4E\034\nfoo\0\'\"\r\v\a\b\e\f\t\\\?bar" + +#define ALLSHL (SHL1 | SHL2 | SHL3) + struct A0; struct A1 {}; typedef struct A2; diff --git a/tests/tast2.nim b/tests/tast2.nim index e13c4ac..4cfbeac 100644 --- a/tests/tast2.nim +++ b/tests/tast2.nim @@ -3,6 +3,10 @@ import macros, os, sets, strutils import nimterop/[cimport] static: + # Skip casting on lower nim compilers because + # the VM does not support it + when (NimMajor, NimMinor, NimPatch) < (1, 0, 0): + cSkipSymbol @["CASTEXPR"] cDebug() const @@ -93,11 +97,11 @@ macro testFields(t: typed, fields: static[string] = "") = for i in 0 ..< rl.len: let name = ($rl[i][0]).strip(chars = {'*'}) - typ = ($(rl[i][1].repr())).replace("\n", "").replace(" ", "") + typ = ($(rl[i][1].repr())).replace("\n", "").replace(" ", "").replace("typeof", "type") n = names.find(name) assert n != -1, $t & 
"." & name & " invalid" - assert types[n] == typ, - "typeof(" & $t & ":" & name & ") != " & types[n] & ", is " & typ + assert types[n].replace("typeof", "type") == typ, + "typeof(" & $t & ":" & name & ") != " & types[n].replace("typeof", "type") & ", is " & typ assert A == 2 assert B == 1.0 @@ -105,6 +109,48 @@ assert C == 0x10 assert D == "hello" assert E == 'c' +assert not defined(NOTSUPPORTEDSTR) + +assert UEXPR == (1234.uint shl 1) +assert ULEXPR == (1234.uint32 shl 2) +assert ULLEXPR == (1234.uint64 shl 3) +assert LEXPR == (1234.int32 shl 4) +assert LLEXPR == (1234.int64 shl 5) + +assert AVAL == 100 +assert BVAL == 200 + +assert EQ1 == (AVAL <= BVAL) +assert EQ2 == (AVAL >= BVAL) +assert EQ3 == (AVAL > BVAL) +assert EQ4 == (AVAL < BVAL) +assert EQ5 == (AVAL != BVAL) +assert EQ6 == (AVAL == BVAL) + +assert SIZEOF == 1 + +assert COERCE == 645635670332'u64 +assert COERCE2 == 645635670332'i64 + +assert BINEXPR == 5 +assert BOOL == true +assert MATHEXPR == -99 +assert ANDEXPR == 96 + +when (NimMajor, NimMinor, NimPatch) >= (1, 0, 0): + assert CASTEXPR == 34.chr + +assert TRICKYSTR == "N\x1C\nfoo\x00\'\"\c\v\a\b\e\f\t\\\\?bar" +assert NULLCHAR == '\0' +assert OCTCHAR == '\n' +assert HEXCHAR.int == 0xFE + +assert SHL1 == (1.uint shl 1) +assert SHL2 == (1.uint shl 2) +assert SHL3 == (1.uint shl 3) + +assert ALLSHL == (SHL1 or SHL2 or SHL3) + assert A0 is object testFields(A0, "f1!cint") checkPragmas(A0, pHeaderBy, istype = false) @@ -271,7 +317,7 @@ var a21p: A21p a21p = addr a20 assert A22 is object -testFields(A22, "f1|f2!ptr ptr cint|array[123 + 132, ptr cint]") +testFields(A22, "f1|f2!ptr ptr cint|array[123 + type(123)(132), ptr cint]") checkPragmas(A22, pHeaderBy, istype = false) var a22: A22 a22.f1 = addr a15.a2[0] @@ -427,4 +473,4 @@ checkPragmas(nested, pHeaderImpBy) when defined(HEADER): assert sitest1(5) == 10 - assert sitest1(10) == 20
\ No newline at end of file + assert sitest1(10) == 20 diff --git a/tests/tmath.nim b/tests/tmath.nim index 5d84700..b8477c1 100644 --- a/tests/tmath.nim +++ b/tests/tmath.nim @@ -13,6 +13,12 @@ when defined(windows): complex = object static: + when (NimMajor, NimMinor, NimPatch) < (1, 0, 0): + # FP_ILOGB0 and FP_ILOGBNAN are casts that are unsupported + # on lower Nim VMs + cSkipSymbol @["math_errhandling", "FP_ILOGB0", "FP_ILOGBNAN"] + else: + cSkipSymbol @["math_errhandling"] cDebug() cDisableCaching() cAddStdDir() |
