Merge pull request #191 from jyapayne/poc_print_ast

[ast2] Enable parsing some C Macro expressions to Nim expressions using treesitter
author: genotrance <dev@genotrance.com> 2020-04-26 11:29:27 -0500
committer: GitHub <noreply@github.com> 2020-04-26 11:29:27 -0500
commit: 73ef7c4ccdcdc0934280f002af23ba663bf38426 (patch)
tree: facee03b526d1991cdd25f1742e7f9a841cfc899
parent: 43dd43e3183178e71abd3319290c566cb4dd80a9 (diff)
parent: 89c10c4b25226a88062d01e6bf57a9e9146920c5 (diff)
download: nimterop-73ef7c4ccdcdc0934280f002af23ba663bf38426.tar.gz
nimterop-73ef7c4ccdcdc0934280f002af23ba663bf38426.zip
10 files changed, 822 insertions, 122 deletions
diff --git a/nimterop/ast2.nim b/nimterop/ast2.nim
index ce45f1d..474ec69 100644
--- a/nimterop/ast2.nim
+++ b/nimterop/ast2.nim
@@ -1,12 +1,12 @@
 import macros, os, sequtils, sets, strformat, strutils, tables, times
 
-import regex
+import options as opts
 
-import compiler/[ast, idents, lineinfos, modulegraphs, msgs, options, parser, renderer]
+import compiler/[ast, idents, lineinfos, modulegraphs, msgs, options, renderer]
 
 import "."/treesitter/api
 
-import "."/[globals, getters]
+import "."/[globals, getters, exprparser, comphelp, tshelp]
 
 proc getPtrType*(str: string): string =
   result = case str:
@@ -19,55 +19,6 @@ proc getPtrType*(str: string): string =
     else:
       str
 
-proc handleError*(conf: ConfigRef, info: TLineInfo, msg: TMsgKind, arg: string) =
-  # Raise exception in parseString() instead of exiting for errors
-  if msg < warnMin:
-    raise newException(Exception, msgKindToString(msg))
-
-proc parseString(gState: State, str: string): PNode =
-  # Parse a string into Nim AST - use custom error handler that raises
-  # an exception rather than exiting on failure
-  try:
-    result = parseString(
-      str, gState.identCache, gState.config, errorHandler = handleError
-    )
-  except:
-    decho getCurrentExceptionMsg()
-
-proc getLit*(gState: State, str: string, expression = false): PNode =
-  # Used to convert #define literals into const and expressions
-  # in array sizes
-  #
-  # `expression` is true when `str` should be converted into a Nim expression
-  let
-    str = str.replace(re"/[/*].*?(?:\*/)?$", "").strip()
-
-  if str.contains(re"^[\-]?[\d]+$"):              # decimal
-    result = newIntNode(nkIntLit, parseInt(str))
-
-  elif str.contains(re"^[\-]?[\d]*[.]?[\d]+$"):   # float
-    result = newFloatNode(nkFloatLit, parseFloat(str))
-
-  elif str.contains(re"^0x[\da-fA-F]+$"):         # hexadecimal
-    result = gState.parseString(str)
-
-  elif str.contains(re"^'[[:ascii:]]'$"):         # char
-    result = newNode(nkCharLit)
-    result.intVal = str[1].int64
-
-  elif str.contains(re"""^"[[:ascii:]]+"$"""):    # char *
-    result = newStrNode(nkStrLit, str[1 .. ^2])
-
-  else:
-    let
-      str =
-        if expression: gState.getNimExpression(str)
-        else: str
-    result = gState.parseString(str)
-
-  if result.isNil:
-    result = newNode(nkNilLit)
-
 proc getOverrideOrSkip(gState: State, node: TSNode, origname: string, kind: NimSymKind): PNode =
   # Check if symbol `origname` of `kind` and `origname` has any cOverride defined
   # and use that if present
@@ -90,6 +41,7 @@ proc getOverrideOrSkip(gState: State, node: TSNode, origname: string, kind: NimS
       result = pnode[0][0]
   else:
     gecho &"\n# $1'{origname}' skipped" % skind
+    gState.skippedSyms.incl origname
     if gState.debug:
       gState.skipStr &= &"\n{gState.getNodeVal(node)}"
 
@@ -148,15 +100,36 @@ proc newConstDef(gState: State, node: TSNode, fname = "", fval = ""): PNode =
         fval
       else:
         gState.getNodeVal(node[1])
-    valident =
-      gState.getLit(val)
+
+  var valident = newNode(nkNone)
+
+  withCodeAst(val, gState.mode):
+    # This section is a hack for determining that the first
+    # node is a type, which shouldn't be accepted by a const
+    # def section. Need to replace this with some other mechanism
+    # to handle type aliases
+    var maybeTyNode: TSNode
+    # Take the very first node, which may be 2 levels
+    # down if there is an error node
+    if root.len > 0 and root[0].getName() == "ERROR":
+      maybeTyNode = root[0][0]
+    elif root.len > 0:
+      maybeTyNode = root[0]
+
+    if not maybeTyNode.isNil:
+      let name = maybeTyNode.getName()
+      case name
+      of "type_descriptor", "sized_type_specifier":
+        discard
+      else:
+        # Can't do gState.parseCExpression(root) here for some reason?
+        # get a SEGFAULT if we use root
+        valident = gState.parseCExpression(val)
 
   if name.Bl:
     # Name skipped or overridden since blank
     result = gState.getOverrideOrSkip(node, origname, nskConst)
-  elif valident.kind in {nkCharLit .. nkStrLit} or
-    (valident.kind == nkStmtList and valident.len > 0 and
-    valident[0].kind in {nkCharLit .. nkStrLit}):
+  elif valident.kind != nkNone:
     if gState.addNewIdentifer(name):
       # const X* = Y
       #
@@ -180,6 +153,7 @@ proc newConstDef(gState: State, node: TSNode, fname = "", fval = ""): PNode =
       gecho &"# const '{origname}' is duplicate, skipped"
   else:
     gecho &"# const '{origname}' has invalid value '{val}'"
+    gState.skippedSyms.incl origname
 
 proc addConst(gState: State, node: TSNode) =
   # Add a const to the AST
@@ -1012,8 +986,8 @@ proc getTypeArray(gState: State, node: TSNode, tident: PNode, name: string): PNo
       # type name[X] => array[X, type]
       let
         # Size of array could be a Nim expression
-        size = gState.getLit(gState.getNodeVal(cnode[1]), expression = true)
-      if size.kind != nkNilLit:
+        size = gState.parseCExpression(gState.getNodeVal(cnode[1]))
+      if size.kind != nkNone:
         result = gState.newArrayTree(cnode, result, size)
         cnode = cnode[0]
     elif cnode.len == 1:
@@ -1417,6 +1391,9 @@ proc addEnum(gState: State, node: TSNode) =
       # Create const for fields
       var
         fnames: HashSet[string]
+        # Hold all of the field information so that we can add the fields
+        # after the const identifiers have been updated
+        fieldDeclarations: seq[tuple[fname: string, fval: string, cexpr: Option[TSNode]]]
       for i in 0 .. enumlist.len - 1:
         let
           en = enumlist[i]
@@ -1435,20 +1412,25 @@ proc addEnum(gState: State, node: TSNode) =
             fval = &"({prev} + 1).{name}"
 
           if en.len > 1 and en[1].getName() in gEnumVals:
-            # Explicit value
-            fval = "(" & gState.getNimExpression(gState.getNodeVal(en[1]), name) & ")." & name
-
-          # Cannot use newConstDef() since parseString(fval) adds backticks to and/or
-          gState.constSection.add gState.parseString(&"const {fname}* = {fval}")[0][0]
+            fieldDeclarations.add((fname, "", some(en[1])))
+          else:
+            fieldDeclarations.add((fname, fval, none(TSNode)))
 
           fnames.incl fname
-
           prev = fname
 
       # Add fields to list of consts after processing enum so that we don't cast
       # enum field to itself
       gState.constIdentifiers.incl fnames
 
+      # parseCExpression requires all const identifiers to be present for the enum
+      for (fname, fval, cexprNode) in fieldDeclarations:
+        var fval = fval
+        if cexprNode.isSome:
+          fval = "(" & $gState.parseCExpression(gState.getNodeVal(cexprNode.get()), name) & ")." & name
+        # Cannot use newConstDef() since parseString(fval) adds backticks to and/or
+        gState.constSection.add gState.parseString(&"const {fname}* = {fval}")[0][0]
+
       # Add other names
       if node.getName() == "type_definition" and node.len > 1:
         gState.addTypeTyped(node, ftname = name, offset = offset)
diff --git a/nimterop/comphelp.nim b/nimterop/comphelp.nim
new file mode 100644
index 0000000..1709f8b
--- /dev/null
+++ b/nimterop/comphelp.nim
@@ -0,0 +1,18 @@
+import compiler/[ast, lineinfos, msgs, options, parser, renderer]
+
+import "."/[globals, getters]
+
+proc handleError*(conf: ConfigRef, info: TLineInfo, msg: TMsgKind, arg: string) =
+  # Raise exception in parseString() instead of exiting for errors
+  if msg < warnMin:
+    raise newException(Exception, msgKindToString(msg))
+
+proc parseString*(gState: State, str: string): PNode =
+  # Parse a string into Nim AST - use custom error handler that raises
+  # an exception rather than exiting on failure
+  try:
+    result = parseString(
+      str, gState.identCache, gState.config, errorHandler = handleError
+    )
+  except:
+    decho getCurrentExceptionMsg()
+\ No newline at end of file
diff --git a/nimterop/exprparser.nim b/nimterop/exprparser.nim
new file mode 100644
index 0000000..c74f0b6
--- /dev/null
+++ b/nimterop/exprparser.nim
@@ -0,0 +1,587 @@
+import strformat, strutils, macros, sets
+
+import regex
+
+import compiler/[ast, renderer]
+
+import "."/treesitter/[api, c, cpp]
+
+import "."/[globals, getters, comphelp, tshelp]
+
+# This version of exprparser should be able to handle:
+#
+# All integers + integer like expressions (hex, octal, suffixes)
+# All floating point expressions (except for C++'s hex floating point stuff)
+# Strings and character literals, including C's escape characters (not sure if this is the same as C++'s escape characters or not)
+# Math operators (+, -, /, *)
+# Some Unary operators (-, !, ~). ++, --, and & are yet to be implemented
+# Any identifiers
+# C type descriptors (int, char, etc)
+# Boolean values (true, false)
+# Shift expressions (containing anything in this list)
+# Cast expressions (containing anything in this list)
+# Math expressions (containing anything in this list)
+# Sizeof expressions (containing anything in this list)
+# Cast expressions (containing anything in this list)
+# Parentheses expressions (containing anything in this list)
+# Expressions containing other expressions
+#
+# In addition to the above, it should also handle most type coercions, except
+# in cases that Nim itself cannot handle (such as uint + -int)
+
+type
+  ExprParseError* = object of CatchableError
+
+template val(node: TSNode): string =
+  gState.currentExpr.getNodeVal(node)
+
+proc printDebugExpr*(gState: State, node: TSNode) =
+  if gState.debug:
+    gecho ("Input => " & node.val).getCommented()
+    gecho gState.currentExpr.printLisp(node).getCommented()
+
+proc getExprIdent*(gState: State, identName: string, kind = nskConst, parent = ""): PNode =
+  ## Gets a cPlugin transformed identifier from `identName`
+  ##
+  ## Returns PNode(nkNone) if the identifier is blank
+  result = newNode(nkNone)
+  if identName notin gState.skippedSyms:
+    var ident = identName
+    if ident != "_":
+      # Process the identifier through cPlugin
+      ident = gState.getIdentifier(ident, kind, parent)
+    if kind == nskType:
+      result = gState.getIdent(ident)
+    elif ident.nBl and ident in gState.constIdentifiers:
+      if gState.currentTyCastName.nBl:
+        ident = ident & "." & gState.currentTyCastName
+      result = gState.getIdent(ident)
+
+proc getExprIdent*(gState: State, node: TSNode, kind = nskConst, parent = ""): PNode =
+  ## Gets a cPlugin transformed identifier from the text of `node`
+  ## (read from `gState.currentExpr`) by forwarding to the string overload
+  ## of `getExprIdent`, whose result is returned unchanged
+  gState.getExprIdent(node.val, kind, parent)
+
+proc parseChar(charStr: string): uint8 {.inline.} =
+  ## Parses a character literal out of a string. This is needed
+  ## because treesitter gives unescaped characters when parsing
+  ## strings.
+  if charStr.len == 1:
+    return charStr[0].uint8
+
+  # Handle octal, hex, unicode?
+  if charStr.startsWith("\\x"):
+    result = parseHexInt(charStr.replace("\\x", "0x")).uint8
+  elif charStr.len == 4: # Octal
+    result = parseOctInt("0o" & charStr[1 ..< charStr.len]).uint8
+
+  if result == 0:
+    case charStr
+    of "\\0":
+      result = ord('\0')
+    of "\\a":
+      result = 0x07
+    of "\\b":
+      result = 0x08
+    of "\\e":
+      result = 0x1B
+    of "\\f":
+      result = 0x0C
+    of "\\n":
+      result = '\n'.uint8
+    of "\\r":
+      result = 0x0D
+    of "\\t":
+      result = 0x09
+    of "\\v":
+      result = 0x0B
+    of "\\\\":
+      result = 0x5C
+    of "\\'":
+      result = '\''.uint8
+    of "\\\"":
+      result = '\"'.uint8
+    of "\\?":
+      result = 0x3F
+    else:
+      discard
+
+  if result > uint8.high:
+    result = uint8.high
+
+proc getCharLit(charStr: string): PNode {.inline.} =
+  ## Convert a character string into a proper Nim char lit node
+  result = newNode(nkCharLit)
+  result.intVal = parseChar(charStr).int64
+
+proc getFloatNode(number, suffix: string): PNode {.inline.} =
+  ## Get a Nim float node from a C float expression + suffix
+  let floatSuffix = number[number.len-1]
+  try:
+    case floatSuffix
+    of 'l', 'L':
+      # TODO: handle long double (128 bits)
+      # result = newNode(nkFloat128Lit)
+      result = newFloatNode(nkFloat64Lit, parseFloat(number[0 ..< number.len - 1]))
+    of 'f', 'F':
+      result = newFloatNode(nkFloat64Lit, parseFloat(number[0 ..< number.len - 1]))
+    else:
+      result = newFloatNode(nkFloatLit, parseFloat(number))
+  except ValueError:
+    raise newException(ExprParseError, &"Could not parse float value \"{number}\".")
+
+proc getIntNode(number, suffix: string): PNode {.inline.} =
+  ## Get a Nim int node from a C integer literal; `suffix` is matched case-insensitively
+  case suffix.toLowerAscii()
+  of "u":
+    result = newNode(nkUintLit)
+  of "l":
+    result = newNode(nkInt32Lit)
+  of "ul", "lu":
+    result = newNode(nkUint32Lit)
+  of "ll":
+    result = newNode(nkInt64Lit)
+  of "ull", "llu":
+    result = newNode(nkUint64Lit)
+  else:
+    result = newNode(nkIntLit)
+
+  # Pick the radix from the C prefix. C spells octal with a bare leading
+  # zero (0123), never "0o", so test for that instead of a Nim-style prefix.
+  if number.contains(re"0[xX]"):
+    result.intVal = parseHexInt(number)
+    result.flags = {nfBase16}
+  elif number.contains(re"0[bB]"):
+    result.intVal = parseBinInt(number)
+    result.flags = {nfBase2}
+  elif number.len > 1 and number[0] == '0':
+    result.intVal = parseOctInt(number)
+    result.flags = {nfBase8}
+  else:
+    result.intVal = parseInt(number)
+
+proc getNumNode(number, suffix: string): PNode {.inline.} =
+  ## Convert a C number to a Nim number PNode
+  if number.contains("."):
+    getFloatNode(number, suffix)
+  else:
+    getIntNode(number, suffix)
+
+proc processNumberLiteral(gState: State, node: TSNode): PNode =
+  ## Parse a number literal from a TSNode. Can be a float, hex, long, etc
+  result = newNode(nkNone)
+  let nodeVal = node.val
+
+  var match: RegexMatch
+  const reg = re"(\-)?(0\d+|0[xX][0-9a-fA-F]+|0[bB][01]+|\d+\.\d*[fFlL]?|\d*\.\d+[fFlL]?|\d+)([ulUL]*)"
+  let found = nodeVal.find(reg, match)
+  if found:
+    let
+      prefix = if match.group(0).len > 0: nodeVal[match.group(0)[0]] else: ""
+      number = nodeVal[match.group(1)[0]]
+      suffix = nodeVal[match.group(2)[0]]
+
+    result = getNumNode(number, suffix)
+
+    if result.kind != nkNone and prefix == "-":
+      result = nkPrefix.newTree(
+        gState.getIdent("-"),
+        result
+      )
+  else:
+    raise newException(ExprParseError, &"Could not find a number in number_literal: \"{nodeVal}\"")
+
+proc processCharacterLiteral(gState: State, node: TSNode): PNode =
+  # Input => 'G'
+  #
+  # (char_literal 1 1 3 "'G'")
+  #
+  # Output => 'G'
+  #
+  # nkCharLit("G")
+  let val = node.val
+  result = getCharLit(val[1 ..< val.len - 1])
+
+proc processStringLiteral(gState: State, node: TSNode): PNode =
+  # Input => "\n\rfoobar\0\'"
+  #
+  # (string_literal 1 1 16 ""\n\rfoobar\0\'""
+  #  (escape_sequence 1 2 2 "\n")
+  #  (escape_sequence 1 4 2 "\r")
+  #  (escape_sequence 1 12 2 "\0")
+  #  (escape_sequence 1 14 2 "\'")
+  # )
+  #
+  # Output => "\n\cfoobar\x00\'"
+  #
+  # nkStrLit("\x0A\x0Dfoobar\x00\'")
+  let
+    nodeVal = node.val
+    strVal = nodeVal[1 ..< nodeVal.len - 1]
+
+  const
+    str = "(\\\\x[[:xdigit:]]{2}|\\\\\\d{3}|\\\\0|\\\\a|\\\\b|\\\\e|\\\\f|\\\\n|\\\\r|\\\\t|\\\\v|\\\\\\\\|\\\\'|\\\\\"|[[:ascii:]])"
+    reg = re(str)
+
+  # Convert the c string escape sequences/etc to Nim chars
+  var nimStr = newStringOfCap(nodeVal.len)
+  for m in strVal.findAll(reg):
+    nimStr.add(parseChar(strVal[m.group(0)[0]]).chr)
+
+  result = newStrNode(nkStrLit, nimStr)
+
+proc processTSNode(gState: State, node: TSNode, typeofNode: var PNode): PNode
+
+proc processParenthesizedExpr(gState: State, node: TSNode, typeofNode: var PNode): PNode =
+  # Input => (a + b)
+  #
+  # (parenthesized_expression 1 1 7
+  #  (math_expression 1 2 5
+  #   (identifier 1 2 1 "a")
+  #   (identifier 1 6 1 "b")
+  #  )
+  # )
+  #
+  # Output => (typeof(a)(a + typeof(a)(b)))
+  #
+  # nkPar(
+  #  nkCall(
+  #   nkCall(
+  #    nkIdent("typeof"),
+  #    nkIdent("a")
+  #   ),
+  #   nkInfix(
+  #    nkIdent("+"),
+  #    nkIdent("a"),
+  #    nkCall(
+  #     nkCall(
+  #      nkIdent("typeof"),
+  #      nkIdent("a")
+  #     ),
+  #     nkIdent("b")
+  #    )
+  #   )
+  #  )
+  # )
+  result = newNode(nkPar)
+  for i in 0 ..< node.len():
+    result.add(gState.processTSNode(node[i], typeofNode))
+
+proc processCastExpression(gState: State, node: TSNode, typeofNode: var PNode): PNode =
+  # Input => (int)a
+  #
+  # (cast_expression 1 1 6 "(int)a"
+  #  (type_descriptor 1 2 3 "int"
+  #   (primitive_type 1 2 3 "int")
+  #  )
+  #  (identifier 1 6 1 "a")
+  # )
+  #
+  # Output => cast[cint](a)
+  #
+  # nkCast(
+  #  nkIdent("cint"),
+  #  nkIdent("a")
+  # )
+  result = nkCast.newTree(
+    gState.processTSNode(node[0], typeofNode),
+    gState.processTSNode(node[1], typeofNode)
+  )
+
+proc getNimUnarySym(csymbol: string): string =
+  ## Get the Nim equivalent of a unary C symbol
+  ##
+  ## TODO: Add ++, --,
+  case csymbol
+  of "+", "-":
+    result = csymbol
+  of "~", "!":
+    result = "not"
+  else:
+    raise newException(ExprParseError, &"Unsupported unary symbol \"{csymbol}\"")
+
+proc getNimBinarySym(csymbol: string): string =
+  case csymbol
+  of "|", "||":
+    result = "or"
+  of "&", "&&":
+    result = "and"
+  of "^":
+    result = "xor"
+  of "==", "!=",
+     "+", "-", "/", "*",
+     ">", "<", ">=", "<=":
+    result = csymbol
+  of "%":
+    result = "mod"
+  of "<<":
+    result = "shl"
+  of ">>":
+    result = "shr"
+  else:
+    raise newException(ExprParseError, &"Unsupported binary symbol \"{csymbol}\"")
+
+proc processBinaryExpression(gState: State, node: TSNode, typeofNode: var PNode): PNode =
+  # Node has left and right children ie: (2 + 7)
+  #
+  # Input => a == b
+  #
+  # (equality_expression 1 1 6
+  #  (identifier 1 1 1 "a")
+  #  (identifier 1 6 1 "b")
+  # )
+  #
+  # Output => a == typeof(a)(b)
+  #
+  # nkInfix(
+  #  nkIdent("=="),
+  #  nkIdent("a"),
+  #  nkCall(
+  #   nkCall(
+  #    nkIdent("typeof"),
+  #    nkIdent("a")
+  #   ),
+  #   nkIdent("b")
+  #  )
+  # )
+  result = newNode(nkInfix)
+
+  let
+    left = node[0]
+    right = node[1]
+    binarySym = node.tsNodeChild(1).val.strip()
+    nimSym = getNimBinarySym(binarySym)
+
+  result.add gState.getIdent(nimSym)
+  let leftNode = gState.processTSNode(left, typeofNode)
+
+  if typeofNode.isNil:
+    typeofNode = nkCall.newTree(
+      gState.getIdent("typeof"),
+      leftNode
+    )
+
+  let rightNode = gState.processTSNode(right, typeofNode)
+
+  result.add leftNode
+  result.add nkCall.newTree(
+    typeofNode,
+    rightNode
+  )
+  if binarySym == "/":
+    # Special case. Nim's operators generally output
+    # the same type they take in, except for division.
+    # So we need to emulate C here and cast the whole
+    # expression to the type of the first arg
+    result = nkCall.newTree(
+      typeofNode,
+      result
+    )
+
+proc processUnaryExpression(gState: State, node: TSNode, typeofNode: var PNode): PNode =
+  # Input => !a
+  #
+  # (logical_expression 1 1 2 "!a"
+  #  (identifier 1 2 1 "a")
+  # )
+  #
+  # Output => (not a)
+  #
+  # nkPar(
+  #  nkPrefix(
+  #   nkIdent("not"),
+  #   nkIdent("a")
+  #  )
+  # )
+  result = newNode(nkPar)
+
+  let
+    child = node[0]
+    unarySym = node.tsNodeChild(0).val.strip()
+    nimSym = getNimUnarySym(unarySym)
+
+  if nimSym == "-":
+    # Special case. The minus symbol must be in front of an integer,
+    # so we have to make a gentle cast here to coerce it to one.
+    # Might be bad because we are overwriting the type
+    # There's probably a better way of doing this
+    if typeofNode.isNil:
+      typeofNode = gState.getIdent("int64")
+
+    result.add nkPrefix.newTree(
+      gState.getIdent(unarySym),
+      nkPar.newTree(
+        nkCall.newTree(
+          gState.getIdent("int64"),
+          gState.processTSNode(child, typeofNode)
+        )
+      )
+    )
+  else:
+    result.add nkPrefix.newTree(
+      gState.getIdent(nimSym),
+      gState.processTSNode(child, typeofNode)
+    )
+
+proc processUnaryOrBinaryExpression(gState: State, node: TSNode, typeofNode: var PNode): PNode =
+  ## Processes both unary (-1, ~true, !something) and binary (a + b, c * d) expressions
+  if node.len > 1:
+    # Node has left and right children ie: (2 + 7)
+    result = processBinaryExpression(gState, node, typeofNode)
+  elif node.len() == 1:
+    # Node has only one child, ie -(20 + 7)
+    result = processUnaryExpression(gState, node, typeofNode)
+  else:
+    raise newException(ExprParseError, &"Invalid {node.getName()} \"{node.val}\"")
+
+proc processSizeofExpression(gState: State, node: TSNode, typeofNode: var PNode): PNode =
+  # Input => sizeof(int)
+  #
+  # (sizeof_expression 1 1 11 "sizeof(int)"
+  #  (type_descriptor 1 8 3 "int"
+  #   (primitive_type 1 8 3 "int")
+  #  )
+  # )
+  #
+  # Output => sizeof(cint)
+  #
+  # nkCall(
+  #  nkIdent("sizeof"),
+  #  nkIdent("cint")
+  # )
+  result = nkCall.newTree(
+    gState.getIdent("sizeof"),
+    gState.processTSNode(node[0], typeofNode)
+  )
+
+proc processTSNode(gState: State, node: TSNode, typeofNode: var PNode): PNode =
+  ## Handle all of the types of expressions here. This proc gets called recursively
+  ## in the processX procs and will drill down to sub nodes.
+  result = newNode(nkNone)
+  let nodeName = node.getName()
+
+  decho "NODE: ", nodeName, ", VAL: ", node.val
+
+  case nodeName
+  of "number_literal":
+    # Input -> 0x1234FE, 1231, 123u, 123ul, 123ull, 1.334f
+    # Output -> 0x1234FE, 1231, 123'u, 123'u32, 123'u64, 1.334
+    result = gState.processNumberLiteral(node)
+  of "string_literal":
+    # Input -> "foo\0\x42"
+    # Output -> "foo\0"
+    result = gState.processStringLiteral(node)
+  of "char_literal":
+    # Input -> 'F', '\060' // Octal, '\x5A' // Hex, '\r' // escape sequences
+    # Output -> 'F', '0', 'Z', '\r'
+    result = gState.processCharacterLiteral(node)
+  of "expression_statement", "ERROR", "translation_unit":
+    # Note that we're parsing partial expressions, so the TSNode might contain
+    # an ERROR node. If that's the case, they usually contain children with
+    # partial results, which will contain parsed expressions
+    #
+    # Input (top level statement) -> ((1 + 3 - IDENT) - (int)400.0)
+    # Output -> (1 + typeof(1)(3) - typeof(1)(IDENT) - typeof(1)(cast[int](400.0))) # Type casting in case some args differ
+    if node.len == 1:
+      result = gState.processTSNode(node[0], typeofNode)
+    elif node.len > 1:
+      var nodes: seq[PNode]
+      for i in 0 ..< node.len:
+        let subNode = gState.processTSNode(node[i], typeofNode)
+        if subNode.kind != nkNone:
+          nodes.add(subNode)
+          # Multiple nodes can get tricky. Don't support them yet, unless they
+          # have at most one valid node
+          if nodes.len > 1:
+            raise newException(ExprParseError, &"Node type \"{nodeName}\" with val ({node.val}) has more than one non empty node")
+      if nodes.len == 1:
+        result = nodes[0]
+    else:
+      raise newException(ExprParseError, &"Node type \"{nodeName}\" has no children")
+  of "parenthesized_expression":
+    # Input -> (IDENT - OTHERIDENT)
+    # Output -> (IDENT - typeof(IDENT)(OTHERIDENT)) # Type casting in case OTHERIDENT is a slightly different type (uint vs int)
+    result = gState.processParenthesizedExpr(node, typeofNode)
+  of "sizeof_expression":
+    # Input -> sizeof(char)
+    # Output -> sizeof(cchar)
+    result = gState.processSizeofExpression(node, typeofNode)
+  # binary_expression from the new treesitter upgrade should work here
+  # once we upgrade
+  of "math_expression", "logical_expression", "relational_expression",
+     "bitwise_expression", "equality_expression", "binary_expression",
+     "shift_expression":
+    # Input -> a == b, a != b, !a, ~a, a < b, a > b, a <= b, a >= b, a >> b, a << b
+    # Output ->
+    #   typeof(a)(a == typeof(a)(b))
+    #   typeof(a)(a != typeof(a)(b))
+    #   (not a)
+    #   (not a)
+    #   typeof(a)(a < typeof(a)(b))
+    #   typeof(a)(a > typeof(a)(b))
+    #   typeof(a)(a <= typeof(a)(b))
+    #   typeof(a)(a >= typeof(a)(b))
+    #   a shr typeof(a)(b)
+    #   a shl typeof(a)(b)
+    result = gState.processUnaryOrBinaryExpression(node, typeofNode)
+  of "cast_expression":
+    # Input -> (int) a
+    # Output -> cast[cint](a)
+    result = gState.processCastExpression(node, typeofNode)
+  # Why are these node types named true/false?
+  of "true", "false":
+    # Input -> true, false
+    # Output -> true, false
+    result = gState.parseString(node.val)
+  of "type_descriptor", "sized_type_specifier":
+    # Input -> int, unsigned int, long int, etc
+    # Output -> cint, cuint, clong, etc
+    let ty = getType(node.val)
+    if ty.len > 0:
+      # If ty is not empty, one of C's builtin types has been found
+      result = gState.getExprIdent(ty, nskType, parent=node.getName())
+    else:
+      result = gState.getExprIdent(node.val, nskType, parent=node.getName())
+    if result.kind == nkNone:
+      raise newException(ExprParseError, &"Missing type specifier \"{node.val}\"")
+  of "identifier":
+    # Input -> IDENT
+    # Output -> IDENT (if found in sym table, else error)
+    result = gState.getExprIdent(node, parent=node.getName())
+    if result.kind == nkNone:
+      raise newException(ExprParseError, &"Missing identifier \"{node.val}\"")
+  of "comment":
+    discard
+  else:
+    raise newException(ExprParseError, &"Unsupported node type \"{nodeName}\" for node \"{node.val}\"")
+
+  decho "NODE RESULT: ", result
+
+proc parseCExpression*(gState: State, codeRoot: TSNode, name = ""): PNode =
+  ## Parse a c expression from a root ts node
+
+  # This var is used for keeping track of the type of the first
+  # symbol used for type casting
+  var tnode: PNode = nil
+  result = newNode(nkNone)
+  try:
+    result = gState.processTSNode(codeRoot, tnode)
+  except ExprParseError as e:
+    decho e.msg
+    result = newNode(nkNone)
+  except Exception as e:
+    decho "UNEXPECTED EXCEPTION: ", e.msg
+    result = newNode(nkNone)
+
+proc parseCExpression*(gState: State, code: string, name = ""): PNode =
+  ## Convert the C string to a nim PNode tree
+  gState.currentExpr = code
+  gState.currentTyCastName = name
+
+  withCodeAst(gState.currentExpr, gState.mode):
+    result = gState.parseCExpression(root, name)
+
+  # Clear the state
+  gState.currentExpr = ""
+  gState.currentTyCastName = ""
+\ No newline at end of file
diff --git a/nimterop/getters.nim b/nimterop/getters.nim
index 2d8d9bb..121c8d5 100644
--- a/nimterop/getters.nim
+++ b/nimterop/getters.nim
@@ -221,16 +221,19 @@ proc len*(node: TSNode): int =
     result = node.tsNodeNamedChildCount().int
 
 proc `[]`*(node: TSNode, i: SomeInteger): TSNode =
-  if i < node.len:
+  if i < type(i)(node.len()):
     result = node.tsNodeNamedChild(i.uint32)
 
 proc getName*(node: TSNode): string {.inline.} =
   if not node.isNil:
     return $node.tsNodeType()
 
-proc getNodeVal*(gState: State, node: TSNode): string =
+proc getNodeVal*(code: var string, node: TSNode): string =
   if not node.isNil:
-    return gState.code[node.tsNodeStartByte() .. node.tsNodeEndByte()-1].strip()
+    return code[node.tsNodeStartByte() .. node.tsNodeEndByte()-1].strip()
+
+proc getNodeVal*(gState: State, node: TSNode): string =
+  gState.code.getNodeVal(node)
 
 proc getAtom*(node: TSNode): TSNode =
   if not node.isNil:
@@ -349,13 +352,16 @@ proc inChildren*(node: TSNode, ntype: string): bool =
       result = true
       break
 
-proc getLineCol*(gState: State, node: TSNode): tuple[line, col: int] =
+proc getLineCol*(code: var string, node: TSNode): tuple[line, col: int] =
   # Get line number and column info for node
   let
     point = node.tsNodeStartPoint()
   result.line = point.row.int + 1
   result.col = point.column.int + 1
 
+proc getLineCol*(gState: State, node: TSNode): tuple[line, col: int] =
+  getLineCol(gState.code, node)
+
 proc getTSNodeNamedChildCountSansComments*(node: TSNode): int =
   for i in 0 ..< node.len:
     if node.getName() != "comment":
@@ -374,7 +380,7 @@ proc getPxName*(node: TSNode, offset: int): string =
   if count == offset and not np.isNil:
     return np.getName()
 
-proc printLisp*(gState: State, root: TSNode): string =
+proc printLisp*(code: var string, root: TSNode): string =
   var
     node = root
     nextnode: TSNode
@@ -384,18 +390,18 @@ proc printLisp*(gState: State, root: TSNode): string =
     if not node.isNil and depth > -1:
       result &= spaces(depth)
       let
-        (line, col) = gState.getLineCol(node)
+        (line, col) = code.getLineCol(node)
       result &= &"({$node.tsNodeType()} {line} {col} {node.tsNodeEndByte() - node.tsNodeStartByte()}"
       let
-        val = gState.getNodeVal(node)
+        val = code.getNodeVal(node)
       if "\n" notin val and " " notin val:
         result &= &" \"{val}\""
     else:
       break
 
-    if node.tsNodeNamedChildCount() != 0:
+    if node.len() != 0:
       result &= "\n"
-      nextnode = node.tsNodeNamedChild(0)
+      nextnode = node[0]
       depth += 1
     else:
       result &= ")\n"
@@ -419,21 +425,24 @@ proc printLisp*(gState: State, root: TSNode): string =
     if node == root:
       break
 
+proc printLisp*(gState: State, root: TSNode): string =
+  printLisp(gState.code, root)
+
 proc getCommented*(str: string): string =
   "\n# " & str.strip().replace("\n", "\n# ")
 
 proc printTree*(gState: State, pnode: PNode, offset = ""): string =
-  if gState.debug and pnode.kind != nkNone:
+  if not pnode.isNil and gState.debug and pnode.kind != nkNone:
     result &= "\n# " & offset & $pnode.kind & "("
     case pnode.kind
     of nkCharLit:
-      result &= "'" & pnode.intVal.char & "')"
+      result &= ($pnode.intVal.char).escape & ")"
     of nkIntLit..nkUInt64Lit:
       result &= $pnode.intVal & ")"
     of nkFloatLit..nkFloat128Lit:
       result &= $pnode.floatVal & ")"
     of nkStrLit..nkTripleStrLit:
-      result &= "\"" & pnode.strVal & "\")"
+      result &= pnode.strVal.escape & ")"
     of nkSym:
       result &= $pnode.sym & ")"
     of nkIdent:
@@ -452,13 +461,13 @@ proc printTree*(gState: State, pnode: PNode, offset = ""): string =
 
 proc printDebug*(gState: State, node: TSNode) =
   if gState.debug:
-    gecho ("Input => " & gState.getNodeVal(node)).getCommented() & "\n" &
-          gState.printLisp(node).getCommented()
+    gecho ("Input => " & gState.getNodeVal(node)).getCommented()
+    gecho gState.printLisp(node).getCommented()
 
 proc printDebug*(gState: State, pnode: PNode) =
-  if gState.debug:
-    gecho ("Output => " & $pnode).getCommented() & "\n" &
-          gState.printTree(pnode)
+  if gState.debug and pnode.kind != nkNone:
+    gecho ("Output => " & $pnode).getCommented()
+    gecho gState.printTree(pnode)
 
 # Compiler shortcuts
 
diff --git a/nimterop/globals.nim b/nimterop/globals.nim
index f159124..5db17a3 100644
--- a/nimterop/globals.nim
+++ b/nimterop/globals.nim
@@ -1,4 +1,4 @@
-import sequtils, sets, tables
+import sequtils, sets, tables, strutils
 
 import regex
 
@@ -76,6 +76,11 @@ type
     # All const names for enum casting
     constIdentifiers*: HashSet[string]
 
+    # All symbols that have been skipped because
+    # they are unwrappable or because the
+    # user-provided override is blank
+    skippedSyms*: HashSet[string]
+
     # Legacy ast fields, remove when ast2 becomes default
     constStr*, enumStr*, procStr*, typeStr*: string
 
@@ -93,6 +98,9 @@ type
 
     currentHeader*, impShort*, sourceFile*: string
 
+    # Used for the exprparser.nim module
+    currentExpr*, currentTyCastName*: string
+
     data*: seq[tuple[name, val: string]]
 
     nodeBranch*: seq[string]
@@ -113,12 +121,12 @@ when not declared(CIMPORT):
   export gAtoms, gExpressions, gEnumVals, Kind, Ast, AstTable, State, nBl, Bl
 
   # Redirect output to file when required
-  template gecho*(args: string) {.dirty.} =
+  template gecho*(args: string) =
     if gState.outputHandle.isNil:
       echo args
     else:
       gState.outputHandle.writeLine(args)
 
-  template decho*(str: untyped): untyped =
+  template decho*(args: varargs[string, `$`]): untyped =
     if gState.debug:
-      gecho str.getCommented()
+      gecho join(args, "").getCommented()
+\ No newline at end of file
diff --git a/nimterop/toast.nim b/nimterop/toast.nim
index ab44970..98045bf 100644
--- a/nimterop/toast.nim
+++ b/nimterop/toast.nim
@@ -2,16 +2,11 @@ import os, osproc, strformat, strutils, tables, times
 
 import "."/treesitter/[api, c, cpp]
 
-import "."/[ast, ast2, globals, getters, grammar, build]
+import "."/[ast, ast2, globals, getters, grammar, build, tshelp]
 
 proc process(gState: State, path: string, astTable: AstTable) =
   doAssert existsFile(path), &"Invalid path {path}"
 
-  var parser = tsParserNew()
-
-  defer:
-    parser.tsParserDelete()
-
   if gState.mode.Bl:
     gState.mode = getCompilerMode(path)
 
@@ -20,31 +15,16 @@ proc process(gState: State, path: string, astTable: AstTable) =
   else:
     gState.code = readFile(path)
 
-  doAssert gState.code.nBl, "Empty file or preprocessor error"
-
-  if gState.mode == "c":
-    doAssert parser.tsParserSetLanguage(treeSitterC()), "Failed to load C parser"
-  elif gState.mode == "cpp":
-    doAssert parser.tsParserSetLanguage(treeSitterCpp()), "Failed to load C++ parser"
-  else:
-    doAssert false, &"Invalid parser {gState.mode}"
-
-  var
-    tree = parser.tsParserParseString(nil, gState.code.cstring, gState.code.len.uint32)
-    root = tree.tsTreeRootNode()
-
-  defer:
-    tree.tsTreeDelete()
-
-  if gState.past:
-    gecho gState.printLisp(root)
-  elif gState.pnim:
-    if Feature.ast2 in gState.feature:
-      ast2.parseNim(gState, path, root)
-    else:
-      ast.parseNim(gState, path, root, astTable)
-  elif gState.preprocess:
-    gecho gState.code
+  withCodeAst(gState.code, gState.mode):
+    if gState.past:
+      gecho gState.printLisp(root)
+    elif gState.pnim:
+      if Feature.ast2 in gState.feature:
+        ast2.parseNim(gState, path, root)
+      else:
+        ast.parseNim(gState, path, root, astTable)
+    elif gState.preprocess:
+      gecho gState.code
 
 # CLI processing with default values
 proc main(
diff --git a/nimterop/tshelp.nim b/nimterop/tshelp.nim
new file mode 100644
index 0000000..109321c
--- /dev/null
+++ b/nimterop/tshelp.nim
@@ -0,0 +1,28 @@
+import "."/treesitter/[c, cpp]
+
+template withCodeAst*(code: string, mode: string, body: untyped): untyped =
+  ## Parses `code` with the tree-sitter grammar selected by `mode` ("c" or
+  ## "cpp") and runs `body` with the root TSNode injected as `root`.
+  mixin treeSitterC
+  mixin treeSitterCpp
+
+  var parser = tsParserNew()
+  defer:
+    parser.tsParserDelete()
+
+  doAssert code.nBl, "Empty code or preprocessor error"
+
+  if mode == "c":
+    doAssert parser.tsParserSetLanguage(treeSitterC()), "Failed to load C parser"
+  elif mode == "cpp":
+    doAssert parser.tsParserSetLanguage(treeSitterCpp()), "Failed to load C++ parser"
+  else:
+    doAssert false, "Invalid parser " & mode
+
+  var
+    tree = parser.tsParserParseString(nil, code.cstring, code.len.uint32)
+    root {.inject.} = tree.tsTreeRootNode()
+  # Register cleanup before `body` so the tree is freed even if `body` raises
+  defer:
+    tree.tsTreeDelete()
+  body
+\ No newline at end of file
diff --git a/tests/include/tast2.h b/tests/include/tast2.h
index bdf8823..b47a801 100644
--- a/tests/include/tast2.h
+++ b/tests/include/tast2.h
@@ -8,6 +8,42 @@ extern "C" {
 #define D "hello"
 #define E 'c'
 
+#define UEXPR (1234u << 1)
+#define ULEXPR (1234ul << 2)
+#define ULLEXPR (1234ull << 3)
+#define LEXPR (1234l << 4)
+#define LLEXPR (1234ll << 5)
+
+#define SHL1 (1u << 1)
+#define SHL2 (1u << 2)
+#define SHL3 (1u << 3)
+#define COERCE 645635634896ull + 35436
+#define COERCE2 645635634896 + 35436ul
+#define BINEXPR ~(-(1u << !-1)) ^ (10 >> 1)
+#define BOOL true
+#define MATHEXPR (1 + 2/3*20 - 100)
+#define ANDEXPR (100 & 11000)
+#define CASTEXPR (char) 34
+#define AVAL 100
+#define BVAL 200
+#define EQ1 AVAL <= BVAL
+#define EQ2 AVAL >= BVAL
+#define EQ3 AVAL > BVAL
+#define EQ4 AVAL < BVAL
+#define EQ5 AVAL != BVAL
+#define EQ6 AVAL == BVAL
+
+#define SIZEOF sizeof(char)
+#define REG_STR "regular string"
+#define NOTSUPPORTEDSTR "not a " REG_STR
+
+#define NULLCHAR '\0'/* comments should not break things*/
+#define OCTCHAR '\012' // nor should this comment
+#define HEXCHAR '\xFE'
+#define TRICKYSTR "\x4E\034\nfoo\0\'\"\r\v\a\b\e\f\t\\\?bar"
+
+#define ALLSHL (SHL1 | SHL2 | SHL3)
+
 struct A0;
 struct A1 {};
 typedef struct A2;
diff --git a/tests/tast2.nim b/tests/tast2.nim
index e13c4ac..4cfbeac 100644
--- a/tests/tast2.nim
+++ b/tests/tast2.nim
@@ -3,6 +3,10 @@ import macros, os, sets, strutils
 import nimterop/[cimport]
 
 static:
+  # Skip casting on older Nim compilers because
+  # their VM does not support it
+  when (NimMajor, NimMinor, NimPatch) < (1, 0, 0):
+    cSkipSymbol @["CASTEXPR"]
   cDebug()
 
 const
@@ -93,11 +97,11 @@ macro testFields(t: typed, fields: static[string] = "") =
     for i in 0 ..< rl.len:
       let
         name = ($rl[i][0]).strip(chars = {'*'})
-        typ = ($(rl[i][1].repr())).replace("\n", "").replace("  ", "")
+        typ = ($(rl[i][1].repr())).replace("\n", "").replace("  ", "").replace("typeof", "type")
         n = names.find(name)
       assert n != -1, $t & "." & name & " invalid"
-      assert types[n] == typ,
-        "typeof(" & $t & ":" & name & ") != " & types[n] & ", is " & typ
+      assert types[n].replace("typeof", "type") == typ,
+        "typeof(" & $t & ":" & name & ") != " & types[n].replace("typeof", "type") & ", is " & typ
 
 assert A == 2
 assert B == 1.0
@@ -105,6 +109,48 @@ assert C == 0x10
 assert D == "hello"
 assert E == 'c'
 
+assert not defined(NOTSUPPORTEDSTR)
+
+assert UEXPR == (1234.uint shl 1)
+assert ULEXPR == (1234.uint32 shl 2)
+assert ULLEXPR == (1234.uint64 shl 3)
+assert LEXPR == (1234.int32 shl 4)
+assert LLEXPR == (1234.int64 shl 5)
+
+assert AVAL == 100
+assert BVAL == 200
+
+assert EQ1 == (AVAL <= BVAL)
+assert EQ2 == (AVAL >= BVAL)
+assert EQ3 == (AVAL > BVAL)
+assert EQ4 == (AVAL < BVAL)
+assert EQ5 == (AVAL != BVAL)
+assert EQ6 == (AVAL == BVAL)
+
+assert SIZEOF == 1
+
+assert COERCE == 645635670332'u64
+assert COERCE2 == 645635670332'i64
+
+assert BINEXPR == 5
+assert BOOL == true
+assert MATHEXPR == -99
+assert ANDEXPR == 96
+
+when (NimMajor, NimMinor, NimPatch) >= (1, 0, 0):
+  assert CASTEXPR == 34.chr
+
+assert TRICKYSTR == "N\x1C\nfoo\x00\'\"\c\v\a\b\e\f\t\\\\?bar"
+assert NULLCHAR == '\0'
+assert OCTCHAR == '\n'
+assert HEXCHAR.int == 0xFE
+
+assert SHL1 == (1.uint shl 1)
+assert SHL2 == (1.uint shl 2)
+assert SHL3 == (1.uint shl 3)
+
+assert ALLSHL == (SHL1 or SHL2 or SHL3)
+
 assert A0 is object
 testFields(A0, "f1!cint")
 checkPragmas(A0, pHeaderBy, istype = false)
@@ -271,7 +317,7 @@ var a21p: A21p
 a21p = addr a20
 
 assert A22 is object
-testFields(A22, "f1|f2!ptr ptr cint|array[123 + 132, ptr cint]")
+testFields(A22, "f1|f2!ptr ptr cint|array[123 + type(123)(132), ptr cint]")
 checkPragmas(A22, pHeaderBy, istype = false)
 var a22: A22
 a22.f1 = addr a15.a2[0]
@@ -427,4 +473,4 @@ checkPragmas(nested, pHeaderImpBy)
 
 when defined(HEADER):
   assert sitest1(5) == 10
-  assert sitest1(10) == 20
-\ No newline at end of file
+  assert sitest1(10) == 20
diff --git a/tests/tmath.nim b/tests/tmath.nim
index 5d84700..b8477c1 100644
--- a/tests/tmath.nim
+++ b/tests/tmath.nim
@@ -13,6 +13,12 @@ when defined(windows):
       complex = object
 
 static:
+  when (NimMajor, NimMinor, NimPatch) < (1, 0, 0):
+    # FP_ILOGB0 and FP_ILOGBNAN are casts, which are unsupported
+    # by the VM in older Nim versions
+    cSkipSymbol @["math_errhandling", "FP_ILOGB0", "FP_ILOGBNAN"]
+  else:
+    cSkipSymbol @["math_errhandling"]
   cDebug()
   cDisableCaching()
   cAddStdDir()
author	genotrance <dev@genotrance.com>	2020-04-26 11:29:27 -0500
committer	GitHub <noreply@github.com>	2020-04-26 11:29:27 -0500
commit	73ef7c4ccdcdc0934280f002af23ba663bf38426 (patch)
tree	facee03b526d1991cdd25f1742e7f9a841cfc899
parent	43dd43e3183178e71abd3319290c566cb4dd80a9 (diff)
parent	89c10c4b25226a88062d01e6bf57a9e9146920c5 (diff)
download	nimterop-73ef7c4ccdcdc0934280f002af23ba663bf38426.tar.gz nimterop-73ef7c4ccdcdc0934280f002af23ba663bf38426.zip