aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: genotrance <dev@genotrance.com> 2020-04-26 11:29:27 -0500
committer: GitHub <noreply@github.com> 2020-04-26 11:29:27 -0500
commit: 73ef7c4ccdcdc0934280f002af23ba663bf38426 (patch)
tree: facee03b526d1991cdd25f1742e7f9a841cfc899
parent: 43dd43e3183178e71abd3319290c566cb4dd80a9 (diff)
parent: 89c10c4b25226a88062d01e6bf57a9e9146920c5 (diff)
download: nimterop-73ef7c4ccdcdc0934280f002af23ba663bf38426.tar.gz
download: nimterop-73ef7c4ccdcdc0934280f002af23ba663bf38426.zip
Merge pull request #191 from jyapayne/poc_print_ast
[ast2] Enable parsing some C Macro expressions to Nim expressions using treesitter
-rw-r--r-- nimterop/ast2.nim 112
-rw-r--r-- nimterop/comphelp.nim 18
-rw-r--r-- nimterop/exprparser.nim 587
-rw-r--r-- nimterop/getters.nim 43
-rw-r--r-- nimterop/globals.nim 16
-rw-r--r-- nimterop/toast.nim 42
-rw-r--r-- nimterop/tshelp.nim 28
-rw-r--r-- tests/include/tast2.h 36
-rw-r--r-- tests/tast2.nim 56
-rw-r--r-- tests/tmath.nim 6
10 files changed, 822 insertions, 122 deletions
diff --git a/nimterop/ast2.nim b/nimterop/ast2.nim
index ce45f1d..474ec69 100644
--- a/nimterop/ast2.nim
+++ b/nimterop/ast2.nim
@@ -1,12 +1,12 @@
import macros, os, sequtils, sets, strformat, strutils, tables, times
-import regex
+import options as opts
-import compiler/[ast, idents, lineinfos, modulegraphs, msgs, options, parser, renderer]
+import compiler/[ast, idents, lineinfos, modulegraphs, msgs, options, renderer]
import "."/treesitter/api
-import "."/[globals, getters]
+import "."/[globals, getters, exprparser, comphelp, tshelp]
proc getPtrType*(str: string): string =
result = case str:
@@ -19,55 +19,6 @@ proc getPtrType*(str: string): string =
else:
str
-proc handleError*(conf: ConfigRef, info: TLineInfo, msg: TMsgKind, arg: string) =
- # Raise exception in parseString() instead of exiting for errors
- if msg < warnMin:
- raise newException(Exception, msgKindToString(msg))
-
-proc parseString(gState: State, str: string): PNode =
- # Parse a string into Nim AST - use custom error handler that raises
- # an exception rather than exiting on failure
- try:
- result = parseString(
- str, gState.identCache, gState.config, errorHandler = handleError
- )
- except:
- decho getCurrentExceptionMsg()
-
-proc getLit*(gState: State, str: string, expression = false): PNode =
- # Used to convert #define literals into const and expressions
- # in array sizes
- #
- # `expression` is true when `str` should be converted into a Nim expression
- let
- str = str.replace(re"/[/*].*?(?:\*/)?$", "").strip()
-
- if str.contains(re"^[\-]?[\d]+$"): # decimal
- result = newIntNode(nkIntLit, parseInt(str))
-
- elif str.contains(re"^[\-]?[\d]*[.]?[\d]+$"): # float
- result = newFloatNode(nkFloatLit, parseFloat(str))
-
- elif str.contains(re"^0x[\da-fA-F]+$"): # hexadecimal
- result = gState.parseString(str)
-
- elif str.contains(re"^'[[:ascii:]]'$"): # char
- result = newNode(nkCharLit)
- result.intVal = str[1].int64
-
- elif str.contains(re"""^"[[:ascii:]]+"$"""): # char *
- result = newStrNode(nkStrLit, str[1 .. ^2])
-
- else:
- let
- str =
- if expression: gState.getNimExpression(str)
- else: str
- result = gState.parseString(str)
-
- if result.isNil:
- result = newNode(nkNilLit)
-
proc getOverrideOrSkip(gState: State, node: TSNode, origname: string, kind: NimSymKind): PNode =
# Check if symbol `origname` of `kind` and `origname` has any cOverride defined
# and use that if present
@@ -90,6 +41,7 @@ proc getOverrideOrSkip(gState: State, node: TSNode, origname: string, kind: NimS
result = pnode[0][0]
else:
gecho &"\n# $1'{origname}' skipped" % skind
+ gState.skippedSyms.incl origname
if gState.debug:
gState.skipStr &= &"\n{gState.getNodeVal(node)}"
@@ -148,15 +100,36 @@ proc newConstDef(gState: State, node: TSNode, fname = "", fval = ""): PNode =
fval
else:
gState.getNodeVal(node[1])
- valident =
- gState.getLit(val)
+
+ var valident = newNode(nkNone)
+
+ withCodeAst(val, gState.mode):
+ # This section is a hack for determining that the first
+ # node is a type, which shouldn't be accepted by a const
+ # def section. Need to replace this with some other mechanism
+ # to handle type aliases
+ var maybeTyNode: TSNode
+ # Take the very first node, which may be 2 levels
+ # down if there is an error node
+ if root.len > 0 and root[0].getName() == "ERROR":
+ maybeTyNode = root[0][0]
+ elif root.len > 0:
+ maybeTyNode = root[0]
+
+ if not maybeTyNode.isNil:
+ let name = maybeTyNode.getName()
+ case name
+ of "type_descriptor", "sized_type_specifier":
+ discard
+ else:
+ # Can't do gState.parseCExpression(root) here for some reason?
+ # get a SEGFAULT if we use root
+ valident = gState.parseCExpression(val)
if name.Bl:
# Name skipped or overridden since blank
result = gState.getOverrideOrSkip(node, origname, nskConst)
- elif valident.kind in {nkCharLit .. nkStrLit} or
- (valident.kind == nkStmtList and valident.len > 0 and
- valident[0].kind in {nkCharLit .. nkStrLit}):
+ elif valident.kind != nkNone:
if gState.addNewIdentifer(name):
# const X* = Y
#
@@ -180,6 +153,7 @@ proc newConstDef(gState: State, node: TSNode, fname = "", fval = ""): PNode =
gecho &"# const '{origname}' is duplicate, skipped"
else:
gecho &"# const '{origname}' has invalid value '{val}'"
+ gState.skippedSyms.incl origname
proc addConst(gState: State, node: TSNode) =
# Add a const to the AST
@@ -1012,8 +986,8 @@ proc getTypeArray(gState: State, node: TSNode, tident: PNode, name: string): PNo
# type name[X] => array[X, type]
let
# Size of array could be a Nim expression
- size = gState.getLit(gState.getNodeVal(cnode[1]), expression = true)
- if size.kind != nkNilLit:
+ size = gState.parseCExpression(gState.getNodeVal(cnode[1]))
+ if size.kind != nkNone:
result = gState.newArrayTree(cnode, result, size)
cnode = cnode[0]
elif cnode.len == 1:
@@ -1417,6 +1391,9 @@ proc addEnum(gState: State, node: TSNode) =
# Create const for fields
var
fnames: HashSet[string]
+ # Hold all of field information so that we can add all of them
+ # after the const identifiers has been updated
+ fieldDeclarations: seq[tuple[fname: string, fval: string, cexpr: Option[TSNode]]]
for i in 0 .. enumlist.len - 1:
let
en = enumlist[i]
@@ -1435,20 +1412,25 @@ proc addEnum(gState: State, node: TSNode) =
fval = &"({prev} + 1).{name}"
if en.len > 1 and en[1].getName() in gEnumVals:
- # Explicit value
- fval = "(" & gState.getNimExpression(gState.getNodeVal(en[1]), name) & ")." & name
-
- # Cannot use newConstDef() since parseString(fval) adds backticks to and/or
- gState.constSection.add gState.parseString(&"const {fname}* = {fval}")[0][0]
+ fieldDeclarations.add((fname, "", some(en[1])))
+ else:
+ fieldDeclarations.add((fname, fval, none(TSNode)))
fnames.incl fname
-
prev = fname
# Add fields to list of consts after processing enum so that we don't cast
# enum field to itself
gState.constIdentifiers.incl fnames
+ # parseCExpression requires all const identifiers to be present for the enum
+ for (fname, fval, cexprNode) in fieldDeclarations:
+ var fval = fval
+ if cexprNode.isSome:
+ fval = "(" & $gState.parseCExpression(gState.getNodeVal(cexprNode.get()), name) & ")." & name
+ # Cannot use newConstDef() since parseString(fval) adds backticks to and/or
+ gState.constSection.add gState.parseString(&"const {fname}* = {fval}")[0][0]
+
# Add other names
if node.getName() == "type_definition" and node.len > 1:
gState.addTypeTyped(node, ftname = name, offset = offset)
diff --git a/nimterop/comphelp.nim b/nimterop/comphelp.nim
new file mode 100644
index 0000000..1709f8b
--- /dev/null
+++ b/nimterop/comphelp.nim
@@ -0,0 +1,18 @@
+import compiler/[ast, lineinfos, msgs, options, parser, renderer]
+
+import "."/[globals, getters]
+
+proc handleError*(conf: ConfigRef, info: TLineInfo, msg: TMsgKind, arg: string) =
+  ## Error callback handed to the compiler's `parseString()`.
+  ##
+  ## Message kinds below `warnMin` are raised as an `Exception` carrying the
+  ## text of the message kind, so the caller can recover instead of the
+  ## compiler exiting. Every other kind is ignored.
+  if msg >= warnMin:
+    return
+  raise newException(Exception, msgKindToString(msg))
+
+proc parseString*(gState: State, str: string): PNode =
+ ## Parse `str` into a Nim AST using `gState.identCache` and `gState.config`.
+ ##
+ ## `handleError` is installed as the error handler, so parse errors arrive
+ ## here as exceptions. They are reported through `decho` and `result` is
+ ## left unassigned - presumably nil, so callers should check for it.
+ # Parse a string into Nim AST - use custom error handler that raises
+ # an exception rather than exiting on failure
+ try:
+ result = parseString(
+ str, gState.identCache, gState.config, errorHandler = handleError
+ )
+ except:
+ # Bare `except` on purpose: handleError raises the base `Exception` type
+ decho getCurrentExceptionMsg() \ No newline at end of file
diff --git a/nimterop/exprparser.nim b/nimterop/exprparser.nim
new file mode 100644
index 0000000..c74f0b6
--- /dev/null
+++ b/nimterop/exprparser.nim
@@ -0,0 +1,587 @@
+import strformat, strutils, macros, sets
+
+import regex
+
+import compiler/[ast, renderer]
+
+import "."/treesitter/[api, c, cpp]
+
+import "."/[globals, getters, comphelp, tshelp]
+
+# This version of exprparser should be able to handle:
+#
+# All integers + integer like expressions (hex, octal, suffixes)
+# All floating point expressions (except for C++'s hex floating point stuff)
+# Strings and character literals, including C's escape characters (not sure if this is the same as C++'s escape characters or not)
+# Math operators (+, -, /, *)
+# Some Unary operators (-, !, ~). ++, --, and & are yet to be implemented
+# Any identifiers
+# C type descriptors (int, char, etc)
+# Boolean values (true, false)
+# Shift expressions (containing anything in this list)
+# Cast expressions (containing anything in this list)
+# Math expressions (containing anything in this list)
+# Sizeof expressions (containing anything in this list)
+# Cast expressions (containing anything in this list)
+# Parentheses expressions (containing anything in this list)
+# Expressions containing other expressions
+#
+# In addition to the above, it should also handle most type coercions, except
+# for where Nim can't (such as uint + -int)
+
+type
+ # Raised by the procs in this module when a piece of a C expression
+ # cannot be translated into a Nim node (unsupported operator, missing
+ # identifier, unparsable number, ...)
+ ExprParseError* = object of CatchableError
+
+template val(node: TSNode): string =
+ ## Shortcut for the source text of `node`, read from `gState.currentExpr`.
+ ##
+ ## `gState` is not a parameter of the template; it has to be in scope
+ ## wherever the template is used.
+ gState.currentExpr.getNodeVal(node)
+
+proc printDebugExpr*(gState: State, node: TSNode) =
+  ## Debug helper: when `gState.debug` is set, emit the source text of `node`
+  ## followed by its tree-sitter s-expression, both as Nim comments.
+  ## Does nothing otherwise.
+  if not gState.debug:
+    return
+  gecho ("Input => " & node.val).getCommented()
+  gecho gState.currentExpr.printLisp(node).getCommented()
+
+proc getExprIdent*(gState: State, identName: string, kind = nskConst, parent = ""): PNode =
+ ## Gets a cPlugin transformed identifier from `identName`
+ ##
+ ## Returns PNode(nkNone) if the identifier is blank
+ ##
+ ## The result also stays nkNone when `identName` is listed in
+ ## `gState.skippedSyms`. With `kind == nskType` the transformed name is
+ ## turned into an ident without further checks; for any other kind it must
+ ## be non-blank and present in `gState.constIdentifiers`.
+ result = newNode(nkNone)
+ if identName notin gState.skippedSyms:
+ var ident = identName
+ if ident != "_":
+ # Process the identifier through cPlugin
+ ident = gState.getIdentifier(ident, kind, parent)
+ if kind == nskType:
+ result = gState.getIdent(ident)
+ elif ident.nBl and ident in gState.constIdentifiers:
+ # When a cast type name is active, qualify the constant with it:
+ # `ident.TypeName`
+ if gState.currentTyCastName.nBl:
+ ident = ident & "." & gState.currentTyCastName
+ result = gState.getIdent(ident)
+
+proc getExprIdent*(gState: State, node: TSNode, kind = nskConst, parent = ""): PNode =
+ ## Gets a cPlugin transformed identifier from the source text of `node`
+ ##
+ ## Returns PNode(nkNone) if the identifier is blank
+ ##
+ ## Thin overload: the text is read with `node.val` and handed to the
+ ## string based `getExprIdent`.
+ gState.getExprIdent(node.val, kind, parent)
+
+proc parseChar(charStr: string): uint8 {.inline.} =
+  ## Parses a character literal out of a string. This is needed
+  ## because treesitter gives unescaped characters when parsing
+  ## strings.
+  ##
+  ## `charStr` is either a single character or one C escape sequence
+  ## (`\n`, `\x5A`, `\060`, `\12`, ...). Unknown escapes yield 0 and numeric
+  ## escapes above 255 saturate at `uint8.high`.
+  ##
+  ## Raises `ValueError` when a `\x` escape or a four character escape does
+  ## not contain valid hex / octal digits.
+  if charStr.len == 1:
+    return charStr[0].uint8
+
+  # Numeric escapes are collected in an `int` so the value can be clamped
+  # before narrowing; a range check made after converting to `uint8` can
+  # never fire.
+  var code = 0
+
+  # Handle octal, hex, unicode?
+  if charStr.startsWith("\\x"):
+    code = parseHexInt(charStr.replace("\\x", "0x"))
+  elif charStr.len == 4: # Octal, three digits
+    code = parseOctInt("0o" & charStr[1 ..< charStr.len])
+  elif charStr.len in 2 .. 3 and charStr[0] == '\\' and
+      charStr[1 .. ^1].allCharsInSet({'0' .. '7'}):
+    # C also allows one and two digit octal escapes such as `\7` or `\12`
+    code = parseOctInt("0o" & charStr[1 .. ^1])
+
+  if code == 0:
+    # Not a (non-zero) numeric escape: look up the simple escape sequences
+    code =
+      case charStr
+      of "\\a": 0x07
+      of "\\b": 0x08
+      of "\\e": 0x1B
+      of "\\f": 0x0C
+      of "\\n": 0x0A
+      of "\\r": 0x0D
+      of "\\t": 0x09
+      of "\\v": 0x0B
+      of "\\\\": 0x5C
+      of "\\'": 0x27
+      of "\\\"": 0x22
+      of "\\?": 0x3F
+      else: 0 # "\\0" and anything unrecognised
+
+  result = clamp(code, 0, uint8.high.int).uint8
+
+proc getCharLit(charStr: string): PNode {.inline.} =
+  ## Build an `nkCharLit` node whose `intVal` is the character code that
+  ## `parseChar` computes for `charStr`.
+  let charCode = parseChar(charStr)
+  result = newNode(nkCharLit)
+  result.intVal = charCode.int64
+
+proc getFloatNode(number, suffix: string): PNode {.inline.} =
+  ## Turn the text of a C floating point literal into a Nim float node.
+  ##
+  ## A trailing `f`/`F`/`l`/`L` on `number` is cut off and the value becomes
+  ## an `nkFloat64Lit`; without one the result is an `nkFloatLit`. The
+  ## `suffix` argument is not consulted. Raises `ExprParseError` when the
+  ## digits cannot be parsed.
+  let lastChar = number[^1]
+  try:
+    if lastChar in {'l', 'L', 'f', 'F'}:
+      # TODO: handle long double (128 bits) for the `l`/`L` case
+      # result = newNode(nkFloat128Lit)
+      result = newFloatNode(nkFloat64Lit, parseFloat(number[0 .. ^2]))
+    else:
+      result = newFloatNode(nkFloatLit, parseFloat(number))
+  except ValueError:
+    raise newException(ExprParseError, &"Could not parse float value \"{number}\".")
+
+proc getIntNode(number, suffix: string): PNode {.inline.} =
+  ## Get a Nim int node from a C integer expression + suffix
+  ##
+  ## `suffix` selects the node kind (`u` -> uint, `l` -> int32, `ul` ->
+  ## uint32, `ll` -> int64, `ull` -> uint64, anything else -> int). The
+  ## letters may be in either case and either order, as C allows.
+  ##
+  ## `number` may be hexadecimal (`0x..`), binary (`0b..`), octal (C style
+  ## leading `0`, or `0o..`) or decimal; the matching base flag is set on the
+  ## node. Raises `ValueError` if the digits are invalid for the base.
+  case suffix.toLowerAscii()
+  of "u":
+    result = newNode(nkUintLit)
+  of "l":
+    result = newNode(nkInt32Lit)
+  of "ul", "lu":
+    result = newNode(nkUint32Lit)
+  of "ll":
+    result = newNode(nkInt64Lit)
+  of "ull", "llu":
+    result = newNode(nkUint64Lit)
+  else:
+    result = newNode(nkIntLit)
+
+  let lowered = number.toLowerAscii()
+  if lowered.startsWith("0x"):
+    result.intVal = parseHexInt(number)
+    result.flags = {nfBase16}
+  elif lowered.startsWith("0b"):
+    result.intVal = parseBinInt(number)
+    result.flags = {nfBase2}
+  elif lowered.startsWith("0o") or (number.len > 1 and number[0] == '0'):
+    # A C literal with a leading zero (`0755`) is octal, not decimal.
+    # parseOctInt accepts the digits with or without a `0o` prefix.
+    result.intVal = parseOctInt(number)
+    result.flags = {nfBase8}
+  else:
+    result.intVal = parseInt(number)
+
+proc getNumNode(number, suffix: string): PNode {.inline.} =
+  ## Convert the text of a C number into a Nim literal node: anything
+  ## containing a `.` is treated as a float, everything else as an integer.
+  result =
+    if '.' in number: getFloatNode(number, suffix)
+    else: getIntNode(number, suffix)
+
+proc processNumberLiteral(gState: State, node: TSNode): PNode =
+ ## Parse a number literal from a TSNode. Can be a float, hex, long, etc
+ ##
+ ## Returns the literal node from `getNumNode`, wrapped in a `-` prefix node
+ ## when the text starts with a minus sign. Raises `ExprParseError` when no
+ ## number can be found in the node text.
+ result = newNode(nkNone)
+ let nodeVal = node.val
+
+ var match: RegexMatch
+ # Capture groups: 0 = optional minus sign, 1 = the number itself (a float
+ # suffix letter f/F/l/L is captured as part of it), 2 = integer suffix
+ # letters (u/l in any case)
+ const reg = re"(\-)?(0\d+|0[xX][0-9a-fA-F]+|0[bB][01]+|\d+\.\d*[fFlL]?|\d*\.\d+[fFlL]?|\d+)([ulUL]*)"
+ let found = nodeVal.find(reg, match)
+ if found:
+ let
+ # The minus group is optional, so it may have no capture at all
+ prefix = if match.group(0).len > 0: nodeVal[match.group(0)[0]] else: ""
+ number = nodeVal[match.group(1)[0]]
+ suffix = nodeVal[match.group(2)[0]]
+
+ result = getNumNode(number, suffix)
+
+ if result.kind != nkNone and prefix == "-":
+ result = nkPrefix.newTree(
+ gState.getIdent("-"),
+ result
+ )
+ else:
+ raise newException(ExprParseError, &"Could not find a number in number_literal: \"{nodeVal}\"")
+
+proc processCharacterLiteral(gState: State, node: TSNode): PNode =
+  ## Convert a `char_literal` node into an `nkCharLit`.
+  ##
+  ## Example: the node `(char_literal 1 1 3 "'G'")` for the input `'G'`
+  ## becomes `nkCharLit("G")`.
+  let quoted = node.val
+  # Drop the surrounding single quotes before decoding the character
+  getCharLit(quoted[1 .. ^2])
+
+proc processStringLiteral(gState: State, node: TSNode): PNode =
+ ## Convert a `string_literal` node into an `nkStrLit`, decoding every
+ ## character or C escape sequence of the text through `parseChar`.
+ # Input => "\n\rfoobar\0\'"
+ #
+ # (string_literal 1 1 16 ""\n\rfoobar\0\'""
+ # (escape_sequence 1 2 2 "\n")
+ # (escape_sequence 1 4 2 "\r")
+ # (escape_sequence 1 12 2 "\0")
+ # (escape_sequence 1 14 2 "\'")
+ # )
+ #
+ # Output => "\n\cfoobar\x00\'"
+ #
+ # nkStrLit("\x0A\x0Dfoobar\x00\'")
+ let
+ nodeVal = node.val
+ # Text between the surrounding double quotes
+ strVal = nodeVal[1 ..< nodeVal.len - 1]
+
+ const
+ # One match per logical character: a hex escape, a three digit escape,
+ # one of the simple escapes, or any single ASCII character
+ str = "(\\\\x[[:xdigit:]]{2}|\\\\\\d{3}|\\\\0|\\\\a|\\\\b|\\\\e|\\\\f|\\\\n|\\\\r|\\\\t|\\\\v|\\\\\\\\|\\\\'|\\\\\"|[[:ascii:]])"
+ reg = re(str)
+
+ # Convert the c string escape sequences/etc to Nim chars
+ var nimStr = newStringOfCap(nodeVal.len)
+ for m in strVal.findAll(reg):
+ nimStr.add(parseChar(strVal[m.group(0)[0]]).chr)
+
+ result = newStrNode(nkStrLit, nimStr)
+
+proc processTSNode(gState: State, node: TSNode, typeofNode: var PNode): PNode
+
+proc processParenthesizedExpr(gState: State, node: TSNode, typeofNode: var PNode): PNode =
+  ## Convert a `parenthesized_expression` into an `nkPar` node.
+  ##
+  ## Every named child of `node` is converted with `processTSNode` (which
+  ## receives `typeofNode`) and appended to the `nkPar` in order.
+  ##
+  ## Example input: `(a + b)`, parsed by tree-sitter as
+  ##
+  ##   (parenthesized_expression 1 1 7
+  ##     (math_expression 1 2 5
+  ##       (identifier 1 2 1 "a")
+  ##       (identifier 1 6 1 "b")
+  ##     )
+  ##   )
+  ##
+  ## which yields an `nkPar` wrapping the converted math expression.
+  result = newNode(nkPar)
+  for childIdx in 0 .. node.len() - 1:
+    let converted = gState.processTSNode(node[childIdx], typeofNode)
+    result.add(converted)
+
+proc processCastExpression(gState: State, node: TSNode, typeofNode: var PNode): PNode =
+  ## Convert a C `cast_expression` into a Nim `nkCast` node.
+  ##
+  ## Example: `(int)a`, parsed as
+  ##
+  ##   (cast_expression 1 1 6 "(int)a"
+  ##     (type_descriptor 1 2 3 "int"
+  ##       (primitive_type 1 2 3 "int")
+  ##     )
+  ##     (identifier 1 6 1 "a")
+  ##   )
+  ##
+  ## becomes `cast[cint](a)`, i.e. `nkCast(nkIdent("cint"), nkIdent("a"))`.
+  let
+    # First child is the target type, second the expression being cast
+    targetType = gState.processTSNode(node[0], typeofNode)
+    operand = gState.processTSNode(node[1], typeofNode)
+  nkCast.newTree(targetType, operand)
+
+proc getNimUnarySym(csymbol: string): string =
+  ## Map a unary C operator to its Nim spelling: `+` and `-` are kept,
+  ## `~` and `!` both become `not`. Any other symbol raises
+  ## `ExprParseError`.
+  ##
+  ## TODO: Add ++, --,
+  result =
+    case csymbol
+    of "~", "!": "not"
+    of "+", "-": csymbol
+    else:
+      raise newException(ExprParseError, &"Unsupported unary symbol \"{csymbol}\"")
+
+proc getNimBinarySym(csymbol: string): string =
+  ## Map a binary C operator to its Nim spelling.
+  ##
+  ## Arithmetic and comparison operators are kept as they are; the bitwise,
+  ## logical, modulo and shift operators are replaced by the Nim keywords
+  ## `or`, `and`, `xor`, `mod`, `shl` and `shr`. Any other symbol raises
+  ## `ExprParseError`.
+  result =
+    case csymbol
+    of "<<": "shl"
+    of ">>": "shr"
+    of "%": "mod"
+    of "^": "xor"
+    of "&", "&&": "and"
+    of "|", "||": "or"
+    of "+", "-", "*", "/", "==", "!=", "<", ">", "<=", ">=": csymbol
+    else:
+      raise newException(ExprParseError, &"Unsupported binary symbol \"{csymbol}\"")
+
+proc processBinaryExpression(gState: State, node: TSNode, typeofNode: var PNode): PNode =
+ ## Convert a two-operand expression into an `nkInfix` node.
+ ##
+ ## The right operand is always wrapped in a call to `typeofNode`. When
+ ## `typeofNode` is still nil on entry it is set to `typeof(<left operand>)`
+ ## and stays set for the caller, since it is a `var` parameter.
+ # Node has left and right children ie: (2 + 7)
+ #
+ # Input => a == b
+ #
+ # (equality_expression 1 1 6
+ # (identifier 1 1 1 "a")
+ # (identifier 1 6 1 "b")
+ # )
+ #
+ # Output => a == typeof(a)(b)
+ #
+ # nkInfix(
+ # nkIdent("=="),
+ # nkIdent("a"),
+ # nkCall(
+ # nkCall(
+ # nkIdent("typeof"),
+ # nkIdent("a")
+ # ),
+ # nkIdent("b")
+ # )
+ # )
+ result = newNode(nkInfix)
+
+ let
+ left = node[0]
+ right = node[1]
+ # The operator is read from the child at index 1 counting all children
+ # (tsNodeChild), whereas `node[i]` above indexes named children only
+ binarySym = node.tsNodeChild(1).val.strip()
+ nimSym = getNimBinarySym(binarySym)
+
+ result.add gState.getIdent(nimSym)
+ # The left side is converted first, before typeofNode is possibly set below
+ let leftNode = gState.processTSNode(left, typeofNode)
+
+ if typeofNode.isNil:
+ typeofNode = nkCall.newTree(
+ gState.getIdent("typeof"),
+ leftNode
+ )
+
+ let rightNode = gState.processTSNode(right, typeofNode)
+
+ result.add leftNode
+ result.add nkCall.newTree(
+ typeofNode,
+ rightNode
+ )
+ if binarySym == "/":
+ # Special case. Nim's operators generally output
+ # the same type they take in, except for division.
+ # So we need to emulate C here and cast the whole
+ # expression to the type of the first arg
+ result = nkCall.newTree(
+ typeofNode,
+ result
+ )
+
+proc processUnaryExpression(gState: State, node: TSNode, typeofNode: var PNode): PNode =
+ ## Convert a one-operand expression into an `nkPar` that wraps an
+ ## `nkPrefix` node.
+ ##
+ ## For unary minus the operand is additionally wrapped in `int64(...)`, and
+ ## a nil `typeofNode` is set to the `int64` ident (visible to the caller,
+ ## as it is a `var` parameter).
+ # Input => !a
+ #
+ # (logical_expression 1 1 2 "!a"
+ # (identifier 1 2 1 "a")
+ # )
+ #
+ # Output => (not a)
+ #
+ # nkPar(
+ # nkPrefix(
+ # nkIdent("not"),
+ # nkIdent("a")
+ # )
+ # )
+ result = newNode(nkPar)
+
+ let
+ child = node[0]
+ # The operator is the first child counting all children (tsNodeChild);
+ # `node[0]` above is the first named child, i.e. the operand
+ unarySym = node.tsNodeChild(0).val.strip()
+ nimSym = getNimUnarySym(unarySym)
+
+ if nimSym == "-":
+ # Special case. The minus symbol must be in front of an integer,
+ # so we have to make a gentle cast here to coerce it to one.
+ # Might be bad because we are overwriting the type
+ # There's probably a better way of doing this
+ if typeofNode.isNil:
+ typeofNode = gState.getIdent("int64")
+
+ result.add nkPrefix.newTree(
+ gState.getIdent(unarySym),
+ nkPar.newTree(
+ nkCall.newTree(
+ gState.getIdent("int64"),
+ gState.processTSNode(child, typeofNode)
+ )
+ )
+ )
+ else:
+ result.add nkPrefix.newTree(
+ gState.getIdent(nimSym),
+ gState.processTSNode(child, typeofNode)
+ )
+
+proc processUnaryOrBinaryExpression(gState: State, node: TSNode, typeofNode: var PNode): PNode =
+  ## Dispatch on the number of named children of `node`: exactly one child is
+  ## a unary expression (`-1`, `~true`, `!something`), more than one is a
+  ## binary expression (`a + b`, `c * d`). A node without named children
+  ## raises `ExprParseError`.
+  let childCount = node.len
+  if childCount == 1:
+    # Node has only one child, ie -(20 + 7)
+    result = processUnaryExpression(gState, node, typeofNode)
+  elif childCount > 1:
+    # Node has left and right children ie: (2 + 7)
+    result = processBinaryExpression(gState, node, typeofNode)
+  else:
+    raise newException(ExprParseError, &"Invalid {node.getName()} \"{node.val}\"")
+
+proc processSizeofExpression(gState: State, node: TSNode, typeofNode: var PNode): PNode =
+  ## Convert a C `sizeof_expression` into a call to Nim's `sizeof`.
+  ##
+  ## Example: `sizeof(int)`, parsed as
+  ##
+  ##   (sizeof_expression 1 1 11 "sizeof(int)"
+  ##     (type_descriptor 1 8 3 "int"
+  ##       (primitive_type 1 8 3 "int")
+  ##     )
+  ##   )
+  ##
+  ## becomes `sizeof(cint)`, i.e. `nkCall(nkIdent("sizeof"), nkIdent("cint"))`.
+  let
+    sizeofIdent = gState.getIdent("sizeof")
+    # The only named child is the type or expression being measured
+    operand = gState.processTSNode(node[0], typeofNode)
+  nkCall.newTree(sizeofIdent, operand)
+
+proc processTSNode(gState: State, node: TSNode, typeofNode: var PNode): PNode =
+ ## Handle all of the types of expressions here. This proc gets called recursively
+ ## in the processX procs and will drill down to sub nodes.
+ ##
+ ## Dispatches on the tree-sitter node type name. The result is an nkNone
+ ## node for comments; unsupported node types, unknown identifiers and
+ ## unknown types raise `ExprParseError`. `typeofNode` is passed on to the
+ ## processX procs, some of which assign it.
+ result = newNode(nkNone)
+ let nodeName = node.getName()
+
+ decho "NODE: ", nodeName, ", VAL: ", node.val
+
+ case nodeName
+ of "number_literal":
+ # Input -> 0x1234FE, 1231, 123u, 123ul, 123ull, 1.334f
+ # Output -> 0x1234FE, 1231, 123'u, 123'u32, 123'u64, 1.334
+ result = gState.processNumberLiteral(node)
+ of "string_literal":
+ # Input -> "foo\0\x42"
+ # Output -> "foo\0"
+ result = gState.processStringLiteral(node)
+ of "char_literal":
+ # Input -> 'F', '\060' // Octal, '\x5A' // Hex, '\r' // escape sequences
+ # Output -> 'F', '0', 'Z', '\r'
+ result = gState.processCharacterLiteral(node)
+ of "expression_statement", "ERROR", "translation_unit":
+ # Note that we're parsing partial expressions, so the TSNode might contain
+ # an ERROR node. If that's the case, they usually contain children with
+ # partial results, which will contain parsed expressions
+ #
+ # Input (top level statement) -> ((1 + 3 - IDENT) - (int)400.0)
+ # Output -> (1 + typeof(1)(3) - typeof(1)(IDENT) - typeof(1)(cast[int](400.0))) # Type casting in case some args differ
+ if node.len == 1:
+ result = gState.processTSNode(node[0], typeofNode)
+ elif node.len > 1:
+ # Convert every child and keep only those that produced a node
+ var nodes: seq[PNode]
+ for i in 0 ..< node.len:
+ let subNode = gState.processTSNode(node[i], typeofNode)
+ if subNode.kind != nkNone:
+ nodes.add(subNode)
+ # Multiple nodes can get tricky. Don't support them yet, unless they
+ # have at most one valid node
+ if nodes.len > 1:
+ raise newException(ExprParseError, &"Node type \"{nodeName}\" with val ({node.val}) has more than one non empty node")
+ if nodes.len == 1:
+ result = nodes[0]
+ else:
+ raise newException(ExprParseError, &"Node type \"{nodeName}\" has no children")
+ of "parenthesized_expression":
+ # Input -> (IDENT - OTHERIDENT)
+ # Output -> (IDENT - typeof(IDENT)(OTHERIDENT)) # Type casting in case OTHERIDENT is a slightly different type (uint vs int)
+ result = gState.processParenthesizedExpr(node, typeofNode)
+ of "sizeof_expression":
+ # Input -> sizeof(char)
+ # Output -> sizeof(cchar)
+ result = gState.processSizeofExpression(node, typeofNode)
+ # binary_expression from the new treesitter upgrade should work here
+ # once we upgrade
+ of "math_expression", "logical_expression", "relational_expression",
+ "bitwise_expression", "equality_expression", "binary_expression",
+ "shift_expression":
+ # Input -> a == b, a != b, !a, ~a, a < b, a > b, a <= b, a >= b, a >> b, a << b
+ # Output ->
+ # typeof(a)(a == typeof(a)(b))
+ # typeof(a)(a != typeof(a)(b))
+ # (not a)
+ # (not a)
+ # typeof(a)(a < typeof(a)(b))
+ # typeof(a)(a > typeof(a)(b))
+ # typeof(a)(a <= typeof(a)(b))
+ # typeof(a)(a >= typeof(a)(b))
+ # a shr typeof(a)(b)
+ # a shl typeof(a)(b)
+ result = gState.processUnaryOrBinaryExpression(node, typeofNode)
+ of "cast_expression":
+ # Input -> (int) a
+ # Output -> cast[cint](a)
+ result = gState.processCastExpression(node, typeofNode)
+ # Why are these node types named true/false?
+ of "true", "false":
+ # Input -> true, false
+ # Output -> true, false
+ result = gState.parseString(node.val)
+ of "type_descriptor", "sized_type_specifier":
+ # Input -> int, unsigned int, long int, etc
+ # Output -> cint, cuint, clong, etc
+ let ty = getType(node.val)
+ if ty.len > 0:
+ # If ty is not empty, one of C's builtin types has been found
+ result = gState.getExprIdent(ty, nskType, parent=node.getName())
+ else:
+ result = gState.getExprIdent(node.val, nskType, parent=node.getName())
+ if result.kind == nkNone:
+ raise newException(ExprParseError, &"Missing type specifier \"{node.val}\"")
+ of "identifier":
+ # Input -> IDENT
+ # Output -> IDENT (if found in sym table, else error)
+ result = gState.getExprIdent(node, parent=node.getName())
+ if result.kind == nkNone:
+ raise newException(ExprParseError, &"Missing identifier \"{node.val}\"")
+ of "comment":
+ # Comments contribute nothing; result stays nkNone
+ discard
+ else:
+ raise newException(ExprParseError, &"Unsupported node type \"{nodeName}\" for node \"{node.val}\"")
+
+ decho "NODE RESULT: ", result
+
+proc parseCExpression*(gState: State, codeRoot: TSNode, name = ""): PNode =
+  ## Convert the tree-sitter tree rooted at `codeRoot` into a Nim node.
+  ##
+  ## Never raises for conversion problems: any exception coming out of
+  ## `processTSNode` is reported through `decho` and an `nkNone` node is
+  ## returned instead. `name` is accepted but not used here.
+
+  # Receives the type of the first symbol seen, which the processX procs
+  # use for type casting
+  var firstOperandType: PNode = nil
+  try:
+    result = gState.processTSNode(codeRoot, firstOperandType)
+  except ExprParseError as parseErr:
+    decho parseErr.msg
+    result = newNode(nkNone)
+  except Exception as unexpected:
+    decho "UNEXPECTED EXCEPTION: ", unexpected.msg
+    result = newNode(nkNone)
+
+proc parseCExpression*(gState: State, code: string, name = ""): PNode =
+  ## Convert the C string to a nim PNode tree
+  ##
+  ## `code` is parsed with tree-sitter in `gState.mode` and the tree is
+  ## converted by the `TSNode` overload. While that runs, `code` and `name`
+  ## are kept in `gState.currentExpr` / `gState.currentTyCastName`; both are
+  ## reset to "" afterwards.
+  ##
+  ## Returns an `nkNone` node when `code` is blank or cannot be converted.
+  if code.Bl:
+    # `withCodeAst` asserts on blank input; an empty expression is simply
+    # not convertible, which callers already detect through nkNone
+    return newNode(nkNone)
+
+  gState.currentExpr = code
+  gState.currentTyCastName = name
+
+  withCodeAst(gState.currentExpr, gState.mode):
+    result = gState.parseCExpression(root, name)
+
+  # Clear the state
+  gState.currentExpr = ""
+  gState.currentTyCastName = "" \ No newline at end of file
diff --git a/nimterop/getters.nim b/nimterop/getters.nim
index 2d8d9bb..121c8d5 100644
--- a/nimterop/getters.nim
+++ b/nimterop/getters.nim
@@ -221,16 +221,19 @@ proc len*(node: TSNode): int =
result = node.tsNodeNamedChildCount().int
proc `[]`*(node: TSNode, i: SomeInteger): TSNode =
- if i < node.len:
+ if i < type(i)(node.len()):
result = node.tsNodeNamedChild(i.uint32)
proc getName*(node: TSNode): string {.inline.} =
if not node.isNil:
return $node.tsNodeType()
-proc getNodeVal*(gState: State, node: TSNode): string =
+proc getNodeVal*(code: var string, node: TSNode): string =
if not node.isNil:
- return gState.code[node.tsNodeStartByte() .. node.tsNodeEndByte()-1].strip()
+ return code[node.tsNodeStartByte() .. node.tsNodeEndByte()-1].strip()
+
+proc getNodeVal*(gState: State, node: TSNode): string =
+ gState.code.getNodeVal(node)
proc getAtom*(node: TSNode): TSNode =
if not node.isNil:
@@ -349,13 +352,16 @@ proc inChildren*(node: TSNode, ntype: string): bool =
result = true
break
-proc getLineCol*(gState: State, node: TSNode): tuple[line, col: int] =
+proc getLineCol*(code: var string, node: TSNode): tuple[line, col: int] =
# Get line number and column info for node
let
point = node.tsNodeStartPoint()
result.line = point.row.int + 1
result.col = point.column.int + 1
+proc getLineCol*(gState: State, node: TSNode): tuple[line, col: int] =
+ getLineCol(gState.code, node)
+
proc getTSNodeNamedChildCountSansComments*(node: TSNode): int =
for i in 0 ..< node.len:
if node.getName() != "comment":
@@ -374,7 +380,7 @@ proc getPxName*(node: TSNode, offset: int): string =
if count == offset and not np.isNil:
return np.getName()
-proc printLisp*(gState: State, root: TSNode): string =
+proc printLisp*(code: var string, root: TSNode): string =
var
node = root
nextnode: TSNode
@@ -384,18 +390,18 @@ proc printLisp*(gState: State, root: TSNode): string =
if not node.isNil and depth > -1:
result &= spaces(depth)
let
- (line, col) = gState.getLineCol(node)
+ (line, col) = code.getLineCol(node)
result &= &"({$node.tsNodeType()} {line} {col} {node.tsNodeEndByte() - node.tsNodeStartByte()}"
let
- val = gState.getNodeVal(node)
+ val = code.getNodeVal(node)
if "\n" notin val and " " notin val:
result &= &" \"{val}\""
else:
break
- if node.tsNodeNamedChildCount() != 0:
+ if node.len() != 0:
result &= "\n"
- nextnode = node.tsNodeNamedChild(0)
+ nextnode = node[0]
depth += 1
else:
result &= ")\n"
@@ -419,21 +425,24 @@ proc printLisp*(gState: State, root: TSNode): string =
if node == root:
break
+proc printLisp*(gState: State, root: TSNode): string =
+ printLisp(gState.code, root)
+
proc getCommented*(str: string): string =
"\n# " & str.strip().replace("\n", "\n# ")
proc printTree*(gState: State, pnode: PNode, offset = ""): string =
- if gState.debug and pnode.kind != nkNone:
+ if not pnode.isNil and gState.debug and pnode.kind != nkNone:
result &= "\n# " & offset & $pnode.kind & "("
case pnode.kind
of nkCharLit:
- result &= "'" & pnode.intVal.char & "')"
+ result &= ($pnode.intVal.char).escape & ")"
of nkIntLit..nkUInt64Lit:
result &= $pnode.intVal & ")"
of nkFloatLit..nkFloat128Lit:
result &= $pnode.floatVal & ")"
of nkStrLit..nkTripleStrLit:
- result &= "\"" & pnode.strVal & "\")"
+ result &= pnode.strVal.escape & ")"
of nkSym:
result &= $pnode.sym & ")"
of nkIdent:
@@ -452,13 +461,13 @@ proc printTree*(gState: State, pnode: PNode, offset = ""): string =
proc printDebug*(gState: State, node: TSNode) =
if gState.debug:
- gecho ("Input => " & gState.getNodeVal(node)).getCommented() & "\n" &
- gState.printLisp(node).getCommented()
+ gecho ("Input => " & gState.getNodeVal(node)).getCommented()
+ gecho gState.printLisp(node).getCommented()
proc printDebug*(gState: State, pnode: PNode) =
- if gState.debug:
- gecho ("Output => " & $pnode).getCommented() & "\n" &
- gState.printTree(pnode)
+ if gState.debug and pnode.kind != nkNone:
+ gecho ("Output => " & $pnode).getCommented()
+ gecho gState.printTree(pnode)
# Compiler shortcuts
diff --git a/nimterop/globals.nim b/nimterop/globals.nim
index f159124..5db17a3 100644
--- a/nimterop/globals.nim
+++ b/nimterop/globals.nim
@@ -1,4 +1,4 @@
-import sequtils, sets, tables
+import sequtils, sets, tables, strutils
import regex
@@ -76,6 +76,11 @@ type
# All const names for enum casting
constIdentifiers*: HashSet[string]
+ # All symbols that have been skipped due to
+ # being unwrappable or the user provided
+ # override is blank
+ skippedSyms*: HashSet[string]
+
# Legacy ast fields, remove when ast2 becomes default
constStr*, enumStr*, procStr*, typeStr*: string
@@ -93,6 +98,9 @@ type
currentHeader*, impShort*, sourceFile*: string
+ # Used for the exprparser.nim module
+ currentExpr*, currentTyCastName*: string
+
data*: seq[tuple[name, val: string]]
nodeBranch*: seq[string]
@@ -113,12 +121,12 @@ when not declared(CIMPORT):
export gAtoms, gExpressions, gEnumVals, Kind, Ast, AstTable, State, nBl, Bl
# Redirect output to file when required
- template gecho*(args: string) {.dirty.} =
+ template gecho*(args: string) =
if gState.outputHandle.isNil:
echo args
else:
gState.outputHandle.writeLine(args)
- template decho*(str: untyped): untyped =
+ template decho*(args: varargs[string, `$`]): untyped =
if gState.debug:
- gecho str.getCommented()
+ gecho join(args, "").getCommented() \ No newline at end of file
diff --git a/nimterop/toast.nim b/nimterop/toast.nim
index ab44970..98045bf 100644
--- a/nimterop/toast.nim
+++ b/nimterop/toast.nim
@@ -2,16 +2,11 @@ import os, osproc, strformat, strutils, tables, times
import "."/treesitter/[api, c, cpp]
-import "."/[ast, ast2, globals, getters, grammar, build]
+import "."/[ast, ast2, globals, getters, grammar, build, tshelp]
proc process(gState: State, path: string, astTable: AstTable) =
doAssert existsFile(path), &"Invalid path {path}"
- var parser = tsParserNew()
-
- defer:
- parser.tsParserDelete()
-
if gState.mode.Bl:
gState.mode = getCompilerMode(path)
@@ -20,31 +15,16 @@ proc process(gState: State, path: string, astTable: AstTable) =
else:
gState.code = readFile(path)
- doAssert gState.code.nBl, "Empty file or preprocessor error"
-
- if gState.mode == "c":
- doAssert parser.tsParserSetLanguage(treeSitterC()), "Failed to load C parser"
- elif gState.mode == "cpp":
- doAssert parser.tsParserSetLanguage(treeSitterCpp()), "Failed to load C++ parser"
- else:
- doAssert false, &"Invalid parser {gState.mode}"
-
- var
- tree = parser.tsParserParseString(nil, gState.code.cstring, gState.code.len.uint32)
- root = tree.tsTreeRootNode()
-
- defer:
- tree.tsTreeDelete()
-
- if gState.past:
- gecho gState.printLisp(root)
- elif gState.pnim:
- if Feature.ast2 in gState.feature:
- ast2.parseNim(gState, path, root)
- else:
- ast.parseNim(gState, path, root, astTable)
- elif gState.preprocess:
- gecho gState.code
+ withCodeAst(gState.code, gState.mode):
+ if gState.past:
+ gecho gState.printLisp(root)
+ elif gState.pnim:
+ if Feature.ast2 in gState.feature:
+ ast2.parseNim(gState, path, root)
+ else:
+ ast.parseNim(gState, path, root, astTable)
+ elif gState.preprocess:
+ gecho gState.code
# CLI processing with default values
proc main(
diff --git a/nimterop/tshelp.nim b/nimterop/tshelp.nim
new file mode 100644
index 0000000..109321c
--- /dev/null
+++ b/nimterop/tshelp.nim
@@ -0,0 +1,28 @@
+import "."/treesitter/[c, cpp]
+
+template withCodeAst*(code: string, mode: string, body: untyped): untyped =
+ ## A simple template to inject the TSNode into a body of code
+ mixin treeSitterC
+ mixin treeSitterCpp
+
+ var parser = tsParserNew()
+ defer:
+ parser.tsParserDelete()
+
+ doAssert code.nBl, "Empty code or preprocessor error"
+
+ if mode == "c":
+ doAssert parser.tsParserSetLanguage(treeSitterC()), "Failed to load C parser"
+ elif mode == "cpp":
+ doAssert parser.tsParserSetLanguage(treeSitterCpp()), "Failed to load C++ parser"
+ else:
+ doAssert false, "Invalid parser " & mode
+
+ var
+ tree = parser.tsParserParseString(nil, code.cstring, code.len.uint32)
+ root {.inject.} = tree.tsTreeRootNode()
+
+ body
+
+ defer:
+ tree.tsTreeDelete()
\ No newline at end of file
diff --git a/tests/include/tast2.h b/tests/include/tast2.h
index bdf8823..b47a801 100644
--- a/tests/include/tast2.h
+++ b/tests/include/tast2.h
@@ -8,6 +8,42 @@ extern "C" {
#define D "hello"
#define E 'c'
+#define UEXPR (1234u << 1)
+#define ULEXPR (1234ul << 2)
+#define ULLEXPR (1234ull << 3)
+#define LEXPR (1234l << 4)
+#define LLEXPR (1234ll << 5)
+
+#define SHL1 (1u << 1)
+#define SHL2 (1u << 2)
+#define SHL3 (1u << 3)
+#define COERCE 645635634896ull + 35436
+#define COERCE2 645635634896 + 35436ul
+#define BINEXPR ~(-(1u << !-1)) ^ (10 >> 1)
+#define BOOL true
+#define MATHEXPR (1 + 2/3*20 - 100)
+#define ANDEXPR (100 & 11000)
+#define CASTEXPR (char) 34
+#define AVAL 100
+#define BVAL 200
+#define EQ1 AVAL <= BVAL
+#define EQ2 AVAL >= BVAL
+#define EQ3 AVAL > BVAL
+#define EQ4 AVAL < BVAL
+#define EQ5 AVAL != BVAL
+#define EQ6 AVAL == BVAL
+
+#define SIZEOF sizeof(char)
+#define REG_STR "regular string"
+#define NOTSUPPORTEDSTR "not a " REG_STR
+
+#define NULLCHAR '\0'/* comments should not break things*/
+#define OCTCHAR '\012' // nor should this comment
+#define HEXCHAR '\xFE'
+#define TRICKYSTR "\x4E\034\nfoo\0\'\"\r\v\a\b\e\f\t\\\?bar"
+
+#define ALLSHL (SHL1 | SHL2 | SHL3)
+
struct A0;
struct A1 {};
typedef struct A2;
diff --git a/tests/tast2.nim b/tests/tast2.nim
index e13c4ac..4cfbeac 100644
--- a/tests/tast2.nim
+++ b/tests/tast2.nim
@@ -3,6 +3,10 @@ import macros, os, sets, strutils
import nimterop/[cimport]
static:
+ # Skip casting on lower nim compilers because
+ # the VM does not support it
+ when (NimMajor, NimMinor, NimPatch) < (1, 0, 0):
+ cSkipSymbol @["CASTEXPR"]
cDebug()
const
@@ -93,11 +97,11 @@ macro testFields(t: typed, fields: static[string] = "") =
for i in 0 ..< rl.len:
let
name = ($rl[i][0]).strip(chars = {'*'})
- typ = ($(rl[i][1].repr())).replace("\n", "").replace(" ", "")
+ typ = ($(rl[i][1].repr())).replace("\n", "").replace(" ", "").replace("typeof", "type")
n = names.find(name)
assert n != -1, $t & "." & name & " invalid"
- assert types[n] == typ,
- "typeof(" & $t & ":" & name & ") != " & types[n] & ", is " & typ
+ assert types[n].replace("typeof", "type") == typ,
+ "typeof(" & $t & ":" & name & ") != " & types[n].replace("typeof", "type") & ", is " & typ
assert A == 2
assert B == 1.0
@@ -105,6 +109,48 @@ assert C == 0x10
assert D == "hello"
assert E == 'c'
+assert not defined(NOTSUPPORTEDSTR)
+
+assert UEXPR == (1234.uint shl 1)
+assert ULEXPR == (1234.uint32 shl 2)
+assert ULLEXPR == (1234.uint64 shl 3)
+assert LEXPR == (1234.int32 shl 4)
+assert LLEXPR == (1234.int64 shl 5)
+
+assert AVAL == 100
+assert BVAL == 200
+
+assert EQ1 == (AVAL <= BVAL)
+assert EQ2 == (AVAL >= BVAL)
+assert EQ3 == (AVAL > BVAL)
+assert EQ4 == (AVAL < BVAL)
+assert EQ5 == (AVAL != BVAL)
+assert EQ6 == (AVAL == BVAL)
+
+assert SIZEOF == 1
+
+assert COERCE == 645635670332'u64
+assert COERCE2 == 645635670332'i64
+
+assert BINEXPR == 5
+assert BOOL == true
+assert MATHEXPR == -99
+assert ANDEXPR == 96
+
+when (NimMajor, NimMinor, NimPatch) >= (1, 0, 0):
+ assert CASTEXPR == 34.chr
+
+assert TRICKYSTR == "N\x1C\nfoo\x00\'\"\c\v\a\b\e\f\t\\\\?bar"
+assert NULLCHAR == '\0'
+assert OCTCHAR == '\n'
+assert HEXCHAR.int == 0xFE
+
+assert SHL1 == (1.uint shl 1)
+assert SHL2 == (1.uint shl 2)
+assert SHL3 == (1.uint shl 3)
+
+assert ALLSHL == (SHL1 or SHL2 or SHL3)
+
assert A0 is object
testFields(A0, "f1!cint")
checkPragmas(A0, pHeaderBy, istype = false)
@@ -271,7 +317,7 @@ var a21p: A21p
a21p = addr a20
assert A22 is object
-testFields(A22, "f1|f2!ptr ptr cint|array[123 + 132, ptr cint]")
+testFields(A22, "f1|f2!ptr ptr cint|array[123 + type(123)(132), ptr cint]")
checkPragmas(A22, pHeaderBy, istype = false)
var a22: A22
a22.f1 = addr a15.a2[0]
@@ -427,4 +473,4 @@ checkPragmas(nested, pHeaderImpBy)
when defined(HEADER):
assert sitest1(5) == 10
- assert sitest1(10) == 20
\ No newline at end of file
+ assert sitest1(10) == 20
diff --git a/tests/tmath.nim b/tests/tmath.nim
index 5d84700..b8477c1 100644
--- a/tests/tmath.nim
+++ b/tests/tmath.nim
@@ -13,6 +13,12 @@ when defined(windows):
complex = object
static:
+ when (NimMajor, NimMinor, NimPatch) < (1, 0, 0):
+ # FP_ILOGB0 and FP_ILOGBNAN are casts that are unsupported
+ # on lower Nim VMs
+ cSkipSymbol @["math_errhandling", "FP_ILOGB0", "FP_ILOGBNAN"]
+ else:
+ cSkipSymbol @["math_errhandling"]
cDebug()
cDisableCaching()
cAddStdDir()