initial commit

author: Oskari Timperi <oskari.timperi@iki.fi> 2017-10-25 22:00:27 +0300
committer: Oskari Timperi <oskari.timperi@iki.fi> 2017-10-25 23:15:54 +0300
commit: 46b03de3685b8714f97013d435a337ba4c0eaa8e (patch)
tree: 7c36306af66285e26dc5bfddd47a4ab45dd86f4c
download: nimrec-46b03de3685b8714f97013d435a337ba4c0eaa8e.tar.gz
nimrec-46b03de3685b8714f97013d435a337ba4c0eaa8e.zip
6 files changed, 444 insertions, 0 deletions
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..5e61df5
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2017 Oskari Timperi
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..89c1871
--- /dev/null
+++ b/README.md
@@ -0,0 +1,28 @@
+# Rec file parser for Nim
+
+Using this library you can parse rec files made by the
+[recutils](https://www.gnu.org/software/recutils/) software.
+
+# Examples
+
+If you have the following recfile:
+
+```
+Name: John Doe
+Age: 34
+
+Name: Jane Doe
+Age: 32
+```
+
+You can read the names of the persons like this:
+
+```nim
+import nimrec
+import streams
+
+for record in records(newFileStream("persons.rec")):
+    echo(record["Name"])
+```
+
+More examples can be found in the `examples` directory.
diff --git a/nimrec.nim b/nimrec.nim
new file mode 100644
index 0000000..034bbfc
--- /dev/null
+++ b/nimrec.nim
@@ -0,0 +1,158 @@
+import streams
+import strutils
+import tables
+
+type
+    Field* = ref object ## One `label: value` entry of a record.
+        label*: string ## Field name, i.e. the text before the colon.
+        value*: string ## Field text; continuation lines are joined with '\l'.
+
+    Record* = ref object ## The fields parsed from one block of a rec file.
+        fields: OrderedTableRef[string, seq[string]] # label -> its values, in input order
+
+    ParseState {.pure.} = enum # States of the char-driven parser in `feed`.
+        Initial # At the start of a line, between fields.
+        Comment # Inside a '#' line; input is ignored up to the newline.
+        Label # Accumulating a field label, up to the ':'.
+        Value # Accumulating a field value, up to the newline.
+        ValueSkipSpace # Right after ':' or '+': one leading blank is dropped.
+        FieldReady # A value line ended; '+' continues it, else the field is stored.
+
+    RecParser* = ref object ## Incremental parser state driven by `feed`.
+        state: ParseState # Current state of the state machine.
+        field: Field # Field being accumulated; nil between fields.
+        record: Record # Record being accumulated; nil when none is pending.
+
+    RecParseError* = object of Exception ## Raised by `feed` on malformed input.
+
+const
+    LabelFirstChar = {'a'..'z', 'A'..'Z', '%'} # chars allowed to start a label
+
+    LabelChar = {'a'..'z', 'A'..'Z', '0'..'9', '_'} # chars allowed after the first
+
+    EofMarker = '\0' # fed to the parser to signal end of input
+
+proc newRecParser*(): RecParser =
+    ## Creates a parser in its initial state, with no field or record pending.
+    result = RecParser(state: ParseState.Initial)
+
+proc newField(): Field =
+    ## Creates a field whose label and value are both empty strings.
+    # Explicit "" so the parser can append with `&=` right away.
+    result = Field(label: "", value: "")
+
+proc newField(label, value: string): Field =
+    ## Creates a field holding the given `label` and `value`.
+    # Both strings are stored as passed, without any validation.
+    result = Field(label: label, value: value)
+
+proc newRecord(): Record =
+    ## Creates a record with an empty, insertion-ordered field table.
+    result = Record(fields: newOrderedTable[string, seq[string]]())
+
+proc feed*(parser: RecParser, ch: char, record: var Record): bool =
+    ## Feeds one char; returns true with the finished record stored in `record`
+    ## when `ch` ends a record. Raises `RecParseError` on malformed input.
+    while true:
+        case parser.state
+        of ParseState.Initial:
+            case ch
+            of '#':
+                parser.state = ParseState.Comment
+            of '\l', EofMarker:
+                if parser.record != nil:
+                    result = true
+                    record = parser.record
+                    parser.record = nil
+            of LabelFirstChar:
+                parser.state = ParseState.Label
+                parser.field = newField()
+                parser.field.label &= ch
+            else:
+                raise newException(RecParseError, "parse error: expected a comment, a label or an empty line")
+        of ParseState.Comment:
+            case ch
+            of '\l', EofMarker:
+                parser.state = ParseState.Initial
+                if ch == EofMarker: continue # flush any pending record
+            else: discard
+        of ParseState.Label:
+            case ch
+            of ':':
+                parser.state = ParseState.ValueSkipSpace
+            of LabelChar:
+                parser.field.label &= ch
+            else:
+                raise newException(RecParseError,
+                    "parse error: invalid label char: " & ch)
+        of ParseState.Value:
+            case ch
+            of '\l':
+                let valueLen = len(parser.field.value)
+                if valueLen > 0 and parser.field.value[valueLen-1] == '\\':
+                    setLen(parser.field.value, valueLen - 1)
+                else:
+                    parser.state = ParseState.FieldReady
+            of EofMarker:
+                raise newException(RecParseError,
+                    "parse error: value must be terminated by a newline")
+            else:
+                parser.field.value &= ch
+        of ParseState.ValueSkipSpace:
+            # Drop one blank; anything else (newline/EOF too) goes to Value.
+            parser.state = ParseState.Value
+            if ch notin (WhiteSpace - NewLines):
+                continue
+        of ParseState.FieldReady:
+            case ch
+            of '+':
+                parser.state = ParseState.ValueSkipSpace
+                parser.field.value &= '\l'
+            else:
+                if parser.record == nil:
+                    parser.record = newRecord()
+                if hasKey(parser.record.fields, parser.field.label):
+                    add(parser.record.fields[parser.field.label], parser.field.value)
+                else:
+                    parser.record.fields[parser.field.label] = @[parser.field.value]
+                parser.field = nil
+                parser.state = ParseState.Initial
+                continue
+
+        break
+
+proc `[]`*(record: Record, label: string): string =
+    record.fields[label][0] # first value stored for `label`; KeyError if absent
+
+proc len*(record: Record): int =
+    record.fields.len # counts distinct labels, not individual values
+
+iterator records*(stream: Stream): Record =
+    ## Parses `stream` char by char and yields each completed record.
+    let parser = newRecParser()
+    var
+        current: Record
+        atEnd = false
+    while not atEnd:
+        let ch = stream.readChar()
+        # The end marker is still fed once so a pending record is flushed.
+        atEnd = ch == EofMarker
+        if parser.feed(ch, current):
+            yield current
+
+iterator pairs*(record: Record): (string, string) =
+    for name, entries in pairs(record.fields): # labels in insertion order
+        for entry in items(entries): # one pair per value of a repeated label
+            yield (name, entry)
+
+iterator items*(record: Record): Field =
+    for name, text in pairs(record): # a fresh Field per (label, value) pair
+        yield Field(label: name, value: text)
+
+proc hasField*(record: Record, label: string): bool =
+    ## Returns true if `record` holds at least one field named `label`.
+    # Direct table lookup instead of building a Field for every entry.
+    result = hasKey(record.fields, label)
+
+proc contains*(record: Record, label: string): bool =
+    hasField(record, label) # lets callers write `label in record`
diff --git a/nimrec.nimble b/nimrec.nimble
new file mode 100644
index 0000000..fe1dc33
--- /dev/null
+++ b/nimrec.nimble
@@ -0,0 +1,12 @@
+# Package
+
+version       = "0.1.0"
+author        = "Oskari Timperi"
+description   = "Rec file parser for Nim"
+license       = "MIT"
+
+# Dependencies
+
+requires "nim >= 0.17.2"
+
+skipDirs = @["tests"]
diff --git a/tests/nim.cfg b/tests/nim.cfg
new file mode 100644
index 0000000..37e96f0
--- /dev/null
+++ b/tests/nim.cfg
@@ -0,0 +1 @@
+--path:"../"
diff --git a/tests/test.nim b/tests/test.nim
new file mode 100644
index 0000000..5fb9821
--- /dev/null
+++ b/tests/test.nim
@@ -0,0 +1,224 @@
+import unittest
+import streams
+import sequtils
+
+import nimrec
+
+suite "parsing": # end-to-end parsing through the `records` iterator
+    test "basics":
+        const data = """
+Name: John Doe
+Age: 34
+
+Name: Jane Doe
+Age: 32
+"""
+
+        var ss = newStringStream(data)
+        var records = toSeq(records(ss))
+        check(len(records) == 2) # the blank line separates the two records
+        check(len(records[0]) == 2) # len counts the labels of a record
+        check(len(records[1]) == 2)
+        check(records[0]["Name"] == "John Doe")
+        check(records[0]["Age"] == "34")
+        check(records[1]["Name"] == "Jane Doe")
+        check(records[1]["Age"] == "32")
+
+    test "comments":
+        const data = """
+# This is a comment
+Name: John Doe
+Age: 34
+
+# A comment between records
+# With multiple lines!
+
+Name: Jane Doe
+# A comment between fields
+Age: 32
+"""
+
+        let ss = newStringStream(data)
+        let records = toSeq(records(ss))
+        check(len(records) == 2) # comment lines produce no records of their own
+        check(records[0]["Name"] == "John Doe")
+        check(records[0]["Age"] == "34")
+        check(records[1]["Name"] == "Jane Doe")
+        check(records[1]["Age"] == "32")
+
+    test "only initial whitespace skipped from values":
+        const data = """
+Name:  John Doe
+Age:   34
+
+Name:	Jane Doe
+Age:		32
+"""
+
+        let ss = newStringStream(data)
+        let records = toSeq(records(ss))
+        check(len(records) == 2)
+        check(records[0]["Name"] == " John Doe") # only the first blank is dropped
+        check(records[0]["Age"] == "  34")
+        check(records[1]["Name"] == "Jane Doe") # a single tab is dropped as well
+        check(records[1]["Age"] == "\t32")
+
+    test "trailing whitespace included in values":
+        const data =
+            "Name: John Doe   \l" &
+            "Age: 34\t\l"
+
+        let ss = newStringStream(data)
+        let records = toSeq(records(ss))
+        check(len(records) == 1)
+        check(records[0]["Name"] == "John Doe   ") # blanks before '\l' are kept
+        check(records[0]["Age"] == "34\t")
+
+    test "records with single field":
+        const data = """
+Name: John Doe
+
+Name: Jane Doe
+
+Name: Foobar!
+"""
+
+        let ss = newStringStream(data)
+        let records = toSeq(records(ss))
+        check(len(records) == 3)
+        check(len(records[0]) == 1)
+        check(records[0]["Name"] == "John Doe")
+        check(len(records[1]) == 1)
+        check(records[1]["Name"] == "Jane Doe")
+        check(len(records[2]) == 1)
+        check(records[2]["Name"] == "Foobar!")
+
+    test "parse error if colon missing":
+        let ss = newStringStream("Name\nAge: 34\n") # newline reached inside a label
+        expect(RecParseError):
+            discard toSeq(records(ss))
+
+    test "parse error if invalid label":
+        let ss = newStringStream("Name: John Doe\nFoo-bar: 111") # '-' is not a label char
+        expect(RecParseError):
+            discard toSeq(records(ss))
+
+    test "label can start with %":
+        let ss = newStringStream("%rec: Entry\n")
+        let records = toSeq(records(ss))
+        check(len(records) == 1)
+        check(len(records[0]) == 1)
+        let fields = toSeq(items(records[0]))
+        check(fields[0].label == "%rec")
+        check(fields[0].value == "Entry")
+
+    test "field must be terminated by newline":
+        let ss = newStringStream("%rec: Entry\n%type: Id int") # no final newline
+        expect(RecParseError):
+            discard toSeq(records(ss))
+
+    test "multiple fields with same label":
+        const data = """
+Name: John Doe
+Age: 34
+Email: john@doe.me
+Email: john.doe@foobar.com
+
+Name: Jane Doe
+Age: 32
+Email: jane@doe.me
+"""
+
+        var ss = newStringStream(data)
+        var emails: seq[string] = @[]
+        for record in records(ss):
+            for label, value in record: # pairs yields one entry per value
+                if label == "Email":
+                    add(emails, value)
+        check(len(emails) == 3)
+        check(emails[0] == "john@doe.me")
+        check(emails[1] == "john.doe@foobar.com")
+        check(emails[2] == "jane@doe.me")
+
+
+suite "misc": # iterators and lookup helpers on parsed records
+    test "record items iterator":
+        const data = """
+Name: John Doe
+Age: 34
+
+Name: Jane Doe
+Age: 32
+"""
+
+        var ss = newStringStream(data)
+        var fields: seq[Field] = @[]
+        for record in records(ss):
+            for field in record: # single loop variable: the items iterator
+                add(fields, field)
+        check(len(fields) == 4)
+        check(fields[0].label == "Name")
+        check(fields[0].value == "John Doe")
+        check(fields[1].label == "Age")
+        check(fields[1].value == "34")
+        check(fields[2].label == "Name")
+        check(fields[2].value == "Jane Doe")
+        check(fields[3].label == "Age")
+        check(fields[3].value == "32")
+
+    test "record pairs iterator":
+        const data = """
+Name: John Doe
+Age: 34
+
+Name: Jane Doe
+Age: 32
+"""
+
+        var ss = newStringStream(data)
+        var results: seq[string] = @[]
+        for record in records(ss):
+            for label, value in record: # two loop variables: the pairs iterator
+                add(results, label)
+                add(results, value)
+        check(len(results) == 8)
+        check(results[0] == "Name")
+        check(results[1] == "John Doe")
+        check(results[2] == "Age")
+        check(results[3] == "34")
+        check(results[4] == "Name")
+        check(results[5] == "Jane Doe")
+        check(results[6] == "Age")
+        check(results[7] == "32")
+
+    test "hasField":
+        const data = """
+Name: John Doe
+Age: 34
+
+Name: Jane Doe
+Age: 32
+Email: jane@doe.me
+"""
+
+        var ss = newStringStream(data)
+        var records = toSeq(records(ss))
+        check(len(records) == 2)
+        check(not hasField(records[0], "Email"))
+        check(hasField(records[1], "Email"))
+
+    test "contains":
+        const data = """
+Name: John Doe
+Age: 34
+
+Name: Jane Doe
+Age: 32
+Email: jane@doe.me
+"""
+
+        var ss = newStringStream(data)
+        var records = toSeq(records(ss))
+        check(len(records) == 2)
+        check("Email" notin records[0]) # in/notin resolve to `contains`
+        check("Email" in records[1])
author	Oskari Timperi <oskari.timperi@iki.fi>	2017-10-25 22:00:27 +0300
committer	Oskari Timperi <oskari.timperi@iki.fi>	2017-10-25 23:15:54 +0300
commit	46b03de3685b8714f97013d435a337ba4c0eaa8e (patch)
tree	7c36306af66285e26dc5bfddd47a4ab45dd86f4c
download	nimrec-46b03de3685b8714f97013d435a337ba4c0eaa8e.tar.gz nimrec-46b03de3685b8714f97013d435a337ba4c0eaa8e.zip