aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorOskari Timperi <oskari.timperi@iki.fi>2017-10-25 22:00:27 +0300
committerOskari Timperi <oskari.timperi@iki.fi>2017-10-25 23:15:54 +0300
commit46b03de3685b8714f97013d435a337ba4c0eaa8e (patch)
tree7c36306af66285e26dc5bfddd47a4ab45dd86f4c
downloadnimrec-46b03de3685b8714f97013d435a337ba4c0eaa8e.tar.gz
nimrec-46b03de3685b8714f97013d435a337ba4c0eaa8e.zip
initial commit
-rw-r--r--LICENSE21
-rw-r--r--README.md28
-rw-r--r--nimrec.nim158
-rw-r--r--nimrec.nimble12
-rw-r--r--tests/nim.cfg1
-rw-r--r--tests/test.nim224
6 files changed, 444 insertions, 0 deletions
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..5e61df5
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2017 Oskari Timperi
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..89c1871
--- /dev/null
+++ b/README.md
@@ -0,0 +1,28 @@
+# Rec file parser for Nim
+
+Using this library you can parse rec files made by the
+[recutils](https://www.gnu.org/software/recutils/) software.
+
+# Examples
+
+If you have the following recfile:
+
+```
+Name: John Doe
+Age: 34
+
+Name: Jane Doe
+Age: 32
+```
+
+You can read the names of the persons like this:
+
+```nim
+import nimrec
+import streams
+
+for record in records(newFileStream("persons.rec")):
+ echo(record["Name"])
+```
+
+More examples can be found in the `examples` directory.
diff --git a/nimrec.nim b/nimrec.nim
new file mode 100644
index 0000000..034bbfc
--- /dev/null
+++ b/nimrec.nim
@@ -0,0 +1,158 @@
+import streams
+import strutils
+import tables
+
+type
+ Field* = ref object
+ ## A single label/value pair, as yielded by the `items` iterator.
+ label*: string
+ value*: string
+
+ Record* = ref object
+ ## One parsed record. Values are grouped by label; every label maps to
+ ## the list of values that were parsed for it.
+ fields: OrderedTableRef[string, seq[string]]
+
+ ParseState {.pure.} = enum
+ ## States of the character-driven state machine implemented by `feed`:
+ ## `Initial` is the start of a line, `Comment` is inside a `#` line,
+ ## `Label` and `Value` collect the two halves of a field,
+ ## `ValueSkipSpace` follows a `:` or a `+`, and `FieldReady` follows the
+ ## newline that ended a value line.
+ Initial
+ Comment
+ Label
+ Value
+ ValueSkipSpace
+ FieldReady
+
+ RecParser* = ref object
+ ## Parser state carried between successive calls to `feed`.
+ state: ParseState
+ field: Field
+ record: Record
+
+ RecParseError* = object of Exception
+ ## Raised by `feed` when the input cannot be parsed.
+
+const
+ # Characters that may start a field label; `%` is only accepted here.
+ LabelFirstChar = {'a'..'z', 'A'..'Z', '%'}
+
+ # Characters accepted after the first character of a label.
+ LabelChar = {'a'..'z', 'A'..'Z', '0'..'9', '_'}
+
+ # Character that `feed` treats as the end of the input; the `records`
+ # iterator stops reading once it has fed this character.
+ EofMarker = '\0'
+
+proc newRecParser*(): RecParser =
+  ## Creates a parser positioned at the start of a line, with no field or
+  ## record in progress.
+  result = RecParser(state: ParseState.Initial)
+
+proc newField(): Field =
+  ## Creates a field whose label and value are both empty strings, ready to
+  ## be appended to character by character.
+  result = Field(label: "", value: "")
+
+proc newField(label, value: string): Field =
+  ## Creates a field holding the given label and value.
+  result = Field(label: label, value: value)
+
+proc newRecord(): Record =
+  ## Creates a record with an empty label-to-values table.
+  result = Record(fields: newOrderedTable[string, seq[string]]())
+
+proc feed*(parser: RecParser, ch: char, record: var Record): bool =
+  ## Advances the parser by one input character.
+  ##
+  ## Returns `true` and stores the finished record in `record` when `ch` is
+  ## an empty line or `EofMarker` that closes a record with at least one
+  ## field. Otherwise returns `false` and leaves `record` untouched.
+  ##
+  ## Raises `RecParseError` when a line starts with a character that is
+  ## neither `#`, a newline nor a valid first label character, when a label
+  ## contains an invalid character, or when the input ends inside a value.
+  while true:
+    case parser.state
+    of ParseState.Initial:
+      case ch
+      of '#':
+        parser.state = ParseState.Comment
+      of '\l', EofMarker:
+        # An empty line or the end of the input closes the pending record.
+        if parser.record != nil:
+          result = true
+          record = parser.record
+          parser.record = nil
+      of LabelFirstChar:
+        parser.state = ParseState.Label
+        parser.field = newField()
+        parser.field.label &= ch
+      else:
+        raise newException(RecParseError,
+          "parse error: expected a comment, a label or an empty line")
+    of ParseState.Comment:
+      case ch
+      of '\l':
+        parser.state = ParseState.Initial
+      of EofMarker:
+        # The last line may be a comment without a trailing newline. Hand
+        # the marker to the Initial state so that a pending record is still
+        # emitted instead of being lost.
+        parser.state = ParseState.Initial
+        continue
+      else: discard
+    of ParseState.Label:
+      case ch
+      of ':':
+        parser.state = ParseState.ValueSkipSpace
+      of LabelChar:
+        parser.field.label &= ch
+      else:
+        raise newException(RecParseError,
+          "parse error: invalid label char: " & ch)
+    of ParseState.Value:
+      case ch
+      of '\l':
+        let valueLen = len(parser.field.value)
+        if valueLen > 0 and parser.field.value[valueLen-1] == '\\':
+          # Backslash-newline joins two physical lines: drop the backslash
+          # and keep collecting the same value.
+          setLen(parser.field.value, valueLen - 1)
+        else:
+          parser.state = ParseState.FieldReady
+      of EofMarker:
+        raise newException(RecParseError,
+          "parse error: value must be terminated by a newline")
+      else:
+        parser.field.value &= ch
+    of ParseState.ValueSkipSpace:
+      # Reached right after the `:` of a field or the `+` of a continuation
+      # line. A single blank is dropped; any other character belongs to the
+      # Value state. Re-dispatching instead of appending blindly makes a
+      # newline end an empty value and makes EofMarker raise the usual
+      # "must be terminated by a newline" error.
+      parser.state = ParseState.Value
+      if ch notin (WhiteSpace - NewLines):
+        continue
+    of ParseState.FieldReady:
+      case ch
+      of '+':
+        # Continuation line: the value goes on after an embedded newline.
+        parser.state = ParseState.ValueSkipSpace
+        parser.field.value &= '\l'
+      else:
+        # The field is complete. Store it, then let the Initial state look
+        # at the same character, which already belongs to the next line.
+        if parser.record == nil:
+          parser.record = newRecord()
+        if hasKey(parser.record.fields, parser.field.label):
+          add(parser.record.fields[parser.field.label], parser.field.value)
+        else:
+          parser.record.fields[parser.field.label] = @[parser.field.value]
+        parser.field = nil
+        parser.state = ParseState.Initial
+        continue
+
+    break
+
+proc `[]`*(record: Record, label: string): string =
+  ## Returns the first value stored under `label`. Later values with the
+  ## same label are reachable only through the `pairs` and `items`
+  ## iterators.
+  let values = record.fields[label]
+  result = values[0]
+
+proc len*(record: Record): int =
+  ## Returns the number of entries in the record's label table, so a label
+  ## that occurs several times is counted once.
+  result = record.fields.len
+
+iterator records*(stream: Stream): Record =
+  ## Reads `stream` one character at a time and yields every record that
+  ## `feed` reports as complete. Reading stops after the character equal to
+  ## `EofMarker` has been fed to the parser.
+  let parser = newRecParser()
+  var
+    current: Record
+    finished = false
+
+  while not finished:
+    let ch = readChar(stream)
+    # Decide up front whether this is the last round; the marker itself must
+    # still reach the parser so that it can flush the last record.
+    finished = ch == EofMarker
+
+    if feed(parser, ch, current):
+      yield current
+
+iterator pairs*(record: Record): (string, string) =
+  ## Yields one `(label, value)` tuple per stored value, walking the label
+  ## table in its iteration order and each label's values in stored order.
+  for label, values in pairs(record.fields):
+    for idx in 0 ..< len(values):
+      yield (label, values[idx])
+
+iterator items*(record: Record): Field =
+  ## Yields a freshly created `Field` for every `(label, value)` pair of the
+  ## record.
+  for entry in pairs(record):
+    yield newField(entry[0], entry[1])
+
+proc hasField*(record: Record, label: string): bool =
+  ## Returns `true` when the record holds at least one value for `label`.
+  ##
+  ## Asks the label table directly instead of iterating over the record,
+  ## which would allocate a `Field` per stored value just to compare labels.
+  ## A label is only ever inserted together with its first value, so key
+  ## membership is the same as "has at least one such field".
+  result = hasKey(record.fields, label)
+
+proc contains*(record: Record, label: string): bool =
+  ## Membership test backing the `in` / `notin` operators; delegates to
+  ## `hasField`.
+  hasField(record, label)
diff --git a/nimrec.nimble b/nimrec.nimble
new file mode 100644
index 0000000..fe1dc33
--- /dev/null
+++ b/nimrec.nimble
@@ -0,0 +1,12 @@
+# Package
+
+version = "0.1.0"
+author = "Oskari Timperi"
+description = "Rec file parser for Nim"
+license = "MIT"
+
+# Dependencies
+
+requires "nim >= 0.17.2"
+
+# Directories that nimble leaves out when installing the package.
+skipDirs = @["tests"]
diff --git a/tests/nim.cfg b/tests/nim.cfg
new file mode 100644
index 0000000..37e96f0
--- /dev/null
+++ b/tests/nim.cfg
@@ -0,0 +1 @@
+--path:"../"
diff --git a/tests/test.nim b/tests/test.nim
new file mode 100644
index 0000000..5fb9821
--- /dev/null
+++ b/tests/test.nim
@@ -0,0 +1,224 @@
+import unittest
+import streams
+import sequtils
+
+import nimrec
+
+suite "parsing":
+ test "basics":
+ # Two records separated by an empty line, two fields each; values are
+ # looked up by label.
+ const data = """
+Name: John Doe
+Age: 34
+
+Name: Jane Doe
+Age: 32
+"""
+
+ var ss = newStringStream(data)
+ var records = toSeq(records(ss))
+ check(len(records) == 2)
+ check(len(records[0]) == 2)
+ check(len(records[1]) == 2)
+ check(records[0]["Name"] == "John Doe")
+ check(records[0]["Age"] == "34")
+ check(records[1]["Name"] == "Jane Doe")
+ check(records[1]["Age"] == "32")
+
+ test "comments":
+ # Comment lines before, between and inside records must neither produce
+ # fields nor split a record.
+ const data = """
+# This is a comment
+Name: John Doe
+Age: 34
+
+# A comment between records
+# With multiple lines!
+
+Name: Jane Doe
+# A comment between fields
+Age: 32
+"""
+
+ let ss = newStringStream(data)
+ let records = toSeq(records(ss))
+ check(len(records) == 2)
+ check(records[0]["Name"] == "John Doe")
+ check(records[0]["Age"] == "34")
+ check(records[1]["Name"] == "Jane Doe")
+ check(records[1]["Age"] == "32")
+
+ test "only initial whitespace skipped from values":
+ # Expects whitespace beyond the blank that follows the colon to stay at
+ # the start of the value.
+ # NOTE(review): the expected values below start with a space or a tab
+ # that the sample data as shown here does not contain - confirm the
+ # literal against the repository copy of this file.
+ const data = """
+Name: John Doe
+Age: 34
+
+Name: Jane Doe
+Age: 32
+"""
+
+ let ss = newStringStream(data)
+ let records = toSeq(records(ss))
+ check(len(records) == 2)
+ check(records[0]["Name"] == " John Doe")
+ check(records[0]["Age"] == " 34")
+ check(records[1]["Name"] == "Jane Doe")
+ check(records[1]["Age"] == "\t32")
+
+ test "trailing whitespace included in values":
+ # Whitespace between the last visible character and the newline is part
+ # of the value. The input has no empty line, so the record is closed by
+ # the end of the stream.
+ const data =
+ "Name: John Doe \l" &
+ "Age: 34\t\l"
+
+ let ss = newStringStream(data)
+ let records = toSeq(records(ss))
+ check(len(records) == 1)
+ check(records[0]["Name"] == "John Doe ")
+ check(records[0]["Age"] == "34\t")
+
+ test "records with single field":
+ # Every empty line closes a record, even when it holds only one field.
+ const data = """
+Name: John Doe
+
+Name: Jane Doe
+
+Name: Foobar!
+"""
+
+ let ss = newStringStream(data)
+ let records = toSeq(records(ss))
+ check(len(records) == 3)
+ check(len(records[0]) == 1)
+ check(records[0]["Name"] == "John Doe")
+ check(len(records[1]) == 1)
+ check(records[1]["Name"] == "Jane Doe")
+ check(len(records[2]) == 1)
+ check(records[2]["Name"] == "Foobar!")
+
+ test "parse error if colon missing":
+ # The newline after "Name" arrives while a label is being read and is
+ # not a valid label character.
+ let ss = newStringStream("Name\nAge: 34\n")
+ expect(RecParseError):
+ discard toSeq(records(ss))
+
+ test "parse error if invalid label":
+ # '-' is not among the characters allowed in a label.
+ let ss = newStringStream("Name: John Doe\nFoo-bar: 111")
+ expect(RecParseError):
+ discard toSeq(records(ss))
+
+ test "label can start with %":
+ # '%' is accepted as the first character of a label and is kept as part
+ # of the label.
+ let ss = newStringStream("%rec: Entry\n")
+ let records = toSeq(records(ss))
+ check(len(records) == 1)
+ check(len(records[0]) == 1)
+ let fields = toSeq(items(records[0]))
+ check(fields[0].label == "%rec")
+ check(fields[0].value == "Entry")
+
+ test "field must be terminated by newline":
+ # The stream ends in the middle of the second field's value.
+ let ss = newStringStream("%rec: Entry\n%type: Id int")
+ expect(RecParseError):
+ discard toSeq(records(ss))
+
+ test "multiple fields with same label":
+ # A label may repeat inside a record; iterating label/value pairs must
+ # deliver every value, in input order.
+ const data = """
+Name: John Doe
+Age: 34
+Email: john@doe.me
+Email: john.doe@foobar.com
+
+Name: Jane Doe
+Age: 32
+Email: jane@doe.me
+"""
+
+ var ss = newStringStream(data)
+ var emails: seq[string] = @[]
+ for record in records(ss):
+ for label, value in record:
+ if label == "Email":
+ add(emails, value)
+ check(len(emails) == 3)
+ check(emails[0] == "john@doe.me")
+ check(emails[1] == "john.doe@foobar.com")
+ check(emails[2] == "jane@doe.me")
+
+
+suite "misc":
+ test "record items iterator":
+ # A single-variable `for` over a record uses `items` and yields Field
+ # objects in input order.
+ const data = """
+Name: John Doe
+Age: 34
+
+Name: Jane Doe
+Age: 32
+"""
+
+ var ss = newStringStream(data)
+ var fields: seq[Field] = @[]
+ for record in records(ss):
+ for field in record:
+ add(fields, field)
+ check(len(fields) == 4)
+ check(fields[0].label == "Name")
+ check(fields[0].value == "John Doe")
+ check(fields[1].label == "Age")
+ check(fields[1].value == "34")
+ check(fields[2].label == "Name")
+ check(fields[2].value == "Jane Doe")
+ check(fields[3].label == "Age")
+ check(fields[3].value == "32")
+
+ test "record pairs iterator":
+ # A two-variable `for` over a record uses `pairs` and yields
+ # (label, value) tuples in input order.
+ const data = """
+Name: John Doe
+Age: 34
+
+Name: Jane Doe
+Age: 32
+"""
+
+ var ss = newStringStream(data)
+ var results: seq[string] = @[]
+ for record in records(ss):
+ for label, value in record:
+ add(results, label)
+ add(results, value)
+ check(len(results) == 8)
+ check(results[0] == "Name")
+ check(results[1] == "John Doe")
+ check(results[2] == "Age")
+ check(results[3] == "34")
+ check(results[4] == "Name")
+ check(results[5] == "Jane Doe")
+ check(results[6] == "Age")
+ check(results[7] == "32")
+
+ test "hasField":
+ # Only the second record carries an Email field.
+ const data = """
+Name: John Doe
+Age: 34
+
+Name: Jane Doe
+Age: 32
+Email: jane@doe.me
+"""
+
+ var ss = newStringStream(data)
+ var records = toSeq(records(ss))
+ check(len(records) == 2)
+ check(not hasField(records[0], "Email"))
+ check(hasField(records[1], "Email"))
+
+ test "contains":
+ # Same data as the hasField test, queried through the `in` / `notin`
+ # operators that `contains` provides.
+ const data = """
+Name: John Doe
+Age: 34
+
+Name: Jane Doe
+Age: 32
+Email: jane@doe.me
+"""
+
+ var ss = newStringStream(data)
+ var records = toSeq(records(ss))
+ check(len(records) == 2)
+ check("Email" notin records[0])
+ check("Email" in records[1])