diff options
| author | Oskari Timperi <oskari.timperi@iki.fi> | 2017-10-25 22:00:27 +0300 |
|---|---|---|
| committer | Oskari Timperi <oskari.timperi@iki.fi> | 2017-10-25 23:15:54 +0300 |
| commit | 46b03de3685b8714f97013d435a337ba4c0eaa8e (patch) | |
| tree | 7c36306af66285e26dc5bfddd47a4ab45dd86f4c | |
| download | nimrec-46b03de3685b8714f97013d435a337ba4c0eaa8e.tar.gz nimrec-46b03de3685b8714f97013d435a337ba4c0eaa8e.zip | |
initial commit
| -rw-r--r-- | LICENSE | 21 | ||||
| -rw-r--r-- | README.md | 28 | ||||
| -rw-r--r-- | nimrec.nim | 158 | ||||
| -rw-r--r-- | nimrec.nimble | 12 | ||||
| -rw-r--r-- | tests/nim.cfg | 1 | ||||
| -rw-r--r-- | tests/test.nim | 224 |
6 files changed, 444 insertions, 0 deletions
@@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2017 Oskari Timperi + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..89c1871 --- /dev/null +++ b/README.md @@ -0,0 +1,28 @@ +# Rec file parser for Nim + +Using this library you can parse rec files made by the +[recutils](https://www.gnu.org/software/recutils/) software. + +# Examples + +If you have the following recfile: + +``` +Name: John Doe +Age: 34 + +Name: Jane Doe +Age: 32 +``` + +You can read the names of the persons like this: + +```nim +import nimrec +import streams + +for record in records(newFileStream("persons.rec")): + echo(record["Name"]) +``` + +More examples can be found in the `examples` directory. 
diff --git a/nimrec.nim b/nimrec.nim new file mode 100644 index 0000000..034bbfc --- /dev/null +++ b/nimrec.nim @@ -0,0 +1,158 @@ +import streams +import strutils +import tables + +type + Field* = ref object + label*: string + value*: string + + Record* = ref object + fields: OrderedTableRef[string, seq[string]] + + ParseState {.pure.} = enum + Initial + Comment + Label + Value + ValueSkipSpace + FieldReady + + RecParser* = ref object + state: ParseState + field: Field + record: Record + + RecParseError* = object of Exception + +const + LabelFirstChar = {'a'..'z', 'A'..'Z', '%'} + + LabelChar = {'a'..'z', 'A'..'Z', '0'..'9', '_'} + + EofMarker = '\0' + +proc newRecParser*(): RecParser = + new(result) + result.state = ParseState.Initial + +proc newField(): Field = + new(result) + result.label = "" + result.value = "" + +proc newField(label, value: string): Field = + new(result) + result.label = label + result.value = value + +proc newRecord(): Record = + new(result) + result.fields = newOrderedTable[string, seq[string]]() + +proc feed*(parser: RecParser, ch: char, record: var Record): bool = + while true: + case parser.state + of ParseState.Initial: + case ch + of '#': + parser.state = ParseState.Comment + of '\l', EofMarker: + if parser.record != nil: + result = true + record = parser.record + parser.record = nil + of LabelFirstChar: + parser.state = ParseState.Label + parser.field = newField() + parser.field.label &= ch + else: + raise newException(RecParseError, "parse error: expected a comment, a label or an empty line") + of ParseState.Comment: + case ch + of '\l': + parser.state = ParseState.Initial + else: discard + of ParseState.Label: + case ch + of ':': + parser.state = ParseState.ValueSkipSpace + of LabelChar: + parser.field.label &= ch + else: + raise newException(RecParseError, + "parse error: invalid label char: " & ch) + of ParseState.Value: + case ch + of '\l': + let valueLen = len(parser.field.value) + if valueLen > 0 and 
parser.field.value[valueLen-1] == '\\': + setLen(parser.field.value, valueLen - 1) + else: + parser.state = ParseState.FieldReady + of EofMarker: + raise newException(RecParseError, + "parse error: value must be terminated by a newline") + else: + parser.field.value &= ch + of ParseState.ValueSkipSpace: + case ch + of (WhiteSpace - NewLines): + discard + else: + parser.field.value &= ch + parser.state = ParseState.Value + of ParseState.FieldReady: + case ch + of '+': + parser.state = ParseState.ValueSkipSpace + parser.field.value &= '\l' + else: + if parser.record == nil: + parser.record = newRecord() + if hasKey(parser.record.fields, parser.field.label): + add(parser.record.fields[parser.field.label], parser.field.value) + else: + add(parser.record.fields, parser.field.label, + @[parser.field.value]) + parser.field = nil + parser.state = ParseState.Initial + continue + + break + +proc `[]`*(record: Record, label: string): string = + result = record.fields[label][0] + +proc len*(record: Record): int = + result = len(record.fields) + +iterator records*(stream: Stream): Record = + let parser = newRecParser() + var record: Record + + while true: + var ch = readChar(stream) + + if feed(parser, ch, record): + yield record + + if ch == EofMarker: + break + +iterator pairs*(record: Record): (string, string) = + for label, values in record.fields: + for value in values: + yield (label, value) + +iterator items*(record: Record): Field = + for label, value in record: + yield newField(label, value) + +proc hasField*(record: Record, label: string): bool = + for field in record: + if field.label == label: + return true + +proc contains*(record: Record, label: string): bool = + result = hasField(record, label) diff --git a/nimrec.nimble b/nimrec.nimble new file mode 100644 index 0000000..fe1dc33 --- /dev/null +++ b/nimrec.nimble @@ -0,0 +1,12 @@ +# Package + +version = "0.1.0" +author = "Oskari Timperi" +description = "Rec file parser for Nim" +license = "MIT" + +# Dependencies 
+ +requires "nim >= 0.17.2" + +skipDirs = @["tests"] diff --git a/tests/nim.cfg b/tests/nim.cfg new file mode 100644 index 0000000..37e96f0 --- /dev/null +++ b/tests/nim.cfg @@ -0,0 +1 @@ +--path:"../" diff --git a/tests/test.nim b/tests/test.nim new file mode 100644 index 0000000..5fb9821 --- /dev/null +++ b/tests/test.nim @@ -0,0 +1,224 @@ +import unittest +import streams +import sequtils + +import nimrec + +suite "parsing": + test "basics": + const data = """ +Name: John Doe +Age: 34 + +Name: Jane Doe +Age: 32 +""" + + var ss = newStringStream(data) + var records = toSeq(records(ss)) + check(len(records) == 2) + check(len(records[0]) == 2) + check(len(records[1]) == 2) + check(records[0]["Name"] == "John Doe") + check(records[0]["Age"] == "34") + check(records[1]["Name"] == "Jane Doe") + check(records[1]["Age"] == "32") + + test "comments": + const data = """ +# This is a comment +Name: John Doe +Age: 34 + +# A comment between records +# With multiple lines! + +Name: Jane Doe +# A comment between fields +Age: 32 +""" + + let ss = newStringStream(data) + let records = toSeq(records(ss)) + check(len(records) == 2) + check(records[0]["Name"] == "John Doe") + check(records[0]["Age"] == "34") + check(records[1]["Name"] == "Jane Doe") + check(records[1]["Age"] == "32") + + test "only initial whitespace skipped from values": + const data = """ +Name: John Doe +Age: 34 + +Name: Jane Doe +Age: 32 +""" + + let ss = newStringStream(data) + let records = toSeq(records(ss)) + check(len(records) == 2) + check(records[0]["Name"] == " John Doe") + check(records[0]["Age"] == " 34") + check(records[1]["Name"] == "Jane Doe") + check(records[1]["Age"] == "\t32") + + test "trailing whitespace included in values": + const data = + "Name: John Doe \l" & + "Age: 34\t\l" + + let ss = newStringStream(data) + let records = toSeq(records(ss)) + check(len(records) == 1) + check(records[0]["Name"] == "John Doe ") + check(records[0]["Age"] == "34\t") + + test "records with single field": + 
const data = """ +Name: John Doe + +Name: Jane Doe + +Name: Foobar! +""" + + let ss = newStringStream(data) + let records = toSeq(records(ss)) + check(len(records) == 3) + check(len(records[0]) == 1) + check(records[0]["Name"] == "John Doe") + check(len(records[1]) == 1) + check(records[1]["Name"] == "Jane Doe") + check(len(records[2]) == 1) + check(records[2]["Name"] == "Foobar!") + + test "parse error if colon missing": + let ss = newStringStream("Name\nAge: 34\n") + expect(RecParseError): + discard toSeq(records(ss)) + + test "parse error if invalid label": + let ss = newStringStream("Name: John Doe\nFoo-bar: 111") + expect(RecParseError): + discard toSeq(records(ss)) + + test "label can start with %": + let ss = newStringStream("%rec: Entry\n") + let records = toSeq(records(ss)) + check(len(records) == 1) + check(len(records[0]) == 1) + let fields = toSeq(items(records[0])) + check(fields[0].label == "%rec") + check(fields[0].value == "Entry") + + test "field must be terminated by newline": + let ss = newStringStream("%rec: Entry\n%type: Id int") + expect(RecParseError): + discard toSeq(records(ss)) + + test "multiple fields with same label": + const data = """ +Name: John Doe +Age: 34 +Email: john@doe.me +Email: john.doe@foobar.com + +Name: Jane Doe +Age: 32 +Email: jane@doe.me +""" + + var ss = newStringStream(data) + var emails: seq[string] = @[] + for record in records(ss): + for label, value in record: + if label == "Email": + add(emails, value) + check(len(emails) == 3) + check(emails[0] == "john@doe.me") + check(emails[1] == "john.doe@foobar.com") + check(emails[2] == "jane@doe.me") + + +suite "misc": + test "record items iterator": + const data = """ +Name: John Doe +Age: 34 + +Name: Jane Doe +Age: 32 +""" + + var ss = newStringStream(data) + var fields: seq[Field] = @[] + for record in records(ss): + for field in record: + add(fields, field) + check(len(fields) == 4) + check(fields[0].label == "Name") + check(fields[0].value == "John Doe") + 
check(fields[1].label == "Age") + check(fields[1].value == "34") + check(fields[2].label == "Name") + check(fields[2].value == "Jane Doe") + check(fields[3].label == "Age") + check(fields[3].value == "32") + + test "record pairs iterator": + const data = """ +Name: John Doe +Age: 34 + +Name: Jane Doe +Age: 32 +""" + + var ss = newStringStream(data) + var results: seq[string] = @[] + for record in records(ss): + for label, value in record: + add(results, label) + add(results, value) + check(len(results) == 8) + check(results[0] == "Name") + check(results[1] == "John Doe") + check(results[2] == "Age") + check(results[3] == "34") + check(results[4] == "Name") + check(results[5] == "Jane Doe") + check(results[6] == "Age") + check(results[7] == "32") + + test "hasField": + const data = """ +Name: John Doe +Age: 34 + +Name: Jane Doe +Age: 32 +Email: jane@doe.me +""" + + var ss = newStringStream(data) + var records = toSeq(records(ss)) + check(len(records) == 2) + check(not hasField(records[0], "Email")) + check(hasField(records[1], "Email")) + + test "contains": + const data = """ +Name: John Doe +Age: 34 + +Name: Jane Doe +Age: 32 +Email: jane@doe.me +""" + + var ss = newStringStream(data) + var records = toSeq(records(ss)) + check(len(records) == 2) + check("Email" notin records[0]) + check("Email" in records[1]) |
