[Atom, RSS] More tests, CDATA fix

author: John Conway <john.a.conway@gmail.com> 2019-05-10 11:09:16 +0100
committer: John Conway <john.a.conway@gmail.com> 2019-05-10 11:09:16 +0100
commit: e4652b267a04401b528b4c19fa81da6dcbc32ff1 (patch)
tree: b0c9194f5f384c03e3d6e12f24f6ef203ccabf27
parent: b685d3253d7305637bd35a7a06491430fec5d6a6 (diff)
download: feed-nim-e4652b267a04401b528b4c19fa81da6dcbc32ff1.tar.gz
feed-nim-e4652b267a04401b528b4c19fa81da6dcbc32ff1.zip
7 files changed, 44 insertions, 21 deletions
diff --git a/src/feednim.nim.cfg b/src/feednim.nim.cfg
new file mode 100644
index 0000000..ace4e9c
--- /dev/null
+++ b/src/feednim.nim.cfg
@@ -0,0 +1 @@
+-d:ssl
+\ No newline at end of file
diff --git a/src/feednim/atom.nim b/src/feednim/atom.nim
index a432844..bbdcbe9 100644
--- a/src/feednim/atom.nim
+++ b/src/feednim/atom.nim
@@ -12,7 +12,7 @@ import streams
 import sugar
 
 type
-    AtomCommon = ref object of RootObj  # These properties aren't gathered yet
+    AtomCommon = ref object of RootObj  # These properties aren't gathered
         xmlbase*: string
         xmllang*: string
 
@@ -92,7 +92,7 @@ type
 
 
 # Promotes text node to the top of an AtomText object if caller expects a string
-converter atomToString*(obj: AtomText): string =
+converter toString*(obj: AtomText): string =
     return obj.text
 
 
@@ -140,13 +140,26 @@ func parseLink ( node: XmlNode ): AtomLink =
         if node.attr("length") != "": link.length = node.attr("length")
     return link
 
+func parseText ( node: XmlNode ): string =
+    if node.attr("type") == "xhtml" or node.attr("type") == "html":
+        var content = ""
+        for item in node.items:
+            content = content & $item
+        # Strip CDATA
+        if content[0 .. 8] == "<![CDATA[":
+            content = content.substr[9 .. content.len()-4 ]
+        return content
+    else:
+        return node.innerText
+
 func parseEntry( node: XmlNode ) : AtomEntry =
     var entry: AtomEntry = AtomEntry()
 
     # Fill the required fields
     entry.id = node.child("id").innerText
     entry.title = AtomText()
-    entry.title.text = node.child("title").innerText
+    if node.attrs != nil: entry.title.textType = node.attr("type")
+    entry.title.text = node.child("title").parseText()
     entry.updated = node.child("updated").innerText
 
     # Fill the optinal fields
@@ -160,16 +173,9 @@ func parseEntry( node: XmlNode ) : AtomEntry =
         entry.content.text = content_node.innerText
 
         if content_node.attrs != nil:
-            if content_node.attr("type") == "xhtml" or content_node.attr("type") == "html":
-                var content = ""
-                entry.content.texttype = content_node.attr("type")
-                for item in content_node.items:
-                    content = content & $item
-                entry.content.text = content
-            else:
-                entry.content.text = content_node.innerText
-
             entry.content.src = content_node.attr("src")
+            entry.content.texttype = content_node.attr("type")
+            entry.content.text = content_node.parseText()
 
     if node.child("contributor") != nil:
         entry.contributors = node.parseAuthors(mode="contributor")
@@ -192,8 +198,8 @@ func parseEntry( node: XmlNode ) : AtomEntry =
         if source.child("link") != nil: entry.source.link = source.child("link").parseLink()
         if source.child("logo") != nil: entry.source.logo = source.child("logo").innerText
         if source.child("rights") != nil: entry.source.rights = source.child("rights").innerText
-        if source.child("subtitle") != nil: entry.source.subtitle = source.child("subtitle").innerText
-        if source.child("title") != nil: entry.source.title = source.child("title").innerText
+        if source.child("subtitle") != nil: entry.source.subtitle = source.child("subtitle").parseText()
+        if source.child("title") != nil: entry.source.title = source.child("title").parseText()
         if source.child("updated") != nil: entry.source.updated = source.child("updated").innerText
 
         entry.source.author = entry.source.authors[0]
@@ -221,7 +227,7 @@ proc parseAtom* ( data: string ): Atom =
     atom.id = node.child("id").innerText
 
     atom.title = AtomText()
-    atom.title.text = node.child("title").innerText
+    atom.title.text = node.child("title").parseText()
     atom.updated = node.child("updated").innerText
 
     # Fill in the optional fields
diff --git a/src/feednim/rss.nim b/src/feednim/rss.nim
index 91fc0a4..35d3520 100644
--- a/src/feednim/rss.nim
+++ b/src/feednim/rss.nim
@@ -96,14 +96,19 @@ func parseCategories( node: XmlNode ): seq[RSSCategory] =
     if categories.len == 0: return @[]
     return categories
 
+func parseText ( node: XmlNode ): string =
+    var content = node.innerText
+    if content[0 .. 8] == "<![CDATA[":
+        return content.substr[9 .. content.len()-4 ]
+    return content
 
 func parseItem( node: XmlNode) : RSSItem =
     var item: RSSItem = RSSItem()
-    if node.child("title") != nil: item.title = node.child("title").innerText
+    if node.child("title") != nil: item.title = node.child("title").parseText()
 
     if node.child("link") != nil: item.link = node.child("link").innerText
 
-    if node.child("description") != nil: item.description = node.child("description").innerText
+    if node.child("description") != nil: item.description = node.child("description").parseText()
 
     for key in @["author", "dc:creator"]:
         if node.child(key) != nil: item.author = node.child(key).innerText
@@ -141,9 +146,9 @@ proc parseRSS*(data: string): RSS =
     var rss: RSS = RSS()
 
     # Fill the required fields.
-    rss.title = channel.child("title").innerText
+    rss.title = channel.child("title").parseText()
     rss.link = channel.child("link").innerText
-    rss.description = channel.child("description").innerText
+    rss.description = channel.child("description").parseText()
 
     # Fill the optional fields.
     for key in @["language", "dc:language"]:
@@ -189,7 +194,7 @@ proc parseRSS*(data: string): RSS =
         if img.child("link") != nil:  image.link = img.child("link").innerText
         if img.child("width") != nil: image.width = img.child("width").innerText
         if img.child("height") != nil: image.height = img.child("height").innerText
-        if img.child("description") != nil: image.description = img.child("description").innerText
+        if img.child("description") != nil: image.description = img.child("description").parseText()
         rss.image = image
 
     if channel.child("rating") != nil: rss.rating = channel.child("rating").innerText
diff --git a/tests/nim.cfg b/tests/nim.cfg
new file mode 100644
index 0000000..ace4e9c
--- /dev/null
+++ b/tests/nim.cfg
@@ -0,0 +1 @@
+-d:ssl
+\ No newline at end of file
diff --git a/tests/test_atom b/tests/test_atom
index 2c75d21..a8473ff 100755
--- a/tests/test_atom
+++ b/tests/test_atom
diff --git a/tests/test_atom.nim b/tests/test_atom.nim
index 37f5899..88b6916 100644
--- a/tests/test_atom.nim
+++ b/tests/test_atom.nim
@@ -100,4 +100,14 @@ test "Read Valid Atom Feed":
     check feed.entries[1].link.length == "1000000"
     check feed.entries[1].published == "2003-12-13T18:20:02Z"
     check feed.entries[1].rights == "Copyright Jane Bloggs"
-    check feed.entries[1].summary == "Trains!"
-\ No newline at end of file
+    check feed.entries[1].summary == "Trains!"
+
+test "Fetch Atom Feed from W3C":
+    let feed = getAtom("https://www.w3.org/blog/news/feed/atom")
+    check feed.title != ""
+    check feed.id != ""
+    check feed.link.href != ""
+    check feed.entries.len() > 0
+    check feed.entries[0].title != ""
+    check feed.entries[0].updated != ""
+    check feed.entries[0].id != ""
diff --git a/tests/test_rss b/tests/test_rss
index 7521afe..ed877b2 100755
--- a/tests/test_rss
+++ b/tests/test_rss
author	John Conway <john.a.conway@gmail.com>	2019-05-10 11:09:16 +0100
committer	John Conway <john.a.conway@gmail.com>	2019-05-10 11:09:16 +0100
commit	e4652b267a04401b528b4c19fa81da6dcbc32ff1 (patch)
tree	b0c9194f5f384c03e3d6e12f24f6ef203ccabf27
parent	b685d3253d7305637bd35a7a06491430fec5d6a6 (diff)
download	feed-nim-e4652b267a04401b528b4c19fa81da6dcbc32ff1.tar.gz feed-nim-e4652b267a04401b528b4c19fa81da6dcbc32ff1.zip