aboutsummaryrefslogtreecommitdiff
path: root/toolsrc/src/Paragraphs.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'toolsrc/src/Paragraphs.cpp')
-rw-r--r--toolsrc/src/Paragraphs.cpp170
1 files changed, 170 insertions, 0 deletions
diff --git a/toolsrc/src/Paragraphs.cpp b/toolsrc/src/Paragraphs.cpp
new file mode 100644
index 000000000..6dde5da7c
--- /dev/null
+++ b/toolsrc/src/Paragraphs.cpp
@@ -0,0 +1,170 @@
+#include "pch.h"
+#include "Paragraphs.h"
+#include "vcpkg_Files.h"
+
+namespace vcpkg::Paragraphs
+{
+ struct Parser
+ {
+ Parser(const char* c, const char* e) : cur(c), end(e)
+ {
+ }
+
+ private:
+ const char* cur;
+ const char* const end;
+
+ void peek(char& ch) const
+ {
+ if (cur == end)
+ ch = 0;
+ else
+ ch = *cur;
+ }
+
+ void next(char& ch)
+ {
+ if (cur == end)
+ ch = 0;
+ else
+ {
+ ++cur;
+ peek(ch);
+ }
+ }
+
+ void skip_spaces(char& ch)
+ {
+ while (ch == ' ' || ch == '\t')
+ next(ch);
+ }
+
+ static bool is_alphanum(char ch)
+ {
+ return (ch >= 'A' && ch <= 'Z')
+ || (ch >= 'a' && ch <= 'z')
+ || (ch >= '0' && ch <= '9');
+ }
+
+ static bool is_lineend(char ch)
+ {
+ return ch == '\r' || ch == '\n' || ch == 0;
+ }
+
+ void get_fieldvalue(char& ch, std::string& fieldvalue)
+ {
+ fieldvalue.clear();
+
+ auto beginning_of_line = cur;
+ do
+ {
+ // scan to end of current line (it is part of the field value)
+ while (!is_lineend(ch))
+ next(ch);
+
+ fieldvalue.append(beginning_of_line, cur);
+
+ if (ch == '\r')
+ next(ch);
+ if (ch == '\n')
+ next(ch);
+
+ if (is_alphanum(ch))
+ {
+ // Line begins a new field.
+ return;
+ }
+
+ beginning_of_line = cur;
+
+ // Line may continue the current field with data or terminate the paragraph,
+ // depending on first nonspace character.
+ skip_spaces(ch);
+
+ if (is_lineend(ch))
+ {
+ // Line was whitespace or empty.
+ // This terminates the field and the paragraph.
+ // We leave the blank line's whitespace consumed, because it doesn't matter.
+ return;
+ }
+
+ // First nonspace is not a newline. This continues the current field value.
+ // We forcibly convert all newlines into single '\n' for ease of text handling later on.
+ fieldvalue.push_back('\n');
+ }
+ while (true);
+ }
+
+ void get_fieldname(char& ch, std::string& fieldname)
+ {
+ auto begin_fieldname = cur;
+ while (is_alphanum(ch) || ch == '-')
+ next(ch);
+ Checks::check_throw(ch == ':', "Expected ':'");
+ fieldname = std::string(begin_fieldname, cur);
+
+ // skip ': '
+ next(ch);
+ skip_spaces(ch);
+ }
+
+ void get_paragraph(char& ch, std::unordered_map<std::string, std::string>& fields)
+ {
+ fields.clear();
+ std::string fieldname;
+ std::string fieldvalue;
+ do
+ {
+ get_fieldname(ch, fieldname);
+
+ auto it = fields.find(fieldname);
+ Checks::check_throw(it == fields.end(), "Duplicate field");
+
+ get_fieldvalue(ch, fieldvalue);
+
+ fields.emplace(fieldname, fieldvalue);
+ }
+ while (!is_lineend(ch));
+ }
+
+ public:
+ std::vector<std::unordered_map<std::string, std::string>> get_paragraphs()
+ {
+ std::vector<std::unordered_map<std::string, std::string>> paragraphs;
+
+ char ch;
+ peek(ch);
+
+ while (ch != 0)
+ {
+ if (ch == '\n' || ch == '\r' || ch == ' ' || ch == '\t')
+ {
+ next(ch);
+ continue;
+ }
+
+ paragraphs.emplace_back();
+ get_paragraph(ch, paragraphs.back());
+ }
+
+ return paragraphs;
+ }
+ };
+
+ std::vector<std::unordered_map<std::string, std::string>> get_paragraphs(const fs::path& control_path)
+ {
+ const expected<std::string> contents = Files::read_contents(control_path);
+ if (auto spgh = contents.get())
+ {
+ return parse_paragraphs(*spgh);
+ }
+
+ Checks::exit_with_message("Error while reading %s: %s", control_path.generic_string(), contents.error_code().message());
+ }
+
+ std::vector<std::unordered_map<std::string, std::string>> parse_paragraphs(const std::string& str)
+ {
+ return Parser(str.c_str(), str.c_str() + str.size()).get_paragraphs();
+ }
+}