diff options
| author | nicole mazzuca <mazzucan@outlook.com> | 2020-04-17 18:16:20 -0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2020-04-17 18:16:20 -0700 |
| commit | 09af1e9b55fdb79ef5aa04de3f1710759b2de990 (patch) | |
| tree | fc151fb148667d5cf10bf3dcd8e26b5d04cc6d16 /toolsrc/include | |
| parent | 556325a1f7b6049d91565257c00db2f0bf1eadc5 (diff) | |
| download | vcpkg-09af1e9b55fdb79ef5aa04de3f1710759b2de990.tar.gz vcpkg-09af1e9b55fdb79ef5aa04de3f1710759b2de990.zip | |
[vcpkg] Add initial JSON support (#10521)
* [vcpkg] Add initial JSON support
This adds a JSON parser, as well as the amount of Unicode support
required for JSON parsing to work according to the specification. In the
future, I hope to rewrite our existing XML files into JSON.
Additionally, as a drive-by, we've added the following:
* Added /wd4800 to pragmas.h -- C4800 is a "performance warning" emitted
when pointers or integers are implicitly converted to bool, and shouldn't
be an issue for us.
* Switched Parse::ParserBase to read Unicode (as UTF-8), as opposed to
ASCII
* Building again under VCPKG_DEVELOPMENT_WARNINGS, yay!
Diffstat (limited to 'toolsrc/include')
| -rw-r--r-- | toolsrc/include/vcpkg/base/expected.h | 2 | ||||
| -rw-r--r-- | toolsrc/include/vcpkg/base/json.h | 257 | ||||
| -rw-r--r-- | toolsrc/include/vcpkg/base/parse.h (renamed from toolsrc/include/vcpkg/parse.h) | 72 | ||||
| -rw-r--r-- | toolsrc/include/vcpkg/base/stringview.h | 4 | ||||
| -rw-r--r-- | toolsrc/include/vcpkg/base/unicode.h | 148 | ||||
| -rw-r--r-- | toolsrc/include/vcpkg/packagespec.h | 2 | ||||
| -rw-r--r-- | toolsrc/include/vcpkg/paragraphparser.h | 6 | ||||
| -rw-r--r-- | toolsrc/include/vcpkg/pragmas.h | 5 | ||||
| -rw-r--r-- | toolsrc/include/vcpkg/textrowcol.h | 5 |
9 files changed, 454 insertions, 47 deletions
diff --git a/toolsrc/include/vcpkg/base/expected.h b/toolsrc/include/vcpkg/base/expected.h index 2b19bad2a..88b09fdb2 100644 --- a/toolsrc/include/vcpkg/base/expected.h +++ b/toolsrc/include/vcpkg/base/expected.h @@ -187,7 +187,7 @@ namespace vcpkg } else { - return {std::move(error()), expected_right_tag}; + return {std::move(*this).error(), expected_right_tag}; } } diff --git a/toolsrc/include/vcpkg/base/json.h b/toolsrc/include/vcpkg/base/json.h new file mode 100644 index 000000000..3da7ed776 --- /dev/null +++ b/toolsrc/include/vcpkg/base/json.h @@ -0,0 +1,257 @@ +#pragma once + +#include <vcpkg/base/expected.h> +#include <vcpkg/base/files.h> +#include <vcpkg/base/parse.h> +#include <vcpkg/base/stringview.h> + +#include <functional> +#include <stddef.h> +#include <stdint.h> +#include <string> +#include <utility> +#include <vector> + +namespace vcpkg::Json +{ + struct JsonStyle + { + enum class Newline + { + Lf, + CrLf + } newline_kind = Newline::Lf; + + constexpr JsonStyle() noexcept = default; + + static JsonStyle with_tabs() noexcept { return JsonStyle{-1}; } + static JsonStyle with_spaces(int indent) noexcept + { + vcpkg::Checks::check_exit(VCPKG_LINE_INFO, indent >= 0); + return JsonStyle{indent}; + } + + void set_tabs() noexcept { this->indent = -1; } + void set_spaces(int indent_) noexcept + { + vcpkg::Checks::check_exit(VCPKG_LINE_INFO, indent >= 0); + this->indent = indent_; + } + + bool use_tabs() const noexcept { return indent == -1; } + bool use_spaces() const noexcept { return indent >= 0; } + + int spaces() const noexcept + { + vcpkg::Checks::check_exit(VCPKG_LINE_INFO, indent >= 0); + return indent; + } + + const char* newline() const noexcept + { + switch (this->newline_kind) + { + case Newline::Lf: return "\n"; + case Newline::CrLf: return "\r\n"; + default: Checks::exit_fail(VCPKG_LINE_INFO); + } + } + + private: + constexpr explicit JsonStyle(int indent) : indent(indent) { } + // -1 for tab, >=0 gives # of spaces + int indent = 2; + }; + + 
struct Array; + struct Object; + + enum class ValueKind + { + Null, + Boolean, + Number, + String, + Array, + Object + }; + + namespace impl + { + struct ValueImpl; + struct SyntaxErrorImpl; + } + + struct Value + { + ValueKind kind() const noexcept; + + bool is_null() const noexcept; + bool is_boolean() const noexcept; + bool is_number() const noexcept; + bool is_string() const noexcept; + bool is_array() const noexcept; + bool is_object() const noexcept; + + // a.x() asserts when !a.is_x() + bool boolean() const noexcept; + int64_t number() const noexcept; + StringView string() const noexcept; + + const Array& array() const noexcept; + Array& array() noexcept; + + const Object& object() const noexcept; + Object& object() noexcept; + + Value(Value&&) noexcept; + Value& operator=(Value&&) noexcept; + ~Value(); + + Value() noexcept; // equivalent to Value::null() + static Value null(std::nullptr_t) noexcept; + static Value boolean(bool) noexcept; + static Value number(int64_t i) noexcept; + static Value string(StringView) noexcept; + static Value array(Array&&) noexcept; + static Value object(Object&&) noexcept; + Value clone() const noexcept; + + private: + friend struct impl::ValueImpl; + std::unique_ptr<impl::ValueImpl> underlying_; + }; + + struct Array + { + private: + using underlying_t = std::vector<Value>; + + public: + using iterator = underlying_t::iterator; + using const_iterator = underlying_t::const_iterator; + + void push_back(Value&& value) { this->underlying_.push_back(std::move(value)); } + void insert_before(iterator it, Value&& value) { this->underlying_.insert(it, std::move(value)); } + + std::size_t size() const noexcept { return this->underlying_.size(); } + + // asserts idx < size + Value& operator[](std::size_t idx) noexcept + { + vcpkg::Checks::check_exit(VCPKG_LINE_INFO, idx < this->size()); + return this->underlying_[idx]; + } + const Value& operator[](std::size_t idx) const noexcept + { + vcpkg::Checks::check_exit(VCPKG_LINE_INFO, idx < 
this->size()); + return this->underlying_[idx]; + } + + void sort(const std::function<bool(const Value&, const Value&)>& lt) + { + std::sort(this->begin(), this->end(), std::ref(lt)); + } + + Array clone() const noexcept; + + iterator begin() { return underlying_.begin(); } + iterator end() { return underlying_.end(); } + const_iterator begin() const { return cbegin(); } + const_iterator end() const { return cend(); } + const_iterator cbegin() const { return underlying_.cbegin(); } + const_iterator cend() const { return underlying_.cend(); } + + private: + underlying_t underlying_; + }; + + struct Object + { + private: + using underlying_t = std::vector<std::pair<std::string, Value>>; + + underlying_t::const_iterator internal_find_key(StringView key) const noexcept; + + public: + // asserts if the key is found + void insert(std::string key, Value value) noexcept; + + // replaces the value if the key is found, otherwise inserts a new + // value. + void insert_or_replace(std::string key, Value value) noexcept; + + // returns whether the key existed + bool remove(StringView key) noexcept; + + // asserts on lookup failure + Value& operator[](StringView key) noexcept + { + auto res = this->get(key); + vcpkg::Checks::check_exit(VCPKG_LINE_INFO, res); + return *res; + } + const Value& operator[](StringView key) const noexcept + { + auto res = this->get(key); + vcpkg::Checks::check_exit(VCPKG_LINE_INFO, res); + return *res; + } + + Value* get(StringView key) noexcept; + const Value* get(StringView key) const noexcept; + + bool contains(StringView key) const noexcept { return this->get(key); } + + std::size_t size() const noexcept { return this->underlying_.size(); } + + void sort_keys(const std::function<bool(StringView, StringView)>& lt) noexcept; + + Object clone() const noexcept; + + struct const_iterator + { + using value_type = std::pair<StringView, const Value&>; + using reference = value_type; + using iterator_category = std::forward_iterator_tag; + + value_type 
operator*() const noexcept { return *underlying_; } + const_iterator& operator++() noexcept + { + ++underlying_; + return *this; + } + const_iterator operator++(int) noexcept + { + auto res = *this; + ++underlying_; + return res; + } + + bool operator==(const_iterator other) const noexcept { return this->underlying_ == other.underlying_; } + bool operator!=(const_iterator other) const noexcept { return !(this->underlying_ == other.underlying_); } + + private: + friend struct Object; + explicit const_iterator(const underlying_t::const_iterator& it) : underlying_(it) { } + underlying_t::const_iterator underlying_; + }; + using iterator = const_iterator; + + const_iterator begin() const noexcept { return this->cbegin(); } + const_iterator end() const noexcept { return this->cend(); } + const_iterator cbegin() const noexcept { return const_iterator{this->underlying_.begin()}; } + const_iterator cend() const noexcept { return const_iterator{this->underlying_.end()}; } + + private: + underlying_t underlying_; + }; + + // currently, a hard assertion on file errors + ExpectedT<std::pair<Value, JsonStyle>, std::unique_ptr<Parse::IParseError>> parse_file( + const Files::Filesystem&, const fs::path&, std::error_code& ec) noexcept; + ExpectedT<std::pair<Value, JsonStyle>, std::unique_ptr<Parse::IParseError>> parse( + StringView text, const fs::path& filepath = "") noexcept; + std::string stringify(const Value&, JsonStyle style) noexcept; + +} diff --git a/toolsrc/include/vcpkg/parse.h b/toolsrc/include/vcpkg/base/parse.h index 539bb2ed7..c5044865e 100644 --- a/toolsrc/include/vcpkg/parse.h +++ b/toolsrc/include/vcpkg/base/parse.h @@ -3,6 +3,7 @@ #include <vcpkg/base/cstringview.h> #include <vcpkg/base/optional.h> #include <vcpkg/base/stringview.h> +#include <vcpkg/base/unicode.h> #include <vcpkg/textrowcol.h> #include <memory> @@ -42,41 +43,31 @@ namespace vcpkg::Parse { struct SourceLoc { - const char* it; + Unicode::Utf8Decoder it; + Unicode::Utf8Decoder start_of_line; int 
row; int column; }; - void init(CStringView text, CStringView origin, TextRowCol init_rowcol = {}) - { - m_text = text; - m_origin = origin; - m_it = text.c_str(); - row = init_rowcol.row ? init_rowcol.row : 1; - column = init_rowcol.column ? init_rowcol.column : 1; - } + ParserBase(StringView text, StringView origin, TextRowCol init_rowcol = {}); - static constexpr bool is_whitespace(char ch) { return ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n'; } - static constexpr bool is_lower_alpha(char ch) { return ch >= 'a' && ch <= 'z'; } - static constexpr bool is_upper_alpha(char ch) { return ch >= 'A' && ch <= 'Z'; } - static constexpr bool is_ascii_digit(char ch) { return ch >= '0' && ch <= '9'; } - static constexpr bool is_lineend(char ch) { return ch == '\r' || ch == '\n' || ch == '\0'; } - static constexpr bool is_alphanum(char ch) + static constexpr bool is_whitespace(char32_t ch) { return ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n'; } + static constexpr bool is_lower_alpha(char32_t ch) { return ch >= 'a' && ch <= 'z'; } + static constexpr bool is_upper_alpha(char32_t ch) { return ch >= 'A' && ch <= 'Z'; } + static constexpr bool is_ascii_digit(char32_t ch) { return ch >= '0' && ch <= '9'; } + static constexpr bool is_lineend(char32_t ch) { return ch == '\r' || ch == '\n' || ch == Unicode::end_of_file; } + static constexpr bool is_alphanum(char32_t ch) { return is_upper_alpha(ch) || is_lower_alpha(ch) || is_ascii_digit(ch); } - static constexpr bool is_alphanumdash(char ch) { return is_alphanum(ch) || ch == '-'; } + static constexpr bool is_alphanumdash(char32_t ch) { return is_alphanum(ch) || ch == '-'; } StringView skip_whitespace() { return match_zero_or_more(is_whitespace); } StringView skip_tabs_spaces() { - return match_zero_or_more([](char ch) { return ch == ' ' || ch == '\t'; }); - } - void skip_to_eof() - { - while (cur()) - ++m_it; + return match_zero_or_more([](char32_t ch) { return ch == ' ' || ch == '\t'; }); } + void skip_to_eof() { 
m_it = m_it.end(); } void skip_newline() { if (cur() == '\r') next(); @@ -91,29 +82,29 @@ namespace vcpkg::Parse template<class Pred> StringView match_zero_or_more(Pred p) { - const char* start = m_it; + const char* start = m_it.pointer_to_current(); auto ch = cur(); - while (ch != '\0' && p(ch)) + while (ch != Unicode::end_of_file && p(ch)) ch = next(); - return {start, m_it}; + return {start, m_it.pointer_to_current()}; } template<class Pred> StringView match_until(Pred p) { - const char* start = m_it; + const char* start = m_it.pointer_to_current(); auto ch = cur(); - while (ch != '\0' && !p(ch)) + while (ch != Unicode::end_of_file && !p(ch)) ch = next(); - return {start, m_it}; + return {start, m_it.pointer_to_current()}; } - CStringView text() const { return m_text; } - const char* it() const { return m_it; } - char cur() const { return *m_it; } - SourceLoc cur_loc() const { return {m_it, row, column}; } - TextRowCol cur_rowcol() const { return {row, column}; } - char next(); - bool at_eof() const { return *m_it == 0; } + StringView text() const { return m_text; } + Unicode::Utf8Decoder it() const { return m_it; } + char32_t cur() const { return m_it == m_it.end() ? 
Unicode::end_of_file : *m_it; } + SourceLoc cur_loc() const { return {m_it, m_start_of_line, m_row, m_column}; } + TextRowCol cur_rowcol() const { return {m_row, m_column}; } + char32_t next(); + bool at_eof() const { return m_it == m_it.end(); } void add_error(std::string message) { add_error(std::move(message), cur_loc()); } void add_error(std::string message, const SourceLoc& loc); @@ -122,12 +113,13 @@ namespace vcpkg::Parse std::unique_ptr<Parse::IParseError> extract_error() { return std::move(m_err); } private: - const char* m_it; - int row; - int column; + Unicode::Utf8Decoder m_it; + Unicode::Utf8Decoder m_start_of_line; + int m_row; + int m_column; - CStringView m_text; - CStringView m_origin; + StringView m_text; + StringView m_origin; std::unique_ptr<IParseError> m_err; }; diff --git a/toolsrc/include/vcpkg/base/stringview.h b/toolsrc/include/vcpkg/base/stringview.h index 8503f5f10..1bb8fba6b 100644 --- a/toolsrc/include/vcpkg/base/stringview.h +++ b/toolsrc/include/vcpkg/base/stringview.h @@ -23,8 +23,10 @@ namespace vcpkg constexpr StringView() = default; StringView(const std::string& s); // Implicit by design + + // NOTE: we do this instead of the delegating constructor since delegating ctors are a perf footgun template<size_t Sz> - StringView(const char (&arr)[Sz]) : m_ptr(arr), m_size(Sz - 1) + constexpr StringView(const char (&arr)[Sz]) : m_ptr(arr), m_size(Sz - 1) { } diff --git a/toolsrc/include/vcpkg/base/unicode.h b/toolsrc/include/vcpkg/base/unicode.h new file mode 100644 index 000000000..c2143a235 --- /dev/null +++ b/toolsrc/include/vcpkg/base/unicode.h @@ -0,0 +1,148 @@ +#pragma once + +#include <stddef.h> + +namespace vcpkg::Unicode +{ + enum class Utf8CodeUnitKind + { + Invalid = -1, + Continue = 0, + StartOne = 1, + StartTwo = 2, + StartThree = 3, + StartFour = 4, + }; + + Utf8CodeUnitKind utf8_code_unit_kind(unsigned char code_unit) noexcept; + int utf8_code_unit_count(Utf8CodeUnitKind kind) noexcept; + int utf8_code_unit_count(char 
code_unit) noexcept; + + int utf8_encode_code_point(char (&array)[4], char32_t code_point) noexcept; + + template<class String> + String& utf8_append_code_point(String& str, char32_t code_point) + { + char buf[4] = {}; + int count = ::vcpkg::Unicode::utf8_encode_code_point(buf, code_point); + str.append(buf, buf + count); + return str; + } + + bool utf8_is_valid_string(const char* first, const char* last) noexcept; + + constexpr bool utf16_is_leading_surrogate_code_point(char32_t code_point) + { + return code_point >= 0xD800 && code_point < 0xDC00; + } + constexpr bool utf16_is_trailing_surrogate_code_point(char32_t code_point) + { + return code_point >= 0xDC00 && code_point < 0xE000; + } + constexpr bool utf16_is_surrogate_code_point(char32_t code_point) + { + return code_point >= 0xD800 && code_point < 0xE000; + } + + char32_t utf16_surrogates_to_code_point(char32_t leading, char32_t trailing); + + constexpr static char32_t end_of_file = 0xFFFF'FFFF; + + enum class utf8_errc + { + NoError = 0, + InvalidCodeUnit = 1, + InvalidCodePoint = 2, + PairedSurrogates = 3, + UnexpectedContinue = 4, + UnexpectedStart = 5, + UnexpectedEof = 6, + }; + + struct utf8_category : std::error_category + { + const char* name() const noexcept override; + std::string message(int condition) const override; + }; + + inline std::error_code make_error_code(utf8_errc err) noexcept + { + return std::error_code(static_cast<int>(err), utf8_category()); + } + + /* + There are two ways to parse utf-8: we could allow unpaired surrogates (as in [wtf-8]) -- this is important + for representing things like file paths on Windows. We could also require strict utf-8, as in the JSON + specification. We need both, since when parsing JSON, we need to require strict utf-8; however, when + outputting JSON, we need to be able to stringify unpaired surrogates (as '\uDxyz'). 
This dichotomy is an + issue _because_ we need to be able to decode two different kinds of utf-8: utf-8 as read off of a disk + (strict), and utf-8 as contained in a C++ string (non-strict). + + Since one is a strict superset of the other, we allow the non-strict utf-8 in this decoder; if a consumer + wishes to make certain that the utf-8 is strictly conforming, it will have to do the check on it's own with + `utf16_is_surrogate_code_point`. + + [wtf-8]: https://simonsapin.github.io/wtf-8/ + */ + struct Utf8Decoder + { + Utf8Decoder() noexcept; + Utf8Decoder(const char* first, const char* last) noexcept; + + struct sentinel + { + }; + + bool is_eof() const noexcept; + + void next(std::error_code& ec); + + Utf8Decoder& operator=(sentinel) noexcept; + + char const* pointer_to_current() const noexcept; + + char32_t operator*() const noexcept; + + Utf8Decoder& operator++() noexcept; + Utf8Decoder operator++(int) noexcept + { + auto res = *this; + ++*this; + return res; + } + + Utf8Decoder begin() const { return *this; } + + sentinel end() const { return sentinel(); } + + friend bool operator==(const Utf8Decoder& lhs, const Utf8Decoder& rhs) noexcept; + + using difference_type = std::ptrdiff_t; + using value_type = char32_t; + using pointer = void; + using reference = char32_t; + using iterator_category = std::forward_iterator_tag; + + private: + char32_t current_; + const char* next_; + const char* last_; + }; + + inline bool operator!=(const Utf8Decoder& lhs, const Utf8Decoder& rhs) noexcept { return !(lhs == rhs); } + + inline bool operator==(const Utf8Decoder& d, Utf8Decoder::sentinel) { return d.is_eof(); } + inline bool operator==(Utf8Decoder::sentinel s, const Utf8Decoder& d) { return d == s; } + inline bool operator!=(const Utf8Decoder& d, Utf8Decoder::sentinel) { return !d.is_eof(); } + inline bool operator!=(Utf8Decoder::sentinel s, const Utf8Decoder& d) { return d != s; } + +} + +namespace std +{ + template<> + struct 
is_error_code_enum<vcpkg::Unicode::utf8_errc> : std::true_type + { + }; + +} diff --git a/toolsrc/include/vcpkg/packagespec.h b/toolsrc/include/vcpkg/packagespec.h index a37dc99b3..9c90f3fa6 100644 --- a/toolsrc/include/vcpkg/packagespec.h +++ b/toolsrc/include/vcpkg/packagespec.h @@ -139,7 +139,7 @@ namespace vcpkg Optional<std::string> parse_feature_name(Parse::ParserBase& parser); Optional<std::string> parse_package_name(Parse::ParserBase& parser); - ExpectedS<ParsedQualifiedSpecifier> parse_qualified_specifier(CStringView input); + ExpectedS<ParsedQualifiedSpecifier> parse_qualified_specifier(StringView input); Optional<ParsedQualifiedSpecifier> parse_qualified_specifier(Parse::ParserBase& parser); bool operator==(const PackageSpec& left, const PackageSpec& right); diff --git a/toolsrc/include/vcpkg/paragraphparser.h b/toolsrc/include/vcpkg/paragraphparser.h index cc4637402..699838fbd 100644 --- a/toolsrc/include/vcpkg/paragraphparser.h +++ b/toolsrc/include/vcpkg/paragraphparser.h @@ -40,12 +40,12 @@ namespace vcpkg::Parse }; ExpectedS<std::vector<std::string>> parse_default_features_list(const std::string& str, - CStringView origin = "<unknown>", + StringView origin = "<unknown>", TextRowCol textrowcol = {}); ExpectedS<std::vector<ParsedQualifiedSpecifier>> parse_qualified_specifier_list(const std::string& str, - CStringView origin = "<unknown>", + StringView origin = "<unknown>", TextRowCol textrowcol = {}); ExpectedS<std::vector<Dependency>> parse_dependencies_list(const std::string& str, - CStringView origin = "<unknown>", + StringView origin = "<unknown>", TextRowCol textrowcol = {}); } diff --git a/toolsrc/include/vcpkg/pragmas.h b/toolsrc/include/vcpkg/pragmas.h index fa1039bce..69b958fff 100644 --- a/toolsrc/include/vcpkg/pragmas.h +++ b/toolsrc/include/vcpkg/pragmas.h @@ -5,6 +5,11 @@ #pragma warning(disable : 5030) #endif +#if defined(_MSC_VER) && _MSC_VER < 1910 +// 
https://docs.microsoft.com/en-us/cpp/error-messages/compiler-warnings/compiler-warning-level-3-c4800?view=vs-2019 +#pragma warning(disable : 4800) +#endif + #if defined(__GNUC__) && __GNUC__ < 7 // [[nodiscard]] is not recognized before GCC version 7 #pragma GCC diagnostic ignored "-Wattributes" diff --git a/toolsrc/include/vcpkg/textrowcol.h b/toolsrc/include/vcpkg/textrowcol.h index 90c50d887..bf6f31d7c 100644 --- a/toolsrc/include/vcpkg/textrowcol.h +++ b/toolsrc/include/vcpkg/textrowcol.h @@ -5,10 +5,13 @@ namespace vcpkg::Parse struct TextRowCol
{
constexpr TextRowCol() noexcept = default;
- constexpr TextRowCol(int row, int column) noexcept : row(row), column(column) {}
+ constexpr TextRowCol(int row, int column) noexcept : row(row), column(column) { }
/// '0' indicates uninitialized; '1' is the first row.
int row = 0;
/// '0' indicates uninitialized; '1' is the first column.
int column = 0;
+
+ constexpr int row_or(int def) const noexcept { return row ? row : def; }
+ constexpr int column_or(int def) const noexcept { return column ? column : def; }
};
}
|
