diff options
| author | nicole mazzuca <mazzucan@outlook.com> | 2020-04-17 18:16:20 -0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2020-04-17 18:16:20 -0700 |
| commit | 09af1e9b55fdb79ef5aa04de3f1710759b2de990 (patch) | |
| tree | fc151fb148667d5cf10bf3dcd8e26b5d04cc6d16 /toolsrc/src | |
| parent | 556325a1f7b6049d91565257c00db2f0bf1eadc5 (diff) | |
| download | vcpkg-09af1e9b55fdb79ef5aa04de3f1710759b2de990.tar.gz vcpkg-09af1e9b55fdb79ef5aa04de3f1710759b2de990.zip | |
[vcpkg] Add initial JSON support (#10521)
* [vcpkg] Add initial JSON support
This adds a JSON parser, as well as the amount of Unicode support
required for JSON parsing to work according to the specification. In the
future, I hope to rewrite our existing XML files into JSON.
Additionally, as a drive-by, we've added the following:
* Added /wd4800 to pragmas.h -- this is a "performance warning", raised when
you implicitly convert pointers or integers to bool, and shouldn't be
an issue for us.
* Switched Parse::ParserBase to read Unicode (as UTF-8), as opposed to
ASCII
* The build works again under VCPKG_DEVELOPMENT_WARNINGS, yay!
Diffstat (limited to 'toolsrc/src')
| -rw-r--r-- | toolsrc/src/vcpkg-fuzz/main.cpp | 144 | ||||
| -rw-r--r-- | toolsrc/src/vcpkg-test/json.cpp | 159 | ||||
| -rw-r--r-- | toolsrc/src/vcpkg-test/large-json-document.json.inc | 516 | ||||
| -rw-r--r-- | toolsrc/src/vcpkg/base/json.cpp | 988 | ||||
| -rw-r--r-- | toolsrc/src/vcpkg/base/parse.cpp (renamed from toolsrc/src/vcpkg/parse.cpp) | 96 | ||||
| -rw-r--r-- | toolsrc/src/vcpkg/base/unicode.cpp | 285 | ||||
| -rw-r--r-- | toolsrc/src/vcpkg/export.prefab.cpp | 2 | ||||
| -rw-r--r-- | toolsrc/src/vcpkg/logicexpression.cpp | 8 | ||||
| -rw-r--r-- | toolsrc/src/vcpkg/packagespec.cpp | 15 | ||||
| -rw-r--r-- | toolsrc/src/vcpkg/paragraphs.cpp | 14 |
10 files changed, 2164 insertions, 63 deletions
diff --git a/toolsrc/src/vcpkg-fuzz/main.cpp b/toolsrc/src/vcpkg-fuzz/main.cpp new file mode 100644 index 000000000..881577654 --- /dev/null +++ b/toolsrc/src/vcpkg-fuzz/main.cpp @@ -0,0 +1,144 @@ +#include <vcpkg/base/checks.h> +#include <vcpkg/base/json.h> +#include <vcpkg/base/stringview.h> +#include <vcpkg/base/system.print.h> + +#include <iostream> +#include <sstream> +#include <string.h> +#include <utility> + +using namespace vcpkg; + +namespace +{ + enum class FuzzKind + { + None, + Utf8Decoder, + JsonParser, + }; + + struct FuzzArgs + { + FuzzArgs(int argc, char** argv) + { + if (argc <= 1) + { + print_help_and_exit(); + } + + char** it = argv + 1; // skip the name of the program + char** last = argv + argc; + + for (; it != last; ++it) + { + auto arg = StringView(*it, strlen(*it)); + if (arg == "/?") + { + print_help_and_exit(); + } + + auto pr = split_arg(arg); + auto key = pr.first; + auto value = pr.second; + if (key == "h" || key == "help") + { + print_help_and_exit(); + } + + if (key == "kind") + { + if (value == "json") + { + kind = FuzzKind::JsonParser; + } + else if (value == "utf-8") + { + kind = FuzzKind::Utf8Decoder; + } + else + { + System::print2(System::Color::error, "Invalid kind: ", value, "\n"); + System::print2(System::Color::error, " Expected one of: utf-8, json\n\n"); + print_help_and_exit(true); + } + } + else + { + System::print2("Unknown option: ", key, "\n\n"); + print_help_and_exit(true); + } + } + } + + // returns {arg, ""} when there isn't an `=` + // skips preceding `-`s + std::pair<StringView, StringView> split_arg(StringView arg) + { + auto first = std::find_if(arg.begin(), arg.end(), [](char c) { return c != '-'; }); + auto division = std::find(first, arg.end(), '='); + if (division == arg.end()) { + return {StringView(first, arg.end()), StringView(arg.end(), arg.end())}; + } else { + return {StringView(first, division), StringView(division + 1, arg.end())}; + } + } + + [[noreturn]] void print_help_and_exit(bool invalid = 
false) + { + constexpr auto help = + R"( +Usage: vcpkg-fuzz <options> + +Accepts input on stdin. + +Options: + --kind=... One of {utf-8, json} +)"; + + auto color = invalid ? System::Color::error : System::Color::success; + + System::print2(color, help); + if (invalid) + { + Checks::exit_fail(VCPKG_LINE_INFO); + } + else + { + Checks::exit_success(VCPKG_LINE_INFO); + } + } + + FuzzKind kind; + }; + + std::string read_all_of_stdin() + { + std::stringstream ss; + ss << std::cin.rdbuf(); + return std::move(ss).str(); + } + +} + +int main(int argc, char** argv) +{ + auto args = FuzzArgs(argc, argv); + + if (args.kind == FuzzKind::None) + { + args.print_help_and_exit(true); + } + + auto text = read_all_of_stdin(); + auto res = Json::parse(text); + if (!res) + { + System::print2(System::Color::error, res.error()->format()); + } + else + { + System::print2(System::Color::success, "success!"); + } +} diff --git a/toolsrc/src/vcpkg-test/json.cpp b/toolsrc/src/vcpkg-test/json.cpp new file mode 100644 index 000000000..09f5d98fc --- /dev/null +++ b/toolsrc/src/vcpkg-test/json.cpp @@ -0,0 +1,159 @@ +#include <catch2/catch.hpp> + +#include <iostream> +#include <vcpkg/base/json.h> +#include <vcpkg/base/unicode.h> + +// TODO: remove this once we switch to C++20 completely +// This is the worst, but we also can't really deal with it any other way. 
+#if __cpp_char8_t +template<size_t Sz> +static auto _u8_string_to_char_string(const char8_t (&literal)[Sz]) -> const char (&)[Sz] +{ + return reinterpret_cast<const char(&)[Sz]>(literal); +} + +#define U8_STR(s) (::vcpkg::Unicode::_u8_string_to_char_string(u8"" s)) +#else +#define U8_STR(s) (u8"" s) +#endif + +namespace Json = vcpkg::Json; +using Json::Value; + +static std::string mystringify(const Value& val) { return Json::stringify(val, Json::JsonStyle{}); } + +TEST_CASE ("JSON stringify weird strings", "[json]") +{ + vcpkg::StringView str = U8_STR("😀 😁 😂 🤣 😃 😄 😅 😆 😉"); + REQUIRE(mystringify(Value::string(str)) == ('"' + str.to_string() + '"')); + REQUIRE(mystringify(Value::string("\xED\xA0\x80")) == "\"\\ud800\""); // unpaired surrogate +} + +TEST_CASE ("JSON parse keywords", "[json]") +{ + auto res = Json::parse("true"); + REQUIRE(res); + REQUIRE(res.get()->first.is_boolean()); + REQUIRE(res.get()->first.boolean()); + res = Json::parse(" false "); + REQUIRE(res); + REQUIRE(res.get()->first.is_boolean()); + REQUIRE(!res.get()->first.boolean()); + res = Json::parse(" null\t "); + REQUIRE(res); + REQUIRE(res.get()->first.is_null()); +} + +TEST_CASE ("JSON parse strings", "[json]") +{ + auto res = Json::parse(R"("")"); + REQUIRE(res); + REQUIRE(res.get()->first.is_string()); + REQUIRE(res.get()->first.string().size() == 0); + + res = Json::parse(R"("\ud800")"); // unpaired surrogate + REQUIRE(res); + REQUIRE(res.get()->first.is_string()); + REQUIRE(res.get()->first.string() == "\xED\xA0\x80"); + + const auto make_json_string = [] (vcpkg::StringView sv) { + return '"' + sv.to_string() + '"'; + }; + const vcpkg::StringView radical = U8_STR("⎷"); + const vcpkg::StringView grin = U8_STR("😁"); + + res = Json::parse(R"("\uD83D\uDE01")"); // paired surrogates for grin + REQUIRE(res); + REQUIRE(res.get()->first.is_string()); + REQUIRE(res.get()->first.string() == grin.to_string()); + + res = Json::parse(make_json_string(radical)); // character in BMP + REQUIRE(res); + 
REQUIRE(res.get()->first.is_string()); + REQUIRE(res.get()->first.string() == radical); + + res = Json::parse(make_json_string(grin)); // character above BMP + REQUIRE(res); + REQUIRE(res.get()->first.is_string()); + REQUIRE(res.get()->first.string() == grin); +} + +TEST_CASE ("JSON parse numbers", "[json]") +{ + auto res = Json::parse("0"); + REQUIRE(res); + REQUIRE(res.get()->first.is_number()); + REQUIRE(res.get()->first.number() == 0); + res = Json::parse("12345"); + REQUIRE(res); + REQUIRE(res.get()->first.is_number()); + REQUIRE(res.get()->first.number() == 12345); + res = Json::parse("-12345"); + REQUIRE(res); + REQUIRE(res.get()->first.is_number()); + REQUIRE(res.get()->first.number() == -12345); + res = Json::parse("9223372036854775807"); // INT64_MAX + REQUIRE(res); + REQUIRE(res.get()->first.is_number()); + REQUIRE(res.get()->first.number() == 9223372036854775807); + res = Json::parse("-9223372036854775808"); + REQUIRE(res); + REQUIRE(res.get()->first.is_number()); + REQUIRE(res.get()->first.number() == (-9223372036854775807 - 1)); // INT64_MIN (C++'s parser is fun) +} + +TEST_CASE ("JSON parse arrays", "[json]") +{ + auto res = Json::parse("[]"); + REQUIRE(res); + auto val = std::move(res.get()->first); + REQUIRE(val.is_array()); + REQUIRE(val.array().size() == 0); + + res = Json::parse("[123]"); + REQUIRE(res); + val = std::move(res.get()->first); + REQUIRE(val.is_array()); + REQUIRE(val.array().size() == 1); + REQUIRE(val.array()[0].is_number()); + REQUIRE(val.array()[0].number() == 123); + + res = Json::parse("[123, 456]"); + REQUIRE(res); + val = std::move(res.get()->first); + REQUIRE(val.is_array()); + REQUIRE(val.array().size() == 2); + REQUIRE(val.array()[0].is_number()); + REQUIRE(val.array()[0].number() == 123); + REQUIRE(val.array()[1].is_number()); + REQUIRE(val.array()[1].number() == 456); + + res = Json::parse("[123, 456, [null]]"); + REQUIRE(res); + val = std::move(res.get()->first); + REQUIRE(val.is_array()); + REQUIRE(val.array().size() 
== 3); + REQUIRE(val.array()[2].is_array()); + REQUIRE(val.array()[2].array().size() == 1); + REQUIRE(val.array()[2].array()[0].is_null()); +} + +TEST_CASE ("JSON parse objects", "[json]") +{ + auto res = Json::parse("{}"); + REQUIRE(res); + auto val = std::move(res.get()->first); + REQUIRE(val.is_object()); + REQUIRE(val.object().size() == 0); +} + +TEST_CASE ("JSON parse full file", "[json]") +{ + vcpkg::StringView json = +#include "large-json-document.json.inc" + ; + + auto res = Json::parse(json); + REQUIRE(res); +} diff --git a/toolsrc/src/vcpkg-test/large-json-document.json.inc b/toolsrc/src/vcpkg-test/large-json-document.json.inc new file mode 100644 index 000000000..b692d23ee --- /dev/null +++ b/toolsrc/src/vcpkg-test/large-json-document.json.inc @@ -0,0 +1,516 @@ +// randomly generated by json-generator.com +R"json([ + { + "_id": "5e7a86c71cf688019f4c4b9f", + "index": 0, + "guid": "0e3c8a89-9960-4d87-8634-f9d3fcdaf735", + "isActive": false, + "balance": "$2,473.46", + "picture": "http://placehold.it/32x32", + "age": 21, + "eyeColor": "blue", + "name": { + "first": "Dana", + "last": "Romero" + }, + "company": "NETPLODE", + "email": "dana.romero@netplode.info", + "phone": "+1 (981) 479-2769", + "address": "501 Cass Place, Strykersville, Missouri, 8329", + "about": "Incididunt in fugiat ipsum proident aliqua voluptate Lorem nostrud laboris ut velit minim. Dolor culpa magna tempor cupidatat. Id esse quis ipsum incididunt. Aliqua deserunt aliquip esse minim cillum esse aliquip veniam non labore dolor incididunt. Sint ea consectetur exercitation aliqua proident reprehenderit tempor ea eu. 
Amet ipsum labore magna elit.", + "registered": "Wednesday, September 4, 2019 8:29 AM", + "latitude": "-49.844379", + "longitude": "-5.565357", + "tags": [ + "dolor", + "fugiat", + "ea", + "nisi", + "non" + ], + "range": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9 + ], + "friends": [ + { + "id": 0, + "name": "Elena Suarez" + }, + { + "id": 1, + "name": "Pruitt Leach" + }, + { + "id": 2, + "name": "Pugh Robinson" + } + ], + "greeting": "Hello, Dana! You have 8 unread messages.", + "favoriteFruit": "apple" + }, + { + "_id": "5e7a86c70efbf62ab5579408", + "index": 1, + "guid": "2c64c2d3-a830-4598-a399-f68f798ba6dc", + "isActive": true, + "balance": "$1,838.58", + "picture": "http://placehold.it/32x32", + "age": 33, + "eyeColor": "brown", + "name": { + "first": "Ladonna", + "last": "Willis" + }, + "company": "JOVIOLD", + "email": "ladonna.willis@joviold.us", + "phone": "+1 (921) 591-2296", + "address": "441 Highland Place, Leyner, Pennsylvania, 1788", + "about": "Consequat deserunt nisi sit ex occaecat. Magna pariatur irure nisi duis laborum proident ipsum duis. Tempor qui consectetur consequat sunt proident ex ad id sint cupidatat sint.", + "registered": "Wednesday, January 13, 2016 6:03 PM", + "latitude": "-62.130182", + "longitude": "-102.884995", + "tags": [ + "fugiat", + "ipsum", + "ut", + "pariatur", + "enim" + ], + "range": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9 + ], + "friends": [ + { + "id": 0, + "name": "Gamble Rose" + }, + { + "id": 1, + "name": "Olive Horn" + }, + { + "id": 2, + "name": "Benita Ochoa" + } + ], + "greeting": "Hello, Ladonna! 
You have 6 unread messages.", + "favoriteFruit": "banana" + }, + { + "_id": "5e7a86c71e30c95bbb0ff386", + "index": 2, + "guid": "04e45222-d785-461b-99be-b330585eb1a1", + "isActive": true, + "balance": "$3,591.60", + "picture": "http://placehold.it/32x32", + "age": 32, + "eyeColor": "brown", + "name": { + "first": "Lee", + "last": "Buckley" + }, + "company": "TELEQUIET", + "email": "lee.buckley@telequiet.biz", + "phone": "+1 (897) 511-2132", + "address": "675 Argyle Road, Kempton, Ohio, 4411", + "about": "Sunt aliquip excepteur veniam fugiat consequat commodo ex est nulla laboris cillum enim. Laboris cupidatat et ipsum anim reprehenderit officia officia aute aliqua tempor. Incididunt sunt cupidatat mollit deserunt id nisi esse elit nisi est eiusmod aliquip. Lorem cillum ipsum quis aliquip laboris ex minim eu quis. Dolore incididunt officia labore enim Lorem in occaecat aliquip. Mollit ad duis non non qui et Lorem cillum.", + "registered": "Tuesday, January 16, 2018 3:14 PM", + "latitude": "-51.283144", + "longitude": "-112.569722", + "tags": [ + "id", + "veniam", + "dolor", + "nulla", + "pariatur" + ], + "range": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9 + ], + "friends": [ + { + "id": 0, + "name": "Dina Craft" + }, + { + "id": 1, + "name": "Ashlee Ferrell" + }, + { + "id": 2, + "name": "Mcbride Gill" + } + ], + "greeting": "Hello, Lee! You have 7 unread messages.", + "favoriteFruit": "banana" + }, + { + "_id": "5e7a86c70e08bf0c278749f9", + "index": 3, + "guid": "0928ccac-e028-405a-a614-76628ba131b4", + "isActive": false, + "balance": "$2,867.88", + "picture": "http://placehold.it/32x32", + "age": 26, + "eyeColor": "green", + "name": { + "first": "Chen", + "last": "Rosa" + }, + "company": "EXPOSA", + "email": "chen.rosa@exposa.me", + "phone": "+1 (956) 519-3064", + "address": "239 Amersfort Place, Fillmore, South Carolina, 2443", + "about": "Est ipsum cillum proident veniam voluptate enim sit eu excepteur veniam sit. 
Sunt aliqua qui incididunt id irure nulla qui. Et consequat ad anim proident minim dolor quis aliquip Lorem qui fugiat voluptate ex.", + "registered": "Wednesday, April 9, 2014 12:45 PM", + "latitude": "-15.222992", + "longitude": "76.730424", + "tags": [ + "eiusmod", + "sit", + "do", + "aute", + "ea" + ], + "range": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9 + ], + "friends": [ + { + "id": 0, + "name": "Huff Townsend" + }, + { + "id": 1, + "name": "Stacie Downs" + }, + { + "id": 2, + "name": "Liza Barron" + } + ], + "greeting": "Hello, Chen! You have 8 unread messages.", + "favoriteFruit": "banana" + }, + { + "_id": "5e7a86c7086fc15efc387abc", + "index": 4, + "guid": "c71e62aa-8428-44cd-a2a5-7c30aaf963fe", + "isActive": true, + "balance": "$3,022.64", + "picture": "http://placehold.it/32x32", + "age": 32, + "eyeColor": "blue", + "name": { + "first": "Walton", + "last": "Mendez" + }, + "company": "BUZZNESS", + "email": "walton.mendez@buzzness.tv", + "phone": "+1 (849) 560-2058", + "address": "507 Colonial Court, Collins, New York, 7941", + "about": "Et nisi in excepteur velit non incididunt sit. Consectetur magna sunt dolor eu Lorem adipisicing incididunt laborum consequat proident. Laboris ut dolor laboris esse ut dolor adipisicing ad fugiat commodo fugiat incididunt pariatur anim. Amet reprehenderit aute fugiat incididunt irure eu duis sint amet aliquip excepteur tempor dolore. Anim reprehenderit commodo irure sint et tempor occaecat fugiat ex commodo consectetur. Id ex dolor et culpa mollit. 
Voluptate magna est ex proident deserunt ullamco enim quis nulla cupidatat voluptate culpa exercitation Lorem.", + "registered": "Tuesday, May 9, 2017 2:38 PM", + "latitude": "86.083203", + "longitude": "57.386268", + "tags": [ + "Lorem", + "aute", + "proident", + "eu", + "incididunt" + ], + "range": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9 + ], + "friends": [ + { + "id": 0, + "name": "Byers Sims" + }, + { + "id": 1, + "name": "Suzanne Gonzalez" + }, + { + "id": 2, + "name": "Vicki Velasquez" + } + ], + "greeting": "Hello, Walton! You have 5 unread messages.", + "favoriteFruit": "banana" + }, + { + "_id": "5e7a86c7dc9b82ffcb2868a2", + "index": 5, + "guid": "76f1c1cc-9164-43df-8858-633ee696df1c", + "isActive": true, + "balance": "$2,625.28", + "picture": "http://placehold.it/32x32", + "age": 40, + "eyeColor": "green", + "name": { + "first": "Wise", + "last": "Head" + }, + "company": "FARMEX", + "email": "wise.head@farmex.com", + "phone": "+1 (850) 478-3280", + "address": "425 Kent Street, Witmer, New Jersey, 2411", + "about": "Velit amet fugiat enim occaecat do. Nulla sint officia anim ullamco. Ea quis excepteur excepteur enim ullamco. Amet aliqua mollit ad excepteur minim voluptate in velit sunt elit duis quis consequat nulla. Est dolor quis culpa aute id occaecat adipisicing mollit do consectetur fugiat. Mollit elit ex nostrud pariatur. Deserunt proident et voluptate occaecat labore occaecat Lorem exercitation est minim magna.", + "registered": "Monday, March 23, 2015 10:14 PM", + "latitude": "29.880281", + "longitude": "126.094567", + "tags": [ + "enim", + "sunt", + "cupidatat", + "officia", + "aute" + ], + "range": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9 + ], + "friends": [ + { + "id": 0, + "name": "Fuentes Tyler" + }, + { + "id": 1, + "name": "Flora Massey" + }, + { + "id": 2, + "name": "Manuela Parks" + } + ], + "greeting": "Hello, Wise! 
You have 8 unread messages.", + "favoriteFruit": "strawberry" + }, + { + "_id": "5e7a86c7b3605b4ab198b25f", + "index": 6, + "guid": "818a0d46-9595-4066-a819-c93979220983", + "isActive": true, + "balance": "$3,193.77", + "picture": "http://placehold.it/32x32", + "age": 30, + "eyeColor": "brown", + "name": { + "first": "Baldwin", + "last": "Mcguire" + }, + "company": "ZILCH", + "email": "baldwin.mcguire@zilch.io", + "phone": "+1 (803) 562-3968", + "address": "273 Homecrest Court, Caron, Virgin Islands, 7930", + "about": "Nostrud exercitation Lorem reprehenderit commodo aliquip. Exercitation exercitation proident aliquip et do cillum id ad ad reprehenderit ipsum elit nostrud. Occaecat velit sit commodo aliquip esse.", + "registered": "Saturday, January 14, 2017 3:13 AM", + "latitude": "-38.674801", + "longitude": "78.160951", + "tags": [ + "reprehenderit", + "eu", + "magna", + "nulla", + "non" + ], + "range": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9 + ], + "friends": [ + { + "id": 0, + "name": "Austin Gonzales" + }, + { + "id": 1, + "name": "Polly Mcknight" + }, + { + "id": 2, + "name": "Lucy Wagner" + } + ], + "greeting": "Hello, Baldwin! You have 7 unread messages.", + "favoriteFruit": "banana" + }, + { + "_id": "5e7a86c700d93af1dcbe69f3", + "index": 7, + "guid": "09dc8fc1-207f-45f2-9d2c-f1e70e278e9a", + "isActive": true, + "balance": "$3,895.90", + "picture": "http://placehold.it/32x32", + "age": 21, + "eyeColor": "brown", + "name": { + "first": "Key", + "last": "Bolton" + }, + "company": "MAGNINA", + "email": "key.bolton@magnina.net", + "phone": "+1 (918) 466-2785", + "address": "139 Blake Court, Chautauqua, Georgia, 3570", + "about": "Cupidatat excepteur reprehenderit eiusmod aute ea commodo ipsum pariatur dolore veniam adipisicing dolor. Excepteur ex in laborum cupidatat cillum cillum qui dolore consequat excepteur. Lorem deserunt eiusmod esse proident et ullamco reprehenderit ad ea. 
Cupidatat veniam deserunt magna eu labore ipsum et officia officia irure non eiusmod.", + "registered": "Wednesday, October 5, 2016 8:28 AM", + "latitude": "89.82294", + "longitude": "45.807834", + "tags": [ + "et", + "excepteur", + "sunt", + "ea", + "irure" + ], + "range": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9 + ], + "friends": [ + { + "id": 0, + "name": "Farmer Adkins" + }, + { + "id": 1, + "name": "Summers Huffman" + }, + { + "id": 2, + "name": "Lessie Holden" + } + ], + "greeting": "Hello, Key! You have 9 unread messages.", + "favoriteFruit": "strawberry" + }, + { + "_id": "5e7a86c7a3fcbd25660a493d", + "index": 8, + "guid": "7d58a9c2-6940-499e-ba24-75227b1a09d9", + "isActive": true, + "balance": "$1,606.76", + "picture": "http://placehold.it/32x32", + "age": 31, + "eyeColor": "green", + "name": { + "first": "Gonzales", + "last": "Manning" + }, + "company": "ORGANICA", + "email": "gonzales.manning@organica.name", + "phone": "+1 (993) 556-2745", + "address": "690 Regent Place, Rodman, Marshall Islands, 7114", + "about": "Magna ullamco voluptate nostrud et magna ea aute sint id quis proident ad excepteur ullamco. Aliquip nostrud qui quis duis occaecat commodo laborum labore aute. Mollit ullamco in qui eu voluptate dolore aute mollit sint do sit nulla aliqua. Occaecat laborum ex velit ea ex ad eiusmod enim fugiat.", + "registered": "Monday, December 1, 2014 6:32 AM", + "latitude": "48.780262", + "longitude": "-75.333042", + "tags": [ + "non", + "laborum", + "et", + "Lorem", + "id" + ], + "range": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9 + ], + "friends": [ + { + "id": 0, + "name": "Hester Finch" + }, + { + "id": 1, + "name": "Tia Cooley" + }, + { + "id": 2, + "name": "Cathryn Howe" + } + ], + "greeting": "Hello, Gonzales! 
You have 5 unread messages.", + "favoriteFruit": "apple" + } +])json" diff --git a/toolsrc/src/vcpkg/base/json.cpp b/toolsrc/src/vcpkg/base/json.cpp new file mode 100644 index 000000000..d8d0faab0 --- /dev/null +++ b/toolsrc/src/vcpkg/base/json.cpp @@ -0,0 +1,988 @@ +#include "pch.h" + +#include <vcpkg/base/files.h> +#include <vcpkg/base/json.h> +#include <vcpkg/base/system.debug.h> +#include <vcpkg/base/unicode.h> + +namespace vcpkg::Json +{ + using VK = ValueKind; + + // struct Value { + namespace impl + { + // TODO: add a value_kind value template once we get rid of VS2015 support + template<ValueKind Vk> + using ValueKindConstant = std::integral_constant<ValueKind, Vk>; + + struct ValueImpl + { + VK tag; + union + { + std::nullptr_t null; + bool boolean; + int64_t number; + std::string string; + Array array; + Object object; + }; + + ValueImpl(ValueKindConstant<VK::Null> vk, std::nullptr_t) : tag(vk), null() { } + ValueImpl(ValueKindConstant<VK::Boolean> vk, bool b) : tag(vk), boolean(b) { } + ValueImpl(ValueKindConstant<VK::Number> vk, int64_t i) : tag(vk), number(i) { } + ValueImpl(ValueKindConstant<VK::String> vk, std::string&& s) : tag(vk), string(std::move(s)) { } + ValueImpl(ValueKindConstant<VK::Array> vk, Array&& arr) : tag(vk), array(std::move(arr)) { } + ValueImpl(ValueKindConstant<VK::Object> vk, Object&& obj) : tag(vk), object(std::move(obj)) { } + + ValueImpl& operator=(ValueImpl&& other) noexcept + { + switch (other.tag) + { + case VK::Null: return internal_assign(VK::Null, &ValueImpl::null, other); + case VK::Boolean: return internal_assign(VK::Boolean, &ValueImpl::boolean, other); + case VK::Number: return internal_assign(VK::Number, &ValueImpl::number, other); + case VK::String: return internal_assign(VK::String, &ValueImpl::string, other); + case VK::Array: return internal_assign(VK::Array, &ValueImpl::array, other); + case VK::Object: return internal_assign(VK::Object, &ValueImpl::object, other); + } + } + + ~ValueImpl() { 
destroy_underlying(); } + + private: + template<class T> + ValueImpl& internal_assign(ValueKind vk, T ValueImpl::*mp, ValueImpl& other) noexcept + { + if (tag == vk) + { + this->*mp = std::move(other.*mp); + } + else + { + destroy_underlying(); + new (&(this->*mp)) T(std::move(other.*mp)); + tag = vk; + } + + return *this; + } + + void destroy_underlying() noexcept + { + switch (tag) + { + case VK::String: string.~basic_string(); break; + case VK::Array: array.~Array(); break; + case VK::Object: object.~Object(); break; + default: break; + } + new (&null) std::nullptr_t(); + tag = VK::Null; + } + }; + } + + using impl::ValueImpl; + using impl::ValueKindConstant; + + VK Value::kind() const noexcept + { + if (underlying_) + { + return underlying_->tag; + } + else + { + return VK::Null; + } + } + + bool Value::is_null() const noexcept { return kind() == VK::Null; } + bool Value::is_boolean() const noexcept { return kind() == VK::Boolean; } + bool Value::is_number() const noexcept { return kind() == VK::Number; } + bool Value::is_string() const noexcept { return kind() == VK::String; } + bool Value::is_array() const noexcept { return kind() == VK::Array; } + bool Value::is_object() const noexcept { return kind() == VK::Object; } + + bool Value::boolean() const noexcept + { + vcpkg::Checks::check_exit(VCPKG_LINE_INFO, is_boolean()); + return underlying_->boolean; + } + int64_t Value::number() const noexcept + { + vcpkg::Checks::check_exit(VCPKG_LINE_INFO, is_number()); + return underlying_->number; + } + StringView Value::string() const noexcept + { + vcpkg::Checks::check_exit(VCPKG_LINE_INFO, is_string()); + return underlying_->string; + } + + const Array& Value::array() const noexcept + { + vcpkg::Checks::check_exit(VCPKG_LINE_INFO, is_array()); + return underlying_->array; + } + Array& Value::array() noexcept + { + vcpkg::Checks::check_exit(VCPKG_LINE_INFO, is_array()); + return underlying_->array; + } + + const Object& Value::object() const noexcept + { + 
vcpkg::Checks::check_exit(VCPKG_LINE_INFO, is_object()); + return underlying_->object; + } + Object& Value::object() noexcept + { + vcpkg::Checks::check_exit(VCPKG_LINE_INFO, is_object()); + return underlying_->object; + } + + Value::Value() noexcept = default; + Value::Value(Value&&) noexcept = default; + Value& Value::operator=(Value&&) noexcept = default; + Value::~Value() = default; + + Value Value::clone() const noexcept + { + switch (kind()) + { + case ValueKind::Null: return Value::null(nullptr); + case ValueKind::Boolean: return Value::boolean(boolean()); + case ValueKind::Number: return Value::number(number()); + case ValueKind::String: return Value::string(string()); + case ValueKind::Array: return Value::array(array().clone()); + case ValueKind::Object: return Value::object(object().clone()); + default: Checks::exit_fail(VCPKG_LINE_INFO); + } + } + + Value Value::null(std::nullptr_t) noexcept { return Value(); } + Value Value::boolean(bool b) noexcept + { + Value val; + val.underlying_ = std::make_unique<ValueImpl>(ValueKindConstant<VK::Boolean>(), b); + return val; + } + Value Value::number(int64_t i) noexcept + { + Value val; + val.underlying_ = std::make_unique<ValueImpl>(ValueKindConstant<VK::Number>(), i); + return val; + } + Value Value::string(StringView sv) noexcept + { + if (!Unicode::utf8_is_valid_string(sv.begin(), sv.end())) + { + Debug::print("Invalid string: ", sv, '\n'); + vcpkg::Checks::exit_with_message(VCPKG_LINE_INFO, "Invalid utf8 passed to Value::string(StringView)"); + } + Value val; + val.underlying_ = std::make_unique<ValueImpl>(ValueKindConstant<VK::String>(), sv.to_string()); + return val; + } + Value Value::array(Array&& arr) noexcept + { + Value val; + val.underlying_ = std::make_unique<ValueImpl>(ValueKindConstant<VK::Array>(), std::move(arr)); + return val; + } + Value Value::object(Object&& obj) noexcept + { + Value val; + val.underlying_ = std::make_unique<ValueImpl>(ValueKindConstant<VK::Object>(), std::move(obj)); + 
return val; + } + // } struct Value + // struct Array { + Array Array::clone() const noexcept + { + Array arr; + arr.underlying_.reserve(size()); + for (const auto& el : *this) + { + arr.underlying_.push_back(el.clone()); + } + return arr; + } + // } struct Array + // struct Object { + void Object::insert(std::string key, Value value) noexcept + { + vcpkg::Checks::check_exit(VCPKG_LINE_INFO, !contains(key)); + underlying_.push_back(std::make_pair(std::move(key), std::move(value))); + } + void Object::insert_or_replace(std::string key, Value value) noexcept + { + auto v = get(key); + if (v) + { + *v = std::move(value); + } + else + { + underlying_.push_back(std::make_pair(std::move(key), std::move(value))); + } + } + + auto Object::internal_find_key(StringView key) const noexcept -> underlying_t::const_iterator + { + return std::find_if( + underlying_.begin(), underlying_.end(), [key](const auto& pair) { return pair.first == key; }); + } + + // returns whether the key existed + bool Object::remove(StringView key) noexcept + { + auto it = internal_find_key(key); + if (it == underlying_.end()) + { + return false; + } + else + { + underlying_.erase(it); + return true; + } + } + + Value* Object::get(StringView key) noexcept + { + auto it = internal_find_key(key); + if (it == underlying_.end()) + { + return nullptr; + } + else + { + return &underlying_[it - underlying_.begin()].second; + } + } + const Value* Object::get(StringView key) const noexcept + { + auto it = internal_find_key(key); + if (it == underlying_.end()) + { + return nullptr; + } + else + { + return &it->second; + } + } + + Object Object::clone() const noexcept + { + Object obj; + obj.underlying_.reserve(size()); + for (const auto& el : *this) + { + obj.insert(el.first.to_string(), el.second.clone()); + } + return obj; + } + // } struct Object + + // auto parse() { + namespace + { + struct Parser : private Parse::ParserBase + { + Parser(StringView text, StringView origin) : Parse::ParserBase(text, 
origin), style_() { } + + char32_t next() noexcept + { + auto ch = cur(); + if (ch == '\r') style_.newline_kind = JsonStyle::Newline::CrLf; + if (ch == '\t') style_.set_tabs(); + return Parse::ParserBase::next(); + } + + static constexpr bool is_digit(char32_t code_point) noexcept + { + return code_point >= '0' && code_point <= '9'; + } + static constexpr bool is_hex_digit(char32_t code_point) noexcept + { + return is_digit(code_point) || (code_point >= 'a' && code_point <= 'f') || + (code_point >= 'A' && code_point <= 'F'); + } + static bool is_number_start(char32_t code_point) noexcept + { + return code_point == '-' || is_digit(code_point); + } + static bool is_keyword_start(char32_t code_point) noexcept + { + return code_point == 'f' || code_point == 'n' || code_point == 't'; + } + + static unsigned char from_hex_digit(char32_t code_point) noexcept + { + if (is_digit(code_point)) + { + return static_cast<unsigned char>(code_point) - '0'; + } + else if (code_point >= 'a' && code_point <= 'f') + { + return static_cast<unsigned char>(code_point) - 'a' + 10; + } + else if (code_point >= 'A' && code_point <= 'F') + { + return static_cast<unsigned char>(code_point) - 'A' + 10; + } + else + { + vcpkg::Checks::exit_fail(VCPKG_LINE_INFO); + } + } + + // parses a _single_ code point of a string -- either a literal code point, or an escape sequence + // returns end_of_file if it reaches an unescaped '"' + // _does not_ pair escaped surrogates -- returns the literal surrogate. 
+ char32_t parse_string_code_point() noexcept + { + char32_t current = cur(); + if (current == '"') + { + next(); + return Unicode::end_of_file; + } + else if (current <= 0x001F) + { + add_error("Control character in string"); + next(); + return Unicode::end_of_file; + } + else if (current != '\\') + { + next(); + return current; + } + + // cur == '\\' + if (at_eof()) + { + add_error("Unexpected EOF after escape character"); + return Unicode::end_of_file; + } + current = next(); + + switch (current) + { + case '"': return '"'; + case '\\': return '\\'; + case '/': return '/'; + case 'b': return '\b'; + case 'f': return '\f'; + case 'n': return '\n'; + case 'r': return '\r'; + case 't': return '\t'; + case 'u': + { + char16_t code_unit = 0; + for (int i = 0; i < 4; ++i) + { + current = next(); + + if (current == Unicode::end_of_file) + { + add_error("Unexpected end of file in middle of unicode escape"); + return Unicode::end_of_file; + } + if (is_hex_digit(current)) + { + code_unit *= 16; + code_unit += from_hex_digit(current); + } + else + { + add_error("Invalid hex digit in unicode escape"); + return Unicode::end_of_file; + } + } + next(); + + return code_unit; + } + default: add_error("Unexpected escape sequence continuation"); return Unicode::end_of_file; + } + } + + std::string parse_string() noexcept + { + Checks::check_exit(VCPKG_LINE_INFO, cur() == '"'); + next(); + + std::string res; + char32_t previous_leading_surrogate = Unicode::end_of_file; + while (!at_eof()) + { + auto code_point = parse_string_code_point(); + + if (previous_leading_surrogate != Unicode::end_of_file) + { + if (Unicode::utf16_is_trailing_surrogate_code_point(code_point)) + { + const auto full_code_point = + Unicode::utf16_surrogates_to_code_point(previous_leading_surrogate, code_point); + Unicode::utf8_append_code_point(res, full_code_point); + previous_leading_surrogate = Unicode::end_of_file; + continue; + } + else + { + Unicode::utf8_append_code_point(res, 
previous_leading_surrogate); + } + } + previous_leading_surrogate = Unicode::end_of_file; + + if (Unicode::utf16_is_leading_surrogate_code_point(code_point)) + { + previous_leading_surrogate = code_point; + } + else if (code_point == Unicode::end_of_file) + { + return res; + } + else + { + Unicode::utf8_append_code_point(res, code_point); + } + } + + add_error("Unexpected EOF in middle of string"); + return res; + } + + Value parse_number() noexcept + { + Checks::check_exit(VCPKG_LINE_INFO, is_number_start(cur())); + bool negative = false; + + char32_t current = cur(); + if (cur() == '-') + { + negative = true; + current = next(); + if (current == Unicode::end_of_file) + { + add_error("Unexpected EOF after minus sign"); + return Value(); + } + } + + if (current == '0') + { + current = next(); + if (current != Unicode::end_of_file) + { + if (is_digit(current)) + { + add_error("Unexpected digits after a leading zero"); + } + if (current == '.') + { + add_error("Found a `.` -- this JSON implementation does not support floating point"); + } + } + return Value::number(0); + } + + // parse as negative so that someone can write INT64_MIN; otherwise, they'd only be able to get + // -INT64_MAX = INT64_MIN + 1 + constexpr auto min_value = std::numeric_limits<int64_t>::min(); + int64_t result = 0; + while (current != Unicode::end_of_file && is_digit(current)) + { + const int digit = current - '0'; + // result * 10 - digit < min_value : remember that result < 0 + if (result < (min_value + digit) / 10) + { + add_error("Number is too big for an int64_t"); + return Value(); + } + result *= 10; + result -= digit; + current = next(); + } + if (current == '.') + { + add_error("Found a `.` -- this JSON implementation doesn't support floating point"); + return Value(); + } + + if (!negative) + { + if (result == min_value) + { + add_error("Number is too big for a uint64_t"); + return Value(); + } + result = -result; + } + + return Value::number(result); + } + + Value parse_keyword() 
noexcept + { + char32_t current = cur(); + const char32_t* rest; + Value val; + switch (current) + { + case 't': // parse true + rest = U"rue"; + val = Value::boolean(true); + break; + case 'f': // parse false + rest = U"alse"; + val = Value::boolean(false); + break; + case 'n': // parse null + rest = U"ull"; + val = Value::null(nullptr); + break; + default: vcpkg::Checks::exit_fail(VCPKG_LINE_INFO); + } + + for (const char32_t* rest_it = rest; *rest_it != '\0'; ++rest_it) + { + current = next(); + + if (current == Unicode::end_of_file) + { + add_error("Unexpected EOF in middle of keyword"); + return Value(); + } + if (current != *rest_it) + { + add_error("Unexpected character in middle of keyword"); + } + } + next(); + + return val; + } + + Value parse_array() noexcept + { + Checks::check_exit(VCPKG_LINE_INFO, cur() == '['); + next(); + + Array arr; + bool first = true; + for (;;) + { + skip_whitespace(); + + char32_t current = cur(); + if (current == Unicode::end_of_file) + { + add_error("Unexpected EOF in middle of array"); + return Value(); + } + if (current == ']') + { + next(); + return Value::array(std::move(arr)); + } + + if (first) + { + first = false; + } + else if (current == ',') + { + next(); + skip_whitespace(); + current = cur(); + if (current == Unicode::end_of_file) + { + add_error("Unexpected EOF in middle of array"); + return Value(); + } + if (current == ']') + { + add_error("Trailing comma in array"); + return Value::array(std::move(arr)); + } + } + else + { + add_error("Unexpected character in middle of array"); + return Value(); + } + + arr.push_back(parse_value()); + } + } + + std::pair<std::string, Value> parse_kv_pair() noexcept + { + skip_whitespace(); + + auto current = cur(); + + auto res = std::make_pair(std::string(""), Value()); + + if (current == Unicode::end_of_file) + { + add_error("Unexpected EOF; expected property name"); + return res; + } + if (current != '"') + { + add_error("Unexpected character; expected property name"); + 
return res; + } + res.first = parse_string(); + + skip_whitespace(); + current = cur(); + if (current == ':') + { + next(); + } + else if (current == Unicode::end_of_file) + { + add_error("Unexpected EOF; expected colon"); + return res; + } + else + { + add_error("Unexpected character; expected colon"); + return res; + } + + res.second = parse_value(); + + return res; + } + + Value parse_object() noexcept + { + char32_t current = cur(); + + Checks::check_exit(VCPKG_LINE_INFO, current == '{'); + next(); + + Object obj; + bool first = true; + for (;;) + { + skip_whitespace(); + current = cur(); + if (current == Unicode::end_of_file) + { + add_error("Unexpected EOF; expected property or close brace"); + return Value(); + } + else if (current == '}') + { + next(); + return Value::object(std::move(obj)); + } + + if (first) + { + first = false; + } + else if (current == ',') + { + next(); + skip_whitespace(); + current = cur(); + if (current == Unicode::end_of_file) + { + add_error("Unexpected EOF; expected property"); + return Value(); + } + else if (current == '}') + { + add_error("Trailing comma in an object"); + return Value(); + } + } + else + { + add_error("Unexpected character; expected comma or close brace"); + } + + auto val = parse_kv_pair(); + obj.insert(std::move(val.first), std::move(val.second)); + } + } + + Value parse_value() noexcept + { + skip_whitespace(); + char32_t current = cur(); + if (current == Unicode::end_of_file) + { + add_error("Unexpected EOF; expected value"); + return Value(); + } + + switch (current) + { + case '{': return parse_object(); + case '[': return parse_array(); + case '"': return Value::string(parse_string()); + case 'n': + case 't': + case 'f': return parse_keyword(); + default: + if (is_number_start(current)) + { + return parse_number(); + } + else + { + add_error("Unexpected character; expected value"); + return Value(); + } + } + } + + static ExpectedT<std::pair<Value, JsonStyle>, std::unique_ptr<Parse::IParseError>> parse( 
+ StringView json, StringView origin) noexcept + { + auto parser = Parser(json, origin); + + auto val = parser.parse_value(); + + parser.skip_whitespace(); + if (!parser.at_eof()) + { + parser.add_error("Unexpected character; expected EOF"); + return std::move(parser).extract_error(); + } + else if (parser.get_error()) + { + return std::move(parser).extract_error(); + } + else + { + return std::make_pair(std::move(val), parser.style()); + } + } + + JsonStyle style() const noexcept { return style_; } + + private: + JsonStyle style_; + }; + } + + ExpectedT<std::pair<Value, JsonStyle>, std::unique_ptr<Parse::IParseError>> parse_file(const Files::Filesystem& fs, + const fs::path& path, + std::error_code& ec) noexcept + { + auto res = fs.read_contents(path); + if (auto buf = res.get()) + { + return parse(*buf, path); + } + else + { + ec = res.error(); + return std::unique_ptr<Parse::IParseError>(); + } + } + + ExpectedT<std::pair<Value, JsonStyle>, std::unique_ptr<Parse::IParseError>> parse(StringView json, + const fs::path& filepath) noexcept + { + return Parser::parse(json, filepath.generic_u8string()); + } + // } auto parse() + + // auto stringify() { + static std::string& append_unicode_escape(std::string& s, char16_t code_unit) + { + s.append("\\u"); + + // AFAIK, there's no standard way of doing this? 
+ constexpr const char hex_digit[16] = { + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'}; + + s.push_back(hex_digit[(code_unit >> 12) & 0x0F]); + s.push_back(hex_digit[(code_unit >> 8) & 0x0F]); + s.push_back(hex_digit[(code_unit >> 4) & 0x0F]); + s.push_back(hex_digit[(code_unit >> 0) & 0x0F]); + + return s; + } + + // taken from the ECMAScript 2020 standard, 24.5.2.2: Runtime Semantics: QuoteJSONString + static std::string& append_quoted_json_string(std::string& product, StringView sv) + { + // Table 66: JSON Single Character Escape Sequences + constexpr static std::array<std::pair<char32_t, const char*>, 7> escape_sequences = { + std::make_pair(0x0008, R"(\b)"), // BACKSPACE + std::make_pair(0x0009, R"(\t)"), // CHARACTER TABULATION + std::make_pair(0x000A, R"(\n)"), // LINE FEED (LF) + std::make_pair(0x000C, R"(\f)"), // FORM FEED (FF) + std::make_pair(0x000D, R"(\r)"), // CARRIAGE RETURN (CR) + std::make_pair(0x0022, R"(\")"), // QUOTATION MARK + std::make_pair(0x005C, R"(\\)") // REVERSE SOLIDUS + }; + // 1. Let product be the String value consisting solely of the code unit 0x0022 (QUOTATION MARK). + product.push_back('"'); + + // 2. For each code point C in ! UTF16DecodeString(value), do + // (note that we use utf8 instead of utf16) + for (auto code_point : Unicode::Utf8Decoder(sv.begin(), sv.end())) + { + bool matched = false; // early exit boolean + // a. If C is listed in the "Code Point" column of Table 66, then + for (auto pr : escape_sequences) + { + // i. Set product to the string-concatenation of product and the escape sequence for C as specified in + // the "Escape Sequence" column of the corresponding row. + if (code_point == pr.first) + { + product.append(pr.second); + matched = true; + break; + } + } + if (matched) break; + + // b. 
Else if C has a numeric value less than 0x0020 (SPACE), or if C has the same numeric value as a + // leading surrogate or trailing surrogate, then + if (code_point < 0x0020 || Unicode::utf16_is_surrogate_code_point(code_point)) + { + // i. Let unit be the code unit whose numeric value is that of C. + // ii. Set product to the string-concatenation of product and UnicodeEscape(unit). + append_unicode_escape(product, static_cast<char16_t>(code_point)); + break; + } + + // c. Else, + // i. Set product to the string-concatenation of product and the UTF16Encoding of C. + // (again, we use utf-8 here instead) + Unicode::utf8_append_code_point(product, code_point); + } + + // 3. Set product to the string-concatenation of product and the code unit 0x0022 (QUOTATION MARK). + product.push_back('"'); + + // 4. Return product. + return product; + } + + static std::string quote_json_string(StringView sv) + { + std::string product; + append_quoted_json_string(product, sv); + return product; + } + + static void internal_stringify(const Value& value, JsonStyle style, std::string& buffer, int current_indent) + { + const auto append_indent = [&](int indent) { + if (style.use_tabs()) + { + buffer.append(indent, '\t'); + } + else + { + buffer.append(indent * style.spaces(), ' '); + } + }; + switch (value.kind()) + { + case VK::Null: buffer.append("null"); break; + case VK::Boolean: + { + auto v = value.boolean(); + buffer.append(v ? 
"true" : "false"); + break; + } + case VK::Number: buffer.append(std::to_string(value.number())); break; + case VK::String: + { + append_quoted_json_string(buffer, value.string()); + break; + } + case VK::Array: + { + const auto& arr = value.array(); + buffer.push_back('['); + if (arr.size() == 0) + { + buffer.push_back(']'); + } + else + { + bool first = true; + + for (const auto& el : arr) + { + if (!first) + { + buffer.push_back(','); + } + first = false; + + buffer.append(style.newline()); + append_indent(current_indent + 1); + + internal_stringify(el, style, buffer, current_indent + 1); + } + buffer.append(style.newline()); + append_indent(current_indent); + buffer.push_back(']'); + } + break; + } + case VK::Object: + { + const auto& obj = value.object(); + buffer.push_back('{'); + if (obj.size() != 0) + { + bool first = true; + + for (const auto& el : obj) + { + if (!first) + { + buffer.push_back(','); + } + first = false; + + buffer.append(style.newline()); + append_indent(current_indent + 1); + + auto key = quote_json_string(el.first); + buffer.append(key.begin(), key.end()); + buffer.append(": "); + internal_stringify(el.second, style, buffer, current_indent + 1); + } + buffer.append(style.newline()); + append_indent(current_indent); + } + buffer.push_back('}'); + break; + } + } + } + + std::string stringify(const Value& value, JsonStyle style) noexcept + { + std::string res; + internal_stringify(value, style, res, 0); + return res; + } + // } auto stringify() + +} diff --git a/toolsrc/src/vcpkg/parse.cpp b/toolsrc/src/vcpkg/base/parse.cpp index 9e33c95f9..0d2c5f8fc 100644 --- a/toolsrc/src/vcpkg/parse.cpp +++ b/toolsrc/src/vcpkg/base/parse.cpp @@ -1,17 +1,18 @@ #include "pch.h" +#include <vcpkg/base/parse.h> + #include <utility> #include <vcpkg/base/system.print.h> #include <vcpkg/base/util.h> #include <vcpkg/packagespec.h> #include <vcpkg/paragraphparser.h> -#include <vcpkg/parse.h> using namespace vcpkg; namespace vcpkg::Parse { - static void 
advance_rowcol(char ch, int& row, int& column) + static void advance_rowcol(char32_t ch, int& row, int& column) { if (ch == '\t') column = (column + 7) / 8 * 8 + 1; // round to next 8-width tab stop @@ -28,10 +29,14 @@ namespace vcpkg::Parse std::string ParseError::format() const { - int ignore_row = 1; - int spacing = 20; - for (int i = 0; i < caret_col; ++i) - advance_rowcol(line[i], ignore_row, spacing); + auto caret_spacing = std::string(18, ' '); + auto decoder = Unicode::Utf8Decoder(line.data(), line.data() + line.size()); + for (int i = 0; i < caret_col; ++i, ++decoder) + { + const char32_t cp = *decoder; + // this may eventually want to check for full-width characters and grapheme clusters as well + caret_spacing.push_back(cp == '\t' ? '\t' : ' '); + } return Strings::concat("Error: ", origin, @@ -42,49 +47,59 @@ namespace vcpkg::Parse ": ", message, "\n" - " on expression: \"", // 9 columns + " on expression: ", // 18 columns line, - "\"\n", - std::string(spacing - 1, ' '), + "\n", + caret_spacing, "^\n"); } - char ParserBase::next() + ParserBase::ParserBase(StringView text, StringView origin, TextRowCol init_rowcol) + : m_it(text.begin(), text.end()) + , m_start_of_line(m_it) + , m_row(init_rowcol.row_or(1)) + , m_column(init_rowcol.column_or(1)) + , m_text(text) + , m_origin(origin) { - char ch = *m_it; + } + + char32_t ParserBase::next() + { + if (m_it == m_it.end()) + { + return Unicode::end_of_file; + } // See https://www.gnu.org/prep/standards/standards.html#Errors - if (ch == '\0') + advance_rowcol(*m_it, m_row, m_column); + + ++m_it; + if (m_it != m_it.end() && Unicode::utf16_is_surrogate_code_point(*m_it)) { - return '\0'; + m_it = m_it.end(); } - else - advance_rowcol(ch, row, column); - return *++m_it; + + return cur(); } - void ParserBase::add_error(std::string message, const ParserBase::SourceLoc& loc) + void ParserBase::add_error(std::string message, const SourceLoc& loc) { // avoid cascading errors by only saving the first if (!m_err) { - // 
find beginning of line - auto linestart = loc.it; - while (linestart != m_text.c_str()) + // find end of line + auto line_end = loc.it; + while (line_end != line_end.end() && *line_end != '\n' && *line_end != '\r') { - if (linestart[-1] == '\n') break; - --linestart; + ++line_end; } - - // find end of line - auto lineend = loc.it; - while (*lineend != '\n' && *lineend != '\r' && *lineend != '\0') - ++lineend; - m_err.reset(new ParseError(m_origin.c_str(), - loc.row, - loc.column, - static_cast<int>(loc.it - linestart), - {linestart, lineend}, - std::move(message))); + m_err = std::make_unique<ParseError>( + m_origin.to_string(), + loc.row, + loc.column, + static_cast<int>(std::distance(loc.start_of_line, loc.it)), + std::string(loc.start_of_line.pointer_to_current(), line_end.pointer_to_current()), + std::move(message)); } // Avoid error loops by skipping to the end @@ -167,21 +182,19 @@ namespace vcpkg::Parse } ExpectedS<std::vector<std::string>> parse_default_features_list(const std::string& str, - CStringView origin, + StringView origin, TextRowCol textrowcol) { - Parse::ParserBase parser; - parser.init(str, origin, textrowcol); + auto parser = Parse::ParserBase(str, origin, textrowcol); auto opt = parse_list_until_eof<std::string>("default features", parser, &parse_feature_name); if (!opt) return {parser.get_error()->format(), expected_right_tag}; return {std::move(opt).value_or_exit(VCPKG_LINE_INFO), expected_left_tag}; } ExpectedS<std::vector<ParsedQualifiedSpecifier>> parse_qualified_specifier_list(const std::string& str, - CStringView origin, + StringView origin, TextRowCol textrowcol) { - Parse::ParserBase parser; - parser.init(str, origin, textrowcol); + auto parser = Parse::ParserBase(str, origin, textrowcol); auto opt = parse_list_until_eof<ParsedQualifiedSpecifier>( "dependencies", parser, [](ParserBase& parser) { return parse_qualified_specifier(parser); }); if (!opt) return {parser.get_error()->format(), expected_right_tag}; @@ -189,11 +202,10 @@ 
namespace vcpkg::Parse return {std::move(opt).value_or_exit(VCPKG_LINE_INFO), expected_left_tag}; } ExpectedS<std::vector<Dependency>> parse_dependencies_list(const std::string& str, - CStringView origin, + StringView origin, TextRowCol textrowcol) { - Parse::ParserBase parser; - parser.init(str, origin, textrowcol); + auto parser = Parse::ParserBase(str, origin, textrowcol); auto opt = parse_list_until_eof<Dependency>("dependencies", parser, [](ParserBase& parser) { auto loc = parser.cur_loc(); return parse_qualified_specifier(parser).then([&](ParsedQualifiedSpecifier&& pqs) -> Optional<Dependency> { diff --git a/toolsrc/src/vcpkg/base/unicode.cpp b/toolsrc/src/vcpkg/base/unicode.cpp new file mode 100644 index 000000000..41f996cdd --- /dev/null +++ b/toolsrc/src/vcpkg/base/unicode.cpp @@ -0,0 +1,285 @@ +#include "pch.h" + +#include <vcpkg/base/unicode.h> + +#include <vcpkg/base/checks.h> + +namespace vcpkg::Unicode +{ + Utf8CodeUnitKind utf8_code_unit_kind(unsigned char code_unit) noexcept + { + if (code_unit < 0b1000'0000) + { + return Utf8CodeUnitKind::StartOne; + } + else if (code_unit < 0b1100'0000) + { + return Utf8CodeUnitKind::Continue; + } + else if (code_unit < 0b1110'0000) + { + return Utf8CodeUnitKind::StartTwo; + } + else if (code_unit < 0b1111'0000) + { + return Utf8CodeUnitKind::StartThree; + } + else if (code_unit < 0b1111'1000) + { + return Utf8CodeUnitKind::StartFour; + } + else + { + return Utf8CodeUnitKind::Invalid; + } + } + + int utf8_code_unit_count(Utf8CodeUnitKind kind) noexcept { return static_cast<int>(kind); } + int utf8_code_unit_count(char code_unit) noexcept { return utf8_code_unit_count(utf8_code_unit_kind(code_unit)); } + + static int utf8_encode_code_unit_count(char32_t code_point) noexcept + { + if (code_point < 0x80) + { + return 1; + } + else if (code_point < 0x800) + { + return 2; + } + else if (code_point < 0x10000) + { + return 3; + } + else if (code_point < 0x110000) + { + return 4; + } + else + { + 
vcpkg::Checks::exit_with_message( + VCPKG_LINE_INFO, "Invalid code point passed to utf8_encoded_code_point_count (%x)", code_point); + } + } + + int utf8_encode_code_point(char (&array)[4], char32_t code_point) noexcept + { + // count \in {2, 3, 4} + const auto start_code_point = [](char32_t code_point, int count) { + const unsigned char and_mask = 0xFF >> (count + 1); + const unsigned char or_mask = (0xFF << (8 - count)) & 0xFF; + const int shift = 6 * (count - 1); + return static_cast<char>(or_mask | ((code_point >> shift) & and_mask)); + }; + // count \in {2, 3, 4}, byte \in {1, 2, 3} + const auto continue_code_point = [](char32_t code_point, int count, int byte) { + constexpr unsigned char and_mask = 0xFF >> 2; + constexpr unsigned char or_mask = (0xFF << 7) & 0xFF; + const int shift = 6 * (count - byte - 1); + return static_cast<char>(or_mask | ((code_point >> shift) & and_mask)); + }; + + int count = utf8_encode_code_unit_count(code_point); + if (count == 1) + { + array[0] = static_cast<char>(code_point); + return 1; + } + + array[0] = start_code_point(code_point, count); + for (int i = 1; i < count; ++i) + { + array[i] = continue_code_point(code_point, count, i); + } + + return count; + } + + bool utf8_is_valid_string(const char* first, const char* last) noexcept + { + std::error_code ec; + for (auto dec = Utf8Decoder(first, last); dec != dec.end(); dec.next(ec)) + { + } + return !ec; + } + + char32_t utf16_surrogates_to_code_point(char32_t leading, char32_t trailing) + { + vcpkg::Checks::check_exit(VCPKG_LINE_INFO, utf16_is_leading_surrogate_code_point(leading)); + vcpkg::Checks::check_exit(VCPKG_LINE_INFO, utf16_is_trailing_surrogate_code_point(trailing)); + + char32_t res = (leading & 0b11'1111'1111) << 10; + res |= trailing & 0b11'1111'1111; + res += 0x0001'0000; + + return res; + } + + const char* utf8_category::name() const noexcept { return "utf8"; } + std::string utf8_category::message(int condition) const + { + switch 
(static_cast<utf8_errc>(condition)) + { + case utf8_errc::NoError: return "no error"; + case utf8_errc::InvalidCodeUnit: return "invalid code unit"; + case utf8_errc::InvalidCodePoint: return "invalid code point (>0x10FFFF)"; + case utf8_errc::PairedSurrogates: + return "trailing surrogate following leading surrogate (paired surrogates are invalid)"; + case utf8_errc::UnexpectedContinue: return "found continue code unit in start position"; + case utf8_errc::UnexpectedStart: return "found start code unit in continue position"; + case utf8_errc::UnexpectedEof: return "found end of string in middle of code point"; + default: return "error code out of range"; + } + } + + Utf8Decoder::Utf8Decoder() noexcept : current_(end_of_file), next_(nullptr), last_(nullptr) { } + Utf8Decoder::Utf8Decoder(const char* first, const char* last) noexcept : current_(0), next_(first), last_(last) + { + if (next_ != last_) + { + ++*this; + } + else + { + current_ = end_of_file; + } + } + + char const* Utf8Decoder::pointer_to_current() const noexcept + { + if (is_eof()) + { + return last_; + } + + auto count = utf8_encode_code_unit_count(current_); + return next_ - count; + } + + bool Utf8Decoder::is_eof() const noexcept { return current_ == end_of_file; } + char32_t Utf8Decoder::operator*() const noexcept + { + if (is_eof()) + { + Checks::exit_with_message(VCPKG_LINE_INFO, "Dereferenced Utf8Decoder on the end of a string"); + } + return current_; + } + + void Utf8Decoder::next(std::error_code& ec) + { + ec.clear(); + + if (is_eof()) + { + vcpkg::Checks::exit_with_message(VCPKG_LINE_INFO, "Incremented Utf8Decoder at the end of the string"); + } + + if (next_ == last_) + { + current_ = end_of_file; + return; + } + + auto set_error = [&ec, this](utf8_errc err) { + ec = err; + *this = sentinel(); + }; + + unsigned char code_unit = static_cast<unsigned char>(*next_++); + + auto kind = utf8_code_unit_kind(code_unit); + if (kind == Utf8CodeUnitKind::Invalid) + { + return 
set_error(utf8_errc::InvalidCodeUnit); + } + else if (kind == Utf8CodeUnitKind::Continue) + { + return set_error(utf8_errc::UnexpectedContinue); + } + + const int count = utf8_code_unit_count(kind); + if (count == 1) + { + current_ = static_cast<char32_t>(code_unit); + } + else + { + // 2 -> 0b0001'1111, 6 + // 3 -> 0b0000'1111, 12 + // 4 -> 0b0000'0111, 18 + const auto start_mask = static_cast<unsigned char>(0xFF >> (count + 1)); + const int start_shift = 6 * (count - 1); + auto code_point = static_cast<char32_t>(code_unit & start_mask) << start_shift; + + constexpr unsigned char continue_mask = 0b0011'1111; + for (int byte = 1; byte < count; ++byte) + { + if (next_ == last_) + { + return set_error(utf8_errc::UnexpectedContinue); + } + code_unit = static_cast<unsigned char>(*next_++); + + kind = utf8_code_unit_kind(code_unit); + if (kind == Utf8CodeUnitKind::Invalid) + { + return set_error(utf8_errc::InvalidCodeUnit); + } + else if (kind != Utf8CodeUnitKind::Continue) + { + return set_error(utf8_errc::UnexpectedStart); + } + + const int shift = 6 * (count - byte - 1); + code_point |= (code_unit & continue_mask) << shift; + } + + if (code_point > 0x10'FFFF) + { + return set_error(utf8_errc::InvalidCodePoint); + } + else if (utf16_is_trailing_surrogate_code_point(code_point) && + utf16_is_leading_surrogate_code_point(current_)) + { + return set_error(utf8_errc::PairedSurrogates); + } + else + { + current_ = code_point; + } + } + } + + Utf8Decoder& Utf8Decoder::operator++() noexcept + { + std::error_code ec; + next(ec); + if (ec) + { + vcpkg::Checks::exit_with_message(VCPKG_LINE_INFO, ec.message()); + } + + return *this; + } + + Utf8Decoder& Utf8Decoder::operator=(sentinel) noexcept + { + next_ = last_; + current_ = end_of_file; + return *this; + } + + bool operator==(const Utf8Decoder& lhs, const Utf8Decoder& rhs) noexcept + { + if (lhs.last_ != rhs.last_) + { + Checks::exit_with_message(VCPKG_LINE_INFO, + "Comparing Utf8Decoders with different provenance; this is 
always an error"); + } + + return lhs.next_ == rhs.next_; + } + +} diff --git a/toolsrc/src/vcpkg/export.prefab.cpp b/toolsrc/src/vcpkg/export.prefab.cpp index 9a3f240fa..30a3ffa5a 100644 --- a/toolsrc/src/vcpkg/export.prefab.cpp +++ b/toolsrc/src/vcpkg/export.prefab.cpp @@ -18,8 +18,6 @@ namespace vcpkg::Export::Prefab using Install::InstallDir; using System::CPUArchitecture; - - static std::vector<fs::path> find_modules(const VcpkgPaths& system, const fs::path& root, const std::string& ext) { std::vector<fs::path> paths; diff --git a/toolsrc/src/vcpkg/logicexpression.cpp b/toolsrc/src/vcpkg/logicexpression.cpp index e91c57310..c4365194d 100644 --- a/toolsrc/src/vcpkg/logicexpression.cpp +++ b/toolsrc/src/vcpkg/logicexpression.cpp @@ -1,9 +1,9 @@ #include "pch.h" +#include <vcpkg/base/parse.h> #include <vcpkg/base/strings.h> #include <vcpkg/base/system.print.h> #include <vcpkg/logicexpression.h> -#include <vcpkg/parse.h> #include <string> #include <vector> @@ -48,9 +48,9 @@ namespace vcpkg class ExpressionParser : public Parse::ParserBase { public: - ExpressionParser(const std::string& str, const ExpressionContext& context) : evaluation_context(context) + ExpressionParser(const std::string& str, const ExpressionContext& context) : + Parse::ParserBase(str, "CONTROL"), evaluation_context(context) { - this->init(str, "CONTROL"); { auto override_vars = evaluation_context.cmake_context.find("VCPKG_DEP_INFO_OVERRIDE_VARS"); if (override_vars != evaluation_context.cmake_context.end()) @@ -90,7 +90,7 @@ namespace vcpkg bool final_result; - static bool is_identifier_char(char ch) + static bool is_identifier_char(char32_t ch) { return is_upper_alpha(ch) || is_lower_alpha(ch) || is_ascii_digit(ch) || ch == '-'; } diff --git a/toolsrc/src/vcpkg/packagespec.cpp b/toolsrc/src/vcpkg/packagespec.cpp index 702f7aad3..0f3bfd880 100644 --- a/toolsrc/src/vcpkg/packagespec.cpp +++ b/toolsrc/src/vcpkg/packagespec.cpp @@ -1,10 +1,10 @@ #include "pch.h" #include <vcpkg/base/checks.h> 
+#include <vcpkg/base/parse.h> #include <vcpkg/base/util.h> #include <vcpkg/packagespec.h> #include <vcpkg/paragraphparser.h> -#include <vcpkg/parse.h> namespace vcpkg { @@ -102,21 +102,20 @@ namespace vcpkg }); } - static bool is_package_name_char(char ch) + static bool is_package_name_char(char32_t ch) { return Parse::ParserBase::is_lower_alpha(ch) || Parse::ParserBase::is_ascii_digit(ch) || ch == '-'; } - static bool is_feature_name_char(char ch) { + static bool is_feature_name_char(char32_t ch) { // TODO: we do not intend underscores to be valid, however there is currently a feature using them (libwebp[vwebp_sdl]). // TODO: we need to rename this feature, then remove underscores from this list. return is_package_name_char(ch) || ch == '_'; } - ExpectedS<ParsedQualifiedSpecifier> parse_qualified_specifier(CStringView input) + ExpectedS<ParsedQualifiedSpecifier> parse_qualified_specifier(StringView input) { - Parse::ParserBase parser; - parser.init(input, "<unknown>"); + auto parser = Parse::ParserBase(input, "<unknown>"); auto maybe_pqs = parse_qualified_specifier(parser); if (!parser.at_eof()) parser.add_error("expected eof"); if (auto e = parser.get_error()) return e->format(); @@ -236,7 +235,9 @@ namespace vcpkg parser.add_error("unmatched open braces in qualifier", loc); return nullopt; } - ret.qualifier = StringView(loc.it + 1, parser.it()).to_string(); + ret.qualifier = std::string( + (++loc.it).pointer_to_current(), + parser.it().pointer_to_current()); parser.next(); } // This makes the behavior of the parser more consistent -- otherwise, it will skip tabs and spaces only if diff --git a/toolsrc/src/vcpkg/paragraphs.cpp b/toolsrc/src/vcpkg/paragraphs.cpp index 82bc7b109..7032620a1 100644 --- a/toolsrc/src/vcpkg/paragraphs.cpp +++ b/toolsrc/src/vcpkg/paragraphs.cpp @@ -1,13 +1,13 @@ #include "pch.h" #include <vcpkg/base/files.h> +#include <vcpkg/base/parse.h> #include <vcpkg/base/system.debug.h> #include <vcpkg/base/system.print.h> #include 
<vcpkg/base/util.h> #include <vcpkg/binaryparagraph.h> #include <vcpkg/paragraphparseresult.h> #include <vcpkg/paragraphs.h> -#include <vcpkg/parse.h> using namespace vcpkg::Parse; using namespace vcpkg; @@ -67,12 +67,12 @@ namespace vcpkg::Paragraphs } public: - ExpectedS<std::vector<Paragraph>> get_paragraphs(CStringView text, CStringView origin) + PghParser(StringView text, StringView origin) : Parse::ParserBase(text, origin) {} + + ExpectedS<std::vector<Paragraph>> get_paragraphs() { std::vector<Paragraph> paragraphs; - init(text, origin); - skip_whitespace(); while (!at_eof()) { @@ -88,8 +88,7 @@ namespace vcpkg::Paragraphs static ExpectedS<Paragraph> parse_single_paragraph(const std::string& str, const std::string& origin) { - PghParser parser; - auto pghs = parser.get_paragraphs(str, origin); + auto pghs = PghParser(str, origin).get_paragraphs(); if (auto p = pghs.get()) { @@ -126,8 +125,7 @@ namespace vcpkg::Paragraphs ExpectedS<std::vector<Paragraph>> parse_paragraphs(const std::string& str, const std::string& origin) { - PghParser parser; - return parser.get_paragraphs(str, origin); + return PghParser(str, origin).get_paragraphs(); } ParseExpected<SourceControlFile> try_load_port(const Files::Filesystem& fs, const fs::path& path) |
