aboutsummaryrefslogtreecommitdiff
path: root/src/metadata.cpp
diff options
context:
space:
mode:
authorEven Rouault <even.rouault@spatialys.com>2018-12-04 14:46:41 +0100
committerEven Rouault <even.rouault@spatialys.com>2018-12-04 14:46:41 +0100
commitd06c1c55c1c3fc7209abdbdfbf2e3cf34f18cf98 (patch)
tree98fe336a41f1397280ee665d08040339967d711b /src/metadata.cpp
parentaddf30e4446fd39891fd5bdcb22413ed41e0913b (diff)
downloadPROJ-d06c1c55c1c3fc7209abdbdfbf2e3cf34f18cf98.tar.gz
PROJ-d06c1c55c1c3fc7209abdbdfbf2e3cf34f18cf98.zip
Improve recognition of WKT1 datum names
Diffstat (limited to 'src/metadata.cpp')
-rw-r--r--src/metadata.cpp60
1 files changed, 58 insertions, 2 deletions
diff --git a/src/metadata.cpp b/src/metadata.cpp
index 033782c9..af8dc1fe 100644
--- a/src/metadata.cpp
+++ b/src/metadata.cpp
@@ -1106,6 +1106,40 @@ static bool isIgnoredChar(char ch) {
// ---------------------------------------------------------------------------
//! @cond Doxygen_Suppress
+static const struct utf8_to_lower { // One entry per accented lowercase letter that is folded to plain ASCII.
+ const char *utf8; // NUL-terminated UTF-8 byte sequence of the accented letter
+ char ascii; // unaccented ASCII letter substituted for that sequence
+} map_utf8_to_lower[] = {
+ {"\xc3\xa1", 'a'}, // U+00E1: a acute
+ {"\xc3\xa4", 'a'}, // U+00E4: a diaeresis (trema)
+
+ {"\xc4\x9b", 'e'}, // U+011B: e caron (hacek)
+ {"\xc3\xa8", 'e'}, // U+00E8: e grave
+ {"\xc3\xa9", 'e'}, // U+00E9: e acute
+ {"\xc3\xab", 'e'}, // U+00EB: e diaeresis (trema)
+
+ {"\xc3\xad", 'i'}, // U+00ED: i acute
+
+ {"\xc3\xb4", 'o'}, // U+00F4: o circumflex
+ {"\xc3\xb6", 'o'}, // U+00F6: o diaeresis (trema)
+
+ {"\xc3\xa7", 'c'}, // U+00E7: c cedilla
+};
+
+static const struct utf8_to_lower *get_ascii_replacement(const char *c_str) { // Returns the map_utf8_to_lower entry whose UTF-8 sequence is a prefix of c_str, or nullptr if none is.
+ for (const auto &pair : map_utf8_to_lower) {
+ if (*c_str == pair.utf8[0] && // cheap first-byte comparison before the full prefix comparison
+ strncmp(c_str, pair.utf8, strlen(pair.utf8)) == 0) { // compares only strlen(pair.utf8) bytes, i.e. a prefix match
+ return &pair; // callers use ->ascii as the substitute and strlen(->utf8) to know how many input bytes were matched
+ }
+ }
+ return nullptr; // no table entry starts at c_str
+}
+//! @endcond
+
+// ---------------------------------------------------------------------------
+
+//! @cond Doxygen_Suppress
std::string Identifier::canonicalizeName(const std::string &str) {
std::string res;
const char *c_str = str.c_str();
@@ -1121,6 +1155,14 @@ std::string Identifier::canonicalizeName(const std::string &str) {
++i;
continue;
}
+ if (static_cast<unsigned char>(ch) > 127) {
+ const auto *replacement = get_ascii_replacement(c_str + i);
+ if (replacement) {
+ res.push_back(replacement->ascii);
+ i += strlen(replacement->utf8) - 1;
+ continue;
+ }
+ }
if (!isIgnoredChar(ch)) {
res.push_back(static_cast<char>(::tolower(ch)));
}
@@ -1142,8 +1184,8 @@ bool Identifier::isEquivalentName(const char *a, const char *b) noexcept {
char lastValidA = 0;
char lastValidB = 0;
while (a[i] != 0 && b[j] != 0) {
- const char aCh = a[i];
- const char bCh = b[j];
+ char aCh = a[i];
+ char bCh = b[j];
if (aCh == ' ' && a[i + 1] == '+' && a[i + 2] == ' ') {
i += 3;
continue;
@@ -1172,6 +1214,20 @@ bool Identifier::isEquivalentName(const char *a, const char *b) noexcept {
lastValidB = '9';
continue;
}
+ if (static_cast<unsigned char>(aCh) > 127) {
+ const auto *replacement = get_ascii_replacement(a + i);
+ if (replacement) {
+ aCh = replacement->ascii;
+ i += strlen(replacement->utf8) - 1;
+ }
+ }
+ if (static_cast<unsigned char>(bCh) > 127) {
+ const auto *replacement = get_ascii_replacement(b + j);
+ if (replacement) {
+ bCh = replacement->ascii;
+ j += strlen(replacement->utf8) - 1;
+ }
+ }
if (::tolower(aCh) != ::tolower(bCh)) {
return false;
}