diff options
| author | Even Rouault <even.rouault@spatialys.com> | 2018-12-04 14:46:41 +0100 |
|---|---|---|
| committer | Even Rouault <even.rouault@spatialys.com> | 2018-12-04 14:46:41 +0100 |
| commit | d06c1c55c1c3fc7209abdbdfbf2e3cf34f18cf98 (patch) | |
| tree | 98fe336a41f1397280ee665d08040339967d711b /src/metadata.cpp | |
| parent | addf30e4446fd39891fd5bdcb22413ed41e0913b (diff) | |
| download | PROJ-d06c1c55c1c3fc7209abdbdfbf2e3cf34f18cf98.tar.gz PROJ-d06c1c55c1c3fc7209abdbdfbf2e3cf34f18cf98.zip | |
Improve recognition of WKT1 datum names
Diffstat (limited to 'src/metadata.cpp')
| -rw-r--r-- | src/metadata.cpp | 60 |
1 files changed, 58 insertions, 2 deletions
diff --git a/src/metadata.cpp b/src/metadata.cpp index 033782c9..af8dc1fe 100644 --- a/src/metadata.cpp +++ b/src/metadata.cpp @@ -1106,6 +1106,40 @@ static bool isIgnoredChar(char ch) { // --------------------------------------------------------------------------- //! @cond Doxygen_Suppress +static const struct utf8_to_lower { + const char *utf8; + char ascii; +} map_utf8_to_lower[] = { + {"\xc3\xa1", 'a'}, // a acute (U+00E1) + {"\xc3\xa4", 'a'}, // a diaeresis (U+00E4) + + {"\xc4\x9b", 'e'}, // e caron (U+011B) + {"\xc3\xa8", 'e'}, // e grave (U+00E8) + {"\xc3\xa9", 'e'}, // e acute (U+00E9) + {"\xc3\xab", 'e'}, // e diaeresis (U+00EB) + + {"\xc3\xad", 'i'}, // i acute (U+00ED) + + {"\xc3\xb4", 'o'}, // o circumflex (U+00F4) + {"\xc3\xb6", 'o'}, // o diaeresis (U+00F6) + + {"\xc3\xa7", 'c'}, // c cedilla (U+00E7) +}; + +static const struct utf8_to_lower *get_ascii_replacement(const char *c_str) { + for (const auto &pair : map_utf8_to_lower) { + if (*c_str == pair.utf8[0] && + strncmp(c_str, pair.utf8, strlen(pair.utf8)) == 0) { + return &pair; + } + } + return nullptr; +} +//! @endcond + +// --------------------------------------------------------------------------- + +//! 
@cond Doxygen_Suppress std::string Identifier::canonicalizeName(const std::string &str) { std::string res; const char *c_str = str.c_str(); @@ -1121,6 +1155,14 @@ std::string Identifier::canonicalizeName(const std::string &str) { ++i; continue; } + if (static_cast<unsigned char>(ch) > 127) { + const auto *replacement = get_ascii_replacement(c_str + i); + if (replacement) { + res.push_back(replacement->ascii); + i += strlen(replacement->utf8) - 1; + continue; + } + } if (!isIgnoredChar(ch)) { res.push_back(static_cast<char>(::tolower(ch))); } @@ -1142,8 +1184,8 @@ bool Identifier::isEquivalentName(const char *a, const char *b) noexcept { char lastValidA = 0; char lastValidB = 0; while (a[i] != 0 && b[j] != 0) { - const char aCh = a[i]; - const char bCh = b[j]; + char aCh = a[i]; + char bCh = b[j]; if (aCh == ' ' && a[i + 1] == '+' && a[i + 2] == ' ') { i += 3; continue; @@ -1172,6 +1214,20 @@ bool Identifier::isEquivalentName(const char *a, const char *b) noexcept { lastValidB = '9'; continue; } + if (static_cast<unsigned char>(aCh) > 127) { + const auto *replacement = get_ascii_replacement(a + i); + if (replacement) { + aCh = replacement->ascii; + i += strlen(replacement->utf8) - 1; + } + } + if (static_cast<unsigned char>(bCh) > 127) { + const auto *replacement = get_ascii_replacement(b + j); + if (replacement) { + bCh = replacement->ascii; + j += strlen(replacement->utf8) - 1; + } + } if (::tolower(aCh) != ::tolower(bCh)) { return false; } |
