diff options
Diffstat (limited to 'src/proj_strtod.cpp')
| -rw-r--r-- | src/proj_strtod.cpp | 440 |
1 files changed, 440 insertions, 0 deletions
diff --git a/src/proj_strtod.cpp b/src/proj_strtod.cpp new file mode 100644 index 00000000..05d448ec --- /dev/null +++ b/src/proj_strtod.cpp @@ -0,0 +1,440 @@ +/*********************************************************************** + + proj_strtod: Convert string to double, accepting underscore separators + + Thomas Knudsen, 2017-01-17/09-19 + +************************************************************************ + +Conventionally, PROJ.4 does not honor locale settings, consistently +behaving as if LC_ALL=C. + +For this to work, we have, for many years, been using other solutions +than the C standard library strtod/atof functions for converting strings +to doubles. + +In the early versions of proj, iirc, a gnu version of strtod was used, +mostly to work around cases where the same system library was used for +C and Fortran linking, hence making strtod accept "D" and "d" as +exponentiation indicators, following Fortran Double Precision constant +syntax. This broke the proj angular syntax, accepting a "d" to mean +"degree": 12d34'56", meaning 12 degrees 34 minutes and 56 seconds. + +With an explicit MIT licence, PROJ.4 could not include GPL code any +longer, and apparently at some time, the GPL code was replaced by the +current C port of a GDAL function (in pj_strtod.c), which reads the +LC_NUMERIC setting and, behind the back of the user, momentarily changes +the conventional '.' delimiter to whatever the locale requires, then +calls the system supplied strtod. + +While this requires a minimum amount of coding, it only solves one +problem, and not in a very generic way. + +Another problem, I would like to see solved, is the handling of underscores +as generic delimiters. This is getting popular in a number of programming +languages (Ada, C++, C#, D, Java, Julia, Perl 5, Python, Rust, etc. +cf. e.g. https://www.python.org/dev/peps/pep-0515/), and in our case of +handling numbers being in the order of magnitude of the Earth's dimensions, +and a resolution of submillimetre, i.e. having 10 or more significant digits, +splitting the "wall of digits" into smaller chunks is of immense value. + +Hence this reimplementation of strtod, which hardcodes '.' as indicator of +numeric fractions, and accepts '_' anywhere in a numerical string sequence: +So a typical northing value can be written + + 6_098_907.8250 m +rather than + 6098907.8250 m + +which, in my humble opinion, is well worth the effort. + +While writing this code, I took ample inspiration from Michael Ringgaard's +strtod version over at http://www.jbox.dk/sanos/source/lib/strtod.c.html, +and Yasuhiro Matsumoto's public domain version over at +https://gist.github.com/mattn/1890186. The code below is, however, not +copied from any of the two mentioned - it is a reimplementation, and +probably suffers from its own set of bugs. So for now, it is intended +not as a replacement of pj_strtod, but only as an experimental piece of +code for use in an experimental new transformation program, cct. + +************************************************************************ + +Thomas Knudsen, thokn@sdfe.dk, 2017-01-17/2017-09-18 + +************************************************************************ + +* Copyright (c) 2017 Thomas Knudsen & SDFE +* +* Permission is hereby granted, free of charge, to any person obtaining a +* copy of this software and associated documentation files (the "Software"), +* to deal in the Software without restriction, including without limitation +* the rights to use, copy, modify, merge, publish, distribute, sublicense, +* and/or sell copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice shall be included +* in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +* DEALINGS IN THE SOFTWARE. + +***********************************************************************/ + +#include "proj_strtod.h" + +#include <stdlib.h> /* for abs */ +#include <string.h> /* for strchr */ +#include <errno.h> +#include <ctype.h> +#include <float.h> /* for HUGE_VAL */ +#include <math.h> /* for pow() */ + + +double proj_strtod(const char *str, char **endptr) { + double number = 0, integral_part = 0; + int exponent = 0; + int fraction_is_nonzero = 0; + int sign = 0; + char *p = (char *) str; + int n = 0; + int num_digits_total = 0; + int num_digits_after_comma = 0; + int num_prefixed_zeros = 0; + + if (0==str) { + errno = EFAULT; + if (endptr) + *endptr = p; + return HUGE_VAL; + } + + /* First skip leading whitespace */ + while (isspace(*p)) + p++; + + /* Empty string? */ + if (0==*p) { + if (endptr) + *endptr = (char *) str; + return 0; + } + + /* non-numeric? */ + if (0==strchr("0123456789+-._", *p)) { + if (endptr) + *endptr = (char *) str; + return 0; + } + + /* Then handle optional prefixed sign and skip prefix zeros */ + switch (*p) { + case '-': + sign = -1; + p++; + break; + case '+': + sign = 1; + p++; + break; + default: + if (isdigit(*p) || '_'==*p || '.'==*p) + break; + if (endptr) + *endptr = (char *) str; + return 0; + } + + /* stray sign, as in "+/-"? */ + if (0!=sign && (0==strchr ("0123456789._", *p) || 0==*p)) { + if (endptr) + *endptr = (char *) str; + return 0; + } + + /* skip prefixed zeros before '.' */ + while ('0'==*p || '_'==*p) + p++; + + /* zero? */ + if ((0==*p) || 0==strchr ("0123456789eE.", *p) || isspace(*p)) { + if (endptr) + *endptr = p; + return sign==-1? -0: 0; + } + + /* Now expect a (potentially zero-length) string of digits */ + while (isdigit(*p) || ('_'==*p)) { + if ('_'==*p) { + p++; + continue; + } + number = number * 10. + (*p - '0'); + p++; + num_digits_total++; + } + integral_part = number; + + /* Done? */ + if (0==*p) { + if (endptr) + *endptr = p; + if (sign==-1) + return -number; + return number; + } + + /* Do we have a fractional part? */ + if ('.'==*p) { + p++; + + /* keep on skipping prefixed zeros (i.e. allow writing 1e-20 */ + /* as 0.00000000000000000001 without losing precision) */ + if (0==integral_part) + while ('0'==*p || '_'==*p) { + if ('0'==*p) + num_prefixed_zeros++; + p++; + } + + /* if the next character is nonnumeric, we have reached the end */ + if (0==*p || 0==strchr ("_0123456789eE+-", *p)) { + if (endptr) + *endptr = p; + if (sign==-1) + return -number; + return number; + } + + while (isdigit(*p) || '_'==*p) { + /* Don't let pathologically long fractions destroy precision */ + if ('_'==*p || num_digits_total > 17) { + p++; + continue; + } + + number = number * 10. + (*p - '0'); + if (*p!='0') + fraction_is_nonzero = 1; + p++; + num_digits_total++; + num_digits_after_comma++; + } + + /* Avoid having long zero-tails (4321.000...000) destroy precision */ + if (fraction_is_nonzero) + exponent = -(num_digits_after_comma + num_prefixed_zeros); + else + number = integral_part; + } /* end of fractional part */ + + + /* non-digit */ + if (0==num_digits_total) { + errno = EINVAL; + if (endptr) + *endptr = p; + return HUGE_VAL; + } + + if (sign==-1) + number = -number; + + /* Do we have an exponent part? */ + while (*p == 'e' || *p == 'E') { + p++; + + /* Just a stray "e", as in 100elephants? */ + if (0==*p || 0==strchr ("0123456789+-_", *p)) { + p--; + break; + } + + while ('_'==*p) + p++; + /* Does it have a sign? */ + sign = 0; + if ('-'==*p) + sign = -1; + if ('+'==*p) + sign = +1; + if (0==sign) { + if (!isdigit(*p) && *p!='_') { + if (endptr) + *endptr = p; + return HUGE_VAL; + } + } + else + p++; + + + /* Go on and read the exponent */ + n = 0; + while (isdigit(*p) || '_'==*p) { + if ('_'==*p) { + p++; + continue; + } + n = n * 10 + (*p - '0'); + p++; + } + + if (-1==sign) + n = -n; + exponent += n; + break; + } + + if (endptr) + *endptr = p; + + if ((exponent < DBL_MIN_EXP) || (exponent > DBL_MAX_EXP)) { + errno = ERANGE; + return HUGE_VAL; + } + + /* on some platforms pow() is very slow - so don't call it if exponent is close to 0 */ + if (0==exponent) + return number; + if (abs (exponent) < 20) { + double ex = 1; + int absexp = exponent < 0? -exponent: exponent; + while (absexp--) + ex *= 10; + number = exponent < 0? number / ex: number * ex; + } + else + number *= pow (10, exponent); + + return number; +} + +double proj_atof(const char *str) { + return proj_strtod(str, nullptr); +} + +#ifdef TEST + +/* compile/run: gcc -DTEST -o proj_strtod_test proj_strtod.c && proj_strtod_test */ + +#include <stdio.h> + +char *un_underscore (char *s) { + static char u[1024]; + int i, m, n; + for (i = m = 0, n = strlen (s); i < n; i++) { + if (s[i]=='_') { + m++; + continue; + } + u[i - m] = s[i]; + } + u[n-m] = 0; + return u; +} + +int thetest (char *s, int line) { + char *endp, *endq, *u; + double p, q; + int errnop, errnoq, prev_errno; + + prev_errno = errno; + + u = un_underscore (s); + + errno = 0; + p = proj_strtod (s, &endp); + errnop = errno; + errno = 0; + q = strtod (u, &endq); + errnoq = errno; + + errno = prev_errno; + + if (q==p && 0==strcmp (endp, endq) && errnop==errnoq) + return 0; + + errno = line; + printf ("Line: %3.3d - [%s] [%s]\n", line, s, u); + printf ("proj_strtod: %2d %.17g [%s]\n", errnop, p, endp); + printf ("libc_strtod: %2d %.17g [%s]\n", errnoq, q, endq); + return 1; +} + +#define test(s) thetest(s, __LINE__) + +int main (int argc, char **argv) { + double res; + char *endptr; + + errno = 0; + + test (""); + test (" "); + test (" abcde"); + test (" edcba"); + test ("abcde"); + test ("edcba"); + test ("+"); + test ("-"); + test ("+ "); + test ("- "); + test (" + "); + test (" - "); + test ("e 1"); + test ("e1"); + test ("0 66"); + test ("1."); + test ("0."); + test ("1.0"); + test ("0.0"); + test ("1 "); + test ("0 "); + test ("-0 "); + test ("0_ "); + test ("0_"); + test ("1e"); + test ("_1.0"); + test ("_0.0"); + test ("1_.0"); + test ("0_.0"); + test ("1__.0"); + test ("0__.0"); + test ("1.__0"); + test ("0.__0"); + test ("1.0___"); + test ("0.0___"); + test ("1e2"); + test ("__123_456_789_._10_11_12"); + test ("1______"); + test ("1___e__2__"); + test ("-1"); + test ("-1.0"); + test ("-0"); + test ("-1e__-_2__rest"); + test ("0.00002"); + test ("0.00001"); + test ("-0.00002"); + test ("-0.00001"); + test ("-0.00001e-2"); + test ("-0.00001e2"); + test ("1e9999"); + + /* We expect this one to differ */ + test ("0.000000000000000000000000000000000000000000000000000000000000000000000000002"); + + if (errno) + printf ("First discrepancy in line %d\n", errno); + + if (argc < 2) + return 0; + res = proj_strtod (argv[1], &endptr); + printf ("res = %20.15g. Rest = [%s], errno = %d\n", res, endptr, (int) errno); + return 0; +} +#endif |
