summaryrefslogtreecommitdiff
path: root/thirdparty/vsv.c
diff options
context:
space:
mode:
Diffstat (limited to 'thirdparty/vsv.c')
-rw-r--r--thirdparty/vsv.c1962
1 files changed, 1962 insertions, 0 deletions
diff --git a/thirdparty/vsv.c b/thirdparty/vsv.c
new file mode 100644
index 0000000..25992c4
--- /dev/null
+++ b/thirdparty/vsv.c
@@ -0,0 +1,1962 @@
+/*
+** 2020-02-08 modified by Keith Medcalf who also disclaims all copyright
+** on the modifications and hereby places this code in the public domain
+**
+** This file contains the implementation of an SQLite virtual table for
+** reading VSV (Variably Separated Values), which are like CSV files,
+** but subtly different. VSV supports a number of extensions to the
+** CSV format as well as more processing options.
+**
+**
+** Usage:
+**
+** create virtual table temp.vsv using vsv(...);
+** select * from vsv;
+**
+** The parameters to the vsv module (the vsv(...) part) are as follows:
+**
+** filename=STRING the filename, passed to the Operating System
+** data=STRING alternative data
+** schema=STRING Alternate Schema to use
+** columns=N columns parsed from the VSV file
+** header=BOOL whether or not a header row is present
+** skip=N number of leading data rows to skip
+** rsep=STRING record separator
+** fsep=STRING field separator
+** validatetext=BOOL validate UTF-8 encoding of text fields
+** affinity=AFFINITY affinity to apply to each returned value
+** nulls=BOOL empty fields are returned as NULL
+**
+**
+** Defaults:
+**
+** filename / data nothing. You must provide one or the other
+** it is an error to provide both or neither
+** schema nothing. If not provided then one will be
+** generated for you from the header, or if no
+** header is available then autogenerated using
+** field names manufactured as cX where X is the
+** column number
+** columns nothing. If not specified then the number of
+** columns is determined by counting the fields
+** in the first record of the VSV file (which
+** will be the header row if header is specified),
+** the number of columns is not parsed from the
+** schema even if one is provided
+** header=no no header row in the VSV file
+** skip=0 do not skip any data rows in the VSV file
+** fsep=',' default field separator is a comma
+** rsep='\n' default record separator is a newline
+** validatetext=no do not validate text field encoding
+** affinity=none do not apply affinity to each returned value
+** nulls=off empty fields returned as zero-length
+**
+**
+** Parameter types:
+**
+** STRING means a quoted string
+** N means a whole number not containing a sign
+** BOOL means something that evaluates as true or false
+** it is case insensitive
+** yes, no, true, false, 1, 0
+** AFFINITY means an SQLite3 type specification
+** it is case insensitive
+** none, blob, text, integer, real, numeric
+**
+** STRING means a quoted string. The quote character may be either
+** a single quote or a double quote. Two quote characters in a row
+** will be replaced with a single quote character. STRINGS do not
+** need to be quoted if it is obvious where they begin and end
+** (that is, they do not contain a comma). Leading and trailing
+** spaces will be trimmed from unquoted strings.
+**
+** filename =./this/filename.here, ...
+** filename =./this/filename.here , ...
+** filename = ./this/filename.here, ...
+** filename = ./this/filename.here , ...
+** filename = './this/filename.here', ...
+** filename = "./this/filename.here", ...
+**
+** are all equivalent.
+**
+** BOOL defaults to true so the following specifications are all the
+** same:
+**
+** header = true
+** header = yes
+** header = 1
+** header
+**
+**
+** Specific Parameters:
+**
+** The platform/compiler/OS fopen call is responsible for interpreting
+** the filename. It may contain anything recognized by the OS.
+**
+** The separator string containing exactly one character, or a valid
+** escape sequence. Recognized escape sequences are:
+**
+** \t horizontal tab, ascii character 9 (0x09)
+** \n linefeed, ascii character 10 (0x0a)
+** \v vertical tab, ascii character 11 (0x0b)
+** \f form feed, ascii character 12 (0x0c)
+** \xhh specific byte where hh is hexadecimal
+**
+** The validatetext setting will cause the validity of the field
+** encoding (not its contents) to be verified. It effects how
+** fields that are supposed to contain text will be returned to
+** the SQLite3 library in order to prevent invalid utf8 data from
+** being stored or processed as if it were valid utf8 text.
+**
+** The nulls option will cause fields that do not contain anything
+** to return NULL rather than an empty result. Two separators
+** side-by-each with no intervening characters at all will be
+** returned as NULL if nulls is true and if nulls is false or
+** the contents are explicity empty ("") then a 0 length blob
+** (if affinity=blob) or 0 length text string.
+**
+** For the affinity setting, the following processing is applied to
+** each value returned by the VSV virtual table:
+**
+** none no affinity is applied, all fields will be
+** returned as text just like in the original
+** csv module, embedded nulls will terminate
+** the text. if validatetext is in effect then
+** an error will be thrown if the field does
+** not contain validly encoded text or contains
+** embedded nulls
+**
+** blob all fields will be returned as blobs
+** validatetext has no effect
+**
+** text all fields will be returned as text just
+** like in the original csv module, embedded
+** nulls will terminate the text.
+** if validatetext is in effect then a blob
+** will be returned if the field does not
+** contain validly encoded text or the field
+** contains embedded nulls
+**
+** integer if the field data looks like an integer,
+** (regex "^ *(\+|-)?\d+ *$"),
+** then an integer will be returned as
+** provided by the compiler and platform
+** runtime strtoll function
+** otherwise the field will be processed as
+** text as defined above
+**
+** real if the field data looks like a number,
+** (regex "^ *(\+|-)?(\d+\.?\d*|\d*\.?\d+)([eE](\+|-)?\d+)? *$")
+** then a double will be returned as
+** provided by the compiler and platform
+** runtime strtold function otherwise the
+** field will be processed as text as
+** defined above
+**
+** numeric if the field looks like an integer
+** (see integer above) that integer will be
+** returned
+** if the field looks like a number
+** (see real above) then the number will
+** returned as an integer if it has no
+** fractional part and
+** (a) your platform/compiler supports
+** long double and the number will fit in
+** a 64-bit integer; or,
+** (b) your platform/compiler does not
+** support long double (treats it as a double)
+** then a 64-bit integer will only be returned
+** if the value would fit in a 6-byte varint,
+** otherwise a double will be returned
+**
+** The nulls option will cause fields that do not contain anything
+** to return NULL rather than an empty result. Two separators
+** side-by-each with no intervening characters at all will be
+** returned as NULL if nulls is true and if nulls is false or
+** the contents are explicity empty ("") then a 0 length blob
+** (if affinity=blob) or 0 length text string will be returned.
+**
+*/
+/*
+** 2016-05-28
+**
+** The author disclaims copyright to this source code. In place of
+** a legal notice, here is a blessing:
+**
+** May you do good and not evil.
+** May you find forgiveness for yourself and forgive others.
+** May you share freely, never taking more than you give.
+**
+******************************************************************************
+**
+** This file contains the implementation of an SQLite virtual table for
+** reading CSV files.
+**
+** Usage:
+**
+** .load ./csv
+** CREATE VIRTUAL TABLE temp.csv USING csv(filename=FILENAME);
+** SELECT * FROM csv;
+**
+** The columns are named "c1", "c2", "c3", ... by default. Or the
+** application can define its own CREATE TABLE statement using the
+** schema= parameter, like this:
+**
+** CREATE VIRTUAL TABLE temp.csv2 USING csv(
+** filename = "../http.log",
+** schema = "CREATE TABLE x(date,ipaddr,url,referrer,userAgent)"
+** );
+**
+** Instead of specifying a file, the text of the CSV can be loaded using
+** the data= parameter.
+**
+** If the columns=N parameter is supplied, then the CSV file is assumed to have
+** N columns. If both the columns= and schema= parameters are omitted, then
+** the number and names of the columns is determined by the first line of
+** the CSV input.
+**
+*/
+#include <sqlite3ext.h>
+SQLITE_EXTENSION_INIT1
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <stdarg.h>
+#include <ctype.h>
+#include <stdio.h>
+#include <math.h>
+
+#ifdef SQLITE_HAVE_ZLIB
+#include <zlib.h>
+#define fopen gzopen
+#define fclose gzclose
+#define fread gzfread
+#define fseek gzseek
+#define ftell gztell
+#endif
+
+#ifndef SQLITE_OMIT_VIRTUALTABLE
+
+/*
+** A macro to hint to the compiler that a function should not be
+** inlined.
+*/
+#if defined(__GNUC__)
+#define VSV_NOINLINE __attribute__((noinline))
+#elif defined(_MSC_VER) && _MSC_VER>=1310
+#define VSV_NOINLINE __declspec(noinline)
+#else
+#define VSV_NOINLINE
+#endif
+
+
+/*
+** Max size of the error message in a VsvReader
+*/
+#define VSV_MXERR 200
+
+/*
+** Size of the VsvReader input buffer
+*/
+#define VSV_INBUFSZ 1024
+
+/*
+** A context object used when read a VSV file.
+*/
+typedef struct VsvReader VsvReader;
+struct VsvReader
+{
+#ifdef SQLITE_HAVE_ZLIB
+ gzFile in; /* Read the VSV text from this compressed input stream */
+#else
+ FILE *in; /* Read the VSV text from this input stream */
+#endif
+ char *z; /* Accumulated text for a field */
+ int n; /* Number of bytes in z */
+ int nAlloc; /* Space allocated for z[] */
+ int nLine; /* Current line number */
+ int bNotFirst; /* True if prior text has been seen */
+ int cTerm; /* Character that terminated the most recent field */
+ int fsep; /* Field Seperator Character */
+ int rsep; /* Record Seperator Character */
+ int affinity; /* Perform Affinity Conversions */
+ int notNull; /* Have we seen data for field */
+ size_t iIn; /* Next unread character in the input buffer */
+ size_t nIn; /* Number of characters in the input buffer */
+ char *zIn; /* The input buffer */
+ char zErr[VSV_MXERR]; /* Error message */
+};
+
+/*
+** Initialize a VsvReader object
+*/
+static void vsv_reader_init(VsvReader *p)
+{
+ p->in = 0;
+ p->z = 0;
+ p->n = 0;
+ p->nAlloc = 0;
+ p->nLine = 0;
+ p->bNotFirst = 0;
+ p->nIn = 0;
+ p->zIn = 0;
+ p->notNull = 0;
+ p->zErr[0] = 0;
+}
+
+/*
+** Close and reset a VsvReader object
+*/
+static void vsv_reader_reset(VsvReader *p)
+{
+ if (p->in)
+ {
+ fclose(p->in);
+ sqlite3_free(p->zIn);
+ }
+ sqlite3_free(p->z);
+ vsv_reader_init(p);
+}
+
+/*
+** Report an error on a VsvReader
+*/
+static void vsv_errmsg(VsvReader *p, const char *zFormat, ...)
+{
+ va_list ap;
+ va_start(ap, zFormat);
+ sqlite3_vsnprintf(VSV_MXERR, p->zErr, zFormat, ap);
+ va_end(ap);
+}
+
+/*
+** Open the file associated with a VsvReader
+** Return the number of errors.
+*/
+static int vsv_reader_open(
+ VsvReader *p, /* The reader to open */
+ const char *zFilename, /* Read from this filename */
+ const char *zData /* ... or use this data */
+ )
+{
+ if (zFilename)
+ {
+ p->zIn = sqlite3_malloc(VSV_INBUFSZ);
+ if (p->zIn==0)
+ {
+ vsv_errmsg(p, "out of memory");
+ return 1;
+ }
+ p->in = fopen(zFilename, "rb");
+ if (p->in==0)
+ {
+ sqlite3_free(p->zIn);
+ vsv_reader_reset(p);
+ vsv_errmsg(p, "cannot open '%s' for reading", zFilename);
+ return 1;
+ }
+ }
+ else
+ {
+ assert( p->in==0 );
+ p->zIn = (char*)zData;
+ p->nIn = strlen(zData);
+ }
+ return 0;
+}
+
+/*
+** The input buffer has overflowed. Refill the input buffer, then
+** return the next character
+*/
+static VSV_NOINLINE int vsv_getc_refill(VsvReader *p)
+{
+ size_t got;
+
+ assert( p->iIn>=p->nIn ); /* Only called on an empty input buffer */
+ assert( p->in!=0 ); /* Only called if reading from a file */
+
+ got = fread(p->zIn, 1, VSV_INBUFSZ, p->in);
+ if (got==0)
+ {
+ return EOF;
+ }
+ p->nIn = got;
+ p->iIn = 1;
+ return p->zIn[0];
+}
+
+/*
+** Return the next character of input. Return EOF at end of input.
+*/
+static int vsv_getc(VsvReader *p)
+{
+ if (p->iIn >= p->nIn)
+ {
+ if (p->in!=0)
+ {
+ return vsv_getc_refill(p);
+ }
+ return EOF;
+ }
+ return((unsigned char*)p->zIn)[p->iIn++];
+}
+
+/*
+** Increase the size of p->z and append character c to the end.
+** Return 0 on success and non-zero if there is an OOM error
+*/
+static VSV_NOINLINE int vsv_resize_and_append(VsvReader *p, char c)
+{
+ char *zNew;
+ int nNew = p->nAlloc*2 + 100;
+ zNew = sqlite3_realloc64(p->z, nNew);
+ if (zNew)
+ {
+ p->z = zNew;
+ p->nAlloc = nNew;
+ p->z[p->n++] = c;
+ return 0;
+ }
+ else
+ {
+ vsv_errmsg(p, "out of memory");
+ return 1;
+ }
+}
+
+/*
+** Append a single character to the VsvReader.z[] array.
+** Return 0 on success and non-zero if there is an OOM error
+*/
+static int vsv_append(VsvReader *p, char c)
+{
+ if (p->n>=p->nAlloc-1)
+ {
+ return vsv_resize_and_append(p, c);
+ }
+ p->z[p->n++] = c;
+ return 0;
+}
+
+/*
+** Read a single field of VSV text. Compatible with rfc4180 and extended
+** with the option of having a separator other than ",".
+**
+** + Input comes from p->in.
+** + Store results in p->z of length p->n. Space to hold p->z comes
+** from sqlite3_malloc64().
+** + Keep track of the line number in p->nLine.
+** + Store the character that terminates the field in p->cTerm. Store
+** EOF on end-of-file.
+**
+** Return 0 at EOF or on OOM. On EOF, the p->cTerm character will have
+** been set to EOF.
+*/
+static char *vsv_read_one_field(VsvReader *p)
+{
+ int c;
+ p->notNull = 0;
+ p->n = 0;
+ c = vsv_getc(p);
+ if (c==EOF)
+ {
+ p->cTerm = EOF;
+ return 0;
+ }
+ if (c=='"')
+ {
+ int pc, ppc;
+ int startLine = p->nLine;
+ p->notNull = 1;
+ pc = ppc = 0;
+ while (1)
+ {
+ c = vsv_getc(p);
+ if (c=='\n')
+ {
+ p->nLine++;
+ }
+ if (c=='"' && pc=='"')
+ {
+ pc = ppc;
+ ppc = 0;
+ continue;
+ }
+ if ( (c==p->fsep && pc=='"')
+ || (c==p->rsep && pc=='"')
+ || (p->rsep=='\n' && c=='\n' && pc=='\r' && ppc=='"')
+ || (c==EOF && pc=='"')
+ )
+ {
+ do
+ {
+ p->n--;
+ }
+ while (p->z[p->n]!='"');
+ p->cTerm = (char)c;
+ break;
+ }
+ if (pc=='"' && p->rsep=='\n' && c!='\r')
+ {
+ vsv_errmsg(p, "line %d: unescaped %c character", p->nLine, '"');
+ break;
+ }
+ if (c==EOF)
+ {
+ vsv_errmsg(p, "line %d: unterminated %c-quoted field\n", startLine, '"');
+ p->cTerm = (char)c;
+ break;
+ }
+ if (vsv_append(p, (char)c))
+ {
+ return 0;
+ }
+ ppc = pc;
+ pc = c;
+ }
+ }
+ else
+ {
+ /*
+ ** If this is the first field being parsed and it begins with the
+ ** UTF-8 BOM (0xEF BB BF) then skip the BOM
+ */
+ if ((c&0xff)==0xef && p->bNotFirst==0)
+ {
+ vsv_append(p, (char)c);
+ c = vsv_getc(p);
+ if ((c&0xff)==0xbb)
+ {
+ vsv_append(p, (char)c);
+ c = vsv_getc(p);
+ if ((c&0xff)==0xbf)
+ {
+ p->bNotFirst = 1;
+ p->n = 0;
+ return vsv_read_one_field(p);
+ }
+ }
+ }
+ while (c!=EOF && c!=p->rsep && c!=p->fsep)
+ {
+ if (c=='\n')
+ p->nLine++;
+ if (!p->notNull)
+ p->notNull = 1;
+ if (vsv_append(p, (char)c))
+ return 0;
+ c = vsv_getc(p);
+ }
+ if (c=='\n')
+ {
+ p->nLine++;
+ }
+ if (p->n>0 && (p->rsep=='\n' || p->fsep=='\n') && p->z[p->n-1]=='\r')
+ {
+ p->n--;
+ if (p->n==0)
+ {
+ p->notNull = 0;
+ }
+ }
+ p->cTerm = (char)c;
+ }
+ assert( p->z==0 || p->n<p->nAlloc );
+ if (p->z)
+ {
+ p->z[p->n] = 0;
+ }
+ p->bNotFirst = 1;
+ return p->z;
+}
+
+
+/*
+** Forward references to the various virtual table methods implemented
+** in this file.
+*/
+static int vsvtabCreate(sqlite3*, void*, int, const char*const*, sqlite3_vtab**,char**);
+static int vsvtabConnect(sqlite3*, void*, int, const char*const*, sqlite3_vtab**,char**);
+static int vsvtabBestIndex(sqlite3_vtab*,sqlite3_index_info*);
+static int vsvtabDisconnect(sqlite3_vtab*);
+static int vsvtabOpen(sqlite3_vtab*, sqlite3_vtab_cursor**);
+static int vsvtabClose(sqlite3_vtab_cursor*);
+static int vsvtabFilter(sqlite3_vtab_cursor*, int idxNum, const char *idxStr, int argc, sqlite3_value **argv);
+static int vsvtabNext(sqlite3_vtab_cursor*);
+static int vsvtabEof(sqlite3_vtab_cursor*);
+static int vsvtabColumn(sqlite3_vtab_cursor*,sqlite3_context*,int);
+static int vsvtabRowid(sqlite3_vtab_cursor*,sqlite3_int64*);
+
+/*
+** An instance of the VSV virtual table
+*/
+typedef struct VsvTable
+{
+ sqlite3_vtab base; /* Base class. Must be first */
+ char *zFilename; /* Name of the VSV file */
+ char *zData; /* Raw VSV data in lieu of zFilename */
+ long iStart; /* Offset to start of data in zFilename */
+ int nCol; /* Number of columns in the VSV file */
+ int fsep; /* The field seperator for this VSV file */
+ int rsep; /* The record seperator for this VSV file */
+ int affinity; /* Perform affinity conversions */
+ int nulls; /* Process NULLs */
+ int validateUTF8; /* Validate UTF8 */
+ unsigned int tstFlags; /* Bit values used for testing */
+} VsvTable;
+
+/*
+** Allowed values for tstFlags
+*/
+#define VSVTEST_FIDX 0x0001 /* Pretend that constrained searchs cost less*/
+
+/*
+** A cursor for the VSV virtual table
+*/
+typedef struct VsvCursor
+{
+ sqlite3_vtab_cursor base; /* Base class. Must be first */
+ VsvReader rdr; /* The VsvReader object */
+ char **azVal; /* Value of the current row */
+ int *aLen; /* Allocation Length of each entry */
+ int *dLen; /* Data Length of each entry */
+ sqlite3_int64 iRowid; /* The current rowid. Negative for EOF */
+} VsvCursor;
+
+/*
+** Transfer error message text from a reader into a VsvTable
+*/
+static void vsv_xfer_error(VsvTable *pTab, VsvReader *pRdr)
+{
+ sqlite3_free(pTab->base.zErrMsg);
+ pTab->base.zErrMsg = sqlite3_mprintf("%s", pRdr->zErr);
+}
+
+/*
+** This method is the destructor for a VsvTable object.
+*/
+static int vsvtabDisconnect(sqlite3_vtab *pVtab)
+{
+ VsvTable *p = (VsvTable*)pVtab;
+ sqlite3_free(p->zFilename);
+ sqlite3_free(p->zData);
+ sqlite3_free(p);
+ return SQLITE_OK;
+}
+
+/*
+** Skip leading whitespace. Return a pointer to the first non-whitespace
+** character, or to the zero terminator if the string has only whitespace
+*/
+static const char *vsv_skip_whitespace(const char *z)
+{
+ while (isspace((unsigned char)z[0]))
+ {
+ z++;
+ }
+ return z;
+}
+
+/*
+** Remove trailing whitespace from the end of string z[]
+*/
+static void vsv_trim_whitespace(char *z)
+{
+ size_t n = strlen(z);
+ while (n>0 && isspace((unsigned char)z[n]))
+ {
+ n--;
+ }
+ z[n] = 0;
+}
+
+/*
+** Dequote the string
+*/
+static void vsv_dequote(char *z)
+{
+ int j;
+ char cQuote = z[0];
+ size_t i, n;
+
+ if (cQuote!='\'' && cQuote!='"')
+ {
+ return;
+ }
+ n = strlen(z);
+ if (n<2 || z[n-1]!=z[0])
+ {
+ return;
+ }
+ for (i=1, j=0; i<n-1; i++)
+ {
+ if (z[i]==cQuote && z[i+1]==cQuote)
+ {
+ i++;
+ }
+ z[j++] = z[i];
+ }
+ z[j] = 0;
+}
+
+/*
+** Check to see if the string is of the form: "TAG = VALUE" with optional
+** whitespace before and around tokens. If it is, return a pointer to the
+** first character of VALUE. If it is not, return NULL.
+*/
+static const char *vsv_parameter(const char *zTag, int nTag, const char *z)
+{
+ z = vsv_skip_whitespace(z);
+ if (strncmp(zTag, z, nTag)!=0)
+ {
+ return 0;
+ }
+ z = vsv_skip_whitespace(z+nTag);
+ if (z[0]!='=')
+ {
+ return 0;
+ }
+ return vsv_skip_whitespace(z+1);
+}
+
+/*
+** Decode a parameter that requires a dequoted string.
+**
+** Return 1 if the parameter is seen, or 0 if not. 1 is returned
+** even if there is an error. If an error occurs, then an error message
+** is left in p->zErr. If there are no errors, p->zErr[0]==0.
+*/
+static int vsv_string_parameter(
+ VsvReader *p, /* Leave the error message here, if there is one */
+ const char *zParam, /* Parameter we are checking for */
+ const char *zArg, /* Raw text of the virtual table argment */
+ char **pzVal /* Write the dequoted string value here */
+ )
+{
+ const char *zValue;
+ zValue = vsv_parameter(zParam,(int)strlen(zParam),zArg);
+ if (zValue==0)
+ {
+ return 0;
+ }
+ p->zErr[0] = 0;
+ if (*pzVal)
+ {
+ vsv_errmsg(p, "more than one '%s' parameter", zParam);
+ return 1;
+ }
+ *pzVal = sqlite3_mprintf("%s", zValue);
+ if (*pzVal==0)
+ {
+ vsv_errmsg(p, "out of memory");
+ return 1;
+ }
+ vsv_trim_whitespace(*pzVal);
+ vsv_dequote(*pzVal);
+ return 1;
+}
+
+
+/*
+** Return 0 if the argument is false and 1 if it is true. Return -1 if
+** we cannot really tell.
+*/
+static int vsv_boolean(const char *z)
+{
+ if (sqlite3_stricmp("yes",z)==0
+ || sqlite3_stricmp("on",z)==0
+ || sqlite3_stricmp("true",z)==0
+ || (z[0]=='1' && z[1]==0)
+ )
+ {
+ return 1;
+ }
+ if (sqlite3_stricmp("no",z)==0
+ || sqlite3_stricmp("off",z)==0
+ || sqlite3_stricmp("false",z)==0
+ || (z[0]=='0' && z[1]==0)
+ )
+ {
+ return 0;
+ }
+ return -1;
+}
+
+/*
+** Check to see if the string is of the form: "TAG = BOOLEAN" or just "TAG".
+** If it is, set *pValue to be the value of the boolean ("true" if there is
+** not "= BOOLEAN" component) and return non-zero. If the input string
+** does not begin with TAG, return zero.
+*/
+static int vsv_boolean_parameter(
+ const char *zTag, /* Tag we are looking for */
+ int nTag, /* Size of the tag in bytes */
+ const char *z, /* Input parameter */
+ int *pValue /* Write boolean value here */
+ )
+{
+ int b;
+ z = vsv_skip_whitespace(z);
+ if (strncmp(zTag, z, nTag)!=0)
+ {
+ return 0;
+ }
+ z = vsv_skip_whitespace(z + nTag);
+ if (z[0]==0)
+ {
+ *pValue = 1;
+ return 1;
+ }
+ if (z[0]!='=')
+ {
+ return 0;
+ }
+ z = vsv_skip_whitespace(z+1);
+ b = vsv_boolean(z);
+ if (b>=0)
+ {
+ *pValue = b;
+ return 1;
+ }
+ return 0;
+}
+
+/*
+** Convert the seperator character specification into the character code
+** Return 1 signifies error, 0 for no error
+**
+** Recognized inputs:
+** any single character
+** escaped characters \f \n \t \v
+** escaped hex byte \x1e \x1f etc (RS and US respectively)
+**
+*/
+static int vsv_parse_sep_char(char *in, int dflt, int *out)
+{
+ if (!in)
+ {
+ *out = dflt;
+ return 0;
+ }
+ switch (strlen(in))
+ {
+ case 0:
+ {
+ *out = dflt;
+ return 0;
+ }
+ case 1:
+ {
+ *out = in[0];
+ return 0;
+ }
+ case 2:
+ {
+ if (in[0] != '\\')
+ {
+ return 1;
+ }
+ switch (in[1])
+ {
+ case 'f':
+ {
+ *out = 12;
+ return 0;
+ }
+ case 'n':
+ {
+ *out = 10;
+ return 0;
+ }
+ case 't':
+ {
+ *out = 9;
+ return 0;
+ }
+ case 'v':
+ {
+ *out = 11;
+ return 0;
+ }
+ }
+ return 1;
+ }
+ case 4:
+ {
+ if (sqlite3_strnicmp(in, "\\x", 2) != 0)
+ {
+ return 1;
+ }
+ if (!isxdigit(in[2]) || !isxdigit(in[3]))
+ {
+ return 1;
+ }
+ *out = ((in[2] > '9' ? (in[2] & 0x0f) + 9 : in[2] & 0x0f) << 4) +
+ (in[3] > '9' ? (in[3] & 0x0f) + 9 : in[3] & 0x0f);
+ return 0;
+ }
+ }
+ return 0;
+}
+
+/*
+** Parameters:
+** filename=FILENAME Name of file containing VSV content
+** data=TEXT Direct VSV content.
+** schema=SCHEMA Alternative VSV schema.
+** header=YES|NO First row of VSV defines the names of
+** columns if "yes". Default "no".
+** columns=N Assume the VSV file contains N columns.
+** fsep=FSET Field Seperator
+** rsep=RSEP Record Seperator
+** skip=N skip N records of file (default 0)
+** affinity=AFF affinity to apply to ALL columns
+** default: none
+** none text integer real numeric
+**
+** Only available if compiled with SQLITE_TEST:
+**
+** testflags=N Bitmask of test flags. Optional
+**
+** If schema= is omitted, then the columns are named "c0", "c1", "c2",
+** and so forth. If columns=N is omitted, then the file is opened and
+** the number of columns in the first row is counted to determine the
+** column count. If header=YES, then the first row is skipped.
+*/
+static int vsvtabConnect(
+ sqlite3 *db,
+ void *pAux,
+ int argc, const char *const*argv,
+ sqlite3_vtab **ppVtab,
+ char **pzErr
+ )
+{
+ VsvTable *pNew = 0; /* The VsvTable object to construct */
+ int affinity = -1; /* Affinity coercion */
+ int bHeader = -1; /* header= flags. -1 means not seen yet */
+ int validateUTF8 = -1; /* validateUTF8 flag */
+ int rc = SQLITE_OK; /* Result code from this routine */
+ int i, j; /* Loop counters */
+#ifdef SQLITE_TEST
+ int tstFlags = 0; /* Value for testflags=N parameter */
+#endif
+ int b; /* Value of a boolean parameter */
+ int nCol = -99; /* Value of the columns= parameter */
+ int nSkip = -1; /* Value of the skip= parameter */
+ int bNulls = -1; /* Process Nulls flag */
+ VsvReader sRdr; /* A VSV file reader used to store an error
+ ** message and/or to count the number of columns */
+ static const char *azParam[] = {
+ "filename", "data", "schema", "fsep", "rsep"
+ };
+ char *azPValue[5]; /* Parameter values */
+#define VSV_FILENAME (azPValue[0])
+#define VSV_DATA (azPValue[1])
+#define VSV_SCHEMA (azPValue[2])
+#define VSV_FSEP (azPValue[3])
+#define VSV_RSEP (azPValue[4])
+
+
+ assert( sizeof(azPValue)==sizeof(azParam) );
+ memset(&sRdr, 0, sizeof(sRdr));
+ memset(azPValue, 0, sizeof(azPValue));
+ for (i=3; i<argc; i++)
+ {
+ const char *z = argv[i];
+ const char *zValue;
+ for (j=0; j<sizeof(azParam)/sizeof(azParam[0]); j++)
+ {
+ if (vsv_string_parameter(&sRdr, azParam[j], z, &azPValue[j]))
+ {
+ break;
+ }
+ }
+ if (j<sizeof(azParam)/sizeof(azParam[0]))
+ {
+ if (sRdr.zErr[0])
+ {
+ goto vsvtab_connect_error;
+ }
+ }
+ else if (vsv_boolean_parameter("header",6,z,&b))
+ {
+ if (bHeader>=0)
+ {
+ vsv_errmsg(&sRdr, "more than one 'header' parameter");
+ goto vsvtab_connect_error;
+ }
+ bHeader = b;
+ }
+ else if (vsv_boolean_parameter("validatetext",12,z,&b))
+ {
+ if (validateUTF8>=0)
+ {
+ vsv_errmsg(&sRdr, "more than one 'validatetext' parameter");
+ goto vsvtab_connect_error;
+ }
+ validateUTF8 = b;
+ }
+ else if (vsv_boolean_parameter("nulls",5,z,&b))
+ {
+ if (bNulls>=0)
+ {
+ vsv_errmsg(&sRdr, "more than one 'nulls' parameter");
+ goto vsvtab_connect_error;
+ }
+ bNulls = b;
+ }
+ else
+#ifdef SQLITE_TEST
+ if ((zValue = vsv_parameter("testflags",9,z))!=0)
+ {
+ tstFlags = (unsigned int)atoi(zValue);
+ }
+ else
+#endif
+ if ((zValue = vsv_parameter("columns",7,z))!=0)
+ {
+ if (nCol>0)
+ {
+ vsv_errmsg(&sRdr, "more than one 'columns' parameter");
+ goto vsvtab_connect_error;
+ }
+ nCol = atoi(zValue);
+ if (nCol<=0)
+ {
+ vsv_errmsg(&sRdr, "column= value must be positive");
+ goto vsvtab_connect_error;
+ }
+ }
+ else if ((zValue = vsv_parameter("skip",4,z))!=0)
+ {
+ if (nSkip>0)
+ {
+ vsv_errmsg(&sRdr, "more than one 'skip' parameter");
+ goto vsvtab_connect_error;
+ }
+ nSkip = atoi(zValue);
+ if (nSkip<=0)
+ {
+ vsv_errmsg(&sRdr, "skip= value must be positive");
+ goto vsvtab_connect_error;
+ }
+ }
+ else if ((zValue = vsv_parameter("affinity",8,z))!=0)
+ {
+ if (affinity>-1)
+ {
+ vsv_errmsg(&sRdr, "more than one 'affinity' parameter");
+ goto vsvtab_connect_error;
+ }
+ if (sqlite3_strnicmp(zValue,"none",4)==0) affinity=0;
+ else if (sqlite3_strnicmp(zValue,"blob",4)==0) affinity=1;
+ else if (sqlite3_strnicmp(zValue,"text",4)==0) affinity=2;
+ else if (sqlite3_strnicmp(zValue,"integer",7)==0) affinity=3;
+ else if (sqlite3_strnicmp(zValue,"real",4)==0) affinity=4;
+ else if (sqlite3_strnicmp(zValue,"numeric",7)==0) affinity=5;
+ else
+ {
+ vsv_errmsg(&sRdr, "unknown affinity: '%s'", zValue);
+ goto vsvtab_connect_error;
+ }
+ }
+ else
+ {
+ vsv_errmsg(&sRdr, "bad parameter: '%s'", z);
+ goto vsvtab_connect_error;
+ }
+ }
+ if (affinity==-1)
+ {
+ affinity = 0;
+ }
+ if (bNulls==-1)
+ {
+ bNulls = 0;
+ }
+ if (validateUTF8==-1)
+ {
+ validateUTF8 = 0;
+ }
+ if ((VSV_FILENAME==0)==(VSV_DATA==0))
+ {
+ vsv_errmsg(&sRdr, "must specify either filename= or data= but not both");
+ goto vsvtab_connect_error;
+ }
+ if (vsv_parse_sep_char(VSV_FSEP, ',', &(sRdr.fsep)))
+ {
+ vsv_errmsg(&sRdr, "cannot parse fsep: '%s'", VSV_FSEP);
+ goto vsvtab_connect_error;
+ }
+ if (vsv_parse_sep_char(VSV_RSEP, '\n', &(sRdr.rsep)))
+ {
+ vsv_errmsg(&sRdr, "cannot parse rsep: '%s'", VSV_RSEP);
+ goto vsvtab_connect_error;
+ }
+
+
+ if ((nCol <= 0 || bHeader == 1) && vsv_reader_open(&sRdr, VSV_FILENAME, VSV_DATA))
+ {
+ goto vsvtab_connect_error;
+ }
+ pNew = sqlite3_malloc( sizeof(*pNew) );
+ *ppVtab = (sqlite3_vtab*)pNew;
+ if (pNew==0)
+ {
+ goto vsvtab_connect_oom;
+ }
+ memset(pNew, 0, sizeof(*pNew));
+ pNew->fsep = sRdr.fsep;
+ pNew->rsep = sRdr.rsep;
+ pNew->affinity = affinity;
+ pNew->validateUTF8 = validateUTF8;
+ pNew->nulls = bNulls;
+ if (VSV_SCHEMA==0)
+ {
+ sqlite3_str *pStr = sqlite3_str_new(0);
+ char *zSep = "";
+ int iCol = 0;
+ sqlite3_str_appendf(pStr, "CREATE TABLE x(");
+ if (nCol<0 && bHeader<1)
+ {
+ nCol = 0;
+ do
+ {
+ vsv_read_one_field(&sRdr);
+ nCol++;
+ }
+ while (sRdr.cTerm==sRdr.fsep);
+ }
+ if (nCol>0 && bHeader<1)
+ {
+ for (iCol=0; iCol<nCol; iCol++)
+ {
+ sqlite3_str_appendf(pStr, "%sc%d", zSep, iCol);
+ zSep = ",";
+ }
+ }
+ else
+ {
+ do
+ {
+ char *z = vsv_read_one_field(&sRdr);
+ if ((nCol>0 && iCol<nCol) || (nCol<0 && bHeader))
+ {
+ sqlite3_str_appendf(pStr,"%s\"%w\"", zSep, z);
+ zSep = ",";
+ iCol++;
+ }
+ }
+ while (sRdr.cTerm==sRdr.fsep);
+ if (nCol<0)
+ {
+ nCol = iCol;
+ }
+ else
+ {
+ while (iCol<nCol)
+ {
+ sqlite3_str_appendf(pStr,"%sc%d", zSep, ++iCol);
+ zSep = ",";
+ }
+ }
+ }
+ sqlite3_str_appendf(pStr, ")");
+ VSV_SCHEMA = sqlite3_str_finish(pStr);
+ if (VSV_SCHEMA==0)
+ {
+ goto vsvtab_connect_oom;
+ }
+ }
+ else if (nCol<0)
+ {
+ nCol = 0;
+ do
+ {
+ vsv_read_one_field(&sRdr);
+ nCol++;
+ }
+ while (sRdr.cTerm==sRdr.fsep);
+ }
+ else if (nSkip<1 && bHeader==1)
+ {
+ do
+ {
+ vsv_read_one_field(&sRdr);
+ }
+ while (sRdr.cTerm==sRdr.fsep);
+ }
+ pNew->nCol = nCol;
+ if (nSkip>0)
+ {
+ int tskip = nSkip + (bHeader==1);
+ vsv_reader_reset(&sRdr);
+ if (vsv_reader_open(&sRdr, VSV_FILENAME, VSV_DATA))
+ {
+ goto vsvtab_connect_error;
+ }
+ do
+ {
+ do
+ {
+ if (!vsv_read_one_field(&sRdr))
+ goto vsvtab_connect_error;
+ }
+ while (sRdr.cTerm==sRdr.fsep);
+ tskip--;
+ }
+ while (tskip>0 && sRdr.cTerm==sRdr.rsep);
+ if (tskip>0)
+ {
+ vsv_errmsg(&sRdr, "premature end of file during skip");
+ goto vsvtab_connect_error;
+ }
+ }
+ pNew->zFilename = VSV_FILENAME; VSV_FILENAME = 0;
+ pNew->zData = VSV_DATA; VSV_DATA = 0;
+#ifdef SQLITE_TEST
+ pNew->tstFlags = tstFlags;
+#endif
+ if (bHeader!=1 && nSkip<1)
+ {
+ pNew->iStart = 0;
+ }
+ else if (pNew->zData)
+ {
+ pNew->iStart = (int)sRdr.iIn;
+ }
+ else
+ {
+ pNew->iStart = (int)(ftell(sRdr.in) - sRdr.nIn + sRdr.iIn);
+ }
+ vsv_reader_reset(&sRdr);
+ rc = sqlite3_declare_vtab(db, VSV_SCHEMA);
+ if (rc)
+ {
+ vsv_errmsg(&sRdr, "bad schema: '%s' - %s", VSV_SCHEMA, sqlite3_errmsg(db));
+ goto vsvtab_connect_error;
+ }
+ for (i=0; i<sizeof(azPValue)/sizeof(azPValue[0]); i++)
+ {
+ sqlite3_free(azPValue[i]);
+ }
+ /*
+ ** Rationale for DIRECTONLY:
+ ** An attacker who controls a database schema could use this vtab
+ ** to exfiltrate sensitive data from other files in the filesystem.
+ ** And, recommended practice is to put all VSV virtual tables in the
+ ** TEMP namespace, so they should still be usable from within TEMP
+ ** views, so there shouldn't be a serious loss of functionality by
+ ** prohibiting the use of this vtab from persistent triggers and views.
+ */
+ sqlite3_vtab_config(db, SQLITE_VTAB_DIRECTONLY);
+ return SQLITE_OK;
+
+vsvtab_connect_oom:
+ rc = SQLITE_NOMEM;
+ vsv_errmsg(&sRdr, "out of memory");
+
+vsvtab_connect_error:
+ if (pNew)
+ {
+ vsvtabDisconnect(&pNew->base);
+ }
+ for (i=0; i<sizeof(azPValue)/sizeof(azPValue[0]); i++)
+ {
+ sqlite3_free(azPValue[i]);
+ }
+ if (sRdr.zErr[0])
+ {
+ sqlite3_free(*pzErr);
+ *pzErr = sqlite3_mprintf("%s", sRdr.zErr);
+ }
+ vsv_reader_reset(&sRdr);
+ if (rc==SQLITE_OK)
+ {
+ rc = SQLITE_ERROR;
+ }
+ return rc;
+}
+
+/*
+** Reset the current row content held by a VsvCursor.
+*/
+static void vsvtabCursorRowReset(VsvCursor *pCur)
+{
+ VsvTable *pTab = (VsvTable*)pCur->base.pVtab;
+ int i;
+ for (i=0; i<pTab->nCol; i++)
+ {
+ sqlite3_free(pCur->azVal[i]);
+ pCur->azVal[i] = 0;
+ pCur->aLen[i] = 0;
+ pCur->dLen[i] = -1;
+ }
+}
+
+/*
+** The xConnect and xCreate methods do the same thing, but they must be
+** different so that the virtual table is not an eponymous virtual table.
+*/
+static int vsvtabCreate(
+ sqlite3 *db,
+ void *pAux,
+ int argc, const char *const*argv,
+ sqlite3_vtab **ppVtab,
+ char **pzErr
+ )
+{
+ return vsvtabConnect(db, pAux, argc, argv, ppVtab, pzErr);
+}
+
+/*
+** Destructor for a VsvCursor.
+*/
+static int vsvtabClose(sqlite3_vtab_cursor *cur)
+{
+ VsvCursor *pCur = (VsvCursor*)cur;
+ vsvtabCursorRowReset(pCur);
+ vsv_reader_reset(&pCur->rdr);
+ sqlite3_free(cur);
+ return SQLITE_OK;
+}
+
+/*
+** Constructor for a new VsvTable cursor object.
+*/
+static int vsvtabOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor)
+{
+ VsvTable *pTab = (VsvTable*)p;
+ VsvCursor *pCur;
+ size_t nByte;
+ nByte = sizeof(*pCur) + (sizeof(char*)+(2*sizeof(int)))*pTab->nCol;
+ pCur = sqlite3_malloc64( nByte );
+ if (pCur==0)
+ return SQLITE_NOMEM;
+ memset(pCur, 0, nByte);
+ pCur->azVal = (char**)&pCur[1];
+ pCur->aLen = (int*)&pCur->azVal[pTab->nCol];
+ pCur->dLen = (int*)&pCur->aLen[pTab->nCol];
+ pCur->rdr.fsep = pTab->fsep;
+ pCur->rdr.rsep = pTab->rsep;
+ pCur->rdr.affinity = pTab->affinity;
+ *ppCursor = &pCur->base;
+ if (vsv_reader_open(&pCur->rdr, pTab->zFilename, pTab->zData))
+ {
+ vsv_xfer_error(pTab, &pCur->rdr);
+ return SQLITE_ERROR;
+ }
+ return SQLITE_OK;
+}
+
+
+/*
+** Advance a VsvCursor to its next row of input.
+** Set the EOF marker if we reach the end of input.
+*/
+static int vsvtabNext(sqlite3_vtab_cursor *cur)
+{
+ VsvCursor *pCur = (VsvCursor*)cur;
+ VsvTable *pTab = (VsvTable*)cur->pVtab;
+ int i = 0;
+ char *z;
+ do
+ {
+ z = vsv_read_one_field(&pCur->rdr);
+ if (z==0)
+ {
+ if (i<pTab->nCol)
+ pCur->dLen[i] = -1;
+ }
+ else if (i<pTab->nCol)
+ {
+ if (pCur->aLen[i] < pCur->rdr.n+1)
+ {
+ char *zNew = sqlite3_realloc64(pCur->azVal[i], pCur->rdr.n+1);
+ if (zNew==0)
+ {
+ z = 0;
+ vsv_errmsg(&pCur->rdr, "out of memory");
+ vsv_xfer_error(pTab, &pCur->rdr);
+ break;
+ }
+ pCur->azVal[i] = zNew;
+ pCur->aLen[i] = pCur->rdr.n+1;
+ }
+ if (!pCur->rdr.notNull && pTab->nulls)
+ {
+ pCur->dLen[i] = -1;
+ }
+ else
+ {
+ pCur->dLen[i] = pCur->rdr.n;
+ memcpy(pCur->azVal[i], z, pCur->rdr.n+1);
+ }
+ i++;
+ }
+ }
+ while (pCur->rdr.cTerm==pCur->rdr.fsep);
+ if ((pCur->rdr.cTerm==EOF && i==0))
+ {
+ pCur->iRowid = -1;
+ }
+ else
+ {
+ pCur->iRowid++;
+ while (i<pTab->nCol)
+ {
+ pCur->dLen[i] = -1;
+ i++;
+ }
+ }
+ return SQLITE_OK;
+}
+
+/*
+**
+** Determine affinity of field
+**
+** ignore leading space
+** then may have + or -
+** then may have digits or . (if . found then type=real)
+** then may have digits (if another . then not number)
+** then may have e (if found then type=real)
+** then may have + or -
+** then may have digits
+** then may have trailing space
+*/
+static int vsv_isValidNumber(char *arg)
+{
+ char *start;
+ char *stop;
+ int isValid = 0;
+ int hasDigit = 0;
+
+ start = arg;
+ stop = arg + strlen(arg) - 1;
+ while (start <= stop && *start==' ') // strip spaces from begining
+ {
+ start++;
+ }
+ while (start <= stop && *stop==' ') // strip spaces from end
+ {
+ stop--;
+ }
+ if (start > stop)
+ {
+ goto vsv_end_isValidNumber;
+ }
+ if (start <= stop && (*start=='+' || *start=='-')) // may have + or -
+ {
+ start++;
+ }
+ if (start <= stop && isdigit(*start)) // must have a digit to be valid
+ {
+ hasDigit = 1;
+ isValid = 1;
+ }
+ while (start <= stop && isdigit(*start)) // bunch of digits
+ {
+ start++;
+ }
+ if (start <= stop && *start=='.') // may have .
+ {
+ isValid = 2;
+ start++;
+ }
+ if (start <= stop && isdigit(*start))
+ {
+ hasDigit = 1;
+ }
+ while (start <= stop && isdigit(*start)) // bunch of digits
+ {
+ start++;
+ }
+ if (!hasDigit) // no digits then invalid
+ {
+ isValid = 0;
+ goto vsv_end_isValidNumber;
+ }
+ if (start <= stop && (*start=='e' || *start=='E')) // may have 'e' or 'E'
+ {
+ isValid = 3;
+ start++;
+ }
+ if (start <= stop && isValid == 3 && (*start == '+' || *start == '-'))
+ {
+ start++;
+ }
+ if (start <= stop && isValid == 3 && isdigit(*start))
+ {
+ isValid = 2;
+ }
+ while (start <= stop && isdigit(*start)) // bunch of digits
+ {
+ start++;
+ }
+ if (isValid == 3)
+ {
+ isValid = 0;
+ }
+vsv_end_isValidNumber:
+ if (start <= stop)
+ {
+ isValid = 0;
+ }
+ return isValid;
+}
+
+
+/*
+** Validate UTF-8
+** Return -1 if invalid else length
+*/
+static long long vsv_utf8IsValid(char *string)
+{
+ long long length = 0;
+ unsigned char *start;
+ int trailing = 0;
+ unsigned char c;
+
+ start = (unsigned char *)string;
+ while ((c = *start))
+ {
+ if (trailing)
+ {
+ if ((c & 0xC0) == 0x80)
+ {
+ trailing--;
+ start++;
+ length++;
+ continue;
+ }
+ else
+ {
+ length = -1;
+ break;
+ }
+ }
+ if ((c & 0x80) == 0)
+ {
+ start++;
+ length++;
+ continue;
+ }
+ if ((c & 0xE0) == 0xC0)
+ {
+ trailing = 1;
+ start++;
+ length++;
+ continue;
+ }
+ if ((c & 0xF0) == 0xE0)
+ {
+ trailing = 2;
+ start++;
+ length++;
+ continue;
+ }
+ if ((c & 0xF8) == 0xF0)
+ {
+ trailing = 3;
+ start++;
+ length++;
+ continue;
+ }
+ length = -1;
+ break;
+ }
+ return length;
+}
+
+/*
+** Return values of columns for the row at which the VsvCursor
+** is currently pointing.
+*/
+static int vsvtabColumn(
+ sqlite3_vtab_cursor *cur, /* The cursor */
+ sqlite3_context *ctx, /* First argument to sqlite3_result_...() */
+ int i /* Which column to return */
+ )
+{
+ VsvCursor *pCur = (VsvCursor*)cur;
+ VsvTable *pTab = (VsvTable*)cur->pVtab;
+ long long dLen = pCur->dLen[i];
+ long long length = 0;
+
+ if (i>=0 && i<pTab->nCol && pCur->azVal[i]!=0 && dLen>-1)
+ {
+ switch (pTab->affinity)
+ {
+ case 0:
+ {
+ if (pTab->validateUTF8)
+ {
+ length = vsv_utf8IsValid(pCur->azVal[i]);
+ if (length == dLen)
+ {
+ sqlite3_result_text(ctx, pCur->azVal[i], dLen, SQLITE_TRANSIENT);
+ }
+ else
+ {
+ sqlite3_result_error(ctx, "Invalid UTF8 Data", -1);
+ }
+ }
+ else
+ {
+ sqlite3_result_text(ctx, pCur->azVal[i], -1, SQLITE_TRANSIENT);
+ }
+ break;
+ }
+ case 1:
+ {
+ sqlite3_result_blob(ctx, pCur->azVal[i], dLen, SQLITE_TRANSIENT);
+ break;
+ }
+ case 2:
+ {
+ if (pTab->validateUTF8)
+ {
+ length = vsv_utf8IsValid(pCur->azVal[i]);
+ if (length < dLen)
+ {
+ sqlite3_result_blob(ctx, pCur->azVal[i], dLen, SQLITE_TRANSIENT);
+ }
+ else
+ {
+ sqlite3_result_text(ctx, pCur->azVal[i], length, SQLITE_TRANSIENT);
+ }
+ }
+ else
+ {
+ sqlite3_result_text(ctx, pCur->azVal[i], -1, SQLITE_TRANSIENT);
+ }
+ break;
+ }
+ case 3:
+ {
+ switch (vsv_isValidNumber(pCur->azVal[i]))
+ {
+ case 1:
+ {
+ sqlite3_result_int64(ctx, strtoll(pCur->azVal[i], 0, 10));
+ break;
+ }
+ default:
+ {
+ if (pTab->validateUTF8)
+ {
+ length = vsv_utf8IsValid(pCur->azVal[i]);
+ if (length < dLen)
+ {
+ sqlite3_result_blob(ctx, pCur->azVal[i], dLen, SQLITE_TRANSIENT);
+ }
+ else
+ {
+ sqlite3_result_text(ctx, pCur->azVal[i], length, SQLITE_TRANSIENT);
+ }
+ }
+ else
+ {
+ sqlite3_result_text(ctx, pCur->azVal[i], -1, SQLITE_TRANSIENT);
+ }
+ break;
+ }
+ }
+ break;
+ }
+ case 4:
+ {
+ switch (vsv_isValidNumber(pCur->azVal[i]))
+ {
+ case 1:
+ case 2:
+ {
+ sqlite3_result_double(ctx, strtod(pCur->azVal[i], 0));
+ break;
+ }
+ default:
+ {
+ if (pTab->validateUTF8)
+ {
+ length = vsv_utf8IsValid(pCur->azVal[i]);
+ if (length < dLen)
+ {
+ sqlite3_result_blob(ctx, pCur->azVal[i], dLen, SQLITE_TRANSIENT);
+ }
+ else
+ {
+ sqlite3_result_text(ctx, pCur->azVal[i], length, SQLITE_TRANSIENT);
+ }
+ }
+ else
+ {
+ sqlite3_result_text(ctx, pCur->azVal[i], -1, SQLITE_TRANSIENT);
+ }
+ break;
+ }
+ }
+ break;
+ }
+ case 5:
+ {
+ switch (vsv_isValidNumber(pCur->azVal[i]))
+ {
+ case 1:
+ {
+ sqlite3_result_int64(ctx, strtoll(pCur->azVal[i], 0, 10));
+ break;
+ }
+ case 2:
+ {
+ long double dv, fp, ip;
+
+ dv = strtold(pCur->azVal[i], 0);
+ fp = modfl(dv, &ip);
+ if (sizeof(long double)>sizeof(double))
+ {
+ if (fp==0.0L && dv >= -9223372036854775808.0L && dv <= 9223372036854775807.0L)
+ {
+ sqlite3_result_int64(ctx, (long long)dv);
+ }
+ else
+ {
+ sqlite3_result_double(ctx, (double)dv);
+ }
+ }
+ else
+ {
+ // Only convert if it will fit in a 6-byte varint
+ if (fp==0.0L && dv >= -140737488355328.0L && dv <= 140737488355328.0L)
+ {
+ sqlite3_result_int64(ctx, (long long)dv);
+ }
+ else
+ {
+ sqlite3_result_double(ctx, (double)dv);
+ }
+ }
+ break;
+ }
+ default:
+ {
+ if (pTab->validateUTF8)
+ {
+ length = vsv_utf8IsValid(pCur->azVal[i]);
+ if (length < dLen)
+ {
+ sqlite3_result_blob(ctx, pCur->azVal[i], dLen, SQLITE_TRANSIENT);
+ }
+ else
+ {
+ sqlite3_result_text(ctx, pCur->azVal[i], length, SQLITE_TRANSIENT);
+ }
+ }
+ else
+ {
+ sqlite3_result_text(ctx, pCur->azVal[i], -1, SQLITE_TRANSIENT);
+ }
+ break;
+ }
+ }
+ }
+ }
+ }
+ return SQLITE_OK;
+}
+
+/*
+** Return the rowid for the current row.
+*/
+static int vsvtabRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid)
+{
+ VsvCursor *pCur = (VsvCursor*)cur;
+ *pRowid = pCur->iRowid;
+ return SQLITE_OK;
+}
+
+/*
+** Return TRUE if the cursor has been moved off of the last
+** row of output.
+*/
+static int vsvtabEof(sqlite3_vtab_cursor *cur)
+{
+ VsvCursor *pCur = (VsvCursor*)cur;
+ return pCur->iRowid<0;
+}
+
+/*
+** Only a full table scan is supported. So xFilter simply rewinds to
+** the beginning.
+*/
+static int vsvtabFilter(
+ sqlite3_vtab_cursor *pVtabCursor,
+ int idxNum, const char *idxStr,
+ int argc, sqlite3_value **argv
+ )
+{
+ VsvCursor *pCur = (VsvCursor*)pVtabCursor;
+ VsvTable *pTab = (VsvTable*)pVtabCursor->pVtab;
+ pCur->iRowid = 0;
+
+ /* Ensure the field buffer is always allocated. Otherwise, if the
+ ** first field is zero bytes in size, this may be mistaken for an OOM
+ ** error in csvtabNext(). */
+ if( vsv_append(&pCur->rdr, 0) ) return SQLITE_NOMEM;
+
+ if (pCur->rdr.in==0)
+ {
+ assert( pCur->rdr.zIn==pTab->zData );
+ assert( pTab->iStart>=0 );
+ assert( (size_t)pTab->iStart<=pCur->rdr.nIn );
+ pCur->rdr.iIn = pTab->iStart;
+ }
+ else
+ {
+ fseek(pCur->rdr.in, pTab->iStart, SEEK_SET);
+ pCur->rdr.iIn = 0;
+ pCur->rdr.nIn = 0;
+ }
+ return vsvtabNext(pVtabCursor);
+}
+
+/*
+** Only a forward full table scan is supported. xBestIndex is mostly
+** a no-op. If VSVTEST_FIDX is set, then the presence of equality
+** constraints lowers the estimated cost, which is fiction, but is useful
+** for testing certain kinds of virtual table behavior.
+*/
+static int vsvtabBestIndex(
+ sqlite3_vtab *tab,
+ sqlite3_index_info *pIdxInfo
+ )
+{
+ pIdxInfo->estimatedCost = 1000000;
+#ifdef SQLITE_TEST
+ if ((((VsvTable*)tab)->tstFlags & VSVTEST_FIDX)!=0)
+ {
+ /* The usual (and sensible) case is to always do a full table scan.
+ ** The code in this branch only runs when testflags=1. This code
+ ** generates an artifical and unrealistic plan which is useful
+ ** for testing virtual table logic but is not helpful to real applications.
+ **
+ ** Any ==, LIKE, or GLOB constraint is marked as usable by the virtual
+ ** table (even though it is not) and the cost of running the virtual table
+ ** is reduced from 1 million to just 10. The constraints are *not* marked
+ ** as omittable, however, so the query planner should still generate a
+ ** plan that gives a correct answer, even if they plan is not optimal.
+ */
+ int i;
+ int nConst = 0;
+ for (i=0; i<pIdxInfo->nConstraint; i++)
+ {
+ unsigned char op;
+ if (pIdxInfo->aConstraint[i].usable==0) continue;
+ op = pIdxInfo->aConstraint[i].op;
+ if (op==SQLITE_INDEX_CONSTRAINT_EQ
+ || op==SQLITE_INDEX_CONSTRAINT_LIKE
+ || op==SQLITE_INDEX_CONSTRAINT_GLOB
+ )
+ {
+ pIdxInfo->estimatedCost = 10;
+ pIdxInfo->aConstraintUsage[nConst].argvIndex = nConst+1;
+ nConst++;
+ }
+ }
+ }
+#endif
+ return SQLITE_OK;
+}
+
+
+static sqlite3_module VsvModule = {
+ 0, /* iVersion */
+ vsvtabCreate, /* xCreate */
+ vsvtabConnect, /* xConnect */
+ vsvtabBestIndex, /* xBestIndex */
+ vsvtabDisconnect, /* xDisconnect */
+ vsvtabDisconnect, /* xDestroy */
+ vsvtabOpen, /* xOpen - open a cursor */
+ vsvtabClose, /* xClose - close a cursor */
+ vsvtabFilter, /* xFilter - configure scan constraints */
+ vsvtabNext, /* xNext - advance a cursor */
+ vsvtabEof, /* xEof - check for end of scan */
+ vsvtabColumn, /* xColumn - read data */
+ vsvtabRowid, /* xRowid - read data */
+ 0, /* xUpdate */
+ 0, /* xBegin */
+ 0, /* xSync */
+ 0, /* xCommit */
+ 0, /* xRollback */
+ 0, /* xFindMethod */
+ 0, /* xRename */
+};
+
+#ifdef SQLITE_TEST
+/*
+** For virtual table testing, make a version of the VSV virtual table
+** available that has an xUpdate function. But the xUpdate always returns
+** SQLITE_READONLY since the VSV file is not really writable.
+*/
+static int vsvtabUpdate(sqlite3_vtab *p,int n,sqlite3_value**v,sqlite3_int64*x)
+{
+ return SQLITE_READONLY;
+}
+static sqlite3_module VsvModuleFauxWrite = {
+ 0, /* iVersion */
+ vsvtabCreate, /* xCreate */
+ vsvtabConnect, /* xConnect */
+ vsvtabBestIndex, /* xBestIndex */
+ vsvtabDisconnect, /* xDisconnect */
+ vsvtabDisconnect, /* xDestroy */
+ vsvtabOpen, /* xOpen - open a cursor */
+ vsvtabClose, /* xClose - close a cursor */
+ vsvtabFilter, /* xFilter - configure scan constraints */
+ vsvtabNext, /* xNext - advance a cursor */
+ vsvtabEof, /* xEof - check for end of scan */
+ vsvtabColumn, /* xColumn - read data */
+ vsvtabRowid, /* xRowid - read data */
+ vsvtabUpdate, /* xUpdate */
+ 0, /* xBegin */
+ 0, /* xSync */
+ 0, /* xCommit */
+ 0, /* xRollback */
+ 0, /* xFindMethod */
+ 0, /* xRename */
+};
+#endif /* SQLITE_TEST */
+
+#endif /* !defined(SQLITE_OMIT_VIRTUALTABLE) */
+
+/*
+** This routine is called when the extension is loaded. The new
+** VSV virtual table module is registered with the calling database
+** connection.
+*/
+#ifndef SQLITE_CORE
+#ifdef _WIN32
+__declspec(dllexport)
+#endif
+#endif
+int sqlite3_vsv_init(
+ sqlite3 *db,
+ char **pzErrMsg,
+ const sqlite3_api_routines *pApi
+ )
+{
+#ifndef SQLITE_OMIT_VIRTUALTABLE
+ int rc;
+ SQLITE_EXTENSION_INIT2(pApi);
+ rc = sqlite3_create_module(db, "vsv", &VsvModule, 0);
+#ifdef SQLITE_TEST
+ if (rc==SQLITE_OK)
+ {
+ rc = sqlite3_create_module(db, "vsv_wr", &VsvModuleFauxWrite, 0);
+ }
+#endif
+ return rc;
+#else
+ return SQLITE_OK;
+#endif
+}