summaryrefslogtreecommitdiff
path: root/lua/ftcsv.lua
diff options
context:
space:
mode:
Diffstat (limited to 'lua/ftcsv.lua')
-rw-r--r--lua/ftcsv.lua490
1 files changed, 490 insertions, 0 deletions
diff --git a/lua/ftcsv.lua b/lua/ftcsv.lua
new file mode 100644
index 0000000..af4abec
--- /dev/null
+++ b/lua/ftcsv.lua
@@ -0,0 +1,490 @@
+local ftcsv = {
+ _VERSION = 'ftcsv 1.1.3',
+ _DESCRIPTION = 'CSV library for Lua',
+ _URL = 'https://github.com/FourierTransformer/ftcsv',
+ _LICENSE = [[
+ The MIT License (MIT)
+
+ Copyright (c) 2016 Shakil Thakur
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
+ ]]
+}
+
+-- lua 5.1 load compat
+local M = {}
+if type(jit) == 'table' or _ENV then
+ M.load = _G.load
+else
+ M.load = loadstring
+end
+
+-- perf
+local sbyte = string.byte
+local ssub = string.sub
+
+-- luajit specific speedups
+-- luajit performs faster with iterating over string.byte,
+-- whereas vanilla lua performs faster with string.find
+if type(jit) == 'table' then
+ -- finds the end of an escape sequence
+ function M.findClosingQuote(i, inputLength, inputString, quote, doubleQuoteEscape)
+ -- local doubleQuoteEscape = doubleQuoteEscape
+ local currentChar, nextChar = sbyte(inputString, i), nil
+ while i <= inputLength do
+ -- print(i)
+ nextChar = sbyte(inputString, i+1)
+
+ -- this one deals with " double quotes that are escaped "" within single quotes "
+ -- these should be turned into a single quote at the end of the field
+ if currentChar == quote and nextChar == quote then
+ doubleQuoteEscape = true
+ i = i + 2
+ currentChar = sbyte(inputString, i)
+
+ -- identifies the escape toggle
+ elseif currentChar == quote and nextChar ~= quote then
+ -- print("exiting", i-1)
+ return i-1, doubleQuoteEscape
+ else
+ i = i + 1
+ currentChar = nextChar
+ end
+ end
+ end
+
+else
+ -- vanilla lua closing quote finder
+ function M.findClosingQuote(i, inputLength, inputString, quote, doubleQuoteEscape)
+ local firstCharIndex = 1
+ local firstChar, iChar = nil, nil
+ repeat
+ firstCharIndex, i = inputString:find('".?', i+1)
+ firstChar = sbyte(inputString, firstCharIndex)
+ iChar = sbyte(inputString, i)
+ -- nextChar = string.byte(inputString, i+1)
+ -- print("HI", offset, i)
+ -- print(firstChar, iChar)
+ if firstChar == quote and iChar == quote then
+ doubleQuoteEscape = true
+ end
+ until iChar ~= quote
+ if i == nil then
+ return inputLength-1, doubleQuoteEscape
+ end
+ -- print("exiting", i-2)
+ return i-2, doubleQuoteEscape
+ end
+
+end
+
+-- load an entire file into memory
+local function loadFile(textFile)
+ local file = io.open(textFile, "r")
+ if not file then error("ftcsv: File not found at " .. textFile) end
+ local allLines = file:read("*all")
+ file:close()
+ return allLines
+end
+
+-- creates a new field and adds it to the main table
+local function createField(inputString, quote, fieldStart, i, doubleQuoteEscape)
+ local field
+ -- so, if we just recently de-escaped, we don't want the trailing \"
+ if sbyte(inputString, i-1) == quote then
+ -- print("Skipping last \"")
+ field = ssub(inputString, fieldStart, i-2)
+ else
+ field = ssub(inputString, fieldStart, i-1)
+ end
+ if doubleQuoteEscape then
+ -- print("QUOTE REPLACE")
+ -- print(line[fieldNum])
+ field = field:gsub('""', '"')
+ end
+ return field
+end
+
+-- main function used to parse
+local function parseString(inputString, inputLength, delimiter, i, headerField, fieldsToKeep)
+
+ -- keep track of my chars!
+ local currentChar, nextChar = sbyte(inputString, i), nil
+ local skipChar = 0
+ local field
+ local fieldStart = i
+ local fieldNum = 1
+ local lineNum = 1
+ local doubleQuoteEscape = false
+ local exit = false
+
+ --bytes
+ local CR = sbyte("\r")
+ local LF = sbyte("\n")
+ local quote = sbyte('"')
+ local delimiterByte = sbyte(delimiter)
+
+ local assignValue
+ local outResults
+ -- the headers haven't been set yet.
+ -- aka this is the first run!
+ if headerField == nil then
+ -- print("this is for headers")
+ headerField = {}
+ assignValue = function()
+ headerField[fieldNum] = field
+ return true
+ end
+ else
+ -- print("this is for magic")
+ outResults = {}
+ outResults[1] = {}
+ assignValue = function()
+ if not pcall(function()
+ outResults[lineNum][headerField[fieldNum]] = field
+ end) then
+ error('ftcsv: too many columns in row ' .. lineNum)
+ end
+ end
+ end
+
+ -- calculate the initial line count (note: this can include duplicates)
+ local headerFieldsExist = {}
+ local initialLineCount = 0
+ for _, value in pairs(headerField) do
+ if not headerFieldsExist[value] and (fieldsToKeep == nil or fieldsToKeep[value]) then
+ headerFieldsExist[value] = true
+ initialLineCount = initialLineCount + 1
+ end
+ end
+
+ while i <= inputLength do
+ -- go by two chars at a time! currentChar is set at the bottom.
+ -- currentChar = string.byte(inputString, i)
+ nextChar = sbyte(inputString, i+1)
+ -- print(i, string.char(currentChar), string.char(nextChar))
+
+ -- empty string
+ if currentChar == quote and nextChar == quote then
+ -- print("EMPTY STRING")
+ skipChar = 1
+ fieldStart = i + 2
+ -- print("fs+2:", fieldStart)
+
+ -- identifies the escape toggle.
+ -- This can only happen if fields have quotes around them
+ -- so the current "start" has to be where a quote character is.
+ elseif currentChar == quote and nextChar ~= quote and fieldStart == i then
+ -- print("New Quoted Field", i)
+ fieldStart = i + 1
+ i, doubleQuoteEscape = M.findClosingQuote(i+1, inputLength, inputString, quote, doubleQuoteEscape)
+ -- print("I VALUE", i, doubleQuoteEscape)
+ skipChar = 1
+
+ -- create some fields if we can!
+ elseif currentChar == delimiterByte then
+ -- create the new field
+ -- print(headerField[fieldNum])
+ if fieldsToKeep == nil or fieldsToKeep[headerField[fieldNum]] then
+ field = createField(inputString, quote, fieldStart, i, doubleQuoteEscape)
+ -- print("FIELD", field, "FIELDEND", headerField[fieldNum], lineNum)
+ -- outResults[headerField[fieldNum]][lineNum] = field
+ assignValue()
+ end
+ doubleQuoteEscape = false
+
+ fieldNum = fieldNum + 1
+ fieldStart = i + 1
+ -- print("fs+1:", fieldStart)
+ -- end
+
+ -- newline?!
+ elseif ((currentChar == CR and nextChar == LF) or currentChar == LF) then
+ if fieldsToKeep == nil or fieldsToKeep[headerField[fieldNum]] then
+ -- create the new field
+ field = createField(inputString, quote, fieldStart, i, doubleQuoteEscape)
+
+ -- outResults[headerField[fieldNum]][lineNum] = field
+ exit = assignValue()
+ if exit then
+ if (currentChar == CR and nextChar == LF) then
+ return headerField, i + 1
+ else
+ return headerField, i
+ end
+ end
+ end
+ doubleQuoteEscape = false
+
+ -- determine how line ends
+ if (currentChar == CR and nextChar == LF) then
+ -- print("CRLF DETECTED")
+ skipChar = 1
+ fieldStart = fieldStart + 1
+ -- print("fs:", fieldStart)
+ end
+
+ -- incrememnt for new line
+ if fieldNum < initialLineCount then
+ error('ftcsv: too few columns in row ' .. lineNum)
+ end
+ lineNum = lineNum + 1
+ outResults[lineNum] = {}
+ fieldNum = 1
+ fieldStart = i + 1 + skipChar
+ -- print("fs:", fieldStart)
+
+ end
+
+ i = i + 1 + skipChar
+ if (skipChar > 0) then
+ currentChar = sbyte(inputString, i)
+ else
+ currentChar = nextChar
+ end
+ skipChar = 0
+ end
+
+ -- create last new field
+ if fieldsToKeep == nil or fieldsToKeep[headerField[fieldNum]] then
+ field = createField(inputString, quote, fieldStart, i, doubleQuoteEscape)
+ assignValue()
+ end
+
+ -- clean up last line if it's weird (this happens when there is a CRLF newline at end of file)
+ -- doing a count gets it to pick up the oddballs
+ local finalLineCount = 0
+ local lastValue = nil
+ for k, v in pairs(outResults[lineNum]) do
+ finalLineCount = finalLineCount + 1
+ lastValue = v
+ end
+
+ -- this indicates a CRLF
+ -- print("Final/Initial", finalLineCount, initialLineCount)
+ if finalLineCount == 1 and lastValue == "" then
+ outResults[lineNum] = nil
+
+ -- otherwise there might not be enough line
+ elseif finalLineCount < initialLineCount then
+ error('ftcsv: too few columns in row ' .. lineNum)
+ end
+
+ return outResults
+end
+
+-- runs the show!
+function ftcsv.parse(inputFile, delimiter, options)
+ -- delimiter MUST be one character
+ assert(#delimiter == 1 and type(delimiter) == "string", "the delimiter must be of string type and exactly one character")
+
+ -- OPTIONS yo
+ local header = true
+ local rename
+ local fieldsToKeep = nil
+ local loadFromString = false
+ local headerFunc
+ if options then
+ if options.headers ~= nil then
+ assert(type(options.headers) == "boolean", "ftcsv only takes the boolean 'true' or 'false' for the optional parameter 'headers' (default 'true'). You passed in '" .. tostring(options.headers) .. "' of type '" .. type(options.headers) .. "'.")
+ header = options.headers
+ end
+ if options.rename ~= nil then
+ assert(type(options.rename) == "table", "ftcsv only takes in a key-value table for the optional parameter 'rename'. You passed in '" .. tostring(options.rename) .. "' of type '" .. type(options.rename) .. "'.")
+ rename = options.rename
+ end
+ if options.fieldsToKeep ~= nil then
+ assert(type(options.fieldsToKeep) == "table", "ftcsv only takes in a list (as a table) for the optional parameter 'fieldsToKeep'. You passed in '" .. tostring(options.fieldsToKeep) .. "' of type '" .. type(options.fieldsToKeep) .. "'.")
+ local ofieldsToKeep = options.fieldsToKeep
+ if ofieldsToKeep ~= nil then
+ fieldsToKeep = {}
+ for j = 1, #ofieldsToKeep do
+ fieldsToKeep[ofieldsToKeep[j]] = true
+ end
+ end
+ if header == false and options.rename == nil then
+ error("ftcsv: fieldsToKeep only works with header-less files when using the 'rename' functionality")
+ end
+ end
+ if options.loadFromString ~= nil then
+ assert(type(options.loadFromString) == "boolean", "ftcsv only takes a boolean value for optional parameter 'loadFromString'. You passed in '" .. tostring(options.loadFromString) .. "' of type '" .. type(options.loadFromString) .. "'.")
+ loadFromString = options.loadFromString
+ end
+ if options.headerFunc ~= nil then
+ assert(type(options.headerFunc) == "function", "ftcsv only takes a function value for optional parameter 'headerFunc'. You passed in '" .. tostring(options.headerFunc) .. "' of type '" .. type(options.headerFunc) .. "'.")
+ headerFunc = options.headerFunc
+ end
+ end
+
+ -- handle input via string or file!
+ local inputString
+ if loadFromString then
+ inputString = inputFile
+ else
+ inputString = loadFile(inputFile)
+ end
+ local inputLength = #inputString
+
+ -- if they sent in an empty file...
+ if inputLength == 0 then
+ error('ftcsv: Cannot parse an empty file')
+ end
+
+ -- parse through the headers!
+ local headerField, i = parseString(inputString, inputLength, delimiter, 1)
+ i = i + 1 -- start at the next char
+
+ -- make sure a header isn't empty
+ for _, header in ipairs(headerField) do
+ if #header == 0 then
+ error('ftcsv: Cannot parse a file which contains empty headers')
+ end
+ end
+
+ -- for files where there aren't headers!
+ if header == false then
+ i = 0
+ for j = 1, #headerField do
+ headerField[j] = j
+ end
+ end
+
+ -- rename fields as needed!
+ if rename then
+ -- basic rename (["a" = "apple"])
+ for j = 1, #headerField do
+ if rename[headerField[j]] then
+ -- print("RENAMING", headerField[j], rename[headerField[j]])
+ headerField[j] = rename[headerField[j]]
+ end
+ end
+ -- files without headers, but with a rename need to be handled too!
+ if #rename > 0 then
+ for j = 1, #rename do
+ headerField[j] = rename[j]
+ end
+ end
+ end
+
+ -- apply some sweet header manipulation
+ if headerFunc then
+ for j = 1, #headerField do
+ headerField[j] = headerFunc(headerField[j])
+ end
+ end
+
+ local output = parseString(inputString, inputLength, delimiter, i, headerField, fieldsToKeep)
+ return output
+end
+
+-- a function that delimits " to "", used by the writer
+local function delimitField(field)
+ field = tostring(field)
+ if field:find('"') then
+ return field:gsub('"', '""')
+ else
+ return field
+ end
+end
+
+-- a function that compiles some lua code to quickly print out the csv
+local function writer(inputTable, dilimeter, headers)
+ -- they get re-created here if they need to be escaped so lua understands it based on how
+ -- they came in
+ for i = 1, #headers do
+ if inputTable[1][headers[i]] == nil then
+ error("ftcsv: the field '" .. headers[i] .. "' doesn't exist in the inputTable")
+ end
+ if headers[i]:find('"') then
+ headers[i] = headers[i]:gsub('"', '\\"')
+ end
+ end
+
+ local outputFunc = [[
+ local state, i = ...
+ local d = state.delimitField
+ i = i + 1;
+ if i > state.tableSize then return nil end;
+ return i, '"' .. d(state.t[i]["]] .. table.concat(headers, [["]) .. '"]] .. dilimeter .. [["' .. d(state.t[i]["]]) .. [["]) .. '"\r\n']]
+
+ -- print(outputFunc)
+
+ local state = {}
+ state.t = inputTable
+ state.tableSize = #inputTable
+ state.delimitField = delimitField
+
+ return M.load(outputFunc), state, 0
+
+end
+
+-- takes the values from the headers in the first row of the input table
+local function extractHeaders(inputTable)
+ local headers = {}
+ for key, _ in pairs(inputTable[1]) do
+ headers[#headers+1] = key
+ end
+
+ -- lets make the headers alphabetical
+ table.sort(headers)
+
+ return headers
+end
+
+-- turns a lua table into a csv
+-- works really quickly with luajit-2.1, because table.concat life
+function ftcsv.encode(inputTable, delimiter, options)
+ local output = {}
+
+ -- dilimeter MUST be one character
+ assert(#delimiter == 1 and type(delimiter) == "string", "the delimiter must be of string type and exactly one character")
+
+ -- grab the headers from the options if they are there
+ local headers = nil
+ if options then
+ if options.fieldsToKeep ~= nil then
+ assert(type(options.fieldsToKeep) == "table", "ftcsv only takes in a list (as a table) for the optional parameter 'fieldsToKeep'. You passed in '" .. tostring(options.headers) .. "' of type '" .. type(options.headers) .. "'.")
+ headers = options.fieldsToKeep
+ end
+ end
+ if headers == nil then
+ headers = extractHeaders(inputTable)
+ end
+
+ -- newHeaders are needed if there are quotes within the header
+ -- because they need to be escaped
+ local newHeaders = {}
+ for i = 1, #headers do
+ if headers[i]:find('"') then
+ newHeaders[i] = headers[i]:gsub('"', '""')
+ else
+ newHeaders[i] = headers[i]
+ end
+ end
+ output[1] = '"' .. table.concat(newHeaders, '"' .. delimiter .. '"') .. '"\r\n'
+
+ -- add each line by line.
+ for i, line in writer(inputTable, delimiter, headers) do
+ output[i+1] = line
+ end
+ return table.concat(output)
+end
+
+return ftcsv
+