From 20f4dd306e4cb34b14c156a731f0e03cdac9aa45 Mon Sep 17 00:00:00 2001 From: FourierTransformer Date: Sun, 6 Nov 2016 11:36:24 -0600 Subject: [PATCH] better error handling with unit tests! --- ERRORS.md | 15 +++++ README.md | 6 ++ ...1.1.1-1.rockspec => ftcsv-1.1.2-1.rockspec | 4 +- ftcsv.lua | 59 +++++++++++++++---- spec/bad_csvs/empty_file.csv | 0 spec/bad_csvs/empty_file_newline.csv | 1 + spec/bad_csvs/empty_header.csv | 2 + spec/bad_csvs/too_few_cols.csv | 3 + spec/bad_csvs/too_few_cols_end.csv | 3 + spec/bad_csvs/too_many_cols.csv | 3 + spec/error_spec.lua | 44 ++++++++++++++ spec/feature_spec.lua | 5 -- 12 files changed, 125 insertions(+), 20 deletions(-) create mode 100644 ERRORS.md rename ftcsv-1.1.1-1.rockspec => ftcsv-1.1.2-1.rockspec (95%) create mode 100644 spec/bad_csvs/empty_file.csv create mode 100644 spec/bad_csvs/empty_file_newline.csv create mode 100644 spec/bad_csvs/empty_header.csv create mode 100644 spec/bad_csvs/too_few_cols.csv create mode 100644 spec/bad_csvs/too_few_cols_end.csv create mode 100644 spec/bad_csvs/too_many_cols.csv create mode 100644 spec/error_spec.lua diff --git a/ERRORS.md b/ERRORS.md new file mode 100644 index 0000000..7a136e4 --- /dev/null +++ b/ERRORS.md @@ -0,0 +1,15 @@ +#Error Handling +Below you can find a more detailed explanation of some of the errors that can be encountered while using ftcsv. For parsing, examples of these files can be found in /spec/bad_csvs/ + + + +##Parsing +Note: `[row_number]` indicates the row number of the parsed lua table. As such, it will be one off from the line number in the csv. However, for header-less files, the row returned *will* match the csv line number. + +| Error Message | Detailed Explanation | +| ------------- | ------------- | +| ftcsv: Cannot parse an empty file | The file passed in contains no information. It is an empty file. | +| ftcsv: Cannot parse a file which contains empty headers | If a header field contains no information, then it can't be parsed
(ex: `Name,City,,Zipcode`) | +| ftcsv: too few columns in row [row_number] | The number of columns is less than the amount in the header after transformations (renaming, keeping certain fields, etc) | +| ftcsv: too many columns in row [row_number] | The number of columns is greater than the amount in the header after transformations. It can't map the field's count with an existing header. | +| ftcsv: File not found at [path] | When loading, lua can't open the file at [path] | \ No newline at end of file diff --git a/README.md b/README.md index 3b0ae93..6ec5a93 100644 --- a/README.md +++ b/README.md @@ -117,8 +117,14 @@ I did some basic testing and found that in lua, if you want to iterate over a st +## Error Handling +ftcsv returns a litany of errors when passed a bad csv file or incorrect parameters. You can find a more detailed explanation of the more cryptic errors in [ERRORS.md](ERRORS.md) + + + ## Contributing Feel free to create a new issue for any bugs you've found or help you need. If you want to contribute back to the project please do the following: + 0. If it's a major change (aka more than a quick little < 5 line bugfix), please create an issue so we can discuss it! 1. Fork the repo 2. Create a new branch 3. Push your changes to the branch diff --git a/ftcsv-1.1.1-1.rockspec b/ftcsv-1.1.2-1.rockspec similarity index 95% rename from ftcsv-1.1.1-1.rockspec rename to ftcsv-1.1.2-1.rockspec index 235e058..b73c94f 100644 --- a/ftcsv-1.1.1-1.rockspec +++ b/ftcsv-1.1.2-1.rockspec @@ -1,9 +1,9 @@ package = "ftcsv" -version = "1.1.1-1" +version = "1.1.2-1" source = { url = "git://github.com/FourierTransformer/ftcsv.git", - tag = "1.1.1" + tag = "1.1.2" } description = { diff --git a/ftcsv.lua b/ftcsv.lua index c9ef444..172434c 100644 --- a/ftcsv.lua +++ b/ftcsv.lua @@ -1,5 +1,5 @@ local ftcsv = { - _VERSION = 'ftcsv 1.1.1', + _VERSION = 'ftcsv 1.1.2', _DESCRIPTION = 'CSV library for Lua', _URL = 'https://github.com/FourierTransformer/ftcsv', _LICENSE = [[ @@ -97,7 +97,7 @@ end -- load an entire file into memory local function loadFile(textFile) local file = io.open(textFile, "r") - if not file then error("File not found at " .. textFile) end + if not file then error("ftcsv: File not found at " .. textFile) end local allLines = file:read("*all") file:close() return allLines @@ -156,7 +156,21 @@ local function parseString(inputString, inputLength, delimiter, i, headerField, outResults = {} outResults[1] = {} assignValue = function() - outResults[lineNum][headerField[fieldNum]] = field + if not pcall(function() + outResults[lineNum][headerField[fieldNum]] = field + end) then + error('ftcsv: too many columns in row ' .. lineNum) + end + end + end + + -- calculate the initial line count (note: this can include duplicates) + local headerFieldsExist = {} + local initialLineCount = 0 + for _, value in pairs(headerField) do + if not headerFieldsExist[value] and (fieldsToKeep == nil or fieldsToKeep[value]) then + headerFieldsExist[value] = true + initialLineCount = initialLineCount + 1 end end @@ -225,6 +239,9 @@ local function parseString(inputString, inputLength, delimiter, i, headerField, end -- incrememnt for new line + if fieldNum < initialLineCount then + error('ftcsv: too few columns in row ' .. lineNum) + end lineNum = lineNum + 1 outResults[lineNum] = {} fieldNum = 1 @@ -251,16 +268,20 @@ local function parseString(inputString, inputLength, delimiter, i, headerField, -- clean up last line if it's weird (this happens when there is a CRLF newline at end of file) -- doing a count gets it to pick up the oddballs local finalLineCount = 0 - for _, _ in pairs(outResults[lineNum]) do + local lastValue = nil + for k, v in pairs(outResults[lineNum]) do finalLineCount = finalLineCount + 1 + lastValue = v end - local initialLineCount = 0 - for _, _ in pairs(outResults[1]) do - initialLineCount = initialLineCount + 1 - end + + -- this indicates a CRLF -- print("Final/Initial", finalLineCount, initialLineCount) - if finalLineCount ~= initialLineCount then + if finalLineCount == 1 and lastValue == "" then outResults[lineNum] = nil + + -- otherwise there might not be enough line + elseif finalLineCount < initialLineCount then + error('ftcsv: too few columns in row ' .. lineNum) end return outResults @@ -295,8 +316,8 @@ function ftcsv.parse(inputFile, delimiter, options) fieldsToKeep[ofieldsToKeep[j]] = true end end - if header == false then - assert(next(rename) ~= nil, "ftcsv can only have fieldsToKeep for header-less files when they have been renamed. Please add the 'rename' option and try again.") + if header == false and options.rename == nil then + error("ftcsv: fieldsToKeep only works with header-less files when using the 'rename' functionality") end end if options.loadFromString ~= nil then @@ -318,10 +339,22 @@ function ftcsv.parse(inputFile, delimiter, options) end local inputLength = #inputString + -- if they sent in an empty file... + if inputLength == 0 then + error('ftcsv: Cannot parse an empty file') + end + -- parse through the headers! local headerField, i = parseString(inputString, inputLength, delimiter, 0) i = i + 1 -- start at the next char + -- make sure a header isn't empty + for _, header in ipairs(headerField) do + if #header == 0 then + error('ftcsv: Cannot parse a file which contains empty headers') + end + end + -- for files where there aren't headers! if header == false then i = 0 @@ -347,7 +380,7 @@ function ftcsv.parse(inputFile, delimiter, options) end end - -- apply some sweet header manuipulation + -- apply some sweet header manipulation if headerFunc then for j = 1, #headerField do headerField[j] = headerFunc(headerField[j]) @@ -374,7 +407,7 @@ local function writer(inputTable, dilimeter, headers) -- they came in for i = 1, #headers do if inputTable[1][headers[i]] == nil then - error("the field '" .. headers[i] .. "' doesn't exist in the table") + error("ftcsv: the field '" .. headers[i] .. "' doesn't exist in the inputTable") end if headers[i]:find('"') then headers[i] = headers[i]:gsub('"', '\\"') diff --git a/spec/bad_csvs/empty_file.csv b/spec/bad_csvs/empty_file.csv new file mode 100644 index 0000000..e69de29 diff --git a/spec/bad_csvs/empty_file_newline.csv b/spec/bad_csvs/empty_file_newline.csv new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/spec/bad_csvs/empty_file_newline.csv @@ -0,0 +1 @@ + diff --git a/spec/bad_csvs/empty_header.csv b/spec/bad_csvs/empty_header.csv new file mode 100644 index 0000000..f02adab --- /dev/null +++ b/spec/bad_csvs/empty_header.csv @@ -0,0 +1,2 @@ +a,b, +herp,derp \ No newline at end of file diff --git a/spec/bad_csvs/too_few_cols.csv b/spec/bad_csvs/too_few_cols.csv new file mode 100644 index 0000000..6d20020 --- /dev/null +++ b/spec/bad_csvs/too_few_cols.csv @@ -0,0 +1,3 @@ +a,b,c +failing,hard +man,oh,well... \ No newline at end of file diff --git a/spec/bad_csvs/too_few_cols_end.csv b/spec/bad_csvs/too_few_cols_end.csv new file mode 100644 index 0000000..2b32766 --- /dev/null +++ b/spec/bad_csvs/too_few_cols_end.csv @@ -0,0 +1,3 @@ +a,b,c +man,oh,well... +failing,hard \ No newline at end of file diff --git a/spec/bad_csvs/too_many_cols.csv b/spec/bad_csvs/too_many_cols.csv new file mode 100644 index 0000000..18f92b9 --- /dev/null +++ b/spec/bad_csvs/too_many_cols.csv @@ -0,0 +1,3 @@ +a,b,c +no,one,knows +what,am,i,doing? \ No newline at end of file diff --git a/spec/error_spec.lua b/spec/error_spec.lua new file mode 100644 index 0000000..5d8d412 --- /dev/null +++ b/spec/error_spec.lua @@ -0,0 +1,44 @@ +local ftcsv = require('ftcsv') + +local files = { + {"empty_file", "ftcsv: Cannot parse an empty file"}, + {"empty_file_newline", "ftcsv: Cannot parse a file which contains empty headers"}, + {"empty_header", "ftcsv: Cannot parse a file which contains empty headers"}, + {"too_few_cols", "ftcsv: too few columns in row 1"}, + {"too_few_cols_end", "ftcsv: too few columns in row 2"}, + {"too_many_cols", "ftcsv: too many columns in row 2"}, + {"dne", "ftcsv: File not found at spec/bad_csvs/dne.csv"} +} + +describe("csv decode error", function() + for _, value in ipairs(files) do + it("should error out " .. value[1], function() + local test = function() + ftcsv.parse("spec/bad_csvs/" .. value[1] .. ".csv", ",") + end + assert.has_error(test, value[2]) + end) + end +end) + +it("should error out for fieldsToKeep if no headers and no renaming takes place", function() + local test = function() + local options = {loadFromString=true, headers=false, fieldsToKeep={1, 2}} + ftcsv.parse("apple>banana>carrot\ndiamond>emerald>pearl", ">", options) + end + assert.has_error(test, "ftcsv: fieldsToKeep only works with header-less files when using the 'rename' functionality") +end) + +it("should error out when you want to encode a table and specify a field that doesn't exist", function() + local encodeThis = { + {a = 'herp1', b = 'derp1'}, + {a = 'herp2', b = 'derp2'}, + {a = 'herp3', b = 'derp3'}, + } + + local test = function() + ftcsv.encode(encodeThis, ">", {fieldsToKeep={"c"}}) + end + + assert.has_error(test, "ftcsv: the field 'c' doesn't exist in the inputTable") +end) \ No newline at end of file diff --git a/spec/feature_spec.lua b/spec/feature_spec.lua index 3810f0a..8d5b75e 100644 --- a/spec/feature_spec.lua +++ b/spec/feature_spec.lua @@ -107,11 +107,6 @@ describe("csv features", function() assert.are.same(expected, actual) end) - it("should error out for fieldsToKeep if no headers and no renaming", function() - local options = {loadFromString=true, headers=false, fieldsToKeep={1, 2}} - assert.has.errors(function() ftcsv.parse("apple>banana>carrot\ndiamond>emerald>pearl", ">", options) end) - end) - it("should handle only renaming fields from files without headers", function() local expected = {} expected[1] = {}