From fe27d16bdc4b83c29e55ce5f2738e83ba6e57c65 Mon Sep 17 00:00:00 2001 From: Vinzenz Rosenkranz Date: Fri, 13 Dec 2024 14:00:33 +0100 Subject: [PATCH] fix handling of delimiter in actual csv data; added test to prevent header and row column count mismatch Signed-off-by: Vinzenz Rosenkranz --- app/Exceptions/CsvColumnMismatchException.php | 19 +++++ .../AttributeImportExceptionStruct.php | 1 - app/File/Csv.php | 57 +++++++------- app/File/Parser.php | 2 +- app/Import/EntityImporter.php | 28 ++++--- app/Import/ImportResolution.php | 2 - lang/de/entity-importer.php | 1 + lang/en/entity-importer.php | 1 + tests/Feature/ApiDataImporterTest.php | 75 +++++++++++-------- 9 files changed, 115 insertions(+), 71 deletions(-) create mode 100644 app/Exceptions/CsvColumnMismatchException.php diff --git a/app/Exceptions/CsvColumnMismatchException.php b/app/Exceptions/CsvColumnMismatchException.php new file mode 100644 index 000000000..9531fa9f7 --- /dev/null +++ b/app/Exceptions/CsvColumnMismatchException.php @@ -0,0 +1,19 @@ +headerLine = $headerLine; + $this->dataLine = $dataLine; + $this->headerColumns = $headerColumns; + $this->dataColumns = $dataColumns; + } +} \ No newline at end of file diff --git a/app/Exceptions/Structs/AttributeImportExceptionStruct.php b/app/Exceptions/Structs/AttributeImportExceptionStruct.php index 931f6effe..33285d59f 100644 --- a/app/Exceptions/Structs/AttributeImportExceptionStruct.php +++ b/app/Exceptions/Structs/AttributeImportExceptionStruct.php @@ -5,7 +5,6 @@ use JsonSerializable; class AttributeImportExceptionStruct implements JsonSerializable { - public $type; public $columnIndex; public $columnName; diff --git a/app/File/Csv.php b/app/File/Csv.php index 93dd78f55..f8438c3ab 100644 --- a/app/File/Csv.php +++ b/app/File/Csv.php @@ -2,11 +2,13 @@ namespace App\File; -class Csv extends Parser { +use App\Exceptions\CsvColumnMismatchException; +class Csv extends Parser { private string $delimiter; private bool $hasHeaderRow; private array $headers = []; + private int $headerCount = 0; private string $encoding = 'UTF-8'; private int $rows = 0; @@ -16,21 +18,20 @@ public function __construct(bool $hasHeaderRow = false, string $delimiter = ",", $this->encoding = $encoding; } - public function getHeaders() { + public function getHeaders(): array { return $this->headers; } - - /** * Parses a CSV file. - * + * * @param resource $fileHandle - The file handle to the CSV file + * @param callable $$rowCallback - */ - public function parse($fileHandle, $rowCallback) { + public function parse($fileHandle, callable $rowCallback): void { $rowIndex = 0; $this->rows = 0; - // We don't use the fgetcsvfunction to change the file encoding to UTF-8. + // We don't use the fgetcsv function to change the file encoding to UTF-8. while(($row = fgets($fileHandle)) !== false) { $this->rows++; if($this->hasHeaderRow && $rowIndex === 0) { @@ -47,14 +48,14 @@ public function parse($fileHandle, $rowCallback) { /** * Gets the headers from the CSV file. - * + * * If there is no header (hasHeaderRow = false), it will return an array of numbers (1, 2, 3, ...) * and the array will not be changed. - * - * @param array $lines - The lines of the CSV file + * + * @param resource $lines - The lines of the CSV file * @return array $headers - The headers of the CSV file */ - public function parseHeaders($fileHandle) { + public function parseHeaders($fileHandle): array { $headers = null; $row = fgets($fileHandle); @@ -69,15 +70,17 @@ public function parseHeaders($fileHandle) { } else { $headers = $this->generateNumberHeaders($row); } + $this->headerCount = count($headers); rewind($fileHandle); $this->headers = $headers; return $headers; } - private function generateNumberHeaders($row) { + private function generateNumberHeaders(string $row): array { $headers = []; - $parts = explode($this->delimiter, $row); + // $parts = explode($this->delimiter, $row); + $parts = $this->fromCsvLine($row); for($i = 1; $i <= count($parts); $i++) { $headers[] = "#$i"; } @@ -87,28 +90,33 @@ private function generateNumberHeaders($row) { /** * Gets a single row from a CSV line string. - * + * * @param string $line - The line from the CSV file * @return array $row - Associative array using the headers as keys and the values from the line as values */ - private function parseRow($line) { + private function parseRow(string $line): array { if(empty($this->headers)) { throw new \Exception("Headers are not set. Run parseHeaders first."); } $row = []; - $arr = $this->fromCsvLine($line); - for($i = 0; $i < count($arr); $i++) { - $row[$this->headers[$i]] = trim($arr[$i]); + $cols = $this->fromCsvLine($line); + $colCount = count($cols); + if($this->headerCount != $colCount) { + $headerRow = implode($this->delimiter, $this->headers); + $dataRow = implode($this->delimiter, $cols); + throw new CsvColumnMismatchException($headerRow, $dataRow, $this->headerCount, $colCount); + } + for($i = 0; $i < $colCount; $i++) { + $row[$this->headers[$i]] = trim($cols[$i]); } return $row; } - /** * Converts the data to UTF-8 if it is not already. - * + * * @param array|string $data - The data to convert * @return array|string $data - The converted data */ @@ -129,12 +137,11 @@ private function isUtf8(): bool { return $this->encoding == 'UTF-8'; } - - public function getRows() { + public function getRows(): int { return $this->rows; } - public function getDataRows() { + public function getDataRows(): int { return $this->rows - ($this->hasHeaderRow ? 1 : 0); } @@ -142,11 +149,11 @@ public function getDataRows() { * Helper function to get the CSV from a line. * Always using the correct delimiter. */ - private function fromCsvLine($line) { + private function fromCsvLine(string $line): array { return str_getcsv($line, separator: $this->delimiter); } - private function verifyColumnExists($column) { + private function verifyColumnExists(string $column): void { if(!in_array($column, $this->headers)) { throw new \Exception("Column '$column' does not exist."); } diff --git a/app/File/Parser.php b/app/File/Parser.php index b1091a8fa..829aedbac 100644 --- a/app/File/Parser.php +++ b/app/File/Parser.php @@ -3,5 +3,5 @@ namespace App\File; abstract class Parser { - abstract public function parse(string $filePath, callable $rowCallback); + abstract public function parse($filePath, callable $rowCallback); } diff --git a/app/Import/EntityImporter.php b/app/Import/EntityImporter.php index 56a927415..dec660a66 100644 --- a/app/Import/EntityImporter.php +++ b/app/Import/EntityImporter.php @@ -2,6 +2,7 @@ namespace App\Import; +use App\Exceptions\CsvColumnMismatchException; use App\File\Csv; use App\Entity; use App\EntityType; @@ -40,7 +41,7 @@ public function __construct($metadata, $data) { $this->entityTypeId = $data['entity_type_id']; $this->attributesMap = $data['attributes']; - // The parent column is optional, therefore we only set it + // The parent column is optional, therefore we only set it // to another value than null, if there is valid data set. if(array_key_exists('parent_column', $data)) { $parentColumn = trim($data['parent_column']); @@ -103,14 +104,23 @@ public function validateImportData($filepath) { return $this->resolver; } - $csvTable->parse($handle, function ($row, $index) use (&$result, &$status) { - $namesValid = $this->validateName($row, $index); - if($namesValid) { - // The location depends on the name column. If the name is not correct, we can't check the location. - $this->validateLocation($row, $index); - } - $this->validateAttributesInRow($row, $index); - }); + try { + $csvTable->parse($handle, function ($row, $index) { + $namesValid = $this->validateName($row, $index); + if($namesValid) { + // The location depends on the name column. If the name is not correct, we can't check the location. + $this->validateLocation($row, $index); + } + $this->validateAttributesInRow($row, $index); + }); + } catch(CsvColumnMismatchException $csvMismatchException) { + return $this->resolver->conflict(__("entity-importer.csv-column-mismatch", [ + "data" => $csvMismatchException->dataLine, + "data_count" => $csvMismatchException->dataColumns, + "header_data" => $csvMismatchException->headerLine, + "header_count" => $csvMismatchException->headerColumns, + ])); + } if($csvTable->getDataRows() == 0) { $this->resolver->conflict(__("entity-importer.empty")); diff --git a/app/Import/ImportResolution.php b/app/Import/ImportResolution.php index 131acf3cd..1166c039f 100644 --- a/app/Import/ImportResolution.php +++ b/app/Import/ImportResolution.php @@ -2,7 +2,6 @@ namespace App\Import; - enum ImportResolutionType { case CREATE; case UPDATE; @@ -10,7 +9,6 @@ enum ImportResolutionType { } class ImportResolution { - private array $status; private array $errors = []; diff --git a/lang/de/entity-importer.php b/lang/de/entity-importer.php index 7fb38671c..6eeb6e31d 100644 --- a/lang/de/entity-importer.php +++ b/lang/de/entity-importer.php @@ -6,6 +6,7 @@ 'file-not-found' => 'Datei konnte nicht gelesen werden.', 'missing-data' => 'Benötigte Spalte fehlt: :column', 'invalid-data' => 'Ungültige Daten: [:column] => :value', + 'csv-column-mismatch' => 'Die Anzahl der Spalten in der aktuellen Zeile \':data\' (:data_count Spalten) stimmt nicht mit den Spalten der Kopfzeile \':header_data\' (:header_count Spalten) überein.', 'name-column-does-not-exist' => 'Die Spalte für den Namen existiert nicht: :column', 'parent-column-does-non-exist' => 'Die Spalte für die Eltern-Entität existiert nicht: :column', 'parent-entity-does-not-exist' => 'Die Eltern-Entität existiert nicht: :entity', diff --git a/lang/en/entity-importer.php b/lang/en/entity-importer.php index 1cf3d9000..ec4d36ff0 100644 --- a/lang/en/entity-importer.php +++ b/lang/en/entity-importer.php @@ -5,6 +5,7 @@ 'file-not-found' => 'File could not be read.', 'missing-data' => 'Required column is missing: :column', 'invalid-data' => 'Invalid data: [:column] => :value', + 'csv-column-mismatch' => 'Column count of current row \':data\' (:data_count columns) does not match column count in header row \':header_data\' (:header_count columns).', 'attribute-could-not-be-imported' => 'Attribute could not be imported: :attribute', 'attribute-id-does-not-exist' => 'The attribute id does not exist: :attributes', 'attribute-column-does-not-exist' => 'The attribute columns do not exist: :columns', diff --git a/tests/Feature/ApiDataImporterTest.php b/tests/Feature/ApiDataImporterTest.php index 32cb5b822..14e37556c 100644 --- a/tests/Feature/ApiDataImporterTest.php +++ b/tests/Feature/ApiDataImporterTest.php @@ -7,12 +7,10 @@ use Illuminate\Http\UploadedFile; class ApiDataImporterTest extends TestCase { - - /** - * + * * Utilities - * + * */ private function createDefaultCSVFile($delimiter = ",", $hasHeaderRow = true) { $columns = [ @@ -22,11 +20,11 @@ private function createDefaultCSVFile($delimiter = ",", $hasHeaderRow = true) { ['ياماتو', 'Site B', 'أصفر', "عَلِي;يُوْسِف;حَسَن"], ['Site A', '', 'Updated', 'Alternative Site'] ]; - + if($hasHeaderRow) { array_unshift($columns, ['name', 'parent', 'Notizen', 'Alternativer Name']); } - + return $this->createCSVFile($columns, $delimiter); } @@ -34,11 +32,11 @@ private function createDimensionsCSVFile($delimiter = ",", $hasHeaderRow = true) $columns = [ ['imported', 'Site A', '1;2;3;cm'], ]; - + if($hasHeaderRow) { array_unshift($columns, ['name', 'parent', 'Abmessungen']); } - + return $this->createCSVFile($columns, $delimiter); } @@ -55,8 +53,7 @@ private function createCSVFile(array $tableData, $delimiter = ",") { return UploadedFile::fake()->createWithContent('data.csv', $content); } - - + private function getData(?string $name = 'name', int $entityTypeId = 3, array $attributes = [], ?string $parentColumn = null) { return json_encode([ "name_column" => $name, @@ -65,7 +62,7 @@ private function getData(?string $name = 'name', int $entityTypeId = 3, array $a "attributes" => $attributes, ]); } - + private function getDimensionsData() { return $this->getData( parentColumn: 'parent', @@ -87,13 +84,13 @@ private function getMetaData(string $delimiter = ",", bool $hasHeaderRow = true) /** * We run the tests two times. Once with a header row and once without. */ - + /** - * + * * TESTS WITH NAMED HEADERS - * + * */ - + public function testValidationEmptyFile() { $file = $this->createCSVFile([]); $metadata = $this->getMetaData(); @@ -137,7 +134,6 @@ public function testValidationEmptyWithHeaders() { ]); } - public function testValidationNamesColumnMissingError() { $file = $this->createCSVFile([['other'], [''], ['other 2']]); @@ -180,7 +176,6 @@ public function testValidationNamesColumnNullError() { ]); } - public function testValidationWithAllNamesSetCorrectly() { $this->userRequest()->post('/api/v1/entity/import/validate', [ 'file' => $this->createDefaultCSVFile(), @@ -232,8 +227,6 @@ public function testValidationInvalidFile() { ]); } - - public function testValidationIncorrectDelimiter() { $this->userRequest()->post('/api/v1/entity/import/validate', [ 'file' => $this->createDefaultCSVFile(';'), @@ -251,7 +244,6 @@ public function testValidationIncorrectDelimiter() { ]); } - public function testValidationCorrectDelimiter() { $this->userRequest()->post('/api/v1/entity/import/validate', [ 'file' => $this->createDefaultCSVFile(';'), @@ -269,7 +261,6 @@ public function testValidationCorrectDelimiter() { ]); } - public function testValidationWithParentColumn() { $this->userRequest()->post('/api/v1/entity/import/validate', [ 'file' => $this->createDefaultCSVFile(), @@ -287,7 +278,6 @@ public function testValidationWithParentColumn() { ]);; } - public function testValidationWithParentColumnChildTypeNotAllowed() { $file = $this->createCSVFile([ ['name', 'parent', 'Notizen'], @@ -332,7 +322,6 @@ public function testValidationWithParentColumnTopLevelElementAlreadyExists() { ]); } - public function testValidationWithParentColumnSubElementAlreadyExists() { $file = $this->createCSVFile([ ['name', 'parent', 'Notizen'], @@ -375,7 +364,6 @@ public function testValidationOfAttributes() { ]); } - public function testValidationOfAttributesInvalidColumnName() { $this->userRequest()->post('/api/v1/entity/import/validate', [ 'file' => $this->createDefaultCSVFile(), @@ -488,6 +476,29 @@ public function testValidationOfAttributeValueFails() { ]); } + public function testValidationFailsWithMismatchingColumns() { + $headerRow = ["name", "parent", "Notes", "Description"]; + $headerRowString = implode(',', $headerRow); + $headerCount = count($headerRow); + $dataRow = ["Site A", "", "updated"]; + $dataRowString = implode(',', $dataRow); + $dataCount = count($dataRow); + $response = $this->userRequest()->post('/api/v1/entity/import/validate', [ + 'file' => $this->createCSVFile([ + $headerRow, + $dataRow, + ]), + 'data' => $this->getData(), + 'metadata' => $this->getMetaData(), + ]) + ->assertStatus(200); + + $response->assertJson([ + 'errors' => [ + "Column count of current row '{$dataRowString}' ({$dataCount} columns) does not match column count in header row '{$headerRowString}' ({$headerCount} columns)." + ], + ]); + } public function testDataImportSuccess() { $response = $this->userRequest()->post('/api/v1/entity/import', [ @@ -504,8 +515,6 @@ public function testDataImportSuccess() { $response->assertStatus(201); - - $entityId = null; try { $entityId = Entity::getFromPath('Site A\\\\imported'); @@ -592,13 +601,13 @@ public function testDataImportFailsWithIncompatibleMapping() { ] ]); } - + /** - * + * * TESTS WITH NO HEADERS - * + * */ - + public function testValidationEmptyFileNoHeaders() { $file = $this->createCSVFile([]); $metadata = $this->getMetaData(hasHeaderRow: false); @@ -851,7 +860,7 @@ public function testValidationOfAttributesNoHeaders() { $this->userRequest()->post('/api/v1/entity/import/validate', [ 'file' => $this->createDefaultCSVFile(hasHeaderRow: false), 'data' => $this->getData( - name: "#1", + name: "#1", attributes: [ 8 => "#3", 15 => "#4" @@ -875,7 +884,7 @@ public function testValidationOfAttributesInvalidColumnNameNoHeaders() { $this->userRequest()->post('/api/v1/entity/import/validate', [ 'file' => $this->createDefaultCSVFile(hasHeaderRow: false), 'data' => $this->getData( - name: "#1", + name: "#1", attributes: [ 8 => "#10", 15 => "#11" @@ -921,7 +930,7 @@ public function testValidationOfAttributesInvalidAttributeIdAndInvalidColumnName $this->userRequest()->post('/api/v1/entity/import/validate', [ 'file' => $this->createDefaultCSVFile(hasHeaderRow: false), 'data' => $this->getData( - name: "#1", + name: "#1", attributes: [ -1 => "#11", ]),