Skip to content

Commit

Permalink
Performance Improvements for Csv Reader (#3769)
Browse files Browse the repository at this point in the history
* Performance Improvements for Csv Reader

Investigating issue #381, a means was suggested to duplicated a problem, but no problem occurred ... except for performance. This involved a spreadsheet with a large number of cells, definitely not PhpSpreadsheet's strong point; even so, the program (entirely available in the issue) took a disastrous two or so hours to complete on my system. Looking at the Csv Reader code, several opportunities to cache results and avoid function calls jumped out, none of which seem to materially add to the maintenance burden of the program. Testing these changes resulted in a run time of about 20 minutes, still hardly a thing of beauty, but a huge improvement over the original and therefore worth proceeding with.

* Redo CsvIssue2232Test

Test cases included duplicates, and didn't account for some things (e.g. French locale will treat both 'vrai' and 'true' as true).

* Additional Optimization
  • Loading branch information
oleibman authored Oct 18, 2023
1 parent bc9ca28 commit 1282f3d
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 23 deletions.
55 changes: 37 additions & 18 deletions src/PhpSpreadsheet/Reader/Csv.php
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
use PhpOffice\PhpSpreadsheet\Reader\Exception as ReaderException;
use PhpOffice\PhpSpreadsheet\Shared\StringHelper;
use PhpOffice\PhpSpreadsheet\Spreadsheet;
use PhpOffice\PhpSpreadsheet\Style\NumberFormat;
use PhpOffice\PhpSpreadsheet\Worksheet\Worksheet;

class Csv extends BaseReader
Expand Down Expand Up @@ -102,6 +101,14 @@ class Csv extends BaseReader
/** @var bool */
private $sheetNameIsFileName = false;

private string $getTrue = 'true';

private string $getFalse = 'false';

private string $thousandsSeparator = ',';

private string $decimalSeparator = '.';

/**
* Create a new CSV Reader instance.
*/
Expand Down Expand Up @@ -234,13 +241,14 @@ public function listWorksheetInfo(string $filename): array
$worksheetInfo[0]['lastColumnIndex'] = 0;
$worksheetInfo[0]['totalRows'] = 0;
$worksheetInfo[0]['totalColumns'] = 0;
$delimiter = $this->delimiter ?? '';

// Loop through each line of the file in turn
$rowData = fgetcsv($fileHandle, 0, $this->delimiter ?? '', $this->enclosure, $this->escapeCharacter);
$rowData = fgetcsv($fileHandle, 0, $delimiter, $this->enclosure, $this->escapeCharacter);
while (is_array($rowData)) {
++$worksheetInfo[0]['totalRows'];
$worksheetInfo[0]['lastColumnIndex'] = max($worksheetInfo[0]['lastColumnIndex'], count($rowData) - 1);
$rowData = fgetcsv($fileHandle, 0, $this->delimiter ?? '', $this->enclosure, $this->escapeCharacter);
$rowData = fgetcsv($fileHandle, 0, $delimiter, $this->enclosure, $this->escapeCharacter);
}

$worksheetInfo[0]['lastColumnLetter'] = Coordinate::stringFromColumnIndex($worksheetInfo[0]['lastColumnIndex'] + 1);
Expand Down Expand Up @@ -386,15 +394,24 @@ private function loadStringOrFile(string $filename, Spreadsheet $spreadsheet, bo
$outRow = 0;

// Loop through each line of the file in turn
$rowData = fgetcsv($fileHandle, 0, $this->delimiter ?? '', $this->enclosure, $this->escapeCharacter);
$delimiter = $this->delimiter ?? '';
$rowData = fgetcsv($fileHandle, 0, $delimiter, $this->enclosure, $this->escapeCharacter);
$valueBinder = Cell::getValueBinder();
$preserveBooleanString = method_exists($valueBinder, 'getBooleanConversion') && $valueBinder->getBooleanConversion();
$this->getTrue = Calculation::getTRUE();
$this->getFalse = Calculation::getFALSE();
$this->thousandsSeparator = StringHelper::getThousandsSeparator();
$this->decimalSeparator = StringHelper::getDecimalSeparator();
while (is_array($rowData)) {
$noOutputYet = true;
$columnLetter = 'A';
foreach ($rowData as $rowDatum) {
$this->convertBoolean($rowDatum, $preserveBooleanString);
$numberFormatMask = $this->convertFormattedNumber($rowDatum);
if ($preserveBooleanString) {
$rowDatum = $rowDatum ?? '';
} else {
$this->convertBoolean($rowDatum);
}
$numberFormatMask = $this->castFormattedNumberToNumeric ? $this->convertFormattedNumber($rowDatum) : '';
if (($rowDatum !== '' || $this->preserveNullString) && $this->readFilter->readCell($columnLetter, $currentRow)) {
if ($this->contiguous) {
if ($noOutputYet) {
Expand All @@ -405,15 +422,17 @@ private function loadStringOrFile(string $filename, Spreadsheet $spreadsheet, bo
$outRow = $currentRow;
}
// Set basic styling for the value (Note that this could be overloaded by styling in a value binder)
$sheet->getCell($columnLetter . $outRow)->getStyle()
->getNumberFormat()
->setFormatCode($numberFormatMask);
if ($numberFormatMask !== '') {
$sheet->getStyle($columnLetter . $outRow)
->getNumberFormat()
->setFormatCode($numberFormatMask);
}
// Set cell value
$sheet->getCell($columnLetter . $outRow)->setValue($rowDatum);
}
++$columnLetter;
}
$rowData = fgetcsv($fileHandle, 0, $this->delimiter ?? '', $this->enclosure, $this->escapeCharacter);
$rowData = fgetcsv($fileHandle, 0, $delimiter, $this->enclosure, $this->escapeCharacter);
++$currentRow;
}

Expand All @@ -429,12 +448,12 @@ private function loadStringOrFile(string $filename, Spreadsheet $spreadsheet, bo
/**
* Convert string true/false to boolean, and null to null-string.
*/
private function convertBoolean(mixed &$rowDatum, bool $preserveBooleanString): void
private function convertBoolean(mixed &$rowDatum): void
{
if (is_string($rowDatum) && !$preserveBooleanString) {
if (strcasecmp(Calculation::getTRUE(), $rowDatum) === 0 || strcasecmp('true', $rowDatum) === 0) {
if (is_string($rowDatum)) {
if (strcasecmp($this->getTrue, $rowDatum) === 0 || strcasecmp('true', $rowDatum) === 0) {
$rowDatum = true;
} elseif (strcasecmp(Calculation::getFALSE(), $rowDatum) === 0 || strcasecmp('false', $rowDatum) === 0) {
} elseif (strcasecmp($this->getFalse, $rowDatum) === 0 || strcasecmp('false', $rowDatum) === 0) {
$rowDatum = false;
}
} else {
Expand All @@ -447,18 +466,18 @@ private function convertBoolean(mixed &$rowDatum, bool $preserveBooleanString):
*/
private function convertFormattedNumber(mixed &$rowDatum): string
{
$numberFormatMask = NumberFormat::FORMAT_GENERAL;
$numberFormatMask = '';
if ($this->castFormattedNumberToNumeric === true && is_string($rowDatum)) {
$numeric = str_replace(
[StringHelper::getThousandsSeparator(), StringHelper::getDecimalSeparator()],
[$this->thousandsSeparator, $this->decimalSeparator],
['', '.'],
$rowDatum
);

if (is_numeric($numeric)) {
$decimalPos = strpos($rowDatum, StringHelper::getDecimalSeparator());
$decimalPos = strpos($rowDatum, $this->decimalSeparator);
if ($this->preserveNumericFormatting === true) {
$numberFormatMask = (str_contains($rowDatum, StringHelper::getThousandsSeparator()))
$numberFormatMask = (str_contains($rowDatum, $this->thousandsSeparator))
? '#,##0' : '0';
if ($decimalPos !== false) {
$decimals = strlen($rowDatum) - $decimalPos - 1;
Expand Down
11 changes: 6 additions & 5 deletions tests/PhpSpreadsheetTests/Reader/Csv/CsvIssue2232Test.php
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ public static function providerIssue2232(): array
/**
* @dataProvider providerIssue2232locale
*/
public function testBooleanConversionsLocaleAware(bool $useStringBinder, ?bool $preserveBoolString, mixed $b4Value, mixed $b5Value): void
public function testBooleanConversionsLocaleAware(bool $useStringBinder, ?bool $preserveBoolString, mixed $b2Value, mixed $b3Value, mixed $b4Value, mixed $b5Value): void
{
if ($useStringBinder) {
$binder = new StringValueBinder();
Expand All @@ -81,6 +81,8 @@ public function testBooleanConversionsLocaleAware(bool $useStringBinder, ?bool $
$filename = 'tests/data/Reader/CSV/issue.2232.csv';
$spreadsheet = $reader->load($filename);
$sheet = $spreadsheet->getActiveSheet();
self::assertSame($b2Value, $sheet->getCell('B2')->getValue());
self::assertSame($b3Value, $sheet->getCell('B3')->getValue());
self::assertSame($b4Value, $sheet->getCell('B4')->getValue());
self::assertSame($b5Value, $sheet->getCell('B5')->getValue());
$spreadsheet->disconnectWorksheets();
Expand All @@ -89,10 +91,9 @@ public function testBooleanConversionsLocaleAware(bool $useStringBinder, ?bool $
public static function providerIssue2232locale(): array
{
return [
[true, true, 'Faux', 'Vrai'],
[true, true, 'Faux', 'Vrai'],
[false, false, false, true],
[false, false, false, true],
'string binder preserve boolean string' => [true, true, 'FaLSe', 'tRUE', 'Faux', 'Vrai'],
'string binder convert boolean string' => [true, false, false, true, false, true],
'default binder' => [false, null, false, true, false, true],
];
}
}

0 comments on commit 1282f3d

Please sign in to comment.