Skip to content

Commit

Permalink
Html Reader/Writer Better Handling of Booleans
Browse files Browse the repository at this point in the history
When Html Writer outputs a cell with a boolean value, the result will either be 1 or null-string; neither of these is optimal for anyone looking at the resulting html. Html Reader already has the ability to recognize data types using the html `data-type` attribute, but Html Writer doesn't use it. This PR adds the ability to generate that attribute for booleans. It will generate a string value appropriate for the locale when it encounters a boolean. Html Reader, when it encounters `data-type="b"`, will interpret the result as true if the value is 1 or a string value recognized as true in any locale; it will interpret the result as false if the value is 0, null-string, null, or a string value recognized as false in any locale; if none of the above, it will leave the value as an unchanged string. So, Reader will wind up with the correct result even if its locale is different than what Writer used.

Because this is a breaking change, it is opt-in. You need to call `Writer::setBetterBoolean(true)` in order for it to take effect. The current default value for that property is false. When it is time to introduce breaking changes (see PR PHPOffice#4240), the default will be changed to true.
  • Loading branch information
oleibman committed Dec 1, 2024
1 parent f37b119 commit a68e44e
Show file tree
Hide file tree
Showing 4 changed files with 266 additions and 2 deletions.
47 changes: 47 additions & 0 deletions src/PhpSpreadsheet/Calculation/Calculation.php
Original file line number Diff line number Diff line change
Expand Up @@ -3141,6 +3141,53 @@ private function getLocaleFile(string $localeDir, string $locale, string $langua
return $localeFileName;
}

/** @var array<int, array<int, string>> */
private static array $falseTrueArray = [];

/**
 * Return the known spellings of FALSE (index 0) and TRUE (index 1).
 *
 * Each list starts with the English name and is extended with the names
 * that the "functions" file of every non-English valid locale maps to
 * FALSE / TRUE. The result is built once and then served from the static
 * cache on later calls.
 *
 * @return array<int, array<int, string>>
 */
public function getFalseTrueArray(): array
{
    if (self::$falseTrueArray !== []) {
        return self::$falseTrueArray;
    }
    if (count(self::$validLocaleLanguages) === 1) {
        self::loadLocales();
    }

    // Same directory for every locale, so build the path once.
    $localeDir = __DIR__ . DIRECTORY_SEPARATOR . 'locale' . DIRECTORY_SEPARATOR;
    // Which slot of the result each English name feeds.
    $slots = ['FALSE' => 0, 'TRUE' => 1];
    $names = [['FALSE'], ['TRUE']];

    foreach (self::$validLocaleLanguages as $locale) {
        // English spellings are already seeded above.
        if (str_starts_with($locale, 'en')) {
            continue;
        }
        $language = str_contains($locale, '_') ? explode('_', $locale)[0] : $locale;

        try {
            $functionNamesFile = $this->getLocaleFile($localeDir, $locale, $language, 'functions');
        } catch (Exception) {
            // No usable functions file for this locale: nothing to add.
            continue;
        }

        // Retrieve the list of locale or language specific function names
        $lines = file($functionNamesFile, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES) ?: [];
        foreach ($lines as $line) {
            [$definition] = explode('##', $line); // Strip out comments
            if (!str_contains($definition, '=')) {
                continue;
            }
            [$englishName, $localName] = array_map('trim', explode('=', $definition));
            $slot = $slots[$englishName] ?? null;
            if ($slot !== null) {
                $names[$slot][] = $localName;
            }
        }
    }

    return self::$falseTrueArray = $names;
}

/**
* Set the locale code.
*
Expand Down
32 changes: 32 additions & 0 deletions src/PhpSpreadsheet/Reader/Html.php
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
use DOMElement;
use DOMNode;
use DOMText;
use PhpOffice\PhpSpreadsheet\Calculation\Calculation;
use PhpOffice\PhpSpreadsheet\Cell\Coordinate;
use PhpOffice\PhpSpreadsheet\Cell\DataType;
use PhpOffice\PhpSpreadsheet\Comment;
Expand Down Expand Up @@ -271,6 +272,12 @@ protected function flushCell(Worksheet $sheet, string $column, int|string $row,
->setQuotePrefix(true);
}
}
if ($datatype === DataType::TYPE_BOOL) {
$cellContent = self::convertBoolean($cellContent);
if (!is_bool($cellContent)) {
$attributeArray['data-type'] = DataType::TYPE_STRING;
}
}

//catching the Exception and ignoring the invalid data types
try {
Expand All @@ -291,6 +298,31 @@ protected function flushCell(Worksheet $sheet, string $column, int|string $row,
$cellContent = (string) '';
}

/** @var array<int, array<int, string>> */
private static array $falseTrueArray = [];

/**
 * Interpret the text of a cell that was flagged as boolean.
 *
 * '1' is true; '0', the empty string and null are false. Any other text is
 * upper-cased and looked up in the TRUE / FALSE spellings supplied by
 * Calculation::getFalseTrueArray(), which are cached in a static property
 * after the first lookup. Text that matches neither list is returned
 * unchanged so the caller can store it as a string.
 *
 * Declared static because it uses no instance state and is invoked as
 * self::convertBoolean().
 *
 * @param ?string $cellContent raw cell text
 *
 * @return bool|string the boolean value, or the original text when it is not recognised
 */
private static function convertBoolean(?string $cellContent): bool|string
{
    if ($cellContent === '1') {
        return true;
    }
    if ($cellContent === '0' || $cellContent === '' || $cellContent === null) {
        return false;
    }
    if (empty(self::$falseTrueArray)) {
        self::$falseTrueArray = Calculation::getInstance()->getFalseTrueArray();
    }

    // Upper-case once; the same value is checked against both lists.
    $upperContent = mb_strtoupper($cellContent);
    if (in_array($upperContent, self::$falseTrueArray[1], true)) {
        return true;
    }
    if (in_array($upperContent, self::$falseTrueArray[0], true)) {
        return false;
    }

    return $cellContent;
}

private function processDomElementBody(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child): void
{
$attributeArray = [];
Expand Down
49 changes: 47 additions & 2 deletions src/PhpSpreadsheet/Writer/Html.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
use PhpOffice\PhpSpreadsheet\Calculation\Calculation;
use PhpOffice\PhpSpreadsheet\Cell\Cell;
use PhpOffice\PhpSpreadsheet\Cell\Coordinate;
use PhpOffice\PhpSpreadsheet\Cell\DataType;
use PhpOffice\PhpSpreadsheet\Chart\Chart;
use PhpOffice\PhpSpreadsheet\Comment;
use PhpOffice\PhpSpreadsheet\Document\Properties;
Expand Down Expand Up @@ -36,6 +37,9 @@ class Html extends BaseWriter

private const DEFAULT_CELL_WIDTH_PIXELS = 56;

// Placeholder strings used in place of a boolean cell's text when betterBoolean
// is enabled. When a cell's data equals one of them, the cell is written with a
// data-type attribute of DataType::TYPE_BOOL and the placeholder is replaced by
// the writer's TRUE/FALSE text.
// NOTE(review): U+FEFF is also the byte-order mark; presumably a cell whose
// data is exactly that character would be treated as FALSE - confirm.
private const TRUE_SUBSTITUTE = "\u{fffe}";
private const FALSE_SUBSTITUTE = "\u{feff}";

/**
* Migration aid to tell if html tags will be treated as plaintext in comments.
* if (
Expand Down Expand Up @@ -141,13 +145,22 @@ class Html extends BaseWriter
/** @var Chart[] */
private $sheetCharts;

/** Opt-in switch for the improved boolean output; off by default (see setBetterBoolean). */
private bool $betterBoolean = false;

/** Text written for a boolean true cell; overwritten in the constructor with Calculation::getTRUE(). */
private string $getTrue = 'TRUE';

/** Text written for a boolean false cell; overwritten in the constructor with Calculation::getFALSE(). */
private string $getFalse = 'FALSE';

/**
 * Create a new HTML.
 *
 * Keeps the spreadsheet and its default font, and captures the TRUE / FALSE
 * text reported by the spreadsheet's Calculation instance at construction time.
 */
public function __construct(Spreadsheet $spreadsheet)
{
    $this->spreadsheet = $spreadsheet;
    $this->defaultFont = $spreadsheet->getDefaultStyle()->getFont();

    $calculation = Calculation::getInstance($spreadsheet);
    $this->getTrue = $calculation->getTRUE();
    $this->getFalse = $calculation->getFALSE();
}

/**
Expand Down Expand Up @@ -1346,8 +1359,21 @@ private function generateRowCellDataValue(Worksheet $worksheet, Cell $cell, stri
if ($cell->getValue() instanceof RichText) {
$cellData .= $this->generateRowCellDataValueRich($cell->getValue());
} else {
$origData = $this->preCalculateFormulas ? $cell->getCalculatedValue() : $cell->getValue();
$origData2 = $this->preCalculateFormulas ? $cell->getCalculatedValueString() : $cell->getValueString();
if ($this->preCalculateFormulas) {
$origData = $cell->getCalculatedValue();
if ($this->betterBoolean && is_bool($origData)) {
$origData2 = $origData ? self::TRUE_SUBSTITUTE : self::FALSE_SUBSTITUTE;
} else {
$origData2 = $cell->getCalculatedValueString();
}
} else {
$origData = $cell->getValue();
if ($this->betterBoolean && is_bool($origData)) {
$origData2 = $origData ? self::TRUE_SUBSTITUTE : self::FALSE_SUBSTITUTE;
} else {
$origData2 = $cell->getValueString();
}
}
$formatCode = $worksheet->getParentOrThrow()->getCellXfByIndex($cell->getXfIndex())->getNumberFormat()->getFormatCode();

$cellData = NumberFormat::toFormattedString(
Expand Down Expand Up @@ -1448,6 +1474,13 @@ private function generateRowWriteCell(
$htmlx .= $this->generateRowIncludeCharts($worksheet, $coordinate);
// Column start
$html .= ' <' . $cellType;
if ($cellData === self::TRUE_SUBSTITUTE) {
$html .= ' data-type="' . DataType::TYPE_BOOL . '"';
$cellData = $this->getTrue;
} elseif ($cellData === self::FALSE_SUBSTITUTE) {
$html .= ' data-type="' . DataType::TYPE_BOOL . '"';
$cellData = $this->getFalse;
}
if (!$this->useInlineCss && !$this->isPdf && is_string($cssClass)) {
$html .= ' class="' . $cssClass . '"';
if ($htmlx) {
Expand Down Expand Up @@ -1903,4 +1936,16 @@ private function shouldGenerateColumn(Worksheet $sheet, string $colStr): bool

return $sheet->getColumnDimension($colStr)->getVisible();
}

/**
 * Tell whether the improved boolean output is enabled.
 */
public function getBetterBoolean(): bool
{
return $this->betterBoolean;
}

/**
 * Enable or disable the improved boolean output.
 *
 * @return $this for method chaining
 */
public function setBetterBoolean(bool $betterBoolean): self
{
$this->betterBoolean = $betterBoolean;

return $this;
}
}
140 changes: 140 additions & 0 deletions tests/PhpSpreadsheetTests/Writer/Html/BetterBooleanTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
<?php

declare(strict_types=1);

namespace PhpOffice\PhpSpreadsheetTests\Writer\Html;

use PhpOffice\PhpSpreadsheet\Calculation\Calculation;
use PhpOffice\PhpSpreadsheet\Reader\Html as HtmlReader;
use PhpOffice\PhpSpreadsheet\Spreadsheet;
use PhpOffice\PhpSpreadsheet\Writer\Html as HtmlWriter;
use PhpOffice\PhpSpreadsheetTests\Functional;

class BetterBooleanTest extends Functional\AbstractFunctional
{
    /** Calculation locale captured before each test so tearDown() can put it back. */
    private string $locale;

    protected function setUp(): void
    {
        $this->locale = Calculation::getInstance()->getLocale();
    }

    protected function tearDown(): void
    {
        Calculation::getInstance()->setLocale($this->locale);
    }

    public function testDefault(): void
    {
        $spreadsheet = new Spreadsheet();
        // Default will change with next PhpSpreadsheet release
        self::assertFalse((new HtmlWriter($spreadsheet))->getBetterBoolean());
        $spreadsheet->disconnectWorksheets();
    }

    /** Writer customizer: turn the improved boolean output on. */
    public function setBetter(HtmlWriter $writer): void
    {
        $writer->setBetterBoolean(true);
    }

    /** Writer customizer: turn the improved boolean output off. */
    public function setNotBetter(HtmlWriter $writer): void
    {
        $writer->setBetterBoolean(false);
    }

    public function testBetterBoolean(): void
    {
        $reloaded = $this->roundTrip(self::sampleSpreadsheet(), 'setBetter');

        self::assertUnchangedCells($reloaded);
        $rsheet = $reloaded->getActiveSheet();
        self::assertTrue($rsheet->getCell('C1')->getValue());
        self::assertFalse($rsheet->getCell('D1')->getValue());
        $reloaded->disconnectWorksheets();
    }

    public function testNotBetterBoolean(): void
    {
        $reloaded = $this->roundTrip(self::sampleSpreadsheet(), 'setNotBetter');

        self::assertUnchangedCells($reloaded);
        $rsheet = $reloaded->getActiveSheet();
        self::assertSame(1, $rsheet->getCell('C1')->getValue());
        self::assertNull($rsheet->getCell('D1')->getValue());
        $reloaded->disconnectWorksheets();
    }

    public function testLocale(): void
    {
        $spreadsheet = self::sampleSpreadsheet();
        // The locale must be switched before the writer is constructed.
        Calculation::getInstance()->setLocale('fr');
        $writer = new HtmlWriter($spreadsheet);
        $writer->setBetterBoolean(true);
        $html = $writer->generateHtmlAll();
        self::assertStringContainsString('VRAI', $html);
        self::assertStringNotContainsString('TRUE', $html);

        $reloaded = $this->roundTrip($spreadsheet, 'setBetter');

        self::assertUnchangedCells($reloaded);
        $rsheet = $reloaded->getActiveSheet();
        self::assertTrue($rsheet->getCell('C1')->getValue());
        self::assertFalse($rsheet->getCell('D1')->getValue());
        $reloaded->disconnectWorksheets();
    }

    public function testForeignNoLocale(): void
    {
        $cells = [
            '<td>10</td>',
            '<td>Hello</td>',
            '<td data-type="b">ИСТИНА</td>', // Bulgarian TRUE
            '<td data-type="b">EPÄTOSI</td>', // Finnish FALSE
            '<td data-type="b">whatever</td>',
            '<td data-type="b">tRuE</td>',
        ];
        $fragment = '<table><tbody><tr>' . implode('', $cells) . '</tr></tbody></table>';

        $spreadsheet = (new HtmlReader())->loadFromString($fragment);
        $sheet = $spreadsheet->getActiveSheet();
        self::assertTrue($sheet->getCell('C1')->getValue());
        self::assertFalse($sheet->getCell('D1')->getValue());
        self::assertSame('whatever', $sheet->getCell('E1')->getValue());
        self::assertTrue($sheet->getCell('F1')->getValue());
        $spreadsheet->disconnectWorksheets();
    }

    /**
     * Build the fixture shared by the round-trip tests: a number, a string,
     * a literal boolean and a formula that evaluates to a boolean.
     */
    private static function sampleSpreadsheet(): Spreadsheet
    {
        $spreadsheet = new Spreadsheet();
        $sheet = $spreadsheet->getActiveSheet();
        $values = [
            'A1' => 10,
            'B1' => 'Hello',
            'C1' => true,
            'D1' => '=IF(1>2, TRUE, FALSE)',
        ];
        foreach ($values as $address => $value) {
            $sheet->getCell($address)->setValue($value);
        }

        return $spreadsheet;
    }

    /**
     * Write the spreadsheet as Html using the named writer customizer method,
     * read it back, and release the original workbook.
     */
    private function roundTrip(Spreadsheet $spreadsheet, string $writerSetup): Spreadsheet
    {
        /** @var callable */
        $callableWriter = [$this, $writerSetup];
        $reloaded = $this->writeAndReload($spreadsheet, 'Html', null, $callableWriter);
        $spreadsheet->disconnectWorksheets();

        return $reloaded;
    }

    /** Assert that the non-boolean cells A1 and B1 came back with their original values. */
    private static function assertUnchangedCells(Spreadsheet $reloaded): void
    {
        $rsheet = $reloaded->getActiveSheet();
        self::assertSame(10, $rsheet->getCell('A1')->getValue());
        self::assertSame('Hello', $rsheet->getCell('B1')->getValue());
    }
}

0 comments on commit a68e44e

Please sign in to comment.