From 7684a3d138f793630e2ac640ac7d3acc4a79467b Mon Sep 17 00:00:00 2001 From: Gina Peter Banyard Date: Thu, 7 Dec 2023 20:19:11 +0000 Subject: [PATCH] ext/mbstring: move unsigned 32 bit integer tests to a new test (#12891) And only run it on 64 bit architectures as those are floats on 32 bit. --- .../tests/mb_decode_numericentity.phpt | 22 ------- .../mb_decode_numericentity_large_ints.phpt | 58 +++++++++++++++++++ .../tests/mb_encode_numericentity.phpt | 5 -- .../mb_encode_numericentity_large_ints.phpt | 16 +++++ 4 files changed, 74 insertions(+), 27 deletions(-) create mode 100644 ext/mbstring/tests/mb_decode_numericentity_large_ints.phpt create mode 100644 ext/mbstring/tests/mb_encode_numericentity_large_ints.phpt diff --git a/ext/mbstring/tests/mb_decode_numericentity.phpt b/ext/mbstring/tests/mb_decode_numericentity.phpt index e5c1c27c7ac33..b58a2c5ed9da3 100644 --- a/ext/mbstring/tests/mb_decode_numericentity.phpt +++ b/ext/mbstring/tests/mb_decode_numericentity.phpt @@ -103,26 +103,9 @@ test("Successive hex entities", "22", "22", [0, 0xFFFF, 0, 0xFFFF], 'AS test("Starting entity immediately after decimal entity which is too long", "�A", "�A", [0, 0xFFFF, 0, 0xFFFF], 'ASCII'); test("Starting entity immediately after hex entity which is too long", "�A", "�", [0, 0xFFFF, 0, 0xFFFF], 'ASCII'); -$ucs4_test1 = mb_convert_encoding("�A", 'UCS-4BE', 'ASCII'); -testNonAscii("Starting entity immediately after valid decimal entity which is just within maximum length", $ucs4_test1, "\x3B\x9A\xCA\x00\x00\x00\x00A", [0, 0xFFFFFFFF, 0, 0xFFFFFFFF], 'UCS-4BE'); -$ucs4_test2 = mb_convert_encoding("�A", 'UCS-4BE', 'ASCII'); -testNonAscii("Starting entity immediately after valid hex entity which is just within maximum length", $ucs4_test2, "\x11\x11\x11\x11\x00\x00\x00A", [0, 0xFFFFFFFF, 0, 0xFFFFFFFF], 'UCS-4BE'); - test("Starting entity immediately after invalid decimal entity", "�A", "�A", [0x1, 0xFFFF, 0, 0xFFFF], 'ASCII'); test("Starting entity immediately after invalid hex entity", "�A", " ", [0x1, 0xFFFF, 0, 0xFFFF], 'ASCII'); -test("Starting entity immediately after too-big decimal entity", "�A", "�A", [0, 0xFFFFFFFF, 0, 0xFFFFFFFF], 'ASCII'); - -// If the numeric entity decodes to 0xFFFFFFFF, that should be passed through -// Originally, the new implementation of mb_decode_numericentity used -1 as a marker indicating -// that the entity could not be successfully decoded, so if the entity decoded successfully to -// 0xFFFFFFFF (-1), it would be treated as an invalid entity -test("Regression test (entity which decodes to 0xFFFFFFFF)", "", "?", [0xFFFFFF86, 0xFFFFFFFF, 0xF, 0xFC015448], 'HZ'); - -// With the legacy conversion filters, a trailing & could be truncated by mb_decode_numericentity, -// because some text encodings did not properly invoke the next flush function in the chain -test("Regression test (truncation of successive & with JIS encoding)", "&&&", "&&&", [0x20FF37FF, 0x7202F569, 0xC4090023, 0xF160], "JIS"); - // Previously, signed arithmetic was used on convmap entries test("Regression test (convmap entries are now treated as unsigned)", ",", "?,", [0x22FFFF11, 0xBF111189, 0x67726511, 0x1161E719], "ASCII"); @@ -194,11 +177,6 @@ Successive A: string(9) "AA" => string(2) "AA" (Good) Successive hex entities: string(11) "22" => string(2) "22" (Good) Starting entity immediately after decimal entity which is too long: string(18) "�A" => string(14) "�A" (Good) Starting entity immediately after hex entity which is too long: string(17) "�A" => string(13) "�" (Good) -Starting entity immediately after valid decimal entity which is just within maximum length: 000000260000002300000031000000300000003000000030000000300000003000000030000000300000003000000030000000260000002300000036000000350000003b => 3b9aca0000000041 (Good) -Starting entity immediately after valid hex entity which is just within maximum length: 0000002600000023000000780000003100000031000000310000003100000031000000310000003100000031000000260000002300000036000000350000003b => 1111111100000041 (Good) Starting entity immediately after invalid decimal entity: string(8) "�A" => string(4) "�A" (Good) Starting entity immediately after invalid hex entity: string(9) "�A" => string(5) " " (Good) -Starting entity immediately after too-big decimal entity: string(17) "�A" => string(13) "�A" (Good) -Regression test (entity which decodes to 0xFFFFFFFF): string(5) "" => string(1) "?" (Good) -Regression test (truncation of successive & with JIS encoding): string(3) "&&&" => string(3) "&&&" (Good) Regression test (convmap entries are now treated as unsigned): string(4) "," => string(2) "?," (Good) diff --git a/ext/mbstring/tests/mb_decode_numericentity_large_ints.phpt b/ext/mbstring/tests/mb_decode_numericentity_large_ints.phpt new file mode 100644 index 0000000000000..c61001d95538b --- /dev/null +++ b/ext/mbstring/tests/mb_decode_numericentity_large_ints.phpt @@ -0,0 +1,58 @@ +--TEST-- +mb_decode_numericentity() with 0xFFFFFFFF in conversion map +--EXTENSIONS-- +mbstring +--SKIPIF-- + +--FILE-- + ", varDumpToString($result); + if ($result === $expected) + echo " (Good)\n"; + else + echo " (BAD; expected ", varDumpToString($expected), ")\n"; +} + +function testNonAscii($desc, $str, $expected, $convmap, $encoding) { + $result = mb_decode_numericentity($str, $convmap, $encoding); + echo $desc, ": ", bin2hex($str), " => ", bin2hex($result); + if ($result === $expected) + echo " (Good)\n"; + else + echo " (BAD; expected ", bin2hex($expected), ")\n"; +} + +$ucs4_test1 = mb_convert_encoding("�A", 'UCS-4BE', 'ASCII'); +testNonAscii("Starting entity immediately after valid decimal entity which is just within maximum length", $ucs4_test1, "\x3B\x9A\xCA\x00\x00\x00\x00A", [0, 0xFFFFFFFF, 0, 0xFFFFFFFF], 'UCS-4BE'); +$ucs4_test2 = mb_convert_encoding("�A", 'UCS-4BE', 'ASCII'); +testNonAscii("Starting entity immediately after valid hex entity which is just within maximum length", $ucs4_test2, "\x11\x11\x11\x11\x00\x00\x00A", [0, 0xFFFFFFFF, 0, 0xFFFFFFFF], 'UCS-4BE'); + +test("Starting entity immediately after too-big decimal entity", "�A", "�A", [0, 0xFFFFFFFF, 0, 0xFFFFFFFF], 'ASCII'); + +// If the numeric entity decodes to 0xFFFFFFFF, that should be passed through +// Originally, the new implementation of mb_decode_numericentity used -1 as a marker indicating +// that the entity could not be successfully decoded, so if the entity decoded successfully to +// 0xFFFFFFFF (-1), it would be treated as an invalid entity +test("Regression test (entity which decodes to 0xFFFFFFFF)", "", "?", [0xFFFFFF86, 0xFFFFFFFF, 0xF, 0xFC015448], 'HZ'); + +// With the legacy conversion filters, a trailing & could be truncated by mb_decode_numericentity, +// because some text encodings did not properly invoke the next flush function in the chain +test("Regression test (truncation of successive & with JIS encoding)", "&&&", "&&&", [0x20FF37FF, 0x7202F569, 0xC4090023, 0xF160], "JIS"); + +?> +--EXPECT-- +Starting entity immediately after valid decimal entity which is just within maximum length: 000000260000002300000031000000300000003000000030000000300000003000000030000000300000003000000030000000260000002300000036000000350000003b => 3b9aca0000000041 (Good) +Starting entity immediately after valid hex entity which is just within maximum length: 0000002600000023000000780000003100000031000000310000003100000031000000310000003100000031000000260000002300000036000000350000003b => 1111111100000041 (Good) +Starting entity immediately after too-big decimal entity: string(17) "�A" => string(13) "�A" (Good) +Regression test (entity which decodes to 0xFFFFFFFF): string(5) "" => string(1) "?" (Good) +Regression test (truncation of successive & with JIS encoding): string(3) "&&&" => string(3) "&&&" (Good) diff --git a/ext/mbstring/tests/mb_encode_numericentity.phpt b/ext/mbstring/tests/mb_encode_numericentity.phpt index a394a58d2614f..851ffcef645b9 100644 --- a/ext/mbstring/tests/mb_encode_numericentity.phpt +++ b/ext/mbstring/tests/mb_encode_numericentity.phpt @@ -54,10 +54,6 @@ echo "11 (hex): " . mb_encode_numericentity($iso2022jp, $convmap, "ISO-2022-JP", $convmap = [0x2b, 0x2d4, 0x75656500, 0x656d2c53]; echo "12: " . mb_encode_numericentity("m", $convmap, "ASCII") . "\n"; -// Regression test; the old implementation could only emit hexadecimal entities with about 5 digits -$convmap = [0xffffffff, 0xffffffff, 0x540a0af7, 0x5a485054]; -echo "13: " . mb_encode_numericentity("\xFF", $convmap, "ASCII", true) . "\n"; - ?> --EXPECT-- 1: ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ @@ -78,4 +74,3 @@ echo "13: " . mb_encode_numericentity("\xFF", $convmap, "ASCII", true) . "\n"; 11:  £ABC 11 (hex):  £ABC 12: � -13: � diff --git a/ext/mbstring/tests/mb_encode_numericentity_large_ints.phpt b/ext/mbstring/tests/mb_encode_numericentity_large_ints.phpt new file mode 100644 index 0000000000000..fff56f483b7ed --- /dev/null +++ b/ext/mbstring/tests/mb_encode_numericentity_large_ints.phpt @@ -0,0 +1,16 @@ +--TEST-- +mb_encode_numericentity() with 0xFFFFFFFF in conversion map +--EXTENSIONS-- +mbstring +--SKIPIF-- + +--FILE-- + +--EXPECT-- +13: �