From a8bd30a65f3251a93ae3a284f29554a495ff76e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phan=20Kochen?= Date: Tue, 8 Aug 2023 16:40:08 +0200 Subject: [PATCH] Add support for parsing XiK --- src/Xik.php | 142 +++++++++++++++++++++++++++++++++++++ tests/XikTest.php | 23 ++++++ tests/kdl/xik-output.xml | 144 +++++++++++++++++++++++++++++++++++++ tests/kdl/xik.kdl | 148 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 457 insertions(+) create mode 100644 src/Xik.php create mode 100644 tests/XikTest.php create mode 100644 tests/kdl/xik-output.xml create mode 100644 tests/kdl/xik.kdl diff --git a/src/Xik.php b/src/Xik.php new file mode 100644 index 0000000..4f6fbdf --- /dev/null +++ b/src/Xik.php @@ -0,0 +1,142 @@ +getNodes(); + if (count($nodes) > 0 && $nodes[0]->getName() === '!doctype') { + array_shift($nodes); + } + + // Parse XML declaration + $xmlDecl = []; + if (count($nodes) > 0 && $nodes[0]->getName() === '?xml') { + $xmlDecl = array_shift($nodes)->getProperties(); + } + + // Create the XML DOM document + $doc = new \DOMDocument( + $xmlDecl['version'] ?? '1.0', + $xmlDecl['encoding'] ?? '' + ); + + // Parse remaining document nodes + $namespaces = ['' => null]; + foreach ($nodes as $node) { + self::parseNode($node, $doc, $doc, $namespaces); + } + + return $doc; + } + + /** + * Parse a KDL node and create a DOM node. + * + * @param array $namespaces + */ + private static function parseNode( + NodeInterface $in, + \DOMParentNode $parent, + \DOMDocument $doc, + array $namespaces, + ): void { + $name = $in->getName(); + + // Skip any processing instructions + if (str_starts_with($name, '?') || str_starts_with($name, '!')) { + return; + } + + // Parse any new namespace declarations at this level. + self::parseNamespaceDeclarations($in, $namespaces); + + // Create the node. + [$ns, $name] = self::parseName($name, $namespaces); + $out = $doc->createElementNS($ns, $name); + + // Add values as a single text node. 
+ $out->appendChild($doc->createTextNode(implode(' ', $in->getValues()))); + + // Add attributes. + foreach ($in->getProperties() as $name => $value) { + // Namespace declarations were already handled by `parseNamespaceDeclarations` + if ($name === 'xmlns' || str_starts_with($name, 'xmlns:')) { + continue; + } + + [$ns, $name] = self::parseName($name, $namespaces); + $out->setAttributeNS($ns, $name, (string) $value); + } + + // Add children. + foreach ($in->getChildren() as $node) { + self::parseNode($node, $out, $doc, $namespaces); + } + + // Attach to parent. + $parent->append($out); + } + + /** + * Parse XML namespace declarations from a KDL node. + * + * @param array $namespaces + */ + private static function parseNamespaceDeclarations(NodeInterface $node, array &$namespaces): void + { + foreach ($node->getProperties() as $prop => $value) { + if ($prop === 'xmlns') { + $namespaces[''] = $value; + } elseif (str_starts_with($prop, 'xmlns:')) { + $namespaces[substr($prop, 6)] = $value; + } + } + } + + /** + * Parse an XML element or attribute name into namespace and name. + * + * @param array $namespaces + * @return array{?string, string} + */ + private static function parseName(string $name, array &$namespaces): array + { + $ns = ''; + $sepIdx = strpos($name, ':'); + if ($sepIdx !== false) { + $ns = substr($name, 0, $sepIdx); + $name = substr($name, $sepIdx + 1); + } + if (!array_key_exists($ns, $namespaces)) { + throw new \Exception('Invalid namespace in element name: ' . 
$ns); + } + $ns = $namespaces[$ns]; + + return [$ns, $name]; + } +} diff --git a/tests/XikTest.php b/tests/XikTest.php new file mode 100644 index 0000000..8f17355 --- /dev/null +++ b/tests/XikTest.php @@ -0,0 +1,23 @@ +preserveWhiteSpace = false; + $formatted->formatOutput = true; + $formatted->loadXML($doc->saveXML()); + + self::assertSame($xml, $formatted->saveXML()); + } +} diff --git a/tests/kdl/xik-output.xml b/tests/kdl/xik-output.xml new file mode 100644 index 0000000..43fb11c --- /dev/null +++ b/tests/kdl/xik-output.xml @@ -0,0 +1,144 @@ + + + + Gambardella, Matthew + XML Developer's Guide + Computer + 44.95 + 2000-10-01 + + An in-depth look at creating applications + with XML. + + + + Ralls, Kim + Midnight Rain + Fantasy + 5.95 + 2000-12-16 + + A former architect battles corporate zombies, + an evil sorceress, and her own childhood to become queen + of the world. + + + + Corets, Eva + Maeve Ascendant + Fantasy + 5.95 + 2000-11-17 + + After the collapse of a nanotechnology + society in England, the young survivors lay the + foundation for a new society. + + + + Corets, Eva + Oberon's Legacy + Fantasy + 5.95 + 2001-03-10 + + In post-apocalypse England, the mysterious + agent known only as Oberon helps to create a new life + for the inhabitants of London. Sequel to Maeve + Ascendant. + + + + Corets, Eva + The Sundered Grail + Fantasy + 5.95 + 2001-09-10 + + The two daughters of Maeve, half-sisters, + battle one another for control of England. Sequel to + Oberon's Legacy. + + + + Randall, Cynthia + Lover Birds + Romance + 4.95 + 2000-09-02 + + When Carla meets Paul at an ornithology + conference, tempers fly as feathers get ruffled. + + + + Thurman, Paula + Splish Splash + Romance + 4.95 + 2000-11-02 + + A deep sea diver finds true love twenty + thousand leagues beneath the sea. + + + + Knorr, Stefan + Creepy Crawlies + Horror + 4.95 + 2000-12-06 + + An anthology of horror stories about roaches, + centipedes, scorpions and other insects. 
+ + + + Kress, Peter + Paradox Lost + Science Fiction + 6.95 + 2000-11-02 + + After an inadvertant trip through a Heisenberg + Uncertainty Device, James Salway discovers the problems + of being quantum. + + + + O'Brien, Tim + Microsoft .NET: The Programming Bible + Computer + 36.95 + 2000-12-09 + + Microsoft's .NET initiative is explored in + detail in this deep programmer's reference. + + + + O'Brien, Tim + MSXML3: A Comprehensive Guide + Computer + 36.95 + 2000-12-01 + + The Microsoft MSXML3 parser is covered in + detail, with attention to XML DOM interfaces, XSLT processing, + SAX and more. + + + + Galos, Mike + Visual Studio 7: A Comprehensive Guide + Computer + 49.95 + 2001-04-16 + + Microsoft Visual Studio 7 is explored in depth, + looking at how Visual Basic, Visual C++, C#, and ASP+ are + integrated into a comprehensive development + environment. + + + diff --git a/tests/kdl/xik.kdl b/tests/kdl/xik.kdl new file mode 100644 index 0000000..f3873ea --- /dev/null +++ b/tests/kdl/xik.kdl @@ -0,0 +1,148 @@ +?xml version="1.0" + +// Sample converted from: +// https://learn.microsoft.com/en-us/previous-versions/windows/desktop/ms762271(v=vs.85) + +catalog { + book id="bk101" { + author "Gambardella, Matthew" + title "XML Developer's Guide" + genre "Computer" + price "44.95" + publish_date "2000-10-01" + description " + An in-depth look at creating applications + with XML. + " + } + book id="bk102" { + author "Ralls, Kim" + title "Midnight Rain" + genre "Fantasy" + price "5.95" + publish_date "2000-12-16" + description " + A former architect battles corporate zombies, + an evil sorceress, and her own childhood to become queen + of the world. + " + } + book id="bk103" { + author "Corets, Eva" + title "Maeve Ascendant" + genre "Fantasy" + price "5.95" + publish_date "2000-11-17" + description " + After the collapse of a nanotechnology + society in England, the young survivors lay the + foundation for a new society. 
+ " + } + book id="bk104" { + author "Corets, Eva" + title "Oberon's Legacy" + genre "Fantasy" + price "5.95" + publish_date "2001-03-10" + description " + In post-apocalypse England, the mysterious + agent known only as Oberon helps to create a new life + for the inhabitants of London. Sequel to Maeve + Ascendant. + " + } + book id="bk105" { + author "Corets, Eva" + title "The Sundered Grail" + genre "Fantasy" + price "5.95" + publish_date "2001-09-10" + description " + The two daughters of Maeve, half-sisters, + battle one another for control of England. Sequel to + Oberon's Legacy. + " + } + book id="bk106" { + author "Randall, Cynthia" + title "Lover Birds" + genre "Romance" + price "4.95" + publish_date "2000-09-02" + description " + When Carla meets Paul at an ornithology + conference, tempers fly as feathers get ruffled. + " + } + book id="bk107" { + author "Thurman, Paula" + title "Splish Splash" + genre "Romance" + price "4.95" + publish_date "2000-11-02" + description " + A deep sea diver finds true love twenty + thousand leagues beneath the sea. + " + } + book id="bk108" { + author "Knorr, Stefan" + title "Creepy Crawlies" + genre "Horror" + price "4.95" + publish_date "2000-12-06" + description " + An anthology of horror stories about roaches, + centipedes, scorpions and other insects. + " + } + book id="bk109" { + author "Kress, Peter" + title "Paradox Lost" + genre "Science Fiction" + price "6.95" + publish_date "2000-11-02" + description " + After an inadvertant trip through a Heisenberg + Uncertainty Device, James Salway discovers the problems + of being quantum. + " + } + book id="bk110" { + author "O'Brien, Tim" + title "Microsoft .NET: The Programming Bible" + genre "Computer" + price "36.95" + publish_date "2000-12-09" + description " + Microsoft's .NET initiative is explored in + detail in this deep programmer's reference. 
+ " + } + book id="bk111" { + author "O'Brien, Tim" + title "MSXML3: A Comprehensive Guide" + genre "Computer" + price "36.95" + publish_date "2000-12-01" + description " + The Microsoft MSXML3 parser is covered in + detail, with attention to XML DOM interfaces, XSLT processing, + SAX and more. + " + } + book id="bk112" { + author "Galos, Mike" + title "Visual Studio 7: A Comprehensive Guide" + genre "Computer" + price "49.95" + publish_date "2001-04-16" + description " + Microsoft Visual Studio 7 is explored in depth, + looking at how Visual Basic, Visual C++, C#, and ASP+ are + integrated into a comprehensive development + environment. + " + } +}