From 47cfb593183af853a1dbeae347228f5307360c94 Mon Sep 17 00:00:00 2001 From: Paul Date: Tue, 18 Sep 2018 19:32:57 +0200 Subject: [PATCH] Adding missing XML Namespace support to XmlApplier (exactly the same way as it is implemented on XmlDomExtractor) --- .../Xml/XmlApplier.cs | 32 ++++++++++++++++--- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/Examples/DataFormats/Microsoft.Analytics.Samples.Formats/Xml/XmlApplier.cs b/Examples/DataFormats/Microsoft.Analytics.Samples.Formats/Xml/XmlApplier.cs index 12af7a8..c7111be 100644 --- a/Examples/DataFormats/Microsoft.Analytics.Samples.Formats/Xml/XmlApplier.cs +++ b/Examples/DataFormats/Microsoft.Analytics.Samples.Formats/Xml/XmlApplier.cs @@ -51,6 +51,12 @@ public class XmlApplier : IApplier /// For each column, map from the XML path to the column name private SqlMap columnPaths; + /// Map namespace prefixes to namespace URIs + /// If you have a default namespace (without prefix) in your XML document, + /// provide a prefix in the map for that namespace URI and use that prefix in the + /// XPath expression to select the nodes that are in the default namespace. + private SqlMap namespaceDecls; + /// New instances are constructed at least once per vertex /// In the input row, the name of the column containing XML. The column must be a string. /// Path of the XML element that contains rows. @@ -58,11 +64,18 @@ public class XmlApplier : IApplier /// It is specified relative to the row element. /// Arguments to appliers must not be column references. /// The arguments must be able to be calculated at compile time. - public XmlApplier(string xmlColumnName, string rowPath, SqlMap columnPaths) + /// For each namespace URI in the document that you want to query, map the prefix to the namespace URI. + /// If you have a default namespace (without prefix) in your XML document, + /// provide a prefix in the map for that namespace URI and use that prefix in the + /// XPath expression to select the nodes that are in the default namespace. + /// If there is no namespace URI in the document, the map can be left null. + /// Do not rely on static fields because their values will not cross vertices. + public XmlApplier(string xmlColumnName, string rowPath, SqlMap columnPaths, SqlMap namespaceDecls = null) { this.xmlColumnName = xmlColumnName; this.rowPath = rowPath; this.columnPaths = columnPaths; + this.namespaceDecls = namespaceDecls; } /// Apply is called at least once per instance @@ -81,17 +94,28 @@ public override IEnumerable Apply(IRow input, IUpdatableRow output) { throw new ArgumentException(string.Format("Column '{0}' must be of type 'string', not '{1}'", column.Name, column.Type.Name)); } - // TODO: Add XML Namespace support and allow document fragments (should also be supported on XmlDomExtractor!). + XmlDocument xmlDocument = new XmlDocument(); xmlDocument.LoadXml(input.Get(this.xmlColumnName)); - foreach (XmlNode xmlNode in xmlDocument.DocumentElement.SelectNodes(this.rowPath)) + XmlNamespaceManager nsmanager = new XmlNamespaceManager(xmlDocument.NameTable); + + // If namespace declarations have been provided, add them to the namespace manager + if (this.namespaceDecls != null) + { + foreach (var namespaceDecl in this.namespaceDecls) + { + nsmanager.AddNamespace(namespaceDecl.Key, namespaceDecl.Value); + } + } + + foreach (XmlNode xmlNode in xmlDocument.DocumentElement.SelectNodes(this.rowPath, nsmanager)) { // IUpdatableRow implements a builder pattern to save memory allocations, // so call output.Set in a loop foreach(IColumn col in output.Schema) { var explicitColumnMapping = this.columnPaths.FirstOrDefault(columnPath => columnPath.Value == col.Name); - XmlNode xml = xmlNode.SelectSingleNode(explicitColumnMapping.Key ?? col.Name); + XmlNode xml = xmlNode.SelectSingleNode(explicitColumnMapping.Key ?? col.Name, nsmanager); output.Set(explicitColumnMapping.Value ?? col.Name, xml == null ? null : xml.InnerXml); }