Skip to content

Commit

Permalink
Fix ProcessorFactory.withDistinguishedRootNode and isError
Browse files Browse the repository at this point in the history
When calling withDistinguishedRootNode, we copy the ProcessorFactory and
replace the root spec with the new node. When we do this, we also copy
the existing SchemaSet. But this SchemaSet was created with the previous
root spec, which means withDistinguishedRootNode doesn't actually do
anything.

Fortunately, there is a workaround which is to pass in the root
name/namspace to the compileSource/File functions, so that the root spec
is available when the SchemaSet is created, and avoid
withDistinguishedRootNode entirely.

But to fix this, this modifies withDistinguishedRootNode to not copy the
SchemaSet, so a new SchemaSet is created and initialized with the
correct root spec. Each ProcessorFactory now has a unique SchemaSet
instance with the correct root name and namespace.

This also discovered cases in the Compiler.compileSourceInternal and
JAPI/SAPI compileFile functions where they called isError on the newly
created ProcessorFactory before returning it to the caller. This meant
that the SchemaSet inside the ProcessorFactory would be forced to be
created, even though it might never be used, like in the case of
withDistinguishedRootNode being called and a different SchemaSet being
created.

Really the isError function should only ever be called by the user when
they are ready for the object to be evaluated, and not forced by
Daffodil internals. This ensures we avoid unnecessary work until the
user is ready, since isError is what eventually causes lazy vals to be
evaluated. However, this does mean that if a user doesn't check isError
they might get exceptions/assertions, but it is well documented in the
JAPI/SAPI that isError should be checked before calling other functions.

This also means that some debug logging that relies on knowing that
status of isError must be moved elsewhere, since the user might not have
called isError yet.

This also discovered instances in the TDMLRunner and CLI where we
queried diagnostics before checking isError--isError should always be
done first, which is fixed.

That said, this also modifies ProcessorFactory.getDiagnostics so that it
calls isError before returning the diagnostics. This way if user calls
getDiagnostics without first calling isError (like we used to do in the
TDMLRunner and CLI), the compilation work will be done and correct
diagnostics generated. Without this, there would be no diagnostics
because no work would be done yet. Although technically a user should
call isError before asking for diagnostics, there may be cases where
users don't, so this allows that to keep working as expected. This is
similar to how onPath calls isError even if a user doesn't.

DAFFODIL-2864, DAFFODIL-697
  • Loading branch information
stevedlawrence committed Dec 7, 2023
1 parent d66a8b4 commit b897694
Show file tree
Hide file tree
Showing 8 changed files with 136 additions and 32 deletions.
14 changes: 13 additions & 1 deletion daffodil-cli/src/main/scala/org/apache/daffodil/cli/Main.scala
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ import org.apache.daffodil.runtime1.debugger.TraceDebuggerRunner
import org.apache.daffodil.runtime1.externalvars.ExternalVariablesLoader
import org.apache.daffodil.runtime1.layers.LayerExecutionException
import org.apache.daffodil.runtime1.processors.DataLoc
import org.apache.daffodil.runtime1.processors.DataProcessor
import org.apache.daffodil.runtime1.processors.ExternalVariableException
import org.apache.daffodil.runtime1.udf.UserDefinedFunctionFatalErrorException
import org.apache.daffodil.slf4j.DaffodilLogger
Expand Down Expand Up @@ -960,6 +961,11 @@ class Main(
// saved processor. Those are from compile time, are all warnings, and
// are just noise in a production system where we're reloading a parser.
if (!processor.isError) {
Logger.log.whenDebugEnabled {
val processorImpl = processor.asInstanceOf[DataProcessor]
Logger.log.debug(s"Parser = ${processorImpl.ssrd.parser.toString}")
Logger.log.debug(s"Unparser = ${processorImpl.ssrd.unparser.toString}")
}
Some(processor.withValidationMode(mode))
} else {
None
Expand Down Expand Up @@ -1024,10 +1030,16 @@ class Main(
val processorFactory = compiler.compileSource(schemaSource)
if (!processorFactory.isError) {
val processor = processorFactory.onPath(path.getOrElse("/")).withValidationMode(mode)
displayDiagnostics(processor)
if (processor.isError) {
displayDiagnostics(processor)
None
} else {
displayDiagnostics(processor)
Logger.log.whenDebugEnabled {
val processorImpl = processor.asInstanceOf[DataProcessor]
Logger.log.debug(s"Parser = ${processorImpl.ssrd.parser.toString}")
Logger.log.debug(s"Unparser = ${processorImpl.ssrd.unparser.toString}")
}
Some(processor)
}
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ import org.apache.daffodil.lib.api.Diagnostic
import org.apache.daffodil.lib.api.URISchemaSource
import org.apache.daffodil.lib.api.UnitTestSchemaSource
import org.apache.daffodil.lib.exceptions.Assert
import org.apache.daffodil.lib.util.Logger
import org.apache.daffodil.lib.util.Misc
import org.apache.daffodil.runtime1.api.DFDL
import org.apache.daffodil.runtime1.processors.DataProcessor
Expand All @@ -65,7 +64,6 @@ final class ProcessorFactory private (
val validateDFDLSchemas: Boolean,
checkAllTopLevel: Boolean,
tunables: DaffodilTunables,
optSchemaSet: Option[SchemaSet],
) extends DFDL.ProcessorFactory {

def this(
Expand All @@ -82,7 +80,6 @@ final class ProcessorFactory private (
validateDFDLSchemas,
checkAllTopLevel,
tunables,
None,
)

private def copy(optRootSpec: Option[RootSpec] = optRootSpec): ProcessorFactory =
Expand All @@ -92,19 +89,29 @@ final class ProcessorFactory private (
validateDFDLSchemas,
checkAllTopLevel,
tunables,
Some(sset),
)

lazy val sset: SchemaSet =
optSchemaSet.getOrElse(
SchemaSet(optRootSpec, schemaSource, validateDFDLSchemas, checkAllTopLevel, tunables),
)
SchemaSet(optRootSpec, schemaSource, validateDFDLSchemas, checkAllTopLevel, tunables)

lazy val rootView: RootView = sset.root

def elementBaseInstanceCount: Long = sset.elementBaseInstanceCount

def diagnostics: Seq[Diagnostic] = sset.diagnostics
def diagnostics: Seq[Diagnostic] = {
// The work to compile a schema and build diagnostics is triggered by the user calling
// isError. But if a user gets diagnostics before doing so, then no work will have been done
// and the diagnostics will be empty. Technically this is incorrect usage--a user should
// always call isError before getting diagnostics. But there are known instances where users
// have done this. We could detect this and throw a usage assertion so users know to fix it,
// but if they want diagnostics then they likely expected the work to have been done
// already, so lets just call isError to trigger that work so they get what they expect.
// Note that we don't check the result of isError, since it is perfectly reasonable for
// errors to exist when a user asks for diagnostics--we only call it for its side-effects.
isError
sset.diagnostics
}

def getDiagnostics: Seq[Diagnostic] = diagnostics

override def onPath(xpath: String): DFDL.DataProcessor = sset.onPath(xpath)
Expand Down Expand Up @@ -359,21 +366,6 @@ class Compiler private (
)
}

val err = pf.isError
val diags = pf.getDiagnostics // might be warnings even if not isError
if (err) {
Assert.invariant(diags.nonEmpty)
Logger.log.debug(
s"Compilation (ProcessorFactory) produced ${diags.length} errors/warnings.",
)
} else {
if (diags.nonEmpty) {
Logger.log.debug(s"Compilation (ProcessorFactory) produced ${diags.length} warnings.")
} else {
Logger.log.debug(s"ProcessorFactory completed with no errors.")
}
}
Logger.log.debug(s"Schema had ${pf.elementBaseInstanceCount} elements.")
pf
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,11 +72,6 @@ trait SchemaSetRuntime1Mixin {
)
val dataProc =
new DataProcessor(ssrd, tunable, variableMap.copy(), diagnostics = this.diagnostics)
if (dataProc.isError) {} else {
Logger.log.debug(s"Parser = ${ssrd.parser.toString}.")
Logger.log.debug(s"Unparser = ${ssrd.unparser.toString}.")
Logger.log.debug(s"Compilation (DataProcesor) completed with no errors.")
}
dataProc
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,6 @@ class Compiler private[japi] (private var sCompiler: SCompiler) {
): ProcessorFactory = {

val pf = sCompiler.compileFile(schemaFile, Option(rootName), Option(rootNamespace))
pf.isError
new ProcessorFactory(pf)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1346,4 +1346,59 @@ public void testJavaAPIBlob1() throws IOException, ClassNotFoundException, Inval
Files.delete(blobDir);
}
}

/**
* Verify that ProcessorFactory.withDistinguishedRootNode selects the right node
*/
@Test
public void testJavaAPIWithDistinguishedRootNode() throws IOException, ClassNotFoundException {
org.apache.daffodil.japi.Compiler c = Daffodil.compiler();

// e3 is defined first in mySchema3.dfdl.xsd, so if withDistinguishedRootNode is ignored,
// this should give a different result
java.io.File schemaFile = getResource("/test/japi/mySchema3.dfdl.xsd");
ProcessorFactory pf = c.compileFile(schemaFile);
pf = pf.withDistinguishedRootNode("e4", null);
DataProcessor dp = pf.onPath("/");

java.io.File file = getResource("/test/japi/myData16.dat");
java.io.FileInputStream fis = new java.io.FileInputStream(file);
InputSourceDataInputStream dis = new InputSourceDataInputStream(fis);
JDOMInfosetOutputter outputter = new JDOMInfosetOutputter();
ParseResult res = dp.parse(dis, outputter);
boolean err = res.isError();
assertFalse(err);
assertEquals(5, res.location().bytePos1b());
assertEquals(33, res.location().bitPos1b());

java.io.ByteArrayOutputStream bos = new java.io.ByteArrayOutputStream();
java.nio.channels.WritableByteChannel wbc = java.nio.channels.Channels.newChannel(bos);
JDOMInfosetInputter inputter = new JDOMInfosetInputter(outputter.getResult());
UnparseResult res2 = dp.unparse(inputter, wbc);
err = res2.isError();
assertFalse(err);
assertEquals("9100", bos.toString());
}

/***
* Verify that a user can get diagnostics without having to call isError
*
* @throws IOException
*/
@Test
public void testJavaAPIGetDiagnostics() throws IOException {
org.apache.daffodil.japi.Compiler c = Daffodil.compiler();
java.io.File schemaFile = new java.io.File("/test/japi/notHere1.dfdl.xsd");
ProcessorFactory pf = c.compileFile(schemaFile);
List<Diagnostic> diags = pf.getDiagnostics();
boolean found1 = false;
for (Diagnostic d : diags) {
if (d.getMessage().contains("notHere1")) {
found1 = true;
}
}
assertTrue(found1);
assertTrue(pf.isError());
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,6 @@ class Compiler private[sapi] (private var sCompiler: SCompiler) {
optRootNamespace: Option[String] = None,
): ProcessorFactory = {
val pf = sCompiler.compileFile(schemaFile, optRootName, optRootNamespace)
pf.isError
new ProcessorFactory(pf)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1325,4 +1325,56 @@ class TestScalaAPI {
Files.delete(blobDir)
}
}

/**
* Verify that ProcessorFactory.withDistinguishedRootNode selects the right node
*/
@Test
def testScalaAPIWithDistinguishedRootNode(): Unit = {
val c = Daffodil.compiler()

// e3 is defined first in mySchema3.dfdl.xsd, so if withDistinguishedRootNode is ignored,
// this should give a different result
val schemaFile = getResource("/test/sapi/mySchema3.dfdl.xsd")
val pf = c
.compileFile(schemaFile)
.withDistinguishedRootNode("e4", null)
val dp1 = pf.onPath("/")
val dp = reserializeDataProcessor(dp1)

val file = getResource("/test/sapi/myData16.dat")
val fis = new java.io.FileInputStream(file)
val input = new InputSourceDataInputStream(fis)
val outputter = new ScalaXMLInfosetOutputter()
val res = dp.parse(input, outputter)
val err = res.isError()
assertFalse(err)
assertEquals(5, res.location().bytePos1b())
assertEquals(33, res.location().bitPos1b())

val bos = new java.io.ByteArrayOutputStream()
val wbc = java.nio.channels.Channels.newChannel(bos)
val inputter = new ScalaXMLInfosetInputter(outputter.getResult)
val res2 = dp.unparse(inputter, wbc)
val err2 = res2.isError();
assertFalse(err2);
assertEquals("9100", bos.toString());
}

/**
* Verify that a user can get diagnostics without having to call isError
*/
@Test
def testScalaAPIGetDiagnostics(): Unit = {
val c = Daffodil.compiler()

val schemaFile = new java.io.File("/test/sapi/notHere1.dfdl.xsd")
val pf = c.compileFile(schemaFile)
val diags = pf.getDiagnostics
val found1 = diags.exists { _.getMessage().contains("notHere1") }

assertTrue(found1)
assertTrue(pf.isError())
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -128,8 +128,8 @@ final class TDMLDFDLProcessorFactory private (
optRootNamespace: Option[String],
): TDML.CompileResult = {
val pf = compiler.compileSource(schemaSource, optRootName, optRootNamespace)
val diags = pf.getDiagnostics
if (pf.isError) {
val diags = pf.getDiagnostics
Left(diags)
} else {
val res = this.generateProcessor(pf, useSerializedProcessor)
Expand Down

0 comments on commit b897694

Please sign in to comment.