Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve schema compiler performance #1396

Merged
merged 1 commit into from
Dec 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ final class ProcessorFactory private (
codeGenerator
}

override def isError: Boolean = sset.isError
override lazy val isError: Boolean = sset.isError

def withDistinguishedRootNode(name: String, namespace: String): ProcessorFactory = {
Assert.usage(name ne null)
Expand Down Expand Up @@ -378,7 +378,13 @@ class Compiler private (
tunables
)
}

// It is tempting to call pf.isError here to drive compilation to completion before we
// return the pf to the caller.
// However, this slows down TDML-based testing in Daffodil substantially by moving
// the entire isError checking pass inside the synchronized block of the Daffodil
// schema compiler. This results in reduced concurrency which substantially slows
// the daffodil test suite.
// pf.isError // don't call this here. Call it outside the synchronized block.
pf
}

Expand Down Expand Up @@ -409,9 +415,16 @@ object Compiler {
optRootName: Option[String],
optRootNamespace: Option[String]
): ProcessorFactory = {
synchronized {
val pf = synchronized {
c.compileSourceInternal(schemaSource, optRootName, optRootNamespace)
}
// Force all compilation to complete. Called here outside of synchronized block on purpose
// to avoid over-serializing things (which would slow down large test suites like Daffodil's test suite.)
// Note that this requires that the shared data structures which require Daffodil schema compilation to
// be serialized do *not* include the data structures being modified during isError processing (which is
// lots of OOLAG evaluations).
pf.isError
pf
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,18 @@ final class DFDLSchemaFile(
private lazy val errHandler = new DFDLSchemaFileLoadErrorHandler(schemaFileLocation)
private lazy val loader = new DaffodilXMLLoader(errHandler)

lazy val isValidAsCompleteDFDLSchema: Boolean = LV('isValidAsCompleteDFDLSchema) {
try {
loader.validateAsCompleteDFDLSchema(schemaSource)
} catch {
case e: org.xml.sax.SAXParseException =>
errHandler.error(e) // accumulate with error handler.
} finally {
errHandler.handleLoadErrors(this)
}
true
}.value

lazy val iiXMLSchemaDocument = LV('iiXMLSchemaDocument) {
val res = makeXMLSchemaDocument(seenBefore, Some(this))
if (res.isDFDLSchema && sset.shouldValidateDFDLSchemas) {
Expand All @@ -213,15 +225,16 @@ final class DFDLSchemaFile(
// Some things, tests generally, want to turn this validation off.
//
try {
loader.validateAsDFDLSchema(schemaSource)
mbeckerle marked this conversation as resolved.
Show resolved Hide resolved
loader.validateAsIndividualDFDLSchemaFile(schemaSource)
} catch {
// validateAsDFDLSchema doesn't always capture all exceptions in the
// validateAsIndividualDFDLSchemaFile doesn't always capture all exceptions in the
// loader's error handler. Even for fatal errors it sometimes
// just throws them.
case e: org.xml.sax.SAXParseException =>
errHandler.error(e) // accumulate with error handler.
} finally {
errHandler.handleLoadErrors(this)
}
errHandler.handleLoadErrors(this)
}
res
}.value
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -171,21 +171,34 @@ final class SchemaSet private (
lazy val schemaFileList = schemas.map(s => s.uriString)

private lazy val isValid: Boolean = {
//
// We use keepGoing here, because we want to gather a validation error,
// suppress further propagation of it, and return false.
//
val isV = OOLAG.keepGoing(false) {
val files = allSchemaFiles
val fileValids = files.map {
_.isValid
if (!shouldValidateDFDLSchemas)
true // pretend it's valid though for some specific tests it may not be
else {
//
// We use keepGoing here, because we want to gather a validation error,
// suppress further propagation of it, and return false.
//
val isEachFileIndividuallyValid = OOLAG.keepGoing(false) {
val files = allSchemaFiles
val fileValids = files.map {
_.isValid
}
val res = fileValids.length > 0 && fileValids.fold(true) {
_ && _
}
res
}
val res = fileValids.length > 0 && fileValids.fold(true) {
_ && _
val isEntireSchemaValidAsAnXSD: Boolean = OOLAG.keepGoing(false) {
this.root.xmlSchemaDocument.schemaFile
.map { primaryDfdlSchemaFile =>
primaryDfdlSchemaFile.isValidAsCompleteDFDLSchema
}
.getOrElse(true)
}

val res = isEachFileIndividuallyValid && isEntireSchemaValidAsAnXSD
res
}
isV
}

lazy val validationDiagnostics = {
Expand Down Expand Up @@ -369,7 +382,7 @@ final class SchemaSet private (
* Or, you can leave the root unspecified, and this method will determine it from the
* first element declaration of the first schema file.
*/
lazy val root: Root = {
lazy val root: Root = LV('root) {
val re: GlobalElementDecl =
optPFRootSpec match {
case Some(rs) =>
Expand All @@ -393,7 +406,7 @@ final class SchemaSet private (
case _ => Assert.invariantFailed("illegal combination of root element specifications")
}
re.asRoot
}
}.value

/**
* Retrieve schema by namespace
Expand Down Expand Up @@ -670,7 +683,7 @@ final class SchemaSet private (
* and finally the AST objects are checked for errors, which recursively
* demands that all other aspects of compilation occur.
*/
override def isError: Boolean = {
override lazy val isError: Boolean = {
if (!isValid) true
else if (
// use keepGoing so we can capture errors and
Expand All @@ -681,6 +694,15 @@ final class SchemaSet private (
}
) true
else {
// we must check for errors here via the super method, as that iterates over
// all the objects evaluating them for any errors in their required evaluations.
// This has to be after everything else that could report an error here (on some
// other object) has been done.
//
// That is to say, if we called super.isError at the top of this method that would
// be incorrect since isValid and areComponentsConstructed above might cause errors
// to be recorded or objects created that this call to super.isError would then not
// take into account.
val hasErrors = super.isError
if (!hasErrors) {
// must be called after compilation is done
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,21 +48,24 @@ trait SchemaSetRuntime1Mixin {
}.value

lazy val parser = LV('parser) {
val par = if (generateParser) root.document.parser else new NotParsableParser(root.erd)
val par =
if (generateParser) root.document.parser
else new NotParsableParser(root.erd)
Processor.initialize(par)
par
}.value

lazy val unparser = LV('unparser) {
val unp =
if (generateUnparser) root.document.unparser else new NotUnparsableUnparser(root.erd)
if (generateUnparser) root.document.unparser
else new NotUnparsableUnparser(root.erd)
Processor.initialize(unp)
unp
}.value

private lazy val layerRuntimeCompiler = new LayerRuntimeCompiler

lazy val allLayers: Seq[LayerRuntimeData] = LV('allLayers) {
private lazy val allLayers: Seq[LayerRuntimeData] = LV('allLayers) {
val lrds: Seq[LayerRuntimeData] = self.allSchemaComponents
.collect {
case stb: SequenceTermBase if (stb.isLayered) => stb
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -298,7 +298,9 @@ class ProcessorFactory private[japi] (private var pf: SProcessorFactory)
*/
def onPath(path: String) = {
val dp = pf.onPath(path).asInstanceOf[SDataProcessor]
new DataProcessor(dp)
val res = new DataProcessor(dp)
res.isError // ensure all errors have been detected before we return the DP
mbeckerle marked this conversation as resolved.
Show resolved Hide resolved
res
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1252,7 +1252,7 @@ public void testJavaAPI27() throws IOException {
Throwable cause = e.getCause();
assertTrue(cause.toString().contains("Must call isError"));
assertTrue(cause.getCause().toString().contains("Schema Definition Error"));
assertTrue(cause.getCause().toString().contains("Cannot resolve the name 'tns:nonExistent'"));
assertTrue(cause.getCause().toString().contains("tns:nonExistent"));
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -478,16 +478,13 @@
}

/**
* Currently we depend on being able to evaluate these
* repeatedly, and get different answers.
*
* because it forces evaluation of all the requiredEvaluationsAlways(...)
* Forces evaluation of all the requiredEvaluationsAlways(...)
* or requiredEvaluationsIfActivated(...)
* on all objects first, but that is only for the objects
* that have been created and activated at the time this is called.
*/

def isError: Boolean = {
def isError: Boolean = isErrorOnce
private lazy val isErrorOnce: Boolean = {
oolagRoot.checkErrors
val errorCount = oolagRoot.errors.size
errorCount > 0
Expand All @@ -511,18 +508,27 @@
sealed abstract class OOLAGValueBase(
val oolagContext: OOLAGHost,
nameArg: String,
body: => Any
bodyArg: => Any
) {

// SCHEMA COMPILER PERFORMANCE INSTRUMENTATION
// To get timing on every OOLAG LV, use these 3 lines to define the 'body' lazy var.
// private lazy val bodyOnce = bodyArg
// private lazy val timedBody = TimeTracker.track(name) { bodyOnce }
// private lazy val body = timedBody
// You will also need to call TimeTracker.logTimes() at the end of Compiler.compileSource.
private lazy val body = bodyArg

Assert.usage(oolagContext != null)

final lazy val name = nameArg

private var alreadyTriedThis = false
private var errorAlreadyHandled: Maybe[ErrorAlreadyHandled] = Nope
private var alreadyTriedThis: Maybe[AlreadyTried] = Nope
protected final def hasValue: Boolean = value_.isDefined
private var value_ : Maybe[AnyRef] = Nope

protected final def wasTried = alreadyTriedThis
private final def wasTried = alreadyTriedThis.isDefined

// private def warn(th: Diagnostic): Unit = oolagContext.warn(th)
private def error(th: Diagnostic): Unit = oolagContext.error(th)
Expand All @@ -549,9 +555,9 @@
}
}

override def toString = thisThing
override def toString: String = thisThing

protected final def toss(th: Throwable) = {
protected final def toss(th: Throwable): Nothing = {
throw th
}

Expand Down Expand Up @@ -603,19 +609,24 @@
// Typically this will be for a Schema Definition Error
//
Assert.invariant(hasValue == false)
Assert.invariant(alreadyTriedThis == true)
Assert.invariant(alreadyTriedThis.isDefined)
Assert.invariant(errorAlreadyHandled.isEmpty)

Logger.log.trace(" " * indent + s"${thisThing} has no value due to ${e}")
error(e)
//
// Catch this if you can carry on with more error gathering
// from other contexts. Otherwise just let it propagate.
//
toss(new ErrorAlreadyHandled(e, this))
val eah = new ErrorAlreadyHandled(e, this)
errorAlreadyHandled = One(eah) // in theory, saving this doesn't matter.
toss(eah)
}
case e @ ErrorsNotYetRecorded(diags) => {
Assert.invariant(alreadyTriedThis.isDefined)
Assert.invariant(!hasValue)
diags.foreach { error(_) }
toss(new AlreadyTried(this))
toss(alreadyTriedThis.get)

Check warning on line 629 in daffodil-lib/src/main/scala/org/apache/daffodil/lib/oolag/OOLAG.scala

View check run for this annotation

Codecov / codecov/patch

daffodil-lib/src/main/scala/org/apache/daffodil/lib/oolag/OOLAG.scala#L629

Added line #L629 was not covered by tests
}
case th => toss(th)
}
Expand All @@ -639,12 +650,12 @@
Logger.log.trace(" " * indent + s"LV: ${thisThing} CIRCULAR")
toss(c)
}
if (alreadyTriedThis) {
if (alreadyTriedThis.isDefined) {
Logger.log.trace(" " * indent + s"LV: ${thisThing} was tried and failed")
val e = AlreadyTried(this)
val e = alreadyTriedThis.get
toss(e)
}
alreadyTriedThis = true
alreadyTriedThis = One(AlreadyTried(this))
Logger.log.trace(" " * indent + s"Evaluating ${thisThing}")
}

Expand All @@ -660,7 +671,7 @@
oolagContext.currentOVList = oolagContext.currentOVList.tail
}

final def hasError = alreadyTriedThis && !hasValue
final def hasError = alreadyTriedThis.isDefined && !hasValue

/**
* forces the value, then boolean result tells you if
Expand All @@ -670,7 +681,7 @@

final def isError = {
val res =
if (alreadyTriedThis) !hasValue
if (alreadyTriedThis.isDefined) !hasValue
else {
try {
valueAsAny
Expand All @@ -687,22 +698,26 @@

final lazy val valueAsAny: Any = {
if (hasValue) value_.get
val res =
try {
oolagBefore
val v = body // good place for a breakpoint
oolagAfterValue(v.asInstanceOf[AnyRef])
v
} catch {
case npe: NullPointerException => throw npe
case s: scala.util.control.ControlThrowable => throw s
case u: UnsuppressableException => throw u
case e: Error => throw e
case th: Throwable => oolagCatch(th)
} finally {
oolagFinalize
}
res
else {
mbeckerle marked this conversation as resolved.
Show resolved Hide resolved
// egad.... on 2024-12-19 this was always re-evaluating due to missing `else` keyword.
// this should make a substantial difference in schema compilation time.
val res =
try {
oolagBefore
val v = body // good place for a breakpoint
oolagAfterValue(v.asInstanceOf[AnyRef])
v
} catch {
case npe: NullPointerException => throw npe
case s: scala.util.control.ControlThrowable => throw s
case u: UnsuppressableException => throw u
case e: Error => throw e
case th: Throwable => oolagCatch(th)
} finally {
oolagFinalize
}
res
}
}

protected lazy val toOptionAny: Option[Any] = {
Expand Down
Loading