/**
 * Creates a new [LookupScopeStatement]. The [MetadataProvider] receiver will be used to fill
 * different meta-data using [Node.applyMetadata]. Calling this extension function outside of Kotlin
 * requires an appropriate [MetadataProvider], such as a [LanguageFrontend] as an additional
 * prepended argument.
 *
 * Besides creating the node, this builder registers the node in the
 * [Scope.predefinedLookupScopes] map of the node's scope, once for every entry in [symbols].
 *
 * @param symbols the symbols whose lookup is redirected. They are stored in
 *   [LookupScopeStatement.symbols] and used as keys for the scope registration.
 * @param targetScope the scope stored in [LookupScopeStatement.targetScope]; may be `null`.
 * @param rawNode the raw (language-specific) node that is handed to [Node.applyMetadata].
 * @return the newly created statement.
 */
@JvmOverloads
fun MetadataProvider.newLookupScopeStatement(
    symbols: List<Symbol>,
    targetScope: Scope?,
    rawNode: Any? = null
): LookupScopeStatement {
    val node = LookupScopeStatement()
    node.targetScope = targetScope
    // Keep the affected symbols on the node itself as well. Without this assignment the
    // `symbols` property of the returned statement would always be empty, even though the
    // scope map below is populated.
    node.symbols = symbols
    node.applyMetadata(this, EMPTY_NAME, rawNode, true)

    // Add it to our scope. The scope is read only after applyMetadata has run (presumably that
    // call assigns the current scope -- TODO confirm); if there is no scope, nothing is
    // registered.
    node.scope?.let { scope ->
        for (symbol in symbols) {
            scope.predefinedLookupScopes[symbol] = node
        }
    }

    log(node)
    return node
}
*/ fun addSymbol(symbol: Symbol, declaration: Declaration) { if (declaration is ImportDeclaration && declaration.wildcardImport) { @@ -123,8 +135,16 @@ abstract class Scope( replaceImports: Boolean = true, predicate: ((Declaration) -> Boolean)? = null ): List { - // First, try to look for the symbol in the current scope - var scope: Scope? = this + // First, try to look for the symbol in the current scope (unless we have a predefined + // search scope). In the latter case we also need to restrict the lookup to the search scope + var modifiedScoped = this.predefinedLookupScopes[symbol]?.targetScope + var scope: Scope? = + if (modifiedScoped != null) { + modifiedScoped + } else { + this + } + var list: MutableList? = null while (scope != null) { @@ -154,10 +174,11 @@ abstract class Scope( } // If we do not have a hit, we can go up one scope, unless thisScopeOnly is set to true - if (!thisScopeOnly) { - scope = scope.parent - } else { + // (or we had a modified scope) + if (thisScopeOnly || modifiedScoped != null) { break + } else { + scope = scope.parent } } diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/statements/LookupScopeStatement.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/statements/LookupScopeStatement.kt new file mode 100644 index 0000000000..b0dd317af5 --- /dev/null +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/statements/LookupScopeStatement.kt @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2024, Fraunhofer AISEC. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
/**
 * A statement that changes the lookup scope of one or more [Reference] nodes (or, more precisely,
 * of their symbols) within the current [Scope]. The best-known examples are the Python `global`
 * and `nonlocal` keywords.
 *
 * The node only carries the information; it does not perform the redirection itself. For the
 * redirection to take effect, the node (or the information in it) has to be added to
 * [Scope.predefinedLookupScopes], so that the scope/identifier lookup does not need to traverse
 * the AST.
 *
 * The [newLookupScopeStatement] node builder does this registration automatically, so it is
 * STRONGLY encouraged to use the node builder instead of instantiating this class directly.
 */
class LookupScopeStatement : Statement() {

    /** The symbols affected by this statement. */
    var symbols: List<Symbol> = listOf()

    /** The scope in which the affected symbols should be looked up. */
    var targetScope: Scope? = null

    /**
     * Two statements are equal if they are the same instance, or if the other object is a
     * [LookupScopeStatement] that is equal according to the super class and has equal [symbols]
     * and [targetScope].
     */
    override fun equals(other: Any?): Boolean =
        when {
            this === other -> true
            other !is LookupScopeStatement -> false
            else ->
                super.equals(other) &&
                    symbols == other.symbols &&
                    targetScope == other.targetScope
        }

    /** Combines the super class hash with [symbols] and [targetScope], matching [equals]. */
    override fun hashCode(): Int {
        return Objects.hash(super.hashCode(), symbols, targetScope)
    }
}
SymbolResolver(ctx: TranslationContext) : ComponentPass(ctx) { // resolution, but in future this will also be used in resolving regular references. current.candidates = scopeManager.findSymbols(current.name, current.location).toSet() + // Preparation for a future without legacy call resolving. Taking the first candidate is not + // ideal since we are running into an issue with function pointers here (see workaround + // below). + var wouldResolveTo = current.candidates.singleOrNull() + // For now, we need to ignore reference expressions that are directly embedded into call // expressions, because they are the "callee" property. In the future, we will use this // property to actually resolve the function call. However, there is a special case that @@ -189,21 +194,38 @@ open class SymbolResolver(ctx: TranslationContext) : ComponentPass(ctx) { // of this call expression back to its original variable declaration. In the future, we want // to extend this particular code to resolve all callee references to their declarations, // i.e., their function definitions and get rid of the separate CallResolver. - var wouldResolveTo: Declaration? = null if (current.resolutionHelper is CallExpression) { // Peek into the declaration, and if it is only one declaration and a variable, we can // proceed normally, as we are running into the special case explained above. Otherwise, // we abort here (for now). - wouldResolveTo = current.candidates.singleOrNull() if (wouldResolveTo !is VariableDeclaration && wouldResolveTo !is ParameterDeclaration) { return } } + // Some stupid C++ workaround to use the legacy call resolver when we try to resolve targets + // for function pointers. At least we are only invoking the legacy resolver for a very small + // percentage of references now. 
+ if (wouldResolveTo is FunctionDeclaration) { + // We need to invoke the legacy resolver, just to be sure + var legacy = scopeManager.resolveReference(current) + + // This is just for us to catch these differences in symbol resolving in the future. The + // difference is pretty much only that the legacy system takes parameters of the + // function-pointer-type into account and the new system does not (yet), because it just + // takes the first match. This will be needed to solve in the future. + if (legacy != wouldResolveTo) { + log.warn( + "The legacy symbol resolution and the new system produced different results here. This needs to be investigated in the future. For now, we take the legacy result." + ) + wouldResolveTo = legacy + } + } + // Only consider resolving, if the language frontend did not specify a resolution. If we // already have populated the wouldResolveTo variable, we can re-use this instead of // resolving again - var refersTo = current.refersTo ?: wouldResolveTo ?: scopeManager.resolveReference(current) + var refersTo = current.refersTo ?: wouldResolveTo var recordDeclType: Type? = null if (currentClass != null) { diff --git a/cpg-core/src/test/kotlin/de/fraunhofer/aisec/cpg/graph/StatementBuilderTest.kt b/cpg-core/src/test/kotlin/de/fraunhofer/aisec/cpg/graph/StatementBuilderTest.kt new file mode 100644 index 0000000000..340362daf8 --- /dev/null +++ b/cpg-core/src/test/kotlin/de/fraunhofer/aisec/cpg/graph/StatementBuilderTest.kt @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2024, Fraunhofer AISEC. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
class StatementBuilderTest {
    /**
     * Builds a small translation unit by hand: a global variable `a`, and a function `main` whose
     * body declares a local `a`, then contains a lookup scope statement for `a` pointing to the
     * global scope, followed by a reference to `a`. After running the default passes, the
     * reference is expected to refer to the global `a`.
     */
    @Test
    fun testNewLookupScopeStatement() {
        val context =
            TranslationContext(
                TranslationConfiguration.builder().defaultPasses().build(),
                ScopeManager(),
                TypeManager()
            )
        val frontend = TestLanguageFrontend(ctx = context)

        val result =
            frontend.build {
                translationResult {
                    val unit =
                        with(frontend) {
                            val file = newTranslationUnitDeclaration("main.file")
                            scopeManager.resetToGlobal(file)

                            // global variable "a"
                            val outerA = newVariableDeclaration("a")
                            scopeManager.addDeclaration(outerA)

                            val func = newFunctionDeclaration("main")
                            scopeManager.enterScope(func)

                            val block = newBlock()
                            scopeManager.enterScope(block)

                            // local variable "a" that shadows the global one
                            val innerA = newVariableDeclaration("a")
                            val declStmt = newDeclarationStatement()
                            declStmt.declarations += innerA
                            scopeManager.addDeclaration(innerA)
                            block += declStmt

                            // redirect lookups of "a" to the global scope, then reference it
                            block += newLookupScopeStatement(listOf("a"), scopeManager.globalScope)
                            block += newReference("a")

                            scopeManager.leaveScope(block)
                            func.body = block
                            scopeManager.leaveScope(func)

                            scopeManager.addDeclaration(func)
                            scopeManager.leaveScope(file)
                            file
                        }

                    components.firstOrNull()?.translationUnits?.add(unit)
                }
            }

        val globalA = result.variables["a"]
        assertNotNull(globalA)
        assertIs<GlobalScope>(globalA.scope)

        val reference = result.refs["a"]
        assertRefersTo(reference, globalA)
    }
}
class ScopeTest {
    /**
     * Checks [Scope.lookupSymbol] on a block scope nested in a global scope, first without and
     * then with an entry in [Scope.predefinedLookupScopes] for the looked-up symbol.
     */
    @Test
    fun testLookup() {
        // Mock variable declarations with the same name, one global and one local
        val globalA = VariableDeclaration().apply { name = Name("a") }
        val localA = VariableDeclaration().apply { name = Name("a") }

        // Two scopes: the global scope and a block scope nested in it
        val globalScope = GlobalScope()
        globalScope.addSymbol("a", globalA)
        val scope = BlockScope(Block())
        scope.parent = globalScope
        scope.addSymbol("a", localA)

        // Without any modifier, the lookup starts in the block scope and moves upwards, so the
        // local "a" is expected
        assertEquals(listOf(localA), scope.lookupSymbol("a"))

        // Pretend there is a lookup scope modifier for the symbol, e.g. through "global" in
        // Python
        val modifier =
            LookupScopeStatement().apply {
                targetScope = globalScope
                symbols = listOf("a")
            }
        scope.predefinedLookupScopes["a"] = modifier

        // The same lookup is now expected to yield the global "a"
        assertEquals(listOf(globalA), scope.lookupSymbol("a"))
    }
}