diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/StatementBuilder.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/StatementBuilder.kt index c4157ff013..b01b6ce5c1 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/StatementBuilder.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/StatementBuilder.kt @@ -28,6 +28,8 @@ package de.fraunhofer.aisec.cpg.graph import de.fraunhofer.aisec.cpg.frontends.LanguageFrontend import de.fraunhofer.aisec.cpg.graph.Node.Companion.EMPTY_NAME import de.fraunhofer.aisec.cpg.graph.NodeBuilder.log +import de.fraunhofer.aisec.cpg.graph.scopes.Scope +import de.fraunhofer.aisec.cpg.graph.scopes.Symbol import de.fraunhofer.aisec.cpg.graph.statements.* /** @@ -329,3 +331,28 @@ fun MetadataProvider.newDefaultStatement(rawNode: Any? = null): DefaultStatement log(node) return node } + +/** + * Creates a new [LookupScopeStatement]. The [MetadataProvider] receiver will be used to fill + * different meta-data using [Node.applyMetadata]. Calling this extension function outside of Kotlin + * requires an appropriate [MetadataProvider], such as a [LanguageFrontend] as an additional + * prepended argument. + */ +@JvmOverloads +fun MetadataProvider.newLookupScopeStatement( + symbols: List, + targetScope: Scope?, + rawNode: Any? = null +): LookupScopeStatement { + val node = LookupScopeStatement() + node.targetScope = targetScope + node.applyMetadata(this, EMPTY_NAME, rawNode, true) + + // Add it to our scope + for (symbol in symbols) { + node.scope?.predefinedLookupScopes[symbol] = node + } + + log(node) + return node +} diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/scopes/Scope.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/scopes/Scope.kt index 6194f54495..e128ecd112 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/scopes/Scope.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/scopes/Scope.kt @@ -32,6 +32,8 @@ import de.fraunhofer.aisec.cpg.graph.Node.Companion.TO_STRING_STYLE import de.fraunhofer.aisec.cpg.graph.declarations.Declaration import de.fraunhofer.aisec.cpg.graph.declarations.ImportDeclaration import de.fraunhofer.aisec.cpg.graph.statements.LabelStatement +import de.fraunhofer.aisec.cpg.graph.statements.LookupScopeStatement +import de.fraunhofer.aisec.cpg.graph.statements.expressions.Reference import de.fraunhofer.aisec.cpg.helpers.neo4j.NameConverter import org.apache.commons.lang3.builder.ToStringBuilder import org.neo4j.ogm.annotation.GeneratedValue @@ -90,6 +92,16 @@ abstract class Scope( */ @Transient var wildcardImports: MutableSet = mutableSetOf() + /** + * In some languages, the lookup scope of a symbol that is being resolved (e.g. of a + * [Reference]) can be adjusted through keywords (such as `global` in Python or PHP). + * + * We store this information in the form of a [LookupScopeStatement] in the AST, but we need to + * also store this information in the scope to avoid unnecessary AST traversals when resolving + * symbols using [lookupSymbol]. + */ + @Transient var predefinedLookupScopes: MutableMap = mutableMapOf() + /** Adds a [declaration] with the defined [symbol]. */ fun addSymbol(symbol: Symbol, declaration: Declaration) { if (declaration is ImportDeclaration && declaration.wildcardImport) { @@ -123,8 +135,16 @@ abstract class Scope( replaceImports: Boolean = true, predicate: ((Declaration) -> Boolean)? = null ): List { - // First, try to look for the symbol in the current scope - var scope: Scope? = this + // First, try to look for the symbol in the current scope (unless we have a predefined + // search scope). In the latter case we also need to restrict the lookup to the search scope + var modifiedScoped = this.predefinedLookupScopes[symbol]?.targetScope + var scope: Scope? = + if (modifiedScoped != null) { + modifiedScoped + } else { + this + } + var list: MutableList? = null while (scope != null) { @@ -154,10 +174,11 @@ abstract class Scope( } // If we do not have a hit, we can go up one scope, unless thisScopeOnly is set to true - if (!thisScopeOnly) { - scope = scope.parent - } else { + // (or we had a modified scope) + if (thisScopeOnly || modifiedScoped != null) { break + } else { + scope = scope.parent } } diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/statements/LookupScopeStatement.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/statements/LookupScopeStatement.kt new file mode 100644 index 0000000000..b0dd317af5 --- /dev/null +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/statements/LookupScopeStatement.kt @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2024, Fraunhofer AISEC. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * $$$$$$\ $$$$$$$\ $$$$$$\ + * $$ __$$\ $$ __$$\ $$ __$$\ + * $$ / \__|$$ | $$ |$$ / \__| + * $$ | $$$$$$$ |$$ |$$$$\ + * $$ | $$ ____/ $$ |\_$$ | + * $$ | $$\ $$ | $$ | $$ | + * \$$$$$ |$$ | \$$$$$ | + * \______/ \__| \______/ + * + */ +package de.fraunhofer.aisec.cpg.graph.statements + +import de.fraunhofer.aisec.cpg.graph.newLookupScopeStatement +import de.fraunhofer.aisec.cpg.graph.scopes.Scope +import de.fraunhofer.aisec.cpg.graph.scopes.Symbol +import de.fraunhofer.aisec.cpg.graph.statements.expressions.Reference +import java.util.Objects + +/** + * This statement modifies the lookup scope of one or more [Reference] nodes (or more precise it's + * symbols) within the current [Scope]. The most prominent example of this are the Python `global` + * and `nonlocal` keywords. + * + * This node itself does not implement the actual functionality. It is necessary to add this node + * (or the information therein) to [Scope.predefinedLookupScopes]. The reason for this is that we + * want to avoid AST traversals in the scope/identifier lookup. + * + * The [newLookupScopeStatement] node builder will add this automatically, so it is STRONGLY + * encouraged that the node builder is used instead of creating the node itself. + */ +class LookupScopeStatement : Statement() { + + /** The symbols this statement affects. */ + var symbols: List = listOf() + + /** The target scope to which the references are referring to. */ + var targetScope: Scope? = null + + override fun equals(other: Any?): Boolean { + if (this === other) return true + if (other !is LookupScopeStatement) return false + return super.equals(other) && symbols == other.symbols && targetScope == other.targetScope + } + + override fun hashCode() = Objects.hash(super.hashCode(), symbols, targetScope) +} diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/EvaluationOrderGraphPass.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/EvaluationOrderGraphPass.kt index 5b64513ea5..04bb795221 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/EvaluationOrderGraphPass.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/EvaluationOrderGraphPass.kt @@ -157,6 +157,9 @@ open class EvaluationOrderGraphPass(ctx: TranslationContext) : TranslationUnitPa map[TypeIdExpression::class.java] = { handleDefault(it) } map[Reference::class.java] = { handleDefault(it) } map[LambdaExpression::class.java] = { handleLambdaExpression(it as LambdaExpression) } + map[LookupScopeStatement::class.java] = { + handleLookupScopeStatement(it as LookupScopeStatement) + } } protected fun doNothing() { @@ -1019,6 +1022,12 @@ open class EvaluationOrderGraphPass(ctx: TranslationContext) : TranslationUnitPa nextEdgeBranch = false } + private fun handleLookupScopeStatement(stmt: LookupScopeStatement) { + // Include the node as part of the EOG itself, but we do not need to go into any children or + // properties here + pushToEOG(stmt) + } + companion object { protected val LOGGER = LoggerFactory.getLogger(EvaluationOrderGraphPass::class.java) diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/SymbolResolver.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/SymbolResolver.kt index 6a3d8707a3..ef5f1cb8e0 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/SymbolResolver.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/SymbolResolver.kt @@ -180,6 +180,11 @@ open class SymbolResolver(ctx: TranslationContext) : ComponentPass(ctx) { // resolution, but in future this will also be used in resolving regular references. current.candidates = scopeManager.findSymbols(current.name, current.location).toSet() + // Preparation for a future without legacy call resolving. Taking the first candidate is not + // ideal since we are running into an issue with function pointers here (see workaround + // below). + var wouldResolveTo = current.candidates.singleOrNull() + // For now, we need to ignore reference expressions that are directly embedded into call // expressions, because they are the "callee" property. In the future, we will use this // property to actually resolve the function call. However, there is a special case that @@ -189,21 +194,38 @@ open class SymbolResolver(ctx: TranslationContext) : ComponentPass(ctx) { // of this call expression back to its original variable declaration. In the future, we want // to extend this particular code to resolve all callee references to their declarations, // i.e., their function definitions and get rid of the separate CallResolver. - var wouldResolveTo: Declaration? = null if (current.resolutionHelper is CallExpression) { // Peek into the declaration, and if it is only one declaration and a variable, we can // proceed normally, as we are running into the special case explained above. Otherwise, // we abort here (for now). - wouldResolveTo = current.candidates.singleOrNull() if (wouldResolveTo !is VariableDeclaration && wouldResolveTo !is ParameterDeclaration) { return } } + // Some stupid C++ workaround to use the legacy call resolver when we try to resolve targets + // for function pointers. At least we are only invoking the legacy resolver for a very small + // percentage of references now. + if (wouldResolveTo is FunctionDeclaration) { + // We need to invoke the legacy resolver, just to be sure + var legacy = scopeManager.resolveReference(current) + + // This is just for us to catch these differences in symbol resolving in the future. The + // difference is pretty much only that the legacy system takes parameters of the + // function-pointer-type into account and the new system does not (yet), because it just + // takes the first match. This will be needed to solve in the future. + if (legacy != wouldResolveTo) { + log.warn( + "The legacy symbol resolution and the new system produced different results here. This needs to be investigated in the future. For now, we take the legacy result." + ) + wouldResolveTo = legacy + } + } + // Only consider resolving, if the language frontend did not specify a resolution. If we // already have populated the wouldResolveTo variable, we can re-use this instead of // resolving again - var refersTo = current.refersTo ?: wouldResolveTo ?: scopeManager.resolveReference(current) + var refersTo = current.refersTo ?: wouldResolveTo var recordDeclType: Type? = null if (currentClass != null) { diff --git a/cpg-core/src/test/kotlin/de/fraunhofer/aisec/cpg/graph/StatementBuilderTest.kt b/cpg-core/src/test/kotlin/de/fraunhofer/aisec/cpg/graph/StatementBuilderTest.kt new file mode 100644 index 0000000000..340362daf8 --- /dev/null +++ b/cpg-core/src/test/kotlin/de/fraunhofer/aisec/cpg/graph/StatementBuilderTest.kt @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2024, Fraunhofer AISEC. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * $$$$$$\ $$$$$$$\ $$$$$$\ + * $$ __$$\ $$ __$$\ $$ __$$\ + * $$ / \__|$$ | $$ |$$ / \__| + * $$ | $$$$$$$ |$$ |$$$$\ + * $$ | $$ ____/ $$ |\_$$ | + * $$ | $$\ $$ | $$ | $$ | + * \$$$$$ |$$ | \$$$$$ | + * \______/ \__| \______/ + * + */ +package de.fraunhofer.aisec.cpg.graph + +import de.fraunhofer.aisec.cpg.ScopeManager +import de.fraunhofer.aisec.cpg.TranslationConfiguration +import de.fraunhofer.aisec.cpg.TranslationContext +import de.fraunhofer.aisec.cpg.TypeManager +import de.fraunhofer.aisec.cpg.frontends.TestLanguageFrontend +import de.fraunhofer.aisec.cpg.graph.builder.translationResult +import de.fraunhofer.aisec.cpg.graph.scopes.GlobalScope +import de.fraunhofer.aisec.cpg.test.assertRefersTo +import kotlin.test.Test +import kotlin.test.assertIs +import kotlin.test.assertNotNull + +class StatementBuilderTest { + @Test + fun testNewLookupScopeStatement() { + val frontend = + TestLanguageFrontend( + ctx = + TranslationContext( + TranslationConfiguration.builder().defaultPasses().build(), + ScopeManager(), + TypeManager() + ) + ) + val result = + frontend.build { + translationResult { + var tu = + with(frontend) { + var tu = newTranslationUnitDeclaration("main.file") + scopeManager.resetToGlobal(tu) + + var globalA = newVariableDeclaration("a") + scopeManager.addDeclaration(globalA) + + var func = newFunctionDeclaration("main") + scopeManager.enterScope(func) + + var body = newBlock() + scopeManager.enterScope(body) + + var localA = newVariableDeclaration("a") + var stmt = newDeclarationStatement() + stmt.declarations += localA + scopeManager.addDeclaration(localA) + body += stmt + + body += newLookupScopeStatement(listOf("a"), scopeManager.globalScope) + body += newReference("a") + + scopeManager.leaveScope(body) + func.body = body + scopeManager.leaveScope(func) + + scopeManager.addDeclaration(func) + scopeManager.leaveScope(tu) + tu + } + + components.firstOrNull()?.translationUnits?.add(tu) + } + } + + val globalA = result.variables["a"] + assertNotNull(globalA) + assertIs(globalA.scope) + + val a = result.refs["a"] + assertRefersTo(a, globalA) + } +} diff --git a/cpg-core/src/test/kotlin/de/fraunhofer/aisec/cpg/graph/scopes/ScopeTest.kt b/cpg-core/src/test/kotlin/de/fraunhofer/aisec/cpg/graph/scopes/ScopeTest.kt new file mode 100644 index 0000000000..77f6db5b6a --- /dev/null +++ b/cpg-core/src/test/kotlin/de/fraunhofer/aisec/cpg/graph/scopes/ScopeTest.kt @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2024, Fraunhofer AISEC. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * $$$$$$\ $$$$$$$\ $$$$$$\ + * $$ __$$\ $$ __$$\ $$ __$$\ + * $$ / \__|$$ | $$ |$$ / \__| + * $$ | $$$$$$$ |$$ |$$$$\ + * $$ | $$ ____/ $$ |\_$$ | + * $$ | $$\ $$ | $$ | $$ | + * \$$$$$ |$$ | \$$$$$ | + * \______/ \__| \______/ + * + */ +package de.fraunhofer.aisec.cpg.graph.scopes + +import de.fraunhofer.aisec.cpg.graph.Name +import de.fraunhofer.aisec.cpg.graph.declarations.VariableDeclaration +import de.fraunhofer.aisec.cpg.graph.statements.LookupScopeStatement +import de.fraunhofer.aisec.cpg.graph.statements.expressions.Block +import kotlin.test.Test +import kotlin.test.assertEquals + +class ScopeTest { + @Test + fun testLookup() { + // some mock variable declarations, global and local + var globalA = VariableDeclaration() + globalA.name = Name("a") + var localA = VariableDeclaration() + localA.name = Name("a") + + // two scopes, global and local + val globalScope = GlobalScope() + globalScope.addSymbol("a", globalA) + val scope = BlockScope(Block()) + scope.parent = globalScope + scope.addSymbol("a", localA) + + // if we try to resolve "a" now, this should point to the local A since we start there and + // move upwards + var result = scope.lookupSymbol("a") + assertEquals(listOf(localA), result) + + // now, we pretend to have a lookup scope modifier for a symbol, e.g. through "global" in + // Python + var stmt = LookupScopeStatement() + stmt.targetScope = globalScope + stmt.symbols = listOf("a") + scope.predefinedLookupScopes["a"] = stmt + + // let's try the lookup again, this time it should point to the global A + result = scope.lookupSymbol("a") + assertEquals(listOf(globalA), result) + } +} diff --git a/cpg-language-python/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/python/StatementHandler.kt b/cpg-language-python/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/python/StatementHandler.kt index f2b9b79553..1876cca629 100644 --- a/cpg-language-python/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/python/StatementHandler.kt +++ b/cpg-language-python/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/python/StatementHandler.kt @@ -33,6 +33,8 @@ import de.fraunhofer.aisec.cpg.frontends.python.PythonLanguage.Companion.MODIFIE import de.fraunhofer.aisec.cpg.graph.* import de.fraunhofer.aisec.cpg.graph.Annotation import de.fraunhofer.aisec.cpg.graph.declarations.* +import de.fraunhofer.aisec.cpg.graph.scopes.BlockScope +import de.fraunhofer.aisec.cpg.graph.scopes.NameScope import de.fraunhofer.aisec.cpg.graph.statements.* import de.fraunhofer.aisec.cpg.graph.statements.expressions.* import de.fraunhofer.aisec.cpg.graph.types.FunctionType @@ -64,9 +66,9 @@ class StatementHandler(frontend: PythonLanguageFrontend) : is Python.AST.Assert -> handleAssert(node) is Python.AST.Try -> handleTryStatement(node) is Python.AST.Delete -> handleDelete(node) - is Python.AST.Global, + is Python.AST.Global -> handleGlobal(node) + is Python.AST.Nonlocal -> handleNonLocal(node) is Python.AST.Match, - is Python.AST.Nonlocal, is Python.AST.Raise, is Python.AST.TryStar, is Python.AST.With, @@ -526,6 +528,35 @@ class StatementHandler(frontend: PythonLanguageFrontend) : return wrapDeclarationToStatement(result) } + private fun handleGlobal(global: Python.AST.Global): LookupScopeStatement { + // Technically, our global scope is not identical to the python "global" scope. The reason + // behind that is that we wrap each file in a namespace (as defined in the python spec). So + // the "global" scope is actually our current namespace scope. + var pythonGlobalScope = + frontend.scopeManager.globalScope?.children?.firstOrNull { it is NameScope } + + return newLookupScopeStatement( + global.names.map { parseName(it).localName }, + pythonGlobalScope, + rawNode = global + ) + } + + private fun handleNonLocal(global: Python.AST.Nonlocal): LookupScopeStatement { + // We need to find the first outer function scope, or rather the block scope belonging to + // the function + var outerFunctionScope = + frontend.scopeManager.firstScopeOrNull { + it is BlockScope && it != frontend.scopeManager.currentScope + } + + return newLookupScopeStatement( + global.names.map { parseName(it).localName }, + outerFunctionScope, + rawNode = global + ) + } + /** Adds the arguments to [result] which might be located in a [recordDeclaration]. */ private fun handleArguments( args: Python.AST.arguments, diff --git a/cpg-language-python/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/PythonAddDeclarationsPass.kt b/cpg-language-python/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/PythonAddDeclarationsPass.kt index 100791c74a..98a3de58c2 100644 --- a/cpg-language-python/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/PythonAddDeclarationsPass.kt +++ b/cpg-language-python/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/PythonAddDeclarationsPass.kt @@ -30,8 +30,8 @@ import de.fraunhofer.aisec.cpg.frontends.python.PythonLanguageFrontend import de.fraunhofer.aisec.cpg.graph.* import de.fraunhofer.aisec.cpg.graph.declarations.Declaration import de.fraunhofer.aisec.cpg.graph.declarations.FieldDeclaration -import de.fraunhofer.aisec.cpg.graph.declarations.MethodDeclaration import de.fraunhofer.aisec.cpg.graph.declarations.VariableDeclaration +import de.fraunhofer.aisec.cpg.graph.scopes.RecordScope import de.fraunhofer.aisec.cpg.graph.statements.ForEachStatement import de.fraunhofer.aisec.cpg.graph.statements.expressions.AssignExpression import de.fraunhofer.aisec.cpg.graph.statements.expressions.CallExpression @@ -78,70 +78,102 @@ class PythonAddDeclarationsPass(ctx: TranslationContext) : ComponentPass(ctx) { } } - /* - * Return null when not creating a new decl + /** + * This function will create a new dynamic [VariableDeclaration] if there is a write access to + * the [ref]. */ - private fun handleReference(node: Reference): VariableDeclaration? { - if (node.resolutionHelper is CallExpression) { + private fun handleWriteToReference(ref: Reference): VariableDeclaration? { + if (ref.resolutionHelper is CallExpression) { return null } - // TODO(oxisto): Actually the logic here is far more complex in reality, taking into account - // local and global variables, but for now we just resolve it using the scope manager - val resolved = scopeManager.resolveReference(node) + // Look for a potential scope modifier for this reference + // lookupScope + var targetScope = + scopeManager.currentScope?.predefinedLookupScopes[ref.name.toString()]?.targetScope + + // There are a couple of things to consider now + var symbol = + // Since this is a WRITE access, we need + // - to look for a local symbol, unless + // - a global keyword is present for this symbol and scope + if (targetScope != null) { + scopeManager.findSymbols(ref.name, ref.location, targetScope) + } else { + scopeManager.findSymbols(ref.name, ref.location) { + it.scope == scopeManager.currentScope + } + } // Nothing to create - if (resolved != null) return null - - val decl = - if (scopeManager.isInRecord) { - if (scopeManager.isInFunction) { - if ( - node is MemberExpression && - node.base.name == - (scopeManager.currentFunction as? MethodDeclaration)?.receiver?.name - ) { - // We need to temporarily jump into the scope of the current record to - // add the field - val field = - scopeManager.withScope(scopeManager.currentRecord?.scope) { - newFieldDeclaration(node.name) - } - field - } else { - val v = newVariableDeclaration(node.name) - v + if (symbol.isNotEmpty()) return null + + // First, check if we need to create a field + var field: FieldDeclaration? = + if (scopeManager.isInRecord && scopeManager.isInFunction) { + // Check, whether we are referring to a "self.X", which would create a field + if ( + ref is MemberExpression && + ref.base.name == scopeManager.currentMethod?.receiver?.name + ) { + // We need to temporarily jump into the scope of the current record to + // add the field. These are instance attributes + scopeManager.withScope(scopeManager.firstScopeIsInstanceOrNull()) { + newFieldDeclaration(ref.name) } + } else if (ref is MemberExpression) { + // If this is any other member expression, we are usually not interested in + // creating fields, except if this is a receiver + return null } else { - val field = - scopeManager.withScope(scopeManager.currentRecord?.scope) { - newFieldDeclaration(node.name) - } - field + null } + } else if (scopeManager.isInRecord) { + // We end up here for fields declared directly in the class body. These are class + // attributes; more or less static fields. + newFieldDeclaration(ref.name) + } else { + null + } + + // If we didn't create any field up to this point and if we are still have not returned, we + // can create a normal variable. We need to take scope modifications into account. + var decl = + if (field == null && targetScope != null) { + scopeManager.withScope(targetScope) { newVariableDeclaration(ref.name) } + } else if (field == null) { + newVariableDeclaration(ref.name) } else { - newVariableDeclaration(node.name) + field } - decl.code = node.code - decl.location = node.location + decl.code = ref.code + decl.location = ref.location decl.isImplicit = true - if (decl is FieldDeclaration) { - scopeManager.currentRecord?.addField(decl) - scopeManager.withScope(scopeManager.currentRecord?.scope) { - scopeManager.addDeclaration(decl) - } - } else { - scopeManager.addDeclaration(decl) - } + // TODO: trace? + log.debug( + "Creating dynamic {} {} in {}", + if (decl is FieldDeclaration) { + "field" + } else { + "variable" + }, + decl.name, + decl.scope + ) + + // Make sure we add the declaration at the correct place, i.e. with the scope we set at the + // creation time + scopeManager.withScope(decl.scope) { scopeManager.addDeclaration(decl) } + return decl } private fun handleAssignExpression(assignExpression: AssignExpression) { for (target in assignExpression.lhs) { (target as? Reference)?.let { - val handled = handleReference(target) + val handled = handleWriteToReference(target) if (handled is Declaration) { // We cannot assign an initializer here because this will lead to duplicate // DFG edges, but we need to propagate the type information from our value @@ -163,8 +195,10 @@ class PythonAddDeclarationsPass(ctx: TranslationContext) : ComponentPass(ctx) { private fun handleForEach(node: ForEachStatement) { when (val forVar = node.variable) { is Reference -> { - val handled = handleReference(forVar) - (handled as? Declaration)?.let { scopeManager.addDeclaration(it) } + val handled = handleWriteToReference(forVar) + if (handled is Declaration) { + handled.let { node.addDeclaration(it) } + } } } } diff --git a/cpg-language-python/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/python/PythonFrontendTest.kt b/cpg-language-python/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/python/PythonFrontendTest.kt index c643d9408f..b867e0c030 100644 --- a/cpg-language-python/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/python/PythonFrontendTest.kt +++ b/cpg-language-python/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/python/PythonFrontendTest.kt @@ -649,10 +649,16 @@ class PythonFrontendTest : BaseTest() { val classFieldDeclaredInFunction = clsFoo.fields["classFieldDeclaredInFunction"] assertNotNull(classFieldDeclaredInFunction) - // assertEquals(3, clsFoo.fields.size) // TODO should "self" be considered a field here? - assertNull(classFieldNoInitializer.initializer) - assertNotNull(classFieldWithInit) + + val localClassFieldNoInitializer = methBar.variables["classFieldNoInitializer"] + assertNotNull(localClassFieldNoInitializer) + + val localClassFieldWithInit = methBar.variables["classFieldWithInit"] + assertNotNull(localClassFieldNoInitializer) + + val localClassFieldDeclaredInFunction = methBar.variables["classFieldDeclaredInFunction"] + assertNotNull(localClassFieldNoInitializer) // classFieldNoInitializer = classFieldWithInit val assignClsFieldOutsideFunc = clsFoo.statements[2] as? AssignExpression @@ -675,35 +681,35 @@ class PythonFrontendTest : BaseTest() { assertNotNull(decl0.firstAssignment) // self.classFieldNoInitializer = 789 - val barStmt1 = barBody.statements[1] as? AssignExpression + val barStmt1 = barBody.statements(1) assertNotNull(barStmt1) assertEquals(classFieldNoInitializer, (barStmt1.lhs())?.refersTo) // self.classFieldWithInit = 12 - val barStmt2 = barBody.statements[2] as? AssignExpression + val barStmt2 = barBody.statements(2) assertNotNull(barStmt2) assertEquals(classFieldWithInit, (barStmt2.lhs())?.refersTo) // classFieldNoInitializer = "shadowed" - val barStmt3 = barBody.statements[3] as? AssignExpression + val barStmt3 = barBody.statements(3) assertNotNull(barStmt3) assertEquals("=", barStmt3.operatorCode) - assertEquals(classFieldNoInitializer, (barStmt3.lhs())?.refersTo) - assertEquals("shadowed", (barStmt3.rhs>())?.value) + assertRefersTo(barStmt3.lhs(), localClassFieldNoInitializer) + assertLiteralValue("shadowed", barStmt3.rhs>()) // classFieldWithInit = "shadowed" - val barStmt4 = barBody.statements[4] as? AssignExpression + val barStmt4 = barBody.statements(4) assertNotNull(barStmt4) assertEquals("=", barStmt4.operatorCode) - assertEquals(classFieldWithInit, (barStmt4.lhs())?.refersTo) - assertEquals("shadowed", (barStmt4.rhs>())?.value) + assertRefersTo(barStmt4.lhs(), localClassFieldWithInit) + assertLiteralValue("shadowed", barStmt4.rhs>()) // classFieldDeclaredInFunction = "shadowed" - val barStmt5 = barBody.statements[5] as? AssignExpression + val barStmt5 = barBody.statements(5) assertNotNull(barStmt5) assertEquals("=", barStmt5.operatorCode) - assertEquals(classFieldDeclaredInFunction, (barStmt5.lhs())?.refersTo) - assertEquals("shadowed", (barStmt5.rhs>())?.value) + assertRefersTo(barStmt5.lhs(), localClassFieldDeclaredInFunction) + assertLiteralValue("shadowed", barStmt5.rhs>()) /* TODO: foo = Foo() diff --git a/cpg-language-python/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/python/statementHandler/StatementHandlerTest.kt b/cpg-language-python/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/python/statementHandler/StatementHandlerTest.kt index 4ea13e479b..9ccec3ba24 100644 --- a/cpg-language-python/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/python/statementHandler/StatementHandlerTest.kt +++ b/cpg-language-python/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/python/statementHandler/StatementHandlerTest.kt @@ -232,4 +232,70 @@ class StatementHandlerTest : BaseTest() { assertEquals(assertResolvedType("str"), b.type) } } + + @Test + fun testGlobal() { + var file = topLevel.resolve("global.py").toFile() + val result = analyze(listOf(file), topLevel, true) { it.registerLanguage() } + assertNotNull(result) + + // There should be three variable declarations, two local and one global + var cVariables = result.variables("c") + assertEquals(3, cVariables.size) + + // Our scopes do not match 1:1 to python scopes, but rather the python "global" scope is a + // name space with the name of the file and the function scope is a block scope of the + // function body + var pythonGlobalScope = result.finalCtx.scopeManager.lookupScope(file.nameWithoutExtension) + + var globalC = cVariables.firstOrNull { it.scope == pythonGlobalScope } + assertNotNull(globalC) + + var localC1 = + cVariables.firstOrNull { + it.scope?.astNode?.astParent?.name?.localName == "local_write" + } + assertNotNull(localC1) + + var localC2 = + cVariables.firstOrNull { + it.scope?.astNode?.astParent?.name?.localName == "error_write" + } + assertNotNull(localC2) + + // In global_write, all references should point to global c + var cRefs = result.functions["global_write"]?.refs("c") + assertNotNull(cRefs) + cRefs.forEach { assertRefersTo(it, globalC) } + + // In global_read, all references should point to global c + cRefs = result.functions["global_read"]?.refs("c") + assertNotNull(cRefs) + cRefs.forEach { assertRefersTo(it, globalC) } + + // In local_write, all references should point to local c + cRefs = result.functions["local_write"]?.refs("c") + assertNotNull(cRefs) + cRefs.forEach { assertRefersTo(it, localC1) } + + // In error_write, all references will point to local c; even though the c on the right side + // SHOULD be unresolved - but this a general shortcoming because the resolving will not take + // the EOG into consideration (yet) + cRefs = result.functions["error_write"]?.refs("c") + assertNotNull(cRefs) + cRefs.forEach { assertRefersTo(it, localC2) } + } + + // TODO(oxisto): Re-renable this once we parse nested functions + @Ignore + @Test + fun testNonLocal() { + var file = topLevel.resolve("nonlocal.py").toFile() + val result = analyze(listOf(file), topLevel, true) { it.registerLanguage() } + assertNotNull(result) + + // There should be three variable declarations, two local and one global + var cVariables = result.variables("c") + assertEquals(3, cVariables.size) + } } diff --git a/cpg-language-python/src/test/resources/python/global.py b/cpg-language-python/src/test/resources/python/global.py new file mode 100644 index 0000000000..d8addf6463 --- /dev/null +++ b/cpg-language-python/src/test/resources/python/global.py @@ -0,0 +1,28 @@ +c = 1 + +def global_write(): + global c + # this writes to a global variable c + c = 2 + # this reads from the global variable c + print(c) + +def global_read(): + print(c) + +def local_write(): + # this writes to a local variable c + c = 3 + # this reads from the local variable c + print(c) + +def error_write(): + # this should result in an error/unresolved c variable; + # in the CPG this will probably still resolve to our local c + # because we do not take the EOG into account when resolving + # symbols (yet) + c = c + 1 + +global_write() +local_write() +error_write() \ No newline at end of file diff --git a/cpg-language-python/src/test/resources/python/nonlocal.py b/cpg-language-python/src/test/resources/python/nonlocal.py new file mode 100644 index 0000000000..472e0f9848 --- /dev/null +++ b/cpg-language-python/src/test/resources/python/nonlocal.py @@ -0,0 +1,17 @@ +def outer_function(): + c = 1 + + def nonlocal_write(): + nonlocal c + c = 2 + print(c) + + def nonlocal_read(): + print(c) + + def local_write(): + c = 2 + print(c) + + def nonlocal_error(): + c = c + 1