Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implementing python's global and nonlocal #1735

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ package de.fraunhofer.aisec.cpg.graph
import de.fraunhofer.aisec.cpg.frontends.LanguageFrontend
import de.fraunhofer.aisec.cpg.graph.Node.Companion.EMPTY_NAME
import de.fraunhofer.aisec.cpg.graph.NodeBuilder.log
import de.fraunhofer.aisec.cpg.graph.scopes.Scope
import de.fraunhofer.aisec.cpg.graph.scopes.Symbol
import de.fraunhofer.aisec.cpg.graph.statements.*

/**
Expand Down Expand Up @@ -329,3 +331,28 @@ fun MetadataProvider.newDefaultStatement(rawNode: Any? = null): DefaultStatement
log(node)
return node
}

/**
 * Creates a new [LookupScopeStatement]. The [MetadataProvider] receiver will be used to fill
 * different meta-data using [Node.applyMetadata]. Calling this extension function outside of Kotlin
 * requires an appropriate [MetadataProvider], such as a [LanguageFrontend] as an additional
 * prepended argument.
 *
 * Besides creating the node, this builder registers it in [Scope.predefinedLookupScopes] of the
 * node's scope (if the node has one after applying the metadata), once for each entry in
 * [symbols].
 *
 * @param symbols the symbols affected by the statement; they are stored in
 *   [LookupScopeStatement.symbols] and used as keys for the scope registration
 * @param targetScope the scope that is stored in [LookupScopeStatement.targetScope]
 * @param rawNode the optional raw node that is passed on to [Node.applyMetadata]
 * @return the newly created [LookupScopeStatement]
 */
@JvmOverloads
fun MetadataProvider.newLookupScopeStatement(
    symbols: List<Symbol>,
    targetScope: Scope?,
    rawNode: Any? = null
): LookupScopeStatement {
    val node = LookupScopeStatement()
    // Keep the affected symbols on the node itself. Otherwise the property stays empty and the
    // node's equals/hashCode cannot distinguish statements that affect different symbols.
    node.symbols = symbols
    node.targetScope = targetScope
    node.applyMetadata(this, EMPTY_NAME, rawNode, true)

    // Add it to our scope. The scope is resolved once; without a scope there is nothing to
    // register.
    node.scope?.let { scope ->
        for (symbol in symbols) {
            scope.predefinedLookupScopes[symbol] = node
        }
    }

    log(node)
    return node
}
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ import de.fraunhofer.aisec.cpg.graph.Node.Companion.TO_STRING_STYLE
import de.fraunhofer.aisec.cpg.graph.declarations.Declaration
import de.fraunhofer.aisec.cpg.graph.declarations.ImportDeclaration
import de.fraunhofer.aisec.cpg.graph.statements.LabelStatement
import de.fraunhofer.aisec.cpg.graph.statements.LookupScopeStatement
import de.fraunhofer.aisec.cpg.graph.statements.expressions.Reference
import de.fraunhofer.aisec.cpg.helpers.neo4j.NameConverter
import org.apache.commons.lang3.builder.ToStringBuilder
import org.neo4j.ogm.annotation.GeneratedValue
Expand Down Expand Up @@ -90,6 +92,16 @@ abstract class Scope(
*/
@Transient var wildcardImports: MutableSet<ImportDeclaration> = mutableSetOf()

/**
 * In some languages, the lookup scope of a symbol that is being resolved (e.g. of a
 * [Reference]) can be adjusted through keywords (such as `global` in Python or PHP).
 *
 * We store this information in the form of a [LookupScopeStatement] in the AST, but we also
 * need to store it in the scope itself, in order to avoid unnecessary AST traversals when
 * resolving symbols using [lookupSymbol].
 *
 * The map is keyed by the affected [Symbol]; the value is the [LookupScopeStatement] that
 * carries the target scope for that symbol.
 */
@Transient var predefinedLookupScopes: MutableMap<Symbol, LookupScopeStatement> = mutableMapOf()

/** Adds a [declaration] with the defined [symbol]. */
fun addSymbol(symbol: Symbol, declaration: Declaration) {
if (declaration is ImportDeclaration && declaration.wildcardImport) {
Expand Down Expand Up @@ -123,8 +135,16 @@ abstract class Scope(
replaceImports: Boolean = true,
predicate: ((Declaration) -> Boolean)? = null
): List<Declaration> {
// First, try to look for the symbol in the current scope
var scope: Scope? = this
// First, try to look for the symbol in the current scope (unless we have a predefined
// search scope). In the latter case we also need to restrict the lookup to the search scope
var modifiedScoped = this.predefinedLookupScopes[symbol]?.targetScope
var scope: Scope? =
if (modifiedScoped != null) {
modifiedScoped
} else {
this
}

var list: MutableList<Declaration>? = null

while (scope != null) {
Expand Down Expand Up @@ -154,10 +174,11 @@ abstract class Scope(
}

// If we do not have a hit, we can go up one scope, unless thisScopeOnly is set to true
if (!thisScopeOnly) {
scope = scope.parent
} else {
// (or we had a modified scope)
if (thisScopeOnly || modifiedScoped != null) {
break
} else {
scope = scope.parent
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
/*
* Copyright (c) 2024, Fraunhofer AISEC. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* $$$$$$\ $$$$$$$\ $$$$$$\
* $$ __$$\ $$ __$$\ $$ __$$\
* $$ / \__|$$ | $$ |$$ / \__|
* $$ | $$$$$$$ |$$ |$$$$\
* $$ | $$ ____/ $$ |\_$$ |
* $$ | $$\ $$ | $$ | $$ |
* \$$$$$ |$$ | \$$$$$ |
* \______/ \__| \______/
*
*/
package de.fraunhofer.aisec.cpg.graph.statements

import de.fraunhofer.aisec.cpg.graph.newLookupScopeStatement
import de.fraunhofer.aisec.cpg.graph.scopes.Scope
import de.fraunhofer.aisec.cpg.graph.scopes.Symbol
import de.fraunhofer.aisec.cpg.graph.statements.expressions.Reference
import java.util.Objects

/**
 * A statement that changes the lookup scope of one or more [Reference] nodes (or, more precisely,
 * of their symbols) within the current [Scope]. The most prominent examples are the Python
 * `global` and `nonlocal` keywords.
 *
 * The node does not implement the redirection by itself. For it to take effect, the node (or the
 * information it carries) has to be added to [Scope.predefinedLookupScopes], because the
 * scope/identifier lookup is supposed to work without AST traversals.
 *
 * The [newLookupScopeStatement] node builder takes care of this registration, so it is STRONGLY
 * encouraged to use the node builder instead of instantiating this class directly.
 */
class LookupScopeStatement : Statement() {

    /** The symbols that are affected by this statement. */
    var symbols: List<Symbol> = listOf()

    /** The scope in which the affected symbols are looked up. */
    var targetScope: Scope? = null

    /**
     * Two lookup scope statements are equal if the super-class considers them equal and both
     * their [symbols] and their [targetScope] are equal.
     */
    override fun equals(other: Any?): Boolean =
        this === other ||
            (other is LookupScopeStatement &&
                super.equals(other) &&
                symbols == other.symbols &&
                targetScope == other.targetScope)

    /** Combines the super-class hash with [symbols] and [targetScope]. */
    override fun hashCode(): Int {
        return Objects.hash(super.hashCode(), symbols, targetScope)
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,9 @@ open class EvaluationOrderGraphPass(ctx: TranslationContext) : TranslationUnitPa
map[TypeIdExpression::class.java] = { handleDefault(it) }
map[Reference::class.java] = { handleDefault(it) }
map[LambdaExpression::class.java] = { handleLambdaExpression(it as LambdaExpression) }
map[LookupScopeStatement::class.java] = {
handleLookupScopeStatement(it as LookupScopeStatement)
}
}

protected fun doNothing() {
Expand Down Expand Up @@ -1019,6 +1022,12 @@ open class EvaluationOrderGraphPass(ctx: TranslationContext) : TranslationUnitPa
nextEdgeBranch = false
}

/**
 * Handles a [LookupScopeStatement] by passing the statement itself to [pushToEOG]. No children
 * or properties of the statement are visited.
 */
private fun handleLookupScopeStatement(stmt: LookupScopeStatement) {
// Include the node as part of the EOG itself, but we do not need to go into any children or
// properties here
pushToEOG(stmt)
}

companion object {
protected val LOGGER = LoggerFactory.getLogger(EvaluationOrderGraphPass::class.java)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,11 @@ open class SymbolResolver(ctx: TranslationContext) : ComponentPass(ctx) {
// resolution, but in future this will also be used in resolving regular references.
current.candidates = scopeManager.findSymbols(current.name, current.location).toSet()

// Preparation for a future without legacy call resolving. Taking the first candidate is not
// ideal since we are running into an issue with function pointers here (see workaround
// below).
var wouldResolveTo = current.candidates.singleOrNull()

// For now, we need to ignore reference expressions that are directly embedded into call
// expressions, because they are the "callee" property. In the future, we will use this
// property to actually resolve the function call. However, there is a special case that
Expand All @@ -189,21 +194,38 @@ open class SymbolResolver(ctx: TranslationContext) : ComponentPass(ctx) {
// of this call expression back to its original variable declaration. In the future, we want
// to extend this particular code to resolve all callee references to their declarations,
// i.e., their function definitions and get rid of the separate CallResolver.
var wouldResolveTo: Declaration? = null
if (current.resolutionHelper is CallExpression) {
// Peek into the declaration, and if it is only one declaration and a variable, we can
// proceed normally, as we are running into the special case explained above. Otherwise,
// we abort here (for now).
wouldResolveTo = current.candidates.singleOrNull()
if (wouldResolveTo !is VariableDeclaration && wouldResolveTo !is ParameterDeclaration) {
return
}
}

// Some stupid C++ workaround to use the legacy call resolver when we try to resolve targets
// for function pointers. At least we are only invoking the legacy resolver for a very small
// percentage of references now.
if (wouldResolveTo is FunctionDeclaration) {
// We need to invoke the legacy resolver, just to be sure
var legacy = scopeManager.resolveReference(current)

// This is just for us to catch these differences in symbol resolving in the future. The
// difference is pretty much only that the legacy system takes parameters of the
// function-pointer-type into account and the new system does not (yet), because it just
// takes the first match. This will be needed to solve in the future.
if (legacy != wouldResolveTo) {
log.warn(
"The legacy symbol resolution and the new system produced different results here. This needs to be investigated in the future. For now, we take the legacy result."
)
wouldResolveTo = legacy
}
}

// Only consider resolving, if the language frontend did not specify a resolution. If we
// already have populated the wouldResolveTo variable, we can re-use this instead of
// resolving again
var refersTo = current.refersTo ?: wouldResolveTo ?: scopeManager.resolveReference(current)
var refersTo = current.refersTo ?: wouldResolveTo

var recordDeclType: Type? = null
if (currentClass != null) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
/*
* Copyright (c) 2024, Fraunhofer AISEC. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* $$$$$$\ $$$$$$$\ $$$$$$\
* $$ __$$\ $$ __$$\ $$ __$$\
* $$ / \__|$$ | $$ |$$ / \__|
* $$ | $$$$$$$ |$$ |$$$$\
* $$ | $$ ____/ $$ |\_$$ |
* $$ | $$\ $$ | $$ | $$ |
* \$$$$$ |$$ | \$$$$$ |
* \______/ \__| \______/
*
*/
package de.fraunhofer.aisec.cpg.graph

import de.fraunhofer.aisec.cpg.ScopeManager
import de.fraunhofer.aisec.cpg.TranslationConfiguration
import de.fraunhofer.aisec.cpg.TranslationContext
import de.fraunhofer.aisec.cpg.TypeManager
import de.fraunhofer.aisec.cpg.frontends.TestLanguageFrontend
import de.fraunhofer.aisec.cpg.graph.builder.translationResult
import de.fraunhofer.aisec.cpg.graph.scopes.GlobalScope
import de.fraunhofer.aisec.cpg.test.assertRefersTo
import kotlin.test.Test
import kotlin.test.assertIs
import kotlin.test.assertNotNull

/** Tests for the statement node builders, currently covering [newLookupScopeStatement]. */
class StatementBuilderTest {
    /**
     * Builds a translation unit with a global variable `a` and a function `main` whose body
     * declares a local `a`, then redirects the lookup of `a` to the global scope using
     * [newLookupScopeStatement] and finally references `a`. The reference is expected to refer
     * to the global declaration rather than the local one.
     */
    @Test
    fun testNewLookupScopeStatement() {
        val frontend =
            TestLanguageFrontend(
                ctx =
                    TranslationContext(
                        TranslationConfiguration.builder().defaultPasses().build(),
                        ScopeManager(),
                        TypeManager()
                    )
            )
        val result =
            frontend.build {
                translationResult {
                    val tu =
                        with(frontend) {
                            val unit = newTranslationUnitDeclaration("main.file")
                            scopeManager.resetToGlobal(unit)

                            // Global declaration of "a"
                            val globalA = newVariableDeclaration("a")
                            scopeManager.addDeclaration(globalA)

                            val func = newFunctionDeclaration("main")
                            scopeManager.enterScope(func)

                            val body = newBlock()
                            scopeManager.enterScope(body)

                            // Local declaration of "a" with the same name as the global one
                            val localA = newVariableDeclaration("a")
                            val stmt = newDeclarationStatement()
                            stmt.declarations += localA
                            scopeManager.addDeclaration(localA)
                            body += stmt

                            // Redirect the lookup of "a" to the global scope, then reference it
                            body += newLookupScopeStatement(listOf("a"), scopeManager.globalScope)
                            body += newReference("a")

                            scopeManager.leaveScope(body)
                            func.body = body
                            scopeManager.leaveScope(func)

                            scopeManager.addDeclaration(func)
                            scopeManager.leaveScope(unit)
                            unit
                        }

                    components.firstOrNull()?.translationUnits?.add(tu)
                }
            }

        val globalA = result.variables["a"]
        assertNotNull(globalA)
        assertIs<GlobalScope>(globalA.scope)

        // Fail with a clear message if the reference is missing altogether
        val a = result.refs["a"]
        assertNotNull(a)
        assertRefersTo(a, globalA)
    }
}
Loading
Loading