Skip to content

Commit

Permalink
Changing function signature of parse to accept the file content ins…
Browse files Browse the repository at this point in the history
…tead of a file

This PR changes the way `parse` works in a backwards-compatible way. Instead of parsing a `File`, we parse the file contents (and, optionally, a path). The reasoning behind this is that almost all language frontends currently need to read the file contents themselves, so this step can be harmonized across frontends. This will also allow us to provide more common statistics about the parsing context in the future.
  • Loading branch information
oxisto committed Sep 20, 2024
1 parent 737af6e commit 5b5814e
Show file tree
Hide file tree
Showing 4 changed files with 110 additions and 24 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
/*
* Copyright (c) 2024, Fraunhofer AISEC. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* $$$$$$\ $$$$$$$\ $$$$$$\
* $$ __$$\ $$ __$$\ $$ __$$\
* $$ / \__|$$ | $$ |$$ / \__|
* $$ | $$$$$$$ |$$ |$$$$\
* $$ | $$ ____/ $$ |\_$$ |
* $$ | $$\ $$ | $$ | $$ |
* \$$$$$ |$$ | \$$$$$ |
* \______/ \__| \______/
*
*/
package de.fraunhofer.aisec.cpg.frontends

import de.fraunhofer.aisec.cpg.graph.declarations.TranslationUnitDeclaration
import java.nio.file.Path

/**
* Implemented by language frontends that can parse source code handed over as a string, together
* with an optional path, instead of only accepting a file.
*/
interface SupportsNewParse {
/**
* Parses the given [content] with the language frontend into a [TranslationUnitDeclaration]. If
* known, a [path] should be specified, so that the language frontend can potentially use more
* advanced features like module resolution.
*
* @param content the source code to parse.
* @param path the path associated with [content], if known; defaults to `null`.
* @return the [TranslationUnitDeclaration] produced for [content].
*/
fun parse(content: String, path: Path? = null): TranslationUnitDeclaration
}
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,15 @@ import java.net.URI
import java.util.*

/** A SARIF compatible location referring to a location, i.e. file and region within the file. */
class PhysicalLocation(uri: URI, region: Region) {
class ArtifactLocation(val uri: URI) {
class PhysicalLocation(uri: URI?, region: Region) {
class ArtifactLocation(val uri: URI?) {

override fun toString(): String {
return uri.path.substring(uri.path.lastIndexOf('/') + 1)
return if (uri != null) {
uri.path
} else {
"unknown"
}
}

override fun equals(other: Any?): Boolean {
Expand All @@ -45,7 +49,7 @@ class PhysicalLocation(uri: URI, region: Region) {
override fun hashCode() = Objects.hashCode(uri)
}

val artifactLocation: ArtifactLocation
var artifactLocation: ArtifactLocation
var region: Region

init {
Expand All @@ -68,11 +72,7 @@ class PhysicalLocation(uri: URI, region: Region) {
companion object {
fun locationLink(location: PhysicalLocation?): String {
return if (location != null) {
(location.artifactLocation.uri.path +
":" +
location.region.startLine +
":" +
location.region.startColumn)
"${location.artifactLocation}:${location.region.startLine}:${location.region.startColumn}"
} else "unknown"
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ package de.fraunhofer.aisec.cpg.frontends.python
import de.fraunhofer.aisec.cpg.TranslationContext
import de.fraunhofer.aisec.cpg.frontends.Language
import de.fraunhofer.aisec.cpg.frontends.LanguageFrontend
import de.fraunhofer.aisec.cpg.frontends.SupportsNewParse
import de.fraunhofer.aisec.cpg.frontends.TranslationException
import de.fraunhofer.aisec.cpg.graph.*
import de.fraunhofer.aisec.cpg.graph.declarations.TranslationUnitDeclaration
Expand All @@ -39,15 +40,16 @@ import de.fraunhofer.aisec.cpg.passes.configuration.RegisterExtraPass
import de.fraunhofer.aisec.cpg.sarif.PhysicalLocation
import de.fraunhofer.aisec.cpg.sarif.Region
import java.io.File
import java.net.URI
import java.nio.file.Path
import jep.python.PyObject
import kotlin.io.path.Path
import kotlin.io.path.absolute
import kotlin.io.path.name
import kotlin.io.path.nameWithoutExtension
import kotlin.math.min

@RegisterExtraPass(PythonAddDeclarationsPass::class)
class PythonLanguageFrontend(language: Language<PythonLanguageFrontend>, ctx: TranslationContext) :
LanguageFrontend<Python.AST.AST, Python.AST.AST?>(language, ctx) {
LanguageFrontend<Python.AST.AST, Python.AST.AST?>(language, ctx), SupportsNewParse {
private val lineSeparator = '\n' // TODO
private val tokenTypeIndex = 0
private val jep = JepSingleton // configure Jep
Expand All @@ -62,21 +64,27 @@ class PythonLanguageFrontend(language: Language<PythonLanguageFrontend>, ctx: Tr
* new [PythonLanguageFrontend] instance per file.
*/
private lateinit var fileContent: String
private lateinit var uri: URI
private var filePath: Path? = null

@Throws(TranslationException::class)
override fun parse(file: File): TranslationUnitDeclaration {
fileContent = file.readText(Charsets.UTF_8)
uri = file.toURI()
override fun parse(content: String, path: Path?): TranslationUnitDeclaration {
this.fileContent = content
this.filePath = path

jep.getInterp().use {
it.set("content", fileContent)
it.set("filename", file.absolutePath)
it.set("content", content)
it.set(
"filename",
if (path != null) {
path.absolute().toString()
} else {
"<unknown>"
}
)
it.exec("import ast")
it.exec("parsed = ast.parse(content, filename=filename, type_comments=True)")

val pyAST = it.getValue("parsed") as PyObject
val tud = pythonASTtoCPG(pyAST, file.name)
val tud = pythonASTtoCPG(pyAST, path)

if (config.matchCommentsToNodes) {
it.exec("import tokenize")
Expand All @@ -97,6 +105,11 @@ class PythonLanguageFrontend(language: Language<PythonLanguageFrontend>, ctx: Tr
}
}

/**
 * File-based entry point. Reads [file] as UTF-8 text and forwards it, together with the file's
 * path, to the content-based [parse] overload.
 */
@Throws(TranslationException::class)
override fun parse(file: File): TranslationUnitDeclaration {
    // Read the content first, then derive the path (same order as the argument evaluation).
    val text = file.readText(Charsets.UTF_8)
    val location = file.toPath()
    return parse(text, location)
}

private fun addCommentsToCPG(
tud: TranslationUnitDeclaration,
pyTokens: ArrayList<*>,
Expand Down Expand Up @@ -236,7 +249,7 @@ class PythonLanguageFrontend(language: Language<PythonLanguageFrontend>, ctx: Tr
override fun locationOf(astNode: Python.AST.AST): PhysicalLocation? {
return if (astNode is Python.AST.WithLocation) {
PhysicalLocation(
uri,
filePath?.toUri(),
Region(
startLine = astNode.lineno,
endLine = astNode.end_lineno,
Expand All @@ -253,17 +266,22 @@ class PythonLanguageFrontend(language: Language<PythonLanguageFrontend>, ctx: Tr
// will be invoked by native function
}

private fun pythonASTtoCPG(pyAST: PyObject, path: String): TranslationUnitDeclaration {
private fun pythonASTtoCPG(pyAST: PyObject, path: Path?): TranslationUnitDeclaration {
val pythonASTModule =
fromPython(pyAST) as? Python.AST.Module
?: TODO(
"Python ast of type ${fromPython(pyAST).javaClass} is not supported yet"
) // could be one of "ast.{Module,Interactive,Expression,FunctionType}

val tud = newTranslationUnitDeclaration(path, rawNode = pythonASTModule)
val tud = newTranslationUnitDeclaration(path?.name, rawNode = pythonASTModule)
scopeManager.resetToGlobal(tud)

val nsdName = Path(path).nameWithoutExtension
val nsdName =
if (path != null) {
path.nameWithoutExtension
} else {
"unknown"
}
val nsd = newNamespaceDeclaration(nsdName, rawNode = pythonASTModule)
tud.addDeclaration(nsd)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@
*/
package de.fraunhofer.aisec.cpg.frontends.python

import de.fraunhofer.aisec.cpg.ScopeManager
import de.fraunhofer.aisec.cpg.TranslationConfiguration
import de.fraunhofer.aisec.cpg.TranslationContext
import de.fraunhofer.aisec.cpg.TypeManager
import de.fraunhofer.aisec.cpg.analysis.ValueEvaluator
import de.fraunhofer.aisec.cpg.graph.*
import de.fraunhofer.aisec.cpg.graph.Annotation
Expand Down Expand Up @@ -1345,6 +1349,32 @@ class PythonFrontendTest : BaseTest() {
assertEquals(4.toLong(), rhs.evaluate())
}

/**
 * Parses Python source handed over as a plain string (no path) and checks that the resulting
 * translation unit contains the expected namespace, references and call.
 */
@Test
fun testParseContent() {
    // The frontend is assigned exactly once, so it is a `val`.
    val frontend =
        PythonLanguageFrontend(
            language = PythonLanguage(),
            ctx =
                TranslationContext(
                    TranslationConfiguration.builder().build(),
                    ScopeManager(),
                    TypeManager()
                )
        )

    // No path is passed here, so the `path` parameter takes its default value.
    val tu = frontend.parse("a = 4\nprint(a)")
    assertNotNull(tu)

    // Without a path, the frontend names the namespace "unknown".
    val unknown = tu.namespaces["unknown"]
    assertNotNull(unknown)

    val refNames = tu.refs.map { it.name.localName }
    assertEquals(listOf("a", "a", "print"), refNames)

    val call = tu.calls["print"]
    assertNotNull(call)
}

class PythonValueEvaluator : ValueEvaluator() {
override fun computeBinaryOpEffect(
lhsValue: Any?,
Expand Down

0 comments on commit 5b5814e

Please sign in to comment.