diff --git a/src/main/scala/ai/privado/audit/AuditReportConstants.scala b/src/main/scala/ai/privado/audit/AuditReportConstants.scala index b1ec04f6c..47084d505 100644 --- a/src/main/scala/ai/privado/audit/AuditReportConstants.scala +++ b/src/main/scala/ai/privado/audit/AuditReportConstants.scala @@ -106,4 +106,9 @@ object AuditReportConstants { val DEPENDENCY_UNRESOLVED_CODE_SNIPPET_NAME = "Code Snippet" val AUDIT_SOURCE_FILE_NAME = "audit-sources.json" + + val JS_ELEMENT_DISCOVERY_CLASS_INCLUDE_REGEX = + ".*(types|dto(s)?|data|constants|components|model(s)?).*|.*(types.ts|constants.ts|models.ts)" + + val JS_ELEMENT_DISCOVERY_OBJECT_INCLUDE_REGEX = ".*__ecma\\.(String|Number|Boolean|Object).*" } diff --git a/src/main/scala/ai/privado/audit/AuditReportEntryPoint.scala b/src/main/scala/ai/privado/audit/AuditReportEntryPoint.scala index 55bf609cb..b306dda03 100644 --- a/src/main/scala/ai/privado/audit/AuditReportEntryPoint.scala +++ b/src/main/scala/ai/privado/audit/AuditReportEntryPoint.scala @@ -38,21 +38,21 @@ object AuditReportEntryPoint { def createDataElementDiscoveryJson(dataElementDiscoveryData: List[List[String]], repoPath: String) = { val auditDataList = new ListBuffer[DataElementDiscoveryAudit]() + for (item <- dataElementDiscoveryData.drop(1)) { auditDataList += DataElementDiscoveryAudit( eliminateEmptyCellValueIfExist(item(0)), eliminateEmptyCellValueIfExist(item(1)), - item(2).toDouble, + if (item(2) == AuditReportConstants.AUDIT_EMPTY_CELL_VALUE) 0.0 else item(2).toDouble, eliminateEmptyCellValueIfExist(item(3)), eliminateEmptyCellValueIfExist(item(4)), if (item(5) == "YES") true else false, eliminateEmptyCellValueIfExist(item(6)), if (item(5) == "YES") true else false, eliminateEmptyCellValueIfExist(item(8)), - eliminateEmptyCellValueIfExist(item(9)) + if (item.size >= 10) eliminateEmptyCellValueIfExist(item(9)) else AuditReportConstants.AUDIT_EMPTY_CELL_VALUE ) } - JSONExporter.dataElementDiscoveryAuditFileExport( AuditReportConstants.AUDIT_SOURCE_FILE_NAME, repoPath, @@ -96,12 +96,28 @@ object AuditReportEntryPoint { workbook } - // Audit report generation for Python and javaScript - def getAuditWorkbook(): Workbook = { + def getAuditWorkbookPy(): Workbook = { val workbook: Workbook = new XSSFWorkbook() + createSheet(workbook, AuditReportConstants.AUDIT_DATA_FLOW_SHEET_NAME, DataFlowReport.processDataFlowAudit()) + workbook + } + // Audit report generation for Python and javaScript + def getAuditWorkbookJS(xtocpg: Try[Cpg], taggerCache: TaggerCache, repoPath: String): Workbook = { + val workbook: Workbook = new XSSFWorkbook() + val dataElementDiscoveryData = DataElementDiscoveryJS.processDataElementDiscovery(xtocpg, taggerCache) + createDataElementDiscoveryJson(dataElementDiscoveryData, repoPath = repoPath) + createSheet(workbook, AuditReportConstants.AUDIT_ELEMENT_DISCOVERY_SHEET_NAME, dataElementDiscoveryData) + // Changed Background colour when tagged + changeTaggedBackgroundColour(workbook, List(4, 6)) // Set Data Flow report into Sheet createSheet(workbook, AuditReportConstants.AUDIT_DATA_FLOW_SHEET_NAME, DataFlowReport.processDataFlowAudit()) + // Set Unresolved flow into Sheet + createSheet( + workbook, + AuditReportConstants.AUDIT_UNRESOLVED_SHEET_NAME, + UnresolvedFlowReport.processUnresolvedFlow() + ) workbook } diff --git a/src/main/scala/ai/privado/audit/DataElementDiscovery.scala b/src/main/scala/ai/privado/audit/DataElementDiscovery.scala index c8921d644..7490f0628 100644 --- a/src/main/scala/ai/privado/audit/DataElementDiscovery.scala +++ b/src/main/scala/ai/privado/audit/DataElementDiscovery.scala @@ -1,12 +1,14 @@ package ai.privado.audit +import ai.privado.audit.DataElementDiscovery.{getClass, getFileScore, getSourceUsingRules} import ai.privado.cache.TaggerCache +import ai.privado.languageEngine.java.language.module.{NodeStarters, StepsForModule} import ai.privado.model.{CatLevelOne, Constants, InternalTag} import io.shiftleft.codepropertygraph.generated.Cpg import io.shiftleft.codepropertygraph.generated.nodes.{Member, TypeDecl} import io.shiftleft.semanticcpg.language._ import org.slf4j.LoggerFactory -import ai.privado.dataflow.{Dataflow} +import ai.privado.dataflow.Dataflow import scala.collection.mutable import scala.collection.mutable.ListBuffer @@ -359,4 +361,133 @@ object DataElementDiscovery { } case class CollectionMethodInfo(var methodDetail: String, var endpoint: String) + +} + +object DataElementDiscoveryJS { + private val logger = LoggerFactory.getLogger(getClass) + def getSourceUsingRules(xtocpg: Try[Cpg]): List[String] = { + logger.info("Process Class Name from cpg") + val classNameList = ListBuffer[String]() + xtocpg match { + case Success(cpg) => { + val typeDeclList = cpg.typeDecl + .filter(_.order > 0) + .where(_.fullName(AuditReportConstants.JS_ELEMENT_DISCOVERY_CLASS_INCLUDE_REGEX)) + .toList + .concat( + cpg.typeDecl + .filter(_.order > 0) + .where(_.fullName(AuditReportConstants.JS_ELEMENT_DISCOVERY_OBJECT_INCLUDE_REGEX)) + .toList + ) + + typeDeclList.foreach(node => { + if (node.fullName.nonEmpty) { + classNameList += node.fullName + } + }) + } + case Failure(exception) => { + println("Failed to process class name from cpg") + logger.debug("Failed to process class name from cpg", exception) + println(exception.printStackTrace()) + } + } + logger.info("Successfully Processed Class Name from cpg") + classNameList.toList + } + + def processDataElementDiscovery(xtocpg: Try[Cpg], taggerCache: TaggerCache): List[List[String]] = { + val classNameRuleList = getSourceUsingRules(xtocpg) + val memberInfo = DataElementDiscovery.getMemberUsingClassName(xtocpg, classNameRuleList.toSet) + val workbookResult = new ListBuffer[List[String]]() + val typeDeclMemberCache = taggerCache.typeDeclMemberCache + + // Stores ClassName --> (MemberName --> SourceRuleID) + val taggedMemberInfo = mutable.HashMap[String, mutable.HashMap[String, String]]() + + // Reverse the mapping to MemberName --> sourceRuleId + typeDeclMemberCache.foreach { case (key, value) => + val reverseMap = mutable.HashMap[String, String]() + value.foreach { case (ruleName, memberSet) => + memberSet.foreach(member => { + reverseMap.put(member.name, ruleName) + }) + } + taggedMemberInfo.put(key, reverseMap) + } + + // Header List + workbookResult += List( + AuditReportConstants.ELEMENT_DISCOVERY_CLASS_NAME, + AuditReportConstants.ELEMENT_DISCOVERY_FILE_NAME, + AuditReportConstants.FILE_PRIORITY_SCORE, + AuditReportConstants.ELEMENT_DISCOVERY_MEMBER_NAME, + AuditReportConstants.ELEMENT_DISCOVERY_MEMBER_TYPE, + AuditReportConstants.ELEMENT_DISCOVERY_TAGGED_NAME, + AuditReportConstants.ELEMENT_DISCOVERY_SOURCE_RULE_ID, + AuditReportConstants.ELEMENT_DISCOVERY_INPUT_COLLECTION, + AuditReportConstants.ELEMENT_DISCOVERY_COLLECTION_ENDPOINT, + AuditReportConstants.ELEMENT_DISCOVERY_METHOD_NAME + ) + + // Construct the excel sheet and fill the data + try { + memberInfo.foreach { + case (key, value) => { + if (taggedMemberInfo.contains(key.fullName)) { + workbookResult += List( + key.name, + key.file.head.name, + getFileScore(key.file.name.headOption.getOrElse(Constants.EMPTY), xtocpg), + AuditReportConstants.AUDIT_EMPTY_CELL_VALUE, + AuditReportConstants.AUDIT_EMPTY_CELL_VALUE, + AuditReportConstants.AUDIT_CHECKED_VALUE, + AuditReportConstants.AUDIT_EMPTY_CELL_VALUE, + AuditReportConstants.AUDIT_EMPTY_CELL_VALUE, + AuditReportConstants.AUDIT_EMPTY_CELL_VALUE, + AuditReportConstants.AUDIT_EMPTY_CELL_VALUE + ) + val ruleMemberInfo = taggedMemberInfo.getOrElse(key.fullName, new mutable.HashMap[String, String]) + value.foreach(member => { + if (ruleMemberInfo.contains(member.name)) { + workbookResult += List( + key.fullName, + key.file.head.name, + getFileScore(key.file.name.headOption.getOrElse(Constants.EMPTY), xtocpg), + member.name, + member.typeFullName, + AuditReportConstants.AUDIT_CHECKED_VALUE, + ruleMemberInfo.getOrElse(member.name, "Default value"), + AuditReportConstants.AUDIT_EMPTY_CELL_VALUE, + AuditReportConstants.AUDIT_EMPTY_CELL_VALUE, + AuditReportConstants.AUDIT_EMPTY_CELL_VALUE + ) + } else { + workbookResult += List( + key.fullName, + key.file.head.name, + getFileScore(key.file.name.headOption.getOrElse(Constants.EMPTY), xtocpg), + member.name, + member.typeFullName, + AuditReportConstants.AUDIT_NOT_CHECKED_VALUE, + AuditReportConstants.AUDIT_EMPTY_CELL_VALUE, + AuditReportConstants.AUDIT_EMPTY_CELL_VALUE, + AuditReportConstants.AUDIT_EMPTY_CELL_VALUE, + AuditReportConstants.AUDIT_EMPTY_CELL_VALUE + ) + } + }) + } + } + } + logger.info("Shutting down audit engine") + } catch { + case ex: Exception => + println("Failed to process Data Element Discovery report") + logger.debug("Failed to process Data Element Discovery report", ex) + } + workbookResult.toList + } } diff --git a/src/main/scala/ai/privado/audit/DependencyReport.scala b/src/main/scala/ai/privado/audit/DependencyReport.scala index 86f722632..4ea3b6085 100644 --- a/src/main/scala/ai/privado/audit/DependencyReport.scala +++ b/src/main/scala/ai/privado/audit/DependencyReport.scala @@ -1,10 +1,11 @@ package ai.privado.audit +import ai.privado.audit.DependencyReport.getClass import ai.privado.languageEngine.java.cache.DependencyModuleCache import ai.privado.languageEngine.java.cache.DependencyModuleCache.RuleCategoryInfo import ai.privado.languageEngine.java.language.module.{NodeStarters, StepsForModule} import io.shiftleft.codepropertygraph.generated.Cpg -import io.shiftleft.codepropertygraph.generated.nodes.ModuleDependency +import io.shiftleft.codepropertygraph.generated.nodes.{Dependency, ModuleDependency} import io.shiftleft.semanticcpg.language._ import org.slf4j.LoggerFactory diff --git a/src/main/scala/ai/privado/languageEngine/javascript/processor/JavascriptProcessor.scala b/src/main/scala/ai/privado/languageEngine/javascript/processor/JavascriptProcessor.scala index 16ed8eb7d..2b96ff046 100644 --- a/src/main/scala/ai/privado/languageEngine/javascript/processor/JavascriptProcessor.scala +++ b/src/main/scala/ai/privado/languageEngine/javascript/processor/JavascriptProcessor.scala @@ -114,7 +114,7 @@ object JavascriptProcessor { if (ScanProcessor.config.generateAuditReport) { ExcelExporter.auditExport( outputAuditFileName, - AuditReportEntryPoint.getAuditWorkbook(), + AuditReportEntryPoint.getAuditWorkbookJS(xtocpg, taggerCache, sourceRepoLocation), sourceRepoLocation ) match { case Left(err) => diff --git a/src/main/scala/ai/privado/languageEngine/python/processor/PythonProcessor.scala b/src/main/scala/ai/privado/languageEngine/python/processor/PythonProcessor.scala index bfe4fd072..f1dd14d8b 100644 --- a/src/main/scala/ai/privado/languageEngine/python/processor/PythonProcessor.scala +++ b/src/main/scala/ai/privado/languageEngine/python/processor/PythonProcessor.scala @@ -121,7 +121,7 @@ object PythonProcessor { if (ScanProcessor.config.generateAuditReport) { ExcelExporter.auditExport( outputAuditFileName, - AuditReportEntryPoint.getAuditWorkbook(), + AuditReportEntryPoint.getAuditWorkbookPy(), sourceRepoLocation ) match { case Left(err) =>