From 1ae6f5d4f3dedfbc1ed39820bc495b4e8e040ea7 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Mon, 18 Sep 2023 12:31:36 +0200 Subject: [PATCH 01/11] Small restructure of IDESolver --- .../DataFlow/IfdsIde/Solver/IDESolver.h | 109 +++++++++--------- 1 file changed, 54 insertions(+), 55 deletions(-) diff --git a/include/phasar/DataFlow/IfdsIde/Solver/IDESolver.h b/include/phasar/DataFlow/IfdsIde/Solver/IDESolver.h index ac730de2a..747d7d7d7 100644 --- a/include/phasar/DataFlow/IfdsIde/Solver/IDESolver.h +++ b/include/phasar/DataFlow/IfdsIde/Solver/IDESolver.h @@ -320,8 +320,8 @@ class IDESolver DEBUG, "Queried Summary Edge Function: " << SumEdgFnE); PHASAR_LOG_LEVEL(DEBUG, "Compose: " << SumEdgFnE << " * " << f << '\n')); - WorkList.emplace_back(PathEdge(d1, ReturnSiteN, std::move(d3)), - f.composeWith(SumEdgFnE)); + addWorklistItem(d1, ReturnSiteN, std::move(d3), + f.composeWith(SumEdgFnE)); } } } else { @@ -347,8 +347,8 @@ class IDESolver // create initial self-loop PHASAR_LOG_LEVEL( DEBUG, "Create initial self-loop with D: " << DToString(d3)); - WorkList.emplace_back(PathEdge(d3, SP, d3), - EdgeIdentity{}); // line 15 + addWorklistItem(d3, SP, d3, EdgeIdentity{}); // line 15 + // register the fact that has an incoming edge from // line 15.1 of Naeem/Lhotak/Rodriguez addIncoming(SP, d3, n, d2); @@ -418,9 +418,8 @@ class IDESolver d_t d5_restoredCtx = restoreContextOnReturnedFact(n, d2, d5); // propagte the effects of the entire call PHASAR_LOG_LEVEL(DEBUG, "Compose: " << fPrime << " * " << f); - WorkList.emplace_back( - PathEdge(d1, RetSiteN, std::move(d5_restoredCtx)), - f.composeWith(fPrime)); + addWorklistItem(d1, RetSiteN, std::move(d5_restoredCtx), + f.composeWith(fPrime)); } } } @@ -453,8 +452,7 @@ class IDESolver auto fPrime = f.composeWith(EdgeFnE); PHASAR_LOG_LEVEL(DEBUG, "Compose: " << EdgeFnE << " * " << f << " = " << fPrime); - WorkList.emplace_back(PathEdge(d1, ReturnSiteN, std::move(d3)), - std::move(fPrime)); + addWorklistItem(d1, ReturnSiteN, 
std::move(d3), std::move(fPrime)); } } } @@ -490,8 +488,7 @@ class IDESolver PHASAR_LOG_LEVEL(DEBUG, "Compose: " << g << " * " << f << " = " << fPrime); INC_COUNTER("EF Queries", 1, Full); - WorkList.emplace_back(PathEdge(d1, nPrime, std::move(d3)), - std::move(fPrime)); + addWorklistItem(d1, nPrime, std::move(d3), std::move(fPrime)); } } } @@ -776,8 +773,7 @@ class IDESolver if (!IDEProblem.isZeroValue(Fact)) { INC_COUNTER("Gen facts", 1, Core); } - WorkList.emplace_back(PathEdge(Fact, StartPoint, Fact), - EdgeIdentity{}); + addWorklistItem(Fact, StartPoint, Fact, EdgeIdentity{}); } } } @@ -870,9 +866,9 @@ class IDESolver d_t d3 = ValAndFunc.first; d_t d5_restoredCtx = restoreContextOnReturnedFact(c, d4, d5); PHASAR_LOG_LEVEL(DEBUG, "Compose: " << fPrime << " * " << f3); - WorkList.emplace_back(PathEdge(std::move(d3), RetSiteC, - std::move(d5_restoredCtx)), - f3.composeWith(fPrime)); + addWorklistItem(std::move(d3), RetSiteC, + std::move(d5_restoredCtx), + f3.composeWith(fPrime)); } } } @@ -932,9 +928,8 @@ class IDESolver void propagteUnbalancedReturnFlow(n_t RetSiteC, d_t TargetVal, EdgeFunction EdgeFunc, n_t /*RelatedCallSite*/) { - WorkList.emplace_back( - PathEdge(ZeroValue, std::move(RetSiteC), std::move(TargetVal)), - std::move(EdgeFunc)); + addWorklistItem(ZeroValue, std::move(RetSiteC), std::move(TargetVal), + std::move(EdgeFunc)); } /// This method will be called for each incoming edge and can be used to @@ -1020,31 +1015,8 @@ class IDESolver return RetFlowFunction->computeTargets(d2); } - /// Propagates the flow further down the exploded super graph, merging any - /// edge function that might already have been computed for TargetVal at - /// Target. 
- /// - /// @param SourceVal the source value of the propagated summary edge - /// @param Target the target statement - /// @param TargetVal the target value at the target statement - /// @param f the new edge function computed from (s0,SourceVal) to - /// (Target,TargetVal) - /// @param relatedCallSite for call and return flows the related call - /// statement, nullptr otherwise (this value is not used within this - /// implementation but may be useful for subclasses of IDESolver) - /// @param isUnbalancedReturn true if this edge is propagating an - /// unbalanced return (this value is not used within this implementation - /// but may be useful for subclasses of {@link IDESolver}) - /// - void propagate(d_t SourceVal, n_t Target, d_t TargetVal, - EdgeFunction f) { - PHASAR_LOG_LEVEL(DEBUG, "Propagate flow"); - PHASAR_LOG_LEVEL(DEBUG, "Source value : " << DToString(SourceVal)); - PHASAR_LOG_LEVEL(DEBUG, "Target : " << NToString(Target)); - PHASAR_LOG_LEVEL(DEBUG, "Target value : " << DToString(TargetVal)); - PHASAR_LOG_LEVEL( - DEBUG, "Edge function : " << f << " (result of previous compose)"); - + bool addWorklistItem(d_t SourceVal, n_t Target, d_t TargetVal, + EdgeFunction f) { EdgeFunction JumpFnE = [&]() { const auto RevLookupResult = JumpFn->reverseLookup(Target, TargetVal); if (RevLookupResult) { @@ -1074,21 +1046,50 @@ class IDESolver if (NewFunction) { JumpFn->addFunction(SourceVal, Target, TargetVal, fPrime); PathEdge Edge(SourceVal, Target, TargetVal); - PathEdgeCount++; - pathEdgeProcessingTask(std::move(Edge)); + WorkList.push_back(std::move(Edge)); IF_LOG_ENABLED(if (!IDEProblem.isZeroValue(TargetVal)) { - PHASAR_LOG_LEVEL(DEBUG, "EDGE: getFunction()->getName() - << ", D: " << DToString(SourceVal) - << '>'); + PHASAR_LOG_LEVEL(DEBUG, "[addWorklistItem]: EDGE: getFunctionOf(Target)) + << ", D: " << DToString(SourceVal) << '>'); PHASAR_LOG_LEVEL(DEBUG, " ---> '); PHASAR_LOG_LEVEL(DEBUG, ' '); }); } else { - PHASAR_LOG_LEVEL(DEBUG, "PROPAGATE: No new 
function!"); + PHASAR_LOG_LEVEL(DEBUG, "[addWorklistItem]: No new function!"); } + + return NewFunction; + } + + /// Propagates the flow further down the exploded super graph, merging any + /// edge function that might already have been computed for TargetVal at + /// Target. + /// + /// @param SourceVal the source value of the propagated summary edge + /// @param Target the target statement + /// @param TargetVal the target value at the target statement + /// @param f the new edge function computed from (s0,SourceVal) to + /// (Target,TargetVal) + /// @param relatedCallSite for call and return flows the related call + /// statement, nullptr otherwise (this value is not used within this + /// implementation but may be useful for subclasses of IDESolver) + /// @param isUnbalancedReturn true if this edge is propagating an + /// unbalanced return (this value is not used within this implementation + /// but may be useful for subclasses of {@link IDESolver}) + /// + void propagate(PathEdge Edge) { + const auto &[SourceVal, Target, TargetVal] = Edge.get(); + + PHASAR_LOG_LEVEL(DEBUG, "Propagate flow"); + PHASAR_LOG_LEVEL(DEBUG, "Source value : " << DToString(SourceVal)); + PHASAR_LOG_LEVEL(DEBUG, "Target : " << NToString(Target)); + PHASAR_LOG_LEVEL(DEBUG, "Target value : " << DToString(TargetVal)); + + PathEdgeCount++; + pathEdgeProcessingTask(std::move(Edge)); } l_t joinValueAt(n_t /*Unit*/, d_t /*Fact*/, l_t Curr, l_t NewVal) { @@ -1658,12 +1659,10 @@ class IDESolver bool doNext() { assert(!WorkList.empty()); - auto [Edge, EF] = std::move(WorkList.back()); + auto Edge = std::move(WorkList.back()); WorkList.pop_back(); - auto [SourceVal, Target, TargetVal] = Edge.consume(); - propagate(std::move(SourceVal), std::move(Target), std::move(TargetVal), - std::move(EF)); + propagate(std::move(Edge)); return !WorkList.empty(); } @@ -1705,7 +1704,7 @@ class IDESolver const i_t *ICF; IFDSIDESolverConfig &SolverConfig; - std::vector, EdgeFunction>> WorkList; + std::vector> 
WorkList; std::vector> ValuePropWL; size_t PathEdgeCount = 0; From a3e88727a28e20447002d9b4e558ffa72dc4d498 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Mon, 18 Sep 2023 16:41:17 +0200 Subject: [PATCH 02/11] First draft with strategy --- .../DataFlow/IfdsIde/Solver/EagerIDESolver.h | 1814 +++++++++++++++++ .../DataFlow/IfdsIde/Solver/IDESolver.h | 30 +- .../DataFlow/IfdsIde/Solver/IFDSSolver.h | 27 +- .../DataFlow/IfdsIde/Solver/SolverStrategy.h | 53 + .../OpenSSLEVPKDFCTXDescription.h | 4 +- .../OpenSSLSecureHeapDescription.h | 7 +- .../OpenSSLSecureHeapDescription.cpp | 3 +- .../Problems/IDEGeneralizedLCATest.cpp | 4 +- 8 files changed, 1899 insertions(+), 43 deletions(-) create mode 100644 include/phasar/DataFlow/IfdsIde/Solver/EagerIDESolver.h create mode 100644 include/phasar/DataFlow/IfdsIde/Solver/SolverStrategy.h diff --git a/include/phasar/DataFlow/IfdsIde/Solver/EagerIDESolver.h b/include/phasar/DataFlow/IfdsIde/Solver/EagerIDESolver.h new file mode 100644 index 000000000..694f6622a --- /dev/null +++ b/include/phasar/DataFlow/IfdsIde/Solver/EagerIDESolver.h @@ -0,0 +1,1814 @@ +/****************************************************************************** + * Copyright (c) 2017 Philipp Schubert. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. 
+ * + * Contributors: + * Philipp Schubert and others + *****************************************************************************/ + +/* + * IDESolver.h + * + * Created on: 04.08.2016 + * Author: pdschbrt + */ + +#ifndef PHASAR_DATAFLOW_IFDSIDE_SOLVER_EAGERIDESOLVER_H +#define PHASAR_DATAFLOW_IFDSIDE_SOLVER_EAGERIDESOLVER_H + +#include "phasar/Config/Configuration.h" +#include "phasar/DB/ProjectIRDBBase.h" +#include "phasar/DataFlow/IfdsIde/EdgeFunction.h" +#include "phasar/DataFlow/IfdsIde/EdgeFunctionUtils.h" +#include "phasar/DataFlow/IfdsIde/EdgeFunctions.h" +#include "phasar/DataFlow/IfdsIde/FlowFunctions.h" +#include "phasar/DataFlow/IfdsIde/IDETabulationProblem.h" +#include "phasar/DataFlow/IfdsIde/IFDSTabulationProblem.h" +#include "phasar/DataFlow/IfdsIde/InitialSeeds.h" +#include "phasar/DataFlow/IfdsIde/Solver/FlowEdgeFunctionCache.h" +#include "phasar/DataFlow/IfdsIde/Solver/IDESolverAPIMixin.h" +#include "phasar/DataFlow/IfdsIde/Solver/JumpFunctions.h" +#include "phasar/DataFlow/IfdsIde/Solver/PathEdge.h" +#include "phasar/DataFlow/IfdsIde/Solver/SolverStrategy.h" +#include "phasar/DataFlow/IfdsIde/SolverResults.h" +#include "phasar/Domain/AnalysisDomain.h" +#include "phasar/Utils/DOTGraph.h" +#include "phasar/Utils/JoinLattice.h" +#include "phasar/Utils/Logger.h" +#include "phasar/Utils/PAMMMacros.h" +#include "phasar/Utils/Table.h" +#include "phasar/Utils/Utilities.h" + +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/raw_ostream.h" + +#include "nlohmann/json.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace psr { + +/// Solves the given IDETabulationProblem as described in the 1996 paper by +/// Sagiv, Horwitz and Reps. To solve the problem, call solve(). Results +/// can then be queried by using resultAt() and resultsAt(). 
+template +class IDESolver + : public IDESolverAPIMixin> { + friend IDESolverAPIMixin>; + +public: + using ProblemTy = IDETabulationProblem; + using container_type = typename ProblemTy::container_type; + using FlowFunctionPtrType = typename ProblemTy::FlowFunctionPtrType; + + using l_t = typename AnalysisDomainTy::l_t; + using n_t = typename AnalysisDomainTy::n_t; + using i_t = typename AnalysisDomainTy::i_t; + using d_t = typename AnalysisDomainTy::d_t; + using f_t = typename AnalysisDomainTy::f_t; + using t_t = typename AnalysisDomainTy::t_t; + using v_t = typename AnalysisDomainTy::v_t; + + IDESolver(IDETabulationProblem &Problem, + const i_t *ICF, PropagateOntoStrategy /*Strategy*/ = {}) + : IDEProblem(Problem), ZeroValue(Problem.getZeroValue()), ICF(ICF), + SolverConfig(Problem.getIFDSIDESolverConfig()), + CachedFlowEdgeFunctions(Problem), AllTop(Problem.allTopFunction()), + JumpFn(std::make_shared>()), + Seeds(Problem.initialSeeds()) { + assert(ICF != nullptr); + } + + IDESolver(const IDESolver &) = delete; + IDESolver &operator=(const IDESolver &) = delete; + IDESolver(IDESolver &&) = delete; + IDESolver &operator=(IDESolver &&) = delete; + + virtual ~IDESolver() = default; + + nlohmann::json getAsJson() { + using TableCell = typename Table::Cell; + const static std::string DataFlowID = "DataFlow"; + nlohmann::json J; + auto Results = this->ValTab.cellSet(); + if (Results.empty()) { + J[DataFlowID] = "EMPTY"; + } else { + std::vector Cells(Results.begin(), Results.end()); + sort(Cells.begin(), Cells.end(), [](TableCell Lhs, TableCell Rhs) { + return Lhs.getRowKey() < Rhs.getRowKey(); + }); + n_t Curr; + for (unsigned I = 0; I < Cells.size(); ++I) { + Curr = Cells[I].getRowKey(); + auto NStr = + llvm::StringRef(NToString(Cells[I].getRowKey())).trim().str(); + + std::string NodeStr = + ICF->getFunctionName(ICF->getFunctionOf(Curr)) + "::" + NStr; + J[DataFlowID][NodeStr]; + std::string FactStr = + 
llvm::StringRef(DToString(Cells[I].getColumnKey())).trim().str(); + std::string ValueStr = + llvm::StringRef(LToString(Cells[I].getValue())).trim().str(); + J[DataFlowID][NodeStr]["Facts"] += {FactStr, ValueStr}; + } + } + return J; + } + + /// Returns the L-type result for the given value at the given statement. + [[nodiscard]] l_t resultAt(n_t Stmt, d_t Value) { + return getSolverResults().resultAt(Stmt, Value); + } + + /// Returns the L-type result at the given statement for the given data-flow + /// fact while respecting LLVM's SSA semantics. + /// + /// An example: when a value is loaded and the location loaded from, here + /// variable 'i', is a data-flow fact that holds, then the loaded value '%0' + /// will usually be generated and also holds. However, due to the underlying + /// theory (and respective implementation) this load instruction causes the + /// loaded value to be generated and thus, it will be valid only AFTER the + /// load instruction, i.e., at the successor instruction. + /// + /// %0 = load i32, i32* %i, align 4 + /// + /// This result accessor function returns the results at the successor + /// instruction(s) reflecting that the expression on the left-hand side holds + /// if the expression on the right-hand side holds. + template + [[nodiscard]] typename std::enable_if_t< + std::is_same_v, llvm::Instruction *>, l_t> + resultAtInLLVMSSA(NTy Stmt, d_t Value) { + return getSolverResults().resultAtInLLVMSSA(Stmt, Value); + } + + /// Returns the resulting environment for the given statement. + /// The artificial zero value can be automatically stripped. + /// TOP values are never returned. + [[nodiscard]] virtual std::unordered_map + resultsAt(n_t Stmt, bool StripZero = false) /*TODO const*/ { + return getSolverResults().resultsAt(Stmt, StripZero); + } + + /// Returns the data-flow results at the given statement while respecting + /// LLVM's SSA semantics. 
+ /// + /// An example: when a value is loaded and the location loaded from, here + /// variable 'i', is a data-flow fact that holds, then the loaded value '%0' + /// will usually be generated and also holds. However, due to the underlying + /// theory (and respective implementation) this load instruction causes the + /// loaded value to be generated and thus, it will be valid only AFTER the + /// load instruction, i.e., at the successor instruction. + /// + /// %0 = load i32, i32* %i, align 4 + /// + /// This result accessor function returns the results at the successor + /// instruction(s) reflecting that the expression on the left-hand side holds + /// if the expression on the right-hand side holds. + template + [[nodiscard]] typename std::enable_if_t< + std::is_same_v, llvm::Instruction *>, + std::unordered_map> + resultsAtInLLVMSSA(NTy Stmt, bool StripZero = false) { + return getSolverResults().resultsAtInLLVMSSA(Stmt, StripZero); + } + + virtual void emitTextReport(llvm::raw_ostream &OS = llvm::outs()) { + IDEProblem.emitTextReport(getSolverResults(), OS); + } + + virtual void emitGraphicalReport(llvm::raw_ostream &OS = llvm::outs()) { + IDEProblem.emitGraphicalReport(getSolverResults(), OS); + } + + void dumpResults(llvm::raw_ostream &OS = llvm::outs()) { + getSolverResults().dumpResults(*ICF, OS); + } + + void dumpAllInterPathEdges() { + llvm::outs() << "COMPUTED INTER PATH EDGES" << '\n'; + auto Interpe = this->computedInterPathEdges.cellSet(); + for (const auto &Cell : Interpe) { + llvm::outs() << "FROM" << '\n'; + IDEProblem.printNode(llvm::outs(), Cell.getRowKey()); + llvm::outs() << "TO" << '\n'; + IDEProblem.printNode(llvm::outs(), Cell.getColumnKey()); + llvm::outs() << "FACTS" << '\n'; + for (const auto &Fact : Cell.getValue()) { + llvm::outs() << "fact" << '\n'; + IDEProblem.printDataFlowFact(llvm::outs(), Fact.first); + llvm::outs() << "produces" << '\n'; + for (const auto &Out : Fact.second) { + IDEProblem.printDataFlowFact(llvm::outs(), Out); + 
} + } + } + } + + void dumpAllIntraPathEdges() { + llvm::outs() << "COMPUTED INTRA PATH EDGES" << '\n'; + auto Intrape = this->computedIntraPathEdges.cellSet(); + for (auto &Cell : Intrape) { + llvm::outs() << "FROM" << '\n'; + IDEProblem.printNode(llvm::outs(), Cell.getRowKey()); + llvm::outs() << "TO" << '\n'; + IDEProblem.printNode(llvm::outs(), Cell.getColumnKey()); + llvm::outs() << "FACTS" << '\n'; + for (auto &Fact : Cell.getValue()) { + llvm::outs() << "fact" << '\n'; + IDEProblem.printDataFlowFact(llvm::outs(), Fact.first); + llvm::outs() << "produces" << '\n'; + for (auto &Out : Fact.second) { + IDEProblem.printDataFlowFact(llvm::outs(), Out); + } + } + } + } + + /// Returns a view into the computed solver-results. + /// + /// NOTE: The SolverResults store a reference into this IDESolver, so its + /// lifetime is also bound to the lifetime of this solver. If you want to use + /// the solverResults beyond the lifetime of this solver, use + /// comsumeSolverResults() instead. + [[nodiscard]] SolverResults getSolverResults() noexcept { + return SolverResults(this->ValTab, ZeroValue); + } + + /// Moves the computed solver-results out of this solver such that the solver + /// can be destroyed without that the analysis results are lost. + /// Do not call any function (including getSolverResults()) on this IDESolver + /// instance after that. 
+ [[nodiscard]] OwningSolverResults + consumeSolverResults() noexcept(std::is_nothrow_move_constructible_v) { + return OwningSolverResults(std::move(this->ValTab), + std::move(ZeroValue)); + } + +protected: + void addWorklistItem(d_t SourceVal, n_t Target, d_t TargetVal, + EdgeFunction EF) { + WorkList.emplace_back(PathEdge{std::move( + SourceVal, std::move(Target), std::move(TargetVal), std::move(EF))}); + } + + bool updateJumpFunction(d_t SourceVal, n_t Target, d_t TargetVal, + EdgeFunction *f) { + EdgeFunction JumpFnE = [&]() { + const auto RevLookupResult = JumpFn->reverseLookup(Target, TargetVal); + if (RevLookupResult) { + const auto &JumpFnContainer = RevLookupResult->get(); + const auto Find = std::find_if( + JumpFnContainer.begin(), JumpFnContainer.end(), + [SourceVal](auto &KVpair) { return KVpair.first == SourceVal; }); + if (Find != JumpFnContainer.end()) { + return Find->second; + } + } + // jump function is initialized to all-top if no entry + // was found + return AllTop; + }(); + + EdgeFunction fPrime = JumpFnE.joinWith(*f); + bool NewFunction = fPrime != JumpFnE; + if (NewFunction) { + *f = fPrime; + JumpFn->addFunction(std::move(SourceVal), std::move(Target), + std::move(TargetVal), std::move(fPrime)); + + IF_LOG_ENABLED( + PHASAR_LOG_LEVEL( + DEBUG, "Join: " << JumpFnE << " & " << f + << (JumpFnE == f ? " (EF's are equal)" : " ")); + PHASAR_LOG_LEVEL( + DEBUG, " = " << f << (NewFunction ? 
" (new jump func)" : " ")); + PHASAR_LOG_LEVEL(DEBUG, ' ')); + + IF_LOG_ENABLED(if (!IDEProblem.isZeroValue(TargetVal)) { + PHASAR_LOG_LEVEL(DEBUG, "EDGE: getFunctionOf(Target)) + << ", D: " << DToString(SourceVal) << '>'); + PHASAR_LOG_LEVEL(DEBUG, " ---> '); + PHASAR_LOG_LEVEL(DEBUG, ' '); + }); + } + return NewFunction; + } + + void updateWithNewEdge(llvm::SmallDenseSet &UpdatedFacts, d_t SourceVal, + n_t OldTarget, n_t NewTarget, d_t TargetVal, + EdgeFunction EF) { + if (updateJumpFunction(SourceVal, OldTarget, TargetVal, &EF)) { + UpdatedFacts.insert(TargetVal); + addWorklistItem(SourceVal, NewTarget, std::move(TargetVal), + std::move(EF)); + } else if (UpdatedFacts.contains(TargetVal)) { + addWorklistItem(SourceVal, NewTarget, std::move(TargetVal), + std::move(EF)); + } + } + + /// Lines 13-20 of the algorithm; processing a call site in the caller's + /// context. + /// + /// For each possible callee, registers incoming call edges. + /// Also propagates call-to-return flows and summarized callee flows within + /// the caller. + /// + /// The following cases must be considered and handled: + /// 1. Process as usual and just process the call + /// 2. Create a new summary for that function (which shall be done + /// by the problem) + /// 3. Just use an existing summary provided by the problem + /// 4. If a special function is called, use a special summary + /// function + /// + /// @param edge an edge whose target node resembles a method call + /// + virtual void processCall(PathEdge Edge, EdgeFunction f) { + PAMM_GET_INSTANCE; + INC_COUNTER("Process Call", 1, Full); + PHASAR_LOG_LEVEL(DEBUG, + "Process call at target: " << NToString(Edge.getTarget())); + d_t d1 = Edge.factAtSource(); + n_t n = Edge.getTarget(); + // a call node; line 14... 
+ d_t d2 = Edge.factAtTarget(); + // EdgeFunction f = jumpFunction(Edge); + const auto &ReturnSiteNs = ICF->getReturnSitesOfCallAt(n); + const auto &Callees = ICF->getCalleesOfCallAt(n); + + IF_LOG_ENABLED( + PHASAR_LOG_LEVEL(DEBUG, "Possible callees:"); for (auto Callee + : Callees) { + PHASAR_LOG_LEVEL(DEBUG, " " << Callee->getName()); + } PHASAR_LOG_LEVEL(DEBUG, "Possible return sites:"); + for (auto ret + : ReturnSiteNs) { + PHASAR_LOG_LEVEL(DEBUG, " " << NToString(ret)); + }); + + // The facts that are updated for the return-site + llvm::SmallDenseSet UpdatedFacts; + + // for each possible callee + for (f_t SCalledProcN : Callees) { // still line 14 + // check if a special summary for the called procedure exists + FlowFunctionPtrType SpecialSum = + CachedFlowEdgeFunctions.getSummaryFlowFunction(n, SCalledProcN); + // if a special summary is available, treat this as a normal flow + // and use the summary flow and edge functions + if (SpecialSum) { + PHASAR_LOG_LEVEL(DEBUG, "Found and process special summary"); + for (n_t ReturnSiteN : ReturnSiteNs) { + container_type Res = computeSummaryFlowFunction(SpecialSum, d1, d2); + INC_COUNTER("SpecialSummary-FF Application", 1, Full); + ADD_TO_HISTOGRAM("Data-flow facts", Res.size(), 1, Full); + saveEdges(n, ReturnSiteN, d2, Res, false); + for (d_t d3 : Res) { + EdgeFunction SumEdgFnE = + CachedFlowEdgeFunctions.getSummaryEdgeFunction(n, d2, + ReturnSiteN, d3); + INC_COUNTER("SpecialSummary-EF Queries", 1, Full); + IF_LOG_ENABLED( + PHASAR_LOG_LEVEL( + DEBUG, "Queried Summary Edge Function: " << SumEdgFnE); + PHASAR_LOG_LEVEL(DEBUG, "Compose: " << SumEdgFnE << " * " << f + << '\n')); + + updateWithNewEdge(UpdatedFacts, d1, n, ReturnSiteN, std::move(d3), + f.composeWith(SumEdgFnE)); + } + } + } else { + // compute the call-flow function + FlowFunctionPtrType Function = + CachedFlowEdgeFunctions.getCallFlowFunction(n, SCalledProcN); + INC_COUNTER("FF Queries", 1, Full); + container_type Res = 
computeCallFlowFunction(Function, d1, d2); + ADD_TO_HISTOGRAM("Data-flow facts", Res.size(), 1, Full); + // for each callee's start point(s) + auto StartPointsOf = ICF->getStartPointsOf(SCalledProcN); + if (StartPointsOf.empty()) { + PHASAR_LOG_LEVEL(DEBUG, "Start points of '" + + ICF->getFunctionName(SCalledProcN) + + "' currently not available!"); + } + // if startPointsOf is empty, the called function is a declaration + for (n_t SP : StartPointsOf) { + saveEdges(n, SP, d2, Res, true); + // for each result node of the call-flow function + for (d_t d3 : Res) { + using TableCell = typename Table>::Cell; + + const auto &SummaryEntries = endSummary(SP, d3); + + // register the fact that has an incoming edge from + // line 15.1 of Naeem/Lhotak/Rodriguez + addIncoming(SP, d3, n, d2); + + if (SummaryEntries.empty()) { + // create initial self-loop + PHASAR_LOG_LEVEL( + DEBUG, "Create initial self-loop with D: " << DToString(d3)); + addWorklistItem(d3, SP, d3, EdgeIdentity{}); // line 15 + continue; + } + // line 15.2, copy to avoid concurrent modification exceptions by + // other threads + // const std::set endSumm(endSummary(sP, d3)); + // llvm::outs() << "ENDSUMM" << '\n'; + // llvm::outs() << "Size: " << endSumm.size() << '\n'; + // llvm::outs() << "sP: " << NToString(sP) + // << "\nd3: " << DToString(d3) + // << '\n'; + // printEndSummaryTab(); + // still line 15.2 of Naeem/Lhotak/Rodriguez + // for each already-queried exit value reachable from + // , create new caller-side jump functions to the return + // sites because we have observed a potentially new incoming + // edge into + for (const TableCell &Entry : SummaryEntries) { + const n_t &eP = Entry.getRowKey(); + const d_t &d4 = Entry.getColumnKey(); + const EdgeFunction &fCalleeSummary = Entry.getValue(); + // for each return site + for (n_t RetSiteN : ReturnSiteNs) { + // compute return-flow function + FlowFunctionPtrType RetFunction = + CachedFlowEdgeFunctions.getRetFlowFunction(n, SCalledProcN, + eP, 
RetSiteN); + INC_COUNTER("FF Queries", 1, Full); + const container_type ReturnedFacts = computeReturnFlowFunction( + RetFunction, d3, d4, n, Container{d2}); + ADD_TO_HISTOGRAM("Data-flow facts", ReturnedFacts.size(), 1, + Full); + saveEdges(eP, RetSiteN, d4, ReturnedFacts, true); + // for each target value of the function + for (d_t d5 : ReturnedFacts) { + // update the caller-side summary function + // get call edge function + EdgeFunction f4 = + CachedFlowEdgeFunctions.getCallEdgeFunction( + n, d2, SCalledProcN, d3); + PHASAR_LOG_LEVEL(DEBUG, "Queried Call Edge Function: " << f4); + // get return edge function + EdgeFunction f5 = + CachedFlowEdgeFunctions.getReturnEdgeFunction( + n, SCalledProcN, eP, d4, RetSiteN, d5); + PHASAR_LOG_LEVEL(DEBUG, + "Queried Return Edge Function: " << f5); + if (SolverConfig.emitESG()) { + for (auto SP : ICF->getStartPointsOf(SCalledProcN)) { + IntermediateEdgeFunctions[std::make_tuple(n, d2, SP, d3)] + .push_back(f4); + } + IntermediateEdgeFunctions[std::make_tuple(eP, d4, RetSiteN, + d5)] + .push_back(f5); + } + INC_COUNTER("EF Queries", 2, Full); + // compose call * calleeSummary * return edge functions + PHASAR_LOG_LEVEL(DEBUG, "Compose: " << f5 << " * " + << fCalleeSummary << " * " + << f4); + PHASAR_LOG_LEVEL(DEBUG, + " (return * calleeSummary * call)"); + EdgeFunction fPrime = + f4.composeWith(fCalleeSummary).composeWith(f5); + PHASAR_LOG_LEVEL(DEBUG, " = " << fPrime); + d_t d5_restoredCtx = restoreContextOnReturnedFact(n, d2, d5); + // propagte the effects of the entire call + PHASAR_LOG_LEVEL(DEBUG, "Compose: " << fPrime << " * " << f); + updateWithNewEdge(UpdatedFacts, d1, n, RetSiteN, + std::move(d5_restoredCtx), + f.composeWith(fPrime)); + } + } + } + } + } + } + } + // line 17-19 of Naeem/Lhotak/Rodriguez + // process intra-procedural flows along call-to-return flow functions + for (n_t ReturnSiteN : ReturnSiteNs) { + FlowFunctionPtrType CallToReturnFF = + CachedFlowEdgeFunctions.getCallToRetFlowFunction(n, ReturnSiteN, 
+ Callees); + INC_COUNTER("FF Queries", 1, Full); + container_type ReturnFacts = + computeCallToReturnFlowFunction(CallToReturnFF, d1, d2); + ADD_TO_HISTOGRAM("Data-flow facts", ReturnFacts.size(), 1, Full); + saveEdges(n, ReturnSiteN, d2, ReturnFacts, false); + for (d_t d3 : ReturnFacts) { + EdgeFunction EdgeFnE = + CachedFlowEdgeFunctions.getCallToRetEdgeFunction(n, d2, ReturnSiteN, + d3, Callees); + PHASAR_LOG_LEVEL(DEBUG, + "Queried Call-to-Return Edge Function: " << EdgeFnE); + if (SolverConfig.emitESG()) { + IntermediateEdgeFunctions[std::make_tuple(n, d2, ReturnSiteN, d3)] + .push_back(EdgeFnE); + } + INC_COUNTER("EF Queries", 1, Full); + auto fPrime = f.composeWith(EdgeFnE); + PHASAR_LOG_LEVEL(DEBUG, "Compose: " << EdgeFnE << " * " << f << " = " + << fPrime); + updateWithNewEdge(UpdatedFacts, d1, n, ReturnSiteN, std::move(d3), + std::move(fPrime)); + } + } + } + + /// Lines 33-37 of the algorithm. + /// Simply propagate normal, intra-procedural flows. + /// @param edge + /// + virtual void processNormalFlow(PathEdge Edge, EdgeFunction f) { + PAMM_GET_INSTANCE; + INC_COUNTER("Process Normal", 1, Full); + PHASAR_LOG_LEVEL( + DEBUG, "Process normal at target: " << NToString(Edge.getTarget())); + // EdgeFunction f = jumpFunction(Edge); + auto [d1, n, d2] = Edge.consume(); + + const auto &Succs = ICF->getSuccsOf(n); + + llvm::SmallDenseSet UpdatedFacts; + + for (const auto &nPrime : Succs) { + FlowFunctionPtrType FlowFunc = + CachedFlowEdgeFunctions.getNormalFlowFunction(n, nPrime); + INC_COUNTER("FF Queries", 1, Full); + const container_type Res = computeNormalFlowFunction(FlowFunc, d1, d2); + ADD_TO_HISTOGRAM("Data-flow facts", Res.size(), 1, Full); + saveEdges(n, nPrime, d2, Res, false); + for (d_t d3 : Res) { + EdgeFunction g = + CachedFlowEdgeFunctions.getNormalEdgeFunction(n, d2, nPrime, d3); + PHASAR_LOG_LEVEL(DEBUG, "Queried Normal Edge Function: " << g); + EdgeFunction fPrime = f.composeWith(g); + if (SolverConfig.emitESG()) { + 
IntermediateEdgeFunctions[std::make_tuple(n, d2, nPrime, d3)] + .push_back(g); + } + PHASAR_LOG_LEVEL(DEBUG, + "Compose: " << g << " * " << f << " = " << fPrime); + INC_COUNTER("EF Queries", 1, Full); + + updateWithNewEdge(UpdatedFacts, d1, n, nPrime, std::move(d3), + std::move(fPrime)); + } + } + } + + void propagateValueAtStart(const std::pair NAndD, n_t Stmt) { + PAMM_GET_INSTANCE; + d_t Fact = NAndD.second; + f_t Func = ICF->getFunctionOf(Stmt); + for (const n_t CallSite : ICF->getCallsFromWithin(Func)) { + auto LookupResults = JumpFn->forwardLookup(Fact, CallSite); + if (!LookupResults) { + continue; + } + for (size_t I = 0; I < LookupResults->get().size(); ++I) { + auto Entry = LookupResults->get()[I]; + d_t dPrime = Entry.first; + auto fPrime = Entry.second; + n_t SP = Stmt; + l_t Val = val(SP, Fact); + INC_COUNTER("Value Propagation", 1, Full); + propagateValue(CallSite, dPrime, fPrime.computeTarget(Val)); + } + } + } + + void propagateValueAtCall(const std::pair NAndD, n_t Stmt) { + PAMM_GET_INSTANCE; + d_t Fact = NAndD.second; + for (const f_t Callee : ICF->getCalleesOfCallAt(Stmt)) { + FlowFunctionPtrType CallFlowFunction = + CachedFlowEdgeFunctions.getCallFlowFunction(Stmt, Callee); + INC_COUNTER("FF Queries", 1, Full); + for (const d_t dPrime : CallFlowFunction->computeTargets(Fact)) { + EdgeFunction EdgeFn = CachedFlowEdgeFunctions.getCallEdgeFunction( + Stmt, Fact, Callee, dPrime); + PHASAR_LOG_LEVEL(DEBUG, "Queried Call Edge Function: " << EdgeFn); + if (SolverConfig.emitESG()) { + for (const auto SP : ICF->getStartPointsOf(Callee)) { + IntermediateEdgeFunctions[std::make_tuple(Stmt, Fact, SP, dPrime)] + .push_back(EdgeFn); + } + } + INC_COUNTER("EF Queries", 1, Full); + for (const n_t StartPoint : ICF->getStartPointsOf(Callee)) { + INC_COUNTER("Value Propagation", 1, Full); + propagateValue(StartPoint, dPrime, + EdgeFn.computeTarget(val(Stmt, Fact))); + } + } + } + } + + void propagateValue(n_t NHashN, d_t NHashD, const l_t &L) { + l_t ValNHash = 
val(NHashN, NHashD); + l_t LPrime = joinValueAt(NHashN, NHashD, ValNHash, L); + if (!(LPrime == ValNHash)) { + setVal(NHashN, NHashD, std::move(LPrime)); + ValuePropWL.emplace_back(std::move(NHashN), std::move(NHashD)); + } + } + + l_t val(n_t NHashN, d_t NHashD) { + if (ValTab.contains(NHashN, NHashD)) { + return ValTab.get(NHashN, NHashD); + } + // implicitly initialized to top; see line [1] of Fig. 7 in SRH96 paper + return IDEProblem.topElement(); + } + + void setVal(n_t NHashN, d_t NHashD, l_t L) { + IF_LOG_ENABLED({ + PHASAR_LOG_LEVEL(DEBUG, + "Function : " << ICF->getFunctionOf(NHashN)->getName()); + PHASAR_LOG_LEVEL(DEBUG, "Inst. : " << NToString(NHashN)); + PHASAR_LOG_LEVEL(DEBUG, "Fact : " << DToString(NHashD)); + PHASAR_LOG_LEVEL(DEBUG, "Value : " << LToString(L)); + PHASAR_LOG_LEVEL(DEBUG, ' '); + }); + // TOP is the implicit default value which we do not need to store. + // if (l == IDEProblem.topElement()) { + // do not store top values + // ValTab.remove(nHashN, nHashD); + // } else { + ValTab.insert(NHashN, NHashD, std::move(L)); + // } + } + + EdgeFunction jumpFunction(const PathEdge Edge) { + IF_LOG_ENABLED( + PHASAR_LOG_LEVEL(DEBUG, "JumpFunctions Forward-Lookup:"); + PHASAR_LOG_LEVEL(DEBUG, + " Source D: " << DToString(Edge.factAtSource())); + PHASAR_LOG_LEVEL(DEBUG, " Target N: " << NToString(Edge.getTarget())); + PHASAR_LOG_LEVEL(DEBUG, + " Target D: " << DToString(Edge.factAtTarget()))); + + auto FwdLookupRes = + JumpFn->forwardLookup(Edge.factAtSource(), Edge.getTarget()); + if (FwdLookupRes) { + auto &Ref = FwdLookupRes->get(); + if (auto Find = std::find_if(Ref.begin(), Ref.end(), + [Edge](const auto &Pair) { + return Edge.factAtTarget() == Pair.first; + }); + Find != Ref.end()) { + PHASAR_LOG_LEVEL(DEBUG, " => EdgeFn: " << Find->second); + return Find->second; + } + } + PHASAR_LOG_LEVEL(DEBUG, " => EdgeFn: " << AllTop); + // JumpFn initialized to all-top, see line [2] in SRH96 paper + return AllTop; + } + + void addEndSummary(n_t SP, d_t 
d1, n_t eP, d_t d2, EdgeFunction f) { + // note: at this point we don't need to join with a potential previous f + // because f is a jump function, which is already properly joined + // within propagate(..) + EndsummaryTab.get(SP, d1).insert(eP, d2, std::move(f)); + } + + // should be made a callable at some point + void pathEdgeProcessingTask(PathEdge Edge, EdgeFunction EF) { + PAMM_GET_INSTANCE; + INC_COUNTER("JumpFn Construction", 1, Full); + IF_LOG_ENABLED( + PHASAR_LOG_LEVEL( + DEBUG, + "-------------------------------------------- " + << PathEdgeCount + << ". Path Edge --------------------------------------------"); + PHASAR_LOG_LEVEL(DEBUG, ' '); + PHASAR_LOG_LEVEL(DEBUG, "Process " << PathEdgeCount << ". path edge:"); + PHASAR_LOG_LEVEL(DEBUG, "< D source: " << DToString(Edge.factAtSource()) + << " ;"); + PHASAR_LOG_LEVEL(DEBUG, + " N target: " << NToString(Edge.getTarget()) << " ;"); + PHASAR_LOG_LEVEL(DEBUG, " D target: " << DToString(Edge.factAtTarget()) + << " >"); + PHASAR_LOG_LEVEL(DEBUG, ' ')); + + if (!ICF->isCallSite(Edge.getTarget())) { + if (ICF->isExitInst(Edge.getTarget())) { + processExit(Edge, EF); + } + if (!ICF->getSuccsOf(Edge.getTarget()).empty()) { + processNormalFlow(std::move(Edge), std::move(EF)); + } + } else { + processCall(std::move(Edge), std::move(EF)); + } + } + + // should be made a callable at some point + void valuePropagationTask(std::pair NAndD) { + n_t n = NAndD.first; + // our initial seeds are not necessarily method-start points but here they + // should be treated as such the same also for unbalanced return sites in + // an unbalanced problem + if (ICF->isStartPoint(n) || Seeds.containsInitialSeedsFor(n) || + UnbalancedRetSites.count(n)) { + // FIXME: is currently not executed for main!!! 
+ // initial seeds are set in the global constructor, and main is also not + // officially called by any other function + propagateValueAtStart(NAndD, n); + } + if (ICF->isCallSite(n)) { + propagateValueAtCall(NAndD, n); + } + } + + // should be made a callable at some point + void valueComputationTask(const std::vector &Values) { + PAMM_GET_INSTANCE; + for (n_t n : Values) { + for (n_t SP : ICF->getStartPointsOf(ICF->getFunctionOf(n))) { + using TableCell = typename Table>::Cell; + Table> &LookupByTarget = + JumpFn->lookupByTarget(n); + for (const TableCell &SourceValTargetValAndFunction : + LookupByTarget.cellSet()) { + d_t dPrime = SourceValTargetValAndFunction.getRowKey(); + d_t d = SourceValTargetValAndFunction.getColumnKey(); + EdgeFunction fPrime = SourceValTargetValAndFunction.getValue(); + l_t TargetVal = val(SP, dPrime); + setVal(n, d, + IDEProblem.join(val(n, d), + fPrime.computeTarget(std::move(TargetVal)))); + INC_COUNTER("Value Computation", 1, Full); + } + } + } + } + + virtual void saveEdges(n_t SourceNode, n_t SinkStmt, d_t SourceVal, + const container_type &DestVals, bool InterP) { + if (!SolverConfig.recordEdges()) { + return; + } + Table> &TgtMap = + (InterP) ? 
ComputedInterPathEdges : ComputedIntraPathEdges; + TgtMap.get(SourceNode, SinkStmt)[SourceVal].insert(DestVals.begin(), + DestVals.end()); + } + + void submitInitialValues() { + std::map> AllSeeds = Seeds.getSeeds(); + for (n_t UnbalancedRetSite : UnbalancedRetSites) { + if (AllSeeds.find(UnbalancedRetSite) == AllSeeds.end()) { + AllSeeds[UnbalancedRetSite][ZeroValue] = IDEProblem.topElement(); + } + } + // do processing + for (const auto &[StartPoint, Facts] : AllSeeds) { + for (auto &[Fact, Value] : Facts) { + PHASAR_LOG_LEVEL(DEBUG, "set initial seed at: " + << NToString(StartPoint) + << ", fact: " << DToString(Fact) + << ", value: " << LToString(Value)); + // store the value recorded for this seed; unbalanced return sites without + // a seed were given the top element for the zero fact in the loop above + setVal(StartPoint, Fact, Value); + std::pair SuperGraphNode(StartPoint, Fact); + valuePropagationTask(std::move(SuperGraphNode)); + } + } + } + + /// Computes the final values for edge functions. + void computeValues() { + PHASAR_LOG_LEVEL(DEBUG, "Start computing values"); + // Phase II(i) + submitInitialValues(); + while (!ValuePropWL.empty()) { + auto NAndD = std::move(ValuePropWL.back()); + ValuePropWL.pop_back(); + valuePropagationTask(std::move(NAndD)); + } + + // Phase II(ii) + // fetch the nodes returned by ICF->allNonCallStartNodes() and process them + // in a single valueComputationTask call (no dispatch to threads here) + const auto AllNonCallStartNodes = ICF->allNonCallStartNodes(); + valueComputationTask(AllNonCallStartNodes); + } + + /// Schedules the processing of initial seeds, initiating the analysis. + /// Clients should only call this method if performing synchronization on + /// their own. Normally, solve() should be called instead. + void submitInitialSeeds() { + PAMM_GET_INSTANCE; + // Check if the initial seeds contain the zero value at every starting + // point. If not, the zero value needs to be added to allow for correct + // solving of the problem.
+ for (const auto &[StartPoint, Facts] : Seeds.getSeeds()) { + if (Facts.find(ZeroValue) == Facts.end()) { + // Add zero value if it's not in the set of facts. + PHASAR_LOG_LEVEL( + DEBUG, "Zero-Value has been added automatically to start point: " + << NToString(StartPoint)); + Seeds.addSeed(StartPoint, ZeroValue, IDEProblem.bottomElement()); + } + } + PHASAR_LOG_LEVEL(DEBUG, + "Number of initial seeds: " << Seeds.countInitialSeeds()); + PHASAR_LOG_LEVEL(DEBUG, "List of initial seeds: "); + for (const auto &[StartPoint, Facts] : Seeds.getSeeds()) { + PHASAR_LOG_LEVEL(DEBUG, "Start point: " << NToString(StartPoint)); + /// If statically disabling the logger, Fact and Value are unused. To + /// prevent the compilation from failing with -Werror, add the + /// [[maybe_unused]] attribute + for ([[maybe_unused]] const auto &[Fact, Value] : Facts) { + PHASAR_LOG_LEVEL(DEBUG, "\tFact: " << DToString(Fact)); + PHASAR_LOG_LEVEL(DEBUG, "\tValue: " << LToString(Value)); + } + } + for (const auto &[StartPoint, Facts] : Seeds.getSeeds()) { + for (const auto &[Fact, Value] : Facts) { + PHASAR_LOG_LEVEL(DEBUG, "Submit seed at: " << NToString(StartPoint)); + PHASAR_LOG_LEVEL(DEBUG, "\tFact: " << DToString(Fact)); + PHASAR_LOG_LEVEL(DEBUG, "\tValue: " << LToString(Value)); + if (!IDEProblem.isZeroValue(Fact)) { + INC_COUNTER("Gen facts", 1, Core); + } + + /// TODO: Do we have to add EdgeIdentity to the JF-table in advance? + /// Probably not + WorkList.emplace_back(Fact, StartPoint, Fact); + } + } + } + + /// Lines 21-32 of the algorithm. + /// + /// Stores callee-side summaries. + /// Also, at the side of the caller, propagates intra-procedural flows to + /// return sites using those newly computed summaries.
+ /// + /// @param edge an edge whose target node resembles a method exit + /// + virtual void processExit(PathEdge Edge, EdgeFunction f) { + PAMM_GET_INSTANCE; + INC_COUNTER("Process Exit", 1, Full); + PHASAR_LOG_LEVEL(DEBUG, + "Process exit at target: " << NToString(Edge.getTarget())); + n_t n = Edge.getTarget(); // an exit node; line 21... + + // EdgeFunction f = jumpFunction(Edge); + f_t FunctionThatNeedsSummary = ICF->getFunctionOf(n); + d_t d1 = Edge.factAtSource(); + d_t d2 = Edge.factAtTarget(); + // for each of the method's start points, determine incoming calls + const auto StartPointsOf = ICF->getStartPointsOf(FunctionThatNeedsSummary); + std::map Inc; + for (n_t SP : StartPointsOf) { + // line 21.1 of Naeem/Lhotak/Rodriguez + // register end-summary + addEndSummary(SP, d1, n, d2, f); + for (const auto &Entry : incoming(d1, SP)) { + Inc[Entry.first] = Container{Entry.second}; + } + } + printEndSummaryTab(); + printIncomingTab(); + // for each incoming call edge already processed + //(see processCall(..)) + for (const auto &Entry : Inc) { + // line 22 + n_t c = Entry.first; + + llvm::SmallDenseSet UpdatedFacts; + + // for each return site + for (n_t RetSiteC : ICF->getReturnSitesOfCallAt(c)) { + // compute return-flow function + FlowFunctionPtrType RetFunction = + CachedFlowEdgeFunctions.getRetFlowFunction( + c, FunctionThatNeedsSummary, n, RetSiteC); + INC_COUNTER("FF Queries", 1, Full); + // for each incoming-call value + for (d_t d4 : Entry.second) { + const container_type Targets = + computeReturnFlowFunction(RetFunction, d1, d2, c, Entry.second); + ADD_TO_HISTOGRAM("Data-flow facts", Targets.size(), 1, Full); + saveEdges(n, RetSiteC, d2, Targets, true); + // for each target value at the return site + // line 23 + for (d_t d5 : Targets) { + // compute composed function + // get call edge function + EdgeFunction f4 = CachedFlowEdgeFunctions.getCallEdgeFunction( + c, d4, ICF->getFunctionOf(n), d1); + PHASAR_LOG_LEVEL(DEBUG, "Queried Call Edge Function: 
" << f4); + // get return edge function + EdgeFunction f5 = + CachedFlowEdgeFunctions.getReturnEdgeFunction( + c, ICF->getFunctionOf(n), n, d2, RetSiteC, d5); + PHASAR_LOG_LEVEL(DEBUG, "Queried Return Edge Function: " << f5); + if (SolverConfig.emitESG()) { + for (auto SP : ICF->getStartPointsOf(ICF->getFunctionOf(n))) { + IntermediateEdgeFunctions[std::make_tuple(c, d4, SP, d1)] + .push_back(f4); + } + IntermediateEdgeFunctions[std::make_tuple(n, d2, RetSiteC, d5)] + .push_back(f5); + } + INC_COUNTER("EF Queries", 2, Full); + // compose call function * function * return function + PHASAR_LOG_LEVEL(DEBUG, + "Compose: " << f5 << " * " << f << " * " << f4); + PHASAR_LOG_LEVEL(DEBUG, " (return * function * call)"); + EdgeFunction fPrime = f4.composeWith(f).composeWith(f5); + PHASAR_LOG_LEVEL(DEBUG, " = " << fPrime); + // for each jump function coming into the call, propagate to + // return site using the composed function + auto RevLookupResult = JumpFn->reverseLookup(c, d4); + if (RevLookupResult) { + for (size_t I = 0; I < RevLookupResult->get().size(); ++I) { + auto ValAndFunc = RevLookupResult->get()[I]; + EdgeFunction f3 = ValAndFunc.second; + if (f3 != AllTop) { + d_t d3 = ValAndFunc.first; + d_t d5_restoredCtx = restoreContextOnReturnedFact(c, d4, d5); + PHASAR_LOG_LEVEL(DEBUG, "Compose: " << fPrime << " * " << f3); + if (updateJumpFunction(d3, c, d5_restoredCtx, + f3.composeWith(fPrime))) { + UpdatedFacts.insert(d5_restoredCtx); + WorkList.emplace_back(std::move(d3), RetSiteC, + std::move(d5_restoredCtx)); + } else if (UpdatedFacts.contains(d5_restoredCtx)) { + WorkList.emplace_back(std::move(d3), RetSiteC, + std::move(d5_restoredCtx)); + } + } + } + } + } + } + } + } + // handling for unbalanced problems where we return out of a method with a + // fact for which we have no incoming flow. 
+ // note: we propagate that way only values that originate from ZERO, as + // conditionally generated values should only + // be propagated into callers that have an incoming edge for this + // condition (NOTE: the isZeroValue(d1) check below is commented out) + /// TODO: Add a check for "d1 is seed in functionOf(n)" + if (SolverConfig.followReturnsPastSeeds() && Inc.empty() /*&& + IDEProblem.isZeroValue(d1)*/) { + const auto &Callers = ICF->getCallersOf(FunctionThatNeedsSummary); + for (n_t Caller : Callers) { + for (n_t RetSiteC : ICF->getReturnSitesOfCallAt(Caller)) { + FlowFunctionPtrType RetFunction = + CachedFlowEdgeFunctions.getRetFlowFunction( + Caller, FunctionThatNeedsSummary, n, RetSiteC); + INC_COUNTER("FF Queries", 1, Full); + const container_type Targets = computeReturnFlowFunction( + RetFunction, d1, d2, Caller, Container{ZeroValue}); + ADD_TO_HISTOGRAM("Data-flow facts", Targets.size(), 1, Full); + saveEdges(n, RetSiteC, d2, Targets, true); + for (d_t d5 : Targets) { + EdgeFunction f5 = + CachedFlowEdgeFunctions.getReturnEdgeFunction( + Caller, ICF->getFunctionOf(n), n, d2, RetSiteC, d5); + PHASAR_LOG_LEVEL(DEBUG, "Queried Return Edge Function: " << f5); + if (SolverConfig.emitESG()) { + IntermediateEdgeFunctions[std::make_tuple(n, d2, RetSiteC, d5)] + .push_back(f5); + } + INC_COUNTER("EF Queries", 1, Full); + PHASAR_LOG_LEVEL(DEBUG, "Compose: " << f5 << " * " << f); + propagteUnbalancedReturnFlow(RetSiteC, d5, f.composeWith(f5), + Caller); + // register for value processing (2nd IDE phase) + UnbalancedRetSites.insert(RetSiteC); + } + } + } + // in cases where there are no callers, the return statement would + // normally not be processed at all; this might be undesirable if + // the flow function has a side effect such as registering a taint; + // instead we call applyUnbalancedRetFlowFunctionSideEffects without a caller + if (Callers.empty()) { + IDEProblem.applyUnbalancedRetFlowFunctionSideEffects( + FunctionThatNeedsSummary, n, d2); + } + } + } + + void propagteUnbalancedReturnFlow(n_t RetSiteC, d_t
TargetVal, + EdgeFunction EdgeFunc, + n_t /*RelatedCallSite*/) { + WorkList.emplace_back( + PathEdge{ZeroValue, std::move(RetSiteC), std::move(TargetVal)}, + std::move(EdgeFunc)); + } + + /// This method will be called for each incoming edge and can be used to + /// transfer knowledge from the calling edge to the returning edge, without + /// affecting the summary edges at the callee. + /// @param callSite + /// + /// @param d4 + /// Fact stored with the incoming edge, i.e., present at the + /// caller side + /// @param d5 + /// Fact that originally should be propagated to the caller. + /// @return Fact that will be propagated to the caller. + /// + d_t restoreContextOnReturnedFact(n_t /*CallSite*/, d_t /*d4*/, d_t d5) { + // TODO support LinkedNode and JoinHandlingNode + // if (d5 instanceof LinkedNode) { + // ((LinkedNode) d5).setCallingContext(d4); + // } + // if(d5 instanceof JoinHandlingNode) { + // ((JoinHandlingNode) + // d5).setCallingContext(d4); + // } + return d5; + } + + /// Computes the normal flow function for the given set of start and end + /// abstractions- + /// @param flowFunction The normal flow function to compute + /// @param d1 The abstraction at the method's start node + /// @param d2 The abstraction at the current node + /// @return The set of abstractions at the successor node + /// + container_type computeNormalFlowFunction(const FlowFunctionPtrType &FlowFunc, + d_t /*d1*/, d_t d2) { + return FlowFunc->computeTargets(d2); + } + + container_type + computeSummaryFlowFunction(const FlowFunctionPtrType &SummaryFlowFunction, + d_t /*d1*/, d_t d2) { + return SummaryFlowFunction->computeTargets(d2); + } + + /// Computes the call flow function for the given call-site abstraction + /// @param callFlowFunction The call flow function to compute + /// @param d1 The abstraction at the current method's start node. 
+ /// @param d2 The abstraction at the call site + /// @return The set of caller-side abstractions at the callee's start node + /// + container_type + computeCallFlowFunction(const FlowFunctionPtrType &CallFlowFunction, + d_t /*d1*/, d_t d2) { + return CallFlowFunction->computeTargets(d2); + } + + /// Computes the call-to-return flow function for the given call-site + /// abstraction + /// @param callToReturnFlowFunction The call-to-return flow function to + /// compute + /// @param d1 The abstraction at the current method's start node. + /// @param d2 The abstraction at the call site + /// @return The set of caller-side abstractions at the return site + /// + container_type computeCallToReturnFlowFunction( + const FlowFunctionPtrType &CallToReturnFlowFunction, d_t /*d1*/, d_t d2) { + return CallToReturnFlowFunction->computeTargets(d2); + } + + /// Computes the return flow function for the given set of caller-side + /// abstractions. + /// @param retFunction The return flow function to compute + /// @param d1 The abstraction at the beginning of the callee + /// @param d2 The abstraction at the exit node in the callee + /// @param callSite The call site + /// @param callerSideDs The abstractions at the call site + /// @return The set of caller-side abstractions at the return site + /// + container_type + computeReturnFlowFunction(const FlowFunctionPtrType &RetFlowFunction, + d_t /*d1*/, d_t d2, n_t /*CallSite*/, + const Container & /*CallerSideDs*/) { + return RetFlowFunction->computeTargets(d2); + } + + /// Propagates the flow further down the exploded super graph, merging any + /// edge function that might already have been computed for TargetVal at + /// Target. 
+ /// + /// @param SourceVal the source value of the propagated summary edge + /// @param Target the target statement + /// @param TargetVal the target value at the target statement + /// @param f the new edge function computed from (s0,SourceVal) to + /// (Target,TargetVal) + /// @param relatedCallSite for call and return flows the related call + /// statement, nullptr otherwise (this value is not used within this + /// implementation but may be useful for subclasses of IDESolver) + /// @param isUnbalancedReturn true if this edge is propagating an + /// unbalanced return (this value is not used within this implementation + /// but may be useful for subclasses of {@link IDESolver}) + /// + void propagate(PathEdge Edge, EdgeFunction EF) { + const auto &[SourceVal, Target, TargetVal] = Edge.get(); + + PHASAR_LOG_LEVEL(DEBUG, "Propagate flow"); + PHASAR_LOG_LEVEL(DEBUG, "Source value : " << DToString(SourceVal)); + PHASAR_LOG_LEVEL(DEBUG, "Target : " << NToString(Target)); + PHASAR_LOG_LEVEL(DEBUG, "Target value : " << DToString(TargetVal)); + + PathEdgeCount++; + pathEdgeProcessingTask(std::move(Edge), std::move(EF)); + } + + l_t joinValueAt(n_t /*Unit*/, d_t /*Fact*/, l_t Curr, l_t NewVal) { + return IDEProblem.join(std::move(Curr), std::move(NewVal)); + } + + std::set>::Cell> + endSummary(n_t SP, d_t d3) { + if constexpr (PAMM_CURR_SEV_LEVEL >= PAMM_SEVERITY_LEVEL::Core) { + auto Key = std::make_pair(SP, d3); + auto FindND = FSummaryReuse.find(Key); + if (FindND == FSummaryReuse.end()) { + FSummaryReuse.emplace(Key, 0); + } else { + FSummaryReuse[Key] += 1; + } + } + return EndsummaryTab.get(SP, d3).cellSet(); + } + + std::map incoming(d_t d1, n_t SP) { + return IncomingTab.get(SP, d1); + } + + void addIncoming(n_t SP, d_t d3, n_t n, d_t d2) { + IncomingTab.get(SP, d3)[n].insert(d2); + } + + void printIncomingTab() const { + IF_LOG_ENABLED( + PHASAR_LOG_LEVEL(DEBUG, "Start of incomingtab entry"); + for (const auto &Cell + : IncomingTab.cellSet()) { + 
PHASAR_LOG_LEVEL(DEBUG, "sP: " << NToString(Cell.getRowKey())); + PHASAR_LOG_LEVEL(DEBUG, "d3: " << DToString(Cell.getColumnKey())); + for (const auto &Entry : Cell.getValue()) { + PHASAR_LOG_LEVEL(DEBUG, " n: " << NToString(Entry.first)); + for (const auto &Fact : Entry.second) { + PHASAR_LOG_LEVEL(DEBUG, " d2: " << DToString(Fact)); + } + } + PHASAR_LOG_LEVEL(DEBUG, "---------------"); + } PHASAR_LOG_LEVEL(DEBUG, "End of incomingtab entry");) + } + + void printEndSummaryTab() const { + IF_LOG_ENABLED( + PHASAR_LOG_LEVEL(DEBUG, "Start of endsummarytab entry"); + + EndsummaryTab.foreachCell( + [](const auto &Row, const auto &Col, const auto &Val) { + PHASAR_LOG_LEVEL(DEBUG, "sP: " << NToString(Row)); + PHASAR_LOG_LEVEL(DEBUG, "d1: " << DToString(Col)); + + Val.foreachCell([](const auto &InnerRow, const auto &InnerCol, + const auto &InnerVal) { + PHASAR_LOG_LEVEL(DEBUG, " eP: " << NToString(InnerRow)); + PHASAR_LOG_LEVEL(DEBUG, " d2: " << DToString(InnerCol)); + PHASAR_LOG_LEVEL(DEBUG, " EF: " << InnerVal); + }); + PHASAR_LOG_LEVEL(DEBUG, "---------------"); + }); + + PHASAR_LOG_LEVEL(DEBUG, "End of endsummarytab entry");) + } + + void printComputedPathEdges() { + llvm::outs() + << "\n**********************************************************"; + llvm::outs() + << "\n* Computed intra-procedural path egdes *"; + llvm::outs() + << "\n**********************************************************\n"; + + // Sort intra-procedural path edges + auto Cells = ComputedIntraPathEdges.cellVec(); + StmtLess Stmtless(ICF); + sort(Cells.begin(), Cells.end(), [&Stmtless](auto Lhs, auto Rhs) { + return Stmtless(Lhs.getRowKey(), Rhs.getRowKey()); + }); + for (const auto &Cell : Cells) { + auto Edge = std::make_pair(Cell.getRowKey(), Cell.getColumnKey()); + std::string N2Label = NToString(Edge.second); + llvm::outs() << "\nN1: " << NToString(Edge.first) << '\n' + << "N2: " << N2Label << "\n----" + << std::string(N2Label.size(), '-') << '\n'; + for (auto D1ToD2Set : Cell.getValue()) { + 
auto D1Fact = D1ToD2Set.first; + llvm::outs() << "D1: " << DToString(D1Fact) << '\n'; + for (auto D2Fact : D1ToD2Set.second) { + llvm::outs() << "\tD2: " << DToString(D2Fact) << '\n'; + } + llvm::outs() << '\n'; + } + } + + llvm::outs() + << "\n**********************************************************"; + llvm::outs() + << "\n* Computed inter-procedural path edges *"; + llvm::outs() + << "\n**********************************************************\n"; + + // Sort inter-procedural path edges + Cells = ComputedInterPathEdges.cellVec(); + sort(Cells.begin(), Cells.end(), [&Stmtless](auto Lhs, auto Rhs) { + return Stmtless(Lhs.getRowKey(), Rhs.getRowKey()); + }); + for (const auto &Cell : Cells) { + auto Edge = std::make_pair(Cell.getRowKey(), Cell.getColumnKey()); + std::string N2Label = NToString(Edge.second); + llvm::outs() << "\nN1: " << NToString(Edge.first) << '\n' + << "N2: " << N2Label << "\n----" + << std::string(N2Label.size(), '-') << '\n'; + for (auto D1ToD2Set : Cell.getValue()) { + auto D1Fact = D1ToD2Set.first; + llvm::outs() << "D1: " << DToString(D1Fact) << '\n'; + for (auto D2Fact : D1ToD2Set.second) { + llvm::outs() << "\tD2: " << DToString(D2Fact) << '\n'; + } + llvm::outs() << '\n'; + } + } + } + + /// The invariant for computing the number of generated (#gen) and killed + /// (#kill) facts: + /// (1) #Valid facts at the last statement <= #gen - #kill + /// (2) #gen >= #kill + /// + /// The total number of valid facts can be smaller than the difference of + /// generated and killed facts, due to set semantics, i.e., a fact can be + /// generated multiple times but appears only once. + /// + /// Zero value is not counted! + /// + /// @brief Computes and prints statistics of the analysis run, e.g. number of + /// generated/killed facts, number of summary-reuses etc.
+ /// + void computeAndPrintStatistics() { + PAMM_GET_INSTANCE; + // Stores all valid facts at return site in caller context; return-site is + // key + std::unordered_map> ValidInCallerContext; + size_t NumGenFacts = 0; + size_t NumIntraPathEdges = 0; + size_t NumInterPathEdges = 0; + // --- Intra-procedural Path Edges --- + // d1 --> d2-Set + // Case 1: d1 in d2-Set + // Case 2: d1 not in d2-Set, i.e., d1 was killed. d2-Set could be empty. + for (const auto &Cell : ComputedIntraPathEdges.cellSet()) { + auto Edge = std::make_pair(Cell.getRowKey(), Cell.getColumnKey()); + PHASAR_LOG_LEVEL(DEBUG, "N1: " << NToString(Edge.first)); + PHASAR_LOG_LEVEL(DEBUG, "N2: " << NToString(Edge.second)); + for (auto &[D1, D2s] : Cell.getValue()) { + PHASAR_LOG_LEVEL(DEBUG, "d1: " << DToString(D1)); + NumIntraPathEdges += D2s.size(); + // Case 1 + if (D2s.find(D1) != D2s.end()) { + NumGenFacts += D2s.size() - 1; + } + // Case 2 + else { + NumGenFacts += D2s.size(); + } + // Store all valid facts after call-to-return flow + if (ICF->isCallSite(Edge.first)) { + ValidInCallerContext[Edge.second].insert(D2s.begin(), D2s.end()); + } + IF_LOG_ENABLED([this](const auto &D2s) { + for (auto D2 : D2s) { + PHASAR_LOG_LEVEL(DEBUG, "d2: " << DToString(D2)); + } + PHASAR_LOG_LEVEL(DEBUG, "----"); + }(D2s)); + } + PHASAR_LOG_LEVEL(DEBUG, " "); + } + // Stores all pairs of (Startpoint, Fact) for which a summary was applied + std::set> ProcessSummaryFacts; + PHASAR_LOG_LEVEL(DEBUG, "=============================================="); + PHASAR_LOG_LEVEL(DEBUG, "INTER PATH EDGES"); + for (const auto &Cell : ComputedInterPathEdges.cellSet()) { + auto Edge = std::make_pair(Cell.getRowKey(), Cell.getColumnKey()); + PHASAR_LOG_LEVEL(DEBUG, "N1: " << NToString(Edge.first)); + PHASAR_LOG_LEVEL(DEBUG, "N2: " << NToString(Edge.second)); + // --- Call-flow Path Edges --- + // Case 1: d1 --> empty set + // Can be ignored, since killing a fact in the caller context will + // actually happen during call-to-return. 
+ // + // Case 2: d1 --> d2-Set + // Every fact d_i != ZeroValue in d2-set will be generated in the + // callee context, thus counts as a new fact. Even if d1 is passed as it + // is, it will count as a new fact. The reason for this is, that d1 can + // be killed in the callee context, but still be valid in the caller + // context. + // + // Special Case: Summary was applied for a particular call + // Process the summary's #gen and #kill. + if (ICF->isCallSite(Edge.first)) { + for (auto &[D1, D2s] : Cell.getValue()) { + PHASAR_LOG_LEVEL(DEBUG, "d1: " << DToString(D1)); + NumInterPathEdges += D2s.size(); + for (auto D2 : D2s) { + if (!IDEProblem.isZeroValue(D2)) { + NumGenFacts++; + } + // Special case + if (ProcessSummaryFacts.find(std::make_pair(Edge.second, D2)) != + ProcessSummaryFacts.end()) { + + std::set SummaryDSet; + EndsummaryTab.get(Edge.second, D2) + .foreachCell([&SummaryDSet](const auto &Row, const auto &Col, + const auto &Val) { + SummaryDSet.insert(Col); + }); + + // Process summary just as an intra-procedural edge + if (SummaryDSet.find(D2) != SummaryDSet.end()) { + NumGenFacts += SummaryDSet.size() - 1; + } else { + NumGenFacts += SummaryDSet.size(); + } + } else { + ProcessSummaryFacts.emplace(Edge.second, D2); + } + PHASAR_LOG_LEVEL(DEBUG, "d2: " << DToString(D2)); + } + PHASAR_LOG_LEVEL(DEBUG, "----"); + } + } + // --- Return-flow Path Edges --- + // Since every fact passed to the callee was counted as a new fact, we + // have to count every fact propagated to the caller as a kill to + // satisfy our invariant. Obviously, every fact not propagated to the + // caller will count as a kill. If an actual new fact is propagated to + // the caller, we have to increase the number of generated facts by one. + // Zero value does not count towards generated/killed facts. 
+ if (ICF->isExitInst(Cell.getRowKey())) { + for (auto &[D1, D2s] : Cell.getValue()) { + PHASAR_LOG_LEVEL(DEBUG, "d1: " << DToString(D1)); + NumInterPathEdges += D2s.size(); + auto CallerFacts = ValidInCallerContext[Edge.second]; + for (auto D2 : D2s) { + // d2 not valid in caller context + if (CallerFacts.find(D2) == CallerFacts.end()) { + NumGenFacts++; + } + PHASAR_LOG_LEVEL(DEBUG, "d2: " << DToString(D2)); + } + PHASAR_LOG_LEVEL(DEBUG, "----"); + } + } + PHASAR_LOG_LEVEL(DEBUG, " "); + } + PHASAR_LOG_LEVEL(DEBUG, "SUMMARY REUSE"); + std::size_t TotalSummaryReuse = 0; + for (const auto &Entry : FSummaryReuse) { + PHASAR_LOG_LEVEL(DEBUG, "N1: " << NToString(Entry.first.first)); + PHASAR_LOG_LEVEL(DEBUG, "D1: " << DToString(Entry.first.second)); + PHASAR_LOG_LEVEL(DEBUG, "#Reuse: " << Entry.second); + TotalSummaryReuse += Entry.second; + } + INC_COUNTER("Gen facts", NumGenFacts, Core); + INC_COUNTER("Summary-reuse", TotalSummaryReuse, Core); + INC_COUNTER("Intra Path Edges", NumIntraPathEdges, Core); + INC_COUNTER("Inter Path Edges", NumInterPathEdges, Core); + + PHASAR_LOG_LEVEL(INFO, "----------------------------------------------"); + PHASAR_LOG_LEVEL(INFO, "=== Solver Statistics ==="); + PHASAR_LOG_LEVEL(INFO, "#Facts generated : " << GET_COUNTER("Gen facts")); + PHASAR_LOG_LEVEL(INFO, "#Facts killed : " << GET_COUNTER("Kill facts")); + PHASAR_LOG_LEVEL(INFO, + "#Summary-reuse : " << GET_COUNTER("Summary-reuse")); + PHASAR_LOG_LEVEL(INFO, + "#Intra Path Edges: " << GET_COUNTER("Intra Path Edges")); + PHASAR_LOG_LEVEL(INFO, + "#Inter Path Edges: " << GET_COUNTER("Inter Path Edges")); + if constexpr (PAMM_CURR_SEV_LEVEL >= PAMM_SEVERITY_LEVEL::Full) { + PHASAR_LOG_LEVEL( + INFO, "Flow function query count: " << GET_COUNTER("FF Queries")); + PHASAR_LOG_LEVEL( + INFO, "Edge function query count: " << GET_COUNTER("EF Queries")); + PHASAR_LOG_LEVEL(INFO, "Data-flow value propagation count: " + << GET_COUNTER("Value Propagation")); + PHASAR_LOG_LEVEL(INFO, "Data-flow 
value computation count: " + << GET_COUNTER("Value Computation")); + PHASAR_LOG_LEVEL(INFO, + "Special flow function usage count: " + << GET_COUNTER("SpecialSummary-FF Application")); + PHASAR_LOG_LEVEL(INFO, "Jump function construciton count: " + << GET_COUNTER("JumpFn Construction")); + PHASAR_LOG_LEVEL(INFO, + "Phase I duration: " << PRINT_TIMER("DFA Phase I")); + PHASAR_LOG_LEVEL(INFO, + "Phase II duration: " << PRINT_TIMER("DFA Phase II")); + PHASAR_LOG_LEVEL(INFO, "----------------------------------------------"); + CachedFlowEdgeFunctions.print(); + } + } + +public: + void enableESGAsDot() { SolverConfig.setEmitESG(); } + + void + emitESGAsDot(llvm::raw_ostream &OS = llvm::outs(), + llvm::StringRef DotConfigDir = PhasarConfig::PhasarDirectory()) { + PHASAR_LOG_LEVEL(DEBUG, "Emit Exploded super-graph (ESG) as DOT graph"); + PHASAR_LOG_LEVEL(DEBUG, "Process intra-procedural path egdes"); + PHASAR_LOG_LEVEL(DEBUG, "============================================="); + DOTGraph G; + DOTConfig::importDOTConfig(DotConfigDir); + DOTFunctionSubGraph *FG = nullptr; + + // Sort intra-procedural path edges + auto Cells = ComputedIntraPathEdges.cellVec(); + StmtLess Stmtless(ICF); + sort(Cells.begin(), Cells.end(), [&Stmtless](auto Lhs, auto Rhs) { + return Stmtless(Lhs.getRowKey(), Rhs.getRowKey()); + }); + for (const auto &Cell : Cells) { + auto Edge = std::make_pair(Cell.getRowKey(), Cell.getColumnKey()); + std::string N1Label = NToString(Edge.first); + std::string N2Label = NToString(Edge.second); + PHASAR_LOG_LEVEL(DEBUG, "N1: " << N1Label); + PHASAR_LOG_LEVEL(DEBUG, "N2: " << N2Label); + std::string N1StmtId = ICF->getStatementId(Edge.first); + std::string N2StmtId = ICF->getStatementId(Edge.second); + std::string FuncName = ICF->getFunctionOf(Edge.first)->getName().str(); + // Get or create function subgraph + if (!FG || FG->Id != FuncName) { + FG = &G.Functions[FuncName]; + FG->Id = FuncName; + } + + // Create control flow nodes + DOTNode N1(FuncName, N1Label, 
N1StmtId); + DOTNode N2(FuncName, N2Label, N2StmtId); + // Add control flow node(s) to function subgraph + FG->Stmts.insert(N1); + if (ICF->isExitInst(Edge.second)) { + FG->Stmts.insert(N2); + } + + // Set control flow edge + FG->IntraCFEdges.emplace(N1, N2); + + DOTFactSubGraph *D1FSG = nullptr; + unsigned D1FactId = 0; + unsigned D2FactId = 0; + for (const auto &D1ToD2Set : Cell.getValue()) { + auto D1Fact = D1ToD2Set.first; + PHASAR_LOG_LEVEL(DEBUG, "d1: " << DToString(D1Fact)); + + DOTNode D1; + if (IDEProblem.isZeroValue(D1Fact)) { + D1 = {FuncName, "Λ", N1StmtId, 0, false, true}; + D1FactId = 0; + } else { + // Get the fact-ID + D1FactId = G.getFactID(D1Fact); + std::string D1Label = DToString(D1Fact); + + // Get or create the fact subgraph + D1FSG = FG->getOrCreateFactSG(D1FactId, D1Label); + + // Insert D1 to fact subgraph + D1 = {FuncName, D1Label, N1StmtId, D1FactId, false, true}; + D1FSG->Nodes.insert(std::make_pair(N1StmtId, D1)); + } + + DOTFactSubGraph *D2FSG = nullptr; + for (const auto &D2Fact : D1ToD2Set.second) { + PHASAR_LOG_LEVEL(DEBUG, "d2: " << DToString(D2Fact)); + // We do not need to generate any intra-procedural nodes and edges + // for the zero value since they will be auto-generated + if (!IDEProblem.isZeroValue(D2Fact)) { + // Get the fact-ID + D2FactId = G.getFactID(D2Fact); + std::string D2Label = DToString(D2Fact); + DOTNode D2 = {FuncName, D2Label, N2StmtId, D2FactId, false, true}; + std::string EFLabel; + auto EFVec = IntermediateEdgeFunctions[std::make_tuple( + Edge.first, D1Fact, Edge.second, D2Fact)]; + for (const auto &EF : EFVec) { + EFLabel += to_string(EF) + ", "; + } + PHASAR_LOG_LEVEL(DEBUG, "EF LABEL: " << EFLabel); + if (D1FactId == D2FactId && !IDEProblem.isZeroValue(D1Fact)) { + assert(D1FSG && "D1_FSG was nullptr but should be valid."); + D1FSG->Nodes.insert(std::make_pair(N2StmtId, D2)); + D1FSG->Edges.emplace(D1, D2, true, EFLabel); + } else { + // Get or create the fact subgraph + D2FSG = 
FG->getOrCreateFactSG(D2FactId, D2Label); + + D2FSG->Nodes.insert(std::make_pair(N2StmtId, D2)); + FG->CrossFactEdges.emplace(D1, D2, true, EFLabel); + } + } + } + PHASAR_LOG_LEVEL(DEBUG, "----------"); + } + PHASAR_LOG_LEVEL(DEBUG, " "); + } + + PHASAR_LOG_LEVEL(DEBUG, "============================================="); + PHASAR_LOG_LEVEL(DEBUG, "Process inter-procedural path edges"); + PHASAR_LOG_LEVEL(DEBUG, "============================================="); + Cells = ComputedInterPathEdges.cellVec(); + sort(Cells.begin(), Cells.end(), [&Stmtless](auto Lhs, auto Rhs) { + return Stmtless(Lhs.getRowKey(), Rhs.getRowKey()); + }); + for (const auto &Cell : Cells) { + auto Edge = std::make_pair(Cell.getRowKey(), Cell.getColumnKey()); + std::string N1Label = NToString(Edge.first); + std::string N2Label = NToString(Edge.second); + std::string FNameOfN1 = ICF->getFunctionOf(Edge.first)->getName().str(); + std::string FNameOfN2 = ICF->getFunctionOf(Edge.second)->getName().str(); + std::string N1StmtId = ICF->getStatementId(Edge.first); + std::string N2StmtId = ICF->getStatementId(Edge.second); + PHASAR_LOG_LEVEL(DEBUG, "N1: " << N1Label); + PHASAR_LOG_LEVEL(DEBUG, "N2: " << N2Label); + + // Add inter-procedural control flow edge + DOTNode N1(FNameOfN1, N1Label, N1StmtId); + DOTNode N2(FNameOfN2, N2Label, N2StmtId); + + // Handle recursion control flow as intra-procedural control flow + // since those eges never leave the function subgraph + FG = nullptr; + if (FNameOfN1 == FNameOfN2) { + // This function subgraph is guaranteed to exist + FG = &G.Functions[FNameOfN1]; + FG->IntraCFEdges.emplace(N1, N2); + } else { + // Check the case where the callee is a single statement function, + // thus does not contain intra-procedural path edges. We have to + // generate the function sub graph here! 
+ if (!G.Functions.count(FNameOfN1)) { + FG = &G.Functions[FNameOfN1]; + FG->Id = FNameOfN1; + FG->Stmts.insert(N1); + } else if (!G.Functions.count(FNameOfN2)) { + FG = &G.Functions[FNameOfN2]; + FG->Id = FNameOfN2; + FG->Stmts.insert(N2); + } + G.InterCFEdges.emplace(N1, N2); + } + + // Create D1 and D2, if D1 == D2 == lambda then add Edge(D1, D2) to + // interLambdaEges otherwise add Edge(D1, D2) to interFactEdges + unsigned D1FactId = 0; + unsigned D2FactId = 0; + for (const auto &D1ToD2Set : Cell.getValue()) { + auto D1Fact = D1ToD2Set.first; + PHASAR_LOG_LEVEL(DEBUG, "d1: " << DToString(D1Fact)); + DOTNode D1; + if (IDEProblem.isZeroValue(D1Fact)) { + D1 = {FNameOfN1, "Λ", N1StmtId, 0, false, true}; + } else { + // Get the fact-ID + D1FactId = G.getFactID(D1Fact); + std::string D1Label = DToString(D1Fact); + D1 = {FNameOfN1, D1Label, N1StmtId, D1FactId, false, true}; + // FG should already exist even for single statement functions + if (!G.containsFactSG(FNameOfN1, D1FactId)) { + FG = &G.Functions[FNameOfN1]; + auto *D1FSG = FG->getOrCreateFactSG(D1FactId, D1Label); + D1FSG->Nodes.insert(std::make_pair(N1StmtId, D1)); + } + } + + auto D2Set = D1ToD2Set.second; + for (const auto &D2Fact : D2Set) { + PHASAR_LOG_LEVEL(DEBUG, "d2: " << DToString(D2Fact)); + DOTNode D2; + if (IDEProblem.isZeroValue(D2Fact)) { + D2 = {FNameOfN2, "Λ", N2StmtId, 0, false, true}; + } else { + // Get the fact-ID + D2FactId = G.getFactID(D2Fact); + std::string D2Label = DToString(D2Fact); + D2 = {FNameOfN2, D2Label, N2StmtId, D2FactId, false, true}; + // FG should already exist even for single statement functions + if (!G.containsFactSG(FNameOfN2, D2FactId)) { + FG = &G.Functions[FNameOfN2]; + auto *D2FSG = FG->getOrCreateFactSG(D2FactId, D2Label); + D2FSG->Nodes.insert(std::make_pair(N2StmtId, D2)); + } + } + + if (IDEProblem.isZeroValue(D1Fact) && + IDEProblem.isZeroValue(D2Fact)) { + // Do not add lambda recursion edges as inter-procedural edges + if (D1.FuncName != D2.FuncName) { + 
G.InterLambdaEdges.emplace(D1, D2, true, "AllBottom", "BOT"); + } + } else { + // std::string EFLabel = EF ? EF->str() : " "; + std::string EFLabel; + auto EFVec = IntermediateEdgeFunctions[std::make_tuple( + Edge.first, D1Fact, Edge.second, D2Fact)]; + for (const auto &EF : EFVec) { + PHASAR_LOG_LEVEL(DEBUG, "Partial EF Label: " << EF); + EFLabel.append(to_string(EF) + ", "); + } + PHASAR_LOG_LEVEL(DEBUG, "EF LABEL: " << EFLabel); + G.InterFactEdges.emplace(D1, D2, true, EFLabel); + } + } + PHASAR_LOG_LEVEL(DEBUG, "----------"); + } + PHASAR_LOG_LEVEL(DEBUG, " "); + } + OS << G; + } + +private: + /// @brief: Allows less-than comparison based on the statement ID. + struct StmtLess { + const i_t *ICF; + StringIDLess StrIDLess; + StmtLess(const i_t *ICF) : ICF(ICF), StrIDLess(StringIDLess()) {} + bool operator()(n_t Lhs, n_t Rhs) { + return StrIDLess(ICF->getStatementId(Lhs), ICF->getStatementId(Rhs)); + } + }; + + /// -- InteractiveIDESolverMixin implementation + + bool doInitialize() { + PAMM_GET_INSTANCE; + REG_COUNTER("Gen facts", 0, Core); + REG_COUNTER("Kill facts", 0, Core); + REG_COUNTER("Summary-reuse", 0, Core); + REG_COUNTER("Intra Path Edges", 0, Core); + REG_COUNTER("Inter Path Edges", 0, Core); + REG_COUNTER("FF Queries", 0, Full); + REG_COUNTER("EF Queries", 0, Full); + REG_COUNTER("Value Propagation", 0, Full); + REG_COUNTER("Value Computation", 0, Full); + REG_COUNTER("SpecialSummary-FF Application", 0, Full); + REG_COUNTER("SpecialSummary-EF Queries", 0, Full); + REG_COUNTER("JumpFn Construction", 0, Full); + REG_COUNTER("Process Call", 0, Full); + REG_COUNTER("Process Normal", 0, Full); + REG_COUNTER("Process Exit", 0, Full); + REG_COUNTER("[Calls] getAliasSet", 0, Full); + REG_HISTOGRAM("Data-flow facts", Full); + REG_HISTOGRAM("Points-to", Full); + + PHASAR_LOG_LEVEL(INFO, "IDE solver is solving the specified problem"); + PHASAR_LOG_LEVEL(INFO, + "Submit initial seeds, construct exploded super graph"); + // computations starting here + 
START_TIMER("DFA Phase I", Full); + + // We start our analysis and construct exploded supergraph + submitInitialSeeds(); + return !WorkList.empty(); + } + + bool doNext() { + assert(!WorkList.empty()); + auto [Edge, EF] = std::move(WorkList.back()); + WorkList.pop_back(); + + propagate(std::move(Edge), std::move(EF)); + + return !WorkList.empty(); + } + + void finalizeInternal() { + PAMM_GET_INSTANCE; + STOP_TIMER("DFA Phase I", Full); + if (SolverConfig.computeValues()) { + START_TIMER("DFA Phase II", Full); + // Computing the final values for the edge functions + PHASAR_LOG_LEVEL( + INFO, "Compute the final values according to the edge functions"); + computeValues(); + STOP_TIMER("DFA Phase II", Full); + } + PHASAR_LOG_LEVEL(INFO, "Problem solved"); + if constexpr (PAMM_CURR_SEV_LEVEL >= PAMM_SEVERITY_LEVEL::Core) { + computeAndPrintStatistics(); + } + if (SolverConfig.emitESG()) { + emitESGAsDot(); + } + } + + SolverResults doFinalize() & { + finalizeInternal(); + return getSolverResults(); + } + + OwningSolverResults doFinalize() && { + finalizeInternal(); + return consumeSolverResults(); + } + + /// -- Data members + + IDETabulationProblem &IDEProblem; + d_t ZeroValue; + const i_t *ICF; + IFDSIDESolverConfig &SolverConfig; + + std::vector, EdgeFunction>> WorkList; + std::vector> ValuePropWL; + + size_t PathEdgeCount = 0; + + FlowEdgeFunctionCache CachedFlowEdgeFunctions; + + Table> ComputedIntraPathEdges; + + Table> ComputedInterPathEdges; + + EdgeFunction AllTop; + + std::shared_ptr> JumpFn; + + std::map, std::vector>> + IntermediateEdgeFunctions; + + // stores summaries that were queried before they were computed + // see CC 2010 paper by Naeem, Lhotak and Rodriguez + Table>> EndsummaryTab; + + // edges going along calls + // see CC 2010 paper by Naeem, Lhotak and Rodriguez + Table> IncomingTab; + + // stores the return sites (inside callers) to which we have unbalanced + // returns if SolverConfig.followReturnPastSeeds is enabled + std::set 
UnbalancedRetSites; + + InitialSeeds Seeds; + + Table ValTab; + + std::map, size_t> FSummaryReuse; +}; + +template +IDESolver(Problem &, ICF *, PropagateOntoStrategy) + -> IDESolver; + +template +OwningSolverResults +solveIDEProblem(IDETabulationProblem &Problem, + const typename AnalysisDomainTy::i_t &ICF, + PropagateOntoStrategy Strategy) { + IDESolver Solver(Problem, &ICF, Strategy); + Solver.solve(); + return Solver.consumeSolverResults(); +} + +} // namespace psr + +#endif diff --git a/include/phasar/DataFlow/IfdsIde/Solver/IDESolver.h b/include/phasar/DataFlow/IfdsIde/Solver/IDESolver.h index 747d7d7d7..b9a272288 100644 --- a/include/phasar/DataFlow/IfdsIde/Solver/IDESolver.h +++ b/include/phasar/DataFlow/IfdsIde/Solver/IDESolver.h @@ -30,6 +30,7 @@ #include "phasar/DataFlow/IfdsIde/Solver/IDESolverAPIMixin.h" #include "phasar/DataFlow/IfdsIde/Solver/JumpFunctions.h" #include "phasar/DataFlow/IfdsIde/Solver/PathEdge.h" +#include "phasar/DataFlow/IfdsIde/Solver/SolverStrategy.h" #include "phasar/DataFlow/IfdsIde/SolverResults.h" #include "phasar/Domain/AnalysisDomain.h" #include "phasar/Utils/DOTGraph.h" @@ -58,9 +59,8 @@ namespace psr { /// Solves the given IDETabulationProblem as described in the 1996 paper by /// Sagiv, Horwitz and Reps. To solve the problem, call solve(). Results /// can then be queried by using resultAt() and resultsAt(). 
-template > -class IDESolver +template +class IDESolver : public IDESolverAPIMixin> { friend IDESolverAPIMixin>; @@ -78,7 +78,7 @@ class IDESolver using v_t = typename AnalysisDomainTy::v_t; IDESolver(IDETabulationProblem &Problem, - const i_t *ICF) + const i_t *ICF, PropagateAfterStrategy /*Strategy*/ = {}) : IDEProblem(Problem), ZeroValue(Problem.getZeroValue()), ICF(ICF), SolverConfig(Problem.getIFDSIDESolverConfig()), CachedFlowEdgeFunctions(Problem), AllTop(Problem.allTopFunction()), @@ -1741,30 +1741,14 @@ class IDESolver std::map, size_t> FSummaryReuse; }; -template -llvm::raw_ostream & -operator<<(llvm::raw_ostream &OS, - const IDESolver &Solver) { - Solver.dumpResults(OS); - return OS; -} - -template -IDESolver(Problem &, ICF *) - -> IDESolver; - -template -using IDESolver_P = IDESolver; - template OwningSolverResults solveIDEProblem(IDETabulationProblem &Problem, - const typename AnalysisDomainTy::i_t &ICF) { - IDESolver Solver(Problem, &ICF); + const typename AnalysisDomainTy::i_t &ICF, + PropagateAfterStrategy Strategy = {}) { + IDESolver Solver(Problem, &ICF, Strategy); Solver.solve(); return Solver.consumeSolverResults(); } diff --git a/include/phasar/DataFlow/IfdsIde/Solver/IFDSSolver.h b/include/phasar/DataFlow/IfdsIde/Solver/IFDSSolver.h index 3a5f20d8f..6a8201f94 100644 --- a/include/phasar/DataFlow/IfdsIde/Solver/IFDSSolver.h +++ b/include/phasar/DataFlow/IfdsIde/Solver/IFDSSolver.h @@ -19,6 +19,7 @@ #include "phasar/DataFlow/IfdsIde/IFDSTabulationProblem.h" #include "phasar/DataFlow/IfdsIde/Solver/IDESolver.h" +#include "phasar/DataFlow/IfdsIde/Solver/SolverStrategy.h" #include "phasar/Domain/BinaryDomain.h" #include @@ -28,8 +29,7 @@ namespace psr { -template > +template class IFDSSolver : public IDESolver, Container> { public: @@ -42,8 +42,9 @@ class IFDSSolver typename = std::enable_if_t< std::is_base_of_v>> IFDSSolver(IFDSTabulationProblem &IFDSProblem, - const i_t *ICF) - : IDESolver>(IFDSProblem, ICF) {} + const i_t *ICF, Strategy S = {}) 
+ : IDESolver, Container, Strategy>( + IFDSProblem, ICF) {} ~IFDSSolver() override = default; @@ -100,19 +101,25 @@ class IFDSSolver template IFDSSolver(Problem &, ICF *) -> IFDSSolver; + typename Problem::container_type, PropagateAfterStrategy>; -template +template +IFDSSolver(Problem &, ICF *, Strategy) + -> IFDSSolver; + +template using IFDSSolver_P = IFDSSolver; + typename Problem::container_type, Strategy>; -template +template OwningSolverResults solveIFDSProblem(IFDSTabulationProblem &Problem, - const typename AnalysisDomainTy::i_t &ICF) { - IFDSSolver Solver(Problem, &ICF); + const typename AnalysisDomainTy::i_t &ICF, Strategy S = {}) { + IFDSSolver Solver(Problem, &ICF, S); Solver.solve(); return Solver.consumeSolverResults(); } diff --git a/include/phasar/DataFlow/IfdsIde/Solver/SolverStrategy.h b/include/phasar/DataFlow/IfdsIde/Solver/SolverStrategy.h new file mode 100644 index 000000000..869667e01 --- /dev/null +++ b/include/phasar/DataFlow/IfdsIde/Solver/SolverStrategy.h @@ -0,0 +1,53 @@ +/****************************************************************************** + * Copyright (c) 2023 Fabian Schiebel. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. + * + * Contributors: + * Fabian Schiebel and others + *****************************************************************************/ + +#ifndef PHASAR_DATAFLOW_IFDSIDE_SOLVER_SOLVERSTRATEGY_H +#define PHASAR_DATAFLOW_IFDSIDE_SOLVER_SOLVERSTRATEGY_H + +namespace psr { + +enum class SolverStrategyKind { + /// Propagate the data-flow effects of an instruction to the start of the + /// successor instructions. This is the default strategy + PropagateAfter, + // Propagate the data-flow effects of an instruction onto the same + // instruction. 
+ PropagateOnto, +}; + +struct SolverStrategy {}; + +struct PropagateAfterStrategy : SolverStrategy { + static constexpr auto Kind = SolverStrategyKind::PropagateAfter; +}; + +struct PropagateOntoStrategy : SolverStrategy { + static constexpr auto Kind = SolverStrategyKind::PropagateOnto; +}; + +template +class IDESolver; + +template +IDESolver(Problem &, ICF *) + -> IDESolver; +template +IDESolver(Problem &, ICF *, PropagateAfterStrategy) + -> IDESolver; + +template +using IDESolver_P = IDESolver; + +} // namespace psr + +#endif // PHASAR_DATAFLOW_IFDSIDE_SOLVER_SOLVERSTRATEGY_H diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/OpenSSLEVPKDFCTXDescription.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/OpenSSLEVPKDFCTXDescription.h index 081b94a46..023b3698e 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/OpenSSLEVPKDFCTXDescription.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/OpenSSLEVPKDFCTXDescription.h @@ -73,14 +73,14 @@ class OpenSSLEVPKDFCTXDescription // std::map, int> // requiredKDFState; - IDESolver> + IDESolver_P> &KDFAnalysisResults; static OpenSSLEVTKDFToken funcNameToToken(llvm::StringRef F); public: using TypeStateDescription::getNextState; OpenSSLEVPKDFCTXDescription( - IDESolver> + IDESolver_P> &KDFAnalysisResults) : KDFAnalysisResults(KDFAnalysisResults) {} diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/OpenSSLSecureHeapDescription.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/OpenSSLSecureHeapDescription.h index cb69f4fe7..cd1eabe0b 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/OpenSSLSecureHeapDescription.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/OpenSSLSecureHeapDescription.h @@ -43,15 +43,14 @@ class OpenSSLSecureHeapDescription // Delta matrix to 
implement the state machine's Delta function static const OpenSSLSecureHeapState Delta[5][6]; - IDESolver - &SecureHeapPropagationResults; + IDESolver_P &SecureHeapPropagationResults; static OpenSSLSecureHeapToken funcNameToToken(llvm::StringRef F); public: using TypeStateDescription::getNextState; - OpenSSLSecureHeapDescription(IDESolver - &SecureHeapPropagationResults); + OpenSSLSecureHeapDescription( + IDESolver_P &SecureHeapPropagationResults); [[nodiscard]] bool isFactoryFunction(llvm::StringRef F) const override; [[nodiscard]] bool isConsumingFunction(llvm::StringRef F) const override; diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/OpenSSLSecureHeapDescription.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/OpenSSLSecureHeapDescription.cpp index 3c0a90a67..5f2502ece 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/OpenSSLSecureHeapDescription.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/OpenSSLSecureHeapDescription.cpp @@ -52,8 +52,7 @@ const OpenSSLSecureHeapState OpenSSLSecureHeapDescription::Delta[5][6] = { OpenSSLSecureHeapState::FREED, OpenSSLSecureHeapState::ERROR}, }; OpenSSLSecureHeapDescription::OpenSSLSecureHeapDescription( - IDESolver - &SecureHeapPropagationResults) + IDESolver_P &SecureHeapPropagationResults) : SecureHeapPropagationResults(SecureHeapPropagationResults) {} bool OpenSSLSecureHeapDescription::isFactoryFunction(llvm::StringRef F) const { diff --git a/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEGeneralizedLCATest.cpp b/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEGeneralizedLCATest.cpp index 17a74b610..3170a7bae 100644 --- a/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEGeneralizedLCATest.cpp +++ b/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEGeneralizedLCATest.cpp @@ -43,7 +43,7 @@ class IDEGeneralizedLCATest : public ::testing::Test { std::optional HA; std::optional LCAProblem; - std::unique_ptr> LCASolver; + 
std::unique_ptr> LCASolver; static constexpr size_t MaxSetSize = 2; @@ -54,7 +54,7 @@ class IDEGeneralizedLCATest : public ::testing::Test { HA.emplace(PathToLLFiles + LLFile, std::vector{"main"s}); LCAProblem = createAnalysisProblem( *HA, std::vector{"main"s}, MaxSetSize); - LCASolver = std::make_unique>( + LCASolver = std::make_unique>( *LCAProblem, &HA->getICFG()); LCASolver->solve(); From 7957bd32b66620851b4ffbfabe52fd23271d8fa7 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Tue, 19 Sep 2023 14:39:38 +0200 Subject: [PATCH 03/11] Add unittest (fact propagation works, need to fix value propagation -- phase II) --- .../DataFlow/IfdsIde/Solver/EagerIDESolver.h | 52 +++--- .../DataFlow/IfdsIde/Solver/IDESolver.h | 6 +- .../DataFlow/IfdsIde/Solver/IFDSSolver.h | 2 +- .../DataFlow/IfdsIde/CMakeLists.txt | 1 + .../DataFlow/IfdsIde/EagerIDESolverTest.cpp | 155 ++++++++++++++++++ .../IfdsIde/InteractiveIDESolverTest.cpp | 2 + 6 files changed, 189 insertions(+), 29 deletions(-) create mode 100644 unittests/PhasarLLVM/DataFlow/IfdsIde/EagerIDESolverTest.cpp diff --git a/include/phasar/DataFlow/IfdsIde/Solver/EagerIDESolver.h b/include/phasar/DataFlow/IfdsIde/Solver/EagerIDESolver.h index 694f6622a..269974859 100644 --- a/include/phasar/DataFlow/IfdsIde/Solver/EagerIDESolver.h +++ b/include/phasar/DataFlow/IfdsIde/Solver/EagerIDESolver.h @@ -64,8 +64,10 @@ namespace psr { /// can then be queried by using resultAt() and resultsAt(). 
template class IDESolver - : public IDESolverAPIMixin> { - friend IDESolverAPIMixin>; + : public IDESolverAPIMixin< + IDESolver> { + friend IDESolverAPIMixin< + IDESolver>; public: using ProblemTy = IDETabulationProblem; @@ -261,8 +263,9 @@ class IDESolver protected: void addWorklistItem(d_t SourceVal, n_t Target, d_t TargetVal, EdgeFunction EF) { - WorkList.emplace_back(PathEdge{std::move( - SourceVal, std::move(Target), std::move(TargetVal), std::move(EF))}); + WorkList.emplace_back( + PathEdge{std::move(SourceVal), std::move(Target), std::move(TargetVal)}, + std::move(EF)); } bool updateJumpFunction(d_t SourceVal, n_t Target, d_t TargetVal, @@ -286,18 +289,18 @@ class IDESolver EdgeFunction fPrime = JumpFnE.joinWith(*f); bool NewFunction = fPrime != JumpFnE; if (NewFunction) { - *f = fPrime; - JumpFn->addFunction(std::move(SourceVal), std::move(Target), - std::move(TargetVal), std::move(fPrime)); - IF_LOG_ENABLED( PHASAR_LOG_LEVEL( - DEBUG, "Join: " << JumpFnE << " & " << f - << (JumpFnE == f ? " (EF's are equal)" : " ")); + DEBUG, "Join: " << JumpFnE << " & " << *f + << (JumpFnE == *f ? " (EF's are equal)" : " ")); PHASAR_LOG_LEVEL( DEBUG, " = " << f << (NewFunction ? 
" (new jump func)" : " ")); PHASAR_LOG_LEVEL(DEBUG, ' ')); + *f = fPrime; + JumpFn->addFunction(std::move(SourceVal), std::move(Target), + std::move(TargetVal), std::move(fPrime)); + IF_LOG_ENABLED(if (!IDEProblem.isZeroValue(TargetVal)) { PHASAR_LOG_LEVEL(DEBUG, "EDGE: getFunctionOf(Target)) @@ -528,6 +531,7 @@ class IDESolver .push_back(EdgeFnE); } INC_COUNTER("EF Queries", 1, Full); + PHASAR_LOG_LEVEL(DEBUG, "Compose: " << EdgeFnE << " * " << f); auto fPrime = f.composeWith(EdgeFnE); PHASAR_LOG_LEVEL(DEBUG, "Compose: " << EdgeFnE << " * " << f << " = " << fPrime); @@ -712,9 +716,10 @@ class IDESolver << " ;"); PHASAR_LOG_LEVEL(DEBUG, " N target: " << NToString(Edge.getTarget()) << " ;"); - PHASAR_LOG_LEVEL(DEBUG, " D target: " << DToString(Edge.factAtTarget()) - << " >"); - PHASAR_LOG_LEVEL(DEBUG, ' ')); + PHASAR_LOG_LEVEL(DEBUG, + " D target: " << DToString(Edge.factAtTarget())); + PHASAR_LOG_LEVEL(DEBUG, " J jump-function: " << EF << " >") + PHASAR_LOG_LEVEL(DEBUG, ' ')); if (!ICF->isCallSite(Edge.getTarget())) { if (ICF->isExitInst(Edge.getTarget())) { @@ -862,7 +867,7 @@ class IDESolver /// TODO: Do we have to add EdgeIdentity to the JF-table in advance? 
/// Probably not - WorkList.emplace_back(Fact, StartPoint, Fact); + addWorklistItem(Fact, StartPoint, Fact, EdgeIdentity{}); } } } @@ -959,15 +964,9 @@ class IDESolver d_t d3 = ValAndFunc.first; d_t d5_restoredCtx = restoreContextOnReturnedFact(c, d4, d5); PHASAR_LOG_LEVEL(DEBUG, "Compose: " << fPrime << " * " << f3); - if (updateJumpFunction(d3, c, d5_restoredCtx, - f3.composeWith(fPrime))) { - UpdatedFacts.insert(d5_restoredCtx); - WorkList.emplace_back(std::move(d3), RetSiteC, - std::move(d5_restoredCtx)); - } else if (UpdatedFacts.contains(d5_restoredCtx)) { - WorkList.emplace_back(std::move(d3), RetSiteC, - std::move(d5_restoredCtx)); - } + updateWithNewEdge(UpdatedFacts, std::move(d3), c, RetSiteC, + std::move(d5_restoredCtx), + f3.composeWith(fPrime)); } } } @@ -1027,9 +1026,8 @@ class IDESolver void propagteUnbalancedReturnFlow(n_t RetSiteC, d_t TargetVal, EdgeFunction EdgeFunc, n_t /*RelatedCallSite*/) { - WorkList.emplace_back( - PathEdge{ZeroValue, std::move(RetSiteC), std::move(TargetVal)}, - std::move(EdgeFunc)); + addWorklistItem(ZeroValue, std::move(RetSiteC), std::move(TargetVal), + std::move(EdgeFunc)); } /// This method will be called for each incoming edge and can be used to @@ -1138,6 +1136,8 @@ class IDESolver PHASAR_LOG_LEVEL(DEBUG, "Source value : " << DToString(SourceVal)); PHASAR_LOG_LEVEL(DEBUG, "Target : " << NToString(Target)); PHASAR_LOG_LEVEL(DEBUG, "Target value : " << DToString(TargetVal)); + PHASAR_LOG_LEVEL(DEBUG, "Edge Function : " << EF); + llvm::errs().flush(); PathEdgeCount++; pathEdgeProcessingTask(std::move(Edge), std::move(EF)); diff --git a/include/phasar/DataFlow/IfdsIde/Solver/IDESolver.h b/include/phasar/DataFlow/IfdsIde/Solver/IDESolver.h index b9a272288..b2881843c 100644 --- a/include/phasar/DataFlow/IfdsIde/Solver/IDESolver.h +++ b/include/phasar/DataFlow/IfdsIde/Solver/IDESolver.h @@ -61,8 +61,10 @@ namespace psr { /// can then be queried by using resultAt() and resultsAt(). 
template class IDESolver - : public IDESolverAPIMixin> { - friend IDESolverAPIMixin>; + : public IDESolverAPIMixin< + IDESolver> { + friend IDESolverAPIMixin< + IDESolver>; public: using ProblemTy = IDETabulationProblem; diff --git a/include/phasar/DataFlow/IfdsIde/Solver/IFDSSolver.h b/include/phasar/DataFlow/IfdsIde/Solver/IFDSSolver.h index 6a8201f94..9c31225cd 100644 --- a/include/phasar/DataFlow/IfdsIde/Solver/IFDSSolver.h +++ b/include/phasar/DataFlow/IfdsIde/Solver/IFDSSolver.h @@ -44,7 +44,7 @@ class IFDSSolver IFDSSolver(IFDSTabulationProblem &IFDSProblem, const i_t *ICF, Strategy S = {}) : IDESolver, Container, Strategy>( - IFDSProblem, ICF) {} + IFDSProblem, ICF, S) {} ~IFDSSolver() override = default; diff --git a/unittests/PhasarLLVM/DataFlow/IfdsIde/CMakeLists.txt b/unittests/PhasarLLVM/DataFlow/IfdsIde/CMakeLists.txt index 9ab94b5db..9bfeb59f8 100644 --- a/unittests/PhasarLLVM/DataFlow/IfdsIde/CMakeLists.txt +++ b/unittests/PhasarLLVM/DataFlow/IfdsIde/CMakeLists.txt @@ -4,6 +4,7 @@ set(IfdsIdeSources EdgeFunctionComposerTest.cpp EdgeFunctionSingletonCacheTest.cpp InteractiveIDESolverTest.cpp + EagerIDESolverTest.cpp ) foreach(TEST_SRC ${IfdsIdeSources}) diff --git a/unittests/PhasarLLVM/DataFlow/IfdsIde/EagerIDESolverTest.cpp b/unittests/PhasarLLVM/DataFlow/IfdsIde/EagerIDESolverTest.cpp new file mode 100644 index 000000000..d80046bb0 --- /dev/null +++ b/unittests/PhasarLLVM/DataFlow/IfdsIde/EagerIDESolverTest.cpp @@ -0,0 +1,155 @@ +#include "phasar/DataFlow/IfdsIde/Solver/EagerIDESolver.h" + +#include "phasar/DataFlow/IfdsIde/Solver/IDESolver.h" +#include "phasar/DataFlow/IfdsIde/Solver/SolverStrategy.h" +#include "phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h" +#include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" +#include "phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDELinearConstantAnalysis.h" +#include "phasar/PhasarLLVM/HelperAnalyses.h" +#include "phasar/PhasarLLVM/SimpleAnalysisConstructor.h" +#include "phasar/Utils/Logger.h" +#include 
"phasar/Utils/Printer.h" +#include "phasar/Utils/TypeTraits.h" + +#include "TestConfig.h" +#include "gtest/gtest.h" + +#include +#include + +using namespace psr; + +namespace { + +/* ============== TEST FIXTURE ============== */ +class LinearConstant : public ::testing::TestWithParam { +protected: + static constexpr auto PathToLlFiles = + PHASAR_BUILD_SUBFOLDER("linear_constant/"); + const std::vector EntryPoints = {"main"}; + +}; // Test Fixture + +TEST_P(LinearConstant, ResultsEquivalentPropagateOnto) { + HelperAnalyses HA(PathToLlFiles + GetParam(), EntryPoints); + + // Compute the ICFG to possibly create the runtime model + auto &ICFG = HA.getICFG(); + + auto HasGlobalCtor = HA.getProjectIRDB().getFunctionDefinition( + LLVMBasedICFG::GlobalCRuntimeModelName) != nullptr; + + auto LCAProblem = createAnalysisProblem( + HA, + std::vector{HasGlobalCtor ? LLVMBasedICFG::GlobalCRuntimeModelName.str() + : "main"}); + + auto PropagateOverResults = IDESolver(LCAProblem, &ICFG).solve(); + { + // psr::Logger::initializeStderrLogger(SeverityLevel::DEBUG); + + auto PropagateOntoResults = + IDESolver(LCAProblem, &ICFG, PropagateOntoStrategy{}).solve(); + + PropagateOntoResults.dumpResults(ICFG); + + for (auto &&Cell : PropagateOntoResults.getAllResultEntries()) { + const auto *Stmt = Cell.getRowKey(); + if (Stmt->isTerminator()) { + continue; + } + const auto *NextStmt = Stmt->getNextNonDebugInstruction(); + assert(NextStmt != nullptr); + + auto PropagateOverRes = + PropagateOverResults.resultAt(NextStmt, Cell.getColumnKey()); + EXPECT_EQ(PropagateOverRes, Cell.getValue()) + << "The Incoming results of the eager IDE solver should match the " + "outgoing results of the default solver. 
Expected: (" + << NToString(NextStmt) << ", " << DToString(Cell.getColumnKey()) + << ") --> " << LToString(PropagateOverRes) << "; got " + << LToString(Cell.getValue()); + } + } +} + +static constexpr std::string_view LCATestFiles[] = { + "basic_01_cpp_dbg.ll", + "basic_02_cpp_dbg.ll", + "basic_03_cpp_dbg.ll", + "basic_04_cpp_dbg.ll", + "basic_05_cpp_dbg.ll", + "basic_06_cpp_dbg.ll", + "basic_07_cpp_dbg.ll", + "basic_08_cpp_dbg.ll", + "basic_09_cpp_dbg.ll", + "basic_10_cpp_dbg.ll", + "basic_11_cpp_dbg.ll", + "basic_12_cpp_dbg.ll", + + "branch_01_cpp_dbg.ll", + "branch_02_cpp_dbg.ll", + "branch_03_cpp_dbg.ll", + "branch_04_cpp_dbg.ll", + "branch_05_cpp_dbg.ll", + "branch_06_cpp_dbg.ll", + "branch_07_cpp_dbg.ll", + + "while_01_cpp_dbg.ll", + "while_02_cpp_dbg.ll", + "while_03_cpp_dbg.ll", + "while_04_cpp_dbg.ll", + "while_05_cpp_dbg.ll", + "for_01_cpp_dbg.ll", + + "call_01_cpp_dbg.ll", + "call_02_cpp_dbg.ll", + "call_03_cpp_dbg.ll", + "call_04_cpp_dbg.ll", + "call_05_cpp_dbg.ll", + "call_06_cpp_dbg.ll", + "call_07_cpp_dbg.ll", + "call_08_cpp_dbg.ll", + "call_09_cpp_dbg.ll", + "call_10_cpp_dbg.ll", + "call_11_cpp_dbg.ll", + + "recursion_01_cpp_dbg.ll", + "recursion_02_cpp_dbg.ll", + "recursion_03_cpp_dbg.ll", + + "global_01_cpp_dbg.ll", + "global_02_cpp_dbg.ll", + "global_03_cpp_dbg.ll", + "global_04_cpp_dbg.ll", + "global_05_cpp_dbg.ll", + "global_06_cpp_dbg.ll", + "global_07_cpp_dbg.ll", + "global_08_cpp_dbg.ll", + "global_09_cpp_dbg.ll", + "global_10_cpp_dbg.ll", + "global_11_cpp_dbg.ll", + "global_12_cpp_dbg.ll", + "global_13_cpp_dbg.ll", + "global_14_cpp_dbg.ll", + "global_15_cpp_dbg.ll", + "global_16_cpp_dbg.ll", + + "overflow_add_cpp_dbg.ll", + "overflow_sub_cpp_dbg.ll", + "overflow_mul_cpp_dbg.ll", + "overflow_div_min_by_neg_one_cpp_dbg.ll", + + "ub_division_by_zero_cpp_dbg.ll", + "ub_modulo_by_zero_cpp_dbg.ll", +}; + +INSTANTIATE_TEST_SUITE_P(InteractiveIDESolverTest, LinearConstant, + ::testing::ValuesIn(LCATestFiles)); +} // namespace + +// main function 
for the test case +int main(int Argc, char **Argv) { + ::testing::InitGoogleTest(&Argc, Argv); + return RUN_ALL_TESTS(); +} diff --git a/unittests/PhasarLLVM/DataFlow/IfdsIde/InteractiveIDESolverTest.cpp b/unittests/PhasarLLVM/DataFlow/IfdsIde/InteractiveIDESolverTest.cpp index 6ff07a20f..bb0ed4427 100644 --- a/unittests/PhasarLLVM/DataFlow/IfdsIde/InteractiveIDESolverTest.cpp +++ b/unittests/PhasarLLVM/DataFlow/IfdsIde/InteractiveIDESolverTest.cpp @@ -14,6 +14,7 @@ using namespace psr; +namespace { /* ============== TEST FIXTURE ============== */ class LinearConstant : public ::testing::TestWithParam { protected: @@ -186,6 +187,7 @@ static constexpr std::string_view LCATestFiles[] = { INSTANTIATE_TEST_SUITE_P(InteractiveIDESolverTest, LinearConstant, ::testing::ValuesIn(LCATestFiles)); +} // namespace // main function for the test case int main(int Argc, char **Argv) { From 361b849d1797895d7bce97f030bc363c68815356 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Tue, 19 Sep 2023 15:58:13 +0200 Subject: [PATCH 04/11] Fix valud computation --- .../DataFlow/IfdsIde/EdgeFunctionUtils.h | 4 +- .../DataFlow/IfdsIde/Solver/EagerIDESolver.h | 81 +++++++++++++------ .../DataFlow/IfdsIde/EagerIDESolverTest.cpp | 2 +- 3 files changed, 59 insertions(+), 28 deletions(-) diff --git a/include/phasar/DataFlow/IfdsIde/EdgeFunctionUtils.h b/include/phasar/DataFlow/IfdsIde/EdgeFunctionUtils.h index 018a7849f..08e144d87 100644 --- a/include/phasar/DataFlow/IfdsIde/EdgeFunctionUtils.h +++ b/include/phasar/DataFlow/IfdsIde/EdgeFunctionUtils.h @@ -212,8 +212,8 @@ template -[[nodiscard]] llvm::raw_ostream & -operator<<(llvm::raw_ostream &OS, ByConstRef> Id) { +llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, + const ConstantEdgeFunction &Id) { OS << "ConstantEF"; if constexpr (is_llvm_printable_v< typename ConstantEdgeFunction::value_type>) { diff --git a/include/phasar/DataFlow/IfdsIde/Solver/EagerIDESolver.h b/include/phasar/DataFlow/IfdsIde/Solver/EagerIDESolver.h index 
269974859..9b0aca618 100644 --- a/include/phasar/DataFlow/IfdsIde/Solver/EagerIDESolver.h +++ b/include/phasar/DataFlow/IfdsIde/Solver/EagerIDESolver.h @@ -37,10 +37,13 @@ #include "phasar/Utils/JoinLattice.h" #include "phasar/Utils/Logger.h" #include "phasar/Utils/PAMMMacros.h" +#include "phasar/Utils/Printer.h" #include "phasar/Utils/Table.h" #include "phasar/Utils/Utilities.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "nlohmann/json.hpp" @@ -54,9 +57,6 @@ #include #include -#include -#include - namespace psr { /// Solves the given IDETabulationProblem as described in the 1996 paper by @@ -587,19 +587,21 @@ class IDESolver PAMM_GET_INSTANCE; d_t Fact = NAndD.second; f_t Func = ICF->getFunctionOf(Stmt); - for (const n_t CallSite : ICF->getCallsFromWithin(Func)) { - auto LookupResults = JumpFn->forwardLookup(Fact, CallSite); - if (!LookupResults) { - continue; - } - for (size_t I = 0; I < LookupResults->get().size(); ++I) { - auto Entry = LookupResults->get()[I]; - d_t dPrime = Entry.first; - auto fPrime = Entry.second; - n_t SP = Stmt; - l_t Val = val(SP, Fact); - INC_COUNTER("Value Propagation", 1, Full); - propagateValue(CallSite, dPrime, fPrime.computeTarget(Val)); + for (const n_t CS : ICF->getCallsFromWithin(Func)) { + for (const auto &BeforeCS : ICF->getPredsOf(CS)) { + auto LookupResults = JumpFn->forwardLookup(Fact, BeforeCS); + if (!LookupResults) { + continue; + } + for (size_t I = 0; I < LookupResults->get().size(); ++I) { + auto Entry = LookupResults->get()[I]; + d_t dPrime = Entry.first; + auto fPrime = Entry.second; + n_t SP = Stmt; + l_t Val = seedVal(SP, Fact); + INC_COUNTER("Value Propagation", 1, Full); + propagateSeedValue(CS, dPrime, fPrime.computeTarget(Val)); + } } } } @@ -624,18 +626,27 @@ class IDESolver INC_COUNTER("EF Queries", 1, Full); for (const n_t StartPoint : ICF->getStartPointsOf(Callee)) { INC_COUNTER("Value Propagation", 
1, Full); - propagateValue(StartPoint, dPrime, - EdgeFn.computeTarget(val(Stmt, Fact))); + propagateSeedValue(StartPoint, dPrime, + EdgeFn.computeTarget(seedVal(Stmt, Fact))); } } } } - void propagateValue(n_t NHashN, d_t NHashD, const l_t &L) { - l_t ValNHash = val(NHashN, NHashD); + // void propagateValue(n_t NHashN, d_t NHashD, const l_t &L) { + // l_t ValNHash = val(NHashN, NHashD); + // l_t LPrime = joinValueAt(NHashN, NHashD, ValNHash, L); + // if (!(LPrime == ValNHash)) { + // setVal(NHashN, NHashD, std::move(LPrime)); + // ValuePropWL.emplace_back(std::move(NHashN), std::move(NHashD)); + // } + // } + + void propagateSeedValue(n_t NHashN, d_t NHashD, const l_t &L) { + l_t ValNHash = seedVal(NHashN, NHashD); l_t LPrime = joinValueAt(NHashN, NHashD, ValNHash, L); if (!(LPrime == ValNHash)) { - setVal(NHashN, NHashD, std::move(LPrime)); + SeedValues.insert(NHashN, NHashD, std::move(LPrime)); ValuePropWL.emplace_back(std::move(NHashN), std::move(NHashD)); } } @@ -648,6 +659,13 @@ class IDESolver return IDEProblem.topElement(); } + l_t seedVal(n_t NHashN, d_t NHashD) { + if (SeedValues.contains(NHashN, NHashD)) { + return SeedValues.get(NHashN, NHashD); + } + return IDEProblem.topElement(); + } + void setVal(n_t NHashN, d_t NHashD, l_t L) { IF_LOG_ENABLED({ PHASAR_LOG_LEVEL(DEBUG, @@ -764,7 +782,7 @@ class IDESolver d_t dPrime = SourceValTargetValAndFunction.getRowKey(); d_t d = SourceValTargetValAndFunction.getColumnKey(); EdgeFunction fPrime = SourceValTargetValAndFunction.getValue(); - l_t TargetVal = val(SP, dPrime); + l_t TargetVal = seedVal(SP, dPrime); setVal(n, d, IDEProblem.join(val(n, d), fPrime.computeTarget(std::move(TargetVal)))); @@ -801,13 +819,25 @@ class IDESolver << ", value: " << LToString(Value)); // initialize the initial seeds with the top element as we have no // information at the beginning of the value computation problem - setVal(StartPoint, Fact, Value); + SeedValues.insert(StartPoint, Fact, Value); std::pair SuperGraphNode(StartPoint, 
Fact); valuePropagationTask(std::move(SuperGraphNode)); } } } + std::vector allNodes() const { + std::vector Ret; + // TODO: Reserve + + for (const auto &Fun : ICF->getAllFunctions()) { + for (const auto &Inst : ICF->getAllInstructionsOf(Fun)) { + Ret.push_back(Inst); + } + } + return Ret; + } + /// Computes the final values for edge functions. void computeValues() { PHASAR_LOG_LEVEL(DEBUG, "Start computing values"); @@ -822,8 +852,8 @@ class IDESolver // Phase II(ii) // we create an array of all nodes and then dispatch fractions of this // array to multiple threads - const auto AllNonCallStartNodes = ICF->allNonCallStartNodes(); - valueComputationTask(AllNonCallStartNodes); + const auto AllNodes = allNodes(); + valueComputationTask(AllNodes); } /// Schedules the processing of initial seeds, initiating the analysis. @@ -1786,6 +1816,7 @@ class IDESolver std::set UnbalancedRetSites; InitialSeeds Seeds; + Table SeedValues; Table ValTab; diff --git a/unittests/PhasarLLVM/DataFlow/IfdsIde/EagerIDESolverTest.cpp b/unittests/PhasarLLVM/DataFlow/IfdsIde/EagerIDESolverTest.cpp index d80046bb0..24f617321 100644 --- a/unittests/PhasarLLVM/DataFlow/IfdsIde/EagerIDESolverTest.cpp +++ b/unittests/PhasarLLVM/DataFlow/IfdsIde/EagerIDESolverTest.cpp @@ -51,7 +51,7 @@ TEST_P(LinearConstant, ResultsEquivalentPropagateOnto) { auto PropagateOntoResults = IDESolver(LCAProblem, &ICFG, PropagateOntoStrategy{}).solve(); - PropagateOntoResults.dumpResults(ICFG); + // PropagateOntoResults.dumpResults(ICFG); for (auto &&Cell : PropagateOntoResults.getAllResultEntries()) { const auto *Stmt = Cell.getRowKey(); From efbf9cfff529201b54d68529c6e3df73d7f119e6 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Wed, 20 Sep 2023 19:41:17 +0200 Subject: [PATCH 05/11] Improve eager propagation --- .../DataFlow/IfdsIde/Solver/EagerIDESolver.h | 61 ++++++++++--------- .../DataFlow/IfdsIde/Solver/IFDSSolver.h | 4 +- .../phasar/DataFlow/IfdsIde/SolverResults.h | 3 + 
.../DataFlow/IfdsIde/EagerIDESolverTest.cpp | 26 ++++---- 4 files changed, 52 insertions(+), 42 deletions(-) diff --git a/include/phasar/DataFlow/IfdsIde/Solver/EagerIDESolver.h b/include/phasar/DataFlow/IfdsIde/Solver/EagerIDESolver.h index 9b0aca618..794de2f4a 100644 --- a/include/phasar/DataFlow/IfdsIde/Solver/EagerIDESolver.h +++ b/include/phasar/DataFlow/IfdsIde/Solver/EagerIDESolver.h @@ -314,16 +314,26 @@ class IDESolver return NewFunction; } - void updateWithNewEdge(llvm::SmallDenseSet &UpdatedFacts, d_t SourceVal, - n_t OldTarget, n_t NewTarget, d_t TargetVal, - EdgeFunction EF) { + template + void updateWithNewEdges(d_t SourceVal, n_t OldTarget, + const TargetsT &NewTargets, d_t TargetVal, + EdgeFunction EF) { if (updateJumpFunction(SourceVal, OldTarget, TargetVal, &EF)) { - UpdatedFacts.insert(TargetVal); - addWorklistItem(SourceVal, NewTarget, std::move(TargetVal), - std::move(EF)); - } else if (UpdatedFacts.contains(TargetVal)) { - addWorklistItem(SourceVal, NewTarget, std::move(TargetVal), - std::move(EF)); + auto It = NewTargets.begin(); + auto End = NewTargets.end(); + if (It == End) { + return; + } + + auto Next = std::next(It); + if (Next == End) { + addWorklistItem(SourceVal, *It, std::move(TargetVal), std::move(EF)); + return; + } + + for (; It != End; ++It) { + addWorklistItem(SourceVal, *It, TargetVal, EF); + } } } @@ -367,9 +377,6 @@ class IDESolver PHASAR_LOG_LEVEL(DEBUG, " " << NToString(ret)); }); - // The facts that are updated for the return-site - llvm::SmallDenseSet UpdatedFacts; - // for each possible callee for (f_t SCalledProcN : Callees) { // still line 14 // check if a special summary for the called procedure exists @@ -395,8 +402,8 @@ class IDESolver PHASAR_LOG_LEVEL(DEBUG, "Compose: " << SumEdgFnE << " * " << f << '\n')); - updateWithNewEdge(UpdatedFacts, d1, n, ReturnSiteN, std::move(d3), - f.composeWith(SumEdgFnE)); + updateWithNewEdges(d1, n, ReturnSiteNs, std::move(d3), + f.composeWith(SumEdgFnE)); } } } else { @@ -499,9 
+506,9 @@ class IDESolver d_t d5_restoredCtx = restoreContextOnReturnedFact(n, d2, d5); // propagte the effects of the entire call PHASAR_LOG_LEVEL(DEBUG, "Compose: " << fPrime << " * " << f); - updateWithNewEdge(UpdatedFacts, d1, n, RetSiteN, - std::move(d5_restoredCtx), - f.composeWith(fPrime)); + updateWithNewEdges(d1, n, ReturnSiteNs, + std::move(d5_restoredCtx), + f.composeWith(fPrime)); } } } @@ -535,8 +542,8 @@ class IDESolver auto fPrime = f.composeWith(EdgeFnE); PHASAR_LOG_LEVEL(DEBUG, "Compose: " << EdgeFnE << " * " << f << " = " << fPrime); - updateWithNewEdge(UpdatedFacts, d1, n, ReturnSiteN, std::move(d3), - std::move(fPrime)); + updateWithNewEdges(d1, n, ReturnSiteNs, std::move(d3), + std::move(fPrime)); } } } @@ -555,8 +562,6 @@ class IDESolver const auto &Succs = ICF->getSuccsOf(n); - llvm::SmallDenseSet UpdatedFacts; - for (const auto &nPrime : Succs) { FlowFunctionPtrType FlowFunc = CachedFlowEdgeFunctions.getNormalFlowFunction(n, nPrime); @@ -577,8 +582,7 @@ class IDESolver "Compose: " << g << " * " << f << " = " << fPrime); INC_COUNTER("EF Queries", 1, Full); - updateWithNewEdge(UpdatedFacts, d1, n, nPrime, std::move(d3), - std::move(fPrime)); + updateWithNewEdges(d1, n, Succs, std::move(d3), std::move(fPrime)); } } } @@ -940,10 +944,9 @@ class IDESolver // line 22 n_t c = Entry.first; - llvm::SmallDenseSet UpdatedFacts; - + const auto &RetSiteCs = ICF->getReturnSitesOfCallAt(c); // for each return site - for (n_t RetSiteC : ICF->getReturnSitesOfCallAt(c)) { + for (n_t RetSiteC : RetSiteCs) { // compute return-flow function FlowFunctionPtrType RetFunction = CachedFlowEdgeFunctions.getRetFlowFunction( @@ -994,9 +997,9 @@ class IDESolver d_t d3 = ValAndFunc.first; d_t d5_restoredCtx = restoreContextOnReturnedFact(c, d4, d5); PHASAR_LOG_LEVEL(DEBUG, "Compose: " << fPrime << " * " << f3); - updateWithNewEdge(UpdatedFacts, std::move(d3), c, RetSiteC, - std::move(d5_restoredCtx), - f3.composeWith(fPrime)); + updateWithNewEdges(std::move(d3), c, 
RetSiteCs, + std::move(d5_restoredCtx), + f3.composeWith(fPrime)); } } } diff --git a/include/phasar/DataFlow/IfdsIde/Solver/IFDSSolver.h b/include/phasar/DataFlow/IfdsIde/Solver/IFDSSolver.h index 9c31225cd..bf8fbd310 100644 --- a/include/phasar/DataFlow/IfdsIde/Solver/IFDSSolver.h +++ b/include/phasar/DataFlow/IfdsIde/Solver/IFDSSolver.h @@ -30,8 +30,8 @@ namespace psr { template -class IFDSSolver - : public IDESolver, Container> { +class IFDSSolver : public IDESolver, + Container, Strategy> { public: using ProblemTy = IFDSTabulationProblem; using d_t = typename AnalysisDomainTy::d_t; diff --git a/include/phasar/DataFlow/IfdsIde/SolverResults.h b/include/phasar/DataFlow/IfdsIde/SolverResults.h index 512d80063..ce268563f 100644 --- a/include/phasar/DataFlow/IfdsIde/SolverResults.h +++ b/include/phasar/DataFlow/IfdsIde/SolverResults.h @@ -185,6 +185,9 @@ class SolverResultsBase { static_assert(std::is_base_of_v); return static_cast(*this); } + + SolverResultsBase() noexcept = default; + friend Derived; }; } // namespace detail diff --git a/unittests/PhasarLLVM/DataFlow/IfdsIde/EagerIDESolverTest.cpp b/unittests/PhasarLLVM/DataFlow/IfdsIde/EagerIDESolverTest.cpp index 24f617321..7e246f121 100644 --- a/unittests/PhasarLLVM/DataFlow/IfdsIde/EagerIDESolverTest.cpp +++ b/unittests/PhasarLLVM/DataFlow/IfdsIde/EagerIDESolverTest.cpp @@ -51,24 +51,28 @@ TEST_P(LinearConstant, ResultsEquivalentPropagateOnto) { auto PropagateOntoResults = IDESolver(LCAProblem, &ICFG, PropagateOntoStrategy{}).solve(); - // PropagateOntoResults.dumpResults(ICFG); + bool Failed = false; - for (auto &&Cell : PropagateOntoResults.getAllResultEntries()) { - const auto *Stmt = Cell.getRowKey(); + for (const auto *Stmt : HA.getProjectIRDB().getAllInstructions()) { if (Stmt->isTerminator()) { continue; } + const auto *NextStmt = Stmt->getNextNonDebugInstruction(); assert(NextStmt != nullptr); - auto PropagateOverRes = - PropagateOverResults.resultAt(NextStmt, Cell.getColumnKey()); - 
EXPECT_EQ(PropagateOverRes, Cell.getValue()) - << "The Incoming results of the eager IDE solver should match the " - "outgoing results of the default solver. Expected: (" - << NToString(NextStmt) << ", " << DToString(Cell.getColumnKey()) - << ") --> " << LToString(PropagateOverRes) << "; got " - << LToString(Cell.getValue()); + for (auto &&[Fact, Value] : PropagateOntoResults.resultsAt(Stmt)) { + auto PropagateOverRes = PropagateOverResults.resultAt(NextStmt, Fact); + EXPECT_EQ(PropagateOverRes, Value) + << "The Incoming results of the eager IDE solver should match the " + "outgoing results of the default solver. Expected: (" + << NToString(NextStmt) << ", " << DToString(Fact) << ") --> " + << LToString(PropagateOverRes) << "; got " << LToString(Value); + Failed |= PropagateOverRes != Value; + } + } + if (Failed) { + PropagateOntoResults.dumpResults(ICFG); } } } From 408b00e196eccd6b933d9ffc2a8aaf72937c7ece Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Wed, 20 Sep 2023 20:57:07 +0200 Subject: [PATCH 06/11] Cleanup solvers + add SolverImpl (WIP) --- .clang-tidy | 1 + CMakeLists.txt | 2 +- include/phasar/DataFlow.h | 2 - .../DataFlow/IfdsIde/Solver/EagerIDESolver.h | 20 +- .../DataFlow/IfdsIde/Solver/IDESolver.h | 1710 +---------------- .../{ => detail}/FlowEdgeFunctionCache.h | 0 .../IfdsIde/Solver/detail/IDESolverImpl.h | 1696 ++++++++++++++++ .../IfdsIde/Solver/{ => detail}/PathEdge.h | 0 8 files changed, 1708 insertions(+), 1723 deletions(-) rename include/phasar/DataFlow/IfdsIde/Solver/{ => detail}/FlowEdgeFunctionCache.h (100%) create mode 100644 include/phasar/DataFlow/IfdsIde/Solver/detail/IDESolverImpl.h rename include/phasar/DataFlow/IfdsIde/Solver/{ => detail}/PathEdge.h (100%) diff --git a/.clang-tidy b/.clang-tidy index 063d1090a..5d40d294d 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -25,6 +25,7 @@ Checks: '-*, -cppcoreguidelines-non-private-member-variables-in-classes, -cppcoreguidelines-init-variables, -cppcoreguidelines-macro-usage, + 
-cppcoreguidelines-avoid-do-while, bugprone-*, -bugprone-easily-swappable-parameters, modernize-*, diff --git a/CMakeLists.txt b/CMakeLists.txt index b9eb75366..d5e9e09f9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -231,7 +231,7 @@ include_directories(SYSTEM ${SQLITE3_INCLUDE_DIR}) if (NOT PHASAR_IN_TREE) # Only search for LLVM if we build out of tree find_package(LLVM 14 REQUIRED CONFIG) - include_directories(SYSTEM ${LLVM_INCLUDE_DIRS}) + include_directories(${LLVM_INCLUDE_DIRS}) link_directories(${LLVM_LIB_PATH} ${LLVM_LIBRARY_DIRS}) endif() diff --git a/include/phasar/DataFlow.h b/include/phasar/DataFlow.h index a642ae484..2fc9c3347 100644 --- a/include/phasar/DataFlow.h +++ b/include/phasar/DataFlow.h @@ -20,11 +20,9 @@ #include "phasar/DataFlow/IfdsIde/IDETabulationProblem.h" #include "phasar/DataFlow/IfdsIde/IFDSTabulationProblem.h" #include "phasar/DataFlow/IfdsIde/InitialSeeds.h" -#include "phasar/DataFlow/IfdsIde/Solver/FlowEdgeFunctionCache.h" #include "phasar/DataFlow/IfdsIde/Solver/IDESolver.h" #include "phasar/DataFlow/IfdsIde/Solver/IFDSSolver.h" #include "phasar/DataFlow/IfdsIde/Solver/JumpFunctions.h" -#include "phasar/DataFlow/IfdsIde/Solver/PathEdge.h" #include "phasar/DataFlow/IfdsIde/SolverResults.h" #include "phasar/DataFlow/IfdsIde/SpecialSummaries.h" #include "phasar/DataFlow/Mono/Contexts/CallStringCTX.h" diff --git a/include/phasar/DataFlow/IfdsIde/Solver/EagerIDESolver.h b/include/phasar/DataFlow/IfdsIde/Solver/EagerIDESolver.h index 794de2f4a..e9d75ca1c 100644 --- a/include/phasar/DataFlow/IfdsIde/Solver/EagerIDESolver.h +++ b/include/phasar/DataFlow/IfdsIde/Solver/EagerIDESolver.h @@ -26,11 +26,10 @@ #include "phasar/DataFlow/IfdsIde/IDETabulationProblem.h" #include "phasar/DataFlow/IfdsIde/IFDSTabulationProblem.h" #include "phasar/DataFlow/IfdsIde/InitialSeeds.h" -#include "phasar/DataFlow/IfdsIde/Solver/FlowEdgeFunctionCache.h" #include "phasar/DataFlow/IfdsIde/Solver/IDESolverAPIMixin.h" #include 
"phasar/DataFlow/IfdsIde/Solver/JumpFunctions.h" -#include "phasar/DataFlow/IfdsIde/Solver/PathEdge.h" #include "phasar/DataFlow/IfdsIde/Solver/SolverStrategy.h" +#include "phasar/DataFlow/IfdsIde/Solver/detail/IDESolverImpl.h" #include "phasar/DataFlow/IfdsIde/SolverResults.h" #include "phasar/Domain/AnalysisDomain.h" #include "phasar/Utils/DOTGraph.h" @@ -363,7 +362,6 @@ class IDESolver n_t n = Edge.getTarget(); // a call node; line 14... d_t d2 = Edge.factAtTarget(); - // EdgeFunction f = jumpFunction(Edge); const auto &ReturnSiteNs = ICF->getReturnSitesOfCallAt(n); const auto &Callees = ICF->getCalleesOfCallAt(n); @@ -557,7 +555,6 @@ class IDESolver INC_COUNTER("Process Normal", 1, Full); PHASAR_LOG_LEVEL( DEBUG, "Process normal at target: " << NToString(Edge.getTarget())); - // EdgeFunction f = jumpFunction(Edge); auto [d1, n, d2] = Edge.consume(); const auto &Succs = ICF->getSuccsOf(n); @@ -637,15 +634,6 @@ class IDESolver } } - // void propagateValue(n_t NHashN, d_t NHashD, const l_t &L) { - // l_t ValNHash = val(NHashN, NHashD); - // l_t LPrime = joinValueAt(NHashN, NHashD, ValNHash, L); - // if (!(LPrime == ValNHash)) { - // setVal(NHashN, NHashD, std::move(LPrime)); - // ValuePropWL.emplace_back(std::move(NHashN), std::move(NHashD)); - // } - // } - void propagateSeedValue(n_t NHashN, d_t NHashD, const l_t &L) { l_t ValNHash = seedVal(NHashN, NHashD); l_t LPrime = joinValueAt(NHashN, NHashD, ValNHash, L); @@ -898,9 +886,6 @@ class IDESolver if (!IDEProblem.isZeroValue(Fact)) { INC_COUNTER("Gen facts", 1, Core); } - - /// TODO: Do we have to add EdgeIdentity to the JF-table in advance? - /// Probably not addWorklistItem(Fact, StartPoint, Fact, EdgeIdentity{}); } } @@ -920,8 +905,6 @@ class IDESolver PHASAR_LOG_LEVEL(DEBUG, "Process exit at target: " << NToString(Edge.getTarget())); n_t n = Edge.getTarget(); // an exit node; line 21... 
- - // EdgeFunction f = jumpFunction(Edge); f_t FunctionThatNeedsSummary = ICF->getFunctionOf(n); d_t d1 = Edge.factAtSource(); d_t d2 = Edge.factAtTarget(); @@ -1170,7 +1153,6 @@ class IDESolver PHASAR_LOG_LEVEL(DEBUG, "Target : " << NToString(Target)); PHASAR_LOG_LEVEL(DEBUG, "Target value : " << DToString(TargetVal)); PHASAR_LOG_LEVEL(DEBUG, "Edge Function : " << EF); - llvm::errs().flush(); PathEdgeCount++; pathEdgeProcessingTask(std::move(Edge), std::move(EF)); diff --git a/include/phasar/DataFlow/IfdsIde/Solver/IDESolver.h b/include/phasar/DataFlow/IfdsIde/Solver/IDESolver.h index b2881843c..e7be250aa 100644 --- a/include/phasar/DataFlow/IfdsIde/Solver/IDESolver.h +++ b/include/phasar/DataFlow/IfdsIde/Solver/IDESolver.h @@ -17,42 +17,7 @@ #ifndef PHASAR_DATAFLOW_IFDSIDE_SOLVER_IDESOLVER_H #define PHASAR_DATAFLOW_IFDSIDE_SOLVER_IDESOLVER_H -#include "phasar/Config/Configuration.h" -#include "phasar/DB/ProjectIRDBBase.h" -#include "phasar/DataFlow/IfdsIde/EdgeFunction.h" -#include "phasar/DataFlow/IfdsIde/EdgeFunctionUtils.h" -#include "phasar/DataFlow/IfdsIde/EdgeFunctions.h" -#include "phasar/DataFlow/IfdsIde/FlowFunctions.h" -#include "phasar/DataFlow/IfdsIde/IDETabulationProblem.h" -#include "phasar/DataFlow/IfdsIde/IFDSTabulationProblem.h" -#include "phasar/DataFlow/IfdsIde/InitialSeeds.h" -#include "phasar/DataFlow/IfdsIde/Solver/FlowEdgeFunctionCache.h" -#include "phasar/DataFlow/IfdsIde/Solver/IDESolverAPIMixin.h" -#include "phasar/DataFlow/IfdsIde/Solver/JumpFunctions.h" -#include "phasar/DataFlow/IfdsIde/Solver/PathEdge.h" -#include "phasar/DataFlow/IfdsIde/Solver/SolverStrategy.h" -#include "phasar/DataFlow/IfdsIde/SolverResults.h" -#include "phasar/Domain/AnalysisDomain.h" -#include "phasar/Utils/DOTGraph.h" -#include "phasar/Utils/JoinLattice.h" -#include "phasar/Utils/Logger.h" -#include "phasar/Utils/PAMMMacros.h" -#include "phasar/Utils/Table.h" -#include "phasar/Utils/Utilities.h" - -#include "llvm/ADT/StringRef.h" -#include 
"llvm/Support/raw_ostream.h" - -#include "nlohmann/json.hpp" - -#include -#include -#include -#include -#include -#include -#include -#include +#include "phasar/DataFlow/IfdsIde/Solver/detail/IDESolverImpl.h" namespace psr { @@ -61,10 +26,12 @@ namespace psr { /// can then be queried by using resultAt() and resultsAt(). template class IDESolver - : public IDESolverAPIMixin< - IDESolver> { - friend IDESolverAPIMixin< - IDESolver>; + : public IDESolverImpl< + IDESolver, + AnalysisDomainTy, Container, PropagateAfterStrategy> { + using base_t = IDESolverImpl< + IDESolver, + AnalysisDomainTy, Container, PropagateAfterStrategy>; public: using ProblemTy = IDETabulationProblem; @@ -80,1667 +47,8 @@ class IDESolver using v_t = typename AnalysisDomainTy::v_t; IDESolver(IDETabulationProblem &Problem, - const i_t *ICF, PropagateAfterStrategy /*Strategy*/ = {}) - : IDEProblem(Problem), ZeroValue(Problem.getZeroValue()), ICF(ICF), - SolverConfig(Problem.getIFDSIDESolverConfig()), - CachedFlowEdgeFunctions(Problem), AllTop(Problem.allTopFunction()), - JumpFn(std::make_shared>()), - Seeds(Problem.initialSeeds()) { - assert(ICF != nullptr); - } - - IDESolver(const IDESolver &) = delete; - IDESolver &operator=(const IDESolver &) = delete; - IDESolver(IDESolver &&) = delete; - IDESolver &operator=(IDESolver &&) = delete; - - virtual ~IDESolver() = default; - - nlohmann::json getAsJson() { - using TableCell = typename Table::Cell; - const static std::string DataFlowID = "DataFlow"; - nlohmann::json J; - auto Results = this->ValTab.cellSet(); - if (Results.empty()) { - J[DataFlowID] = "EMPTY"; - } else { - std::vector Cells(Results.begin(), Results.end()); - sort(Cells.begin(), Cells.end(), [](TableCell Lhs, TableCell Rhs) { - return Lhs.getRowKey() < Rhs.getRowKey(); - }); - n_t Curr; - for (unsigned I = 0; I < Cells.size(); ++I) { - Curr = Cells[I].getRowKey(); - auto NStr = - llvm::StringRef(NToString(Cells[I].getRowKey())).trim().str(); - - std::string NodeStr = - 
ICF->getFunctionName(ICF->getFunctionOf(Curr)) + "::" + NStr; - J[DataFlowID][NodeStr]; - std::string FactStr = - llvm::StringRef(DToString(Cells[I].getColumnKey())).trim().str(); - std::string ValueStr = - llvm::StringRef(LToString(Cells[I].getValue())).trim().str(); - J[DataFlowID][NodeStr]["Facts"] += {FactStr, ValueStr}; - } - } - return J; - } - - /// Returns the L-type result for the given value at the given statement. - [[nodiscard]] l_t resultAt(n_t Stmt, d_t Value) { - return getSolverResults().resultAt(Stmt, Value); - } - - /// Returns the L-type result at the given statement for the given data-flow - /// fact while respecting LLVM's SSA semantics. - /// - /// An example: when a value is loaded and the location loaded from, here - /// variable 'i', is a data-flow fact that holds, then the loaded value '%0' - /// will usually be generated and also holds. However, due to the underlying - /// theory (and respective implementation) this load instruction causes the - /// loaded value to be generated and thus, it will be valid only AFTER the - /// load instruction, i.e., at the successor instruction. - /// - /// %0 = load i32, i32* %i, align 4 - /// - /// This result accessor function returns the results at the successor - /// instruction(s) reflecting that the expression on the left-hand side holds - /// if the expression on the right-hand side holds. - template - [[nodiscard]] typename std::enable_if_t< - std::is_same_v, llvm::Instruction *>, l_t> - resultAtInLLVMSSA(NTy Stmt, d_t Value) { - return getSolverResults().resultAtInLLVMSSA(Stmt, Value); - } - - /// Returns the resulting environment for the given statement. - /// The artificial zero value can be automatically stripped. - /// TOP values are never returned. 
- [[nodiscard]] virtual std::unordered_map - resultsAt(n_t Stmt, bool StripZero = false) /*TODO const*/ { - return getSolverResults().resultsAt(Stmt, StripZero); - } - - /// Returns the data-flow results at the given statement while respecting - /// LLVM's SSA semantics. - /// - /// An example: when a value is loaded and the location loaded from, here - /// variable 'i', is a data-flow fact that holds, then the loaded value '%0' - /// will usually be generated and also holds. However, due to the underlying - /// theory (and respective implementation) this load instruction causes the - /// loaded value to be generated and thus, it will be valid only AFTER the - /// load instruction, i.e., at the successor instruction. - /// - /// %0 = load i32, i32* %i, align 4 - /// - /// This result accessor function returns the results at the successor - /// instruction(s) reflecting that the expression on the left-hand side holds - /// if the expression on the right-hand side holds. - template - [[nodiscard]] typename std::enable_if_t< - std::is_same_v, llvm::Instruction *>, - std::unordered_map> - resultsAtInLLVMSSA(NTy Stmt, bool StripZero = false) { - return getSolverResults().resultsAtInLLVMSSA(Stmt, StripZero); - } - - virtual void emitTextReport(llvm::raw_ostream &OS = llvm::outs()) { - IDEProblem.emitTextReport(getSolverResults(), OS); - } - - virtual void emitGraphicalReport(llvm::raw_ostream &OS = llvm::outs()) { - IDEProblem.emitGraphicalReport(getSolverResults(), OS); - } - - void dumpResults(llvm::raw_ostream &OS = llvm::outs()) { - getSolverResults().dumpResults(*ICF, OS); - } - - void dumpAllInterPathEdges() { - llvm::outs() << "COMPUTED INTER PATH EDGES" << '\n'; - auto Interpe = this->computedInterPathEdges.cellSet(); - for (const auto &Cell : Interpe) { - llvm::outs() << "FROM" << '\n'; - IDEProblem.printNode(llvm::outs(), Cell.getRowKey()); - llvm::outs() << "TO" << '\n'; - IDEProblem.printNode(llvm::outs(), Cell.getColumnKey()); - llvm::outs() << "FACTS" << 
'\n'; - for (const auto &Fact : Cell.getValue()) { - llvm::outs() << "fact" << '\n'; - IDEProblem.printDataFlowFact(llvm::outs(), Fact.first); - llvm::outs() << "produces" << '\n'; - for (const auto &Out : Fact.second) { - IDEProblem.printDataFlowFact(llvm::outs(), Out); - } - } - } - } - - void dumpAllIntraPathEdges() { - llvm::outs() << "COMPUTED INTRA PATH EDGES" << '\n'; - auto Intrape = this->computedIntraPathEdges.cellSet(); - for (auto &Cell : Intrape) { - llvm::outs() << "FROM" << '\n'; - IDEProblem.printNode(llvm::outs(), Cell.getRowKey()); - llvm::outs() << "TO" << '\n'; - IDEProblem.printNode(llvm::outs(), Cell.getColumnKey()); - llvm::outs() << "FACTS" << '\n'; - for (auto &Fact : Cell.getValue()) { - llvm::outs() << "fact" << '\n'; - IDEProblem.printDataFlowFact(llvm::outs(), Fact.first); - llvm::outs() << "produces" << '\n'; - for (auto &Out : Fact.second) { - IDEProblem.printDataFlowFact(llvm::outs(), Out); - } - } - } - } - - /// Returns a view into the computed solver-results. - /// - /// NOTE: The SolverResults store a reference into this IDESolver, so its - /// lifetime is also bound to the lifetime of this solver. If you want to use - /// the solverResults beyond the lifetime of this solver, use - /// comsumeSolverResults() instead. - [[nodiscard]] SolverResults getSolverResults() noexcept { - return SolverResults(this->ValTab, ZeroValue); - } - - /// Moves the computed solver-results out of this solver such that the solver - /// can be destroyed without that the analysis results are lost. - /// Do not call any function (including getSolverResults()) on this IDESolver - /// instance after that. - [[nodiscard]] OwningSolverResults - consumeSolverResults() noexcept(std::is_nothrow_move_constructible_v) { - return OwningSolverResults(std::move(this->ValTab), - std::move(ZeroValue)); - } - -protected: - /// Lines 13-20 of the algorithm; processing a call site in the caller's - /// context. 
- /// - /// For each possible callee, registers incoming call edges. - /// Also propagates call-to-return flows and summarized callee flows within - /// the caller. - /// - /// The following cases must be considered and handled: - /// 1. Process as usual and just process the call - /// 2. Create a new summary for that function (which shall be done - /// by the problem) - /// 3. Just use an existing summary provided by the problem - /// 4. If a special function is called, use a special summary - /// function - /// - /// @param edge an edge whose target node resembles a method call - /// - virtual void processCall(const PathEdge Edge) { - PAMM_GET_INSTANCE; - INC_COUNTER("Process Call", 1, Full); - PHASAR_LOG_LEVEL(DEBUG, - "Process call at target: " << NToString(Edge.getTarget())); - d_t d1 = Edge.factAtSource(); - n_t n = Edge.getTarget(); - // a call node; line 14... - d_t d2 = Edge.factAtTarget(); - EdgeFunction f = jumpFunction(Edge); - const auto &ReturnSiteNs = ICF->getReturnSitesOfCallAt(n); - const auto &Callees = ICF->getCalleesOfCallAt(n); - - IF_LOG_ENABLED( - PHASAR_LOG_LEVEL(DEBUG, "Possible callees:"); for (auto Callee - : Callees) { - PHASAR_LOG_LEVEL(DEBUG, " " << Callee->getName()); - } PHASAR_LOG_LEVEL(DEBUG, "Possible return sites:"); - for (auto ret - : ReturnSiteNs) { - PHASAR_LOG_LEVEL(DEBUG, " " << NToString(ret)); - }); - - // for each possible callee - for (f_t SCalledProcN : Callees) { // still line 14 - // check if a special summary for the called procedure exists - FlowFunctionPtrType SpecialSum = - CachedFlowEdgeFunctions.getSummaryFlowFunction(n, SCalledProcN); - // if a special summary is available, treat this as a normal flow - // and use the summary flow and edge functions - if (SpecialSum) { - PHASAR_LOG_LEVEL(DEBUG, "Found and process special summary"); - for (n_t ReturnSiteN : ReturnSiteNs) { - container_type Res = computeSummaryFlowFunction(SpecialSum, d1, d2); - INC_COUNTER("SpecialSummary-FF Application", 1, Full); - 
ADD_TO_HISTOGRAM("Data-flow facts", Res.size(), 1, Full); - saveEdges(n, ReturnSiteN, d2, Res, false); - for (d_t d3 : Res) { - EdgeFunction SumEdgFnE = - CachedFlowEdgeFunctions.getSummaryEdgeFunction(n, d2, - ReturnSiteN, d3); - INC_COUNTER("SpecialSummary-EF Queries", 1, Full); - IF_LOG_ENABLED( - PHASAR_LOG_LEVEL( - DEBUG, "Queried Summary Edge Function: " << SumEdgFnE); - PHASAR_LOG_LEVEL(DEBUG, "Compose: " << SumEdgFnE << " * " << f - << '\n')); - addWorklistItem(d1, ReturnSiteN, std::move(d3), - f.composeWith(SumEdgFnE)); - } - } - } else { - // compute the call-flow function - FlowFunctionPtrType Function = - CachedFlowEdgeFunctions.getCallFlowFunction(n, SCalledProcN); - INC_COUNTER("FF Queries", 1, Full); - container_type Res = computeCallFlowFunction(Function, d1, d2); - ADD_TO_HISTOGRAM("Data-flow facts", Res.size(), 1, Full); - // for each callee's start point(s) - auto StartPointsOf = ICF->getStartPointsOf(SCalledProcN); - if (StartPointsOf.empty()) { - PHASAR_LOG_LEVEL(DEBUG, "Start points of '" + - ICF->getFunctionName(SCalledProcN) + - "' currently not available!"); - } - // if startPointsOf is empty, the called function is a declaration - for (n_t SP : StartPointsOf) { - saveEdges(n, SP, d2, Res, true); - // for each result node of the call-flow function - for (d_t d3 : Res) { - using TableCell = typename Table>::Cell; - // create initial self-loop - PHASAR_LOG_LEVEL( - DEBUG, "Create initial self-loop with D: " << DToString(d3)); - addWorklistItem(d3, SP, d3, EdgeIdentity{}); // line 15 - - // register the fact that has an incoming edge from - // line 15.1 of Naeem/Lhotak/Rodriguez - addIncoming(SP, d3, n, d2); - // line 15.2, copy to avoid concurrent modification exceptions by - // other threads - // const std::set endSumm(endSummary(sP, d3)); - // llvm::outs() << "ENDSUMM" << '\n'; - // llvm::outs() << "Size: " << endSumm.size() << '\n'; - // llvm::outs() << "sP: " << NToString(sP) - // << "\nd3: " << DToString(d3) - // << '\n'; - // 
printEndSummaryTab(); - // still line 15.2 of Naeem/Lhotak/Rodriguez - // for each already-queried exit value reachable from - // , create new caller-side jump functions to the return - // sites because we have observed a potentially new incoming - // edge into - for (const TableCell &Entry : endSummary(SP, d3)) { - n_t eP = Entry.getRowKey(); - d_t d4 = Entry.getColumnKey(); - EdgeFunction fCalleeSummary = Entry.getValue(); - // for each return site - for (n_t RetSiteN : ReturnSiteNs) { - // compute return-flow function - FlowFunctionPtrType RetFunction = - CachedFlowEdgeFunctions.getRetFlowFunction(n, SCalledProcN, - eP, RetSiteN); - INC_COUNTER("FF Queries", 1, Full); - const container_type ReturnedFacts = computeReturnFlowFunction( - RetFunction, d3, d4, n, Container{d2}); - ADD_TO_HISTOGRAM("Data-flow facts", ReturnedFacts.size(), 1, - Full); - saveEdges(eP, RetSiteN, d4, ReturnedFacts, true); - // for each target value of the function - for (d_t d5 : ReturnedFacts) { - // update the caller-side summary function - // get call edge function - EdgeFunction f4 = - CachedFlowEdgeFunctions.getCallEdgeFunction( - n, d2, SCalledProcN, d3); - PHASAR_LOG_LEVEL(DEBUG, "Queried Call Edge Function: " << f4); - // get return edge function - EdgeFunction f5 = - CachedFlowEdgeFunctions.getReturnEdgeFunction( - n, SCalledProcN, eP, d4, RetSiteN, d5); - PHASAR_LOG_LEVEL(DEBUG, - "Queried Return Edge Function: " << f5); - if (SolverConfig.emitESG()) { - for (auto SP : ICF->getStartPointsOf(SCalledProcN)) { - IntermediateEdgeFunctions[std::make_tuple(n, d2, SP, d3)] - .push_back(f4); - } - IntermediateEdgeFunctions[std::make_tuple(eP, d4, RetSiteN, - d5)] - .push_back(f5); - } - INC_COUNTER("EF Queries", 2, Full); - // compose call * calleeSummary * return edge functions - PHASAR_LOG_LEVEL(DEBUG, "Compose: " << f5 << " * " - << fCalleeSummary << " * " - << f4); - PHASAR_LOG_LEVEL(DEBUG, - " (return * calleeSummary * call)"); - EdgeFunction fPrime = - 
f4.composeWith(fCalleeSummary).composeWith(f5); - PHASAR_LOG_LEVEL(DEBUG, " = " << fPrime); - d_t d5_restoredCtx = restoreContextOnReturnedFact(n, d2, d5); - // propagte the effects of the entire call - PHASAR_LOG_LEVEL(DEBUG, "Compose: " << fPrime << " * " << f); - addWorklistItem(d1, RetSiteN, std::move(d5_restoredCtx), - f.composeWith(fPrime)); - } - } - } - } - } - } - } - // line 17-19 of Naeem/Lhotak/Rodriguez - // process intra-procedural flows along call-to-return flow functions - for (n_t ReturnSiteN : ReturnSiteNs) { - FlowFunctionPtrType CallToReturnFF = - CachedFlowEdgeFunctions.getCallToRetFlowFunction(n, ReturnSiteN, - Callees); - INC_COUNTER("FF Queries", 1, Full); - container_type ReturnFacts = - computeCallToReturnFlowFunction(CallToReturnFF, d1, d2); - ADD_TO_HISTOGRAM("Data-flow facts", ReturnFacts.size(), 1, Full); - saveEdges(n, ReturnSiteN, d2, ReturnFacts, false); - for (d_t d3 : ReturnFacts) { - EdgeFunction EdgeFnE = - CachedFlowEdgeFunctions.getCallToRetEdgeFunction(n, d2, ReturnSiteN, - d3, Callees); - PHASAR_LOG_LEVEL(DEBUG, - "Queried Call-to-Return Edge Function: " << EdgeFnE); - if (SolverConfig.emitESG()) { - IntermediateEdgeFunctions[std::make_tuple(n, d2, ReturnSiteN, d3)] - .push_back(EdgeFnE); - } - INC_COUNTER("EF Queries", 1, Full); - auto fPrime = f.composeWith(EdgeFnE); - PHASAR_LOG_LEVEL(DEBUG, "Compose: " << EdgeFnE << " * " << f << " = " - << fPrime); - addWorklistItem(d1, ReturnSiteN, std::move(d3), std::move(fPrime)); - } - } - } - - /// Lines 33-37 of the algorithm. - /// Simply propagate normal, intra-procedural flows. 
- /// @param edge - /// - virtual void processNormalFlow(PathEdge Edge) { - PAMM_GET_INSTANCE; - INC_COUNTER("Process Normal", 1, Full); - PHASAR_LOG_LEVEL( - DEBUG, "Process normal at target: " << NToString(Edge.getTarget())); - EdgeFunction f = jumpFunction(Edge); - auto [d1, n, d2] = Edge.consume(); - - for (const auto nPrime : ICF->getSuccsOf(n)) { - FlowFunctionPtrType FlowFunc = - CachedFlowEdgeFunctions.getNormalFlowFunction(n, nPrime); - INC_COUNTER("FF Queries", 1, Full); - const container_type Res = computeNormalFlowFunction(FlowFunc, d1, d2); - ADD_TO_HISTOGRAM("Data-flow facts", Res.size(), 1, Full); - saveEdges(n, nPrime, d2, Res, false); - for (d_t d3 : Res) { - EdgeFunction g = - CachedFlowEdgeFunctions.getNormalEdgeFunction(n, d2, nPrime, d3); - PHASAR_LOG_LEVEL(DEBUG, "Queried Normal Edge Function: " << g); - EdgeFunction fPrime = f.composeWith(g); - if (SolverConfig.emitESG()) { - IntermediateEdgeFunctions[std::make_tuple(n, d2, nPrime, d3)] - .push_back(g); - } - PHASAR_LOG_LEVEL(DEBUG, - "Compose: " << g << " * " << f << " = " << fPrime); - INC_COUNTER("EF Queries", 1, Full); - addWorklistItem(d1, nPrime, std::move(d3), std::move(fPrime)); - } - } - } - - void propagateValueAtStart(const std::pair NAndD, n_t Stmt) { - PAMM_GET_INSTANCE; - d_t Fact = NAndD.second; - f_t Func = ICF->getFunctionOf(Stmt); - for (const n_t CallSite : ICF->getCallsFromWithin(Func)) { - auto LookupResults = JumpFn->forwardLookup(Fact, CallSite); - if (!LookupResults) { - continue; - } - for (size_t I = 0; I < LookupResults->get().size(); ++I) { - auto Entry = LookupResults->get()[I]; - d_t dPrime = Entry.first; - auto fPrime = Entry.second; - n_t SP = Stmt; - l_t Val = val(SP, Fact); - INC_COUNTER("Value Propagation", 1, Full); - propagateValue(CallSite, dPrime, fPrime.computeTarget(Val)); - } - } - } - - void propagateValueAtCall(const std::pair NAndD, n_t Stmt) { - PAMM_GET_INSTANCE; - d_t Fact = NAndD.second; - for (const f_t Callee : ICF->getCalleesOfCallAt(Stmt)) 
{ - FlowFunctionPtrType CallFlowFunction = - CachedFlowEdgeFunctions.getCallFlowFunction(Stmt, Callee); - INC_COUNTER("FF Queries", 1, Full); - for (const d_t dPrime : CallFlowFunction->computeTargets(Fact)) { - EdgeFunction EdgeFn = CachedFlowEdgeFunctions.getCallEdgeFunction( - Stmt, Fact, Callee, dPrime); - PHASAR_LOG_LEVEL(DEBUG, "Queried Call Edge Function: " << EdgeFn); - if (SolverConfig.emitESG()) { - for (const auto SP : ICF->getStartPointsOf(Callee)) { - IntermediateEdgeFunctions[std::make_tuple(Stmt, Fact, SP, dPrime)] - .push_back(EdgeFn); - } - } - INC_COUNTER("EF Queries", 1, Full); - for (const n_t StartPoint : ICF->getStartPointsOf(Callee)) { - INC_COUNTER("Value Propagation", 1, Full); - propagateValue(StartPoint, dPrime, - EdgeFn.computeTarget(val(Stmt, Fact))); - } - } - } - } - - void propagateValue(n_t NHashN, d_t NHashD, const l_t &L) { - l_t ValNHash = val(NHashN, NHashD); - l_t LPrime = joinValueAt(NHashN, NHashD, ValNHash, L); - if (!(LPrime == ValNHash)) { - setVal(NHashN, NHashD, std::move(LPrime)); - ValuePropWL.emplace_back(std::move(NHashN), std::move(NHashD)); - } - } - - l_t val(n_t NHashN, d_t NHashD) { - if (ValTab.contains(NHashN, NHashD)) { - return ValTab.get(NHashN, NHashD); - } - // implicitly initialized to top; see line [1] of Fig. 7 in SRH96 paper - return IDEProblem.topElement(); - } - - void setVal(n_t NHashN, d_t NHashD, l_t L) { - IF_LOG_ENABLED({ - PHASAR_LOG_LEVEL(DEBUG, - "Function : " << ICF->getFunctionOf(NHashN)->getName()); - PHASAR_LOG_LEVEL(DEBUG, "Inst. : " << NToString(NHashN)); - PHASAR_LOG_LEVEL(DEBUG, "Fact : " << DToString(NHashD)); - PHASAR_LOG_LEVEL(DEBUG, "Value : " << LToString(L)); - PHASAR_LOG_LEVEL(DEBUG, ' '); - }); - // TOP is the implicit default value which we do not need to store. 
- // if (l == IDEProblem.topElement()) { - // do not store top values - // ValTab.remove(nHashN, nHashD); - // } else { - ValTab.insert(NHashN, NHashD, std::move(L)); - // } - } - - EdgeFunction jumpFunction(const PathEdge Edge) { - IF_LOG_ENABLED( - PHASAR_LOG_LEVEL(DEBUG, "JumpFunctions Forward-Lookup:"); - PHASAR_LOG_LEVEL(DEBUG, - " Source D: " << DToString(Edge.factAtSource())); - PHASAR_LOG_LEVEL(DEBUG, " Target N: " << NToString(Edge.getTarget())); - PHASAR_LOG_LEVEL(DEBUG, - " Target D: " << DToString(Edge.factAtTarget()))); - - auto FwdLookupRes = - JumpFn->forwardLookup(Edge.factAtSource(), Edge.getTarget()); - if (FwdLookupRes) { - auto &Ref = FwdLookupRes->get(); - if (auto Find = std::find_if(Ref.begin(), Ref.end(), - [Edge](const auto &Pair) { - return Edge.factAtTarget() == Pair.first; - }); - Find != Ref.end()) { - PHASAR_LOG_LEVEL(DEBUG, " => EdgeFn: " << Find->second); - return Find->second; - } - } - PHASAR_LOG_LEVEL(DEBUG, " => EdgeFn: " << AllTop); - // JumpFn initialized to all-top, see line [2] in SRH96 paper - return AllTop; - } - - void addEndSummary(n_t SP, d_t d1, n_t eP, d_t d2, EdgeFunction f) { - // note: at this point we don't need to join with a potential previous f - // because f is a jump function, which is already properly joined - // within propagate(..) - EndsummaryTab.get(SP, d1).insert(eP, d2, std::move(f)); - } - - // should be made a callable at some point - void pathEdgeProcessingTask(PathEdge Edge) { - PAMM_GET_INSTANCE; - INC_COUNTER("JumpFn Construction", 1, Full); - IF_LOG_ENABLED( - PHASAR_LOG_LEVEL( - DEBUG, - "-------------------------------------------- " - << PathEdgeCount - << ". Path Edge --------------------------------------------"); - PHASAR_LOG_LEVEL(DEBUG, ' '); - PHASAR_LOG_LEVEL(DEBUG, "Process " << PathEdgeCount << ". 
path edge:"); - PHASAR_LOG_LEVEL(DEBUG, "< D source: " << DToString(Edge.factAtSource()) - << " ;"); - PHASAR_LOG_LEVEL(DEBUG, - " N target: " << NToString(Edge.getTarget()) << " ;"); - PHASAR_LOG_LEVEL(DEBUG, " D target: " << DToString(Edge.factAtTarget()) - << " >"); - PHASAR_LOG_LEVEL(DEBUG, ' ')); - - if (!ICF->isCallSite(Edge.getTarget())) { - if (ICF->isExitInst(Edge.getTarget())) { - processExit(Edge); - } - if (!ICF->getSuccsOf(Edge.getTarget()).empty()) { - processNormalFlow(std::move(Edge)); - } - } else { - processCall(std::move(Edge)); - } - } - - // should be made a callable at some point - void valuePropagationTask(std::pair NAndD) { - n_t n = NAndD.first; - // our initial seeds are not necessarily method-start points but here they - // should be treated as such the same also for unbalanced return sites in - // an unbalanced problem - if (ICF->isStartPoint(n) || Seeds.containsInitialSeedsFor(n) || - UnbalancedRetSites.count(n)) { - // FIXME: is currently not executed for main!!! 
- // initial seeds are set in the global constructor, and main is also not - // officially called by any other function - propagateValueAtStart(NAndD, n); - } - if (ICF->isCallSite(n)) { - propagateValueAtCall(NAndD, n); - } - } - - // should be made a callable at some point - void valueComputationTask(const std::vector &Values) { - PAMM_GET_INSTANCE; - for (n_t n : Values) { - for (n_t SP : ICF->getStartPointsOf(ICF->getFunctionOf(n))) { - using TableCell = typename Table>::Cell; - Table> &LookupByTarget = - JumpFn->lookupByTarget(n); - for (const TableCell &SourceValTargetValAndFunction : - LookupByTarget.cellSet()) { - d_t dPrime = SourceValTargetValAndFunction.getRowKey(); - d_t d = SourceValTargetValAndFunction.getColumnKey(); - EdgeFunction fPrime = SourceValTargetValAndFunction.getValue(); - l_t TargetVal = val(SP, dPrime); - setVal(n, d, - IDEProblem.join(val(n, d), - fPrime.computeTarget(std::move(TargetVal)))); - INC_COUNTER("Value Computation", 1, Full); - } - } - } - } - - virtual void saveEdges(n_t SourceNode, n_t SinkStmt, d_t SourceVal, - const container_type &DestVals, bool InterP) { - if (!SolverConfig.recordEdges()) { - return; - } - Table> &TgtMap = - (InterP) ? 
ComputedInterPathEdges : ComputedIntraPathEdges; - TgtMap.get(SourceNode, SinkStmt)[SourceVal].insert(DestVals.begin(), - DestVals.end()); - } - - void submitInitialValues() { - std::map> AllSeeds = Seeds.getSeeds(); - for (n_t UnbalancedRetSite : UnbalancedRetSites) { - if (AllSeeds.find(UnbalancedRetSite) == AllSeeds.end()) { - AllSeeds[UnbalancedRetSite][ZeroValue] = IDEProblem.topElement(); - } - } - // do processing - for (const auto &[StartPoint, Facts] : AllSeeds) { - for (auto &[Fact, Value] : Facts) { - PHASAR_LOG_LEVEL(DEBUG, "set initial seed at: " - << NToString(StartPoint) - << ", fact: " << DToString(Fact) - << ", value: " << LToString(Value)); - // initialize the initial seeds with the top element as we have no - // information at the beginning of the value computation problem - setVal(StartPoint, Fact, Value); - std::pair SuperGraphNode(StartPoint, Fact); - valuePropagationTask(std::move(SuperGraphNode)); - } - } - } - - /// Computes the final values for edge functions. - void computeValues() { - PHASAR_LOG_LEVEL(DEBUG, "Start computing values"); - // Phase II(i) - submitInitialValues(); - while (!ValuePropWL.empty()) { - auto NAndD = std::move(ValuePropWL.back()); - ValuePropWL.pop_back(); - valuePropagationTask(std::move(NAndD)); - } - - // Phase II(ii) - // we create an array of all nodes and then dispatch fractions of this - // array to multiple threads - const auto AllNonCallStartNodes = ICF->allNonCallStartNodes(); - valueComputationTask(AllNonCallStartNodes); - } - - /// Schedules the processing of initial seeds, initiating the analysis. - /// Clients should only call this methods if performing synchronization on - /// their own. Normally, solve() should be called instead. - void submitInitialSeeds() { - PAMM_GET_INSTANCE; - // Check if the initial seeds contain the zero value at every starting - // point. If not, the zero value needs to be added to allow for correct - // solving of the problem. 
- for (const auto &[StartPoint, Facts] : Seeds.getSeeds()) { - if (Facts.find(ZeroValue) == Facts.end()) { - // Add zero value if it's not in the set of facts. - PHASAR_LOG_LEVEL( - DEBUG, "Zero-Value has been added automatically to start point: " - << NToString(StartPoint)); - Seeds.addSeed(StartPoint, ZeroValue, IDEProblem.bottomElement()); - } - } - PHASAR_LOG_LEVEL(DEBUG, - "Number of initial seeds: " << Seeds.countInitialSeeds()); - PHASAR_LOG_LEVEL(DEBUG, "List of initial seeds: "); - for (const auto &[StartPoint, Facts] : Seeds.getSeeds()) { - PHASAR_LOG_LEVEL(DEBUG, "Start point: " << NToString(StartPoint)); - /// If statically disabling the logger, Fact and Value are unused. To - /// prevent the copilation to fail with -Werror, add the [[maybe_unused]] - /// attribute - for ([[maybe_unused]] const auto &[Fact, Value] : Facts) { - PHASAR_LOG_LEVEL(DEBUG, "\tFact: " << DToString(Fact)); - PHASAR_LOG_LEVEL(DEBUG, "\tValue: " << LToString(Value)); - } - } - for (const auto &[StartPoint, Facts] : Seeds.getSeeds()) { - for (const auto &[Fact, Value] : Facts) { - PHASAR_LOG_LEVEL(DEBUG, "Submit seed at: " << NToString(StartPoint)); - PHASAR_LOG_LEVEL(DEBUG, "\tFact: " << DToString(Fact)); - PHASAR_LOG_LEVEL(DEBUG, "\tValue: " << LToString(Value)); - if (!IDEProblem.isZeroValue(Fact)) { - INC_COUNTER("Gen facts", 1, Core); - } - addWorklistItem(Fact, StartPoint, Fact, EdgeIdentity{}); - } - } - } - - /// Lines 21-32 of the algorithm. - /// - /// Stores callee-side summaries. - /// Also, at the side of the caller, propagates intra-procedural flows to - /// return sites using those newly computed summaries. - /// - /// @param edge an edge whose target node resembles a method exit - /// - virtual void processExit(const PathEdge Edge) { - PAMM_GET_INSTANCE; - INC_COUNTER("Process Exit", 1, Full); - PHASAR_LOG_LEVEL(DEBUG, - "Process exit at target: " << NToString(Edge.getTarget())); - n_t n = Edge.getTarget(); // an exit node; line 21... 
- EdgeFunction f = jumpFunction(Edge); - f_t FunctionThatNeedsSummary = ICF->getFunctionOf(n); - d_t d1 = Edge.factAtSource(); - d_t d2 = Edge.factAtTarget(); - // for each of the method's start points, determine incoming calls - const auto StartPointsOf = ICF->getStartPointsOf(FunctionThatNeedsSummary); - std::map Inc; - for (n_t SP : StartPointsOf) { - // line 21.1 of Naeem/Lhotak/Rodriguez - // register end-summary - addEndSummary(SP, d1, n, d2, f); - for (const auto &Entry : incoming(d1, SP)) { - Inc[Entry.first] = Container{Entry.second}; - } - } - printEndSummaryTab(); - printIncomingTab(); - // for each incoming call edge already processed - //(see processCall(..)) - for (const auto &Entry : Inc) { - // line 22 - n_t c = Entry.first; - // for each return site - for (n_t RetSiteC : ICF->getReturnSitesOfCallAt(c)) { - // compute return-flow function - FlowFunctionPtrType RetFunction = - CachedFlowEdgeFunctions.getRetFlowFunction( - c, FunctionThatNeedsSummary, n, RetSiteC); - INC_COUNTER("FF Queries", 1, Full); - // for each incoming-call value - for (d_t d4 : Entry.second) { - const container_type Targets = - computeReturnFlowFunction(RetFunction, d1, d2, c, Entry.second); - ADD_TO_HISTOGRAM("Data-flow facts", Targets.size(), 1, Full); - saveEdges(n, RetSiteC, d2, Targets, true); - // for each target value at the return site - // line 23 - for (d_t d5 : Targets) { - // compute composed function - // get call edge function - EdgeFunction f4 = CachedFlowEdgeFunctions.getCallEdgeFunction( - c, d4, ICF->getFunctionOf(n), d1); - PHASAR_LOG_LEVEL(DEBUG, "Queried Call Edge Function: " << f4); - // get return edge function - EdgeFunction f5 = - CachedFlowEdgeFunctions.getReturnEdgeFunction( - c, ICF->getFunctionOf(n), n, d2, RetSiteC, d5); - PHASAR_LOG_LEVEL(DEBUG, "Queried Return Edge Function: " << f5); - if (SolverConfig.emitESG()) { - for (auto SP : ICF->getStartPointsOf(ICF->getFunctionOf(n))) { - IntermediateEdgeFunctions[std::make_tuple(c, d4, SP, d1)] - 
.push_back(f4); - } - IntermediateEdgeFunctions[std::make_tuple(n, d2, RetSiteC, d5)] - .push_back(f5); - } - INC_COUNTER("EF Queries", 2, Full); - // compose call function * function * return function - PHASAR_LOG_LEVEL(DEBUG, - "Compose: " << f5 << " * " << f << " * " << f4); - PHASAR_LOG_LEVEL(DEBUG, " (return * function * call)"); - EdgeFunction fPrime = f4.composeWith(f).composeWith(f5); - PHASAR_LOG_LEVEL(DEBUG, " = " << fPrime); - // for each jump function coming into the call, propagate to - // return site using the composed function - auto RevLookupResult = JumpFn->reverseLookup(c, d4); - if (RevLookupResult) { - for (size_t I = 0; I < RevLookupResult->get().size(); ++I) { - auto ValAndFunc = RevLookupResult->get()[I]; - EdgeFunction f3 = ValAndFunc.second; - if (f3 != AllTop) { - d_t d3 = ValAndFunc.first; - d_t d5_restoredCtx = restoreContextOnReturnedFact(c, d4, d5); - PHASAR_LOG_LEVEL(DEBUG, "Compose: " << fPrime << " * " << f3); - addWorklistItem(std::move(d3), RetSiteC, - std::move(d5_restoredCtx), - f3.composeWith(fPrime)); - } - } - } - } - } - } - } - // handling for unbalanced problems where we return out of a method with a - // fact for which we have no incoming flow. 
- // note: we propagate that way only values that originate from ZERO, as - // conditionally generated values should only - // be propagated into callers that have an incoming edge for this - // condition - /// TODO: Add a check for "d1 is seed in functionOf(n)" - if (SolverConfig.followReturnsPastSeeds() && Inc.empty() /*&& - IDEProblem.isZeroValue(d1)*/) { - const auto &Callers = ICF->getCallersOf(FunctionThatNeedsSummary); - for (n_t Caller : Callers) { - for (n_t RetSiteC : ICF->getReturnSitesOfCallAt(Caller)) { - FlowFunctionPtrType RetFunction = - CachedFlowEdgeFunctions.getRetFlowFunction( - Caller, FunctionThatNeedsSummary, n, RetSiteC); - INC_COUNTER("FF Queries", 1, Full); - const container_type Targets = computeReturnFlowFunction( - RetFunction, d1, d2, Caller, Container{ZeroValue}); - ADD_TO_HISTOGRAM("Data-flow facts", Targets.size(), 1, Full); - saveEdges(n, RetSiteC, d2, Targets, true); - for (d_t d5 : Targets) { - EdgeFunction f5 = - CachedFlowEdgeFunctions.getReturnEdgeFunction( - Caller, ICF->getFunctionOf(n), n, d2, RetSiteC, d5); - PHASAR_LOG_LEVEL(DEBUG, "Queried Return Edge Function: " << f5); - if (SolverConfig.emitESG()) { - IntermediateEdgeFunctions[std::make_tuple(n, d2, RetSiteC, d5)] - .push_back(f5); - } - INC_COUNTER("EF Queries", 1, Full); - PHASAR_LOG_LEVEL(DEBUG, "Compose: " << f5 << " * " << f); - propagteUnbalancedReturnFlow(RetSiteC, d5, f.composeWith(f5), - Caller); - // register for value processing (2nd IDE phase) - UnbalancedRetSites.insert(RetSiteC); - } - } - } - // in cases where there are no callers, the return statement would - // normally not be processed at all; this might be undesirable if - // the flow function has a side effect such as registering a taint; - // instead we thus call the return flow function will a null caller - if (Callers.empty()) { - IDEProblem.applyUnbalancedRetFlowFunctionSideEffects( - FunctionThatNeedsSummary, n, d2); - } - } - } - - void propagteUnbalancedReturnFlow(n_t RetSiteC, d_t 
TargetVal, - EdgeFunction EdgeFunc, - n_t /*RelatedCallSite*/) { - addWorklistItem(ZeroValue, std::move(RetSiteC), std::move(TargetVal), - std::move(EdgeFunc)); - } - - /// This method will be called for each incoming edge and can be used to - /// transfer knowledge from the calling edge to the returning edge, without - /// affecting the summary edges at the callee. - /// @param callSite - /// - /// @param d4 - /// Fact stored with the incoming edge, i.e., present at the - /// caller side - /// @param d5 - /// Fact that originally should be propagated to the caller. - /// @return Fact that will be propagated to the caller. - /// - d_t restoreContextOnReturnedFact(n_t /*CallSite*/, d_t /*d4*/, d_t d5) { - // TODO support LinkedNode and JoinHandlingNode - // if (d5 instanceof LinkedNode) { - // ((LinkedNode) d5).setCallingContext(d4); - // } - // if(d5 instanceof JoinHandlingNode) { - // ((JoinHandlingNode) - // d5).setCallingContext(d4); - // } - return d5; - } - - /// Computes the normal flow function for the given set of start and end - /// abstractions- - /// @param flowFunction The normal flow function to compute - /// @param d1 The abstraction at the method's start node - /// @param d2 The abstraction at the current node - /// @return The set of abstractions at the successor node - /// - container_type computeNormalFlowFunction(const FlowFunctionPtrType &FlowFunc, - d_t /*d1*/, d_t d2) { - return FlowFunc->computeTargets(d2); - } - - container_type - computeSummaryFlowFunction(const FlowFunctionPtrType &SummaryFlowFunction, - d_t /*d1*/, d_t d2) { - return SummaryFlowFunction->computeTargets(d2); - } - - /// Computes the call flow function for the given call-site abstraction - /// @param callFlowFunction The call flow function to compute - /// @param d1 The abstraction at the current method's start node. 
- /// @param d2 The abstraction at the call site - /// @return The set of caller-side abstractions at the callee's start node - /// - container_type - computeCallFlowFunction(const FlowFunctionPtrType &CallFlowFunction, - d_t /*d1*/, d_t d2) { - return CallFlowFunction->computeTargets(d2); - } - - /// Computes the call-to-return flow function for the given call-site - /// abstraction - /// @param callToReturnFlowFunction The call-to-return flow function to - /// compute - /// @param d1 The abstraction at the current method's start node. - /// @param d2 The abstraction at the call site - /// @return The set of caller-side abstractions at the return site - /// - container_type computeCallToReturnFlowFunction( - const FlowFunctionPtrType &CallToReturnFlowFunction, d_t /*d1*/, d_t d2) { - return CallToReturnFlowFunction->computeTargets(d2); - } - - /// Computes the return flow function for the given set of caller-side - /// abstractions. - /// @param retFunction The return flow function to compute - /// @param d1 The abstraction at the beginning of the callee - /// @param d2 The abstraction at the exit node in the callee - /// @param callSite The call site - /// @param callerSideDs The abstractions at the call site - /// @return The set of caller-side abstractions at the return site - /// - container_type - computeReturnFlowFunction(const FlowFunctionPtrType &RetFlowFunction, - d_t /*d1*/, d_t d2, n_t /*CallSite*/, - const Container & /*CallerSideDs*/) { - return RetFlowFunction->computeTargets(d2); - } - - bool addWorklistItem(d_t SourceVal, n_t Target, d_t TargetVal, - EdgeFunction f) { - EdgeFunction JumpFnE = [&]() { - const auto RevLookupResult = JumpFn->reverseLookup(Target, TargetVal); - if (RevLookupResult) { - const auto &JumpFnContainer = RevLookupResult->get(); - const auto Find = std::find_if( - JumpFnContainer.begin(), JumpFnContainer.end(), - [SourceVal](auto &KVpair) { return KVpair.first == SourceVal; }); - if (Find != JumpFnContainer.end()) { - return 
Find->second; - } - } - // jump function is initialized to all-top if no entry - // was found - return AllTop; - }(); - EdgeFunction fPrime = JumpFnE.joinWith(f); - bool NewFunction = fPrime != JumpFnE; - - IF_LOG_ENABLED( - PHASAR_LOG_LEVEL( - DEBUG, "Join: " << JumpFnE << " & " << f - << (JumpFnE == f ? " (EF's are equal)" : " ")); - PHASAR_LOG_LEVEL(DEBUG, - " = " << fPrime - << (NewFunction ? " (new jump func)" : " ")); - PHASAR_LOG_LEVEL(DEBUG, ' ')); - if (NewFunction) { - JumpFn->addFunction(SourceVal, Target, TargetVal, fPrime); - PathEdge Edge(SourceVal, Target, TargetVal); - WorkList.push_back(std::move(Edge)); - - IF_LOG_ENABLED(if (!IDEProblem.isZeroValue(TargetVal)) { - PHASAR_LOG_LEVEL(DEBUG, "[addWorklistItem]: EDGE: getFunctionOf(Target)) - << ", D: " << DToString(SourceVal) << '>'); - PHASAR_LOG_LEVEL(DEBUG, " ---> '); - PHASAR_LOG_LEVEL(DEBUG, ' '); - }); - } else { - PHASAR_LOG_LEVEL(DEBUG, "[addWorklistItem]: No new function!"); - } - - return NewFunction; - } - - /// Propagates the flow further down the exploded super graph, merging any - /// edge function that might already have been computed for TargetVal at - /// Target. 
- /// - /// @param SourceVal the source value of the propagated summary edge - /// @param Target the target statement - /// @param TargetVal the target value at the target statement - /// @param f the new edge function computed from (s0,SourceVal) to - /// (Target,TargetVal) - /// @param relatedCallSite for call and return flows the related call - /// statement, nullptr otherwise (this value is not used within this - /// implementation but may be useful for subclasses of IDESolver) - /// @param isUnbalancedReturn true if this edge is propagating an - /// unbalanced return (this value is not used within this implementation - /// but may be useful for subclasses of {@link IDESolver}) - /// - void propagate(PathEdge Edge) { - const auto &[SourceVal, Target, TargetVal] = Edge.get(); - - PHASAR_LOG_LEVEL(DEBUG, "Propagate flow"); - PHASAR_LOG_LEVEL(DEBUG, "Source value : " << DToString(SourceVal)); - PHASAR_LOG_LEVEL(DEBUG, "Target : " << NToString(Target)); - PHASAR_LOG_LEVEL(DEBUG, "Target value : " << DToString(TargetVal)); - - PathEdgeCount++; - pathEdgeProcessingTask(std::move(Edge)); - } - - l_t joinValueAt(n_t /*Unit*/, d_t /*Fact*/, l_t Curr, l_t NewVal) { - return IDEProblem.join(std::move(Curr), std::move(NewVal)); - } - - std::set>::Cell> - endSummary(n_t SP, d_t d3) { - if constexpr (PAMM_CURR_SEV_LEVEL >= PAMM_SEVERITY_LEVEL::Core) { - auto Key = std::make_pair(SP, d3); - auto FindND = FSummaryReuse.find(Key); - if (FindND == FSummaryReuse.end()) { - FSummaryReuse.emplace(Key, 0); - } else { - FSummaryReuse[Key] += 1; - } - } - return EndsummaryTab.get(SP, d3).cellSet(); - } - - std::map incoming(d_t d1, n_t SP) { - return IncomingTab.get(SP, d1); - } - - void addIncoming(n_t SP, d_t d3, n_t n, d_t d2) { - IncomingTab.get(SP, d3)[n].insert(d2); - } - - void printIncomingTab() const { - IF_LOG_ENABLED( - PHASAR_LOG_LEVEL(DEBUG, "Start of incomingtab entry"); - for (const auto &Cell - : IncomingTab.cellSet()) { - PHASAR_LOG_LEVEL(DEBUG, "sP: " << 
NToString(Cell.getRowKey())); - PHASAR_LOG_LEVEL(DEBUG, "d3: " << DToString(Cell.getColumnKey())); - for (const auto &Entry : Cell.getValue()) { - PHASAR_LOG_LEVEL(DEBUG, " n: " << NToString(Entry.first)); - for (const auto &Fact : Entry.second) { - PHASAR_LOG_LEVEL(DEBUG, " d2: " << DToString(Fact)); - } - } - PHASAR_LOG_LEVEL(DEBUG, "---------------"); - } PHASAR_LOG_LEVEL(DEBUG, "End of incomingtab entry");) - } - - void printEndSummaryTab() const { - IF_LOG_ENABLED( - PHASAR_LOG_LEVEL(DEBUG, "Start of endsummarytab entry"); - - EndsummaryTab.foreachCell( - [](const auto &Row, const auto &Col, const auto &Val) { - PHASAR_LOG_LEVEL(DEBUG, "sP: " << NToString(Row)); - PHASAR_LOG_LEVEL(DEBUG, "d1: " << DToString(Col)); - - Val.foreachCell([](const auto &InnerRow, const auto &InnerCol, - const auto &InnerVal) { - PHASAR_LOG_LEVEL(DEBUG, " eP: " << NToString(InnerRow)); - PHASAR_LOG_LEVEL(DEBUG, " d2: " << DToString(InnerCol)); - PHASAR_LOG_LEVEL(DEBUG, " EF: " << InnerVal); - }); - PHASAR_LOG_LEVEL(DEBUG, "---------------"); - }); - - PHASAR_LOG_LEVEL(DEBUG, "End of endsummarytab entry");) - } - - void printComputedPathEdges() { - llvm::outs() - << "\n**********************************************************"; - llvm::outs() - << "\n* Computed intra-procedural path egdes *"; - llvm::outs() - << "\n**********************************************************\n"; - - // Sort intra-procedural path edges - auto Cells = ComputedIntraPathEdges.cellVec(); - StmtLess Stmtless(ICF); - sort(Cells.begin(), Cells.end(), [&Stmtless](auto Lhs, auto Rhs) { - return Stmtless(Lhs.getRowKey(), Rhs.getRowKey()); - }); - for (const auto &Cell : Cells) { - auto Edge = std::make_pair(Cell.getRowKey(), Cell.getColumnKey()); - std::string N2Label = NToString(Edge.second); - llvm::outs() << "\nN1: " << NToString(Edge.first) << '\n' - << "N2: " << N2Label << "\n----" - << std::string(N2Label.size(), '-') << '\n'; - for (auto D1ToD2Set : Cell.getValue()) { - auto D1Fact = D1ToD2Set.first; - 
llvm::outs() << "D1: " << DToString(D1Fact) << '\n'; - for (auto D2Fact : D1ToD2Set.second) { - llvm::outs() << "\tD2: " << DToString(D2Fact) << '\n'; - } - llvm::outs() << '\n'; - } - } - - llvm::outs() - << "\n**********************************************************"; - llvm::outs() - << "\n* Computed inter-procedural path edges *"; - llvm::outs() - << "\n**********************************************************\n"; - - // Sort intra-procedural path edges - Cells = ComputedInterPathEdges.cellVec(); - sort(Cells.begin(), Cells.end(), [&Stmtless](auto Lhs, auto Rhs) { - return Stmtless(Lhs.getRowKey(), Rhs.getRowKey()); - }); - for (const auto &Cell : Cells) { - auto Edge = std::make_pair(Cell.getRowKey(), Cell.getColumnKey()); - std::string N2Label = NToString(Edge.second); - llvm::outs() << "\nN1: " << NToString(Edge.first) << '\n' - << "N2: " << N2Label << "\n----" - << std::string(N2Label.size(), '-') << '\n'; - for (auto D1ToD2Set : Cell.getValue()) { - auto D1Fact = D1ToD2Set.first; - llvm::outs() << "D1: " << DToString(D1Fact) << '\n'; - for (auto D2Fact : D1ToD2Set.second) { - llvm::outs() << "\tD2: " << DToString(D2Fact) << '\n'; - } - llvm::outs() << '\n'; - } - } - } - - /// The invariant for computing the number of generated (#gen) and killed - /// (#kill) facts: - /// (1) #Valid facts at the last statement <= #gen - #kill - /// (2) #gen >= #kill - /// - /// The total number of valid facts can be smaller than the difference of - /// generated and killed facts, due to set semantics, i.e., a fact can be - /// generated multiple times but appears only once. - /// - /// Zero value is not counted! - /// - /// @brief Computes and prints statistics of the analysis run, e.g. number of - /// generated/killed facts, number of summary-reuses etc. 
- /// - void computeAndPrintStatistics() { - PAMM_GET_INSTANCE; - // Stores all valid facts at return site in caller context; return-site is - // key - std::unordered_map> ValidInCallerContext; - size_t NumGenFacts = 0; - size_t NumIntraPathEdges = 0; - size_t NumInterPathEdges = 0; - // --- Intra-procedural Path Edges --- - // d1 --> d2-Set - // Case 1: d1 in d2-Set - // Case 2: d1 not in d2-Set, i.e., d1 was killed. d2-Set could be empty. - for (const auto &Cell : ComputedIntraPathEdges.cellSet()) { - auto Edge = std::make_pair(Cell.getRowKey(), Cell.getColumnKey()); - PHASAR_LOG_LEVEL(DEBUG, "N1: " << NToString(Edge.first)); - PHASAR_LOG_LEVEL(DEBUG, "N2: " << NToString(Edge.second)); - for (auto &[D1, D2s] : Cell.getValue()) { - PHASAR_LOG_LEVEL(DEBUG, "d1: " << DToString(D1)); - NumIntraPathEdges += D2s.size(); - // Case 1 - if (D2s.find(D1) != D2s.end()) { - NumGenFacts += D2s.size() - 1; - } - // Case 2 - else { - NumGenFacts += D2s.size(); - } - // Store all valid facts after call-to-return flow - if (ICF->isCallSite(Edge.first)) { - ValidInCallerContext[Edge.second].insert(D2s.begin(), D2s.end()); - } - IF_LOG_ENABLED([this](const auto &D2s) { - for (auto D2 : D2s) { - PHASAR_LOG_LEVEL(DEBUG, "d2: " << DToString(D2)); - } - PHASAR_LOG_LEVEL(DEBUG, "----"); - }(D2s)); - } - PHASAR_LOG_LEVEL(DEBUG, " "); - } - // Stores all pairs of (Startpoint, Fact) for which a summary was applied - std::set> ProcessSummaryFacts; - PHASAR_LOG_LEVEL(DEBUG, "=============================================="); - PHASAR_LOG_LEVEL(DEBUG, "INTER PATH EDGES"); - for (const auto &Cell : ComputedInterPathEdges.cellSet()) { - auto Edge = std::make_pair(Cell.getRowKey(), Cell.getColumnKey()); - PHASAR_LOG_LEVEL(DEBUG, "N1: " << NToString(Edge.first)); - PHASAR_LOG_LEVEL(DEBUG, "N2: " << NToString(Edge.second)); - // --- Call-flow Path Edges --- - // Case 1: d1 --> empty set - // Can be ignored, since killing a fact in the caller context will - // actually happen during call-to-return. 
- // - // Case 2: d1 --> d2-Set - // Every fact d_i != ZeroValue in d2-set will be generated in the - // callee context, thus counts as a new fact. Even if d1 is passed as it - // is, it will count as a new fact. The reason for this is, that d1 can - // be killed in the callee context, but still be valid in the caller - // context. - // - // Special Case: Summary was applied for a particular call - // Process the summary's #gen and #kill. - if (ICF->isCallSite(Edge.first)) { - for (auto &[D1, D2s] : Cell.getValue()) { - PHASAR_LOG_LEVEL(DEBUG, "d1: " << DToString(D1)); - NumInterPathEdges += D2s.size(); - for (auto D2 : D2s) { - if (!IDEProblem.isZeroValue(D2)) { - NumGenFacts++; - } - // Special case - if (ProcessSummaryFacts.find(std::make_pair(Edge.second, D2)) != - ProcessSummaryFacts.end()) { - - std::set SummaryDSet; - EndsummaryTab.get(Edge.second, D2) - .foreachCell([&SummaryDSet](const auto &Row, const auto &Col, - const auto &Val) { - SummaryDSet.insert(Col); - }); - - // Process summary just as an intra-procedural edge - if (SummaryDSet.find(D2) != SummaryDSet.end()) { - NumGenFacts += SummaryDSet.size() - 1; - } else { - NumGenFacts += SummaryDSet.size(); - } - } else { - ProcessSummaryFacts.emplace(Edge.second, D2); - } - PHASAR_LOG_LEVEL(DEBUG, "d2: " << DToString(D2)); - } - PHASAR_LOG_LEVEL(DEBUG, "----"); - } - } - // --- Return-flow Path Edges --- - // Since every fact passed to the callee was counted as a new fact, we - // have to count every fact propagated to the caller as a kill to - // satisfy our invariant. Obviously, every fact not propagated to the - // caller will count as a kill. If an actual new fact is propagated to - // the caller, we have to increase the number of generated facts by one. - // Zero value does not count towards generated/killed facts. 
- if (ICF->isExitInst(Cell.getRowKey())) { - for (auto &[D1, D2s] : Cell.getValue()) { - PHASAR_LOG_LEVEL(DEBUG, "d1: " << DToString(D1)); - NumInterPathEdges += D2s.size(); - auto CallerFacts = ValidInCallerContext[Edge.second]; - for (auto D2 : D2s) { - // d2 not valid in caller context - if (CallerFacts.find(D2) == CallerFacts.end()) { - NumGenFacts++; - } - PHASAR_LOG_LEVEL(DEBUG, "d2: " << DToString(D2)); - } - PHASAR_LOG_LEVEL(DEBUG, "----"); - } - } - PHASAR_LOG_LEVEL(DEBUG, " "); - } - PHASAR_LOG_LEVEL(DEBUG, "SUMMARY REUSE"); - std::size_t TotalSummaryReuse = 0; - for (const auto &Entry : FSummaryReuse) { - PHASAR_LOG_LEVEL(DEBUG, "N1: " << NToString(Entry.first.first)); - PHASAR_LOG_LEVEL(DEBUG, "D1: " << DToString(Entry.first.second)); - PHASAR_LOG_LEVEL(DEBUG, "#Reuse: " << Entry.second); - TotalSummaryReuse += Entry.second; - } - INC_COUNTER("Gen facts", NumGenFacts, Core); - INC_COUNTER("Summary-reuse", TotalSummaryReuse, Core); - INC_COUNTER("Intra Path Edges", NumIntraPathEdges, Core); - INC_COUNTER("Inter Path Edges", NumInterPathEdges, Core); - - PHASAR_LOG_LEVEL(INFO, "----------------------------------------------"); - PHASAR_LOG_LEVEL(INFO, "=== Solver Statistics ==="); - PHASAR_LOG_LEVEL(INFO, "#Facts generated : " << GET_COUNTER("Gen facts")); - PHASAR_LOG_LEVEL(INFO, "#Facts killed : " << GET_COUNTER("Kill facts")); - PHASAR_LOG_LEVEL(INFO, - "#Summary-reuse : " << GET_COUNTER("Summary-reuse")); - PHASAR_LOG_LEVEL(INFO, - "#Intra Path Edges: " << GET_COUNTER("Intra Path Edges")); - PHASAR_LOG_LEVEL(INFO, - "#Inter Path Edges: " << GET_COUNTER("Inter Path Edges")); - if constexpr (PAMM_CURR_SEV_LEVEL >= PAMM_SEVERITY_LEVEL::Full) { - PHASAR_LOG_LEVEL( - INFO, "Flow function query count: " << GET_COUNTER("FF Queries")); - PHASAR_LOG_LEVEL( - INFO, "Edge function query count: " << GET_COUNTER("EF Queries")); - PHASAR_LOG_LEVEL(INFO, "Data-flow value propagation count: " - << GET_COUNTER("Value Propagation")); - PHASAR_LOG_LEVEL(INFO, "Data-flow 
value computation count: " - << GET_COUNTER("Value Computation")); - PHASAR_LOG_LEVEL(INFO, - "Special flow function usage count: " - << GET_COUNTER("SpecialSummary-FF Application")); - PHASAR_LOG_LEVEL(INFO, "Jump function construciton count: " - << GET_COUNTER("JumpFn Construction")); - PHASAR_LOG_LEVEL(INFO, - "Phase I duration: " << PRINT_TIMER("DFA Phase I")); - PHASAR_LOG_LEVEL(INFO, - "Phase II duration: " << PRINT_TIMER("DFA Phase II")); - PHASAR_LOG_LEVEL(INFO, "----------------------------------------------"); - CachedFlowEdgeFunctions.print(); - } - } - -public: - void enableESGAsDot() { SolverConfig.setEmitESG(); } - - void - emitESGAsDot(llvm::raw_ostream &OS = llvm::outs(), - llvm::StringRef DotConfigDir = PhasarConfig::PhasarDirectory()) { - PHASAR_LOG_LEVEL(DEBUG, "Emit Exploded super-graph (ESG) as DOT graph"); - PHASAR_LOG_LEVEL(DEBUG, "Process intra-procedural path egdes"); - PHASAR_LOG_LEVEL(DEBUG, "============================================="); - DOTGraph G; - DOTConfig::importDOTConfig(DotConfigDir); - DOTFunctionSubGraph *FG = nullptr; - - // Sort intra-procedural path edges - auto Cells = ComputedIntraPathEdges.cellVec(); - StmtLess Stmtless(ICF); - sort(Cells.begin(), Cells.end(), [&Stmtless](auto Lhs, auto Rhs) { - return Stmtless(Lhs.getRowKey(), Rhs.getRowKey()); - }); - for (const auto &Cell : Cells) { - auto Edge = std::make_pair(Cell.getRowKey(), Cell.getColumnKey()); - std::string N1Label = NToString(Edge.first); - std::string N2Label = NToString(Edge.second); - PHASAR_LOG_LEVEL(DEBUG, "N1: " << N1Label); - PHASAR_LOG_LEVEL(DEBUG, "N2: " << N2Label); - std::string N1StmtId = ICF->getStatementId(Edge.first); - std::string N2StmtId = ICF->getStatementId(Edge.second); - std::string FuncName = ICF->getFunctionOf(Edge.first)->getName().str(); - // Get or create function subgraph - if (!FG || FG->Id != FuncName) { - FG = &G.Functions[FuncName]; - FG->Id = FuncName; - } - - // Create control flow nodes - DOTNode N1(FuncName, N1Label, 
N1StmtId); - DOTNode N2(FuncName, N2Label, N2StmtId); - // Add control flow node(s) to function subgraph - FG->Stmts.insert(N1); - if (ICF->isExitInst(Edge.second)) { - FG->Stmts.insert(N2); - } - - // Set control flow edge - FG->IntraCFEdges.emplace(N1, N2); - - DOTFactSubGraph *D1FSG = nullptr; - unsigned D1FactId = 0; - unsigned D2FactId = 0; - for (const auto &D1ToD2Set : Cell.getValue()) { - auto D1Fact = D1ToD2Set.first; - PHASAR_LOG_LEVEL(DEBUG, "d1: " << DToString(D1Fact)); - - DOTNode D1; - if (IDEProblem.isZeroValue(D1Fact)) { - D1 = {FuncName, "Λ", N1StmtId, 0, false, true}; - D1FactId = 0; - } else { - // Get the fact-ID - D1FactId = G.getFactID(D1Fact); - std::string D1Label = DToString(D1Fact); - - // Get or create the fact subgraph - D1FSG = FG->getOrCreateFactSG(D1FactId, D1Label); - - // Insert D1 to fact subgraph - D1 = {FuncName, D1Label, N1StmtId, D1FactId, false, true}; - D1FSG->Nodes.insert(std::make_pair(N1StmtId, D1)); - } - - DOTFactSubGraph *D2FSG = nullptr; - for (const auto &D2Fact : D1ToD2Set.second) { - PHASAR_LOG_LEVEL(DEBUG, "d2: " << DToString(D2Fact)); - // We do not need to generate any intra-procedural nodes and edges - // for the zero value since they will be auto-generated - if (!IDEProblem.isZeroValue(D2Fact)) { - // Get the fact-ID - D2FactId = G.getFactID(D2Fact); - std::string D2Label = DToString(D2Fact); - DOTNode D2 = {FuncName, D2Label, N2StmtId, D2FactId, false, true}; - std::string EFLabel; - auto EFVec = IntermediateEdgeFunctions[std::make_tuple( - Edge.first, D1Fact, Edge.second, D2Fact)]; - for (const auto &EF : EFVec) { - EFLabel += to_string(EF) + ", "; - } - PHASAR_LOG_LEVEL(DEBUG, "EF LABEL: " << EFLabel); - if (D1FactId == D2FactId && !IDEProblem.isZeroValue(D1Fact)) { - assert(D1FSG && "D1_FSG was nullptr but should be valid."); - D1FSG->Nodes.insert(std::make_pair(N2StmtId, D2)); - D1FSG->Edges.emplace(D1, D2, true, EFLabel); - } else { - // Get or create the fact subgraph - D2FSG = 
FG->getOrCreateFactSG(D2FactId, D2Label); - - D2FSG->Nodes.insert(std::make_pair(N2StmtId, D2)); - FG->CrossFactEdges.emplace(D1, D2, true, EFLabel); - } - } - } - PHASAR_LOG_LEVEL(DEBUG, "----------"); - } - PHASAR_LOG_LEVEL(DEBUG, " "); - } - - PHASAR_LOG_LEVEL(DEBUG, "============================================="); - PHASAR_LOG_LEVEL(DEBUG, "Process inter-procedural path edges"); - PHASAR_LOG_LEVEL(DEBUG, "============================================="); - Cells = ComputedInterPathEdges.cellVec(); - sort(Cells.begin(), Cells.end(), [&Stmtless](auto Lhs, auto Rhs) { - return Stmtless(Lhs.getRowKey(), Rhs.getRowKey()); - }); - for (const auto &Cell : Cells) { - auto Edge = std::make_pair(Cell.getRowKey(), Cell.getColumnKey()); - std::string N1Label = NToString(Edge.first); - std::string N2Label = NToString(Edge.second); - std::string FNameOfN1 = ICF->getFunctionOf(Edge.first)->getName().str(); - std::string FNameOfN2 = ICF->getFunctionOf(Edge.second)->getName().str(); - std::string N1StmtId = ICF->getStatementId(Edge.first); - std::string N2StmtId = ICF->getStatementId(Edge.second); - PHASAR_LOG_LEVEL(DEBUG, "N1: " << N1Label); - PHASAR_LOG_LEVEL(DEBUG, "N2: " << N2Label); - - // Add inter-procedural control flow edge - DOTNode N1(FNameOfN1, N1Label, N1StmtId); - DOTNode N2(FNameOfN2, N2Label, N2StmtId); - - // Handle recursion control flow as intra-procedural control flow - // since those eges never leave the function subgraph - FG = nullptr; - if (FNameOfN1 == FNameOfN2) { - // This function subgraph is guaranteed to exist - FG = &G.Functions[FNameOfN1]; - FG->IntraCFEdges.emplace(N1, N2); - } else { - // Check the case where the callee is a single statement function, - // thus does not contain intra-procedural path edges. We have to - // generate the function sub graph here! 
- if (!G.Functions.count(FNameOfN1)) { - FG = &G.Functions[FNameOfN1]; - FG->Id = FNameOfN1; - FG->Stmts.insert(N1); - } else if (!G.Functions.count(FNameOfN2)) { - FG = &G.Functions[FNameOfN2]; - FG->Id = FNameOfN2; - FG->Stmts.insert(N2); - } - G.InterCFEdges.emplace(N1, N2); - } - - // Create D1 and D2, if D1 == D2 == lambda then add Edge(D1, D2) to - // interLambdaEges otherwise add Edge(D1, D2) to interFactEdges - unsigned D1FactId = 0; - unsigned D2FactId = 0; - for (const auto &D1ToD2Set : Cell.getValue()) { - auto D1Fact = D1ToD2Set.first; - PHASAR_LOG_LEVEL(DEBUG, "d1: " << DToString(D1Fact)); - DOTNode D1; - if (IDEProblem.isZeroValue(D1Fact)) { - D1 = {FNameOfN1, "Λ", N1StmtId, 0, false, true}; - } else { - // Get the fact-ID - D1FactId = G.getFactID(D1Fact); - std::string D1Label = DToString(D1Fact); - D1 = {FNameOfN1, D1Label, N1StmtId, D1FactId, false, true}; - // FG should already exist even for single statement functions - if (!G.containsFactSG(FNameOfN1, D1FactId)) { - FG = &G.Functions[FNameOfN1]; - auto *D1FSG = FG->getOrCreateFactSG(D1FactId, D1Label); - D1FSG->Nodes.insert(std::make_pair(N1StmtId, D1)); - } - } - - auto D2Set = D1ToD2Set.second; - for (const auto &D2Fact : D2Set) { - PHASAR_LOG_LEVEL(DEBUG, "d2: " << DToString(D2Fact)); - DOTNode D2; - if (IDEProblem.isZeroValue(D2Fact)) { - D2 = {FNameOfN2, "Λ", N2StmtId, 0, false, true}; - } else { - // Get the fact-ID - D2FactId = G.getFactID(D2Fact); - std::string D2Label = DToString(D2Fact); - D2 = {FNameOfN2, D2Label, N2StmtId, D2FactId, false, true}; - // FG should already exist even for single statement functions - if (!G.containsFactSG(FNameOfN2, D2FactId)) { - FG = &G.Functions[FNameOfN2]; - auto *D2FSG = FG->getOrCreateFactSG(D2FactId, D2Label); - D2FSG->Nodes.insert(std::make_pair(N2StmtId, D2)); - } - } - - if (IDEProblem.isZeroValue(D1Fact) && - IDEProblem.isZeroValue(D2Fact)) { - // Do not add lambda recursion edges as inter-procedural edges - if (D1.FuncName != D2.FuncName) { - 
G.InterLambdaEdges.emplace(D1, D2, true, "AllBottom", "BOT"); - } - } else { - // std::string EFLabel = EF ? EF->str() : " "; - std::string EFLabel; - auto EFVec = IntermediateEdgeFunctions[std::make_tuple( - Edge.first, D1Fact, Edge.second, D2Fact)]; - for (const auto &EF : EFVec) { - PHASAR_LOG_LEVEL(DEBUG, "Partial EF Label: " << EF); - EFLabel.append(to_string(EF) + ", "); - } - PHASAR_LOG_LEVEL(DEBUG, "EF LABEL: " << EFLabel); - G.InterFactEdges.emplace(D1, D2, true, EFLabel); - } - } - PHASAR_LOG_LEVEL(DEBUG, "----------"); - } - PHASAR_LOG_LEVEL(DEBUG, " "); - } - OS << G; - } - -private: - /// @brief: Allows less-than comparison based on the statement ID. - struct StmtLess { - const i_t *ICF; - StringIDLess StrIDLess; - StmtLess(const i_t *ICF) : ICF(ICF), StrIDLess(StringIDLess()) {} - bool operator()(n_t Lhs, n_t Rhs) { - return StrIDLess(ICF->getStatementId(Lhs), ICF->getStatementId(Rhs)); - } - }; - - /// -- InteractiveIDESolverMixin implementation - - bool doInitialize() { - PAMM_GET_INSTANCE; - REG_COUNTER("Gen facts", 0, Core); - REG_COUNTER("Kill facts", 0, Core); - REG_COUNTER("Summary-reuse", 0, Core); - REG_COUNTER("Intra Path Edges", 0, Core); - REG_COUNTER("Inter Path Edges", 0, Core); - REG_COUNTER("FF Queries", 0, Full); - REG_COUNTER("EF Queries", 0, Full); - REG_COUNTER("Value Propagation", 0, Full); - REG_COUNTER("Value Computation", 0, Full); - REG_COUNTER("SpecialSummary-FF Application", 0, Full); - REG_COUNTER("SpecialSummary-EF Queries", 0, Full); - REG_COUNTER("JumpFn Construction", 0, Full); - REG_COUNTER("Process Call", 0, Full); - REG_COUNTER("Process Normal", 0, Full); - REG_COUNTER("Process Exit", 0, Full); - REG_COUNTER("[Calls] getAliasSet", 0, Full); - REG_HISTOGRAM("Data-flow facts", Full); - REG_HISTOGRAM("Points-to", Full); - - PHASAR_LOG_LEVEL(INFO, "IDE solver is solving the specified problem"); - PHASAR_LOG_LEVEL(INFO, - "Submit initial seeds, construct exploded super graph"); - // computations starting here - 
START_TIMER("DFA Phase I", Full); - - // We start our analysis and construct exploded supergraph - submitInitialSeeds(); - return !WorkList.empty(); - } - - bool doNext() { - assert(!WorkList.empty()); - auto Edge = std::move(WorkList.back()); - WorkList.pop_back(); - - propagate(std::move(Edge)); - - return !WorkList.empty(); - } - - void finalizeInternal() { - PAMM_GET_INSTANCE; - STOP_TIMER("DFA Phase I", Full); - if (SolverConfig.computeValues()) { - START_TIMER("DFA Phase II", Full); - // Computing the final values for the edge functions - PHASAR_LOG_LEVEL( - INFO, "Compute the final values according to the edge functions"); - computeValues(); - STOP_TIMER("DFA Phase II", Full); - } - PHASAR_LOG_LEVEL(INFO, "Problem solved"); - if constexpr (PAMM_CURR_SEV_LEVEL >= PAMM_SEVERITY_LEVEL::Core) { - computeAndPrintStatistics(); - } - if (SolverConfig.emitESG()) { - emitESGAsDot(); - } - } - - SolverResults doFinalize() & { - finalizeInternal(); - return getSolverResults(); - } - - OwningSolverResults doFinalize() && { - finalizeInternal(); - return consumeSolverResults(); - } - - /// -- Data members - - IDETabulationProblem &IDEProblem; - d_t ZeroValue; - const i_t *ICF; - IFDSIDESolverConfig &SolverConfig; - - std::vector> WorkList; - std::vector> ValuePropWL; - - size_t PathEdgeCount = 0; - - FlowEdgeFunctionCache CachedFlowEdgeFunctions; - - Table> ComputedIntraPathEdges; - - Table> ComputedInterPathEdges; - - EdgeFunction AllTop; - - std::shared_ptr> JumpFn; - - std::map, std::vector>> - IntermediateEdgeFunctions; - - // stores summaries that were queried before they were computed - // see CC 2010 paper by Naeem, Lhotak and Rodriguez - Table>> EndsummaryTab; - - // edges going along calls - // see CC 2010 paper by Naeem, Lhotak and Rodriguez - Table> IncomingTab; - - // stores the return sites (inside callers) to which we have unbalanced - // returns if SolverConfig.followReturnPastSeeds is enabled - std::set UnbalancedRetSites; - - InitialSeeds Seeds; - - 
Table ValTab; - - std::map, size_t> FSummaryReuse; + const i_t *ICF, PropagateAfterStrategy Strategy = {}) + : base_t(Problem, ICF, Strategy) {} }; template diff --git a/include/phasar/DataFlow/IfdsIde/Solver/FlowEdgeFunctionCache.h b/include/phasar/DataFlow/IfdsIde/Solver/detail/FlowEdgeFunctionCache.h similarity index 100% rename from include/phasar/DataFlow/IfdsIde/Solver/FlowEdgeFunctionCache.h rename to include/phasar/DataFlow/IfdsIde/Solver/detail/FlowEdgeFunctionCache.h diff --git a/include/phasar/DataFlow/IfdsIde/Solver/detail/IDESolverImpl.h b/include/phasar/DataFlow/IfdsIde/Solver/detail/IDESolverImpl.h new file mode 100644 index 000000000..7ee1da590 --- /dev/null +++ b/include/phasar/DataFlow/IfdsIde/Solver/detail/IDESolverImpl.h @@ -0,0 +1,1696 @@ +/****************************************************************************** + * Copyright (c) 2017 Philipp Schubert. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. 
+ * + * Contributors: + * Philipp Schubert, Fabian Schiebel and others + *****************************************************************************/ + +#ifndef PHASAR_DATAFLOW_IFDSIDE_SOLVER_IDESOLVERIMPL_H +#define PHASAR_DATAFLOW_IFDSIDE_SOLVER_IDESOLVERIMPL_H + +#include "phasar/AnalysisStrategy/Strategies.h" +#include "phasar/DataFlow/IfdsIde/IDETabulationProblem.h" +#include "phasar/DataFlow/IfdsIde/Solver/IDESolverAPIMixin.h" +#include "phasar/DataFlow/IfdsIde/Solver/JumpFunctions.h" +#include "phasar/DataFlow/IfdsIde/Solver/SolverStrategy.h" +#include "phasar/DataFlow/IfdsIde/Solver/detail/FlowEdgeFunctionCache.h" +#include "phasar/DataFlow/IfdsIde/Solver/detail/PathEdge.h" +#include "phasar/Utils/DOTGraph.h" + +#include + +namespace psr { +template +class IDESolverImpl : public IDESolverAPIMixin { +public: + using ProblemTy = IDETabulationProblem; + using container_type = typename ProblemTy::container_type; + using FlowFunctionPtrType = typename ProblemTy::FlowFunctionPtrType; + + using l_t = typename AnalysisDomainTy::l_t; + using n_t = typename AnalysisDomainTy::n_t; + using i_t = typename AnalysisDomainTy::i_t; + using d_t = typename AnalysisDomainTy::d_t; + using f_t = typename AnalysisDomainTy::f_t; + using t_t = typename AnalysisDomainTy::t_t; + using v_t = typename AnalysisDomainTy::v_t; + + IDESolverImpl(const IDESolverImpl &) = delete; + IDESolverImpl &operator=(const IDESolverImpl &) = delete; + IDESolverImpl(IDESolverImpl &&) = delete; + IDESolverImpl &operator=(IDESolverImpl &&) = delete; + + virtual ~IDESolverImpl() = default; + + /// Returns a view into the computed solver-results. + /// + /// NOTE: The SolverResults store a reference into this IDESolver, so its + /// lifetime is also bound to the lifetime of this solver. If you want to use + /// the solverResults beyond the lifetime of this solver, use + /// comsumeSolverResults() instead. 
+ [[nodiscard]] SolverResults getSolverResults() const noexcept { + return SolverResults(this->ValTab, ZeroValue); + } + + /// Moves the computed solver-results out of this solver, so that the solver + /// can be destroyed without losing the analysis results. + /// Do not call any function (including getSolverResults()) on this IDESolver + /// instance afterwards. + [[nodiscard]] OwningSolverResults + consumeSolverResults() noexcept(std::is_nothrow_move_constructible_v) { + return OwningSolverResults(std::move(this->ValTab), + std::move(ZeroValue)); + } + + /// Returns the resulting environment for the given statement. + /// The artificial zero value can be automatically stripped. + /// TOP values are never returned. + [[nodiscard]] decltype(auto) resultsAt(n_t Stmt, bool StripZero) const { + return self().getSolverResults().resultsAt(Stmt, StripZero); + } + + /// Returns the resulting environment for the given statement. + /// TOP values are never returned. + [[nodiscard]] decltype(auto) resultsAt(n_t Stmt) const { + return self().getSolverResults().resultsAt(Stmt); + } + + /// Returns the L-type result for the given value at the given statement. 
+ [[nodiscard]] decltype(auto) resultAt(n_t Stmt, d_t Value) const { + return self().getSolverResults().resultAt(Stmt, Value); + } + + [[nodiscard]] nlohmann::json getAsJson() const { + using TableCell = typename Table::Cell; + const static std::string DataFlowID = "DataFlow"; + nlohmann::json J; + auto Results = ValTab.cellSet(); + if (Results.empty()) { + J[DataFlowID] = "EMPTY"; + } else { + std::vector Cells(Results.begin(), Results.end()); + sort(Cells.begin(), Cells.end(), [](TableCell Lhs, TableCell Rhs) { + return Lhs.getRowKey() < Rhs.getRowKey(); + }); + n_t Curr; + for (unsigned I = 0; I < Cells.size(); ++I) { + Curr = Cells[I].getRowKey(); + auto NStr = + llvm::StringRef(NToString(Cells[I].getRowKey())).trim().str(); + + std::string NodeStr = + ICF->getFunctionName(ICF->getFunctionOf(Curr)) + "::" + NStr; + J[DataFlowID][NodeStr]; + std::string FactStr = + llvm::StringRef(DToString(Cells[I].getColumnKey())).trim().str(); + std::string ValueStr = + llvm::StringRef(LToString(Cells[I].getValue())).trim().str(); + J[DataFlowID][NodeStr]["Facts"] += {FactStr, ValueStr}; + } + } + return J; + } + + virtual void emitTextReport(llvm::raw_ostream &OS = llvm::outs()) { + IDEProblem.emitTextReport(getSolverResults(), OS); + } + + virtual void emitGraphicalReport(llvm::raw_ostream &OS = llvm::outs()) { + IDEProblem.emitGraphicalReport(getSolverResults(), OS); + } + + void dumpResults(llvm::raw_ostream &OS = llvm::outs()) { + self().getSolverResults().dumpResults(*ICF, OS); + } + + void dumpAllInterPathEdges() { + llvm::outs() << "COMPUTED INTER PATH EDGES" << '\n'; + auto Interpe = ComputedInterPathEdges.cellSet(); + for (const auto &Cell : Interpe) { + llvm::outs() << "FROM" << '\n'; + IDEProblem.printNode(llvm::outs(), Cell.getRowKey()); + llvm::outs() << "TO" << '\n'; + IDEProblem.printNode(llvm::outs(), Cell.getColumnKey()); + llvm::outs() << "FACTS" << '\n'; + for (const auto &Fact : Cell.getValue()) { + llvm::outs() << "fact" << '\n'; + 
IDEProblem.printDataFlowFact(llvm::outs(), Fact.first); + llvm::outs() << "produces" << '\n'; + for (const auto &Out : Fact.second) { + IDEProblem.printDataFlowFact(llvm::outs(), Out); + } + } + } + } + + void dumpAllIntraPathEdges() { + llvm::outs() << "COMPUTED INTRA PATH EDGES" << '\n'; + auto Intrape = ComputedIntraPathEdges.cellSet(); + for (auto &Cell : Intrape) { + llvm::outs() << "FROM" << '\n'; + IDEProblem.printNode(llvm::outs(), Cell.getRowKey()); + llvm::outs() << "TO" << '\n'; + IDEProblem.printNode(llvm::outs(), Cell.getColumnKey()); + llvm::outs() << "FACTS" << '\n'; + for (auto &Fact : Cell.getValue()) { + llvm::outs() << "fact" << '\n'; + IDEProblem.printDataFlowFact(llvm::outs(), Fact.first); + llvm::outs() << "produces" << '\n'; + for (auto &Out : Fact.second) { + IDEProblem.printDataFlowFact(llvm::outs(), Out); + } + } + } + } + + void enableESGAsDot() { SolverConfig.setEmitESG(); } + + void + emitESGAsDot(llvm::raw_ostream &OS = llvm::outs(), + llvm::StringRef DotConfigDir = PhasarConfig::PhasarDirectory()) { + PHASAR_LOG_LEVEL(DEBUG, "Emit Exploded super-graph (ESG) as DOT graph"); + PHASAR_LOG_LEVEL(DEBUG, "Process intra-procedural path egdes"); + PHASAR_LOG_LEVEL(DEBUG, "============================================="); + DOTGraph G; + DOTConfig::importDOTConfig(DotConfigDir); + DOTFunctionSubGraph *FG = nullptr; + + // Sort intra-procedural path edges + auto Cells = ComputedIntraPathEdges.cellVec(); + StmtLess Stmtless(ICF); + std::sort(Cells.begin(), Cells.end(), [&Stmtless](auto Lhs, auto Rhs) { + return Stmtless(Lhs.getRowKey(), Rhs.getRowKey()); + }); + for (const auto &Cell : Cells) { + auto Edge = std::make_pair(Cell.getRowKey(), Cell.getColumnKey()); + std::string N1Label = NToString(Edge.first); + std::string N2Label = NToString(Edge.second); + PHASAR_LOG_LEVEL(DEBUG, "N1: " << N1Label); + PHASAR_LOG_LEVEL(DEBUG, "N2: " << N2Label); + std::string N1StmtId = ICF->getStatementId(Edge.first); + std::string N2StmtId = 
ICF->getStatementId(Edge.second); + std::string FuncName = ICF->getFunctionOf(Edge.first)->getName().str(); + // Get or create function subgraph + if (!FG || FG->Id != FuncName) { + FG = &G.Functions[FuncName]; + FG->Id = FuncName; + } + + // Create control flow nodes + DOTNode N1(FuncName, N1Label, N1StmtId); + DOTNode N2(FuncName, N2Label, N2StmtId); + // Add control flow node(s) to function subgraph + FG->Stmts.insert(N1); + if (ICF->isExitInst(Edge.second)) { + FG->Stmts.insert(N2); + } + + // Set control flow edge + FG->IntraCFEdges.emplace(N1, N2); + + DOTFactSubGraph *D1FSG = nullptr; + unsigned D1FactId = 0; + unsigned D2FactId = 0; + for (const auto &D1ToD2Set : Cell.getValue()) { + auto D1Fact = D1ToD2Set.first; + PHASAR_LOG_LEVEL(DEBUG, "d1: " << DToString(D1Fact)); + + DOTNode D1; + if (IDEProblem.isZeroValue(D1Fact)) { + D1 = {FuncName, "Λ", N1StmtId, 0, false, true}; + D1FactId = 0; + } else { + // Get the fact-ID + D1FactId = G.getFactID(D1Fact); + std::string D1Label = DToString(D1Fact); + + // Get or create the fact subgraph + D1FSG = FG->getOrCreateFactSG(D1FactId, D1Label); + + // Insert D1 to fact subgraph + D1 = {FuncName, D1Label, N1StmtId, D1FactId, false, true}; + D1FSG->Nodes.insert(std::make_pair(N1StmtId, D1)); + } + + DOTFactSubGraph *D2FSG = nullptr; + for (const auto &D2Fact : D1ToD2Set.second) { + PHASAR_LOG_LEVEL(DEBUG, "d2: " << DToString(D2Fact)); + // We do not need to generate any intra-procedural nodes and edges + // for the zero value since they will be auto-generated + if (!IDEProblem.isZeroValue(D2Fact)) { + // Get the fact-ID + D2FactId = G.getFactID(D2Fact); + std::string D2Label = DToString(D2Fact); + DOTNode D2 = {FuncName, D2Label, N2StmtId, D2FactId, false, true}; + std::string EFLabel; + auto EFVec = IntermediateEdgeFunctions[std::make_tuple( + Edge.first, D1Fact, Edge.second, D2Fact)]; + for (const auto &EF : EFVec) { + EFLabel += to_string(EF) + ", "; + } + PHASAR_LOG_LEVEL(DEBUG, "EF LABEL: " << EFLabel); + if 
(D1FactId == D2FactId && !IDEProblem.isZeroValue(D1Fact)) { + assert(D1FSG && "D1_FSG was nullptr but should be valid."); + D1FSG->Nodes.insert(std::make_pair(N2StmtId, D2)); + D1FSG->Edges.emplace(D1, D2, true, EFLabel); + } else { + // Get or create the fact subgraph + D2FSG = FG->getOrCreateFactSG(D2FactId, D2Label); + + D2FSG->Nodes.insert(std::make_pair(N2StmtId, D2)); + FG->CrossFactEdges.emplace(D1, D2, true, EFLabel); + } + } + } + PHASAR_LOG_LEVEL(DEBUG, "----------"); + } + PHASAR_LOG_LEVEL(DEBUG, " "); + } + + PHASAR_LOG_LEVEL(DEBUG, "============================================="); + PHASAR_LOG_LEVEL(DEBUG, "Process inter-procedural path edges"); + PHASAR_LOG_LEVEL(DEBUG, "============================================="); + Cells = ComputedInterPathEdges.cellVec(); + std::sort(Cells.begin(), Cells.end(), [&Stmtless](auto Lhs, auto Rhs) { + return Stmtless(Lhs.getRowKey(), Rhs.getRowKey()); + }); + for (const auto &Cell : Cells) { + auto Edge = std::make_pair(Cell.getRowKey(), Cell.getColumnKey()); + std::string N1Label = NToString(Edge.first); + std::string N2Label = NToString(Edge.second); + std::string FNameOfN1 = ICF->getFunctionOf(Edge.first)->getName().str(); + std::string FNameOfN2 = ICF->getFunctionOf(Edge.second)->getName().str(); + std::string N1StmtId = ICF->getStatementId(Edge.first); + std::string N2StmtId = ICF->getStatementId(Edge.second); + PHASAR_LOG_LEVEL(DEBUG, "N1: " << N1Label); + PHASAR_LOG_LEVEL(DEBUG, "N2: " << N2Label); + + // Add inter-procedural control flow edge + DOTNode N1(FNameOfN1, N1Label, N1StmtId); + DOTNode N2(FNameOfN2, N2Label, N2StmtId); + + // Handle recursion control flow as intra-procedural control flow + // since those eges never leave the function subgraph + FG = nullptr; + if (FNameOfN1 == FNameOfN2) { + // This function subgraph is guaranteed to exist + FG = &G.Functions[FNameOfN1]; + FG->IntraCFEdges.emplace(N1, N2); + } else { + // Check the case where the callee is a single statement function, + // thus 
does not contain intra-procedural path edges. We have to + // generate the function sub graph here! + if (!G.Functions.count(FNameOfN1)) { + FG = &G.Functions[FNameOfN1]; + FG->Id = FNameOfN1; + FG->Stmts.insert(N1); + } else if (!G.Functions.count(FNameOfN2)) { + FG = &G.Functions[FNameOfN2]; + FG->Id = FNameOfN2; + FG->Stmts.insert(N2); + } + G.InterCFEdges.emplace(N1, N2); + } + + // Create D1 and D2, if D1 == D2 == lambda then add Edge(D1, D2) to + // interLambdaEges otherwise add Edge(D1, D2) to interFactEdges + unsigned D1FactId = 0; + unsigned D2FactId = 0; + for (const auto &D1ToD2Set : Cell.getValue()) { + auto D1Fact = D1ToD2Set.first; + PHASAR_LOG_LEVEL(DEBUG, "d1: " << DToString(D1Fact)); + DOTNode D1; + if (IDEProblem.isZeroValue(D1Fact)) { + D1 = {FNameOfN1, "Λ", N1StmtId, 0, false, true}; + } else { + // Get the fact-ID + D1FactId = G.getFactID(D1Fact); + std::string D1Label = DToString(D1Fact); + D1 = {FNameOfN1, D1Label, N1StmtId, D1FactId, false, true}; + // FG should already exist even for single statement functions + if (!G.containsFactSG(FNameOfN1, D1FactId)) { + FG = &G.Functions[FNameOfN1]; + auto *D1FSG = FG->getOrCreateFactSG(D1FactId, D1Label); + D1FSG->Nodes.insert(std::make_pair(N1StmtId, D1)); + } + } + + auto D2Set = D1ToD2Set.second; + for (const auto &D2Fact : D2Set) { + PHASAR_LOG_LEVEL(DEBUG, "d2: " << DToString(D2Fact)); + DOTNode D2; + if (IDEProblem.isZeroValue(D2Fact)) { + D2 = {FNameOfN2, "Λ", N2StmtId, 0, false, true}; + } else { + // Get the fact-ID + D2FactId = G.getFactID(D2Fact); + std::string D2Label = DToString(D2Fact); + D2 = {FNameOfN2, D2Label, N2StmtId, D2FactId, false, true}; + // FG should already exist even for single statement functions + if (!G.containsFactSG(FNameOfN2, D2FactId)) { + FG = &G.Functions[FNameOfN2]; + auto *D2FSG = FG->getOrCreateFactSG(D2FactId, D2Label); + D2FSG->Nodes.insert(std::make_pair(N2StmtId, D2)); + } + } + + if (IDEProblem.isZeroValue(D1Fact) && + IDEProblem.isZeroValue(D2Fact)) { + 
// Do not add lambda recursion edges as inter-procedural edges + if (D1.FuncName != D2.FuncName) { + G.InterLambdaEdges.emplace(D1, D2, true, "AllBottom", "BOT"); + } + } else { + // std::string EFLabel = EF ? EF->str() : " "; + std::string EFLabel; + auto EFVec = IntermediateEdgeFunctions[std::make_tuple( + Edge.first, D1Fact, Edge.second, D2Fact)]; + for (const auto &EF : EFVec) { + PHASAR_LOG_LEVEL(DEBUG, "Partial EF Label: " << EF); + EFLabel.append(to_string(EF) + ", "); + } + PHASAR_LOG_LEVEL(DEBUG, "EF LABEL: " << EFLabel); + G.InterFactEdges.emplace(D1, D2, true, EFLabel); + } + } + PHASAR_LOG_LEVEL(DEBUG, "----------"); + } + PHASAR_LOG_LEVEL(DEBUG, " "); + } + OS << G; + } + +private: + /// -- The actual IFDS/IDE implementation (with customization points) + + /// Propagates the flow further down the exploded super graph, merging any + /// edge function that might already have been computed for TargetVal at + /// Target. + /// + /// @param SourceVal the source value of the propagated summary edge + /// @param Target the target statement + /// @param TargetVal the target value at the target statement + /// @param f the new edge function computed from (s0,SourceVal) to + /// (Target,TargetVal) + /// @param relatedCallSite for call and return flows the related call + /// statement, nullptr otherwise (this value is not used within this + /// implementation but may be useful for subclasses of IDESolver) + /// @param isUnbalancedReturn true if this edge is propagating an + /// unbalanced return (this value is not used within this implementation + /// but may be useful for subclasses of {@link IDESolver}) + /// + void propagate(PathEdge Edge, EdgeFunction EF) { + const auto &[SourceVal, Target, TargetVal] = Edge.get(); + + PHASAR_LOG_LEVEL(DEBUG, "Propagate flow"); + PHASAR_LOG_LEVEL(DEBUG, "Source value : " << DToString(SourceVal)); + PHASAR_LOG_LEVEL(DEBUG, "Target : " << NToString(Target)); + PHASAR_LOG_LEVEL(DEBUG, "Target value : " << DToString(TargetVal)); 
+ + PathEdgeCount++; + self().pathEdgeProcessingTask(std::move(Edge), std::move(EF)); + } + + void pathEdgeProcessingTask(PathEdge Edge, EdgeFunction EF) { + PAMM_GET_INSTANCE; + INC_COUNTER("JumpFn Construction", 1, Full); + IF_LOG_ENABLED( + PHASAR_LOG_LEVEL( + DEBUG, + "-------------------------------------------- " + << PathEdgeCount + << ". Path Edge --------------------------------------------"); + PHASAR_LOG_LEVEL(DEBUG, ' '); + PHASAR_LOG_LEVEL(DEBUG, "Process " << PathEdgeCount << ". path edge:"); + PHASAR_LOG_LEVEL(DEBUG, "< D source: " << DToString(Edge.factAtSource()) + << " ;"); + PHASAR_LOG_LEVEL(DEBUG, + " N target: " << NToString(Edge.getTarget()) << " ;"); + PHASAR_LOG_LEVEL(DEBUG, " D target: " << DToString(Edge.factAtTarget()) + << " >"); + PHASAR_LOG_LEVEL(DEBUG, ' ')); + + if (!ICF->isCallSite(Edge.getTarget())) { + if (ICF->isExitInst(Edge.getTarget())) { + self().processExit(Edge, EF); + } + if (!ICF->getSuccsOf(Edge.getTarget()).empty()) { + self().processNormalFlow(std::move(Edge), std::move(EF)); + } + } else { + self().processCall(std::move(Edge), std::move(EF)); + } + } + + /// Lines 33-37 of the algorithm. + /// Simply propagate normal, intra-procedural flows. 
+ /// @param edge + /// + void processNormalFlow(PathEdge Edge, EdgeFunction f) { + PAMM_GET_INSTANCE; + INC_COUNTER("Process Normal", 1, Full); + PHASAR_LOG_LEVEL( + DEBUG, "Process normal at target: " << NToString(Edge.getTarget())); + auto [d1, n, d2] = Edge.consume(); + + for (const auto nPrime : ICF->getSuccsOf(n)) { + FlowFunctionPtrType FlowFunc = + CachedFlowEdgeFunctions.getNormalFlowFunction(n, nPrime); + INC_COUNTER("FF Queries", 1, Full); + const container_type Res = computeNormalFlowFunction(FlowFunc, d1, d2); + ADD_TO_HISTOGRAM("Data-flow facts", Res.size(), 1, Full); + self().saveEdges(n, nPrime, d2, Res, false); + for (d_t d3 : Res) { + EdgeFunction g = + CachedFlowEdgeFunctions.getNormalEdgeFunction(n, d2, nPrime, d3); + PHASAR_LOG_LEVEL(DEBUG, "Queried Normal Edge Function: " << g); + EdgeFunction fPrime = f.composeWith(g); + if (SolverConfig.emitESG()) { + IntermediateEdgeFunctions[std::make_tuple(n, d2, nPrime, d3)] + .push_back(g); + } + PHASAR_LOG_LEVEL(DEBUG, + "Compose: " << g << " * " << f << " = " << fPrime); + INC_COUNTER("EF Queries", 1, Full); + self().addWorklistItem(d1, nPrime, std::move(d3), std::move(fPrime)); + } + } + } + + /// Lines 13-20 of the algorithm; processing a call site in the caller's + /// context. + /// + /// For each possible callee, registers incoming call edges. + /// Also propagates call-to-return flows and summarized callee flows within + /// the caller. + /// + /// The following cases must be considered and handled: + /// 1. Process as usual and just process the call + /// 2. Create a new summary for that function (which shall be done + /// by the problem) + /// 3. Just use an existing summary provided by the problem + /// 4. 
If a special function is called, use a special summary + /// function + /// + /// @param edge an edge whose target node resembles a method call + /// + void processCall(PathEdge Edge, EdgeFunction f) { + PAMM_GET_INSTANCE; + INC_COUNTER("Process Call", 1, Full); + PHASAR_LOG_LEVEL(DEBUG, + "Process call at target: " << NToString(Edge.getTarget())); + d_t d1 = Edge.factAtSource(); + n_t n = Edge.getTarget(); + // a call node; line 14... + d_t d2 = Edge.factAtTarget(); + const auto &ReturnSiteNs = ICF->getReturnSitesOfCallAt(n); + const auto &Callees = ICF->getCalleesOfCallAt(n); + + IF_LOG_ENABLED( + PHASAR_LOG_LEVEL(DEBUG, "Possible callees:"); for (auto Callee + : Callees) { + PHASAR_LOG_LEVEL(DEBUG, " " << Callee->getName()); + } PHASAR_LOG_LEVEL(DEBUG, "Possible return sites:"); + for (auto ret + : ReturnSiteNs) { + PHASAR_LOG_LEVEL(DEBUG, " " << NToString(ret)); + }); + + // for each possible callee + for (f_t SCalledProcN : Callees) { // still line 14 + // check if a special summary for the called procedure exists + FlowFunctionPtrType SpecialSum = + CachedFlowEdgeFunctions.getSummaryFlowFunction(n, SCalledProcN); + // if a special summary is available, treat this as a normal flow + // and use the summary flow and edge functions + if (SpecialSum) { + PHASAR_LOG_LEVEL(DEBUG, "Found and process special summary"); + for (n_t ReturnSiteN : ReturnSiteNs) { + container_type Res = + self().computeSummaryFlowFunction(SpecialSum, d1, d2); + INC_COUNTER("SpecialSummary-FF Application", 1, Full); + ADD_TO_HISTOGRAM("Data-flow facts", Res.size(), 1, Full); + self().saveEdges(n, ReturnSiteN, d2, Res, false); + for (d_t d3 : Res) { + EdgeFunction SumEdgFnE = + CachedFlowEdgeFunctions.getSummaryEdgeFunction(n, d2, + ReturnSiteN, d3); + INC_COUNTER("SpecialSummary-EF Queries", 1, Full); + IF_LOG_ENABLED( + PHASAR_LOG_LEVEL( + DEBUG, "Queried Summary Edge Function: " << SumEdgFnE); + PHASAR_LOG_LEVEL(DEBUG, "Compose: " << SumEdgFnE << " * " << f + << '\n')); + 
self().addWorklistItem(d1, ReturnSiteN, std::move(d3), + f.composeWith(SumEdgFnE)); + } + } + } else { + // compute the call-flow function + FlowFunctionPtrType Function = + CachedFlowEdgeFunctions.getCallFlowFunction(n, SCalledProcN); + INC_COUNTER("FF Queries", 1, Full); + container_type Res = computeCallFlowFunction(Function, d1, d2); + ADD_TO_HISTOGRAM("Data-flow facts", Res.size(), 1, Full); + // for each callee's start point(s) + auto StartPointsOf = ICF->getStartPointsOf(SCalledProcN); + if (StartPointsOf.empty()) { + PHASAR_LOG_LEVEL(DEBUG, "Start points of '" + + ICF->getFunctionName(SCalledProcN) + + "' currently not available!"); + } + // if startPointsOf is empty, the called function is a declaration + for (n_t SP : StartPointsOf) { + self().saveEdges(n, SP, d2, Res, true); + // for each result node of the call-flow function + for (d_t d3 : Res) { + using TableCell = typename Table>::Cell; + // create initial self-loop + PHASAR_LOG_LEVEL( + DEBUG, "Create initial self-loop with D: " << DToString(d3)); + self().addWorklistItem(d3, SP, d3, EdgeIdentity{}); // line 15 + + // register the fact that has an incoming edge from + // line 15.1 of Naeem/Lhotak/Rodriguez + self().addIncoming(SP, d3, n, d2); + // line 15.2, copy to avoid concurrent modification exceptions by + // other threads + // const std::set endSumm(endSummary(sP, d3)); + // llvm::outs() << "ENDSUMM" << '\n'; + // llvm::outs() << "Size: " << endSumm.size() << '\n'; + // llvm::outs() << "sP: " << NToString(sP) + // << "\nd3: " << DToString(d3) + // << '\n'; + // printEndSummaryTab(); + // still line 15.2 of Naeem/Lhotak/Rodriguez + // for each already-queried exit value reachable from + // , create new caller-side jump functions to the return + // sites because we have observed a potentially new incoming + // edge into + for (const TableCell &Entry : self().endSummary(SP, d3)) { + n_t eP = Entry.getRowKey(); + d_t d4 = Entry.getColumnKey(); + EdgeFunction fCalleeSummary = Entry.getValue(); + // 
for each return site + for (n_t RetSiteN : ReturnSiteNs) { + // compute return-flow function + FlowFunctionPtrType RetFunction = + CachedFlowEdgeFunctions.getRetFlowFunction(n, SCalledProcN, + eP, RetSiteN); + INC_COUNTER("FF Queries", 1, Full); + const container_type ReturnedFacts = + self().computeReturnFlowFunction(RetFunction, d3, d4, n, + Container{d2}); + ADD_TO_HISTOGRAM("Data-flow facts", ReturnedFacts.size(), 1, + Full); + self().saveEdges(eP, RetSiteN, d4, ReturnedFacts, true); + // for each target value of the function + for (d_t d5 : ReturnedFacts) { + // update the caller-side summary function + // get call edge function + EdgeFunction f4 = + CachedFlowEdgeFunctions.getCallEdgeFunction( + n, d2, SCalledProcN, d3); + PHASAR_LOG_LEVEL(DEBUG, "Queried Call Edge Function: " << f4); + // get return edge function + EdgeFunction f5 = + CachedFlowEdgeFunctions.getReturnEdgeFunction( + n, SCalledProcN, eP, d4, RetSiteN, d5); + PHASAR_LOG_LEVEL(DEBUG, + "Queried Return Edge Function: " << f5); + if (SolverConfig.emitESG()) { + for (auto SP : ICF->getStartPointsOf(SCalledProcN)) { + IntermediateEdgeFunctions[std::make_tuple(n, d2, SP, d3)] + .push_back(f4); + } + IntermediateEdgeFunctions[std::make_tuple(eP, d4, RetSiteN, + d5)] + .push_back(f5); + } + INC_COUNTER("EF Queries", 2, Full); + // compose call * calleeSummary * return edge functions + PHASAR_LOG_LEVEL(DEBUG, "Compose: " << f5 << " * " + << fCalleeSummary << " * " + << f4); + PHASAR_LOG_LEVEL(DEBUG, + " (return * calleeSummary * call)"); + EdgeFunction fPrime = + f4.composeWith(fCalleeSummary).composeWith(f5); + PHASAR_LOG_LEVEL(DEBUG, " = " << fPrime); + d_t d5_restoredCtx = + self().restoreContextOnReturnedFact(n, d2, d5); + // propagte the effects of the entire call + PHASAR_LOG_LEVEL(DEBUG, "Compose: " << fPrime << " * " << f); + self().addWorklistItem(d1, RetSiteN, + std::move(d5_restoredCtx), + f.composeWith(fPrime)); + } + } + } + } + } + } + } + // line 17-19 of Naeem/Lhotak/Rodriguez + // 
process intra-procedural flows along call-to-return flow functions + for (n_t ReturnSiteN : ReturnSiteNs) { + FlowFunctionPtrType CallToReturnFF = + CachedFlowEdgeFunctions.getCallToRetFlowFunction(n, ReturnSiteN, + Callees); + INC_COUNTER("FF Queries", 1, Full); + container_type ReturnFacts = + self().computeCallToReturnFlowFunction(CallToReturnFF, d1, d2); + ADD_TO_HISTOGRAM("Data-flow facts", ReturnFacts.size(), 1, Full); + self().saveEdges(n, ReturnSiteN, d2, ReturnFacts, false); + for (d_t d3 : ReturnFacts) { + EdgeFunction EdgeFnE = + CachedFlowEdgeFunctions.getCallToRetEdgeFunction(n, d2, ReturnSiteN, + d3, Callees); + PHASAR_LOG_LEVEL(DEBUG, + "Queried Call-to-Return Edge Function: " << EdgeFnE); + if (SolverConfig.emitESG()) { + IntermediateEdgeFunctions[std::make_tuple(n, d2, ReturnSiteN, d3)] + .push_back(EdgeFnE); + } + INC_COUNTER("EF Queries", 1, Full); + auto fPrime = f.composeWith(EdgeFnE); + PHASAR_LOG_LEVEL(DEBUG, "Compose: " << EdgeFnE << " * " << f << " = " + << fPrime); + self().addWorklistItem(d1, ReturnSiteN, std::move(d3), + std::move(fPrime)); + } + } + } + + /// Lines 21-32 of the algorithm. + /// + /// Stores callee-side summaries. + /// Also, at the side of the caller, propagates intra-procedural flows to + /// return sites using those newly computed summaries. + /// + /// @param edge an edge whose target node resembles a method exit + /// + virtual void processExit(PathEdge Edge, EdgeFunction f) { + PAMM_GET_INSTANCE; + INC_COUNTER("Process Exit", 1, Full); + PHASAR_LOG_LEVEL(DEBUG, + "Process exit at target: " << NToString(Edge.getTarget())); + n_t n = Edge.getTarget(); // an exit node; line 21... 
+ f_t FunctionThatNeedsSummary = ICF->getFunctionOf(n); + d_t d1 = Edge.factAtSource(); + d_t d2 = Edge.factAtTarget(); + // for each of the method's start points, determine incoming calls + const auto StartPointsOf = ICF->getStartPointsOf(FunctionThatNeedsSummary); + std::map Inc; + for (n_t SP : StartPointsOf) { + // line 21.1 of Naeem/Lhotak/Rodriguez + // register end-summary + self().addEndSummary(SP, d1, n, d2, f); + for (const auto &Entry : incoming(d1, SP)) { + Inc[Entry.first] = Container{Entry.second}; + } + } + self().printEndSummaryTab(); + self().printIncomingTab(); + // for each incoming call edge already processed + //(see processCall(..)) + for (const auto &Entry : Inc) { + // line 22 + n_t c = Entry.first; + // for each return site + for (n_t RetSiteC : ICF->getReturnSitesOfCallAt(c)) { + // compute return-flow function + FlowFunctionPtrType RetFunction = + CachedFlowEdgeFunctions.getRetFlowFunction( + c, FunctionThatNeedsSummary, n, RetSiteC); + INC_COUNTER("FF Queries", 1, Full); + // for each incoming-call value + for (d_t d4 : Entry.second) { + const auto &Targets = self().computeReturnFlowFunction( + RetFunction, d1, d2, c, Entry.second); + ADD_TO_HISTOGRAM("Data-flow facts", Targets.size(), 1, Full); + self().saveEdges(n, RetSiteC, d2, Targets, true); + // for each target value at the return site + // line 23 + for (const d_t &d5 : Targets) { + // compute composed function + // get call edge function + EdgeFunction f4 = CachedFlowEdgeFunctions.getCallEdgeFunction( + c, d4, ICF->getFunctionOf(n), d1); + PHASAR_LOG_LEVEL(DEBUG, "Queried Call Edge Function: " << f4); + // get return edge function + EdgeFunction f5 = + CachedFlowEdgeFunctions.getReturnEdgeFunction( + c, ICF->getFunctionOf(n), n, d2, RetSiteC, d5); + PHASAR_LOG_LEVEL(DEBUG, "Queried Return Edge Function: " << f5); + if (SolverConfig.emitESG()) { + for (auto SP : ICF->getStartPointsOf(ICF->getFunctionOf(n))) { + IntermediateEdgeFunctions[std::make_tuple(c, d4, SP, d1)] + 
.push_back(f4); + } + IntermediateEdgeFunctions[std::make_tuple(n, d2, RetSiteC, d5)] + .push_back(f5); + } + INC_COUNTER("EF Queries", 2, Full); + // compose call function * function * return function + PHASAR_LOG_LEVEL(DEBUG, + "Compose: " << f5 << " * " << f << " * " << f4); + PHASAR_LOG_LEVEL(DEBUG, " (return * function * call)"); + EdgeFunction fPrime = f4.composeWith(f).composeWith(f5); + PHASAR_LOG_LEVEL(DEBUG, " = " << fPrime); + // for each jump function coming into the call, propagate to + // return site using the composed function + auto RevLookupResult = JumpFn->reverseLookup(c, d4); + if (RevLookupResult) { + for (size_t I = 0; I < RevLookupResult->get().size(); ++I) { + auto ValAndFunc = RevLookupResult->get()[I]; + EdgeFunction f3 = ValAndFunc.second; + if (f3 != AllTop) { + d_t d3 = ValAndFunc.first; + d_t d5_restoredCtx = + self().restoreContextOnReturnedFact(c, d4, d5); + PHASAR_LOG_LEVEL(DEBUG, "Compose: " << fPrime << " * " << f3); + self().addWorklistItem(std::move(d3), RetSiteC, + std::move(d5_restoredCtx), + f3.composeWith(fPrime)); + } + } + } + } + } + } + } + // handling for unbalanced problems where we return out of a method with a + // fact for which we have no incoming flow. 
+ // note: we propagate that way only values that originate from ZERO, as + // conditionally generated values should only + // be propagated into callers that have an incoming edge for this + // condition + /// TODO: Add a check for "d1 is seed in functionOf(n)" + if (SolverConfig.followReturnsPastSeeds() && Inc.empty() /*&& + IDEProblem.isZeroValue(d1)*/) { + const auto &Callers = ICF->getCallersOf(FunctionThatNeedsSummary); + for (n_t Caller : Callers) { + for (n_t RetSiteC : ICF->getReturnSitesOfCallAt(Caller)) { + FlowFunctionPtrType RetFunction = + CachedFlowEdgeFunctions.getRetFlowFunction( + Caller, FunctionThatNeedsSummary, n, RetSiteC); + INC_COUNTER("FF Queries", 1, Full); + const container_type Targets = self().computeReturnFlowFunction( + RetFunction, d1, d2, Caller, Container{ZeroValue}); + ADD_TO_HISTOGRAM("Data-flow facts", Targets.size(), 1, Full); + self().saveEdges(n, RetSiteC, d2, Targets, true); + for (d_t d5 : Targets) { + EdgeFunction f5 = + CachedFlowEdgeFunctions.getReturnEdgeFunction( + Caller, ICF->getFunctionOf(n), n, d2, RetSiteC, d5); + PHASAR_LOG_LEVEL(DEBUG, "Queried Return Edge Function: " << f5); + if (SolverConfig.emitESG()) { + IntermediateEdgeFunctions[std::make_tuple(n, d2, RetSiteC, d5)] + .push_back(f5); + } + INC_COUNTER("EF Queries", 1, Full); + PHASAR_LOG_LEVEL(DEBUG, "Compose: " << f5 << " * " << f); + self().propagteUnbalancedReturnFlow(RetSiteC, d5, f.composeWith(f5), + Caller); + // register for value processing (2nd IDE phase) + UnbalancedRetSites.insert(RetSiteC); + } + } + } + // in cases where there are no callers, the return statement would + // normally not be processed at all; this might be undesirable if + // the flow function has a side effect such as registering a taint; + // instead we thus call the return flow function will a null caller + if (Callers.empty()) { + IDEProblem.applyUnbalancedRetFlowFunctionSideEffects( + FunctionThatNeedsSummary, n, d2); + } + } + } + + void propagteUnbalancedReturnFlow(n_t 
RetSiteC, d_t TargetVal, + EdgeFunction EdgeFunc, + n_t /*RelatedCallSite*/) { + self().addWorklistItem(ZeroValue, std::move(RetSiteC), std::move(TargetVal), + std::move(EdgeFunc)); + } + + EdgeFunction jumpFunction(const PathEdge Edge) { + IF_LOG_ENABLED( + PHASAR_LOG_LEVEL(DEBUG, "JumpFunctions Forward-Lookup:"); + PHASAR_LOG_LEVEL(DEBUG, + " Source D: " << DToString(Edge.factAtSource())); + PHASAR_LOG_LEVEL(DEBUG, " Target N: " << NToString(Edge.getTarget())); + PHASAR_LOG_LEVEL(DEBUG, + " Target D: " << DToString(Edge.factAtTarget()))); + + auto FwdLookupRes = + JumpFn->forwardLookup(Edge.factAtSource(), Edge.getTarget()); + if (FwdLookupRes) { + auto &Ref = FwdLookupRes->get(); + if (auto Find = std::find_if(Ref.begin(), Ref.end(), + [Edge](const auto &Pair) { + return Edge.factAtTarget() == Pair.first; + }); + Find != Ref.end()) { + PHASAR_LOG_LEVEL(DEBUG, " => EdgeFn: " << Find->second); + return Find->second; + } + } + PHASAR_LOG_LEVEL(DEBUG, " => EdgeFn: " << AllTop); + // JumpFn initialized to all-top, see line [2] in SRH96 paper + return AllTop; + } + + void addEndSummary(n_t SP, d_t d1, n_t eP, d_t d2, EdgeFunction f) { + // note: at this point we don't need to join with a potential previous f + // because f is a jump function, which is already properly joined + // within propagate(..) + EndsummaryTab.get(SP, d1).insert(eP, d2, std::move(f)); + } + + // TODO: De-virtualize + virtual void saveEdges(n_t SourceNode, n_t SinkStmt, d_t SourceVal, + const container_type &DestVals, bool InterP) { + if (!SolverConfig.recordEdges()) { + return; + } + Table> &TgtMap = + (InterP) ? ComputedInterPathEdges : ComputedIntraPathEdges; + TgtMap.get(SourceNode, SinkStmt)[SourceVal].insert(DestVals.begin(), + DestVals.end()); + } + + /// Schedules the processing of initial seeds, initiating the analysis. + /// Clients should only call this methods if performing synchronization on + /// their own. Normally, solve() should be called instead. 
+ void submitInitialSeeds() { + PAMM_GET_INSTANCE; + // Check if the initial seeds contain the zero value at every starting + // point. If not, the zero value needs to be added to allow for correct + // solving of the problem. + for (const auto &[StartPoint, Facts] : Seeds.getSeeds()) { + if (Facts.find(ZeroValue) == Facts.end()) { + // Add zero value if it's not in the set of facts. + PHASAR_LOG_LEVEL( + DEBUG, "Zero-Value has been added automatically to start point: " + << NToString(StartPoint)); + Seeds.addSeed(StartPoint, ZeroValue, IDEProblem.bottomElement()); + } + } + PHASAR_LOG_LEVEL(DEBUG, + "Number of initial seeds: " << Seeds.countInitialSeeds()); + PHASAR_LOG_LEVEL(DEBUG, "List of initial seeds: "); + for (const auto &[StartPoint, Facts] : Seeds.getSeeds()) { + PHASAR_LOG_LEVEL(DEBUG, "Start point: " << NToString(StartPoint)); + /// If statically disabling the logger, Fact and Value are unused. To + /// prevent the copilation to fail with -Werror, add the [[maybe_unused]] + /// attribute + for ([[maybe_unused]] const auto &[Fact, Value] : Facts) { + PHASAR_LOG_LEVEL(DEBUG, "\tFact: " << DToString(Fact)); + PHASAR_LOG_LEVEL(DEBUG, "\tValue: " << LToString(Value)); + } + } + for (const auto &[StartPoint, Facts] : Seeds.getSeeds()) { + for (const auto &[Fact, Value] : Facts) { + PHASAR_LOG_LEVEL(DEBUG, "Submit seed at: " << NToString(StartPoint)); + PHASAR_LOG_LEVEL(DEBUG, "\tFact: " << DToString(Fact)); + PHASAR_LOG_LEVEL(DEBUG, "\tValue: " << LToString(Value)); + if (!IDEProblem.isZeroValue(Fact)) { + INC_COUNTER("Gen facts", 1, Core); + } + addWorklistItem(Fact, StartPoint, Fact, EdgeIdentity{}); + } + } + } + + /// This method will be called for each incoming edge and can be used to + /// transfer knowledge from the calling edge to the returning edge, without + /// affecting the summary edges at the callee. 
+ /// @param callSite + /// + /// @param d4 + /// Fact stored with the incoming edge, i.e., present at the + /// caller side + /// @param d5 + /// Fact that originally should be propagated to the caller. + /// @return Fact that will be propagated to the caller. + /// + d_t restoreContextOnReturnedFact(n_t /*CallSite*/, d_t /*d4*/, d_t d5) { + // TODO support LinkedNode and JoinHandlingNode + // if (d5 instanceof LinkedNode) { + // ((LinkedNode) d5).setCallingContext(d4); + // } + // if(d5 instanceof JoinHandlingNode) { + // ((JoinHandlingNode) + // d5).setCallingContext(d4); + // } + return d5; + } + + /// Computes the normal flow function for the given set of start and end + /// abstractions- + /// @param flowFunction The normal flow function to compute + /// @param d1 The abstraction at the method's start node + /// @param d2 The abstraction at the current node + /// @return The set of abstractions at the successor node + /// + container_type computeNormalFlowFunction(const FlowFunctionPtrType &FlowFunc, + d_t /*d1*/, d_t d2) { + return FlowFunc->computeTargets(std::move(d2)); + } + + container_type + computeSummaryFlowFunction(const FlowFunctionPtrType &SummaryFlowFunction, + d_t /*d1*/, d_t d2) { + return SummaryFlowFunction->computeTargets(std::move(d2)); + } + + /// Computes the call flow function for the given call-site abstraction + /// @param callFlowFunction The call flow function to compute + /// @param d1 The abstraction at the current method's start node. 
+ /// @param d2 The abstraction at the call site + /// @return The set of caller-side abstractions at the callee's start node + /// + container_type + computeCallFlowFunction(const FlowFunctionPtrType &CallFlowFunction, + d_t /*d1*/, d_t d2) { + return CallFlowFunction->computeTargets(std::move(d2)); + } + + /// Computes the call-to-return flow function for the given call-site + /// abstraction + /// @param callToReturnFlowFunction The call-to-return flow function to + /// compute + /// @param d1 The abstraction at the current method's start node. + /// @param d2 The abstraction at the call site + /// @return The set of caller-side abstractions at the return site + /// + container_type computeCallToReturnFlowFunction( + const FlowFunctionPtrType &CallToReturnFlowFunction, d_t /*d1*/, d_t d2) { + return CallToReturnFlowFunction->computeTargets(std::move(d2)); + } + + /// Computes the return flow function for the given set of caller-side + /// abstractions. + /// @param retFunction The return flow function to compute + /// @param d1 The abstraction at the beginning of the callee + /// @param d2 The abstraction at the exit node in the callee + /// @param callSite The call site + /// @param callerSideDs The abstractions at the call site + /// @return The set of caller-side abstractions at the return site + /// + container_type + computeReturnFlowFunction(const FlowFunctionPtrType &RetFlowFunction, + d_t /*d1*/, d_t d2, n_t /*CallSite*/, + const Container & /*CallerSideDs*/) { + return RetFlowFunction->computeTargets(std::move(d2)); + } + + bool addWorklistItem(d_t SourceVal, n_t Target, d_t TargetVal, + EdgeFunction f) { + EdgeFunction JumpFnE = [&]() { + const auto RevLookupResult = JumpFn->reverseLookup(Target, TargetVal); + if (RevLookupResult) { + const auto &JumpFnContainer = RevLookupResult->get(); + const auto Find = std::find_if( + JumpFnContainer.begin(), JumpFnContainer.end(), + [SourceVal](auto &KVpair) { return KVpair.first == SourceVal; }); + if (Find != 
JumpFnContainer.end()) { + return Find->second; + } + } + // jump function is initialized to all-top if no entry + // was found + return AllTop; + }(); + EdgeFunction fPrime = JumpFnE.joinWith(f); + bool NewFunction = fPrime != JumpFnE; + + IF_LOG_ENABLED( + PHASAR_LOG_LEVEL( + DEBUG, "Join: " << JumpFnE << " & " << f + << (JumpFnE == f ? " (EF's are equal)" : " ")); + PHASAR_LOG_LEVEL(DEBUG, + " = " << fPrime + << (NewFunction ? " (new jump func)" : " ")); + PHASAR_LOG_LEVEL(DEBUG, ' ')); + if (NewFunction) { + JumpFn->addFunction(SourceVal, Target, TargetVal, fPrime); + PathEdge Edge(SourceVal, Target, TargetVal); + WorkList.push_back(std::move(Edge)); + + IF_LOG_ENABLED(if (!IDEProblem.isZeroValue(TargetVal)) { + PHASAR_LOG_LEVEL(DEBUG, "[addWorklistItem]: EDGE: getFunctionOf(Target)) + << ", D: " << DToString(SourceVal) << '>'); + PHASAR_LOG_LEVEL(DEBUG, " ---> '); + PHASAR_LOG_LEVEL(DEBUG, ' '); + }); + } else { + PHASAR_LOG_LEVEL(DEBUG, "[addWorklistItem]: No new function!"); + } + + return NewFunction; + } + + std::set>::Cell> + endSummary(n_t SP, d_t d3) { + if constexpr (PAMM_CURR_SEV_LEVEL >= PAMM_SEVERITY_LEVEL::Core) { + auto Key = std::make_pair(SP, d3); + auto FindND = FSummaryReuse.find(Key); + if (FindND == FSummaryReuse.end()) { + FSummaryReuse.emplace(Key, 0); + } else { + FSummaryReuse[Key] += 1; + } + } + return EndsummaryTab.get(SP, d3).cellSet(); + } + + std::map incoming(d_t d1, n_t SP) { + return IncomingTab.get(SP, d1); + } + + void addIncoming(n_t SP, d_t d3, n_t n, d_t d2) { + IncomingTab.get(SP, d3)[n].insert(d2); + } + + /// -- IDE-specific functions: + + /// Computes the final values for edge functions. 
+ void computeValues() { + PHASAR_LOG_LEVEL(DEBUG, "Start computing values"); + // Phase II(i) + self().submitInitialValues(); + while (!ValuePropWL.empty()) { + auto NAndD = std::move(ValuePropWL.back()); + ValuePropWL.pop_back(); + self().valuePropagationTask(std::move(NAndD)); + } + + // Phase II(ii) + // we create an array of all nodes and then dispatch fractions of this + // array to multiple threads + const auto &AllNonCallStartNodes = self().getAllValueComputationNodes(); + self().valueComputationTask(AllNonCallStartNodes); + } + + void propagateValueAtStart(const std::pair NAndD, n_t Stmt) { + PAMM_GET_INSTANCE; + d_t Fact = NAndD.second; + f_t Func = ICF->getFunctionOf(Stmt); + for (const n_t CallSite : ICF->getCallsFromWithin(Func)) { + auto LookupResults = JumpFn->forwardLookup(Fact, CallSite); + if (!LookupResults) { + continue; + } + for (size_t I = 0; I < LookupResults->get().size(); ++I) { + auto Entry = LookupResults->get()[I]; + d_t dPrime = Entry.first; + auto fPrime = Entry.second; + n_t SP = Stmt; + l_t Val = val(SP, Fact); + INC_COUNTER("Value Propagation", 1, Full); + self().propagateValue(CallSite, dPrime, fPrime.computeTarget(Val)); + } + } + } + + void propagateValueAtCall(const std::pair NAndD, n_t Stmt) { + PAMM_GET_INSTANCE; + d_t Fact = NAndD.second; + for (const f_t Callee : ICF->getCalleesOfCallAt(Stmt)) { + FlowFunctionPtrType CallFlowFunction = + CachedFlowEdgeFunctions.getCallFlowFunction(Stmt, Callee); + INC_COUNTER("FF Queries", 1, Full); + for (const d_t dPrime : CallFlowFunction->computeTargets(Fact)) { + EdgeFunction EdgeFn = CachedFlowEdgeFunctions.getCallEdgeFunction( + Stmt, Fact, Callee, dPrime); + PHASAR_LOG_LEVEL(DEBUG, "Queried Call Edge Function: " << EdgeFn); + if (SolverConfig.emitESG()) { + for (const auto SP : ICF->getStartPointsOf(Callee)) { + IntermediateEdgeFunctions[std::make_tuple(Stmt, Fact, SP, dPrime)] + .push_back(EdgeFn); + } + } + INC_COUNTER("EF Queries", 1, Full); + for (const n_t StartPoint : 
ICF->getStartPointsOf(Callee)) { + INC_COUNTER("Value Propagation", 1, Full); + self().propagateValue(StartPoint, dPrime, + EdgeFn.computeTarget(self().val(Stmt, Fact))); + } + } + } + } + + void propagateValue(n_t NHashN, d_t NHashD, const l_t &L) { + l_t ValNHash = self().val(NHashN, NHashD); + l_t LPrime = self().joinValueAt(NHashN, NHashD, ValNHash, L); + if (!(LPrime == ValNHash)) { + self().setVal(NHashN, NHashD, std::move(LPrime)); + ValuePropWL.emplace_back(std::move(NHashN), std::move(NHashD)); + } + } + + // should be made a callable at some point + void valuePropagationTask(std::pair NAndD) { + n_t n = NAndD.first; + // our initial seeds are not necessarily method-start points but here they + // should be treated as such the same also for unbalanced return sites in + // an unbalanced problem + if (ICF->isStartPoint(n) || Seeds.containsInitialSeedsFor(n) || + UnbalancedRetSites.count(n)) { + // FIXME: is currently not executed for main!!! + // initial seeds are set in the global constructor, and main is also not + // officially called by any other function + self().propagateValueAtStart(NAndD, n); + } + if (ICF->isCallSite(n)) { + self().propagateValueAtCall(NAndD, n); + } + } + + // should be made a callable at some point + void valueComputationTask(const std::vector &Values) { + PAMM_GET_INSTANCE; + for (n_t n : Values) { + for (n_t SP : ICF->getStartPointsOf(ICF->getFunctionOf(n))) { + using TableCell = typename Table>::Cell; + Table> &LookupByTarget = + JumpFn->lookupByTarget(n); + for (const TableCell &SourceValTargetValAndFunction : + LookupByTarget.cellSet()) { + d_t dPrime = SourceValTargetValAndFunction.getRowKey(); + d_t d = SourceValTargetValAndFunction.getColumnKey(); + EdgeFunction fPrime = SourceValTargetValAndFunction.getValue(); + l_t TargetVal = self().val(SP, dPrime); + self().setVal( + n, d, + IDEProblem.join(self().val(n, d), + fPrime.computeTarget(std::move(TargetVal)))); + INC_COUNTER("Value Computation", 1, Full); + } + } + } + } + 
+ void submitInitialValues() { + std::map> AllSeeds = Seeds.getSeeds(); + for (n_t UnbalancedRetSite : UnbalancedRetSites) { + if (AllSeeds.find(UnbalancedRetSite) == AllSeeds.end()) { + AllSeeds[UnbalancedRetSite][ZeroValue] = IDEProblem.topElement(); + } + } + // do processing + for (const auto &[StartPoint, Facts] : AllSeeds) { + for (auto &[Fact, Value] : Facts) { + PHASAR_LOG_LEVEL(DEBUG, "set initial seed at: " + << NToString(StartPoint) + << ", fact: " << DToString(Fact) + << ", value: " << LToString(Value)); + // initialize the initial seeds with the top element as we have no + // information at the beginning of the value computation problem + self().setVal(StartPoint, Fact, Value); + std::pair SuperGraphNode(StartPoint, Fact); + self().valuePropagationTask(std::move(SuperGraphNode)); + } + } + } + + l_t val(n_t NHashN, d_t NHashD) { + if (ValTab.contains(NHashN, NHashD)) { + return ValTab.get(NHashN, NHashD); + } + // implicitly initialized to top; see line [1] of Fig. 7 in SRH96 paper + return IDEProblem.topElement(); + } + + void setVal(n_t NHashN, d_t NHashD, l_t L) { + IF_LOG_ENABLED({ + PHASAR_LOG_LEVEL(DEBUG, + "Function : " << ICF->getFunctionOf(NHashN)->getName()); + PHASAR_LOG_LEVEL(DEBUG, "Inst. : " << NToString(NHashN)); + PHASAR_LOG_LEVEL(DEBUG, "Fact : " << DToString(NHashD)); + PHASAR_LOG_LEVEL(DEBUG, "Value : " << LToString(L)); + PHASAR_LOG_LEVEL(DEBUG, ' '); + }); + // TOP is the implicit default value which we do not need to store. 
+ // if (l == IDEProblem.topElement()) { + // do not store top values + // ValTab.remove(nHashN, nHashD); + // } else { + ValTab.insert(NHashN, NHashD, std::move(L)); + // } + } + + std::vector getAllValueComputationNodes() { + return ICF->allNonCallStartNodes(); + } + + l_t joinValueAt(n_t /*Unit*/, d_t /*Fact*/, l_t Curr, l_t NewVal) { + return IDEProblem.join(std::move(Curr), std::move(NewVal)); + } + + /// -- InteractiveIDESolverMixin implementation + + bool doInitialize() { + PAMM_GET_INSTANCE; + REG_COUNTER("Gen facts", 0, Core); + REG_COUNTER("Kill facts", 0, Core); + REG_COUNTER("Summary-reuse", 0, Core); + REG_COUNTER("Intra Path Edges", 0, Core); + REG_COUNTER("Inter Path Edges", 0, Core); + REG_COUNTER("FF Queries", 0, Full); + REG_COUNTER("EF Queries", 0, Full); + REG_COUNTER("Value Propagation", 0, Full); + REG_COUNTER("Value Computation", 0, Full); + REG_COUNTER("SpecialSummary-FF Application", 0, Full); + REG_COUNTER("SpecialSummary-EF Queries", 0, Full); + REG_COUNTER("JumpFn Construction", 0, Full); + REG_COUNTER("Process Call", 0, Full); + REG_COUNTER("Process Normal", 0, Full); + REG_COUNTER("Process Exit", 0, Full); + REG_COUNTER("[Calls] getAliasSet", 0, Full); + REG_HISTOGRAM("Data-flow facts", Full); + REG_HISTOGRAM("Points-to", Full); + + PHASAR_LOG_LEVEL(INFO, "IDE solver is solving the specified problem"); + PHASAR_LOG_LEVEL(INFO, + "Submit initial seeds, construct exploded super graph"); + // computations starting here + START_TIMER("DFA Phase I", Full); + + // We start our analysis and construct exploded supergraph + self().submitInitialSeeds(); + return !WorkList.empty(); + } + + bool doNext() { + assert(!WorkList.empty()); + auto Edge = std::move(WorkList.back()); + WorkList.pop_back(); + + auto EF = self().jumpFunction(Edge); + self().propagate(std::move(Edge), std::move(EF)); + + return !WorkList.empty(); + } + + void finalizeInternal() { + PAMM_GET_INSTANCE; + STOP_TIMER("DFA Phase I", Full); + if (SolverConfig.computeValues()) { + 
START_TIMER("DFA Phase II", Full); + // Computing the final values for the edge functions + PHASAR_LOG_LEVEL( + INFO, "Compute the final values according to the edge functions"); + self().computeValues(); + STOP_TIMER("DFA Phase II", Full); + } + PHASAR_LOG_LEVEL(INFO, "Problem solved"); + if constexpr (PAMM_CURR_SEV_LEVEL >= PAMM_SEVERITY_LEVEL::Core) { + self().computeAndPrintStatistics(); + } + if (SolverConfig.emitESG()) { + self().emitESGAsDot(); + } + } + + SolverResults doFinalize() & { + self().finalizeInternal(); + return self().getSolverResults(); + } + + OwningSolverResults doFinalize() && { + self().finalizeInternal(); + return self().consumeSolverResults(); + } + + /// -- Misc functions + + void printIncomingTab() const { + IF_LOG_ENABLED( + PHASAR_LOG_LEVEL(DEBUG, "Start of incomingtab entry"); + for (const auto &Cell + : IncomingTab.cellSet()) { + PHASAR_LOG_LEVEL(DEBUG, "sP: " << NToString(Cell.getRowKey())); + PHASAR_LOG_LEVEL(DEBUG, "d3: " << DToString(Cell.getColumnKey())); + for (const auto &Entry : Cell.getValue()) { + PHASAR_LOG_LEVEL(DEBUG, " n: " << NToString(Entry.first)); + for (const auto &Fact : Entry.second) { + PHASAR_LOG_LEVEL(DEBUG, " d2: " << DToString(Fact)); + } + } + PHASAR_LOG_LEVEL(DEBUG, "---------------"); + } PHASAR_LOG_LEVEL(DEBUG, "End of incomingtab entry");) + } + + void printEndSummaryTab() const { + IF_LOG_ENABLED( + PHASAR_LOG_LEVEL(DEBUG, "Start of endsummarytab entry"); + + EndsummaryTab.foreachCell( + [](const auto &Row, const auto &Col, const auto &Val) { + PHASAR_LOG_LEVEL(DEBUG, "sP: " << NToString(Row)); + PHASAR_LOG_LEVEL(DEBUG, "d1: " << DToString(Col)); + + Val.foreachCell([](const auto &InnerRow, const auto &InnerCol, + const auto &InnerVal) { + PHASAR_LOG_LEVEL(DEBUG, " eP: " << NToString(InnerRow)); + PHASAR_LOG_LEVEL(DEBUG, " d2: " << DToString(InnerCol)); + PHASAR_LOG_LEVEL(DEBUG, " EF: " << InnerVal); + }); + PHASAR_LOG_LEVEL(DEBUG, "---------------"); + }); + + PHASAR_LOG_LEVEL(DEBUG, "End of 
endsummarytab entry");) + } + + void printComputedPathEdges() { + llvm::outs() + << "\n**********************************************************"; + llvm::outs() + << "\n* Computed intra-procedural path egdes *"; + llvm::outs() + << "\n**********************************************************\n"; + + // Sort intra-procedural path edges + auto Cells = ComputedIntraPathEdges.cellVec(); + StmtLess Stmtless(ICF); + sort(Cells.begin(), Cells.end(), [&Stmtless](auto Lhs, auto Rhs) { + return Stmtless(Lhs.getRowKey(), Rhs.getRowKey()); + }); + for (const auto &Cell : Cells) { + auto Edge = std::make_pair(Cell.getRowKey(), Cell.getColumnKey()); + std::string N2Label = NToString(Edge.second); + llvm::outs() << "\nN1: " << NToString(Edge.first) << '\n' + << "N2: " << N2Label << "\n----" + << std::string(N2Label.size(), '-') << '\n'; + for (auto D1ToD2Set : Cell.getValue()) { + auto D1Fact = D1ToD2Set.first; + llvm::outs() << "D1: " << DToString(D1Fact) << '\n'; + for (auto D2Fact : D1ToD2Set.second) { + llvm::outs() << "\tD2: " << DToString(D2Fact) << '\n'; + } + llvm::outs() << '\n'; + } + } + + llvm::outs() + << "\n**********************************************************"; + llvm::outs() + << "\n* Computed inter-procedural path edges *"; + llvm::outs() + << "\n**********************************************************\n"; + + // Sort intra-procedural path edges + Cells = ComputedInterPathEdges.cellVec(); + sort(Cells.begin(), Cells.end(), [&Stmtless](auto Lhs, auto Rhs) { + return Stmtless(Lhs.getRowKey(), Rhs.getRowKey()); + }); + for (const auto &Cell : Cells) { + auto Edge = std::make_pair(Cell.getRowKey(), Cell.getColumnKey()); + std::string N2Label = NToString(Edge.second); + llvm::outs() << "\nN1: " << NToString(Edge.first) << '\n' + << "N2: " << N2Label << "\n----" + << std::string(N2Label.size(), '-') << '\n'; + for (auto D1ToD2Set : Cell.getValue()) { + auto D1Fact = D1ToD2Set.first; + llvm::outs() << "D1: " << DToString(D1Fact) << '\n'; + for (auto D2Fact : 
D1ToD2Set.second) { + llvm::outs() << "\tD2: " << DToString(D2Fact) << '\n'; + } + llvm::outs() << '\n'; + } + } + } + + /// The invariant for computing the number of generated (#gen) and killed + /// (#kill) facts: + /// (1) #Valid facts at the last statement <= #gen - #kill + /// (2) #gen >= #kill + /// + /// The total number of valid facts can be smaller than the difference of + /// generated and killed facts, due to set semantics, i.e., a fact can be + /// generated multiple times but appears only once. + /// + /// Zero value is not counted! + /// + /// @brief Computes and prints statistics of the analysis run, e.g. number of + /// generated/killed facts, number of summary-reuses etc. + /// + void computeAndPrintStatistics() { + PAMM_GET_INSTANCE; + // Stores all valid facts at return site in caller context; return-site is + // key + std::unordered_map> ValidInCallerContext; + size_t NumGenFacts = 0; + size_t NumIntraPathEdges = 0; + size_t NumInterPathEdges = 0; + // --- Intra-procedural Path Edges --- + // d1 --> d2-Set + // Case 1: d1 in d2-Set + // Case 2: d1 not in d2-Set, i.e., d1 was killed. d2-Set could be empty. 
+ for (const auto &Cell : ComputedIntraPathEdges.cellSet()) { + auto Edge = std::make_pair(Cell.getRowKey(), Cell.getColumnKey()); + PHASAR_LOG_LEVEL(DEBUG, "N1: " << NToString(Edge.first)); + PHASAR_LOG_LEVEL(DEBUG, "N2: " << NToString(Edge.second)); + for (auto &[D1, D2s] : Cell.getValue()) { + PHASAR_LOG_LEVEL(DEBUG, "d1: " << DToString(D1)); + NumIntraPathEdges += D2s.size(); + // Case 1 + if (D2s.find(D1) != D2s.end()) { + NumGenFacts += D2s.size() - 1; + } + // Case 2 + else { + NumGenFacts += D2s.size(); + } + // Store all valid facts after call-to-return flow + if (ICF->isCallSite(Edge.first)) { + ValidInCallerContext[Edge.second].insert(D2s.begin(), D2s.end()); + } + IF_LOG_ENABLED([this](const auto &D2s) { + for (auto D2 : D2s) { + PHASAR_LOG_LEVEL(DEBUG, "d2: " << DToString(D2)); + } + PHASAR_LOG_LEVEL(DEBUG, "----"); + }(D2s)); + } + PHASAR_LOG_LEVEL(DEBUG, " "); + } + // Stores all pairs of (Startpoint, Fact) for which a summary was applied + std::set> ProcessSummaryFacts; + PHASAR_LOG_LEVEL(DEBUG, "=============================================="); + PHASAR_LOG_LEVEL(DEBUG, "INTER PATH EDGES"); + for (const auto &Cell : ComputedInterPathEdges.cellSet()) { + auto Edge = std::make_pair(Cell.getRowKey(), Cell.getColumnKey()); + PHASAR_LOG_LEVEL(DEBUG, "N1: " << NToString(Edge.first)); + PHASAR_LOG_LEVEL(DEBUG, "N2: " << NToString(Edge.second)); + // --- Call-flow Path Edges --- + // Case 1: d1 --> empty set + // Can be ignored, since killing a fact in the caller context will + // actually happen during call-to-return. + // + // Case 2: d1 --> d2-Set + // Every fact d_i != ZeroValue in d2-set will be generated in the + // callee context, thus counts as a new fact. Even if d1 is passed as it + // is, it will count as a new fact. The reason for this is, that d1 can + // be killed in the callee context, but still be valid in the caller + // context. + // + // Special Case: Summary was applied for a particular call + // Process the summary's #gen and #kill. 
+ if (ICF->isCallSite(Edge.first)) { + for (auto &[D1, D2s] : Cell.getValue()) { + PHASAR_LOG_LEVEL(DEBUG, "d1: " << DToString(D1)); + NumInterPathEdges += D2s.size(); + for (auto D2 : D2s) { + if (!IDEProblem.isZeroValue(D2)) { + NumGenFacts++; + } + // Special case + if (ProcessSummaryFacts.find(std::make_pair(Edge.second, D2)) != + ProcessSummaryFacts.end()) { + + std::set SummaryDSet; + EndsummaryTab.get(Edge.second, D2) + .foreachCell([&SummaryDSet](const auto &Row, const auto &Col, + const auto &Val) { + SummaryDSet.insert(Col); + }); + + // Process summary just as an intra-procedural edge + if (SummaryDSet.find(D2) != SummaryDSet.end()) { + NumGenFacts += SummaryDSet.size() - 1; + } else { + NumGenFacts += SummaryDSet.size(); + } + } else { + ProcessSummaryFacts.emplace(Edge.second, D2); + } + PHASAR_LOG_LEVEL(DEBUG, "d2: " << DToString(D2)); + } + PHASAR_LOG_LEVEL(DEBUG, "----"); + } + } + // --- Return-flow Path Edges --- + // Since every fact passed to the callee was counted as a new fact, we + // have to count every fact propagated to the caller as a kill to + // satisfy our invariant. Obviously, every fact not propagated to the + // caller will count as a kill. If an actual new fact is propagated to + // the caller, we have to increase the number of generated facts by one. + // Zero value does not count towards generated/killed facts. 
+ if (ICF->isExitInst(Cell.getRowKey())) { + for (auto &[D1, D2s] : Cell.getValue()) { + PHASAR_LOG_LEVEL(DEBUG, "d1: " << DToString(D1)); + NumInterPathEdges += D2s.size(); + auto CallerFacts = ValidInCallerContext[Edge.second]; + for (auto D2 : D2s) { + // d2 not valid in caller context + if (CallerFacts.find(D2) == CallerFacts.end()) { + NumGenFacts++; + } + PHASAR_LOG_LEVEL(DEBUG, "d2: " << DToString(D2)); + } + PHASAR_LOG_LEVEL(DEBUG, "----"); + } + } + PHASAR_LOG_LEVEL(DEBUG, " "); + } + PHASAR_LOG_LEVEL(DEBUG, "SUMMARY REUSE"); + std::size_t TotalSummaryReuse = 0; + for (const auto &Entry : FSummaryReuse) { + PHASAR_LOG_LEVEL(DEBUG, "N1: " << NToString(Entry.first.first)); + PHASAR_LOG_LEVEL(DEBUG, "D1: " << DToString(Entry.first.second)); + PHASAR_LOG_LEVEL(DEBUG, "#Reuse: " << Entry.second); + TotalSummaryReuse += Entry.second; + } + INC_COUNTER("Gen facts", NumGenFacts, Core); + INC_COUNTER("Summary-reuse", TotalSummaryReuse, Core); + INC_COUNTER("Intra Path Edges", NumIntraPathEdges, Core); + INC_COUNTER("Inter Path Edges", NumInterPathEdges, Core); + + PHASAR_LOG_LEVEL(INFO, "----------------------------------------------"); + PHASAR_LOG_LEVEL(INFO, "=== Solver Statistics ==="); + PHASAR_LOG_LEVEL(INFO, "#Facts generated : " << GET_COUNTER("Gen facts")); + PHASAR_LOG_LEVEL(INFO, "#Facts killed : " << GET_COUNTER("Kill facts")); + PHASAR_LOG_LEVEL(INFO, + "#Summary-reuse : " << GET_COUNTER("Summary-reuse")); + PHASAR_LOG_LEVEL(INFO, + "#Intra Path Edges: " << GET_COUNTER("Intra Path Edges")); + PHASAR_LOG_LEVEL(INFO, + "#Inter Path Edges: " << GET_COUNTER("Inter Path Edges")); + if constexpr (PAMM_CURR_SEV_LEVEL >= PAMM_SEVERITY_LEVEL::Full) { + PHASAR_LOG_LEVEL( + INFO, "Flow function query count: " << GET_COUNTER("FF Queries")); + PHASAR_LOG_LEVEL( + INFO, "Edge function query count: " << GET_COUNTER("EF Queries")); + PHASAR_LOG_LEVEL(INFO, "Data-flow value propagation count: " + << GET_COUNTER("Value Propagation")); + PHASAR_LOG_LEVEL(INFO, "Data-flow 
value computation count: " + << GET_COUNTER("Value Computation")); + PHASAR_LOG_LEVEL(INFO, + "Special flow function usage count: " + << GET_COUNTER("SpecialSummary-FF Application")); + PHASAR_LOG_LEVEL(INFO, "Jump function construciton count: " + << GET_COUNTER("JumpFn Construction")); + PHASAR_LOG_LEVEL(INFO, + "Phase I duration: " << PRINT_TIMER("DFA Phase I")); + PHASAR_LOG_LEVEL(INFO, + "Phase II duration: " << PRINT_TIMER("DFA Phase II")); + PHASAR_LOG_LEVEL(INFO, "----------------------------------------------"); + CachedFlowEdgeFunctions.print(); + } + } + + /// @brief: Allows less-than comparison based on the statement ID. + struct StmtLess { + const i_t *ICF; + StringIDLess StrIDLess; + StmtLess(const i_t *ICF) : ICF(ICF), StrIDLess(StringIDLess()) {} + bool operator()(n_t Lhs, n_t Rhs) { + return StrIDLess(ICF->getStatementId(Lhs), ICF->getStatementId(Rhs)); + } + }; + + [[nodiscard]] Derived &self() noexcept { + return static_cast(*this); + } + [[nodiscard]] const Derived &self() const noexcept { + return static_cast(*this); + } + + IDESolverImpl(IDETabulationProblem &Problem, + const i_t *ICF, PropagateAfterStrategy /*Strategy*/ = {}) + : IDEProblem(Problem), ZeroValue(Problem.getZeroValue()), ICF(ICF), + SolverConfig(Problem.getIFDSIDESolverConfig()), + CachedFlowEdgeFunctions(Problem), AllTop(Problem.allTopFunction()), + JumpFn(std::make_shared>()), + Seeds(Problem.initialSeeds()) { + assert(ICF != nullptr); + + static_assert(std::is_base_of_v); + static_assert(std::is_base_of_v); + static_assert(std::is_empty_v); + static_assert(std::is_trivially_default_constructible_v); + } + + friend Derived; + friend IDESolverAPIMixin; + + /// -- Data members + + IDETabulationProblem &IDEProblem; + d_t ZeroValue; + const i_t *ICF; + IFDSIDESolverConfig &SolverConfig; + + std::vector> WorkList; + std::vector> ValuePropWL; + + size_t PathEdgeCount = 0; + + FlowEdgeFunctionCache CachedFlowEdgeFunctions; + + Table> ComputedIntraPathEdges; + + Table> 
ComputedInterPathEdges; + + EdgeFunction AllTop; + + std::shared_ptr> JumpFn; + + std::map, std::vector>> + IntermediateEdgeFunctions; + + // stores summaries that were queried before they were computed + // see CC 2010 paper by Naeem, Lhotak and Rodriguez + Table>> EndsummaryTab; + + // edges going along calls + // see CC 2010 paper by Naeem, Lhotak and Rodriguez + Table> IncomingTab; + + // stores the return sites (inside callers) to which we have unbalanced + // returns if SolverConfig.followReturnPastSeeds is enabled + std::set UnbalancedRetSites; + + InitialSeeds Seeds; + + Table ValTab; + + std::map, size_t> FSummaryReuse; + + [[no_unique_address]] StrategyT Strategy{}; +}; +} // namespace psr + +#endif // PHASAR_DATAFLOW_IFDSIDE_SOLVER_IDESOLVERIMPL_H diff --git a/include/phasar/DataFlow/IfdsIde/Solver/PathEdge.h b/include/phasar/DataFlow/IfdsIde/Solver/detail/PathEdge.h similarity index 100% rename from include/phasar/DataFlow/IfdsIde/Solver/PathEdge.h rename to include/phasar/DataFlow/IfdsIde/Solver/detail/PathEdge.h From 1f14b61b37c03da9285c29d7138c6b60160fe664 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Wed, 20 Sep 2023 21:18:31 +0200 Subject: [PATCH 07/11] Generalize interface within IDESolverImpl --- .../IfdsIde/Solver/detail/IDESolverImpl.h | 79 ++++++++++++------- 1 file changed, 51 insertions(+), 28 deletions(-) diff --git a/include/phasar/DataFlow/IfdsIde/Solver/detail/IDESolverImpl.h b/include/phasar/DataFlow/IfdsIde/Solver/detail/IDESolverImpl.h index 7ee1da590..9bc196fba 100644 --- a/include/phasar/DataFlow/IfdsIde/Solver/detail/IDESolverImpl.h +++ b/include/phasar/DataFlow/IfdsIde/Solver/detail/IDESolverImpl.h @@ -456,7 +456,9 @@ class IDESolverImpl : public IDESolverAPIMixin { DEBUG, "Process normal at target: " << NToString(Edge.getTarget())); auto [d1, n, d2] = Edge.consume(); - for (const auto nPrime : ICF->getSuccsOf(n)) { + const auto &Succs = ICF->getSuccsOf(n); + + for (const auto nPrime : Succs) { FlowFunctionPtrType FlowFunc 
= CachedFlowEdgeFunctions.getNormalFlowFunction(n, nPrime); INC_COUNTER("FF Queries", 1, Full); @@ -475,7 +477,8 @@ class IDESolverImpl : public IDESolverAPIMixin { PHASAR_LOG_LEVEL(DEBUG, "Compose: " << g << " * " << f << " = " << fPrime); INC_COUNTER("EF Queries", 1, Full); - self().addWorklistItem(d1, nPrime, std::move(d3), std::move(fPrime)); + self().updateWithNewEdges(d1, n, nPrime, Succs, std::move(d3), + std::move(fPrime)); } } } @@ -544,8 +547,8 @@ class IDESolverImpl : public IDESolverAPIMixin { DEBUG, "Queried Summary Edge Function: " << SumEdgFnE); PHASAR_LOG_LEVEL(DEBUG, "Compose: " << SumEdgFnE << " * " << f << '\n')); - self().addWorklistItem(d1, ReturnSiteN, std::move(d3), - f.composeWith(SumEdgFnE)); + self().updateWithNewEdges(d1, n, ReturnSiteN, ReturnSiteNs, + std::move(d3), f.composeWith(SumEdgFnE)); } } } else { @@ -571,7 +574,8 @@ class IDESolverImpl : public IDESolverAPIMixin { // create initial self-loop PHASAR_LOG_LEVEL( DEBUG, "Create initial self-loop with D: " << DToString(d3)); - self().addWorklistItem(d3, SP, d3, EdgeIdentity{}); // line 15 + self().addInitialWorklistItem(d3, SP, d3, + EdgeIdentity{}); // line 15 // register the fact that has an incoming edge from // line 15.1 of Naeem/Lhotak/Rodriguez @@ -644,9 +648,10 @@ class IDESolverImpl : public IDESolverAPIMixin { self().restoreContextOnReturnedFact(n, d2, d5); // propagte the effects of the entire call PHASAR_LOG_LEVEL(DEBUG, "Compose: " << fPrime << " * " << f); - self().addWorklistItem(d1, RetSiteN, - std::move(d5_restoredCtx), - f.composeWith(fPrime)); + + self().updateWithNewEdges(d1, n, RetSiteN, ReturnSiteNs, + std::move(d5_restoredCtx), + f.composeWith(fPrime)); } } } @@ -679,8 +684,9 @@ class IDESolverImpl : public IDESolverAPIMixin { auto fPrime = f.composeWith(EdgeFnE); PHASAR_LOG_LEVEL(DEBUG, "Compose: " << EdgeFnE << " * " << f << " = " << fPrime); - self().addWorklistItem(d1, ReturnSiteN, std::move(d3), - std::move(fPrime)); + + self().updateWithNewEdges(d1, n, 
ReturnSiteN, ReturnSiteNs, + std::move(d3), std::move(fPrime)); } } } @@ -720,8 +726,10 @@ class IDESolverImpl : public IDESolverAPIMixin { for (const auto &Entry : Inc) { // line 22 n_t c = Entry.first; + + const auto &RetSiteCs = ICF->getReturnSitesOfCallAt(c); // for each return site - for (n_t RetSiteC : ICF->getReturnSitesOfCallAt(c)) { + for (n_t RetSiteC : RetSiteCs) { // compute return-flow function FlowFunctionPtrType RetFunction = CachedFlowEdgeFunctions.getRetFlowFunction( @@ -773,9 +781,9 @@ class IDESolverImpl : public IDESolverAPIMixin { d_t d5_restoredCtx = self().restoreContextOnReturnedFact(c, d4, d5); PHASAR_LOG_LEVEL(DEBUG, "Compose: " << fPrime << " * " << f3); - self().addWorklistItem(std::move(d3), RetSiteC, - std::move(d5_restoredCtx), - f3.composeWith(fPrime)); + self().updateWithNewEdges( + std::move(d3), c, RetSiteC, RetSiteCs, + std::move(d5_restoredCtx), f3.composeWith(fPrime)); } } } @@ -835,8 +843,8 @@ class IDESolverImpl : public IDESolverAPIMixin { void propagteUnbalancedReturnFlow(n_t RetSiteC, d_t TargetVal, EdgeFunction EdgeFunc, n_t /*RelatedCallSite*/) { - self().addWorklistItem(ZeroValue, std::move(RetSiteC), std::move(TargetVal), - std::move(EdgeFunc)); + self().addInitialWorklistItem(ZeroValue, std::move(RetSiteC), + std::move(TargetVal), std::move(EdgeFunc)); } EdgeFunction jumpFunction(const PathEdge Edge) { @@ -923,7 +931,8 @@ class IDESolverImpl : public IDESolverAPIMixin { if (!IDEProblem.isZeroValue(Fact)) { INC_COUNTER("Gen facts", 1, Core); } - addWorklistItem(Fact, StartPoint, Fact, EdgeIdentity{}); + self().addInitialWorklistItem(Fact, StartPoint, Fact, + EdgeIdentity{}); } } } @@ -1011,10 +1020,12 @@ class IDESolverImpl : public IDESolverAPIMixin { return RetFlowFunction->computeTargets(std::move(d2)); } - bool addWorklistItem(d_t SourceVal, n_t Target, d_t TargetVal, - EdgeFunction f) { + template + void updateWithNewEdges(d_t SourceVal, n_t /*OldTarget*/, n_t NewTarget, + const TargetsT & /*AllNewTargets*/, d_t 
TargetVal, + EdgeFunction f) { EdgeFunction JumpFnE = [&]() { - const auto RevLookupResult = JumpFn->reverseLookup(Target, TargetVal); + const auto RevLookupResult = JumpFn->reverseLookup(NewTarget, TargetVal); if (RevLookupResult) { const auto &JumpFnContainer = RevLookupResult->get(); const auto Find = std::find_if( @@ -1040,24 +1051,36 @@ class IDESolverImpl : public IDESolverAPIMixin { << (NewFunction ? " (new jump func)" : " ")); PHASAR_LOG_LEVEL(DEBUG, ' ')); if (NewFunction) { - JumpFn->addFunction(SourceVal, Target, TargetVal, fPrime); - PathEdge Edge(SourceVal, Target, TargetVal); - WorkList.push_back(std::move(Edge)); + JumpFn->addFunction(SourceVal, NewTarget, TargetVal, fPrime); IF_LOG_ENABLED(if (!IDEProblem.isZeroValue(TargetVal)) { - PHASAR_LOG_LEVEL(DEBUG, "[addWorklistItem]: EDGE: getFunctionOf(Target)) + PHASAR_LOG_LEVEL(DEBUG, "[updateWithNewEdges]: EDGE: getFunctionOf(NewTarget)) << ", D: " << DToString(SourceVal) << '>'); - PHASAR_LOG_LEVEL(DEBUG, " ---> '); PHASAR_LOG_LEVEL(DEBUG, ' '); }); + + self().addWorklistItem(SourceVal, NewTarget, TargetVal, + std::move(fPrime)); } else { - PHASAR_LOG_LEVEL(DEBUG, "[addWorklistItem]: No new function!"); + PHASAR_LOG_LEVEL(DEBUG, "[updateWithNewEdges]: No new function!"); } + } + + void addInitialWorklistItem(d_t SourceVal, n_t Target, d_t TargetVal, + EdgeFunction EF) { + self().updateWithNewEdges(std::move(SourceVal), Target, Target, + llvm::ArrayRef(&Target, 1), + std::move(TargetVal), std::move(EF)); + } - return NewFunction; + void addWorklistItem(d_t SourceVal, n_t Target, d_t TargetVal, + EdgeFunction /*EF*/) { + WorkList.emplace_back(std::move(SourceVal), std::move(Target), + std::move(TargetVal)); } std::set>::Cell> From c1d9632e5cba1f5914b66a3f2475c58dbc679270 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Wed, 20 Sep 2023 22:17:23 +0200 Subject: [PATCH 08/11] Make EagerIDESolver inherit the IDESolverImpl getting rid of a lot code duplication --- 
.../DataFlow/IfdsIde/Solver/EagerIDESolver.h | 1687 +---------------- .../DataFlow/IfdsIde/Solver/IDESolver.h | 12 +- .../IfdsIde/Solver/detail/IDESolverImpl.h | 44 +- 3 files changed, 99 insertions(+), 1644 deletions(-) diff --git a/include/phasar/DataFlow/IfdsIde/Solver/EagerIDESolver.h b/include/phasar/DataFlow/IfdsIde/Solver/EagerIDESolver.h index e9d75ca1c..dcaf55409 100644 --- a/include/phasar/DataFlow/IfdsIde/Solver/EagerIDESolver.h +++ b/include/phasar/DataFlow/IfdsIde/Solver/EagerIDESolver.h @@ -61,12 +61,16 @@ namespace psr { /// Solves the given IDETabulationProblem as described in the 1996 paper by /// Sagiv, Horwitz and Reps. To solve the problem, call solve(). Results /// can then be queried by using resultAt() and resultsAt(). +/// +/// Propagates data-flow facts onto the statement, where they were generated. template class IDESolver - : public IDESolverAPIMixin< - IDESolver> { - friend IDESolverAPIMixin< - IDESolver>; + : public IDESolverImpl< + IDESolver, + AnalysisDomainTy, Container, PropagateOntoStrategy> { + using base_t = IDESolverImpl< + IDESolver, + AnalysisDomainTy, Container, PropagateOntoStrategy>; public: using ProblemTy = IDETabulationProblem; @@ -81,196 +85,22 @@ class IDESolver using t_t = typename AnalysisDomainTy::t_t; using v_t = typename AnalysisDomainTy::v_t; - IDESolver(IDETabulationProblem &Problem, - const i_t *ICF, PropagateOntoStrategy /*Strategy*/ = {}) - : IDEProblem(Problem), ZeroValue(Problem.getZeroValue()), ICF(ICF), - SolverConfig(Problem.getIFDSIDESolverConfig()), - CachedFlowEdgeFunctions(Problem), AllTop(Problem.allTopFunction()), - JumpFn(std::make_shared>()), - Seeds(Problem.initialSeeds()) { - assert(ICF != nullptr); - } - - IDESolver(const IDESolver &) = delete; - IDESolver &operator=(const IDESolver &) = delete; - IDESolver(IDESolver &&) = delete; - IDESolver &operator=(IDESolver &&) = delete; - - virtual ~IDESolver() = default; - - nlohmann::json getAsJson() { - using TableCell = typename Table::Cell; - const 
static std::string DataFlowID = "DataFlow"; - nlohmann::json J; - auto Results = this->ValTab.cellSet(); - if (Results.empty()) { - J[DataFlowID] = "EMPTY"; - } else { - std::vector Cells(Results.begin(), Results.end()); - sort(Cells.begin(), Cells.end(), [](TableCell Lhs, TableCell Rhs) { - return Lhs.getRowKey() < Rhs.getRowKey(); - }); - n_t Curr; - for (unsigned I = 0; I < Cells.size(); ++I) { - Curr = Cells[I].getRowKey(); - auto NStr = - llvm::StringRef(NToString(Cells[I].getRowKey())).trim().str(); - - std::string NodeStr = - ICF->getFunctionName(ICF->getFunctionOf(Curr)) + "::" + NStr; - J[DataFlowID][NodeStr]; - std::string FactStr = - llvm::StringRef(DToString(Cells[I].getColumnKey())).trim().str(); - std::string ValueStr = - llvm::StringRef(LToString(Cells[I].getValue())).trim().str(); - J[DataFlowID][NodeStr]["Facts"] += {FactStr, ValueStr}; - } - } - return J; - } - - /// Returns the L-type result for the given value at the given statement. - [[nodiscard]] l_t resultAt(n_t Stmt, d_t Value) { - return getSolverResults().resultAt(Stmt, Value); - } - - /// Returns the L-type result at the given statement for the given data-flow - /// fact while respecting LLVM's SSA semantics. - /// - /// An example: when a value is loaded and the location loaded from, here - /// variable 'i', is a data-flow fact that holds, then the loaded value '%0' - /// will usually be generated and also holds. However, due to the underlying - /// theory (and respective implementation) this load instruction causes the - /// loaded value to be generated and thus, it will be valid only AFTER the - /// load instruction, i.e., at the successor instruction. - /// - /// %0 = load i32, i32* %i, align 4 - /// - /// This result accessor function returns the results at the successor - /// instruction(s) reflecting that the expression on the left-hand side holds - /// if the expression on the right-hand side holds. 
- template - [[nodiscard]] typename std::enable_if_t< - std::is_same_v, llvm::Instruction *>, l_t> - resultAtInLLVMSSA(NTy Stmt, d_t Value) { - return getSolverResults().resultAtInLLVMSSA(Stmt, Value); - } - - /// Returns the resulting environment for the given statement. - /// The artificial zero value can be automatically stripped. - /// TOP values are never returned. - [[nodiscard]] virtual std::unordered_map - resultsAt(n_t Stmt, bool StripZero = false) /*TODO const*/ { - return getSolverResults().resultsAt(Stmt, StripZero); - } - - /// Returns the data-flow results at the given statement while respecting - /// LLVM's SSA semantics. - /// - /// An example: when a value is loaded and the location loaded from, here - /// variable 'i', is a data-flow fact that holds, then the loaded value '%0' - /// will usually be generated and also holds. However, due to the underlying - /// theory (and respective implementation) this load instruction causes the - /// loaded value to be generated and thus, it will be valid only AFTER the - /// load instruction, i.e., at the successor instruction. - /// - /// %0 = load i32, i32* %i, align 4 - /// - /// This result accessor function returns the results at the successor - /// instruction(s) reflecting that the expression on the left-hand side holds - /// if the expression on the right-hand side holds. 
- template - [[nodiscard]] typename std::enable_if_t< - std::is_same_v, llvm::Instruction *>, - std::unordered_map> - resultsAtInLLVMSSA(NTy Stmt, bool StripZero = false) { - return getSolverResults().resultsAtInLLVMSSA(Stmt, StripZero); - } - - virtual void emitTextReport(llvm::raw_ostream &OS = llvm::outs()) { - IDEProblem.emitTextReport(getSolverResults(), OS); - } - - virtual void emitGraphicalReport(llvm::raw_ostream &OS = llvm::outs()) { - IDEProblem.emitGraphicalReport(getSolverResults(), OS); - } - - void dumpResults(llvm::raw_ostream &OS = llvm::outs()) { - getSolverResults().dumpResults(*ICF, OS); - } + explicit IDESolver(IDETabulationProblem &Problem, + const i_t *ICF, PropagateOntoStrategy Strategy = {}) + : base_t(Problem, ICF, Strategy) {} - void dumpAllInterPathEdges() { - llvm::outs() << "COMPUTED INTER PATH EDGES" << '\n'; - auto Interpe = this->computedInterPathEdges.cellSet(); - for (const auto &Cell : Interpe) { - llvm::outs() << "FROM" << '\n'; - IDEProblem.printNode(llvm::outs(), Cell.getRowKey()); - llvm::outs() << "TO" << '\n'; - IDEProblem.printNode(llvm::outs(), Cell.getColumnKey()); - llvm::outs() << "FACTS" << '\n'; - for (const auto &Fact : Cell.getValue()) { - llvm::outs() << "fact" << '\n'; - IDEProblem.printDataFlowFact(llvm::outs(), Fact.first); - llvm::outs() << "produces" << '\n'; - for (const auto &Out : Fact.second) { - IDEProblem.printDataFlowFact(llvm::outs(), Out); - } - } - } - } - - void dumpAllIntraPathEdges() { - llvm::outs() << "COMPUTED INTRA PATH EDGES" << '\n'; - auto Intrape = this->computedIntraPathEdges.cellSet(); - for (auto &Cell : Intrape) { - llvm::outs() << "FROM" << '\n'; - IDEProblem.printNode(llvm::outs(), Cell.getRowKey()); - llvm::outs() << "TO" << '\n'; - IDEProblem.printNode(llvm::outs(), Cell.getColumnKey()); - llvm::outs() << "FACTS" << '\n'; - for (auto &Fact : Cell.getValue()) { - llvm::outs() << "fact" << '\n'; - IDEProblem.printDataFlowFact(llvm::outs(), Fact.first); - llvm::outs() << "produces" 
<< '\n'; - for (auto &Out : Fact.second) { - IDEProblem.printDataFlowFact(llvm::outs(), Out); - } - } - } - } - - /// Returns a view into the computed solver-results. - /// - /// NOTE: The SolverResults store a reference into this IDESolver, so its - /// lifetime is also bound to the lifetime of this solver. If you want to use - /// the solverResults beyond the lifetime of this solver, use - /// comsumeSolverResults() instead. - [[nodiscard]] SolverResults getSolverResults() noexcept { - return SolverResults(this->ValTab, ZeroValue); - } - - /// Moves the computed solver-results out of this solver such that the solver - /// can be destroyed without that the analysis results are lost. - /// Do not call any function (including getSolverResults()) on this IDESolver - /// instance after that. - [[nodiscard]] OwningSolverResults - consumeSolverResults() noexcept(std::is_nothrow_move_constructible_v) { - return OwningSolverResults(std::move(this->ValTab), - std::move(ZeroValue)); - } +private: + friend base_t; + friend IDESolverAPIMixin< + IDESolver>; -protected: - void addWorklistItem(d_t SourceVal, n_t Target, d_t TargetVal, - EdgeFunction EF) { - WorkList.emplace_back( - PathEdge{std::move(SourceVal), std::move(Target), std::move(TargetVal)}, - std::move(EF)); - } + /// -- Phase I customization bool updateJumpFunction(d_t SourceVal, n_t Target, d_t TargetVal, EdgeFunction *f) { EdgeFunction JumpFnE = [&]() { - const auto RevLookupResult = JumpFn->reverseLookup(Target, TargetVal); + const auto RevLookupResult = + this->JumpFn->reverseLookup(Target, TargetVal); if (RevLookupResult) { const auto &JumpFnContainer = RevLookupResult->get(); const auto Find = std::find_if( @@ -282,7 +112,7 @@ class IDESolver } // jump function is initialized to all-top if no entry // was found - return AllTop; + return this->AllTop; }(); EdgeFunction fPrime = JumpFnE.joinWith(*f); @@ -291,19 +121,19 @@ class IDESolver IF_LOG_ENABLED( PHASAR_LOG_LEVEL( DEBUG, "Join: " << JumpFnE << " & " << 
*f - << (JumpFnE == *f ? " (EF's are equal)" : " ")); + << (JumpFnE == *f ? " (EF's are equal)" : "")); PHASAR_LOG_LEVEL( - DEBUG, " = " << f << (NewFunction ? " (new jump func)" : " ")); + DEBUG, " = " << f << (NewFunction ? " (new jump func)" : "")); PHASAR_LOG_LEVEL(DEBUG, ' ')); *f = fPrime; - JumpFn->addFunction(std::move(SourceVal), std::move(Target), - std::move(TargetVal), std::move(fPrime)); + this->JumpFn->addFunction(std::move(SourceVal), std::move(Target), + std::move(TargetVal), std::move(fPrime)); - IF_LOG_ENABLED(if (!IDEProblem.isZeroValue(TargetVal)) { - PHASAR_LOG_LEVEL(DEBUG, "EDGE: getFunctionOf(Target)) - << ", D: " << DToString(SourceVal) << '>'); + IF_LOG_ENABLED(if (!this->IDEProblem.isZeroValue(TargetVal)) { + PHASAR_LOG_LEVEL( + DEBUG, "EDGE: ICF->getFunctionOf(Target)) + << ", D: " << DToString(SourceVal) << '>'); PHASAR_LOG_LEVEL(DEBUG, " ---> '); @@ -314,7 +144,7 @@ class IDESolver } template - void updateWithNewEdges(d_t SourceVal, n_t OldTarget, + void updateWithNewEdges(d_t SourceVal, n_t OldTarget, n_t /*NewTarget*/, const TargetsT &NewTargets, d_t TargetVal, EdgeFunction EF) { if (updateJumpFunction(SourceVal, OldTarget, TargetVal, &EF)) { @@ -336,261 +166,38 @@ class IDESolver } } - /// Lines 13-20 of the algorithm; processing a call site in the caller's - /// context. - /// - /// For each possible callee, registers incoming call edges. - /// Also propagates call-to-return flows and summarized callee flows within - /// the caller. - /// - /// The following cases must be considered and handled: - /// 1. Process as usual and just process the call - /// 2. Create a new summary for that function (which shall be done - /// by the problem) - /// 3. Just use an existing summary provided by the problem - /// 4. 
If a special function is called, use a special summary - /// function - /// - /// @param edge an edge whose target node resembles a method call - /// - virtual void processCall(PathEdge Edge, EdgeFunction f) { - PAMM_GET_INSTANCE; - INC_COUNTER("Process Call", 1, Full); - PHASAR_LOG_LEVEL(DEBUG, - "Process call at target: " << NToString(Edge.getTarget())); - d_t d1 = Edge.factAtSource(); - n_t n = Edge.getTarget(); - // a call node; line 14... - d_t d2 = Edge.factAtTarget(); - const auto &ReturnSiteNs = ICF->getReturnSitesOfCallAt(n); - const auto &Callees = ICF->getCalleesOfCallAt(n); - - IF_LOG_ENABLED( - PHASAR_LOG_LEVEL(DEBUG, "Possible callees:"); for (auto Callee - : Callees) { - PHASAR_LOG_LEVEL(DEBUG, " " << Callee->getName()); - } PHASAR_LOG_LEVEL(DEBUG, "Possible return sites:"); - for (auto ret - : ReturnSiteNs) { - PHASAR_LOG_LEVEL(DEBUG, " " << NToString(ret)); - }); - - // for each possible callee - for (f_t SCalledProcN : Callees) { // still line 14 - // check if a special summary for the called procedure exists - FlowFunctionPtrType SpecialSum = - CachedFlowEdgeFunctions.getSummaryFlowFunction(n, SCalledProcN); - // if a special summary is available, treat this as a normal flow - // and use the summary flow and edge functions - if (SpecialSum) { - PHASAR_LOG_LEVEL(DEBUG, "Found and process special summary"); - for (n_t ReturnSiteN : ReturnSiteNs) { - container_type Res = computeSummaryFlowFunction(SpecialSum, d1, d2); - INC_COUNTER("SpecialSummary-FF Application", 1, Full); - ADD_TO_HISTOGRAM("Data-flow facts", Res.size(), 1, Full); - saveEdges(n, ReturnSiteN, d2, Res, false); - for (d_t d3 : Res) { - EdgeFunction SumEdgFnE = - CachedFlowEdgeFunctions.getSummaryEdgeFunction(n, d2, - ReturnSiteN, d3); - INC_COUNTER("SpecialSummary-EF Queries", 1, Full); - IF_LOG_ENABLED( - PHASAR_LOG_LEVEL( - DEBUG, "Queried Summary Edge Function: " << SumEdgFnE); - PHASAR_LOG_LEVEL(DEBUG, "Compose: " << SumEdgFnE << " * " << f - << '\n')); - - updateWithNewEdges(d1, 
n, ReturnSiteNs, std::move(d3), - f.composeWith(SumEdgFnE)); - } - } - } else { - // compute the call-flow function - FlowFunctionPtrType Function = - CachedFlowEdgeFunctions.getCallFlowFunction(n, SCalledProcN); - INC_COUNTER("FF Queries", 1, Full); - container_type Res = computeCallFlowFunction(Function, d1, d2); - ADD_TO_HISTOGRAM("Data-flow facts", Res.size(), 1, Full); - // for each callee's start point(s) - auto StartPointsOf = ICF->getStartPointsOf(SCalledProcN); - if (StartPointsOf.empty()) { - PHASAR_LOG_LEVEL(DEBUG, "Start points of '" + - ICF->getFunctionName(SCalledProcN) + - "' currently not available!"); - } - // if startPointsOf is empty, the called function is a declaration - for (n_t SP : StartPointsOf) { - saveEdges(n, SP, d2, Res, true); - // for each result node of the call-flow function - for (d_t d3 : Res) { - using TableCell = typename Table>::Cell; - - const auto &SummaryEntries = endSummary(SP, d3); - - // register the fact that has an incoming edge from - // line 15.1 of Naeem/Lhotak/Rodriguez - addIncoming(SP, d3, n, d2); - - if (SummaryEntries.empty()) { - // create initial self-loop - PHASAR_LOG_LEVEL( - DEBUG, "Create initial self-loop with D: " << DToString(d3)); - addWorklistItem(d3, SP, d3, EdgeIdentity{}); // line 15 - continue; - } - // line 15.2, copy to avoid concurrent modification exceptions by - // other threads - // const std::set endSumm(endSummary(sP, d3)); - // llvm::outs() << "ENDSUMM" << '\n'; - // llvm::outs() << "Size: " << endSumm.size() << '\n'; - // llvm::outs() << "sP: " << NToString(sP) - // << "\nd3: " << DToString(d3) - // << '\n'; - // printEndSummaryTab(); - // still line 15.2 of Naeem/Lhotak/Rodriguez - // for each already-queried exit value reachable from - // , create new caller-side jump functions to the return - // sites because we have observed a potentially new incoming - // edge into - for (const TableCell &Entry : SummaryEntries) { - const n_t &eP = Entry.getRowKey(); - const d_t &d4 = 
Entry.getColumnKey(); - const EdgeFunction &fCalleeSummary = Entry.getValue(); - // for each return site - for (n_t RetSiteN : ReturnSiteNs) { - // compute return-flow function - FlowFunctionPtrType RetFunction = - CachedFlowEdgeFunctions.getRetFlowFunction(n, SCalledProcN, - eP, RetSiteN); - INC_COUNTER("FF Queries", 1, Full); - const container_type ReturnedFacts = computeReturnFlowFunction( - RetFunction, d3, d4, n, Container{d2}); - ADD_TO_HISTOGRAM("Data-flow facts", ReturnedFacts.size(), 1, - Full); - saveEdges(eP, RetSiteN, d4, ReturnedFacts, true); - // for each target value of the function - for (d_t d5 : ReturnedFacts) { - // update the caller-side summary function - // get call edge function - EdgeFunction f4 = - CachedFlowEdgeFunctions.getCallEdgeFunction( - n, d2, SCalledProcN, d3); - PHASAR_LOG_LEVEL(DEBUG, "Queried Call Edge Function: " << f4); - // get return edge function - EdgeFunction f5 = - CachedFlowEdgeFunctions.getReturnEdgeFunction( - n, SCalledProcN, eP, d4, RetSiteN, d5); - PHASAR_LOG_LEVEL(DEBUG, - "Queried Return Edge Function: " << f5); - if (SolverConfig.emitESG()) { - for (auto SP : ICF->getStartPointsOf(SCalledProcN)) { - IntermediateEdgeFunctions[std::make_tuple(n, d2, SP, d3)] - .push_back(f4); - } - IntermediateEdgeFunctions[std::make_tuple(eP, d4, RetSiteN, - d5)] - .push_back(f5); - } - INC_COUNTER("EF Queries", 2, Full); - // compose call * calleeSummary * return edge functions - PHASAR_LOG_LEVEL(DEBUG, "Compose: " << f5 << " * " - << fCalleeSummary << " * " - << f4); - PHASAR_LOG_LEVEL(DEBUG, - " (return * calleeSummary * call)"); - EdgeFunction fPrime = - f4.composeWith(fCalleeSummary).composeWith(f5); - PHASAR_LOG_LEVEL(DEBUG, " = " << fPrime); - d_t d5_restoredCtx = restoreContextOnReturnedFact(n, d2, d5); - // propagte the effects of the entire call - PHASAR_LOG_LEVEL(DEBUG, "Compose: " << fPrime << " * " << f); - updateWithNewEdges(d1, n, ReturnSiteNs, - std::move(d5_restoredCtx), - f.composeWith(fPrime)); - } - } - } - } 
- } - } - } - // line 17-19 of Naeem/Lhotak/Rodriguez - // process intra-procedural flows along call-to-return flow functions - for (n_t ReturnSiteN : ReturnSiteNs) { - FlowFunctionPtrType CallToReturnFF = - CachedFlowEdgeFunctions.getCallToRetFlowFunction(n, ReturnSiteN, - Callees); - INC_COUNTER("FF Queries", 1, Full); - container_type ReturnFacts = - computeCallToReturnFlowFunction(CallToReturnFF, d1, d2); - ADD_TO_HISTOGRAM("Data-flow facts", ReturnFacts.size(), 1, Full); - saveEdges(n, ReturnSiteN, d2, ReturnFacts, false); - for (d_t d3 : ReturnFacts) { - EdgeFunction EdgeFnE = - CachedFlowEdgeFunctions.getCallToRetEdgeFunction(n, d2, ReturnSiteN, - d3, Callees); - PHASAR_LOG_LEVEL(DEBUG, - "Queried Call-to-Return Edge Function: " << EdgeFnE); - if (SolverConfig.emitESG()) { - IntermediateEdgeFunctions[std::make_tuple(n, d2, ReturnSiteN, d3)] - .push_back(EdgeFnE); - } - INC_COUNTER("EF Queries", 1, Full); - PHASAR_LOG_LEVEL(DEBUG, "Compose: " << EdgeFnE << " * " << f); - auto fPrime = f.composeWith(EdgeFnE); - PHASAR_LOG_LEVEL(DEBUG, "Compose: " << EdgeFnE << " * " << f << " = " - << fPrime); - updateWithNewEdges(d1, n, ReturnSiteNs, std::move(d3), - std::move(fPrime)); - } - } + void addInitialWorklistItem(d_t SourceVal, n_t Target, d_t TargetVal, + EdgeFunction EF) { + addWorklistItem(std::move(SourceVal), std::move(Target), + std::move(TargetVal), std::move(EF)); } - /// Lines 33-37 of the algorithm. - /// Simply propagate normal, intra-procedural flows. 
- /// @param edge - /// - virtual void processNormalFlow(PathEdge Edge, EdgeFunction f) { - PAMM_GET_INSTANCE; - INC_COUNTER("Process Normal", 1, Full); - PHASAR_LOG_LEVEL( - DEBUG, "Process normal at target: " << NToString(Edge.getTarget())); - auto [d1, n, d2] = Edge.consume(); + void addWorklistItem(d_t SourceVal, n_t Target, d_t TargetVal, + EdgeFunction EF) { + WorkList.emplace_back( + PathEdge{std::move(SourceVal), std::move(Target), std::move(TargetVal)}, + std::move(EF)); + } - const auto &Succs = ICF->getSuccsOf(n); + bool doNext() { + assert(!WorkList.empty()); + auto [Edge, EF] = std::move(WorkList.back()); + WorkList.pop_back(); - for (const auto &nPrime : Succs) { - FlowFunctionPtrType FlowFunc = - CachedFlowEdgeFunctions.getNormalFlowFunction(n, nPrime); - INC_COUNTER("FF Queries", 1, Full); - const container_type Res = computeNormalFlowFunction(FlowFunc, d1, d2); - ADD_TO_HISTOGRAM("Data-flow facts", Res.size(), 1, Full); - saveEdges(n, nPrime, d2, Res, false); - for (d_t d3 : Res) { - EdgeFunction g = - CachedFlowEdgeFunctions.getNormalEdgeFunction(n, d2, nPrime, d3); - PHASAR_LOG_LEVEL(DEBUG, "Queried Normal Edge Function: " << g); - EdgeFunction fPrime = f.composeWith(g); - if (SolverConfig.emitESG()) { - IntermediateEdgeFunctions[std::make_tuple(n, d2, nPrime, d3)] - .push_back(g); - } - PHASAR_LOG_LEVEL(DEBUG, - "Compose: " << g << " * " << f << " = " << fPrime); - INC_COUNTER("EF Queries", 1, Full); + this->propagate(std::move(Edge), std::move(EF)); - updateWithNewEdges(d1, n, Succs, std::move(d3), std::move(fPrime)); - } - } + return !WorkList.empty(); } + /// -- Phase II customization + void propagateValueAtStart(const std::pair NAndD, n_t Stmt) { PAMM_GET_INSTANCE; d_t Fact = NAndD.second; - f_t Func = ICF->getFunctionOf(Stmt); - for (const n_t CS : ICF->getCallsFromWithin(Func)) { - for (const auto &BeforeCS : ICF->getPredsOf(CS)) { - auto LookupResults = JumpFn->forwardLookup(Fact, BeforeCS); + f_t Func = this->ICF->getFunctionOf(Stmt); + 
for (const n_t &CS : this->ICF->getCallsFromWithin(Func)) { + for (const auto &BeforeCS : this->ICF->getPredsOf(CS)) { + auto LookupResults = this->JumpFn->forwardLookup(Fact, BeforeCS); if (!LookupResults) { continue; } @@ -601,1211 +208,39 @@ class IDESolver n_t SP = Stmt; l_t Val = seedVal(SP, Fact); INC_COUNTER("Value Propagation", 1, Full); - propagateSeedValue(CS, dPrime, fPrime.computeTarget(Val)); - } - } - } - } - - void propagateValueAtCall(const std::pair NAndD, n_t Stmt) { - PAMM_GET_INSTANCE; - d_t Fact = NAndD.second; - for (const f_t Callee : ICF->getCalleesOfCallAt(Stmt)) { - FlowFunctionPtrType CallFlowFunction = - CachedFlowEdgeFunctions.getCallFlowFunction(Stmt, Callee); - INC_COUNTER("FF Queries", 1, Full); - for (const d_t dPrime : CallFlowFunction->computeTargets(Fact)) { - EdgeFunction EdgeFn = CachedFlowEdgeFunctions.getCallEdgeFunction( - Stmt, Fact, Callee, dPrime); - PHASAR_LOG_LEVEL(DEBUG, "Queried Call Edge Function: " << EdgeFn); - if (SolverConfig.emitESG()) { - for (const auto SP : ICF->getStartPointsOf(Callee)) { - IntermediateEdgeFunctions[std::make_tuple(Stmt, Fact, SP, dPrime)] - .push_back(EdgeFn); - } + this->propagateValue(CS, dPrime, fPrime.computeTarget(Val)); } - INC_COUNTER("EF Queries", 1, Full); - for (const n_t StartPoint : ICF->getStartPointsOf(Callee)) { - INC_COUNTER("Value Propagation", 1, Full); - propagateSeedValue(StartPoint, dPrime, - EdgeFn.computeTarget(seedVal(Stmt, Fact))); - } - } - } - } - - void propagateSeedValue(n_t NHashN, d_t NHashD, const l_t &L) { - l_t ValNHash = seedVal(NHashN, NHashD); - l_t LPrime = joinValueAt(NHashN, NHashD, ValNHash, L); - if (!(LPrime == ValNHash)) { - SeedValues.insert(NHashN, NHashD, std::move(LPrime)); - ValuePropWL.emplace_back(std::move(NHashN), std::move(NHashD)); - } - } - - l_t val(n_t NHashN, d_t NHashD) { - if (ValTab.contains(NHashN, NHashD)) { - return ValTab.get(NHashN, NHashD); - } - // implicitly initialized to top; see line [1] of Fig. 
7 in SRH96 paper - return IDEProblem.topElement(); - } - - l_t seedVal(n_t NHashN, d_t NHashD) { - if (SeedValues.contains(NHashN, NHashD)) { - return SeedValues.get(NHashN, NHashD); - } - return IDEProblem.topElement(); - } - - void setVal(n_t NHashN, d_t NHashD, l_t L) { - IF_LOG_ENABLED({ - PHASAR_LOG_LEVEL(DEBUG, - "Function : " << ICF->getFunctionOf(NHashN)->getName()); - PHASAR_LOG_LEVEL(DEBUG, "Inst. : " << NToString(NHashN)); - PHASAR_LOG_LEVEL(DEBUG, "Fact : " << DToString(NHashD)); - PHASAR_LOG_LEVEL(DEBUG, "Value : " << LToString(L)); - PHASAR_LOG_LEVEL(DEBUG, ' '); - }); - // TOP is the implicit default value which we do not need to store. - // if (l == IDEProblem.topElement()) { - // do not store top values - // ValTab.remove(nHashN, nHashD); - // } else { - ValTab.insert(NHashN, NHashD, std::move(L)); - // } - } - - EdgeFunction jumpFunction(const PathEdge Edge) { - IF_LOG_ENABLED( - PHASAR_LOG_LEVEL(DEBUG, "JumpFunctions Forward-Lookup:"); - PHASAR_LOG_LEVEL(DEBUG, - " Source D: " << DToString(Edge.factAtSource())); - PHASAR_LOG_LEVEL(DEBUG, " Target N: " << NToString(Edge.getTarget())); - PHASAR_LOG_LEVEL(DEBUG, - " Target D: " << DToString(Edge.factAtTarget()))); - - auto FwdLookupRes = - JumpFn->forwardLookup(Edge.factAtSource(), Edge.getTarget()); - if (FwdLookupRes) { - auto &Ref = FwdLookupRes->get(); - if (auto Find = std::find_if(Ref.begin(), Ref.end(), - [Edge](const auto &Pair) { - return Edge.factAtTarget() == Pair.first; - }); - Find != Ref.end()) { - PHASAR_LOG_LEVEL(DEBUG, " => EdgeFn: " << Find->second); - return Find->second; } } - PHASAR_LOG_LEVEL(DEBUG, " => EdgeFn: " << AllTop); - // JumpFn initialized to all-top, see line [2] in SRH96 paper - return AllTop; } - void addEndSummary(n_t SP, d_t d1, n_t eP, d_t d2, EdgeFunction f) { - // note: at this point we don't need to join with a potential previous f - // because f is a jump function, which is already properly joined - // within propagate(..) 
- EndsummaryTab.get(SP, d1).insert(eP, d2, std::move(f)); - } - - // should be made a callable at some point - void pathEdgeProcessingTask(PathEdge Edge, EdgeFunction EF) { - PAMM_GET_INSTANCE; - INC_COUNTER("JumpFn Construction", 1, Full); - IF_LOG_ENABLED( - PHASAR_LOG_LEVEL( - DEBUG, - "-------------------------------------------- " - << PathEdgeCount - << ". Path Edge --------------------------------------------"); - PHASAR_LOG_LEVEL(DEBUG, ' '); - PHASAR_LOG_LEVEL(DEBUG, "Process " << PathEdgeCount << ". path edge:"); - PHASAR_LOG_LEVEL(DEBUG, "< D source: " << DToString(Edge.factAtSource()) - << " ;"); - PHASAR_LOG_LEVEL(DEBUG, - " N target: " << NToString(Edge.getTarget()) << " ;"); - PHASAR_LOG_LEVEL(DEBUG, - " D target: " << DToString(Edge.factAtTarget())); - PHASAR_LOG_LEVEL(DEBUG, " J jump-function: " << EF << " >") - PHASAR_LOG_LEVEL(DEBUG, ' ')); - - if (!ICF->isCallSite(Edge.getTarget())) { - if (ICF->isExitInst(Edge.getTarget())) { - processExit(Edge, EF); - } - if (!ICF->getSuccsOf(Edge.getTarget()).empty()) { - processNormalFlow(std::move(Edge), std::move(EF)); - } - } else { - processCall(std::move(Edge), std::move(EF)); - } - } - - // should be made a callable at some point - void valuePropagationTask(std::pair NAndD) { - n_t n = NAndD.first; - // our initial seeds are not necessarily method-start points but here they - // should be treated as such the same also for unbalanced return sites in - // an unbalanced problem - if (ICF->isStartPoint(n) || Seeds.containsInitialSeedsFor(n) || - UnbalancedRetSites.count(n)) { - // FIXME: is currently not executed for main!!! 
- // initial seeds are set in the global constructor, and main is also not - // officially called by any other function - propagateValueAtStart(NAndD, n); - } - if (ICF->isCallSite(n)) { - propagateValueAtCall(NAndD, n); - } - } - - // should be made a callable at some point - void valueComputationTask(const std::vector &Values) { - PAMM_GET_INSTANCE; - for (n_t n : Values) { - for (n_t SP : ICF->getStartPointsOf(ICF->getFunctionOf(n))) { - using TableCell = typename Table>::Cell; - Table> &LookupByTarget = - JumpFn->lookupByTarget(n); - for (const TableCell &SourceValTargetValAndFunction : - LookupByTarget.cellSet()) { - d_t dPrime = SourceValTargetValAndFunction.getRowKey(); - d_t d = SourceValTargetValAndFunction.getColumnKey(); - EdgeFunction fPrime = SourceValTargetValAndFunction.getValue(); - l_t TargetVal = seedVal(SP, dPrime); - setVal(n, d, - IDEProblem.join(val(n, d), - fPrime.computeTarget(std::move(TargetVal)))); - INC_COUNTER("Value Computation", 1, Full); - } - } - } - } - - virtual void saveEdges(n_t SourceNode, n_t SinkStmt, d_t SourceVal, - const container_type &DestVals, bool InterP) { - if (!SolverConfig.recordEdges()) { - return; - } - Table> &TgtMap = - (InterP) ? 
ComputedInterPathEdges : ComputedIntraPathEdges; - TgtMap.get(SourceNode, SinkStmt)[SourceVal].insert(DestVals.begin(), - DestVals.end()); - } - - void submitInitialValues() { - std::map> AllSeeds = Seeds.getSeeds(); - for (n_t UnbalancedRetSite : UnbalancedRetSites) { - if (AllSeeds.find(UnbalancedRetSite) == AllSeeds.end()) { - AllSeeds[UnbalancedRetSite][ZeroValue] = IDEProblem.topElement(); - } - } - // do processing - for (const auto &[StartPoint, Facts] : AllSeeds) { - for (auto &[Fact, Value] : Facts) { - PHASAR_LOG_LEVEL(DEBUG, "set initial seed at: " - << NToString(StartPoint) - << ", fact: " << DToString(Fact) - << ", value: " << LToString(Value)); - // initialize the initial seeds with the top element as we have no - // information at the beginning of the value computation problem - SeedValues.insert(StartPoint, Fact, Value); - std::pair SuperGraphNode(StartPoint, Fact); - valuePropagationTask(std::move(SuperGraphNode)); - } - } - } - - std::vector allNodes() const { + std::vector getAllValueComputationNodes() const { std::vector Ret; // TODO: Reserve - for (const auto &Fun : ICF->getAllFunctions()) { - for (const auto &Inst : ICF->getAllInstructionsOf(Fun)) { + for (const auto &Fun : this->ICF->getAllFunctions()) { + for (const auto &Inst : this->ICF->getAllInstructionsOf(Fun)) { Ret.push_back(Inst); } } return Ret; } - /// Computes the final values for edge functions. - void computeValues() { - PHASAR_LOG_LEVEL(DEBUG, "Start computing values"); - // Phase II(i) - submitInitialValues(); - while (!ValuePropWL.empty()) { - auto NAndD = std::move(ValuePropWL.back()); - ValuePropWL.pop_back(); - valuePropagationTask(std::move(NAndD)); - } - - // Phase II(ii) - // we create an array of all nodes and then dispatch fractions of this - // array to multiple threads - const auto AllNodes = allNodes(); - valueComputationTask(AllNodes); - } - - /// Schedules the processing of initial seeds, initiating the analysis. 
- /// Clients should only call this methods if performing synchronization on - /// their own. Normally, solve() should be called instead. - void submitInitialSeeds() { - PAMM_GET_INSTANCE; - // Check if the initial seeds contain the zero value at every starting - // point. If not, the zero value needs to be added to allow for correct - // solving of the problem. - for (const auto &[StartPoint, Facts] : Seeds.getSeeds()) { - if (Facts.find(ZeroValue) == Facts.end()) { - // Add zero value if it's not in the set of facts. - PHASAR_LOG_LEVEL( - DEBUG, "Zero-Value has been added automatically to start point: " - << NToString(StartPoint)); - Seeds.addSeed(StartPoint, ZeroValue, IDEProblem.bottomElement()); - } - } - PHASAR_LOG_LEVEL(DEBUG, - "Number of initial seeds: " << Seeds.countInitialSeeds()); - PHASAR_LOG_LEVEL(DEBUG, "List of initial seeds: "); - for (const auto &[StartPoint, Facts] : Seeds.getSeeds()) { - PHASAR_LOG_LEVEL(DEBUG, "Start point: " << NToString(StartPoint)); - /// If statically disabling the logger, Fact and Value are unused. To - /// prevent the copilation to fail with -Werror, add the [[maybe_unused]] - /// attribute - for ([[maybe_unused]] const auto &[Fact, Value] : Facts) { - PHASAR_LOG_LEVEL(DEBUG, "\tFact: " << DToString(Fact)); - PHASAR_LOG_LEVEL(DEBUG, "\tValue: " << LToString(Value)); - } - } - for (const auto &[StartPoint, Facts] : Seeds.getSeeds()) { - for (const auto &[Fact, Value] : Facts) { - PHASAR_LOG_LEVEL(DEBUG, "Submit seed at: " << NToString(StartPoint)); - PHASAR_LOG_LEVEL(DEBUG, "\tFact: " << DToString(Fact)); - PHASAR_LOG_LEVEL(DEBUG, "\tValue: " << LToString(Value)); - if (!IDEProblem.isZeroValue(Fact)) { - INC_COUNTER("Gen facts", 1, Core); - } - addWorklistItem(Fact, StartPoint, Fact, EdgeIdentity{}); - } - } - } - - /// Lines 21-32 of the algorithm. - /// - /// Stores callee-side summaries. - /// Also, at the side of the caller, propagates intra-procedural flows to - /// return sites using those newly computed summaries. 
- /// - /// @param edge an edge whose target node resembles a method exit - /// - virtual void processExit(PathEdge Edge, EdgeFunction f) { - PAMM_GET_INSTANCE; - INC_COUNTER("Process Exit", 1, Full); - PHASAR_LOG_LEVEL(DEBUG, - "Process exit at target: " << NToString(Edge.getTarget())); - n_t n = Edge.getTarget(); // an exit node; line 21... - f_t FunctionThatNeedsSummary = ICF->getFunctionOf(n); - d_t d1 = Edge.factAtSource(); - d_t d2 = Edge.factAtTarget(); - // for each of the method's start points, determine incoming calls - const auto StartPointsOf = ICF->getStartPointsOf(FunctionThatNeedsSummary); - std::map Inc; - for (n_t SP : StartPointsOf) { - // line 21.1 of Naeem/Lhotak/Rodriguez - // register end-summary - addEndSummary(SP, d1, n, d2, f); - for (const auto &Entry : incoming(d1, SP)) { - Inc[Entry.first] = Container{Entry.second}; - } - } - printEndSummaryTab(); - printIncomingTab(); - // for each incoming call edge already processed - //(see processCall(..)) - for (const auto &Entry : Inc) { - // line 22 - n_t c = Entry.first; - - const auto &RetSiteCs = ICF->getReturnSitesOfCallAt(c); - // for each return site - for (n_t RetSiteC : RetSiteCs) { - // compute return-flow function - FlowFunctionPtrType RetFunction = - CachedFlowEdgeFunctions.getRetFlowFunction( - c, FunctionThatNeedsSummary, n, RetSiteC); - INC_COUNTER("FF Queries", 1, Full); - // for each incoming-call value - for (d_t d4 : Entry.second) { - const container_type Targets = - computeReturnFlowFunction(RetFunction, d1, d2, c, Entry.second); - ADD_TO_HISTOGRAM("Data-flow facts", Targets.size(), 1, Full); - saveEdges(n, RetSiteC, d2, Targets, true); - // for each target value at the return site - // line 23 - for (d_t d5 : Targets) { - // compute composed function - // get call edge function - EdgeFunction f4 = CachedFlowEdgeFunctions.getCallEdgeFunction( - c, d4, ICF->getFunctionOf(n), d1); - PHASAR_LOG_LEVEL(DEBUG, "Queried Call Edge Function: " << f4); - // get return edge function - 
EdgeFunction f5 = - CachedFlowEdgeFunctions.getReturnEdgeFunction( - c, ICF->getFunctionOf(n), n, d2, RetSiteC, d5); - PHASAR_LOG_LEVEL(DEBUG, "Queried Return Edge Function: " << f5); - if (SolverConfig.emitESG()) { - for (auto SP : ICF->getStartPointsOf(ICF->getFunctionOf(n))) { - IntermediateEdgeFunctions[std::make_tuple(c, d4, SP, d1)] - .push_back(f4); - } - IntermediateEdgeFunctions[std::make_tuple(n, d2, RetSiteC, d5)] - .push_back(f5); - } - INC_COUNTER("EF Queries", 2, Full); - // compose call function * function * return function - PHASAR_LOG_LEVEL(DEBUG, - "Compose: " << f5 << " * " << f << " * " << f4); - PHASAR_LOG_LEVEL(DEBUG, " (return * function * call)"); - EdgeFunction fPrime = f4.composeWith(f).composeWith(f5); - PHASAR_LOG_LEVEL(DEBUG, " = " << fPrime); - // for each jump function coming into the call, propagate to - // return site using the composed function - auto RevLookupResult = JumpFn->reverseLookup(c, d4); - if (RevLookupResult) { - for (size_t I = 0; I < RevLookupResult->get().size(); ++I) { - auto ValAndFunc = RevLookupResult->get()[I]; - EdgeFunction f3 = ValAndFunc.second; - if (f3 != AllTop) { - d_t d3 = ValAndFunc.first; - d_t d5_restoredCtx = restoreContextOnReturnedFact(c, d4, d5); - PHASAR_LOG_LEVEL(DEBUG, "Compose: " << fPrime << " * " << f3); - updateWithNewEdges(std::move(d3), c, RetSiteCs, - std::move(d5_restoredCtx), - f3.composeWith(fPrime)); - } - } - } - } - } - } - } - // handling for unbalanced problems where we return out of a method with a - // fact for which we have no incoming flow. 
- // note: we propagate that way only values that originate from ZERO, as - // conditionally generated values should only - // be propagated into callers that have an incoming edge for this - // condition - /// TODO: Add a check for "d1 is seed in functionOf(n)" - if (SolverConfig.followReturnsPastSeeds() && Inc.empty() /*&& - IDEProblem.isZeroValue(d1)*/) { - const auto &Callers = ICF->getCallersOf(FunctionThatNeedsSummary); - for (n_t Caller : Callers) { - for (n_t RetSiteC : ICF->getReturnSitesOfCallAt(Caller)) { - FlowFunctionPtrType RetFunction = - CachedFlowEdgeFunctions.getRetFlowFunction( - Caller, FunctionThatNeedsSummary, n, RetSiteC); - INC_COUNTER("FF Queries", 1, Full); - const container_type Targets = computeReturnFlowFunction( - RetFunction, d1, d2, Caller, Container{ZeroValue}); - ADD_TO_HISTOGRAM("Data-flow facts", Targets.size(), 1, Full); - saveEdges(n, RetSiteC, d2, Targets, true); - for (d_t d5 : Targets) { - EdgeFunction f5 = - CachedFlowEdgeFunctions.getReturnEdgeFunction( - Caller, ICF->getFunctionOf(n), n, d2, RetSiteC, d5); - PHASAR_LOG_LEVEL(DEBUG, "Queried Return Edge Function: " << f5); - if (SolverConfig.emitESG()) { - IntermediateEdgeFunctions[std::make_tuple(n, d2, RetSiteC, d5)] - .push_back(f5); - } - INC_COUNTER("EF Queries", 1, Full); - PHASAR_LOG_LEVEL(DEBUG, "Compose: " << f5 << " * " << f); - propagteUnbalancedReturnFlow(RetSiteC, d5, f.composeWith(f5), - Caller); - // register for value processing (2nd IDE phase) - UnbalancedRetSites.insert(RetSiteC); - } - } - } - // in cases where there are no callers, the return statement would - // normally not be processed at all; this might be undesirable if - // the flow function has a side effect such as registering a taint; - // instead we thus call the return flow function will a null caller - if (Callers.empty()) { - IDEProblem.applyUnbalancedRetFlowFunctionSideEffects( - FunctionThatNeedsSummary, n, d2); - } - } - } - - void propagteUnbalancedReturnFlow(n_t RetSiteC, d_t 
TargetVal, - EdgeFunction EdgeFunc, - n_t /*RelatedCallSite*/) { - addWorklistItem(ZeroValue, std::move(RetSiteC), std::move(TargetVal), - std::move(EdgeFunc)); - } - - /// This method will be called for each incoming edge and can be used to - /// transfer knowledge from the calling edge to the returning edge, without - /// affecting the summary edges at the callee. - /// @param callSite - /// - /// @param d4 - /// Fact stored with the incoming edge, i.e., present at the - /// caller side - /// @param d5 - /// Fact that originally should be propagated to the caller. - /// @return Fact that will be propagated to the caller. - /// - d_t restoreContextOnReturnedFact(n_t /*CallSite*/, d_t /*d4*/, d_t d5) { - // TODO support LinkedNode and JoinHandlingNode - // if (d5 instanceof LinkedNode) { - // ((LinkedNode) d5).setCallingContext(d4); - // } - // if(d5 instanceof JoinHandlingNode) { - // ((JoinHandlingNode) - // d5).setCallingContext(d4); - // } - return d5; - } - - /// Computes the normal flow function for the given set of start and end - /// abstractions- - /// @param flowFunction The normal flow function to compute - /// @param d1 The abstraction at the method's start node - /// @param d2 The abstraction at the current node - /// @return The set of abstractions at the successor node - /// - container_type computeNormalFlowFunction(const FlowFunctionPtrType &FlowFunc, - d_t /*d1*/, d_t d2) { - return FlowFunc->computeTargets(d2); - } - - container_type - computeSummaryFlowFunction(const FlowFunctionPtrType &SummaryFlowFunction, - d_t /*d1*/, d_t d2) { - return SummaryFlowFunction->computeTargets(d2); - } - - /// Computes the call flow function for the given call-site abstraction - /// @param callFlowFunction The call flow function to compute - /// @param d1 The abstraction at the current method's start node. 
- /// @param d2 The abstraction at the call site - /// @return The set of caller-side abstractions at the callee's start node - /// - container_type - computeCallFlowFunction(const FlowFunctionPtrType &CallFlowFunction, - d_t /*d1*/, d_t d2) { - return CallFlowFunction->computeTargets(d2); - } - - /// Computes the call-to-return flow function for the given call-site - /// abstraction - /// @param callToReturnFlowFunction The call-to-return flow function to - /// compute - /// @param d1 The abstraction at the current method's start node. - /// @param d2 The abstraction at the call site - /// @return The set of caller-side abstractions at the return site - /// - container_type computeCallToReturnFlowFunction( - const FlowFunctionPtrType &CallToReturnFlowFunction, d_t /*d1*/, d_t d2) { - return CallToReturnFlowFunction->computeTargets(d2); - } - - /// Computes the return flow function for the given set of caller-side - /// abstractions. - /// @param retFunction The return flow function to compute - /// @param d1 The abstraction at the beginning of the callee - /// @param d2 The abstraction at the exit node in the callee - /// @param callSite The call site - /// @param callerSideDs The abstractions at the call site - /// @return The set of caller-side abstractions at the return site - /// - container_type - computeReturnFlowFunction(const FlowFunctionPtrType &RetFlowFunction, - d_t /*d1*/, d_t d2, n_t /*CallSite*/, - const Container & /*CallerSideDs*/) { - return RetFlowFunction->computeTargets(d2); - } - - /// Propagates the flow further down the exploded super graph, merging any - /// edge function that might already have been computed for TargetVal at - /// Target. 
- /// - /// @param SourceVal the source value of the propagated summary edge - /// @param Target the target statement - /// @param TargetVal the target value at the target statement - /// @param f the new edge function computed from (s0,SourceVal) to - /// (Target,TargetVal) - /// @param relatedCallSite for call and return flows the related call - /// statement, nullptr otherwise (this value is not used within this - /// implementation but may be useful for subclasses of IDESolver) - /// @param isUnbalancedReturn true if this edge is propagating an - /// unbalanced return (this value is not used within this implementation - /// but may be useful for subclasses of {@link IDESolver}) - /// - void propagate(PathEdge Edge, EdgeFunction EF) { - const auto &[SourceVal, Target, TargetVal] = Edge.get(); - - PHASAR_LOG_LEVEL(DEBUG, "Propagate flow"); - PHASAR_LOG_LEVEL(DEBUG, "Source value : " << DToString(SourceVal)); - PHASAR_LOG_LEVEL(DEBUG, "Target : " << NToString(Target)); - PHASAR_LOG_LEVEL(DEBUG, "Target value : " << DToString(TargetVal)); - PHASAR_LOG_LEVEL(DEBUG, "Edge Function : " << EF); - - PathEdgeCount++; - pathEdgeProcessingTask(std::move(Edge), std::move(EF)); - } - - l_t joinValueAt(n_t /*Unit*/, d_t /*Fact*/, l_t Curr, l_t NewVal) { - return IDEProblem.join(std::move(Curr), std::move(NewVal)); - } - - std::set>::Cell> - endSummary(n_t SP, d_t d3) { - if constexpr (PAMM_CURR_SEV_LEVEL >= PAMM_SEVERITY_LEVEL::Core) { - auto Key = std::make_pair(SP, d3); - auto FindND = FSummaryReuse.find(Key); - if (FindND == FSummaryReuse.end()) { - FSummaryReuse.emplace(Key, 0); - } else { - FSummaryReuse[Key] += 1; - } - } - return EndsummaryTab.get(SP, d3).cellSet(); - } - - std::map incoming(d_t d1, n_t SP) { - return IncomingTab.get(SP, d1); - } - - void addIncoming(n_t SP, d_t d3, n_t n, d_t d2) { - IncomingTab.get(SP, d3)[n].insert(d2); - } - - void printIncomingTab() const { - IF_LOG_ENABLED( - PHASAR_LOG_LEVEL(DEBUG, "Start of incomingtab entry"); - for (const 
auto &Cell - : IncomingTab.cellSet()) { - PHASAR_LOG_LEVEL(DEBUG, "sP: " << NToString(Cell.getRowKey())); - PHASAR_LOG_LEVEL(DEBUG, "d3: " << DToString(Cell.getColumnKey())); - for (const auto &Entry : Cell.getValue()) { - PHASAR_LOG_LEVEL(DEBUG, " n: " << NToString(Entry.first)); - for (const auto &Fact : Entry.second) { - PHASAR_LOG_LEVEL(DEBUG, " d2: " << DToString(Fact)); - } - } - PHASAR_LOG_LEVEL(DEBUG, "---------------"); - } PHASAR_LOG_LEVEL(DEBUG, "End of incomingtab entry");) - } - - void printEndSummaryTab() const { - IF_LOG_ENABLED( - PHASAR_LOG_LEVEL(DEBUG, "Start of endsummarytab entry"); - - EndsummaryTab.foreachCell( - [](const auto &Row, const auto &Col, const auto &Val) { - PHASAR_LOG_LEVEL(DEBUG, "sP: " << NToString(Row)); - PHASAR_LOG_LEVEL(DEBUG, "d1: " << DToString(Col)); - - Val.foreachCell([](const auto &InnerRow, const auto &InnerCol, - const auto &InnerVal) { - PHASAR_LOG_LEVEL(DEBUG, " eP: " << NToString(InnerRow)); - PHASAR_LOG_LEVEL(DEBUG, " d2: " << DToString(InnerCol)); - PHASAR_LOG_LEVEL(DEBUG, " EF: " << InnerVal); - }); - PHASAR_LOG_LEVEL(DEBUG, "---------------"); - }); - - PHASAR_LOG_LEVEL(DEBUG, "End of endsummarytab entry");) - } - - void printComputedPathEdges() { - llvm::outs() - << "\n**********************************************************"; - llvm::outs() - << "\n* Computed intra-procedural path egdes *"; - llvm::outs() - << "\n**********************************************************\n"; - - // Sort intra-procedural path edges - auto Cells = ComputedIntraPathEdges.cellVec(); - StmtLess Stmtless(ICF); - sort(Cells.begin(), Cells.end(), [&Stmtless](auto Lhs, auto Rhs) { - return Stmtless(Lhs.getRowKey(), Rhs.getRowKey()); - }); - for (const auto &Cell : Cells) { - auto Edge = std::make_pair(Cell.getRowKey(), Cell.getColumnKey()); - std::string N2Label = NToString(Edge.second); - llvm::outs() << "\nN1: " << NToString(Edge.first) << '\n' - << "N2: " << N2Label << "\n----" - << std::string(N2Label.size(), '-') << '\n'; - 
for (auto D1ToD2Set : Cell.getValue()) { - auto D1Fact = D1ToD2Set.first; - llvm::outs() << "D1: " << DToString(D1Fact) << '\n'; - for (auto D2Fact : D1ToD2Set.second) { - llvm::outs() << "\tD2: " << DToString(D2Fact) << '\n'; - } - llvm::outs() << '\n'; - } - } - - llvm::outs() - << "\n**********************************************************"; - llvm::outs() - << "\n* Computed inter-procedural path edges *"; - llvm::outs() - << "\n**********************************************************\n"; - - // Sort intra-procedural path edges - Cells = ComputedInterPathEdges.cellVec(); - sort(Cells.begin(), Cells.end(), [&Stmtless](auto Lhs, auto Rhs) { - return Stmtless(Lhs.getRowKey(), Rhs.getRowKey()); - }); - for (const auto &Cell : Cells) { - auto Edge = std::make_pair(Cell.getRowKey(), Cell.getColumnKey()); - std::string N2Label = NToString(Edge.second); - llvm::outs() << "\nN1: " << NToString(Edge.first) << '\n' - << "N2: " << N2Label << "\n----" - << std::string(N2Label.size(), '-') << '\n'; - for (auto D1ToD2Set : Cell.getValue()) { - auto D1Fact = D1ToD2Set.first; - llvm::outs() << "D1: " << DToString(D1Fact) << '\n'; - for (auto D2Fact : D1ToD2Set.second) { - llvm::outs() << "\tD2: " << DToString(D2Fact) << '\n'; - } - llvm::outs() << '\n'; - } - } - } - - /// The invariant for computing the number of generated (#gen) and killed - /// (#kill) facts: - /// (1) #Valid facts at the last statement <= #gen - #kill - /// (2) #gen >= #kill - /// - /// The total number of valid facts can be smaller than the difference of - /// generated and killed facts, due to set semantics, i.e., a fact can be - /// generated multiple times but appears only once. - /// - /// Zero value is not counted! - /// - /// @brief Computes and prints statistics of the analysis run, e.g. number of - /// generated/killed facts, number of summary-reuses etc. 
- /// - void computeAndPrintStatistics() { - PAMM_GET_INSTANCE; - // Stores all valid facts at return site in caller context; return-site is - // key - std::unordered_map> ValidInCallerContext; - size_t NumGenFacts = 0; - size_t NumIntraPathEdges = 0; - size_t NumInterPathEdges = 0; - // --- Intra-procedural Path Edges --- - // d1 --> d2-Set - // Case 1: d1 in d2-Set - // Case 2: d1 not in d2-Set, i.e., d1 was killed. d2-Set could be empty. - for (const auto &Cell : ComputedIntraPathEdges.cellSet()) { - auto Edge = std::make_pair(Cell.getRowKey(), Cell.getColumnKey()); - PHASAR_LOG_LEVEL(DEBUG, "N1: " << NToString(Edge.first)); - PHASAR_LOG_LEVEL(DEBUG, "N2: " << NToString(Edge.second)); - for (auto &[D1, D2s] : Cell.getValue()) { - PHASAR_LOG_LEVEL(DEBUG, "d1: " << DToString(D1)); - NumIntraPathEdges += D2s.size(); - // Case 1 - if (D2s.find(D1) != D2s.end()) { - NumGenFacts += D2s.size() - 1; - } - // Case 2 - else { - NumGenFacts += D2s.size(); - } - // Store all valid facts after call-to-return flow - if (ICF->isCallSite(Edge.first)) { - ValidInCallerContext[Edge.second].insert(D2s.begin(), D2s.end()); - } - IF_LOG_ENABLED([this](const auto &D2s) { - for (auto D2 : D2s) { - PHASAR_LOG_LEVEL(DEBUG, "d2: " << DToString(D2)); - } - PHASAR_LOG_LEVEL(DEBUG, "----"); - }(D2s)); - } - PHASAR_LOG_LEVEL(DEBUG, " "); - } - // Stores all pairs of (Startpoint, Fact) for which a summary was applied - std::set> ProcessSummaryFacts; - PHASAR_LOG_LEVEL(DEBUG, "=============================================="); - PHASAR_LOG_LEVEL(DEBUG, "INTER PATH EDGES"); - for (const auto &Cell : ComputedInterPathEdges.cellSet()) { - auto Edge = std::make_pair(Cell.getRowKey(), Cell.getColumnKey()); - PHASAR_LOG_LEVEL(DEBUG, "N1: " << NToString(Edge.first)); - PHASAR_LOG_LEVEL(DEBUG, "N2: " << NToString(Edge.second)); - // --- Call-flow Path Edges --- - // Case 1: d1 --> empty set - // Can be ignored, since killing a fact in the caller context will - // actually happen during call-to-return. 
- // - // Case 2: d1 --> d2-Set - // Every fact d_i != ZeroValue in d2-set will be generated in the - // callee context, thus counts as a new fact. Even if d1 is passed as it - // is, it will count as a new fact. The reason for this is, that d1 can - // be killed in the callee context, but still be valid in the caller - // context. - // - // Special Case: Summary was applied for a particular call - // Process the summary's #gen and #kill. - if (ICF->isCallSite(Edge.first)) { - for (auto &[D1, D2s] : Cell.getValue()) { - PHASAR_LOG_LEVEL(DEBUG, "d1: " << DToString(D1)); - NumInterPathEdges += D2s.size(); - for (auto D2 : D2s) { - if (!IDEProblem.isZeroValue(D2)) { - NumGenFacts++; - } - // Special case - if (ProcessSummaryFacts.find(std::make_pair(Edge.second, D2)) != - ProcessSummaryFacts.end()) { - - std::set SummaryDSet; - EndsummaryTab.get(Edge.second, D2) - .foreachCell([&SummaryDSet](const auto &Row, const auto &Col, - const auto &Val) { - SummaryDSet.insert(Col); - }); - - // Process summary just as an intra-procedural edge - if (SummaryDSet.find(D2) != SummaryDSet.end()) { - NumGenFacts += SummaryDSet.size() - 1; - } else { - NumGenFacts += SummaryDSet.size(); - } - } else { - ProcessSummaryFacts.emplace(Edge.second, D2); - } - PHASAR_LOG_LEVEL(DEBUG, "d2: " << DToString(D2)); - } - PHASAR_LOG_LEVEL(DEBUG, "----"); - } - } - // --- Return-flow Path Edges --- - // Since every fact passed to the callee was counted as a new fact, we - // have to count every fact propagated to the caller as a kill to - // satisfy our invariant. Obviously, every fact not propagated to the - // caller will count as a kill. If an actual new fact is propagated to - // the caller, we have to increase the number of generated facts by one. - // Zero value does not count towards generated/killed facts. 
- if (ICF->isExitInst(Cell.getRowKey())) { - for (auto &[D1, D2s] : Cell.getValue()) { - PHASAR_LOG_LEVEL(DEBUG, "d1: " << DToString(D1)); - NumInterPathEdges += D2s.size(); - auto CallerFacts = ValidInCallerContext[Edge.second]; - for (auto D2 : D2s) { - // d2 not valid in caller context - if (CallerFacts.find(D2) == CallerFacts.end()) { - NumGenFacts++; - } - PHASAR_LOG_LEVEL(DEBUG, "d2: " << DToString(D2)); - } - PHASAR_LOG_LEVEL(DEBUG, "----"); - } - } - PHASAR_LOG_LEVEL(DEBUG, " "); - } - PHASAR_LOG_LEVEL(DEBUG, "SUMMARY REUSE"); - std::size_t TotalSummaryReuse = 0; - for (const auto &Entry : FSummaryReuse) { - PHASAR_LOG_LEVEL(DEBUG, "N1: " << NToString(Entry.first.first)); - PHASAR_LOG_LEVEL(DEBUG, "D1: " << DToString(Entry.first.second)); - PHASAR_LOG_LEVEL(DEBUG, "#Reuse: " << Entry.second); - TotalSummaryReuse += Entry.second; - } - INC_COUNTER("Gen facts", NumGenFacts, Core); - INC_COUNTER("Summary-reuse", TotalSummaryReuse, Core); - INC_COUNTER("Intra Path Edges", NumIntraPathEdges, Core); - INC_COUNTER("Inter Path Edges", NumInterPathEdges, Core); - - PHASAR_LOG_LEVEL(INFO, "----------------------------------------------"); - PHASAR_LOG_LEVEL(INFO, "=== Solver Statistics ==="); - PHASAR_LOG_LEVEL(INFO, "#Facts generated : " << GET_COUNTER("Gen facts")); - PHASAR_LOG_LEVEL(INFO, "#Facts killed : " << GET_COUNTER("Kill facts")); - PHASAR_LOG_LEVEL(INFO, - "#Summary-reuse : " << GET_COUNTER("Summary-reuse")); - PHASAR_LOG_LEVEL(INFO, - "#Intra Path Edges: " << GET_COUNTER("Intra Path Edges")); - PHASAR_LOG_LEVEL(INFO, - "#Inter Path Edges: " << GET_COUNTER("Inter Path Edges")); - if constexpr (PAMM_CURR_SEV_LEVEL >= PAMM_SEVERITY_LEVEL::Full) { - PHASAR_LOG_LEVEL( - INFO, "Flow function query count: " << GET_COUNTER("FF Queries")); - PHASAR_LOG_LEVEL( - INFO, "Edge function query count: " << GET_COUNTER("EF Queries")); - PHASAR_LOG_LEVEL(INFO, "Data-flow value propagation count: " - << GET_COUNTER("Value Propagation")); - PHASAR_LOG_LEVEL(INFO, "Data-flow 
value computation count: " - << GET_COUNTER("Value Computation")); - PHASAR_LOG_LEVEL(INFO, - "Special flow function usage count: " - << GET_COUNTER("SpecialSummary-FF Application")); - PHASAR_LOG_LEVEL(INFO, "Jump function construciton count: " - << GET_COUNTER("JumpFn Construction")); - PHASAR_LOG_LEVEL(INFO, - "Phase I duration: " << PRINT_TIMER("DFA Phase I")); - PHASAR_LOG_LEVEL(INFO, - "Phase II duration: " << PRINT_TIMER("DFA Phase II")); - PHASAR_LOG_LEVEL(INFO, "----------------------------------------------"); - CachedFlowEdgeFunctions.print(); - } - } - -public: - void enableESGAsDot() { SolverConfig.setEmitESG(); } - - void - emitESGAsDot(llvm::raw_ostream &OS = llvm::outs(), - llvm::StringRef DotConfigDir = PhasarConfig::PhasarDirectory()) { - PHASAR_LOG_LEVEL(DEBUG, "Emit Exploded super-graph (ESG) as DOT graph"); - PHASAR_LOG_LEVEL(DEBUG, "Process intra-procedural path egdes"); - PHASAR_LOG_LEVEL(DEBUG, "============================================="); - DOTGraph G; - DOTConfig::importDOTConfig(DotConfigDir); - DOTFunctionSubGraph *FG = nullptr; - - // Sort intra-procedural path edges - auto Cells = ComputedIntraPathEdges.cellVec(); - StmtLess Stmtless(ICF); - sort(Cells.begin(), Cells.end(), [&Stmtless](auto Lhs, auto Rhs) { - return Stmtless(Lhs.getRowKey(), Rhs.getRowKey()); - }); - for (const auto &Cell : Cells) { - auto Edge = std::make_pair(Cell.getRowKey(), Cell.getColumnKey()); - std::string N1Label = NToString(Edge.first); - std::string N2Label = NToString(Edge.second); - PHASAR_LOG_LEVEL(DEBUG, "N1: " << N1Label); - PHASAR_LOG_LEVEL(DEBUG, "N2: " << N2Label); - std::string N1StmtId = ICF->getStatementId(Edge.first); - std::string N2StmtId = ICF->getStatementId(Edge.second); - std::string FuncName = ICF->getFunctionOf(Edge.first)->getName().str(); - // Get or create function subgraph - if (!FG || FG->Id != FuncName) { - FG = &G.Functions[FuncName]; - FG->Id = FuncName; - } - - // Create control flow nodes - DOTNode N1(FuncName, N1Label, 
N1StmtId); - DOTNode N2(FuncName, N2Label, N2StmtId); - // Add control flow node(s) to function subgraph - FG->Stmts.insert(N1); - if (ICF->isExitInst(Edge.second)) { - FG->Stmts.insert(N2); - } - - // Set control flow edge - FG->IntraCFEdges.emplace(N1, N2); - - DOTFactSubGraph *D1FSG = nullptr; - unsigned D1FactId = 0; - unsigned D2FactId = 0; - for (const auto &D1ToD2Set : Cell.getValue()) { - auto D1Fact = D1ToD2Set.first; - PHASAR_LOG_LEVEL(DEBUG, "d1: " << DToString(D1Fact)); - - DOTNode D1; - if (IDEProblem.isZeroValue(D1Fact)) { - D1 = {FuncName, "Λ", N1StmtId, 0, false, true}; - D1FactId = 0; - } else { - // Get the fact-ID - D1FactId = G.getFactID(D1Fact); - std::string D1Label = DToString(D1Fact); - - // Get or create the fact subgraph - D1FSG = FG->getOrCreateFactSG(D1FactId, D1Label); - - // Insert D1 to fact subgraph - D1 = {FuncName, D1Label, N1StmtId, D1FactId, false, true}; - D1FSG->Nodes.insert(std::make_pair(N1StmtId, D1)); - } - - DOTFactSubGraph *D2FSG = nullptr; - for (const auto &D2Fact : D1ToD2Set.second) { - PHASAR_LOG_LEVEL(DEBUG, "d2: " << DToString(D2Fact)); - // We do not need to generate any intra-procedural nodes and edges - // for the zero value since they will be auto-generated - if (!IDEProblem.isZeroValue(D2Fact)) { - // Get the fact-ID - D2FactId = G.getFactID(D2Fact); - std::string D2Label = DToString(D2Fact); - DOTNode D2 = {FuncName, D2Label, N2StmtId, D2FactId, false, true}; - std::string EFLabel; - auto EFVec = IntermediateEdgeFunctions[std::make_tuple( - Edge.first, D1Fact, Edge.second, D2Fact)]; - for (const auto &EF : EFVec) { - EFLabel += to_string(EF) + ", "; - } - PHASAR_LOG_LEVEL(DEBUG, "EF LABEL: " << EFLabel); - if (D1FactId == D2FactId && !IDEProblem.isZeroValue(D1Fact)) { - assert(D1FSG && "D1_FSG was nullptr but should be valid."); - D1FSG->Nodes.insert(std::make_pair(N2StmtId, D2)); - D1FSG->Edges.emplace(D1, D2, true, EFLabel); - } else { - // Get or create the fact subgraph - D2FSG = 
FG->getOrCreateFactSG(D2FactId, D2Label); - - D2FSG->Nodes.insert(std::make_pair(N2StmtId, D2)); - FG->CrossFactEdges.emplace(D1, D2, true, EFLabel); - } - } - } - PHASAR_LOG_LEVEL(DEBUG, "----------"); - } - PHASAR_LOG_LEVEL(DEBUG, " "); - } - - PHASAR_LOG_LEVEL(DEBUG, "============================================="); - PHASAR_LOG_LEVEL(DEBUG, "Process inter-procedural path edges"); - PHASAR_LOG_LEVEL(DEBUG, "============================================="); - Cells = ComputedInterPathEdges.cellVec(); - sort(Cells.begin(), Cells.end(), [&Stmtless](auto Lhs, auto Rhs) { - return Stmtless(Lhs.getRowKey(), Rhs.getRowKey()); - }); - for (const auto &Cell : Cells) { - auto Edge = std::make_pair(Cell.getRowKey(), Cell.getColumnKey()); - std::string N1Label = NToString(Edge.first); - std::string N2Label = NToString(Edge.second); - std::string FNameOfN1 = ICF->getFunctionOf(Edge.first)->getName().str(); - std::string FNameOfN2 = ICF->getFunctionOf(Edge.second)->getName().str(); - std::string N1StmtId = ICF->getStatementId(Edge.first); - std::string N2StmtId = ICF->getStatementId(Edge.second); - PHASAR_LOG_LEVEL(DEBUG, "N1: " << N1Label); - PHASAR_LOG_LEVEL(DEBUG, "N2: " << N2Label); - - // Add inter-procedural control flow edge - DOTNode N1(FNameOfN1, N1Label, N1StmtId); - DOTNode N2(FNameOfN2, N2Label, N2StmtId); - - // Handle recursion control flow as intra-procedural control flow - // since those eges never leave the function subgraph - FG = nullptr; - if (FNameOfN1 == FNameOfN2) { - // This function subgraph is guaranteed to exist - FG = &G.Functions[FNameOfN1]; - FG->IntraCFEdges.emplace(N1, N2); - } else { - // Check the case where the callee is a single statement function, - // thus does not contain intra-procedural path edges. We have to - // generate the function sub graph here! 
- if (!G.Functions.count(FNameOfN1)) { - FG = &G.Functions[FNameOfN1]; - FG->Id = FNameOfN1; - FG->Stmts.insert(N1); - } else if (!G.Functions.count(FNameOfN2)) { - FG = &G.Functions[FNameOfN2]; - FG->Id = FNameOfN2; - FG->Stmts.insert(N2); - } - G.InterCFEdges.emplace(N1, N2); - } - - // Create D1 and D2, if D1 == D2 == lambda then add Edge(D1, D2) to - // interLambdaEges otherwise add Edge(D1, D2) to interFactEdges - unsigned D1FactId = 0; - unsigned D2FactId = 0; - for (const auto &D1ToD2Set : Cell.getValue()) { - auto D1Fact = D1ToD2Set.first; - PHASAR_LOG_LEVEL(DEBUG, "d1: " << DToString(D1Fact)); - DOTNode D1; - if (IDEProblem.isZeroValue(D1Fact)) { - D1 = {FNameOfN1, "Λ", N1StmtId, 0, false, true}; - } else { - // Get the fact-ID - D1FactId = G.getFactID(D1Fact); - std::string D1Label = DToString(D1Fact); - D1 = {FNameOfN1, D1Label, N1StmtId, D1FactId, false, true}; - // FG should already exist even for single statement functions - if (!G.containsFactSG(FNameOfN1, D1FactId)) { - FG = &G.Functions[FNameOfN1]; - auto *D1FSG = FG->getOrCreateFactSG(D1FactId, D1Label); - D1FSG->Nodes.insert(std::make_pair(N1StmtId, D1)); - } - } - - auto D2Set = D1ToD2Set.second; - for (const auto &D2Fact : D2Set) { - PHASAR_LOG_LEVEL(DEBUG, "d2: " << DToString(D2Fact)); - DOTNode D2; - if (IDEProblem.isZeroValue(D2Fact)) { - D2 = {FNameOfN2, "Λ", N2StmtId, 0, false, true}; - } else { - // Get the fact-ID - D2FactId = G.getFactID(D2Fact); - std::string D2Label = DToString(D2Fact); - D2 = {FNameOfN2, D2Label, N2StmtId, D2FactId, false, true}; - // FG should already exist even for single statement functions - if (!G.containsFactSG(FNameOfN2, D2FactId)) { - FG = &G.Functions[FNameOfN2]; - auto *D2FSG = FG->getOrCreateFactSG(D2FactId, D2Label); - D2FSG->Nodes.insert(std::make_pair(N2StmtId, D2)); - } - } - - if (IDEProblem.isZeroValue(D1Fact) && - IDEProblem.isZeroValue(D2Fact)) { - // Do not add lambda recursion edges as inter-procedural edges - if (D1.FuncName != D2.FuncName) { - 
G.InterLambdaEdges.emplace(D1, D2, true, "AllBottom", "BOT"); - } - } else { - // std::string EFLabel = EF ? EF->str() : " "; - std::string EFLabel; - auto EFVec = IntermediateEdgeFunctions[std::make_tuple( - Edge.first, D1Fact, Edge.second, D2Fact)]; - for (const auto &EF : EFVec) { - PHASAR_LOG_LEVEL(DEBUG, "Partial EF Label: " << EF); - EFLabel.append(to_string(EF) + ", "); - } - PHASAR_LOG_LEVEL(DEBUG, "EF LABEL: " << EFLabel); - G.InterFactEdges.emplace(D1, D2, true, EFLabel); - } - } - PHASAR_LOG_LEVEL(DEBUG, "----------"); - } - PHASAR_LOG_LEVEL(DEBUG, " "); - } - OS << G; - } - -private: - /// @brief: Allows less-than comparison based on the statement ID. - struct StmtLess { - const i_t *ICF; - StringIDLess StrIDLess; - StmtLess(const i_t *ICF) : ICF(ICF), StrIDLess(StringIDLess()) {} - bool operator()(n_t Lhs, n_t Rhs) { - return StrIDLess(ICF->getStatementId(Lhs), ICF->getStatementId(Rhs)); - } - }; - - /// -- InteractiveIDESolverMixin implementation - - bool doInitialize() { - PAMM_GET_INSTANCE; - REG_COUNTER("Gen facts", 0, Core); - REG_COUNTER("Kill facts", 0, Core); - REG_COUNTER("Summary-reuse", 0, Core); - REG_COUNTER("Intra Path Edges", 0, Core); - REG_COUNTER("Inter Path Edges", 0, Core); - REG_COUNTER("FF Queries", 0, Full); - REG_COUNTER("EF Queries", 0, Full); - REG_COUNTER("Value Propagation", 0, Full); - REG_COUNTER("Value Computation", 0, Full); - REG_COUNTER("SpecialSummary-FF Application", 0, Full); - REG_COUNTER("SpecialSummary-EF Queries", 0, Full); - REG_COUNTER("JumpFn Construction", 0, Full); - REG_COUNTER("Process Call", 0, Full); - REG_COUNTER("Process Normal", 0, Full); - REG_COUNTER("Process Exit", 0, Full); - REG_COUNTER("[Calls] getAliasSet", 0, Full); - REG_HISTOGRAM("Data-flow facts", Full); - REG_HISTOGRAM("Points-to", Full); - - PHASAR_LOG_LEVEL(INFO, "IDE solver is solving the specified problem"); - PHASAR_LOG_LEVEL(INFO, - "Submit initial seeds, construct exploded super graph"); - // computations starting here - 
START_TIMER("DFA Phase I", Full); - - // We start our analysis and construct exploded supergraph - submitInitialSeeds(); - return !WorkList.empty(); - } - - bool doNext() { - assert(!WorkList.empty()); - auto [Edge, EF] = std::move(WorkList.back()); - WorkList.pop_back(); - - propagate(std::move(Edge), std::move(EF)); - - return !WorkList.empty(); - } - - void finalizeInternal() { - PAMM_GET_INSTANCE; - STOP_TIMER("DFA Phase I", Full); - if (SolverConfig.computeValues()) { - START_TIMER("DFA Phase II", Full); - // Computing the final values for the edge functions - PHASAR_LOG_LEVEL( - INFO, "Compute the final values according to the edge functions"); - computeValues(); - STOP_TIMER("DFA Phase II", Full); - } - PHASAR_LOG_LEVEL(INFO, "Problem solved"); - if constexpr (PAMM_CURR_SEV_LEVEL >= PAMM_SEVERITY_LEVEL::Core) { - computeAndPrintStatistics(); - } - if (SolverConfig.emitESG()) { - emitESGAsDot(); + l_t seedVal(n_t NHashN, d_t NHashD) { + if (SeedValues.contains(NHashN, NHashD)) { + return SeedValues.get(NHashN, NHashD); } + return this->IDEProblem.topElement(); } - SolverResults doFinalize() & { - finalizeInternal(); - return getSolverResults(); + void setSeedVal(n_t NHashN, d_t NHashD, l_t L) { + SeedValues.insert(std::move(NHashN), std::move(NHashD), std::move(L)); } - OwningSolverResults doFinalize() && { - finalizeInternal(); - return consumeSolverResults(); - } - - /// -- Data members - - IDETabulationProblem &IDEProblem; - d_t ZeroValue; - const i_t *ICF; - IFDSIDESolverConfig &SolverConfig; + // -- Data members std::vector, EdgeFunction>> WorkList; - std::vector> ValuePropWL; - - size_t PathEdgeCount = 0; - - FlowEdgeFunctionCache CachedFlowEdgeFunctions; - - Table> ComputedIntraPathEdges; - - Table> ComputedInterPathEdges; - - EdgeFunction AllTop; - - std::shared_ptr> JumpFn; - - std::map, std::vector>> - IntermediateEdgeFunctions; - - // stores summaries that were queried before they were computed - // see CC 2010 paper by Naeem, Lhotak and Rodriguez 
- Table>> EndsummaryTab; - - // edges going along calls - // see CC 2010 paper by Naeem, Lhotak and Rodriguez - Table> IncomingTab; - - // stores the return sites (inside callers) to which we have unbalanced - // returns if SolverConfig.followReturnPastSeeds is enabled - std::set UnbalancedRetSites; - - InitialSeeds Seeds; Table SeedValues; - - Table ValTab; - - std::map, size_t> FSummaryReuse; }; template diff --git a/include/phasar/DataFlow/IfdsIde/Solver/IDESolver.h b/include/phasar/DataFlow/IfdsIde/Solver/IDESolver.h index e7be250aa..c59b3ea2b 100644 --- a/include/phasar/DataFlow/IfdsIde/Solver/IDESolver.h +++ b/include/phasar/DataFlow/IfdsIde/Solver/IDESolver.h @@ -24,6 +24,9 @@ namespace psr { /// Solves the given IDETabulationProblem as described in the 1996 paper by /// Sagiv, Horwitz and Reps. To solve the problem, call solve(). Results /// can then be queried by using resultAt() and resultsAt(). +/// +/// Propagates data-flow facts to the successors of the statement, where they +/// were generated. 
template class IDESolver : public IDESolverImpl< @@ -46,9 +49,14 @@ class IDESolver using t_t = typename AnalysisDomainTy::t_t; using v_t = typename AnalysisDomainTy::v_t; - IDESolver(IDETabulationProblem &Problem, - const i_t *ICF, PropagateAfterStrategy Strategy = {}) + explicit IDESolver(IDETabulationProblem &Problem, + const i_t *ICF, PropagateAfterStrategy Strategy = {}) : base_t(Problem, ICF, Strategy) {} + +private: + friend base_t; + + std::vector> WorkList; }; template diff --git a/include/phasar/DataFlow/IfdsIde/Solver/detail/IDESolverImpl.h b/include/phasar/DataFlow/IfdsIde/Solver/detail/IDESolverImpl.h index 9bc196fba..c5e29ddef 100644 --- a/include/phasar/DataFlow/IfdsIde/Solver/detail/IDESolverImpl.h +++ b/include/phasar/DataFlow/IfdsIde/Solver/detail/IDESolverImpl.h @@ -18,6 +18,7 @@ #include "phasar/DataFlow/IfdsIde/Solver/detail/FlowEdgeFunctionCache.h" #include "phasar/DataFlow/IfdsIde/Solver/detail/PathEdge.h" #include "phasar/Utils/DOTGraph.h" +#include "phasar/Utils/Printer.h" #include @@ -1079,8 +1080,8 @@ class IDESolverImpl : public IDESolverAPIMixin { void addWorklistItem(d_t SourceVal, n_t Target, d_t TargetVal, EdgeFunction /*EF*/) { - WorkList.emplace_back(std::move(SourceVal), std::move(Target), - std::move(TargetVal)); + self().WorkList.emplace_back(std::move(SourceVal), std::move(Target), + std::move(TargetVal)); } std::set>::Cell> @@ -1139,7 +1140,7 @@ class IDESolverImpl : public IDESolverAPIMixin { d_t dPrime = Entry.first; auto fPrime = Entry.second; n_t SP = Stmt; - l_t Val = val(SP, Fact); + l_t Val = self().seedVal(SP, Fact); INC_COUNTER("Value Propagation", 1, Full); self().propagateValue(CallSite, dPrime, fPrime.computeTarget(Val)); } @@ -1166,18 +1167,20 @@ class IDESolverImpl : public IDESolverAPIMixin { INC_COUNTER("EF Queries", 1, Full); for (const n_t StartPoint : ICF->getStartPointsOf(Callee)) { INC_COUNTER("Value Propagation", 1, Full); - self().propagateValue(StartPoint, dPrime, - EdgeFn.computeTarget(self().val(Stmt, 
Fact))); + self().propagateValue( + StartPoint, dPrime, + EdgeFn.computeTarget(self().seedVal(Stmt, Fact))); } } } } void propagateValue(n_t NHashN, d_t NHashD, const l_t &L) { - l_t ValNHash = self().val(NHashN, NHashD); + l_t ValNHash = self().seedVal(NHashN, NHashD); l_t LPrime = self().joinValueAt(NHashN, NHashD, ValNHash, L); if (!(LPrime == ValNHash)) { - self().setVal(NHashN, NHashD, std::move(LPrime)); + self().setSeedVal(NHashN, NHashD, std::move(LPrime)); + ValuePropWL.emplace_back(std::move(NHashN), std::move(NHashD)); } } @@ -1208,12 +1211,14 @@ class IDESolverImpl : public IDESolverAPIMixin { using TableCell = typename Table>::Cell; Table> &LookupByTarget = JumpFn->lookupByTarget(n); + for (const TableCell &SourceValTargetValAndFunction : LookupByTarget.cellSet()) { d_t dPrime = SourceValTargetValAndFunction.getRowKey(); d_t d = SourceValTargetValAndFunction.getColumnKey(); EdgeFunction fPrime = SourceValTargetValAndFunction.getValue(); - l_t TargetVal = self().val(SP, dPrime); + l_t TargetVal = self().seedVal(SP, dPrime); + self().setVal( n, d, IDEProblem.join(self().val(n, d), @@ -1240,7 +1245,7 @@ class IDESolverImpl : public IDESolverAPIMixin { << ", value: " << LToString(Value)); // initialize the initial seeds with the top element as we have no // information at the beginning of the value computation problem - self().setVal(StartPoint, Fact, Value); + self().setSeedVal(StartPoint, Fact, Value); std::pair SuperGraphNode(StartPoint, Fact); self().valuePropagationTask(std::move(SuperGraphNode)); } @@ -1255,6 +1260,10 @@ class IDESolverImpl : public IDESolverAPIMixin { return IDEProblem.topElement(); } + l_t seedVal(n_t NHashN, d_t NHashD) { + return self().val(std::move(NHashN), std::move(NHashD)); + } + void setVal(n_t NHashN, d_t NHashD, l_t L) { IF_LOG_ENABLED({ PHASAR_LOG_LEVEL(DEBUG, @@ -1273,6 +1282,10 @@ class IDESolverImpl : public IDESolverAPIMixin { // } } + void setSeedVal(n_t NHashN, d_t NHashD, l_t L) { + 
self().setVal(std::move(NHashN), std::move(NHashD), std::move(L)); + } + std::vector getAllValueComputationNodes() { return ICF->allNonCallStartNodes(); } @@ -1312,18 +1325,18 @@ class IDESolverImpl : public IDESolverAPIMixin { // We start our analysis and construct exploded supergraph self().submitInitialSeeds(); - return !WorkList.empty(); + return !self().WorkList.empty(); } bool doNext() { - assert(!WorkList.empty()); - auto Edge = std::move(WorkList.back()); - WorkList.pop_back(); + assert(!self().WorkList.empty()); + auto Edge = std::move(self().WorkList.back()); + self().WorkList.pop_back(); auto EF = self().jumpFunction(Edge); self().propagate(std::move(Edge), std::move(EF)); - return !WorkList.empty(); + return !self().WorkList.empty(); } void finalizeInternal() { @@ -1652,7 +1665,7 @@ class IDESolverImpl : public IDESolverAPIMixin { } IDESolverImpl(IDETabulationProblem &Problem, - const i_t *ICF, PropagateAfterStrategy /*Strategy*/ = {}) + const i_t *ICF, StrategyT /*Strategy*/ = {}) : IDEProblem(Problem), ZeroValue(Problem.getZeroValue()), ICF(ICF), SolverConfig(Problem.getIFDSIDESolverConfig()), CachedFlowEdgeFunctions(Problem), AllTop(Problem.allTopFunction()), @@ -1676,7 +1689,6 @@ class IDESolverImpl : public IDESolverAPIMixin { const i_t *ICF; IFDSIDESolverConfig &SolverConfig; - std::vector> WorkList; std::vector> ValuePropWL; size_t PathEdgeCount = 0; From 868faa3d56c738dc542385fb2ed4fb3d37632149 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Sun, 24 Sep 2023 12:31:20 +0200 Subject: [PATCH 09/11] Huge cleanup --- .clang-tidy | 1 + include/phasar/ControlFlow/ICFGBase.h | 6 ++ .../phasar/DataFlow/IfdsIde/InitialSeeds.h | 6 +- .../DataFlow/IfdsIde/Solver/EagerIDESolver.h | 54 ++------------ .../DataFlow/IfdsIde/Solver/IDESolver.h | 14 ++-- .../DataFlow/IfdsIde/Solver/IFDSSolver.h | 6 +- .../DataFlow/IfdsIde/Solver/SolverStrategy.h | 16 ++-- .../IfdsIde/Solver/detail/IDESolverImpl.h | 74 +++++++++---------- 
.../DataFlow/Mono/Solver/IntraMonoSolver.h | 2 +- .../ControlFlow/LLVMBasedBackwardICFG.h | 1 + .../PhasarLLVM/ControlFlow/LLVMBasedICFG.h | 3 + .../IFDSFieldSensTaintAnalysis/Utils/Log.h | 2 +- include/phasar/Utils/IOManip.h | 29 ++++++++ lib/LibPhasar.cpp | 5 ++ .../RandomChangeFrontendAction.cpp | 4 +- lib/PhasarClang/RandomChangeVisitor.cpp | 2 +- .../ControlFlow/LLVMBasedBackwardICFG.cpp | 4 + lib/PhasarLLVM/ControlFlow/LLVMBasedICFG.cpp | 22 +++--- .../ControlFlow/Resolver/CHAResolver.cpp | 2 +- .../ControlFlow/Resolver/DTAResolver.cpp | 2 +- .../ControlFlow/Resolver/OTFResolver.cpp | 2 +- .../ControlFlow/Resolver/RTAResolver.cpp | 2 +- .../Stats/LcovRetValWriter.cpp | 10 +-- .../Stats/LcovWriter.cpp | 10 +-- .../Stats/LineNumberWriter.cpp | 2 +- .../Utils/DataFlowUtils.cpp | 2 +- .../DataFlow/IfdsIde/IFDSIDESolverConfig.cpp | 28 ++++--- .../Problems/IDEExtendedTaintAnalysis.cpp | 2 +- .../IDEGeneralizedLCA/IDEGeneralizedLCA.cpp | 8 +- .../Problems/IFDSFieldSensTaintAnalysis.cpp | 14 ++-- .../Passes/GeneralStatisticsAnalysis.cpp | 26 +++---- lib/PhasarLLVM/Pointer/LLVMAliasSet.cpp | 2 +- .../Pointer/LLVMBasedAliasAnalysis.cpp | 2 +- .../TaintConfig/LLVMTaintConfig.cpp | 10 +-- lib/Utils/IOManip.cpp | 12 +++ lib/Utils/PAMM.cpp | 4 +- .../ControlFlow/LLVMBasedICFGExportTest.cpp | 4 +- .../Problems/IDEExtendedTaintAnalysisTest.cpp | 2 +- .../Problems/IFDSConstAnalysisTest.cpp | 2 +- 39 files changed, 206 insertions(+), 193 deletions(-) create mode 100644 include/phasar/Utils/IOManip.h create mode 100644 lib/Utils/IOManip.cpp diff --git a/.clang-tidy b/.clang-tidy index 5d40d294d..5160c27dd 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -4,6 +4,7 @@ Checks: '-*, misc-*, -misc-non-private-member-variables-in-classes, -misc-no-recursion, + -misc-use-anonymous-namespace, readability-*, -readability-function-cognitive-complexity, -readability-else-after*, diff --git a/include/phasar/ControlFlow/ICFGBase.h b/include/phasar/ControlFlow/ICFGBase.h index 
fea37796c..2774a7a17 100644 --- a/include/phasar/ControlFlow/ICFGBase.h +++ b/include/phasar/ControlFlow/ICFGBase.h @@ -38,6 +38,12 @@ template class ICFGBase { return self().getAllFunctionsImpl(); } + /// Returns the number of total functions that were considered when building + /// up this ICFG. Equals the size of getAllFunctions() + [[nodiscard]] size_t getNumFunctions() const noexcept { + return self().getNumFunctionsImpl(); + } + /// returns the function definition or declaration with the given name. If /// ther eis no such function, returns a default constructed f_t (nullptr for /// pointers). diff --git a/include/phasar/DataFlow/IfdsIde/InitialSeeds.h b/include/phasar/DataFlow/IfdsIde/InitialSeeds.h index 60e5ae23e..efc230d53 100644 --- a/include/phasar/DataFlow/IfdsIde/InitialSeeds.h +++ b/include/phasar/DataFlow/IfdsIde/InitialSeeds.h @@ -98,13 +98,13 @@ template class InitialSeeds { for (const auto &[Node, Facts] : Seeds) { OS << "At "; printNode(Node); - OS << "\n"; + OS << '\n'; for (const auto &[Fact, Value] : Facts) { OS << "> "; printFact(Fact); - OS << " --> \\." << Value << "\n"; + OS << " --> \\." << Value << '\n'; } - OS << "\n"; + OS << '\n'; } OS << "========================== End Seeds ==========================\n"; } diff --git a/include/phasar/DataFlow/IfdsIde/Solver/EagerIDESolver.h b/include/phasar/DataFlow/IfdsIde/Solver/EagerIDESolver.h index dcaf55409..d94401b93 100644 --- a/include/phasar/DataFlow/IfdsIde/Solver/EagerIDESolver.h +++ b/include/phasar/DataFlow/IfdsIde/Solver/EagerIDESolver.h @@ -1,60 +1,16 @@ /****************************************************************************** - * Copyright (c) 2017 Philipp Schubert. + * Copyright (c) 2023 Fabian Schiebel. * All rights reserved. This program and the accompanying materials are made * available under the terms of LICENSE.txt. 
* * Contributors: - * Philipp Schubert and others + * Fabian Schiebel and others *****************************************************************************/ -/* - * IDESolver.h - * - * Created on: 04.08.2016 - * Author: pdschbrt - */ - #ifndef PHASAR_DATAFLOW_IFDSIDE_SOLVER_EAGERIDESOLVER_H #define PHASAR_DATAFLOW_IFDSIDE_SOLVER_EAGERIDESOLVER_H -#include "phasar/Config/Configuration.h" -#include "phasar/DB/ProjectIRDBBase.h" -#include "phasar/DataFlow/IfdsIde/EdgeFunction.h" -#include "phasar/DataFlow/IfdsIde/EdgeFunctionUtils.h" -#include "phasar/DataFlow/IfdsIde/EdgeFunctions.h" -#include "phasar/DataFlow/IfdsIde/FlowFunctions.h" -#include "phasar/DataFlow/IfdsIde/IDETabulationProblem.h" -#include "phasar/DataFlow/IfdsIde/IFDSTabulationProblem.h" -#include "phasar/DataFlow/IfdsIde/InitialSeeds.h" -#include "phasar/DataFlow/IfdsIde/Solver/IDESolverAPIMixin.h" -#include "phasar/DataFlow/IfdsIde/Solver/JumpFunctions.h" -#include "phasar/DataFlow/IfdsIde/Solver/SolverStrategy.h" #include "phasar/DataFlow/IfdsIde/Solver/detail/IDESolverImpl.h" -#include "phasar/DataFlow/IfdsIde/SolverResults.h" -#include "phasar/Domain/AnalysisDomain.h" -#include "phasar/Utils/DOTGraph.h" -#include "phasar/Utils/JoinLattice.h" -#include "phasar/Utils/Logger.h" -#include "phasar/Utils/PAMMMacros.h" -#include "phasar/Utils/Printer.h" -#include "phasar/Utils/Table.h" -#include "phasar/Utils/Utilities.h" - -#include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" - -#include "nlohmann/json.hpp" - -#include -#include -#include -#include -#include -#include -#include -#include namespace psr { @@ -130,7 +86,7 @@ class IDESolver this->JumpFn->addFunction(std::move(SourceVal), std::move(Target), std::move(TargetVal), std::move(fPrime)); - IF_LOG_ENABLED(if (!this->IDEProblem.isZeroValue(TargetVal)) { + IF_LOG_ENABLED(if (!this->IDEProblem->isZeroValue(TargetVal)) { PHASAR_LOG_LEVEL( DEBUG, "EDGE: 
ICF->getFunctionOf(Target)) << ", D: " << DToString(SourceVal) << '>'); @@ -216,7 +172,7 @@ class IDESolver std::vector getAllValueComputationNodes() const { std::vector Ret; - // TODO: Reserve + Ret.reserve(this->ICF->getNumFunctions() * 2); // Just a rough guess for (const auto &Fun : this->ICF->getAllFunctions()) { for (const auto &Inst : this->ICF->getAllInstructionsOf(Fun)) { @@ -230,7 +186,7 @@ class IDESolver if (SeedValues.contains(NHashN, NHashD)) { return SeedValues.get(NHashN, NHashD); } - return this->IDEProblem.topElement(); + return this->IDEProblem->topElement(); } void setSeedVal(n_t NHashN, d_t NHashD, l_t L) { diff --git a/include/phasar/DataFlow/IfdsIde/Solver/IDESolver.h b/include/phasar/DataFlow/IfdsIde/Solver/IDESolver.h index c59b3ea2b..74c81f556 100644 --- a/include/phasar/DataFlow/IfdsIde/Solver/IDESolver.h +++ b/include/phasar/DataFlow/IfdsIde/Solver/IDESolver.h @@ -28,13 +28,13 @@ namespace psr { /// Propagates data-flow facts to the successors of the statement, where they /// were generated. 
template -class IDESolver +class IDESolver : public IDESolverImpl< - IDESolver, - AnalysisDomainTy, Container, PropagateAfterStrategy> { + IDESolver, + AnalysisDomainTy, Container, PropagateOverStrategy> { using base_t = IDESolverImpl< - IDESolver, - AnalysisDomainTy, Container, PropagateAfterStrategy>; + IDESolver, + AnalysisDomainTy, Container, PropagateOverStrategy>; public: using ProblemTy = IDETabulationProblem; @@ -50,7 +50,7 @@ class IDESolver using v_t = typename AnalysisDomainTy::v_t; explicit IDESolver(IDETabulationProblem &Problem, - const i_t *ICF, PropagateAfterStrategy Strategy = {}) + const i_t *ICF, PropagateOverStrategy Strategy = {}) : base_t(Problem, ICF, Strategy) {} private: @@ -65,7 +65,7 @@ OwningSolverResults solveIDEProblem(IDETabulationProblem &Problem, const typename AnalysisDomainTy::i_t &ICF, - PropagateAfterStrategy Strategy = {}) { + PropagateOverStrategy Strategy = {}) { IDESolver Solver(Problem, &ICF, Strategy); Solver.solve(); return Solver.consumeSolverResults(); diff --git a/include/phasar/DataFlow/IfdsIde/Solver/IFDSSolver.h b/include/phasar/DataFlow/IfdsIde/Solver/IFDSSolver.h index bf8fbd310..d106ce7e6 100644 --- a/include/phasar/DataFlow/IfdsIde/Solver/IFDSSolver.h +++ b/include/phasar/DataFlow/IfdsIde/Solver/IFDSSolver.h @@ -101,19 +101,19 @@ class IFDSSolver : public IDESolver, template IFDSSolver(Problem &, ICF *) -> IFDSSolver; + typename Problem::container_type, PropagateOverStrategy>; template IFDSSolver(Problem &, ICF *, Strategy) -> IFDSSolver; -template +template using IFDSSolver_P = IFDSSolver; template + typename Strategy = PropagateOverStrategy> OwningSolverResults diff --git a/include/phasar/DataFlow/IfdsIde/Solver/SolverStrategy.h b/include/phasar/DataFlow/IfdsIde/Solver/SolverStrategy.h index 869667e01..12a16d6de 100644 --- a/include/phasar/DataFlow/IfdsIde/Solver/SolverStrategy.h +++ b/include/phasar/DataFlow/IfdsIde/Solver/SolverStrategy.h @@ -15,7 +15,7 @@ namespace psr { enum class SolverStrategyKind { /// 
Propagate the data-flow effects of an instruction to the start of the /// successor instructions. This is the default strategy - PropagateAfter, + PropagateOver, // Propagate the data-flow effects of an instruction onto the same // instruction. PropagateOnto, @@ -23,8 +23,8 @@ enum class SolverStrategyKind { struct SolverStrategy {}; -struct PropagateAfterStrategy : SolverStrategy { - static constexpr auto Kind = SolverStrategyKind::PropagateAfter; +struct PropagateOverStrategy : SolverStrategy { + static constexpr auto Kind = SolverStrategyKind::PropagateOver; }; struct PropagateOntoStrategy : SolverStrategy { @@ -32,19 +32,19 @@ struct PropagateOntoStrategy : SolverStrategy { }; template + typename Strategy = PropagateOverStrategy> class IDESolver; template IDESolver(Problem &, ICF *) -> IDESolver; + typename Problem::container_type, PropagateOverStrategy>; template -IDESolver(Problem &, ICF *, PropagateAfterStrategy) +IDESolver(Problem &, ICF *, PropagateOverStrategy) -> IDESolver; + typename Problem::container_type, PropagateOverStrategy>; -template +template using IDESolver_P = IDESolver; diff --git a/include/phasar/DataFlow/IfdsIde/Solver/detail/IDESolverImpl.h b/include/phasar/DataFlow/IfdsIde/Solver/detail/IDESolverImpl.h index c5e29ddef..7fbeaba09 100644 --- a/include/phasar/DataFlow/IfdsIde/Solver/detail/IDESolverImpl.h +++ b/include/phasar/DataFlow/IfdsIde/Solver/detail/IDESolverImpl.h @@ -10,7 +10,6 @@ #ifndef PHASAR_DATAFLOW_IFDSIDE_SOLVER_IDESOLVERIMPL_H #define PHASAR_DATAFLOW_IFDSIDE_SOLVER_IDESOLVERIMPL_H -#include "phasar/AnalysisStrategy/Strategies.h" #include "phasar/DataFlow/IfdsIde/IDETabulationProblem.h" #include "phasar/DataFlow/IfdsIde/Solver/IDESolverAPIMixin.h" #include "phasar/DataFlow/IfdsIde/Solver/JumpFunctions.h" @@ -20,6 +19,8 @@ #include "phasar/Utils/DOTGraph.h" #include "phasar/Utils/Printer.h" +#include "nlohmann/json.hpp" + #include namespace psr { @@ -116,11 +117,11 @@ class IDESolverImpl : public IDESolverAPIMixin { } virtual 
void emitTextReport(llvm::raw_ostream &OS = llvm::outs()) { - IDEProblem.emitTextReport(getSolverResults(), OS); + IDEProblem->emitTextReport(getSolverResults(), OS); } virtual void emitGraphicalReport(llvm::raw_ostream &OS = llvm::outs()) { - IDEProblem.emitGraphicalReport(getSolverResults(), OS); + IDEProblem->emitGraphicalReport(getSolverResults(), OS); } void dumpResults(llvm::raw_ostream &OS = llvm::outs()) { @@ -132,16 +133,16 @@ class IDESolverImpl : public IDESolverAPIMixin { auto Interpe = ComputedInterPathEdges.cellSet(); for (const auto &Cell : Interpe) { llvm::outs() << "FROM" << '\n'; - IDEProblem.printNode(llvm::outs(), Cell.getRowKey()); + IDEProblem->printNode(llvm::outs(), Cell.getRowKey()); llvm::outs() << "TO" << '\n'; - IDEProblem.printNode(llvm::outs(), Cell.getColumnKey()); + IDEProblem->printNode(llvm::outs(), Cell.getColumnKey()); llvm::outs() << "FACTS" << '\n'; for (const auto &Fact : Cell.getValue()) { llvm::outs() << "fact" << '\n'; - IDEProblem.printDataFlowFact(llvm::outs(), Fact.first); + IDEProblem->printDataFlowFact(llvm::outs(), Fact.first); llvm::outs() << "produces" << '\n'; for (const auto &Out : Fact.second) { - IDEProblem.printDataFlowFact(llvm::outs(), Out); + IDEProblem->printDataFlowFact(llvm::outs(), Out); } } } @@ -152,16 +153,16 @@ class IDESolverImpl : public IDESolverAPIMixin { auto Intrape = ComputedIntraPathEdges.cellSet(); for (auto &Cell : Intrape) { llvm::outs() << "FROM" << '\n'; - IDEProblem.printNode(llvm::outs(), Cell.getRowKey()); + IDEProblem->printNode(llvm::outs(), Cell.getRowKey()); llvm::outs() << "TO" << '\n'; - IDEProblem.printNode(llvm::outs(), Cell.getColumnKey()); + IDEProblem->printNode(llvm::outs(), Cell.getColumnKey()); llvm::outs() << "FACTS" << '\n'; for (auto &Fact : Cell.getValue()) { llvm::outs() << "fact" << '\n'; - IDEProblem.printDataFlowFact(llvm::outs(), Fact.first); + IDEProblem->printDataFlowFact(llvm::outs(), Fact.first); llvm::outs() << "produces" << '\n'; for (auto &Out : 
Fact.second) { - IDEProblem.printDataFlowFact(llvm::outs(), Out); + IDEProblem->printDataFlowFact(llvm::outs(), Out); } } } @@ -220,7 +221,7 @@ class IDESolverImpl : public IDESolverAPIMixin { PHASAR_LOG_LEVEL(DEBUG, "d1: " << DToString(D1Fact)); DOTNode D1; - if (IDEProblem.isZeroValue(D1Fact)) { + if (IDEProblem->isZeroValue(D1Fact)) { D1 = {FuncName, "Λ", N1StmtId, 0, false, true}; D1FactId = 0; } else { @@ -241,7 +242,7 @@ class IDESolverImpl : public IDESolverAPIMixin { PHASAR_LOG_LEVEL(DEBUG, "d2: " << DToString(D2Fact)); // We do not need to generate any intra-procedural nodes and edges // for the zero value since they will be auto-generated - if (!IDEProblem.isZeroValue(D2Fact)) { + if (!IDEProblem->isZeroValue(D2Fact)) { // Get the fact-ID D2FactId = G.getFactID(D2Fact); std::string D2Label = DToString(D2Fact); @@ -253,7 +254,7 @@ class IDESolverImpl : public IDESolverAPIMixin { EFLabel += to_string(EF) + ", "; } PHASAR_LOG_LEVEL(DEBUG, "EF LABEL: " << EFLabel); - if (D1FactId == D2FactId && !IDEProblem.isZeroValue(D1Fact)) { + if (D1FactId == D2FactId && !IDEProblem->isZeroValue(D1Fact)) { assert(D1FSG && "D1_FSG was nullptr but should be valid."); D1FSG->Nodes.insert(std::make_pair(N2StmtId, D2)); D1FSG->Edges.emplace(D1, D2, true, EFLabel); @@ -324,7 +325,7 @@ class IDESolverImpl : public IDESolverAPIMixin { auto D1Fact = D1ToD2Set.first; PHASAR_LOG_LEVEL(DEBUG, "d1: " << DToString(D1Fact)); DOTNode D1; - if (IDEProblem.isZeroValue(D1Fact)) { + if (IDEProblem->isZeroValue(D1Fact)) { D1 = {FNameOfN1, "Λ", N1StmtId, 0, false, true}; } else { // Get the fact-ID @@ -343,7 +344,7 @@ class IDESolverImpl : public IDESolverAPIMixin { for (const auto &D2Fact : D2Set) { PHASAR_LOG_LEVEL(DEBUG, "d2: " << DToString(D2Fact)); DOTNode D2; - if (IDEProblem.isZeroValue(D2Fact)) { + if (IDEProblem->isZeroValue(D2Fact)) { D2 = {FNameOfN2, "Λ", N2StmtId, 0, false, true}; } else { // Get the fact-ID @@ -358,8 +359,8 @@ class IDESolverImpl : public IDESolverAPIMixin { } } - 
if (IDEProblem.isZeroValue(D1Fact) && - IDEProblem.isZeroValue(D2Fact)) { + if (IDEProblem->isZeroValue(D1Fact) && + IDEProblem->isZeroValue(D2Fact)) { // Do not add lambda recursion edges as inter-procedural edges if (D1.FuncName != D2.FuncName) { G.InterLambdaEdges.emplace(D1, D2, true, "AllBottom", "BOT"); @@ -800,7 +801,7 @@ class IDESolverImpl : public IDESolverAPIMixin { // condition /// TODO: Add a check for "d1 is seed in functionOf(n)" if (SolverConfig.followReturnsPastSeeds() && Inc.empty() /*&& - IDEProblem.isZeroValue(d1)*/) { + IDEProblem->isZeroValue(d1)*/) { const auto &Callers = ICF->getCallersOf(FunctionThatNeedsSummary); for (n_t Caller : Callers) { for (n_t RetSiteC : ICF->getReturnSitesOfCallAt(Caller)) { @@ -835,7 +836,7 @@ class IDESolverImpl : public IDESolverAPIMixin { // the flow function has a side effect such as registering a taint; // instead we thus call the return flow function will a null caller if (Callers.empty()) { - IDEProblem.applyUnbalancedRetFlowFunctionSideEffects( + IDEProblem->applyUnbalancedRetFlowFunctionSideEffects( FunctionThatNeedsSummary, n, d2); } } @@ -908,7 +909,7 @@ class IDESolverImpl : public IDESolverAPIMixin { PHASAR_LOG_LEVEL( DEBUG, "Zero-Value has been added automatically to start point: " << NToString(StartPoint)); - Seeds.addSeed(StartPoint, ZeroValue, IDEProblem.bottomElement()); + Seeds.addSeed(StartPoint, ZeroValue, IDEProblem->bottomElement()); } } PHASAR_LOG_LEVEL(DEBUG, @@ -929,7 +930,7 @@ class IDESolverImpl : public IDESolverAPIMixin { PHASAR_LOG_LEVEL(DEBUG, "Submit seed at: " << NToString(StartPoint)); PHASAR_LOG_LEVEL(DEBUG, "\tFact: " << DToString(Fact)); PHASAR_LOG_LEVEL(DEBUG, "\tValue: " << LToString(Value)); - if (!IDEProblem.isZeroValue(Fact)) { + if (!IDEProblem->isZeroValue(Fact)) { INC_COUNTER("Gen facts", 1, Core); } self().addInitialWorklistItem(Fact, StartPoint, Fact, @@ -1054,7 +1055,7 @@ class IDESolverImpl : public IDESolverAPIMixin { if (NewFunction) { 
JumpFn->addFunction(SourceVal, NewTarget, TargetVal, fPrime); - IF_LOG_ENABLED(if (!IDEProblem.isZeroValue(TargetVal)) { + IF_LOG_ENABLED(if (!IDEProblem->isZeroValue(TargetVal)) { PHASAR_LOG_LEVEL(DEBUG, "[updateWithNewEdges]: EDGE: getFunctionOf(NewTarget)) << ", D: " << DToString(SourceVal) << '>'); @@ -1221,8 +1222,8 @@ class IDESolverImpl : public IDESolverAPIMixin { self().setVal( n, d, - IDEProblem.join(self().val(n, d), - fPrime.computeTarget(std::move(TargetVal)))); + IDEProblem->join(self().val(n, d), + fPrime.computeTarget(std::move(TargetVal)))); INC_COUNTER("Value Computation", 1, Full); } } @@ -1233,7 +1234,7 @@ class IDESolverImpl : public IDESolverAPIMixin { std::map> AllSeeds = Seeds.getSeeds(); for (n_t UnbalancedRetSite : UnbalancedRetSites) { if (AllSeeds.find(UnbalancedRetSite) == AllSeeds.end()) { - AllSeeds[UnbalancedRetSite][ZeroValue] = IDEProblem.topElement(); + AllSeeds[UnbalancedRetSite][ZeroValue] = IDEProblem->topElement(); } } // do processing @@ -1257,7 +1258,7 @@ class IDESolverImpl : public IDESolverAPIMixin { return ValTab.get(NHashN, NHashD); } // implicitly initialized to top; see line [1] of Fig. 7 in SRH96 paper - return IDEProblem.topElement(); + return IDEProblem->topElement(); } l_t seedVal(n_t NHashN, d_t NHashD) { @@ -1274,7 +1275,7 @@ class IDESolverImpl : public IDESolverAPIMixin { PHASAR_LOG_LEVEL(DEBUG, ' '); }); // TOP is the implicit default value which we do not need to store. 
- // if (l == IDEProblem.topElement()) { + // if (l == IDEProblem->topElement()) { // do not store top values // ValTab.remove(nHashN, nHashD); // } else { @@ -1291,7 +1292,7 @@ class IDESolverImpl : public IDESolverAPIMixin { } l_t joinValueAt(n_t /*Unit*/, d_t /*Fact*/, l_t Curr, l_t NewVal) { - return IDEProblem.join(std::move(Curr), std::move(NewVal)); + return IDEProblem->join(std::move(Curr), std::move(NewVal)); } /// -- InteractiveIDESolverMixin implementation @@ -1549,7 +1550,7 @@ class IDESolverImpl : public IDESolverAPIMixin { PHASAR_LOG_LEVEL(DEBUG, "d1: " << DToString(D1)); NumInterPathEdges += D2s.size(); for (auto D2 : D2s) { - if (!IDEProblem.isZeroValue(D2)) { + if (!IDEProblem->isZeroValue(D2)) { NumGenFacts++; } // Special case @@ -1558,8 +1559,9 @@ class IDESolverImpl : public IDESolverAPIMixin { std::set SummaryDSet; EndsummaryTab.get(Edge.second, D2) - .foreachCell([&SummaryDSet](const auto &Row, const auto &Col, - const auto &Val) { + .foreachCell([&SummaryDSet](const auto & /*Row*/, + const auto &Col, + const auto & /*Val*/) { SummaryDSet.insert(Col); }); @@ -1666,7 +1668,7 @@ class IDESolverImpl : public IDESolverAPIMixin { IDESolverImpl(IDETabulationProblem &Problem, const i_t *ICF, StrategyT /*Strategy*/ = {}) - : IDEProblem(Problem), ZeroValue(Problem.getZeroValue()), ICF(ICF), + : IDEProblem(&Problem), ZeroValue(Problem.getZeroValue()), ICF(ICF), SolverConfig(Problem.getIFDSIDESolverConfig()), CachedFlowEdgeFunctions(Problem), AllTop(Problem.allTopFunction()), JumpFn(std::make_shared>()), @@ -1684,10 +1686,10 @@ class IDESolverImpl : public IDESolverAPIMixin { /// -- Data members - IDETabulationProblem &IDEProblem; + IDETabulationProblem *IDEProblem{}; d_t ZeroValue; const i_t *ICF; - IFDSIDESolverConfig &SolverConfig; + IFDSIDESolverConfig SolverConfig; std::vector> ValuePropWL; @@ -1723,8 +1725,6 @@ class IDESolverImpl : public IDESolverAPIMixin { Table ValTab; std::map, size_t> FSummaryReuse; - - [[no_unique_address]] StrategyT 
Strategy{}; }; } // namespace psr diff --git a/include/phasar/DataFlow/Mono/Solver/IntraMonoSolver.h b/include/phasar/DataFlow/Mono/Solver/IntraMonoSolver.h index ac46b9068..641e38a65 100644 --- a/include/phasar/DataFlow/Mono/Solver/IntraMonoSolver.h +++ b/include/phasar/DataFlow/Mono/Solver/IntraMonoSolver.h @@ -75,7 +75,7 @@ template class IntraMonoSolver { initialize(); // step 2: Iteration (updating Worklist and Analysis) while (!Worklist.empty()) { - // llvm::outs() << "worklist size: " << Worklist.size() << "\n"; + // llvm::outs() << "worklist size: " << Worklist.size() << '\n'; std::pair Edge = Worklist.front(); Worklist.pop_front(); n_t Src = Edge.first; diff --git a/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedBackwardICFG.h b/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedBackwardICFG.h index 101718a42..a65a702eb 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedBackwardICFG.h +++ b/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedBackwardICFG.h @@ -52,6 +52,7 @@ class LLVMBasedBackwardICFG : public LLVMBasedBackwardCFG, private: [[nodiscard]] FunctionRange getAllFunctionsImpl() const; + [[nodiscard]] size_t getNumFunctionsImpl() const noexcept; [[nodiscard]] f_t getFunctionImpl(llvm::StringRef Fun) const; [[nodiscard]] bool isIndirectFunctionCallImpl(n_t Inst) const; diff --git a/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h b/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h index f1af71970..afe9e0b9f 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h +++ b/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h @@ -139,8 +139,11 @@ class LLVMBasedICFG : public LLVMBasedCFG, public ICFGBase { private: [[nodiscard]] FunctionRange getAllFunctionsImpl() const; + [[nodiscard]] size_t getNumFunctionsImpl() const noexcept; [[nodiscard]] f_t getFunctionImpl(llvm::StringRef Fun) const; + [[nodiscard]] size_t getNumNodesImpl() const noexcept; + [[nodiscard]] bool isIndirectFunctionCallImpl(n_t Inst) const; [[nodiscard]] bool 
isVirtualFunctionCallImpl(n_t Inst) const; [[nodiscard]] std::vector allNonCallStartNodesImpl() const; diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/IFDSFieldSensTaintAnalysis/Utils/Log.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/IFDSFieldSensTaintAnalysis/Utils/Log.h index 29b3835b3..4938f6eaa 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/IFDSFieldSensTaintAnalysis/Utils/Log.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/IFDSFieldSensTaintAnalysis/Utils/Log.h @@ -9,7 +9,7 @@ #define LOG_INFO(x) \ do { \ - llvm::outs() << "[ENV_TRACE] " << x << "\n"; /*NOLINT*/ \ + llvm::outs() << "[ENV_TRACE] " << x << '\n'; /*NOLINT*/ \ \ llvm::outs().flush(); \ } while (0) diff --git a/include/phasar/Utils/IOManip.h b/include/phasar/Utils/IOManip.h new file mode 100644 index 000000000..4472d93da --- /dev/null +++ b/include/phasar/Utils/IOManip.h @@ -0,0 +1,29 @@ +/****************************************************************************** + * Copyright (c) 2023 Fabian Schiebel + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. 
+ * + * Contributors: + * Fabian Schiebel and others + *****************************************************************************/ + +#ifndef PHASAR_UTILS_IOMANIP_H +#define PHASAR_UTILS_IOMANIP_H + +namespace llvm { +class raw_ostream; +} // namespace llvm + +namespace psr { +struct BoolAlpha { + bool Value{}; +}; +struct Flush {}; +static constexpr Flush flush; // NOLINT + +llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, BoolAlpha BA); +llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, Flush); + +} // namespace psr + +#endif // PHASAR_UTILS_IOMANIP_H diff --git a/lib/LibPhasar.cpp b/lib/LibPhasar.cpp index 97bbc811c..406e11fab 100644 --- a/lib/LibPhasar.cpp +++ b/lib/LibPhasar.cpp @@ -7,4 +7,9 @@ * Fabian Schiebel and others *****************************************************************************/ +// Make sure, clangd always picks the right source file to infer the +// compile-commands for IDESolverImpl.h. Otherwise this leads to strange error +// squiggles +#include "phasar/DataFlow/IfdsIde/Solver/detail/IDESolverImpl.h" + #include "phasar.h" diff --git a/lib/PhasarClang/RandomChangeFrontendAction.cpp b/lib/PhasarClang/RandomChangeFrontendAction.cpp index bbb1b7436..af80b91aa 100644 --- a/lib/PhasarClang/RandomChangeFrontendAction.cpp +++ b/lib/PhasarClang/RandomChangeFrontendAction.cpp @@ -41,14 +41,14 @@ RandomChangeFrontendAction::RandomChangeFrontendAction() = default; void RandomChangeFrontendAction::EndSourceFileAction() { clang::SourceManager &SM = RW.getSourceMgr(); llvm::errs() << "** EndSourceFileAction for: " - << SM.getFileEntryForID(SM.getMainFileID())->getName() << "\n"; + << SM.getFileEntryForID(SM.getMainFileID())->getName() << '\n'; RW.getEditBuffer(SM.getMainFileID()).write(llvm::outs()); } std::unique_ptr RandomChangeFrontendAction::CreateASTConsumer(clang::CompilerInstance &CI, llvm::StringRef File) { - llvm::errs() << "** Creating AST consumer for: " << File << "\n"; + llvm::errs() << "** Creating AST consumer for: " << File 
<< '\n'; RW.setSourceMgr(CI.getSourceManager(), CI.getLangOpts()); return std::make_unique(RW); } diff --git a/lib/PhasarClang/RandomChangeVisitor.cpp b/lib/PhasarClang/RandomChangeVisitor.cpp index 86a5f310e..f688e4ed0 100644 --- a/lib/PhasarClang/RandomChangeVisitor.cpp +++ b/lib/PhasarClang/RandomChangeVisitor.cpp @@ -62,7 +62,7 @@ bool RandomChangeVisitor::visitFunctionDecl(clang::FunctionDecl *F) { // Add comment before std::stringstream SSBefore; SSBefore << "// Begin function " << FuncName << " returning " << TypeStr - << "\n"; + << '\n'; clang::SourceLocation ST = F->getSourceRange().getBegin(); RW.InsertText(ST, SSBefore.str(), true, true); // And after diff --git a/lib/PhasarLLVM/ControlFlow/LLVMBasedBackwardICFG.cpp b/lib/PhasarLLVM/ControlFlow/LLVMBasedBackwardICFG.cpp index 0ca199d25..61928fe49 100644 --- a/lib/PhasarLLVM/ControlFlow/LLVMBasedBackwardICFG.cpp +++ b/lib/PhasarLLVM/ControlFlow/LLVMBasedBackwardICFG.cpp @@ -23,6 +23,10 @@ FunctionRange LLVMBasedBackwardICFG::getAllFunctionsImpl() const { return ForwardICFG->getAllFunctions(); } +size_t LLVMBasedBackwardICFG::getNumFunctionsImpl() const noexcept { + return ForwardICFG->getNumFunctions(); +} + auto LLVMBasedBackwardICFG::getFunctionImpl(llvm::StringRef Fun) const -> f_t { return ForwardICFG->getFunction(Fun); } diff --git a/lib/PhasarLLVM/ControlFlow/LLVMBasedICFG.cpp b/lib/PhasarLLVM/ControlFlow/LLVMBasedICFG.cpp index 87b2279f2..cbf7799de 100644 --- a/lib/PhasarLLVM/ControlFlow/LLVMBasedICFG.cpp +++ b/lib/PhasarLLVM/ControlFlow/LLVMBasedICFG.cpp @@ -394,26 +394,28 @@ bool LLVMBasedICFG::isPhasarGenerated(const llvm::Function &F) noexcept { return false; } -[[nodiscard]] FunctionRange LLVMBasedICFG::getAllFunctionsImpl() const { +FunctionRange LLVMBasedICFG::getAllFunctionsImpl() const { return IRDB->getAllFunctions(); } -[[nodiscard]] auto LLVMBasedICFG::getFunctionImpl(llvm::StringRef Fun) const - -> f_t { +size_t LLVMBasedICFG::getNumFunctionsImpl() const noexcept { + return 
IRDB->getNumFunctions(); +} + +auto LLVMBasedICFG::getFunctionImpl(llvm::StringRef Fun) const -> f_t { return IRDB->getFunction(Fun); } -[[nodiscard]] bool LLVMBasedICFG::isIndirectFunctionCallImpl(n_t Inst) const { +bool LLVMBasedICFG::isIndirectFunctionCallImpl(n_t Inst) const { const auto *CallSite = llvm::dyn_cast(Inst); return CallSite && CallSite->isIndirectCall(); } -[[nodiscard]] bool LLVMBasedICFG::isVirtualFunctionCallImpl(n_t Inst) const { +bool LLVMBasedICFG::isVirtualFunctionCallImpl(n_t Inst) const { return internalIsVirtualFunctionCall(Inst, *TH); } -[[nodiscard]] auto LLVMBasedICFG::allNonCallStartNodesImpl() const - -> std::vector { +auto LLVMBasedICFG::allNonCallStartNodesImpl() const -> std::vector { std::vector NonCallStartNodes; NonCallStartNodes.reserve(2 * IRDB->getNumFunctions()); for (const auto *Inst : IRDB->getAllInstructions()) { @@ -425,7 +427,7 @@ bool LLVMBasedICFG::isPhasarGenerated(const llvm::Function &F) noexcept { return NonCallStartNodes; } -[[nodiscard]] auto LLVMBasedICFG::getCallsFromWithinImpl(f_t Fun) const +auto LLVMBasedICFG::getCallsFromWithinImpl(f_t Fun) const -> llvm::SmallVector { llvm::SmallVector CallSites; for (const auto &I : llvm::instructions(Fun)) { @@ -436,7 +438,7 @@ bool LLVMBasedICFG::isPhasarGenerated(const llvm::Function &F) noexcept { return CallSites; } -[[nodiscard]] auto LLVMBasedICFG::getReturnSitesOfCallAtImpl(n_t Inst) const +auto LLVMBasedICFG::getReturnSitesOfCallAtImpl(n_t Inst) const -> llvm::SmallVector { /// Currently, we don't distinguish normal-dest and unwind-dest, so we can /// just use getSuccsOf @@ -451,7 +453,7 @@ void LLVMBasedICFG::printImpl(llvm::raw_ostream &OS) const { [](n_t CS) { return llvmIRToStableString(CS); }); } -[[nodiscard]] nlohmann::json LLVMBasedICFG::getAsJsonImpl() const { +nlohmann::json LLVMBasedICFG::getAsJsonImpl() const { return CG.getAsJson( [](f_t F) { return F->getName().str(); }, [this](n_t Inst) { return IRDB->getInstructionId(Inst); }); diff --git 
a/lib/PhasarLLVM/ControlFlow/Resolver/CHAResolver.cpp b/lib/PhasarLLVM/ControlFlow/Resolver/CHAResolver.cpp index 2e841321b..2652bc81a 100644 --- a/lib/PhasarLLVM/ControlFlow/Resolver/CHAResolver.cpp +++ b/lib/PhasarLLVM/ControlFlow/Resolver/CHAResolver.cpp @@ -47,7 +47,7 @@ auto CHAResolver::resolveVirtualCall(const llvm::CallBase *CallSite) // Leading to SEGFAULT in Unittests. Error only when // run in Debug mode // << llvmIRToString(CallSite) - << "\n"); + << '\n'); return {}; } diff --git a/lib/PhasarLLVM/ControlFlow/Resolver/DTAResolver.cpp b/lib/PhasarLLVM/ControlFlow/Resolver/DTAResolver.cpp index 0bfec443e..8830c4125 100644 --- a/lib/PhasarLLVM/ControlFlow/Resolver/DTAResolver.cpp +++ b/lib/PhasarLLVM/ControlFlow/Resolver/DTAResolver.cpp @@ -168,7 +168,7 @@ auto DTAResolver::resolveVirtualCall(const llvm::CallBase *CallSite) PHASAR_LOG_LEVEL(DEBUG, "Error with resolveVirtualCall : impossible to retrieve " "the vtable index\n" - << llvmIRToString(CallSite) << "\n"); + << llvmIRToString(CallSite) << '\n'); return {}; } diff --git a/lib/PhasarLLVM/ControlFlow/Resolver/OTFResolver.cpp b/lib/PhasarLLVM/ControlFlow/Resolver/OTFResolver.cpp index b1e99e85d..24f61aafd 100644 --- a/lib/PhasarLLVM/ControlFlow/Resolver/OTFResolver.cpp +++ b/lib/PhasarLLVM/ControlFlow/Resolver/OTFResolver.cpp @@ -86,7 +86,7 @@ auto OTFResolver::resolveVirtualCall(const llvm::CallBase *CallSite) PHASAR_LOG_LEVEL(DEBUG, "Error with resolveVirtualCall : impossible to retrieve " "the vtable index\n" - << llvmIRToString(CallSite) << "\n"); + << llvmIRToString(CallSite) << '\n'); return {}; } diff --git a/lib/PhasarLLVM/ControlFlow/Resolver/RTAResolver.cpp b/lib/PhasarLLVM/ControlFlow/Resolver/RTAResolver.cpp index 67d163df6..92e42ac8d 100644 --- a/lib/PhasarLLVM/ControlFlow/Resolver/RTAResolver.cpp +++ b/lib/PhasarLLVM/ControlFlow/Resolver/RTAResolver.cpp @@ -68,7 +68,7 @@ auto RTAResolver::resolveVirtualCall(const llvm::CallBase *CallSite) PHASAR_LOG_LEVEL(DEBUG, "Error with 
resolveVirtualCall : impossible to retrieve " "the vtable index\n" - << llvmIRToString(CallSite) << "\n"); + << llvmIRToString(CallSite) << '\n'); return {}; } diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/IFDSFieldSensTaintAnalysis/Stats/LcovRetValWriter.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/IFDSFieldSensTaintAnalysis/Stats/LcovRetValWriter.cpp index 036bdc37f..d2354064f 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/IFDSFieldSensTaintAnalysis/Stats/LcovRetValWriter.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/IFDSFieldSensTaintAnalysis/Stats/LcovRetValWriter.cpp @@ -56,13 +56,13 @@ void LcovRetValWriter::write() const { const auto File = FileEntry.first; const auto FunctionStats = FileEntry.second; - Writer << "SF:" << File << "\n"; + Writer << "SF:" << File << '\n'; for (const auto &FunctionEntry : FunctionStats) { const auto Function = FunctionEntry.first; Writer << "FNDA:" - << "1," << Function << "\n"; + << "1," << Function << '\n'; } for (const auto &FunctionEntry : FunctionStats) { @@ -70,13 +70,11 @@ void LcovRetValWriter::write() const { for (const auto &LineNumberEntry : LineNumberStats) { - Writer << "DA:" << LineNumberEntry.getLineNumber() << ",1" - << "\n"; + Writer << "DA:" << LineNumberEntry.getLineNumber() << ",1" << '\n'; } } - Writer << "end_of_record" - << "\n"; + Writer << "end_of_record" << '\n'; } } diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/IFDSFieldSensTaintAnalysis/Stats/LcovWriter.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/IFDSFieldSensTaintAnalysis/Stats/LcovWriter.cpp index 9730fff69..f577efaa7 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/IFDSFieldSensTaintAnalysis/Stats/LcovWriter.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/IFDSFieldSensTaintAnalysis/Stats/LcovWriter.cpp @@ -15,13 +15,13 @@ void LcovWriter::write() const { const auto File = FileEntry.first; const auto FunctionStats = FileEntry.second; - Writer << "SF:" << File << "\n"; + Writer << "SF:" << File << '\n'; for (const auto &FunctionEntry : FunctionStats) { const auto Function = 
FunctionEntry.first; Writer << "FNDA:" - << "1," << Function << "\n"; + << "1," << Function << '\n'; } for (const auto &FunctionEntry : FunctionStats) { @@ -29,13 +29,11 @@ void LcovWriter::write() const { for (const auto &LineNumberEntry : LineNumberStats) { - Writer << "DA:" << LineNumberEntry.getLineNumber() << ",1" - << "\n"; + Writer << "DA:" << LineNumberEntry.getLineNumber() << ",1" << '\n'; } } - Writer << "end_of_record" - << "\n"; + Writer << "end_of_record" << '\n'; } } diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/IFDSFieldSensTaintAnalysis/Stats/LineNumberWriter.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/IFDSFieldSensTaintAnalysis/Stats/LineNumberWriter.cpp index b4ac741cb..f4b99c9c6 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/IFDSFieldSensTaintAnalysis/Stats/LineNumberWriter.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/IFDSFieldSensTaintAnalysis/Stats/LineNumberWriter.cpp @@ -19,7 +19,7 @@ void LineNumberWriter::write() const { for (const auto &LineNumberEntry : LineNumberStats) { - Writer << LineNumberEntry.getLineNumber() << "\n"; + Writer << LineNumberEntry.getLineNumber() << '\n'; } } } diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/IFDSFieldSensTaintAnalysis/Utils/DataFlowUtils.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/IFDSFieldSensTaintAnalysis/Utils/DataFlowUtils.cpp index 24959d9f8..2262176ac 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/IFDSFieldSensTaintAnalysis/Utils/DataFlowUtils.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/IFDSFieldSensTaintAnalysis/Utils/DataFlowUtils.cpp @@ -1029,7 +1029,7 @@ dumpMemoryLocation(const std::vector &MemLocationSeq) { for (const auto MemLocationPart : MemLocationSeq) { llvm::outs() << "[ENV_TRACE] "; MemLocationPart->print(llvm::outs()); - llvm::outs() << "\n"; + llvm::outs() << '\n'; llvm::outs().flush(); } #endif diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/IFDSIDESolverConfig.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/IFDSIDESolverConfig.cpp index 7e6d854ad..6fe2c57b2 100644 --- 
a/lib/PhasarLLVM/DataFlow/IfdsIde/IFDSIDESolverConfig.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/IFDSIDESolverConfig.cpp @@ -9,12 +9,11 @@ #include "phasar/DataFlow/IfdsIde/IFDSIDESolverConfig.h" -#include +#include "phasar/Utils/IOManip.h" -using namespace std; -using namespace psr; +#include "llvm/Support/raw_ostream.h" -namespace psr { +using namespace psr; IFDSIDESolverConfig::IFDSIDESolverConfig(SolverConfigOptions Options) noexcept : Options(Options) {} @@ -59,16 +58,15 @@ void IFDSIDESolverConfig::setComputePersistedSummaries(bool Set) { void IFDSIDESolverConfig::setConfig(SolverConfigOptions Opt) { Options = Opt; } -ostream &operator<<(ostream &OS, const IFDSIDESolverConfig &SC) { +llvm::raw_ostream &psr::operator<<(llvm::raw_ostream &OS, + const IFDSIDESolverConfig &SC) { return OS << "IFDSIDESolverConfig:\n" - << "\tfollowReturnsPastSeeds: " << SC.followReturnsPastSeeds() - << "\n" - << "\tautoAddZero: " << std::boolalpha << SC.autoAddZero() << "\n" - << "\tcomputeValues: " << SC.computeValues() << "\n" - << "\trecordEdges: " << SC.recordEdges() << "\n" - << "\tcomputePersistedSummaries: " << SC.computePersistedSummaries() - << "\n" - << "\temitESG: " << SC.emitESG(); + << "\tfollowReturnsPastSeeds: " + << BoolAlpha{SC.followReturnsPastSeeds()} << '\n' + << "\tautoAddZero: " << BoolAlpha{SC.autoAddZero()} << '\n' + << "\tcomputeValues: " << BoolAlpha{SC.computeValues()} << '\n' + << "\trecordEdges: " << BoolAlpha{SC.recordEdges()} << '\n' + << "\tcomputePersistedSummaries: " + << BoolAlpha{SC.computePersistedSummaries()} << '\n' + << "\temitESG: " << BoolAlpha{SC.emitESG()}; } - -} // namespace psr diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEExtendedTaintAnalysis.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEExtendedTaintAnalysis.cpp index 02c08a309..d757e81dd 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEExtendedTaintAnalysis.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEExtendedTaintAnalysis.cpp @@ -753,7 +753,7 @@ 
void IDEExtendedTaintAnalysis::emitTextReport( for (auto &[Inst, LeakSet] : Leaks) { OS << "At " << NToString(Inst) << '\n'; for (const auto &Leak : LeakSet) { - OS << "\t" << llvmIRToShortString(Leak) << "\n"; + OS << "\t" << llvmIRToShortString(Leak) << '\n'; } } OS << '\n'; diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEGeneralizedLCA/IDEGeneralizedLCA.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEGeneralizedLCA/IDEGeneralizedLCA.cpp index b49ecb872..329215181 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEGeneralizedLCA/IDEGeneralizedLCA.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEGeneralizedLCA/IDEGeneralizedLCA.cpp @@ -294,12 +294,12 @@ EdgeFunction IDEGeneralizedLCA::getNormalEdgeFunction( IDEGeneralizedLCA::d_t PointerOperand = Store->getPointerOperand(); IDEGeneralizedLCA::d_t ValueOperand = Store->getValueOperand(); /*if (auto cnstFP = llvm::dyn_cast(valueOperand)) { - llvm::errs() << "Value Operand: " << *cnstFP << "\n"; + llvm::errs() << "Value Operand: " << *cnstFP << '\n'; llvm::errs() << "ValueOperand as APF: "; cnstFP->getValueAPF().print(llvm::errs()); - llvm::errs() << "\n"; + llvm::errs() << '\n'; llvm::errs() << "Value operand as double: " - << cnstFP->getValueAPF().convertToDouble() << "\n"; + << cnstFP->getValueAPF().convertToDouble() << '\n'; }*/ if (PointerOperand == SuccNode) { // Case I: Storing a constant value. 
@@ -488,7 +488,7 @@ EdgeFunction IDEGeneralizedLCA::allTopFunction() { if (!llvm::isa(res.first)) { os << "\nValue: " << VtoString(res.second) << "\nIR : " << DtoString(res.first) << '\n' - << llvmValueToSrc(res.first, false) << "\n"; + << llvmValueToSrc(res.first, false) << '\n'; } } } diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSFieldSensTaintAnalysis.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSFieldSensTaintAnalysis.cpp index 8b1eb7341..f5664cdfd 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSFieldSensTaintAnalysis.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSFieldSensTaintAnalysis.cpp @@ -265,20 +265,20 @@ void IFDSFieldSensTaintAnalysis::emitTextReport( std::string psr::DToString(const ExtendedValue &EV) { std::string Ret; llvm::raw_string_ostream OS(Ret); - OS << llvmIRToString(EV.getValue()) << "\n"; + OS << llvmIRToString(EV.getValue()) << '\n'; for (const auto *MemLocationPart : EV.getMemLocationSeq()) { - OS << "A:\t" << llvmIRToString(MemLocationPart) << "\n"; + OS << "A:\t" << llvmIRToString(MemLocationPart) << '\n'; } if (!EV.getEndOfTaintedBlockLabel().empty()) { - OS << "L:\t" << EV.getEndOfTaintedBlockLabel() << "\n"; + OS << "L:\t" << EV.getEndOfTaintedBlockLabel() << '\n'; } if (EV.isVarArg()) { - OS << "VT:\t" << EV.isVarArgTemplate() << "\n"; + OS << "VT:\t" << EV.isVarArgTemplate() << '\n'; for (const auto *VAListMemLocationPart : EV.getVaListMemLocationSeq()) { - OS << "VLA:\t" << llvmIRToString(VAListMemLocationPart) << "\n"; + OS << "VLA:\t" << llvmIRToString(VAListMemLocationPart) << '\n'; } - OS << "VI:\t" << EV.getVarArgIndex() << "\n"; - OS << "CI:\t" << EV.getCurrentVarArgIndex() << "\n"; + OS << "VI:\t" << EV.getVarArgIndex() << '\n'; + OS << "CI:\t" << EV.getCurrentVarArgIndex() << '\n'; } return Ret; diff --git a/lib/PhasarLLVM/Passes/GeneralStatisticsAnalysis.cpp b/lib/PhasarLLVM/Passes/GeneralStatisticsAnalysis.cpp index 0b46a0c6f..111404867 100644 --- 
a/lib/PhasarLLVM/Passes/GeneralStatisticsAnalysis.cpp +++ b/lib/PhasarLLVM/Passes/GeneralStatisticsAnalysis.cpp @@ -238,21 +238,21 @@ nlohmann::json GeneralStatistics::getAsJson() const { llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const GeneralStatistics &Statistics) { - return OS << "General LLVM IR Statistics" - << "\n" + return OS << "General LLVM IR Statistics\n" << "Module " << Statistics.ModuleName << ":\n" - << "LLVM IR instructions:\t" << Statistics.Instructions << "\n" - << "Functions:\t" << Statistics.Functions << "\n" - << "Global Variables:\t" << Statistics.Globals << "\n" - << "Global Variable Consts:\t" << Statistics.GlobalConsts << "\n" - << "Global Pointers:\t" << Statistics.GlobalPointers << "\n" + << "LLVM IR instructions:\t" << Statistics.Instructions << '\n' + << "Functions:\t" << Statistics.Functions << '\n' + << "Globals:\t" << Statistics.Globals << '\n' + << "Global Consts:\t" << Statistics.GlobalConsts << '\n' + << "Global Variables:\t" + << Statistics.Globals - Statistics.GlobalConsts << '\n' << "Alloca Instructions:\t" << Statistics.AllocaInstructions.size() - << "\n" - << "Call Sites:\t" << Statistics.CallSites << "\n" - << "Branches:\t" << Statistics.Branches << "\n" - << "GetElementPtrs:\t" << Statistics.GetElementPtrs << "\n" - << "Phi Nodes:\t" << Statistics.PhiNodes << "\n" - << "Basic Blocks:\t" << Statistics.BasicBlocks << "\n"; + << '\n' + << "Call Sites:\t" << Statistics.CallSites << '\n' + << "Branches:\t" << Statistics.Branches << '\n' + << "GetElementPtrs:\t" << Statistics.GetElementPtrs << '\n' + << "Phi Nodes:\t" << Statistics.PhiNodes << '\n' + << "Basic Blocks:\t" << Statistics.BasicBlocks << '\n'; } } // namespace psr diff --git a/lib/PhasarLLVM/Pointer/LLVMAliasSet.cpp b/lib/PhasarLLVM/Pointer/LLVMAliasSet.cpp index 07902f06c..c5e0be266 100644 --- a/lib/PhasarLLVM/Pointer/LLVMAliasSet.cpp +++ b/lib/PhasarLLVM/Pointer/LLVMAliasSet.cpp @@ -785,7 +785,7 @@ void LLVMAliasSet::drawAliasSetsDistribution(int Peak) 
const { llvm::outs() << llvm::formatv("{0,10} |{1,-50} {2,-10}\n", KV.first, PeakBar, KV.second); } - llvm::outs() << "\n"; + llvm::outs() << '\n'; if (Peak) { for (const auto &ValueSetPair : AliasSets) { diff --git a/lib/PhasarLLVM/Pointer/LLVMBasedAliasAnalysis.cpp b/lib/PhasarLLVM/Pointer/LLVMBasedAliasAnalysis.cpp index 319dbc4b7..bfdcd308d 100644 --- a/lib/PhasarLLVM/Pointer/LLVMBasedAliasAnalysis.cpp +++ b/lib/PhasarLLVM/Pointer/LLVMBasedAliasAnalysis.cpp @@ -59,7 +59,7 @@ static void printResults(llvm::AliasResult AR, bool P, const llvm::Value *V1, if (O2 < O1) { std::swap(O1, O2); } - llvm::errs() << " " << AR << ":\t" << O1 << ", " << O2 << "\n"; + llvm::errs() << " " << AR << ":\t" << O1 << ", " << O2 << '\n'; } } diff --git a/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp b/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp index 06958f7ed..42e65f37b 100644 --- a/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp +++ b/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp @@ -49,7 +49,7 @@ findAllFunctionDefs(const LLVMProjectIRDB &IRDB, llvm::StringRef Name) { llvm::errs() << "The function name '" << Name << "' is ambiguous. 
Possible candidates are:\n"; for (const auto *F : FnDefs) { - llvm::errs() << "> " << F->getName() << "\n"; + llvm::errs() << "> " << F->getName() << '\n'; } llvm::errs() << "Please further specify the function's name, such that it " "becomes unambiguous\n"; @@ -66,7 +66,7 @@ void LLVMTaintConfig::addAllFunctions(const LLVMProjectIRDB &IRDB, auto FnDefs = findAllFunctionDefs(IRDB, Name); if (FnDefs.empty()) { - llvm::errs() << "WARNING: Cannot retrieve function " << Name << "\n"; + llvm::errs() << "WARNING: Cannot retrieve function " << Name << '\n'; continue; } @@ -81,7 +81,7 @@ void LLVMTaintConfig::addAllFunctions(const LLVMProjectIRDB &IRDB, llvm::errs() << "ERROR: The source-function parameter index is out of " "bounds: " - << Idx << "\n"; + << Idx << '\n'; // Use 'continue' instead of 'break' to get error messages for the // remaining parameters as well continue; @@ -96,7 +96,7 @@ void LLVMTaintConfig::addAllFunctions(const LLVMProjectIRDB &IRDB, llvm::errs() << "ERROR: The source-function parameter index is out of " "bounds: " - << Idx << "\n"; + << Idx << '\n'; continue; } addTaintCategory(Fun->getArg(Idx), TaintCategory::Sink); @@ -116,7 +116,7 @@ void LLVMTaintConfig::addAllFunctions(const LLVMProjectIRDB &IRDB, llvm::errs() << "ERROR: The source-function parameter index is out of " "bounds: " - << Idx << "\n"; + << Idx << '\n'; continue; } addTaintCategory(Fun->getArg(Idx), TaintCategory::Sanitizer); diff --git a/lib/Utils/IOManip.cpp b/lib/Utils/IOManip.cpp new file mode 100644 index 000000000..3e312fa52 --- /dev/null +++ b/lib/Utils/IOManip.cpp @@ -0,0 +1,12 @@ +#include "phasar/Utils/IOManip.h" + +#include "llvm/Support/raw_ostream.h" + +llvm::raw_ostream &psr::operator<<(llvm::raw_ostream &OS, BoolAlpha BA) { + return OS << (BA.Value ? 
"true" : "false"); +} + +llvm::raw_ostream &psr::operator<<(llvm::raw_ostream &OS, Flush) { + OS.flush(); + return OS; +} diff --git a/lib/Utils/PAMM.cpp b/lib/Utils/PAMM.cpp index b1ea931ac..15dba92cc 100644 --- a/lib/Utils/PAMM.cpp +++ b/lib/Utils/PAMM.cpp @@ -255,7 +255,7 @@ void PAMM::printTimers(llvm::raw_ostream &OS) { if (StoppedTimer.empty()) { OS << "No single Timer started!\n\n"; } else { - OS << "\n"; + OS << '\n'; } OS << "Repeating Timer\n"; OS << "---------------\n"; @@ -290,7 +290,7 @@ void PAMM::printCounters(llvm::raw_ostream &OS) { if (Counter.empty()) { OS << "No Counter registered!\n"; } else { - OS << "\n"; + OS << '\n'; } } diff --git a/unittests/PhasarLLVM/ControlFlow/LLVMBasedICFGExportTest.cpp b/unittests/PhasarLLVM/ControlFlow/LLVMBasedICFGExportTest.cpp index 9b4b18046..a69446276 100644 --- a/unittests/PhasarLLVM/ControlFlow/LLVMBasedICFGExportTest.cpp +++ b/unittests/PhasarLLVM/ControlFlow/LLVMBasedICFGExportTest.cpp @@ -110,7 +110,7 @@ class LLVMBasedICFGExportTest : public ::testing::Test { = [WithDebugOutput](auto &&...Args) { if (WithDebugOutput) { ((llvm::errs() << Args), ...); - llvm::errs() << "\n"; + llvm::errs() << '\n'; } }; @@ -193,7 +193,7 @@ class LLVMBasedICFGExportTest : public ::testing::Test { } } AW{}; IRDB.getModule()->print(llvm::errs(), &AW); - // llvm::errs() << "ModuleRef: " << *IRDB.getWPAModule() << "\n"; + // llvm::errs() << "ModuleRef: " << *IRDB.getWPAModule() << '\n'; llvm::errs() << ICFG.exportICFGAsJson(/*WithSourceCodeInfo*/ false).dump(4) << '\n'; diff --git a/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEExtendedTaintAnalysisTest.cpp b/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEExtendedTaintAnalysisTest.cpp index 8f942e762..e169a7434 100644 --- a/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEExtendedTaintAnalysisTest.cpp +++ b/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEExtendedTaintAnalysisTest.cpp @@ -67,7 +67,7 @@ class IDETaintAnalysisTest : public ::testing::Test { auto Ret = 
LLVMTaintConfig(HA.getProjectIRDB(), *JS); if (DumpResults) { - llvm::errs() << Ret << "\n"; + llvm::errs() << Ret << '\n'; } return Ret; }, diff --git a/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSConstAnalysisTest.cpp b/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSConstAnalysisTest.cpp index 77f858c30..dfbd2687d 100644 --- a/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSConstAnalysisTest.cpp +++ b/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSConstAnalysisTest.cpp @@ -75,7 +75,7 @@ class IFDSConstAnalysisTest : public ::testing::Test { } std::set MutableIDs; for (const auto *Memloc : AllMutableAllocas) { - std::cerr << "> Is Mutable: " << llvmIRToShortString(Memloc) << "\n"; + std::cerr << "> Is Mutable: " << llvmIRToShortString(Memloc) << '\n'; MutableIDs.insert(std::stoul(getMetaDataID(Memloc))); } EXPECT_EQ(GroundTruth, MutableIDs); From a8e1998705ee8565f8a6c0be3263f5faedf5b306 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Wed, 13 Dec 2023 19:14:54 +0100 Subject: [PATCH 10/11] Fix invalid reverse lookup with PropagateOntoStrategy --- .../DataFlow/IfdsIde/Solver/EagerIDESolver.h | 24 +++++++++++++++++++ .../IfdsIde/Solver/detail/IDESolverImpl.h | 22 ++++++++++++++--- 2 files changed, 43 insertions(+), 3 deletions(-) diff --git a/include/phasar/DataFlow/IfdsIde/Solver/EagerIDESolver.h b/include/phasar/DataFlow/IfdsIde/Solver/EagerIDESolver.h index 65aeb942a..e8ff77699 100644 --- a/include/phasar/DataFlow/IfdsIde/Solver/EagerIDESolver.h +++ b/include/phasar/DataFlow/IfdsIde/Solver/EagerIDESolver.h @@ -126,6 +126,30 @@ class IDESolver } } + const llvm::SmallVectorImpl>> * + incomingJumpFunctionsAtCall( + n_t CallSite, d_t TargetVal, + llvm::SmallVectorImpl>> &Storage) { + const auto &Preds = this->ICF->getPredsOf(CallSite); + + if (Preds.size() == 1) { + auto Opt = this->JumpFn->reverseLookup(*Preds.begin(), TargetVal); + if (Opt) { + return &Opt->get(); + } + return nullptr; + } + + for (const auto &Pred : Preds) { + auto Opt = 
this->JumpFn->reverseLookup(Pred, TargetVal); + if (Opt) { + Storage.append(Opt->get()); + } + } + + return Storage.empty() ? nullptr : &Storage; + } + void addInitialWorklistItem(d_t SourceVal, n_t Target, d_t TargetVal, EdgeFunction EF) { addWorklistItem(std::move(SourceVal), std::move(Target), diff --git a/include/phasar/DataFlow/IfdsIde/Solver/detail/IDESolverImpl.h b/include/phasar/DataFlow/IfdsIde/Solver/detail/IDESolverImpl.h index 551ee7238..594a14fb8 100644 --- a/include/phasar/DataFlow/IfdsIde/Solver/detail/IDESolverImpl.h +++ b/include/phasar/DataFlow/IfdsIde/Solver/detail/IDESolverImpl.h @@ -726,6 +726,9 @@ class IDESolverImpl : public IDESolverAPIMixin { } self().printEndSummaryTab(); self().printIncomingTab(); + + llvm::SmallVector>> JFStorage; + // for each incoming call edge already processed //(see processCall(..)) for (const auto &Entry : Inc) { @@ -776,10 +779,12 @@ class IDESolverImpl : public IDESolverAPIMixin { PHASAR_LOG_LEVEL(DEBUG, " = " << fPrime); // for each jump function coming into the call, propagate to // return site using the composed function - auto RevLookupResult = JumpFn->reverseLookup(c, d4); + JFStorage.clear(); + const auto *RevLookupResult = + self().incomingJumpFunctionsAtCall(c, d4, JFStorage); if (RevLookupResult) { - for (size_t I = 0; I < RevLookupResult->get().size(); ++I) { - auto ValAndFunc = RevLookupResult->get()[I]; + for (size_t I = 0; I < RevLookupResult->size(); ++I) { + auto ValAndFunc = (*RevLookupResult)[I]; EdgeFunction f3 = ValAndFunc.second; if (f3 != AllTop) { d_t d3 = ValAndFunc.first; @@ -845,6 +850,17 @@ class IDESolverImpl : public IDESolverAPIMixin { } } + const llvm::SmallVectorImpl>> * + incomingJumpFunctionsAtCall( + n_t CallSite, d_t TargetVal, + llvm::SmallVectorImpl>> & /*Storage*/) { + auto Opt = JumpFn->reverseLookup(CallSite, TargetVal); + if (Opt) { + return &Opt->get(); + } + return nullptr; + } + void propagteUnbalancedReturnFlow(n_t RetSiteC, d_t TargetVal, EdgeFunction
EdgeFunc, n_t /*RelatedCallSite*/) { From 8f7f948e820121a5f7226206e5e4c6d17b124114 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Wed, 13 Dec 2023 20:29:32 +0100 Subject: [PATCH 11/11] Better testing for EagerIDESolver --- .../DataFlow/IfdsIde/Solver/EagerIDESolver.h | 7 ++++ .../linear_constant/call_13.cpp | 13 ++++++ .../DataFlow/IfdsIde/EagerIDESolverTest.cpp | 41 ++++++++++++++++++- 3 files changed, 59 insertions(+), 2 deletions(-) create mode 100644 test/llvm_test_code/linear_constant/call_13.cpp diff --git a/include/phasar/DataFlow/IfdsIde/Solver/EagerIDESolver.h b/include/phasar/DataFlow/IfdsIde/Solver/EagerIDESolver.h index 2fa8c9990..29d0158c2 100644 --- a/include/phasar/DataFlow/IfdsIde/Solver/EagerIDESolver.h +++ b/include/phasar/DataFlow/IfdsIde/Solver/EagerIDESolver.h @@ -10,6 +10,7 @@ #ifndef PHASAR_DATAFLOW_IFDSIDE_SOLVER_EAGERIDESOLVER_H #define PHASAR_DATAFLOW_IFDSIDE_SOLVER_EAGERIDESOLVER_H +#include "phasar/DataFlow/IfdsIde/EdgeFunctionUtils.h" #include "phasar/DataFlow/IfdsIde/Solver/detail/IDESolverImpl.h" namespace psr { @@ -140,6 +141,12 @@ class IDESolver return nullptr; } + if (Preds.empty()) { + // We are at the start of the current function + Storage.emplace_back(std::move(TargetVal), EdgeIdentity{}); + return &Storage; + } + for (const auto &Pred : Preds) { auto Opt = this->JumpFn->reverseLookup(Pred, TargetVal); if (Opt) { diff --git a/test/llvm_test_code/linear_constant/call_13.cpp b/test/llvm_test_code/linear_constant/call_13.cpp new file mode 100644 index 000000000..ba35a2be3 --- /dev/null +++ b/test/llvm_test_code/linear_constant/call_13.cpp @@ -0,0 +1,13 @@ + +extern "C" int puts(int); + +void use(int &p) { puts(p); } + +int main() { + int x = 42; + use(x); + int y = 43; + y = x; + y++; + return y; +} diff --git a/unittests/PhasarLLVM/DataFlow/IfdsIde/EagerIDESolverTest.cpp b/unittests/PhasarLLVM/DataFlow/IfdsIde/EagerIDESolverTest.cpp index 7e246f121..b4e4f588f 100644 --- 
a/unittests/PhasarLLVM/DataFlow/IfdsIde/EagerIDESolverTest.cpp +++ b/unittests/PhasarLLVM/DataFlow/IfdsIde/EagerIDESolverTest.cpp @@ -11,6 +11,8 @@ #include "phasar/Utils/Printer.h" #include "phasar/Utils/TypeTraits.h" +#include "llvm/Support/raw_ostream.h" + #include "TestConfig.h" #include "gtest/gtest.h" @@ -30,6 +32,26 @@ class LinearConstant : public ::testing::TestWithParam { }; // Test Fixture +template +static std::string computeDiff(const ResultsMapTy &DefaultResults, + const ResultsMapTy &EagerResults) { + std::string Ret; + llvm::raw_string_ostream OS(Ret); + + for (const auto &[Fact, Val] : DefaultResults) { + if (!EagerResults.count(Fact)) { + OS << " + " << DToString(Fact) << '\n'; + } + } + for (const auto &[Fact, Val] : EagerResults) { + if (!DefaultResults.count(Fact)) { + OS << " - " << DToString(Fact) << '\n'; + } + } + + return Ret; +} + TEST_P(LinearConstant, ResultsEquivalentPropagateOnto) { HelperAnalyses HA(PathToLlFiles + GetParam(), EntryPoints); @@ -54,7 +76,7 @@ TEST_P(LinearConstant, ResultsEquivalentPropagateOnto) { bool Failed = false; for (const auto *Stmt : HA.getProjectIRDB().getAllInstructions()) { - if (Stmt->isTerminator()) { + if (Stmt->isTerminator() || Stmt->isDebugOrPseudoInst()) { continue; } @@ -66,13 +88,26 @@ TEST_P(LinearConstant, ResultsEquivalentPropagateOnto) { EXPECT_EQ(PropagateOverRes, Value) << "The Incoming results of the eager IDE solver should match the " "outgoing results of the default solver. 
Expected: (" - << NToString(NextStmt) << ", " << DToString(Fact) << ") --> " + << NToString(Stmt) << ", " << DToString(Fact) << ") --> " << LToString(PropagateOverRes) << "; got " << LToString(Value); Failed |= PropagateOverRes != Value; } + + auto DefaultSize = PropagateOverResults.resultsAt(NextStmt).size(); + auto EagerSize = PropagateOntoResults.resultsAt(Stmt).size(); + + EXPECT_EQ(DefaultSize, EagerSize) + << "The Number of facts holding at the incoming results of the eager " + "IDE solver do not match the number of outgoing facts of the " + "default solver. At: " + << NToString(Stmt) << " Diff:\n" + << computeDiff(PropagateOverResults.resultsAt(NextStmt), + PropagateOntoResults.resultsAt(Stmt)); + Failed |= DefaultSize != EagerSize; } if (Failed) { PropagateOntoResults.dumpResults(ICFG); + llvm::outs().flush(); } } } @@ -117,6 +152,8 @@ static constexpr std::string_view LCATestFiles[] = { "call_09_cpp_dbg.ll", "call_10_cpp_dbg.ll", "call_11_cpp_dbg.ll", + "call_12_cpp_dbg.ll", + "call_13_cpp_dbg.ll", "recursion_01_cpp_dbg.ll", "recursion_02_cpp_dbg.ll",