From c78dbe0e50e1bbc1e24a76351ad7190b159f6861 Mon Sep 17 00:00:00 2001 From: Martin Mory Date: Wed, 6 Sep 2023 09:35:29 +0200 Subject: [PATCH 1/5] Fix handling of unbalanced returns for IIA (#664) * fix handling of unbalanced returns for IIA, for the change in the IDE solver I am not sure yet how this should be done properly. * Update include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEInstInteractionAnalysis.h Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> * Fix error due to update from dev * Separate API for side-effects of ret-FF in case of unbalanced-return end * pre-commit --------- Co-authored-by: Martin Mory Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: Fabian Schiebel <52407375+fabianbs96@users.noreply.github.com> Co-authored-by: Fabian Schiebel --- .../phasar/DataFlow/IfdsIde/FlowFunctions.h | 13 ++++++++ .../IfdsIde/Solver/FlowEdgeFunctionCache.h | 33 +++++++++++++++++++ .../DataFlow/IfdsIde/Solver/IDESolver.h | 12 +++---- .../Problems/IDEInstInteractionAnalysis.h | 4 ++- include/phasar/Utils/Utilities.h | 23 +++++++++++++ 5 files changed, 77 insertions(+), 8 deletions(-) diff --git a/include/phasar/DataFlow/IfdsIde/FlowFunctions.h b/include/phasar/DataFlow/IfdsIde/FlowFunctions.h index 87a3db452..7ff1769d7 100644 --- a/include/phasar/DataFlow/IfdsIde/FlowFunctions.h +++ b/include/phasar/DataFlow/IfdsIde/FlowFunctions.h @@ -794,6 +794,19 @@ class FlowFunctions virtual FlowFunctionPtrType getRetFlowFunction(n_t CallSite, f_t CalleeFun, n_t ExitInst, n_t RetSite) = 0; + // Performs any side-effects of a return-flow-function + // + // In case of unbalanced returns (if the option `followReturnsPastSeeds` is + // activated in the IfdsIdeSolverConfig), we will eventually reach a function + // that is not called from other functions.
Still, we may want to apply a + // return-flow-function -- just for its side-effects, such as registering a + // taint + virtual void applyUnbalancedRetFlowFunctionSideEffects(f_t CalleeFun, + n_t ExitInst, + d_t Source) { + // By default, do nothing + } + // // Describes the data-flows alongsite a CallSite. // diff --git a/include/phasar/DataFlow/IfdsIde/Solver/FlowEdgeFunctionCache.h b/include/phasar/DataFlow/IfdsIde/Solver/FlowEdgeFunctionCache.h index 484606935..b9d89a88e 100644 --- a/include/phasar/DataFlow/IfdsIde/Solver/FlowEdgeFunctionCache.h +++ b/include/phasar/DataFlow/IfdsIde/Solver/FlowEdgeFunctionCache.h @@ -15,6 +15,7 @@ #include "phasar/Utils/EquivalenceClassMap.h" #include "phasar/Utils/Logger.h" #include "phasar/Utils/PAMMMacros.h" +#include "phasar/Utils/Utilities.h" #include "llvm/ADT/DenseMap.h" @@ -185,6 +186,8 @@ class FlowEdgeFunctionCache { operator=(FlowEdgeFunctionCache &&FEFC) noexcept = default; FlowFunctionPtrType getNormalFlowFunction(n_t Curr, n_t Succ) { + assertNotNull(Curr); + assertNotNull(Succ); PAMM_GET_INSTANCE; IF_LOG_ENABLED( PHASAR_LOG_LEVEL(DEBUG, "Normal flow function factory call"); @@ -217,6 +220,8 @@ class FlowEdgeFunctionCache { } FlowFunctionPtrType getCallFlowFunction(n_t CallSite, f_t DestFun) { + assertNotNull(CallSite); + assertNotNull(DestFun); PAMM_GET_INSTANCE; IF_LOG_ENABLED( PHASAR_LOG_LEVEL(DEBUG, "Call flow function factory call"); @@ -241,6 +246,10 @@ class FlowEdgeFunctionCache { FlowFunctionPtrType getRetFlowFunction(n_t CallSite, f_t CalleeFun, n_t ExitInst, n_t RetSite) { + assertNotNull(CallSite); + assertNotNull(CalleeFun); + assertNotNull(ExitInst); + assertNotNull(RetSite); PAMM_GET_INSTANCE; IF_LOG_ENABLED( PHASAR_LOG_LEVEL(DEBUG, "Return flow function factory call"); @@ -270,6 +279,9 @@ class FlowEdgeFunctionCache { FlowFunctionPtrType getCallToRetFlowFunction(n_t CallSite, n_t RetSite, llvm::ArrayRef Callees) { + assertNotNull(CallSite); + assertNotNull(RetSite); + assertAllNotNull(Callees); 
PAMM_GET_INSTANCE; IF_LOG_ENABLED( PHASAR_LOG_LEVEL(DEBUG, "Call-to-Return flow function factory call"); @@ -300,6 +312,8 @@ class FlowEdgeFunctionCache { } FlowFunctionPtrType getSummaryFlowFunction(n_t CallSite, f_t DestFun) { + assertNotNull(CallSite); + assertNotNull(DestFun); // PAMM_GET_INSTANCE; // INC_COUNTER("Summary-FF Construction", 1, Full); IF_LOG_ENABLED( @@ -313,6 +327,9 @@ class FlowEdgeFunctionCache { EdgeFunction getNormalEdgeFunction(n_t Curr, d_t CurrNode, n_t Succ, d_t SuccNode) { + assertNotNull(Curr); + assertNotNull(Succ); + PAMM_GET_INSTANCE; IF_LOG_ENABLED( PHASAR_LOG_LEVEL(DEBUG, "Normal edge function factory call"); @@ -357,6 +374,10 @@ class FlowEdgeFunctionCache { EdgeFunction getCallEdgeFunction(n_t CallSite, d_t SrcNode, f_t DestinationFunction, d_t DestNode) { + + assertNotNull(CallSite); + assertNotNull(DestinationFunction); + PAMM_GET_INSTANCE; IF_LOG_ENABLED( PHASAR_LOG_LEVEL(DEBUG, "Call edge function factory call"); @@ -387,6 +408,11 @@ class FlowEdgeFunctionCache { EdgeFunction getReturnEdgeFunction(n_t CallSite, f_t CalleeFunction, n_t ExitInst, d_t ExitNode, n_t RetSite, d_t RetNode) { + assertNotNull(CallSite); + assertNotNull(CalleeFunction); + assertNotNull(ExitInst); + assertNotNull(RetSite); + PAMM_GET_INSTANCE; IF_LOG_ENABLED( PHASAR_LOG_LEVEL(DEBUG, "Return edge function factory call"); @@ -419,6 +445,10 @@ class FlowEdgeFunctionCache { EdgeFunction getCallToRetEdgeFunction(n_t CallSite, d_t CallNode, n_t RetSite, d_t RetSiteNode, llvm::ArrayRef Callees) { + assertNotNull(CallSite); + assertNotNull(RetSite); + assertAllNotNull(Callees); + PAMM_GET_INSTANCE; IF_LOG_ENABLED( PHASAR_LOG_LEVEL(DEBUG, "Call-to-Return edge function factory call"); @@ -472,6 +502,9 @@ class FlowEdgeFunctionCache { EdgeFunction getSummaryEdgeFunction(n_t CallSite, d_t CallNode, n_t RetSite, d_t RetSiteNode) { + assertNotNull(CallSite); + assertNotNull(RetSite); + PAMM_GET_INSTANCE; IF_LOG_ENABLED( PHASAR_LOG_LEVEL(DEBUG, "Summary edge 
function factory call"); diff --git a/include/phasar/DataFlow/IfdsIde/Solver/IDESolver.h b/include/phasar/DataFlow/IfdsIde/Solver/IDESolver.h index 669bd108d..ac730de2a 100644 --- a/include/phasar/DataFlow/IfdsIde/Solver/IDESolver.h +++ b/include/phasar/DataFlow/IfdsIde/Solver/IDESolver.h @@ -886,8 +886,9 @@ class IDESolver // conditionally generated values should only // be propagated into callers that have an incoming edge for this // condition - if (SolverConfig.followReturnsPastSeeds() && Inc.empty() && - IDEProblem.isZeroValue(d1)) { + /// TODO: Add a check for "d1 is seed in functionOf(n)" + if (SolverConfig.followReturnsPastSeeds() && Inc.empty() /*&& + IDEProblem.isZeroValue(d1)*/) { const auto &Callers = ICF->getCallersOf(FunctionThatNeedsSummary); for (n_t Caller : Callers) { for (n_t RetSiteC : ICF->getReturnSitesOfCallAt(Caller)) { @@ -922,11 +923,8 @@ class IDESolver // the flow function has a side effect such as registering a taint; // instead we thus call the return flow function will a null caller if (Callers.empty()) { - FlowFunctionPtrType RetFunction = - CachedFlowEdgeFunctions.getRetFlowFunction( - nullptr, FunctionThatNeedsSummary, n, nullptr); - INC_COUNTER("FF Queries", 1, Full); - RetFunction->computeTargets(d2); + IDEProblem.applyUnbalancedRetFlowFunctionSideEffects( + FunctionThatNeedsSummary, n, d2); } } } diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEInstInteractionAnalysis.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEInstInteractionAnalysis.h index 8f9a71af7..9e9972ac4 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEInstInteractionAnalysis.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEInstInteractionAnalysis.h @@ -522,7 +522,9 @@ class IDEInstInteractionAnalysisT n_t /* RetSite */) override { // Map return value back to the caller. If pointer parameters hold at the // end of a callee function generate all of those in the caller context. 
- + if (CallSite == nullptr) { + return this->killAllFlows(); + } auto MapFactsToCallerFF = mapFactsToCaller(llvm::cast(CallSite), ExitInst, {}, [](const llvm::Value *RetVal, d_t Src) { diff --git a/include/phasar/Utils/Utilities.h b/include/phasar/Utils/Utilities.h index 12154af21..762735bfe 100644 --- a/include/phasar/Utils/Utilities.h +++ b/include/phasar/Utils/Utilities.h @@ -14,9 +14,12 @@ #include "phasar/Utils/TypeTraits.h" #include "llvm/ADT/Hashing.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/raw_ostream.h" +#include #include #include #include @@ -266,6 +269,26 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, return OS; } +template +LLVM_ATTRIBUTE_ALWAYS_INLINE void assertNotNull(const T &Value) {} + +template +LLVM_ATTRIBUTE_ALWAYS_INLINE void assertNotNull(const std::optional &Value) { + assert(Value.has_value()); +} + +template +LLVM_ATTRIBUTE_ALWAYS_INLINE void assertNotNull(const T *Value) { + assert(Value != nullptr); +} + +template void assertAllNotNull(const T &Range) { + assertNotNull(Range); + for (const auto &Elem : Range) { + assertNotNull(Elem); + } +} + } // namespace psr #endif From 76fe0804c67d567e0a9b874373928e8bb4f98511 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Sun, 10 Sep 2023 13:38:21 +0200 Subject: [PATCH 2/5] Adds helper function to determine if a function was generated by phasar (#665) * Adds helper function to determine if a function was generated by phasar During global analysis, phasar generates helper function to correctly handle global ctors/dtors and other global code fragments. With the new checking function, users can determine whether a given function was generated by phasar or not, e.g., for global analysis. 
* get rid of magic string literals that name the various generated functions of phasar's global ctor/dtor modelling * minor style --------- Co-authored-by: Martin Mory Co-authored-by: Martin Mory Co-authored-by: Fabian Schiebel --- .../phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h | 12 ++++++++++++ lib/PhasarLLVM/ControlFlow/LLVMBasedICFG.cpp | 14 ++++++++++++++ .../ControlFlow/LLVMBasedICFGGlobalsImpl.cpp | 11 ++++++----- .../ControlFlow/LLVMBasedICFGGlobCtorDtorTest.cpp | 13 ++++++++----- .../PhasarLLVM/ControlFlow/LLVMBasedICFGTest.cpp | 4 ++-- 5 files changed, 42 insertions(+), 12 deletions(-) diff --git a/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h b/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h index e2171a31a..db0b492a1 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h +++ b/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h @@ -55,6 +55,15 @@ class LLVMBasedICFG : public LLVMBasedCFG, public ICFGBase { static constexpr llvm::StringLiteral GlobalCRuntimeModelName = "__psrCRuntimeGlobalCtorsModel"; + static constexpr llvm::StringLiteral GlobalCRuntimeDtorModelName = + "__psrCRuntimeGlobalDtorsModel"; + + static constexpr llvm::StringLiteral GlobalCRuntimeDtorsCallerName = + "__psrGlobalDtorsCaller"; + + static constexpr llvm::StringLiteral GlobalCRuntimeUserEntrySelectorName = + "__psrCRuntimeUserEntrySelector"; + /// Constructs the ICFG based on the given IRDB and the entry-points using a /// fixpoint iteration. This may take a long time. /// @@ -119,6 +128,9 @@ class LLVMBasedICFG : public LLVMBasedCFG, public ICFGBase { /// Gets the underlying IRDB [[nodiscard]] LLVMProjectIRDB *getIRDB() const noexcept { return IRDB; } + /// Returns true, if a function was generated by phasar. 
+ [[nodiscard]] static bool isPhasarGenerated(const llvm::Function &) noexcept; + using CFGBase::print; using ICFGBase::print; diff --git a/lib/PhasarLLVM/ControlFlow/LLVMBasedICFG.cpp b/lib/PhasarLLVM/ControlFlow/LLVMBasedICFG.cpp index 42d715c27..20ca70d3b 100644 --- a/lib/PhasarLLVM/ControlFlow/LLVMBasedICFG.cpp +++ b/lib/PhasarLLVM/ControlFlow/LLVMBasedICFG.cpp @@ -29,6 +29,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instruction.h" #include "llvm/Support/ErrorHandling.h" @@ -380,6 +381,19 @@ LLVMBasedICFG::LLVMBasedICFG(LLVMProjectIRDB *IRDB, LLVMBasedICFG::~LLVMBasedICFG() = default; +bool LLVMBasedICFG::isPhasarGenerated(const llvm::Function &F) noexcept { + if (F.hasName()) { + llvm::StringRef FunctionName = F.getName(); + return llvm::StringSwitch(FunctionName) + .Cases(GlobalCRuntimeModelName, GlobalCRuntimeDtorModelName, + GlobalCRuntimeDtorsCallerName, + GlobalCRuntimeUserEntrySelectorName, true) + .Default(false); + } + + return false; +} + [[nodiscard]] FunctionRange LLVMBasedICFG::getAllFunctionsImpl() const { return IRDB->getAllFunctions(); } diff --git a/lib/PhasarLLVM/ControlFlow/LLVMBasedICFGGlobalsImpl.cpp b/lib/PhasarLLVM/ControlFlow/LLVMBasedICFGGlobalsImpl.cpp index 3e91de6f3..32e42b57c 100644 --- a/lib/PhasarLLVM/ControlFlow/LLVMBasedICFGGlobalsImpl.cpp +++ b/lib/PhasarLLVM/ControlFlow/LLVMBasedICFGGlobalsImpl.cpp @@ -124,9 +124,10 @@ static llvm::Function *createDtorCallerForModule( &RegisteredDtors) { auto *PhasarDtorCaller = llvm::cast( - Mod.getOrInsertFunction("__psrGlobalDtorsCaller." + - getReducedModuleName(Mod), - llvm::Type::getVoidTy(Mod.getContext())) + Mod.getOrInsertFunction( + LLVMBasedICFG::GlobalCRuntimeDtorsCallerName.str() + '.' 
+ + getReducedModuleName(Mod), + llvm::Type::getVoidTy(Mod.getContext())) .getCallee()); auto *BB = @@ -195,7 +196,7 @@ static std::pair buildCRuntimeGlobalDtorsModel( auto &CTX = M.getContext(); auto *Cleanup = llvm::cast( - M.getOrInsertFunction("__psrCRuntimeGlobalDtorsModel", + M.getOrInsertFunction(LLVMBasedICFG::GlobalCRuntimeDtorModelName, llvm::Type::getVoidTy(CTX)) .getCallee()); @@ -301,7 +302,7 @@ llvm::Function *LLVMBasedICFG::buildCRuntimeGlobalCtorsDtorsModel( } else { auto UEntrySelectorFn = M.getOrInsertFunction( - "__psrCRuntimeUserEntrySelector", llvm::Type::getInt32Ty(CTX)); + GlobalCRuntimeUserEntrySelectorName, llvm::Type::getInt32Ty(CTX)); auto *UEntrySelector = IRB.CreateCall(UEntrySelectorFn); diff --git a/unittests/PhasarLLVM/ControlFlow/LLVMBasedICFGGlobCtorDtorTest.cpp b/unittests/PhasarLLVM/ControlFlow/LLVMBasedICFGGlobCtorDtorTest.cpp index f4e913c44..0977c8189 100644 --- a/unittests/PhasarLLVM/ControlFlow/LLVMBasedICFGGlobCtorDtorTest.cpp +++ b/unittests/PhasarLLVM/ControlFlow/LLVMBasedICFGGlobCtorDtorTest.cpp @@ -93,9 +93,10 @@ TEST_F(LLVMBasedICFGGlobCtorDtorTest, CtorTest) { // GlobalCtor->print(llvm::outs()); - ensureFunctionOrdering(GlobalCtor, ICFG, - {{"_GLOBAL__sub_I_globals_ctor_1.cpp", "main"}, - {"main", "__psrCRuntimeGlobalDtorsModel"}}); + ensureFunctionOrdering( + GlobalCtor, ICFG, + {{"_GLOBAL__sub_I_globals_ctor_1.cpp", "main"}, + {"main", LLVMBasedICFG::GlobalCRuntimeDtorModelName}}); } TEST_F(LLVMBasedICFGGlobCtorDtorTest, CtorTest2) { @@ -144,10 +145,12 @@ TEST_F(LLVMBasedICFGGlobCtorDtorTest, DtorTest1) { ensureFunctionOrdering( GlobalCtor, ICFG, {{"_GLOBAL__sub_I_globals_dtor_1.cpp", "main"}, - {"main", "__psrGlobalDtorsCaller.globals_dtor_1_cpp.ll"}}); + {"main", LLVMBasedICFG::GlobalCRuntimeDtorsCallerName.str() + + ".globals_dtor_1_cpp.ll"}}); auto *GlobalDtor = - IRDB.getFunction("__psrGlobalDtorsCaller.globals_dtor_1_cpp.ll"); + IRDB.getFunction(LLVMBasedICFG::GlobalCRuntimeDtorsCallerName.str() + + 
".globals_dtor_1_cpp.ll"); ASSERT_NE(nullptr, GlobalDtor); diff --git a/unittests/PhasarLLVM/ControlFlow/LLVMBasedICFGTest.cpp b/unittests/PhasarLLVM/ControlFlow/LLVMBasedICFGTest.cpp index b3894ee04..249283fde 100644 --- a/unittests/PhasarLLVM/ControlFlow/LLVMBasedICFGTest.cpp +++ b/unittests/PhasarLLVM/ControlFlow/LLVMBasedICFGTest.cpp @@ -83,9 +83,9 @@ TEST(LLVMBasedICFGTest, StaticCallSite_2b) { const llvm::Function *FOO = IRDB.getFunctionDefinition("foo"); const llvm::Function *BAR = IRDB.getFunctionDefinition("bar"); const llvm::Function *CTOR = - IRDB.getFunctionDefinition("__psrCRuntimeGlobalCtorsModel"); + IRDB.getFunctionDefinition(LLVMBasedICFG::GlobalCRuntimeModelName); const llvm::Function *DTOR = - IRDB.getFunctionDefinition("__psrCRuntimeGlobalDtorsModel"); + IRDB.getFunctionDefinition(LLVMBasedICFG::GlobalCRuntimeDtorModelName); ASSERT_TRUE(F); ASSERT_TRUE(FOO); ASSERT_TRUE(BAR); From 94a0e47cba8601f286fbf02248b3395958458bc0 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel <52407375+fabianbs96@users.noreply.github.com> Date: Thu, 21 Sep 2023 16:48:42 +0200 Subject: [PATCH 3/5] Fix Backward ICFG (#660) * Fix backwards ICFG (not all required interface functions were implemented) + use explicit template instantiation to make sure, the interface is fully implemented in the future * pre-commit --- include/phasar/ControlFlow/ICFGBase.h | 9 +-------- .../PhasarLLVM/ControlFlow/LLVMBasedBackwardICFG.h | 10 +++++++++- include/phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h | 2 ++ lib/PhasarLLVM/ControlFlow/LLVMBasedBackwardICFG.cpp | 9 ++++++++- lib/PhasarLLVM/ControlFlow/LLVMBasedICFG.cpp | 2 ++ 5 files changed, 22 insertions(+), 10 deletions(-) diff --git a/include/phasar/ControlFlow/ICFGBase.h b/include/phasar/ControlFlow/ICFGBase.h index 1f62fb69a..fea37796c 100644 --- a/include/phasar/ControlFlow/ICFGBase.h +++ b/include/phasar/ControlFlow/ICFGBase.h @@ -101,14 +101,7 @@ template class ICFGBase { n_t>); return self().getReturnSitesOfCallAtImpl(Inst); } - 
/// Returns an iterable range of all global initializer functions - [[nodiscard]] decltype(auto) - getGlobalInitializers(ByConstRef Fun) const { - static_assert( - is_iterable_over_v); - return self().getGlobalInitializersImpl(Fun); - } + /// Prints the underlying call-graph as DOT to the given output-stream void print(llvm::raw_ostream &OS = llvm::outs()) const { self().printImpl(OS); diff --git a/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedBackwardICFG.h b/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedBackwardICFG.h index 10a342488..101718a42 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedBackwardICFG.h +++ b/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedBackwardICFG.h @@ -19,9 +19,14 @@ namespace psr { class LLVMBasedICFG; +class LLVMBasedBackwardICFG; +template class CallGraph; + +template <> +struct CFGTraits : CFGTraits {}; class LLVMBasedBackwardICFG : public LLVMBasedBackwardCFG, - public ICFGBase { + public ICFGBase { friend ICFGBase; class LLVMBackwardRet { @@ -60,6 +65,7 @@ class LLVMBasedBackwardICFG : public LLVMBasedBackwardCFG, getReturnSitesOfCallAtImpl(n_t Inst) const; void printImpl(llvm::raw_ostream &OS) const; [[nodiscard]] nlohmann::json getAsJsonImpl() const; + [[nodiscard]] const CallGraph &getCallGraphImpl() const noexcept; llvm::LLVMContext BackwardRetsCtx; llvm::DenseMap BackwardRets; @@ -68,6 +74,8 @@ class LLVMBasedBackwardICFG : public LLVMBasedBackwardCFG, LLVMBasedICFG *ForwardICFG{}; }; + +extern template class ICFGBase; } // namespace psr #endif diff --git a/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h b/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h index db0b492a1..f1af71970 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h +++ b/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h @@ -162,6 +162,8 @@ class LLVMBasedICFG : public LLVMBasedCFG, public ICFGBase { LLVMProjectIRDB *IRDB = nullptr; MaybeUniquePtr TH; }; + +extern template class ICFGBase; } // namespace psr #endif diff 
--git a/lib/PhasarLLVM/ControlFlow/LLVMBasedBackwardICFG.cpp b/lib/PhasarLLVM/ControlFlow/LLVMBasedBackwardICFG.cpp index 6edadac34..0ca199d25 100644 --- a/lib/PhasarLLVM/ControlFlow/LLVMBasedBackwardICFG.cpp +++ b/lib/PhasarLLVM/ControlFlow/LLVMBasedBackwardICFG.cpp @@ -9,7 +9,6 @@ #include "phasar/PhasarLLVM/ControlFlow/LLVMBasedBackwardICFG.h" -#include "phasar/PhasarLLVM/ControlFlow/LLVMBasedCFG.h" #include "phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h" namespace psr { @@ -68,4 +67,12 @@ void LLVMBasedBackwardICFG::printImpl(llvm::raw_ostream &OS) const { nlohmann::json LLVMBasedBackwardICFG::getAsJsonImpl() const { return ForwardICFG->getAsJson(); } + +auto LLVMBasedBackwardICFG::getCallGraphImpl() const noexcept + -> const CallGraph & { + return ForwardICFG->getCallGraph(); +} + +template class ICFGBase; + } // namespace psr diff --git a/lib/PhasarLLVM/ControlFlow/LLVMBasedICFG.cpp b/lib/PhasarLLVM/ControlFlow/LLVMBasedICFG.cpp index 20ca70d3b..87b2279f2 100644 --- a/lib/PhasarLLVM/ControlFlow/LLVMBasedICFG.cpp +++ b/lib/PhasarLLVM/ControlFlow/LLVMBasedICFG.cpp @@ -457,4 +457,6 @@ void LLVMBasedICFG::printImpl(llvm::raw_ostream &OS) const { [this](n_t Inst) { return IRDB->getInstructionId(Inst); }); } +template class ICFGBase; + } // namespace psr From 1fafc4467a553d5ca877709ec3b70e558e5fa2b7 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel <52407375+fabianbs96@users.noreply.github.com> Date: Thu, 21 Sep 2023 17:37:12 +0200 Subject: [PATCH 4/5] Fix Taint Analysis (#661) * Handle Alias indirection and return-POI in taint analysis * pre-commit --- .../DataFlow/IfdsIde/LLVMFlowFunctions.h | 46 ++-- .../IfdsIde/Problems/IFDSTaintAnalysis.h | 4 +- include/phasar/Utils/TypeTraits.h | 4 + .../IfdsIde/Problems/IFDSTaintAnalysis.cpp | 205 ++++++++++++++---- .../taint_analysis/CMakeLists.txt | 2 + .../taint_analysis/double_free_01.c | 7 + .../taint_analysis/double_free_02.c | 9 + .../Problems/IFDSTaintAnalysisTest.cpp | 112 +++++++--- 8 files changed, 294 insertions(+), 
95 deletions(-) create mode 100644 test/llvm_test_code/taint_analysis/double_free_01.c create mode 100644 test/llvm_test_code/taint_analysis/double_free_02.c diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/LLVMFlowFunctions.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/LLVMFlowFunctions.h index dba21e5d5..e994671bb 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/LLVMFlowFunctions.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/LLVMFlowFunctions.h @@ -239,7 +239,8 @@ mapFactsToCallee(const llvm::CallBase *CallSite, const llvm::Function *DestFun, /// /// Propagates the return value back to the call-site and based on the /// PropagateParameter predicate propagates back parameters holding as dataflow -/// facts. +/// facts. The resulting out-set of dataflow facts can be post-processed if +/// necessary. /// /// Let a call-site cs: r = fun(..., ax, ...) a function prototype fun(..., /// px, ...) and an exit statement exit: return rv. @@ -252,30 +253,30 @@ mapFactsToCallee(const llvm::CallBase *CallSite, const llvm::Function *DestFun, /// f(x) = ({ax} if PropagateParameter(ax, x) else {}) union ({r} if /// PropagateRet(rv, x) else {}). 
/// -template , - typename FnParam = std::equal_to, - typename FnRet = std::equal_to, - typename DCtor = DefaultConstruct, - typename = std::enable_if_t< - std::is_invocable_r_v && - std::is_invocable_r_v>> -FlowFunctionPtrType -mapFactsToCaller(const llvm::CallBase *CallSite, - const llvm::Instruction *ExitInst, - FnParam &&PropagateParameter = {}, FnRet &&PropagateRet = {}, - DCtor &&FactConstructor = {}, bool PropagateGlobals = true, - bool PropagateZeroToCaller = true) { +template < + typename D = const llvm::Value *, typename Container = std::set, + typename FnParam = std::equal_to, typename FnRet = std::equal_to, + typename DCtor = DefaultConstruct, typename PostProcessFn = IgnoreArgs, + typename = std::enable_if_t< + std::is_invocable_r_v && + std::is_invocable_r_v>> +FlowFunctionPtrType mapFactsToCaller( + const llvm::CallBase *CallSite, const llvm::Instruction *ExitInst, + FnParam &&PropagateParameter = {}, FnRet &&PropagateRet = {}, + DCtor &&FactConstructor = {}, bool PropagateGlobals = true, + bool PropagateZeroToCaller = true, PostProcessFn &&PostProcess = {}) { struct Mapper : public FlowFunction { Mapper(const llvm::CallBase *CallSite, const llvm::Instruction *ExitInst, bool PropagateGlobals, FnParam &&PropagateParameter, FnRet &&PropagateRet, DCtor &&FactConstructor, - bool PropagateZeroToCaller) + bool PropagateZeroToCaller, PostProcessFn &&PostProcess) : CSAndPropGlob(CallSite, PropagateGlobals), ExitInstAndPropZero(ExitInst, PropagateZeroToCaller), PropArg(std::forward(PropagateParameter)), PropRet(std::forward(PropagateRet)), - FactConstructor(std::forward(FactConstructor)) {} + FactConstructor(std::forward(FactConstructor)), + PostProcess(std::forward(PostProcess)) {} Container computeTargets(D Source) override { Container Res; @@ -337,6 +338,8 @@ mapFactsToCaller(const llvm::CallBase *CallSite, } } + std::invoke(PostProcess, Res); + return Res; } @@ -346,13 +349,14 @@ mapFactsToCaller(const llvm::CallBase *CallSite, [[no_unique_address]] 
std::decay_t PropArg; [[no_unique_address]] std::decay_t PropRet; [[no_unique_address]] std::decay_t FactConstructor; + [[no_unique_address]] std::decay_t PostProcess; }; - return std::make_shared(CallSite, ExitInst, PropagateGlobals, - std::forward(PropagateParameter), - std::forward(PropagateRet), - std::forward(FactConstructor), - PropagateZeroToCaller); + return std::make_shared( + CallSite, ExitInst, PropagateGlobals, + std::forward(PropagateParameter), + std::forward(PropagateRet), std::forward(FactConstructor), + PropagateZeroToCaller, std::forward(PostProcess)); } //===----------------------------------------------------------------------===// diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSTaintAnalysis.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSTaintAnalysis.h index 5e6d85ccf..6c58a032d 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSTaintAnalysis.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSTaintAnalysis.h @@ -55,7 +55,8 @@ class IFDSTaintAnalysis */ IFDSTaintAnalysis(const LLVMProjectIRDB *IRDB, LLVMAliasInfoRef PT, const LLVMTaintConfig *Config, - std::vector EntryPoints = {"main"}); + std::vector EntryPoints = {"main"}, + bool TaintMainArgs = true); ~IFDSTaintAnalysis() override = default; @@ -85,6 +86,7 @@ class IFDSTaintAnalysis private: const LLVMTaintConfig *Config{}; LLVMAliasInfoRef PT{}; + bool TaintMainArgs{}; bool isSourceCall(const llvm::CallBase *CB, const llvm::Function *Callee) const; diff --git a/include/phasar/Utils/TypeTraits.h b/include/phasar/Utils/TypeTraits.h index 745cc4b42..77bcfa36a 100644 --- a/include/phasar/Utils/TypeTraits.h +++ b/include/phasar/Utils/TypeTraits.h @@ -257,6 +257,10 @@ template struct DefaultConstruct { } }; +struct IgnoreArgs { + template void operator()(U &&.../*Val*/) noexcept {} +}; + template void reserveIfPossible(T &Container, size_t Capacity) { if constexpr (detail::has_reserve::value) { Container.reserve(Capacity); diff 
--git a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSTaintAnalysis.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSTaintAnalysis.cpp index 890c357ad..c7b2b6f72 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSTaintAnalysis.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSTaintAnalysis.cpp @@ -9,6 +9,8 @@ #include "phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSTaintAnalysis.h" +#include "phasar/DataFlow/IfdsIde/EntryPointUtils.h" +#include "phasar/DataFlow/IfdsIde/FlowFunctions.h" #include "phasar/DataFlow/IfdsIde/IDETabulationProblem.h" #include "phasar/PhasarLLVM/ControlFlow/LLVMBasedCFG.h" #include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" @@ -36,13 +38,16 @@ #include namespace psr { +using d_t = IFDSTaintAnalysis::d_t; +using container_type = IFDSTaintAnalysis::container_type; IFDSTaintAnalysis::IFDSTaintAnalysis(const LLVMProjectIRDB *IRDB, LLVMAliasInfoRef PT, const LLVMTaintConfig *Config, - std::vector EntryPoints) + std::vector EntryPoints, + bool TaintMainArgs) : IFDSTabulationProblem(IRDB, std::move(EntryPoints), createZeroValue()), - Config(Config), PT(PT) { + Config(Config), PT(PT), TaintMainArgs(TaintMainArgs) { assert(Config != nullptr); assert(PT); } @@ -110,29 +115,58 @@ bool IFDSTaintAnalysis::isSanitizerCall(const llvm::CallBase * /*CB*/, [this](const auto &Arg) { return Config->isSanitizer(&Arg); }); } +static bool canSkipAtContext(const llvm::Value *Val, + const llvm::Instruction *Context) noexcept { + if (const auto *Inst = llvm::dyn_cast(Val)) { + /// Mapping instructions between functions is done via the call-FF and + /// ret-FF + if (Inst->getFunction() != Context->getFunction()) { + return true; + } + if (Inst->getParent() == Context->getParent() && + Context->comesBefore(Inst)) { + // We will see that inst later + return true; + } + return false; + } + + if (const auto *Arg = llvm::dyn_cast(Val)) { + // An argument is only valid in the function it belongs to + if (Arg->getParent() != Context->getFunction()) { + return 
true; + } + } + return false; +} + +static bool isCompiletimeConstantData(const llvm::Value *Val) noexcept { + if (const auto *Glob = llvm::dyn_cast(Val)) { + // Data cannot flow into the readonly-data section + return Glob->isConstant(); + } + + return llvm::isa(Val) || llvm::isa(Val); +} + void IFDSTaintAnalysis::populateWithMayAliases( container_type &Facts, const llvm::Instruction *Context) const { container_type Tmp = Facts; for (const auto *Fact : Facts) { auto Aliases = PT.getAliasSet(Fact); for (const auto *Alias : *Aliases) { - if (const auto *Inst = llvm::dyn_cast(Alias)) { - /// Mapping instructions between functions is done via the call-FF and - /// ret-FF - if (Inst->getFunction() != Context->getFunction()) { - continue; - } - if (Inst->getParent() == Context->getParent() && - Context->comesBefore(Inst)) { - // We will see that inst later - continue; - } - } else if (const auto *Glob = - llvm::dyn_cast(Alias)) { - if (Glob != Fact && Glob->isConstant()) { - // Data cannot flow into the readonly-data section - continue; - } + if (canSkipAtContext(Alias, Context)) { + continue; + } + + if (isCompiletimeConstantData(Alias)) { + continue; + } + + if (const auto *Load = llvm::dyn_cast(Alias)) { + // Handle at least one level of indirection... 
+ const auto *PointerOp = Load->getPointerOperand()->stripPointerCasts(); + Tmp.insert(PointerOp); } Tmp.insert(Alias); @@ -148,6 +182,85 @@ void IFDSTaintAnalysis::populateWithMustAliases( /// may-aliases } +static IFDSTaintAnalysis::FlowFunctionPtrType transferAndKillFlow(d_t To, + d_t From) { + if (From->hasNUsesOrMore(2)) { + return FlowFunctionTemplates::transferFlow(To, From); + } + return FlowFunctionTemplates::lambdaFlow( + [To, From](d_t Source) -> container_type { + if (Source == From) { + return {To}; + } + if (Source == To) { + return {}; + } + return {Source}; + }); +} + +static IFDSTaintAnalysis::FlowFunctionPtrType +transferAndKillTwoFlows(d_t To, d_t From1, d_t From2) { + bool KillFrom1 = !From1->hasNUsesOrMore(2); + bool KillFrom2 = !From2->hasNUsesOrMore(2); + + if (KillFrom1) { + if (KillFrom2) { + return FlowFunctionTemplates::lambdaFlow( + [To, From1, From2](d_t Source) -> container_type { + if (Source == From1 || Source == From2) { + return {To}; + } + if (Source == To) { + return {}; + } + return {Source}; + }); + } + + return FlowFunctionTemplates::lambdaFlow( + [To, From1, From2](d_t Source) -> container_type { + if (Source == From1) { + return {To}; + } + if (Source == From2) { + return {Source, To}; + } + if (Source == To) { + return {}; + } + return {Source}; + }); + } + + if (KillFrom2) { + return FlowFunctionTemplates::lambdaFlow( + [To, From1, From2](d_t Source) -> container_type { + if (Source == From1) { + return {Source, To}; + } + if (Source == From2) { + return {To}; + } + if (Source == To) { + return {}; + } + return {Source}; + }); + } + + return FlowFunctionTemplates::lambdaFlow( + [To, From1, From2](d_t Source) -> container_type { + if (Source == From1 || Source == From2) { + return {Source, To}; + } + if (Source == To) { + return {}; + } + return {Source}; + }); +} + auto IFDSTaintAnalysis::getNormalFlowFunction(n_t Curr, [[maybe_unused]] n_t Succ) -> FlowFunctionPtrType { @@ -156,47 +269,40 @@ auto 
IFDSTaintAnalysis::getNormalFlowFunction(n_t Curr, container_type Gen; Gen.insert(Store->getPointerOperand()); populateWithMayAliases(Gen, Store); - Gen.insert(Store->getValueOperand()); + if (Store->getValueOperand()->hasNUsesOrMore(2)) { + Gen.insert(Store->getValueOperand()); + } return lambdaFlow( [Store, Gen{std::move(Gen)}](d_t Source) -> container_type { - if (Store->getValueOperand() == Source) { - return Gen; - } if (Store->getPointerOperand() == Source) { return {}; } + if (Store->getValueOperand() == Source) { + return Gen; + } + return {Source}; }); } // If a tainted value is loaded, the loaded value is of course tainted if (const auto *Load = llvm::dyn_cast(Curr)) { - return transferFlow(Load, Load->getPointerOperand()); + return transferAndKillFlow(Load, Load->getPointerOperand()); } // Check if an address is computed from a tainted base pointer of an // aggregated object if (const auto *GEP = llvm::dyn_cast(Curr)) { - return transferFlow(GEP, GEP->getPointerOperand()); + return transferAndKillFlow(GEP, GEP->getPointerOperand()); } // Check if a tainted value is extracted and taint the targets of // the extract operation accordingly if (const auto *Extract = llvm::dyn_cast(Curr)) { - return transferFlow(Extract, Extract->getAggregateOperand()); + return transferAndKillFlow(Extract, Extract->getAggregateOperand()); } if (const auto *Insert = llvm::dyn_cast(Curr)) { - return lambdaFlow([Insert](d_t Source) -> container_type { - if (Source == Insert->getAggregateOperand() || - Source == Insert->getInsertedValueOperand()) { - return {Source, Insert}; - } - - if (Source == Insert) { - return {}; - } - - return {Source}; - }); + return transferAndKillTwoFlows(Insert, Insert->getAggregateOperand(), + Insert->getInsertedValueOperand()); } if (const auto *Cast = llvm::dyn_cast(Curr)) { @@ -234,7 +340,11 @@ auto IFDSTaintAnalysis::getRetFlowFunction(n_t CallSite, f_t /*CalleeFun*/, [](d_t Formal, d_t Source) { return Formal == Source && 
Formal->getType()->isPointerTy(); }, - [](d_t RetVal, d_t Source) { return RetVal == Source; }); + [](d_t RetVal, d_t Source) { return RetVal == Source; }, {}, true, true, + [this, CallSite](container_type &Res) { + // Correctly handling return-POIs + populateWithMayAliases(Res, CallSite); + }); // All other stuff is killed at this point } @@ -332,19 +442,24 @@ auto IFDSTaintAnalysis::getSummaryFlowFunction([[maybe_unused]] n_t CallSite, auto IFDSTaintAnalysis::initialSeeds() -> InitialSeeds { PHASAR_LOG_LEVEL(DEBUG, "IFDSTaintAnalysis::initialSeeds()"); - // If main function is the entry point, commandline arguments have to be - // tainted. Otherwise we just use the zero value to initialize the analysis. + InitialSeeds Seeds; LLVMBasedCFG C; - forallStartingPoints(EntryPoints, IRDB, C, [this, &Seeds](n_t SP) { - Seeds.addSeed(SP, getZeroValue()); - if (SP->getFunction()->getName() == "main") { + addSeedsForStartingPoints(EntryPoints, IRDB, C, Seeds, getZeroValue(), + psr::BinaryDomain::BOTTOM); + + if (TaintMainArgs && llvm::is_contained(EntryPoints, "main")) { + // If main function is the entry point, commandline arguments have to be + // tainted. Otherwise we just use the zero value to initialize the analysis. 
+ + const auto *MainF = IRDB->getFunction("main"); + for (const auto *SP : C.getStartPointsOf(MainF)) { for (const auto &Arg : SP->getFunction()->args()) { Seeds.addSeed(SP, &Arg); } } - }); + } return Seeds; } diff --git a/test/llvm_test_code/taint_analysis/CMakeLists.txt b/test/llvm_test_code/taint_analysis/CMakeLists.txt index 1571c8432..cbefe3382 100644 --- a/test/llvm_test_code/taint_analysis/CMakeLists.txt +++ b/test/llvm_test_code/taint_analysis/CMakeLists.txt @@ -33,6 +33,8 @@ set(NoMem2regSources struct_member.cpp dynamic_memory.cpp dynamic_memory_simple.cpp + double_free_01.c + double_free_02.c ) foreach(TEST_SRC ${NoMem2regSources}) diff --git a/test/llvm_test_code/taint_analysis/double_free_01.c b/test/llvm_test_code/taint_analysis/double_free_01.c new file mode 100644 index 000000000..9ef43c7db --- /dev/null +++ b/test/llvm_test_code/taint_analysis/double_free_01.c @@ -0,0 +1,7 @@ +#include + +int main() { + void *X = malloc(32); + free(X); + free(X); +} diff --git a/test/llvm_test_code/taint_analysis/double_free_02.c b/test/llvm_test_code/taint_analysis/double_free_02.c new file mode 100644 index 000000000..73d1d06ac --- /dev/null +++ b/test/llvm_test_code/taint_analysis/double_free_02.c @@ -0,0 +1,9 @@ +#include + +void doFree(void *P) { free(P); } + +int main() { + void *X = malloc(32); + doFree(X); + free(X); +} diff --git a/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSTaintAnalysisTest.cpp b/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSTaintAnalysisTest.cpp index d5352abc9..0812fafcf 100644 --- a/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSTaintAnalysisTest.cpp +++ b/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSTaintAnalysisTest.cpp @@ -8,6 +8,7 @@ #include "phasar/PhasarLLVM/Pointer/LLVMAliasSet.h" #include "phasar/PhasarLLVM/SimpleAnalysisConstructor.h" #include "phasar/PhasarLLVM/TaintConfig/LLVMTaintConfig.h" +#include "phasar/PhasarLLVM/TaintConfig/TaintConfigBase.h" #include 
"phasar/PhasarLLVM/TypeHierarchy/LLVMTypeHierarchy.h" #include "TestConfig.h" @@ -24,7 +25,7 @@ class IFDSTaintAnalysisTest : public ::testing::Test { protected: static constexpr auto PathToLlFiles = PHASAR_BUILD_SUBFOLDER("taint_analysis/"); - const std::vector EntryPoints = {"main"}; + static inline const std::vector EntryPoints = {"main"}; std::optional HA; @@ -34,42 +35,79 @@ class IFDSTaintAnalysisTest : public ::testing::Test { IFDSTaintAnalysisTest() = default; ~IFDSTaintAnalysisTest() override = default; + static LLVMTaintConfig getDefaultConfig() { + auto SourceCB = [](const llvm::Instruction *Inst) { + std::set Ret; + if (const auto *Call = llvm::dyn_cast(Inst); + Call && Call->getCalledFunction() && + Call->getCalledFunction()->getName() == "_Z6sourcev") { + Ret.insert(Call); + } + return Ret; + }; + auto SinkCB = [](const llvm::Instruction *Inst) { + std::set Ret; + if (const auto *Call = llvm::dyn_cast(Inst); + Call && Call->getCalledFunction() && + Call->getCalledFunction()->getName() == "_Z4sinki") { + assert(Call->arg_size() > 0); + Ret.insert(Call->getArgOperand(0)); + } + return Ret; + }; + return LLVMTaintConfig(std::move(SourceCB), std::move(SinkCB)); + } + + static LLVMTaintConfig getDoubleFreeConfig() { + auto SourceCB = [](const llvm::Instruction *Inst) { + std::set Ret; + if (const auto *Call = llvm::dyn_cast(Inst); + Call && Call->getCalledFunction() && + Call->getCalledFunction()->getName() == "free") { + Ret.insert(Call->getArgOperand(0)); + } + return Ret; + }; + + return LLVMTaintConfig(SourceCB, SourceCB); + } + void initialize(const llvm::Twine &IRFile) { HA.emplace(IRFile, EntryPoints); - LLVMTaintConfig::TaintDescriptionCallBackTy SourceCB = - [](const llvm::Instruction *Inst) { - std::set Ret; - if (const auto *Call = llvm::dyn_cast(Inst); - Call && Call->getCalledFunction() && - Call->getCalledFunction()->getName() == "_Z6sourcev") { - Ret.insert(Call); - } - return Ret; - }; - LLVMTaintConfig::TaintDescriptionCallBackTy SinkCB = 
- [](const llvm::Instruction *Inst) { - std::set Ret; - if (const auto *Call = llvm::dyn_cast(Inst); - Call && Call->getCalledFunction() && - Call->getCalledFunction()->getName() == "_Z4sinki") { - assert(Call->arg_size() > 0); - Ret.insert(Call->getArgOperand(0)); - } - return Ret; - }; - TSF.emplace(std::move(SourceCB), std::move(SinkCB)); + + if (!TSF) { + TSF = getDefaultConfig(); + } TaintProblem = createAnalysisProblem(*HA, &*TSF, EntryPoints); } - void SetUp() override { ValueAnnotationPass::resetValueID(); } + static void doAnalysis(const llvm::Twine &IRFile, + const LLVMTaintConfig &Config, + const map> &GroundTruth) { + HelperAnalyses HA(PathToLlFiles + IRFile, EntryPoints); + + auto TaintProblem = + createAnalysisProblem(HA, &Config, EntryPoints); - void TearDown() override {} + IFDSSolver TaintSolver(TaintProblem, &HA.getICFG()); + TaintSolver.solve(); - void compareResults(map> &GroundTruth) { - // std::map> Leaks; + TaintSolver.dumpResults(); + + compare(TaintProblem.Leaks, GroundTruth); + } + + static void doAnalysis(const llvm::Twine &IRFile, + const map> &GroundTruth) { + doAnalysis(IRFile, getDefaultConfig(), GroundTruth); + } + + template + static void compare(const LeaksTy &Leaks, + const map> &GroundTruth) { map> FoundLeaks; - for (const auto &Leak : TaintProblem->Leaks) { + for (const auto &Leak : Leaks) { int SinkId = stoi(getMetaDataID(Leak.first)); set LeakedValueIds; for (const auto *LV : Leak.second) { @@ -79,6 +117,10 @@ class IFDSTaintAnalysisTest : public ::testing::Test { } EXPECT_EQ(FoundLeaks, GroundTruth); } + + void compareResults(const map> &GroundTruth) noexcept { + compare(TaintProblem->Leaks, GroundTruth); + } }; // Test Fixture TEST_F(IFDSTaintAnalysisTest, TaintTest_01) { @@ -257,6 +299,20 @@ TEST_F(IFDSTaintAnalysisTest, TaintTest_ExceptionHandling_10) { compareResults(GroundTruth); } +TEST_F(IFDSTaintAnalysisTest, TaintTest_DoubleFree_01) { + doAnalysis("double_free_01_c.ll", getDoubleFreeConfig(), + { + {6, {"5"}}, + }); 
+} + +TEST_F(IFDSTaintAnalysisTest, TaintTest_DoubleFree_02) { + doAnalysis("double_free_02_c.ll", getDoubleFreeConfig(), + { + {11, {"10"}}, + }); +} + int main(int Argc, char **Argv) { ::testing::InitGoogleTest(&Argc, Argv); return RUN_ALL_TESTS(); From 06da95b2905bb3b8ba1db24337145e969ef7b9f0 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel <52407375+fabianbs96@users.noreply.github.com> Date: Thu, 21 Sep 2023 17:58:36 +0200 Subject: [PATCH 5/5] HelperAnalyses with existing Module (#667) * Allow creating a HelperAnalyses object from an already existing LLVM Module * minor API extension of ProjectIRDB --- .../phasar/PhasarLLVM/DB/LLVMProjectIRDB.h | 21 +++-- include/phasar/PhasarLLVM/HelperAnalyses.h | 10 ++ .../phasar/PhasarLLVM/HelperAnalysisConfig.h | 3 + lib/PhasarLLVM/DB/LLVMProjectIRDB.cpp | 92 +++++++++++-------- lib/PhasarLLVM/HelperAnalyses.cpp | 14 +++ 5 files changed, 95 insertions(+), 45 deletions(-) diff --git a/include/phasar/PhasarLLVM/DB/LLVMProjectIRDB.h b/include/phasar/PhasarLLVM/DB/LLVMProjectIRDB.h index 51285ef8e..590ffafa6 100644 --- a/include/phasar/PhasarLLVM/DB/LLVMProjectIRDB.h +++ b/include/phasar/PhasarLLVM/DB/LLVMProjectIRDB.h @@ -41,19 +41,23 @@ class LLVMProjectIRDB : public ProjectIRDBBase { friend ProjectIRDBBase; public: - /// Reads and parses the given LLVM IR file and owns the resulting IR Module + /// Reads and parses the given LLVM IR file and owns the resulting IR Module. + /// If an error occurs, an error message is written to stderr and subsequent + /// calls to isValid() return false. explicit LLVMProjectIRDB(const llvm::Twine &IRFileName); /// Initializes the new ProjectIRDB with the given IR Module _without_ taking - /// ownership. The module is not being preprocessed. + /// ownership. The module is optionally being preprocessed. /// /// CAUTION: Do not manage the same LLVM Module with multiple LLVMProjectIRDB /// instances at the same time! 
This will confuse the ModulesToSlotTracker - explicit LLVMProjectIRDB(llvm::Module *Mod); + explicit LLVMProjectIRDB(llvm::Module *Mod, bool DoPreprocessing = true); /// Initializes the new ProjectIRDB with the given IR Module and takes - /// ownership of it + /// ownership of it. The module is optionally being preprocessed. explicit LLVMProjectIRDB(std::unique_ptr Mod, bool DoPreprocessing = true); - /// Parses the given LLVM IR file and owns the resulting IR Module + /// Parses the given LLVM IR file and owns the resulting IR Module. + /// If an error occurs, an error message is written to stderr and subsequent + /// calls to isValid() return false. explicit LLVMProjectIRDB(llvm::MemoryBufferRef Buf); LLVMProjectIRDB(const LLVMProjectIRDB &) = delete; @@ -64,6 +68,9 @@ class LLVMProjectIRDB : public ProjectIRDBBase { [[nodiscard]] static std::unique_ptr getParsedIRModuleOrNull(const llvm::Twine &IRFileName, llvm::LLVMContext &Ctx) noexcept; + [[nodiscard]] static std::unique_ptr + getParsedIRModuleOrNull(llvm::MemoryBufferRef IRFileContent, + llvm::LLVMContext &Ctx) noexcept; /// Also use the const overload using ProjectIRDBBase::getFunction; @@ -81,7 +88,7 @@ class LLVMProjectIRDB : public ProjectIRDBBase { /// Also use the const overload using ProjectIRDBBase::getModule; /// Non-const overload - [[nodiscard]] llvm::Module *getModule() { return Mod.get(); } + [[nodiscard]] llvm::Module *getModule() noexcept { return Mod.get(); } /// Similar to getInstruction(size_t), but is also able to return global /// variables by id @@ -96,6 +103,8 @@ class LLVMProjectIRDB : public ProjectIRDBBase { /// called twice for the same function. Use with care! 
void insertFunction(llvm::Function *F, bool DoPreprocessing = true); + explicit operator bool() const noexcept { return isValid(); } + private: [[nodiscard]] m_t getModuleImpl() const noexcept { return Mod.get(); } [[nodiscard]] bool debugInfoAvailableImpl() const; diff --git a/include/phasar/PhasarLLVM/HelperAnalyses.h b/include/phasar/PhasarLLVM/HelperAnalyses.h index 39a06ddf9..f8cb8029b 100644 --- a/include/phasar/PhasarLLVM/HelperAnalyses.h +++ b/include/phasar/PhasarLLVM/HelperAnalyses.h @@ -20,6 +20,10 @@ #include #include +namespace llvm { +class Module; +} // namespace llvm + namespace psr { class LLVMProjectIRDB; class LLVMTypeHierarchy; @@ -46,6 +50,12 @@ class HelperAnalyses { // NOLINT(cppcoreguidelines-special-member-functions) explicit HelperAnalyses(const char *IRFile, std::vector EntryPoints, HelperAnalysisConfig Config = {}); + explicit HelperAnalyses(llvm::Module *IRModule, + std::vector EntryPoints, + HelperAnalysisConfig Config = {}); + explicit HelperAnalyses(std::unique_ptr IRModule, + std::vector EntryPoints, + HelperAnalysisConfig Config = {}); ~HelperAnalyses() noexcept; [[nodiscard]] LLVMProjectIRDB &getProjectIRDB(); diff --git a/include/phasar/PhasarLLVM/HelperAnalysisConfig.h b/include/phasar/PhasarLLVM/HelperAnalysisConfig.h index 4fbd87c62..df59c41c5 100644 --- a/include/phasar/PhasarLLVM/HelperAnalysisConfig.h +++ b/include/phasar/PhasarLLVM/HelperAnalysisConfig.h @@ -27,6 +27,9 @@ struct HelperAnalysisConfig { Soundness SoundnessLevel = Soundness::Soundy; bool AutoGlobalSupport = true; bool AllowLazyPTS = true; + /// Preprocess a ProjectIRDB even if it gets constructed by an already + /// existing llvm::Module + bool PreprocessExistingModule = true; HelperAnalysisConfig &&withCGType(CallGraphAnalysisType CGTy) &&noexcept { this->CGTy = CGTy; diff --git a/lib/PhasarLLVM/DB/LLVMProjectIRDB.cpp b/lib/PhasarLLVM/DB/LLVMProjectIRDB.cpp index c498c2c91..d4632d49f 100644 --- a/lib/PhasarLLVM/DB/LLVMProjectIRDB.cpp +++ 
b/lib/PhasarLLVM/DB/LLVMProjectIRDB.cpp @@ -13,15 +13,57 @@ #include "llvm/IRReader/IRReader.h" #include "llvm/Support/Debug.h" #include "llvm/Support/FormattedStream.h" +#include "llvm/Support/MemoryBufferRef.h" #include "llvm/Support/SourceMgr.h" #include namespace psr { +std::unique_ptr +LLVMProjectIRDB::getParsedIRModuleOrNull(llvm::MemoryBufferRef IRFileContent, + llvm::LLVMContext &Ctx) noexcept { + + llvm::SMDiagnostic Diag; + std::unique_ptr M = llvm::parseIR(IRFileContent, Diag, Ctx); + bool BrokenDebugInfo = false; + if (M == nullptr) { + Diag.print(nullptr, llvm::errs()); + return nullptr; + } + /* Crash in presence of llvm-3.9.1 module (segfault) */ + if (M == nullptr || llvm::verifyModule(*M, &llvm::errs(), &BrokenDebugInfo)) { + PHASAR_LOG_LEVEL(ERROR, IRFileContent.getBufferIdentifier() + << " could not be parsed correctly!"); + return nullptr; + } + if (BrokenDebugInfo) { + PHASAR_LOG_LEVEL(WARNING, "Debug info is broken!"); + } + return M; +} + +std::unique_ptr +LLVMProjectIRDB::getParsedIRModuleOrNull(const llvm::Twine &IRFileName, + llvm::LLVMContext &Ctx) noexcept { + // Look at LLVM's IRReader.cpp for reference + + auto FileOrErr = + llvm::MemoryBuffer::getFileOrSTDIN(IRFileName, /*IsText=*/true); + if (std::error_code EC = FileOrErr.getError()) { + llvm::SmallString<128> Buf; + auto Err = llvm::SMDiagnostic(IRFileName.toStringRef(Buf), + llvm::SourceMgr::DK_Error, + "Could not open input file: " + EC.message()); + Err.print(nullptr, llvm::errs()); + return nullptr; + } + return getParsedIRModuleOrNull(*FileOrErr.get(), Ctx); +} + LLVMProjectIRDB::LLVMProjectIRDB(const llvm::Twine &IRFileName) { - std::unique_ptr M = getParsedIRModuleOrNull(IRFileName, Ctx); + auto M = getParsedIRModuleOrNull(IRFileName, Ctx); if (!M) { return; @@ -87,10 +129,16 @@ void LLVMProjectIRDB::preprocessModule(llvm::Module *NonConstMod) { assert(InstToId.size() == IdToInst.size()); } -LLVMProjectIRDB::LLVMProjectIRDB(llvm::Module *Mod) : Mod(Mod) { 
+LLVMProjectIRDB::LLVMProjectIRDB(llvm::Module *Mod, bool DoPreprocessing) + : Mod(Mod) { assert(Mod != nullptr); ModulesToSlotTracker::setMSTForModule(Mod); - initInstructionIds(); + + if (DoPreprocessing) { + preprocessModule(Mod); + } else { + initInstructionIds(); + } } LLVMProjectIRDB::LLVMProjectIRDB(std::unique_ptr Mod, @@ -109,21 +157,10 @@ LLVMProjectIRDB::LLVMProjectIRDB(std::unique_ptr Mod, LLVMProjectIRDB::LLVMProjectIRDB(llvm::MemoryBufferRef Buf) { llvm::SMDiagnostic Diag; - std::unique_ptr M = llvm::parseIR(Buf, Diag, Ctx); - bool BrokenDebugInfo = false; - if (M == nullptr) { - Diag.print(nullptr, llvm::errs()); - return; - } - - if (llvm::verifyModule(*M, &llvm::errs(), &BrokenDebugInfo)) { - PHASAR_LOG_LEVEL(ERROR, Buf.getBufferIdentifier() - << " could not be parsed correctly!"); + auto M = getParsedIRModuleOrNull(Buf, Ctx); + if (!M) { return; } - if (BrokenDebugInfo) { - PHASAR_LOG_LEVEL(WARNING, "Debug info is broken!"); - } auto *NonConst = M.get(); Mod = std::move(M); @@ -137,29 +174,6 @@ LLVMProjectIRDB::~LLVMProjectIRDB() { } } -std::unique_ptr -LLVMProjectIRDB::getParsedIRModuleOrNull(const llvm::Twine &IRFileName, - llvm::LLVMContext &Ctx) noexcept { - llvm::SMDiagnostic Diag; - llvm::SmallString<256> Buf; - std::unique_ptr M = - llvm::parseIRFile(IRFileName.toStringRef(Buf), Diag, Ctx); - bool BrokenDebugInfo = false; - if (M == nullptr) { - Diag.print(nullptr, llvm::errs()); - return nullptr; - } - /* Crash in presence of llvm-3.9.1 module (segfault) */ - if (M == nullptr || llvm::verifyModule(*M, &llvm::errs(), &BrokenDebugInfo)) { - PHASAR_LOG_LEVEL(ERROR, IRFileName << " could not be parsed correctly!"); - return nullptr; - } - if (BrokenDebugInfo) { - PHASAR_LOG_LEVEL(WARNING, "Debug info is broken!"); - } - return M; -} - static llvm::Function * internalGetFunctionDefinition(const llvm::Module &M, llvm::StringRef FunctionName) { diff --git a/lib/PhasarLLVM/HelperAnalyses.cpp b/lib/PhasarLLVM/HelperAnalyses.cpp index 
e0d13925e..815f9d887 100644 --- a/lib/PhasarLLVM/HelperAnalyses.cpp +++ b/lib/PhasarLLVM/HelperAnalyses.cpp @@ -43,6 +43,20 @@ HelperAnalyses::HelperAnalyses(const char *IRFile, HelperAnalysisConfig Config) : HelperAnalyses(std::string(IRFile), std::move(EntryPoints), std::move(Config)) {} +HelperAnalyses::HelperAnalyses(llvm::Module *IRModule, + std::vector EntryPoints, + HelperAnalysisConfig Config) + : HelperAnalyses(std::string(), std::move(EntryPoints), std::move(Config)) { + this->IRDB = std::make_unique( + IRModule, Config.PreprocessExistingModule); +} +HelperAnalyses::HelperAnalyses(std::unique_ptr IRModule, + std::vector EntryPoints, + HelperAnalysisConfig Config) + : HelperAnalyses(std::string(), std::move(EntryPoints), std::move(Config)) { + this->IRDB = std::make_unique( + std::move(IRModule), Config.PreprocessExistingModule); +} HelperAnalyses::~HelperAnalyses() noexcept = default;