From 76fe0804c67d567e0a9b874373928e8bb4f98511 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Sun, 10 Sep 2023 13:38:21 +0200 Subject: [PATCH 1/3] Adds helper function to determine if a function was generated by phasar (#665) * Adds helper function to determine if a function was generated by phasar During global analysis, phasar generates helper function to correctly handle global ctors/dtors and other global code fragments. With the new checking function, users can determine whether a given function was generated by phasar or not, e.g., for global analysis. * get rid of magic string literals that name the various generated functions of phasar's global ctor/dtor modelling * minor style --------- Co-authored-by: Martin Mory Co-authored-by: Martin Mory Co-authored-by: Fabian Schiebel --- .../phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h | 12 ++++++++++++ lib/PhasarLLVM/ControlFlow/LLVMBasedICFG.cpp | 14 ++++++++++++++ .../ControlFlow/LLVMBasedICFGGlobalsImpl.cpp | 11 ++++++----- .../ControlFlow/LLVMBasedICFGGlobCtorDtorTest.cpp | 13 ++++++++----- .../PhasarLLVM/ControlFlow/LLVMBasedICFGTest.cpp | 4 ++-- 5 files changed, 42 insertions(+), 12 deletions(-) diff --git a/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h b/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h index e2171a31a..db0b492a1 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h +++ b/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h @@ -55,6 +55,15 @@ class LLVMBasedICFG : public LLVMBasedCFG, public ICFGBase { static constexpr llvm::StringLiteral GlobalCRuntimeModelName = "__psrCRuntimeGlobalCtorsModel"; + static constexpr llvm::StringLiteral GlobalCRuntimeDtorModelName = + "__psrCRuntimeGlobalDtorsModel"; + + static constexpr llvm::StringLiteral GlobalCRuntimeDtorsCallerName = + "__psrGlobalDtorsCaller"; + + static constexpr llvm::StringLiteral GlobalCRuntimeUserEntrySelectorName = + "__psrCRuntimeUserEntrySelector"; + /// Constructs the ICFG based on the given IRDB and the entry-points using a /// fixpoint iteration. This may take a long time. /// @@ -119,6 +128,9 @@ class LLVMBasedICFG : public LLVMBasedCFG, public ICFGBase { /// Gets the underlying IRDB [[nodiscard]] LLVMProjectIRDB *getIRDB() const noexcept { return IRDB; } + /// Returns true, if a function was generated by phasar. + [[nodiscard]] static bool isPhasarGenerated(const llvm::Function &) noexcept; + using CFGBase::print; using ICFGBase::print; diff --git a/lib/PhasarLLVM/ControlFlow/LLVMBasedICFG.cpp b/lib/PhasarLLVM/ControlFlow/LLVMBasedICFG.cpp index 42d715c27..20ca70d3b 100644 --- a/lib/PhasarLLVM/ControlFlow/LLVMBasedICFG.cpp +++ b/lib/PhasarLLVM/ControlFlow/LLVMBasedICFG.cpp @@ -29,6 +29,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instruction.h" #include "llvm/Support/ErrorHandling.h" @@ -380,6 +381,19 @@ LLVMBasedICFG::LLVMBasedICFG(LLVMProjectIRDB *IRDB, LLVMBasedICFG::~LLVMBasedICFG() = default; +bool LLVMBasedICFG::isPhasarGenerated(const llvm::Function &F) noexcept { + if (F.hasName()) { + llvm::StringRef FunctionName = F.getName(); + return llvm::StringSwitch(FunctionName) + .Cases(GlobalCRuntimeModelName, GlobalCRuntimeDtorModelName, + GlobalCRuntimeDtorsCallerName, + GlobalCRuntimeUserEntrySelectorName, true) + .Default(false); + } + + return false; +} + [[nodiscard]] FunctionRange LLVMBasedICFG::getAllFunctionsImpl() const { return IRDB->getAllFunctions(); } diff --git a/lib/PhasarLLVM/ControlFlow/LLVMBasedICFGGlobalsImpl.cpp b/lib/PhasarLLVM/ControlFlow/LLVMBasedICFGGlobalsImpl.cpp index 3e91de6f3..32e42b57c 100644 --- a/lib/PhasarLLVM/ControlFlow/LLVMBasedICFGGlobalsImpl.cpp +++ b/lib/PhasarLLVM/ControlFlow/LLVMBasedICFGGlobalsImpl.cpp @@ -124,9 +124,10 @@ static llvm::Function *createDtorCallerForModule( &RegisteredDtors) { auto *PhasarDtorCaller = llvm::cast( - Mod.getOrInsertFunction("__psrGlobalDtorsCaller." + - getReducedModuleName(Mod), - llvm::Type::getVoidTy(Mod.getContext())) + Mod.getOrInsertFunction( + LLVMBasedICFG::GlobalCRuntimeDtorsCallerName.str() + '.' + + getReducedModuleName(Mod), + llvm::Type::getVoidTy(Mod.getContext())) .getCallee()); auto *BB = @@ -195,7 +196,7 @@ static std::pair buildCRuntimeGlobalDtorsModel( auto &CTX = M.getContext(); auto *Cleanup = llvm::cast( - M.getOrInsertFunction("__psrCRuntimeGlobalDtorsModel", + M.getOrInsertFunction(LLVMBasedICFG::GlobalCRuntimeDtorModelName, llvm::Type::getVoidTy(CTX)) .getCallee()); @@ -301,7 +302,7 @@ llvm::Function *LLVMBasedICFG::buildCRuntimeGlobalCtorsDtorsModel( } else { auto UEntrySelectorFn = M.getOrInsertFunction( - "__psrCRuntimeUserEntrySelector", llvm::Type::getInt32Ty(CTX)); + GlobalCRuntimeUserEntrySelectorName, llvm::Type::getInt32Ty(CTX)); auto *UEntrySelector = IRB.CreateCall(UEntrySelectorFn); diff --git a/unittests/PhasarLLVM/ControlFlow/LLVMBasedICFGGlobCtorDtorTest.cpp b/unittests/PhasarLLVM/ControlFlow/LLVMBasedICFGGlobCtorDtorTest.cpp index f4e913c44..0977c8189 100644 --- a/unittests/PhasarLLVM/ControlFlow/LLVMBasedICFGGlobCtorDtorTest.cpp +++ b/unittests/PhasarLLVM/ControlFlow/LLVMBasedICFGGlobCtorDtorTest.cpp @@ -93,9 +93,10 @@ TEST_F(LLVMBasedICFGGlobCtorDtorTest, CtorTest) { // GlobalCtor->print(llvm::outs()); - ensureFunctionOrdering(GlobalCtor, ICFG, - {{"_GLOBAL__sub_I_globals_ctor_1.cpp", "main"}, - {"main", "__psrCRuntimeGlobalDtorsModel"}}); + ensureFunctionOrdering( + GlobalCtor, ICFG, + {{"_GLOBAL__sub_I_globals_ctor_1.cpp", "main"}, + {"main", LLVMBasedICFG::GlobalCRuntimeDtorModelName}}); } TEST_F(LLVMBasedICFGGlobCtorDtorTest, CtorTest2) { @@ -144,10 +145,12 @@ TEST_F(LLVMBasedICFGGlobCtorDtorTest, DtorTest1) { ensureFunctionOrdering( GlobalCtor, ICFG, {{"_GLOBAL__sub_I_globals_dtor_1.cpp", "main"}, - {"main", "__psrGlobalDtorsCaller.globals_dtor_1_cpp.ll"}}); + {"main", LLVMBasedICFG::GlobalCRuntimeDtorsCallerName.str() + + ".globals_dtor_1_cpp.ll"}}); auto *GlobalDtor = - IRDB.getFunction("__psrGlobalDtorsCaller.globals_dtor_1_cpp.ll"); + IRDB.getFunction(LLVMBasedICFG::GlobalCRuntimeDtorsCallerName.str() + + ".globals_dtor_1_cpp.ll"); ASSERT_NE(nullptr, GlobalDtor); diff --git a/unittests/PhasarLLVM/ControlFlow/LLVMBasedICFGTest.cpp b/unittests/PhasarLLVM/ControlFlow/LLVMBasedICFGTest.cpp index b3894ee04..249283fde 100644 --- a/unittests/PhasarLLVM/ControlFlow/LLVMBasedICFGTest.cpp +++ b/unittests/PhasarLLVM/ControlFlow/LLVMBasedICFGTest.cpp @@ -83,9 +83,9 @@ TEST(LLVMBasedICFGTest, StaticCallSite_2b) { const llvm::Function *FOO = IRDB.getFunctionDefinition("foo"); const llvm::Function *BAR = IRDB.getFunctionDefinition("bar"); const llvm::Function *CTOR = - IRDB.getFunctionDefinition("__psrCRuntimeGlobalCtorsModel"); + IRDB.getFunctionDefinition(LLVMBasedICFG::GlobalCRuntimeModelName); const llvm::Function *DTOR = - IRDB.getFunctionDefinition("__psrCRuntimeGlobalDtorsModel"); + IRDB.getFunctionDefinition(LLVMBasedICFG::GlobalCRuntimeDtorModelName); ASSERT_TRUE(F); ASSERT_TRUE(FOO); ASSERT_TRUE(BAR); From 94a0e47cba8601f286fbf02248b3395958458bc0 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel <52407375+fabianbs96@users.noreply.github.com> Date: Thu, 21 Sep 2023 16:48:42 +0200 Subject: [PATCH 2/3] Fix Backward ICFG (#660) * Fix backwards ICFG (not all required interface functions were implemented) + use explicit template instantiation to make sure, the interface is fully implemented in the future * pre-commit --- include/phasar/ControlFlow/ICFGBase.h | 9 +-------- .../PhasarLLVM/ControlFlow/LLVMBasedBackwardICFG.h | 10 +++++++++- include/phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h | 2 ++ lib/PhasarLLVM/ControlFlow/LLVMBasedBackwardICFG.cpp | 9 ++++++++- lib/PhasarLLVM/ControlFlow/LLVMBasedICFG.cpp | 2 ++ 5 files changed, 22 insertions(+), 10 deletions(-) diff --git a/include/phasar/ControlFlow/ICFGBase.h b/include/phasar/ControlFlow/ICFGBase.h index 1f62fb69a..fea37796c 100644 --- a/include/phasar/ControlFlow/ICFGBase.h +++ b/include/phasar/ControlFlow/ICFGBase.h @@ -101,14 +101,7 @@ template class ICFGBase { n_t>); return self().getReturnSitesOfCallAtImpl(Inst); } - /// Returns an iterable range of all global initializer functions - [[nodiscard]] decltype(auto) - getGlobalInitializers(ByConstRef Fun) const { - static_assert( - is_iterable_over_v); - return self().getGlobalInitializersImpl(Fun); - } + /// Prints the underlying call-graph as DOT to the given output-stream void print(llvm::raw_ostream &OS = llvm::outs()) const { self().printImpl(OS); diff --git a/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedBackwardICFG.h b/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedBackwardICFG.h index 10a342488..101718a42 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedBackwardICFG.h +++ b/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedBackwardICFG.h @@ -19,9 +19,14 @@ namespace psr { class LLVMBasedICFG; +class LLVMBasedBackwardICFG; +template class CallGraph; + +template <> +struct CFGTraits : CFGTraits {}; class LLVMBasedBackwardICFG : public LLVMBasedBackwardCFG, - public ICFGBase { + public ICFGBase { friend ICFGBase; class LLVMBackwardRet { @@ -60,6 +65,7 @@ class LLVMBasedBackwardICFG : public LLVMBasedBackwardCFG, getReturnSitesOfCallAtImpl(n_t Inst) const; void printImpl(llvm::raw_ostream &OS) const; [[nodiscard]] nlohmann::json getAsJsonImpl() const; + [[nodiscard]] const CallGraph &getCallGraphImpl() const noexcept; llvm::LLVMContext BackwardRetsCtx; llvm::DenseMap BackwardRets; @@ -68,6 +74,8 @@ class LLVMBasedBackwardICFG : public LLVMBasedBackwardCFG, LLVMBasedICFG *ForwardICFG{}; }; + +extern template class ICFGBase; } // namespace psr #endif diff --git a/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h b/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h index db0b492a1..f1af71970 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h +++ b/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h @@ -162,6 +162,8 @@ class LLVMBasedICFG : public LLVMBasedCFG, public ICFGBase { LLVMProjectIRDB *IRDB = nullptr; MaybeUniquePtr TH; }; + +extern template class ICFGBase; } // namespace psr #endif diff --git a/lib/PhasarLLVM/ControlFlow/LLVMBasedBackwardICFG.cpp b/lib/PhasarLLVM/ControlFlow/LLVMBasedBackwardICFG.cpp index 6edadac34..0ca199d25 100644 --- a/lib/PhasarLLVM/ControlFlow/LLVMBasedBackwardICFG.cpp +++ b/lib/PhasarLLVM/ControlFlow/LLVMBasedBackwardICFG.cpp @@ -9,7 +9,6 @@ #include "phasar/PhasarLLVM/ControlFlow/LLVMBasedBackwardICFG.h" -#include "phasar/PhasarLLVM/ControlFlow/LLVMBasedCFG.h" #include "phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h" namespace psr { @@ -68,4 +67,12 @@ void LLVMBasedBackwardICFG::printImpl(llvm::raw_ostream &OS) const { nlohmann::json LLVMBasedBackwardICFG::getAsJsonImpl() const { return ForwardICFG->getAsJson(); } + +auto LLVMBasedBackwardICFG::getCallGraphImpl() const noexcept + -> const CallGraph & { + return ForwardICFG->getCallGraph(); +} + +template class ICFGBase; + } // namespace psr diff --git a/lib/PhasarLLVM/ControlFlow/LLVMBasedICFG.cpp b/lib/PhasarLLVM/ControlFlow/LLVMBasedICFG.cpp index 20ca70d3b..87b2279f2 100644 --- a/lib/PhasarLLVM/ControlFlow/LLVMBasedICFG.cpp +++ b/lib/PhasarLLVM/ControlFlow/LLVMBasedICFG.cpp @@ -457,4 +457,6 @@ void LLVMBasedICFG::printImpl(llvm::raw_ostream &OS) const { [this](n_t Inst) { return IRDB->getInstructionId(Inst); }); } +template class ICFGBase; + } // namespace psr From 1fafc4467a553d5ca877709ec3b70e558e5fa2b7 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel <52407375+fabianbs96@users.noreply.github.com> Date: Thu, 21 Sep 2023 17:37:12 +0200 Subject: [PATCH 3/3] Fix Taint Analysis (#661) * Handle Alias indirection and return-POI in taint analysis * pre-commit --- .../DataFlow/IfdsIde/LLVMFlowFunctions.h | 46 ++-- .../IfdsIde/Problems/IFDSTaintAnalysis.h | 4 +- include/phasar/Utils/TypeTraits.h | 4 + .../IfdsIde/Problems/IFDSTaintAnalysis.cpp | 205 ++++++++++++++---- .../taint_analysis/CMakeLists.txt | 2 + .../taint_analysis/double_free_01.c | 7 + .../taint_analysis/double_free_02.c | 9 + .../Problems/IFDSTaintAnalysisTest.cpp | 112 +++++++--- 8 files changed, 294 insertions(+), 95 deletions(-) create mode 100644 test/llvm_test_code/taint_analysis/double_free_01.c create mode 100644 test/llvm_test_code/taint_analysis/double_free_02.c diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/LLVMFlowFunctions.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/LLVMFlowFunctions.h index dba21e5d5..e994671bb 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/LLVMFlowFunctions.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/LLVMFlowFunctions.h @@ -239,7 +239,8 @@ mapFactsToCallee(const llvm::CallBase *CallSite, const llvm::Function *DestFun, /// /// Propagates the return value back to the call-site and based on the /// PropagateParameter predicate propagates back parameters holding as dataflow -/// facts. +/// facts. The resulting out-set of dataflow facts can be post-processed if +/// necessary. /// /// Let a call-site cs: r = fun(..., ax, ...) a function prototype fun(..., /// px, ...) and an exit statement exit: return rv. @@ -252,30 +253,30 @@ mapFactsToCallee(const llvm::CallBase *CallSite, const llvm::Function *DestFun, /// f(x) = ({ax} if PropagateParameter(ax, x) else {}) union ({r} if /// PropagateRet(rv, x) else {}). /// -template , - typename FnParam = std::equal_to, - typename FnRet = std::equal_to, - typename DCtor = DefaultConstruct, - typename = std::enable_if_t< - std::is_invocable_r_v && - std::is_invocable_r_v>> -FlowFunctionPtrType -mapFactsToCaller(const llvm::CallBase *CallSite, - const llvm::Instruction *ExitInst, - FnParam &&PropagateParameter = {}, FnRet &&PropagateRet = {}, - DCtor &&FactConstructor = {}, bool PropagateGlobals = true, - bool PropagateZeroToCaller = true) { +template < + typename D = const llvm::Value *, typename Container = std::set, + typename FnParam = std::equal_to, typename FnRet = std::equal_to, + typename DCtor = DefaultConstruct, typename PostProcessFn = IgnoreArgs, + typename = std::enable_if_t< + std::is_invocable_r_v && + std::is_invocable_r_v>> +FlowFunctionPtrType mapFactsToCaller( + const llvm::CallBase *CallSite, const llvm::Instruction *ExitInst, + FnParam &&PropagateParameter = {}, FnRet &&PropagateRet = {}, + DCtor &&FactConstructor = {}, bool PropagateGlobals = true, + bool PropagateZeroToCaller = true, PostProcessFn &&PostProcess = {}) { struct Mapper : public FlowFunction { Mapper(const llvm::CallBase *CallSite, const llvm::Instruction *ExitInst, bool PropagateGlobals, FnParam &&PropagateParameter, FnRet &&PropagateRet, DCtor &&FactConstructor, - bool PropagateZeroToCaller) + bool PropagateZeroToCaller, PostProcessFn &&PostProcess) : CSAndPropGlob(CallSite, PropagateGlobals), ExitInstAndPropZero(ExitInst, PropagateZeroToCaller), PropArg(std::forward(PropagateParameter)), PropRet(std::forward(PropagateRet)), - FactConstructor(std::forward(FactConstructor)) {} + FactConstructor(std::forward(FactConstructor)), + PostProcess(std::forward(PostProcess)) {} Container computeTargets(D Source) override { Container Res; @@ -337,6 +338,8 @@ mapFactsToCaller(const llvm::CallBase *CallSite, } } + std::invoke(PostProcess, Res); + return Res; } @@ -346,13 +349,14 @@ mapFactsToCaller(const llvm::CallBase *CallSite, [[no_unique_address]] std::decay_t PropArg; [[no_unique_address]] std::decay_t PropRet; [[no_unique_address]] std::decay_t FactConstructor; + [[no_unique_address]] std::decay_t PostProcess; }; - return std::make_shared(CallSite, ExitInst, PropagateGlobals, - std::forward(PropagateParameter), - std::forward(PropagateRet), - std::forward(FactConstructor), - PropagateZeroToCaller); + return std::make_shared( + CallSite, ExitInst, PropagateGlobals, + std::forward(PropagateParameter), + std::forward(PropagateRet), std::forward(FactConstructor), + PropagateZeroToCaller, std::forward(PostProcess)); } //===----------------------------------------------------------------------===// diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSTaintAnalysis.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSTaintAnalysis.h index 5e6d85ccf..6c58a032d 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSTaintAnalysis.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSTaintAnalysis.h @@ -55,7 +55,8 @@ class IFDSTaintAnalysis */ IFDSTaintAnalysis(const LLVMProjectIRDB *IRDB, LLVMAliasInfoRef PT, const LLVMTaintConfig *Config, - std::vector EntryPoints = {"main"}); + std::vector EntryPoints = {"main"}, + bool TaintMainArgs = true); ~IFDSTaintAnalysis() override = default; @@ -85,6 +86,7 @@ class IFDSTaintAnalysis private: const LLVMTaintConfig *Config{}; LLVMAliasInfoRef PT{}; + bool TaintMainArgs{}; bool isSourceCall(const llvm::CallBase *CB, const llvm::Function *Callee) const; diff --git a/include/phasar/Utils/TypeTraits.h b/include/phasar/Utils/TypeTraits.h index 745cc4b42..77bcfa36a 100644 --- a/include/phasar/Utils/TypeTraits.h +++ b/include/phasar/Utils/TypeTraits.h @@ -257,6 +257,10 @@ template struct DefaultConstruct { } }; +struct IgnoreArgs { + template void operator()(U &&.../*Val*/) noexcept {} +}; + template void reserveIfPossible(T &Container, size_t Capacity) { if constexpr (detail::has_reserve::value) { Container.reserve(Capacity); diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSTaintAnalysis.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSTaintAnalysis.cpp index 890c357ad..c7b2b6f72 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSTaintAnalysis.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSTaintAnalysis.cpp @@ -9,6 +9,8 @@ #include "phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSTaintAnalysis.h" +#include "phasar/DataFlow/IfdsIde/EntryPointUtils.h" +#include "phasar/DataFlow/IfdsIde/FlowFunctions.h" #include "phasar/DataFlow/IfdsIde/IDETabulationProblem.h" #include "phasar/PhasarLLVM/ControlFlow/LLVMBasedCFG.h" #include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" @@ -36,13 +38,16 @@ #include namespace psr { +using d_t = IFDSTaintAnalysis::d_t; +using container_type = IFDSTaintAnalysis::container_type; IFDSTaintAnalysis::IFDSTaintAnalysis(const LLVMProjectIRDB *IRDB, LLVMAliasInfoRef PT, const LLVMTaintConfig *Config, - std::vector EntryPoints) + std::vector EntryPoints, + bool TaintMainArgs) : IFDSTabulationProblem(IRDB, std::move(EntryPoints), createZeroValue()), - Config(Config), PT(PT) { + Config(Config), PT(PT), TaintMainArgs(TaintMainArgs) { assert(Config != nullptr); assert(PT); } @@ -110,29 +115,58 @@ bool IFDSTaintAnalysis::isSanitizerCall(const llvm::CallBase * /*CB*/, [this](const auto &Arg) { return Config->isSanitizer(&Arg); }); } +static bool canSkipAtContext(const llvm::Value *Val, + const llvm::Instruction *Context) noexcept { + if (const auto *Inst = llvm::dyn_cast(Val)) { + /// Mapping instructions between functions is done via the call-FF and + /// ret-FF + if (Inst->getFunction() != Context->getFunction()) { + return true; + } + if (Inst->getParent() == Context->getParent() && + Context->comesBefore(Inst)) { + // We will see that inst later + return true; + } + return false; + } + + if (const auto *Arg = llvm::dyn_cast(Val)) { + // An argument is only valid in the function it belongs to + if (Arg->getParent() != Context->getFunction()) { + return true; + } + } + return false; +} + +static bool isCompiletimeConstantData(const llvm::Value *Val) noexcept { + if (const auto *Glob = llvm::dyn_cast(Val)) { + // Data cannot flow into the readonly-data section + return Glob->isConstant(); + } + + return llvm::isa(Val) || llvm::isa(Val); +} + void IFDSTaintAnalysis::populateWithMayAliases( container_type &Facts, const llvm::Instruction *Context) const { container_type Tmp = Facts; for (const auto *Fact : Facts) { auto Aliases = PT.getAliasSet(Fact); for (const auto *Alias : *Aliases) { - if (const auto *Inst = llvm::dyn_cast(Alias)) { - /// Mapping instructions between functions is done via the call-FF and - /// ret-FF - if (Inst->getFunction() != Context->getFunction()) { - continue; - } - if (Inst->getParent() == Context->getParent() && - Context->comesBefore(Inst)) { - // We will see that inst later - continue; - } - } else if (const auto *Glob = - llvm::dyn_cast(Alias)) { - if (Glob != Fact && Glob->isConstant()) { - // Data cannot flow into the readonly-data section - continue; - } + if (canSkipAtContext(Alias, Context)) { + continue; + } + + if (isCompiletimeConstantData(Alias)) { + continue; + } + + if (const auto *Load = llvm::dyn_cast(Alias)) { + // Handle at least one level of indirection... + const auto *PointerOp = Load->getPointerOperand()->stripPointerCasts(); + Tmp.insert(PointerOp); } Tmp.insert(Alias); @@ -148,6 +182,85 @@ void IFDSTaintAnalysis::populateWithMustAliases( /// may-aliases } +static IFDSTaintAnalysis::FlowFunctionPtrType transferAndKillFlow(d_t To, + d_t From) { + if (From->hasNUsesOrMore(2)) { + return FlowFunctionTemplates::transferFlow(To, From); + } + return FlowFunctionTemplates::lambdaFlow( + [To, From](d_t Source) -> container_type { + if (Source == From) { + return {To}; + } + if (Source == To) { + return {}; + } + return {Source}; + }); +} + +static IFDSTaintAnalysis::FlowFunctionPtrType +transferAndKillTwoFlows(d_t To, d_t From1, d_t From2) { + bool KillFrom1 = !From1->hasNUsesOrMore(2); + bool KillFrom2 = !From2->hasNUsesOrMore(2); + + if (KillFrom1) { + if (KillFrom2) { + return FlowFunctionTemplates::lambdaFlow( + [To, From1, From2](d_t Source) -> container_type { + if (Source == From1 || Source == From2) { + return {To}; + } + if (Source == To) { + return {}; + } + return {Source}; + }); + } + + return FlowFunctionTemplates::lambdaFlow( + [To, From1, From2](d_t Source) -> container_type { + if (Source == From1) { + return {To}; + } + if (Source == From2) { + return {Source, To}; + } + if (Source == To) { + return {}; + } + return {Source}; + }); + } + + if (KillFrom2) { + return FlowFunctionTemplates::lambdaFlow( + [To, From1, From2](d_t Source) -> container_type { + if (Source == From1) { + return {Source, To}; + } + if (Source == From2) { + return {To}; + } + if (Source == To) { + return {}; + } + return {Source}; + }); + } + + return FlowFunctionTemplates::lambdaFlow( + [To, From1, From2](d_t Source) -> container_type { + if (Source == From1 || Source == From2) { + return {Source, To}; + } + if (Source == To) { + return {}; + } + return {Source}; + }); +} + auto IFDSTaintAnalysis::getNormalFlowFunction(n_t Curr, [[maybe_unused]] n_t Succ) -> FlowFunctionPtrType { @@ -156,47 +269,40 @@ auto IFDSTaintAnalysis::getNormalFlowFunction(n_t Curr, container_type Gen; Gen.insert(Store->getPointerOperand()); populateWithMayAliases(Gen, Store); - Gen.insert(Store->getValueOperand()); + if (Store->getValueOperand()->hasNUsesOrMore(2)) { + Gen.insert(Store->getValueOperand()); + } return lambdaFlow( [Store, Gen{std::move(Gen)}](d_t Source) -> container_type { - if (Store->getValueOperand() == Source) { - return Gen; - } if (Store->getPointerOperand() == Source) { return {}; } + if (Store->getValueOperand() == Source) { + return Gen; + } + return {Source}; }); } // If a tainted value is loaded, the loaded value is of course tainted if (const auto *Load = llvm::dyn_cast(Curr)) { - return transferFlow(Load, Load->getPointerOperand()); + return transferAndKillFlow(Load, Load->getPointerOperand()); } // Check if an address is computed from a tainted base pointer of an // aggregated object if (const auto *GEP = llvm::dyn_cast(Curr)) { - return transferFlow(GEP, GEP->getPointerOperand()); + return transferAndKillFlow(GEP, GEP->getPointerOperand()); } // Check if a tainted value is extracted and taint the targets of // the extract operation accordingly if (const auto *Extract = llvm::dyn_cast(Curr)) { - return transferFlow(Extract, Extract->getAggregateOperand()); + return transferAndKillFlow(Extract, Extract->getAggregateOperand()); } if (const auto *Insert = llvm::dyn_cast(Curr)) { - return lambdaFlow([Insert](d_t Source) -> container_type { - if (Source == Insert->getAggregateOperand() || - Source == Insert->getInsertedValueOperand()) { - return {Source, Insert}; - } - - if (Source == Insert) { - return {}; - } - - return {Source}; - }); + return transferAndKillTwoFlows(Insert, Insert->getAggregateOperand(), + Insert->getInsertedValueOperand()); } if (const auto *Cast = llvm::dyn_cast(Curr)) { @@ -234,7 +340,11 @@ auto IFDSTaintAnalysis::getRetFlowFunction(n_t CallSite, f_t /*CalleeFun*/, [](d_t Formal, d_t Source) { return Formal == Source && Formal->getType()->isPointerTy(); }, - [](d_t RetVal, d_t Source) { return RetVal == Source; }); + [](d_t RetVal, d_t Source) { return RetVal == Source; }, {}, true, true, + [this, CallSite](container_type &Res) { + // Correctly handling return-POIs + populateWithMayAliases(Res, CallSite); + }); // All other stuff is killed at this point } @@ -332,19 +442,24 @@ auto IFDSTaintAnalysis::getSummaryFlowFunction([[maybe_unused]] n_t CallSite, auto IFDSTaintAnalysis::initialSeeds() -> InitialSeeds { PHASAR_LOG_LEVEL(DEBUG, "IFDSTaintAnalysis::initialSeeds()"); - // If main function is the entry point, commandline arguments have to be - // tainted. Otherwise we just use the zero value to initialize the analysis. + InitialSeeds Seeds; LLVMBasedCFG C; - forallStartingPoints(EntryPoints, IRDB, C, [this, &Seeds](n_t SP) { - Seeds.addSeed(SP, getZeroValue()); - if (SP->getFunction()->getName() == "main") { + addSeedsForStartingPoints(EntryPoints, IRDB, C, Seeds, getZeroValue(), + psr::BinaryDomain::BOTTOM); + + if (TaintMainArgs && llvm::is_contained(EntryPoints, "main")) { + // If main function is the entry point, commandline arguments have to be + // tainted. Otherwise we just use the zero value to initialize the analysis. + + const auto *MainF = IRDB->getFunction("main"); + for (const auto *SP : C.getStartPointsOf(MainF)) { for (const auto &Arg : SP->getFunction()->args()) { Seeds.addSeed(SP, &Arg); } } - }); + } return Seeds; } diff --git a/test/llvm_test_code/taint_analysis/CMakeLists.txt b/test/llvm_test_code/taint_analysis/CMakeLists.txt index 1571c8432..cbefe3382 100644 --- a/test/llvm_test_code/taint_analysis/CMakeLists.txt +++ b/test/llvm_test_code/taint_analysis/CMakeLists.txt @@ -33,6 +33,8 @@ set(NoMem2regSources struct_member.cpp dynamic_memory.cpp dynamic_memory_simple.cpp + double_free_01.c + double_free_02.c ) foreach(TEST_SRC ${NoMem2regSources}) diff --git a/test/llvm_test_code/taint_analysis/double_free_01.c b/test/llvm_test_code/taint_analysis/double_free_01.c new file mode 100644 index 000000000..9ef43c7db --- /dev/null +++ b/test/llvm_test_code/taint_analysis/double_free_01.c @@ -0,0 +1,7 @@ +#include + +int main() { + void *X = malloc(32); + free(X); + free(X); +} diff --git a/test/llvm_test_code/taint_analysis/double_free_02.c b/test/llvm_test_code/taint_analysis/double_free_02.c new file mode 100644 index 000000000..73d1d06ac --- /dev/null +++ b/test/llvm_test_code/taint_analysis/double_free_02.c @@ -0,0 +1,9 @@ +#include + +void doFree(void *P) { free(P); } + +int main() { + void *X = malloc(32); + doFree(X); + free(X); +} diff --git a/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSTaintAnalysisTest.cpp b/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSTaintAnalysisTest.cpp index d5352abc9..0812fafcf 100644 --- a/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSTaintAnalysisTest.cpp +++ b/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSTaintAnalysisTest.cpp @@ -8,6 +8,7 @@ #include "phasar/PhasarLLVM/Pointer/LLVMAliasSet.h" #include "phasar/PhasarLLVM/SimpleAnalysisConstructor.h" #include "phasar/PhasarLLVM/TaintConfig/LLVMTaintConfig.h" +#include "phasar/PhasarLLVM/TaintConfig/TaintConfigBase.h" #include "phasar/PhasarLLVM/TypeHierarchy/LLVMTypeHierarchy.h" #include "TestConfig.h" @@ -24,7 +25,7 @@ class IFDSTaintAnalysisTest : public ::testing::Test { protected: static constexpr auto PathToLlFiles = PHASAR_BUILD_SUBFOLDER("taint_analysis/"); - const std::vector EntryPoints = {"main"}; + static inline const std::vector EntryPoints = {"main"}; std::optional HA; @@ -34,42 +35,79 @@ class IFDSTaintAnalysisTest : public ::testing::Test { IFDSTaintAnalysisTest() = default; ~IFDSTaintAnalysisTest() override = default; + static LLVMTaintConfig getDefaultConfig() { + auto SourceCB = [](const llvm::Instruction *Inst) { + std::set Ret; + if (const auto *Call = llvm::dyn_cast(Inst); + Call && Call->getCalledFunction() && + Call->getCalledFunction()->getName() == "_Z6sourcev") { + Ret.insert(Call); + } + return Ret; + }; + auto SinkCB = [](const llvm::Instruction *Inst) { + std::set Ret; + if (const auto *Call = llvm::dyn_cast(Inst); + Call && Call->getCalledFunction() && + Call->getCalledFunction()->getName() == "_Z4sinki") { + assert(Call->arg_size() > 0); + Ret.insert(Call->getArgOperand(0)); + } + return Ret; + }; + return LLVMTaintConfig(std::move(SourceCB), std::move(SinkCB)); + } + + static LLVMTaintConfig getDoubleFreeConfig() { + auto SourceCB = [](const llvm::Instruction *Inst) { + std::set Ret; + if (const auto *Call = llvm::dyn_cast(Inst); + Call && Call->getCalledFunction() && + Call->getCalledFunction()->getName() == "free") { + Ret.insert(Call->getArgOperand(0)); + } + return Ret; + }; + + return LLVMTaintConfig(SourceCB, SourceCB); + } + void initialize(const llvm::Twine &IRFile) { HA.emplace(IRFile, EntryPoints); - LLVMTaintConfig::TaintDescriptionCallBackTy SourceCB = - [](const llvm::Instruction *Inst) { - std::set Ret; - if (const auto *Call = llvm::dyn_cast(Inst); - Call && Call->getCalledFunction() && - Call->getCalledFunction()->getName() == "_Z6sourcev") { - Ret.insert(Call); - } - return Ret; - }; - LLVMTaintConfig::TaintDescriptionCallBackTy SinkCB = - [](const llvm::Instruction *Inst) { - std::set Ret; - if (const auto *Call = llvm::dyn_cast(Inst); - Call && Call->getCalledFunction() && - Call->getCalledFunction()->getName() == "_Z4sinki") { - assert(Call->arg_size() > 0); - Ret.insert(Call->getArgOperand(0)); - } - return Ret; - }; - TSF.emplace(std::move(SourceCB), std::move(SinkCB)); + + if (!TSF) { + TSF = getDefaultConfig(); + } TaintProblem = createAnalysisProblem(*HA, &*TSF, EntryPoints); } - void SetUp() override { ValueAnnotationPass::resetValueID(); } + static void doAnalysis(const llvm::Twine &IRFile, + const LLVMTaintConfig &Config, + const map> &GroundTruth) { + HelperAnalyses HA(PathToLlFiles + IRFile, EntryPoints); + + auto TaintProblem = + createAnalysisProblem(HA, &Config, EntryPoints); - void TearDown() override {} + IFDSSolver TaintSolver(TaintProblem, &HA.getICFG()); + TaintSolver.solve(); - void compareResults(map> &GroundTruth) { - // std::map> Leaks; + TaintSolver.dumpResults(); + + compare(TaintProblem.Leaks, GroundTruth); + } + + static void doAnalysis(const llvm::Twine &IRFile, + const map> &GroundTruth) { + doAnalysis(IRFile, getDefaultConfig(), GroundTruth); + } + + template + static void compare(const LeaksTy &Leaks, + const map> &GroundTruth) { map> FoundLeaks; - for (const auto &Leak : TaintProblem->Leaks) { + for (const auto &Leak : Leaks) { int SinkId = stoi(getMetaDataID(Leak.first)); set LeakedValueIds; for (const auto *LV : Leak.second) { @@ -79,6 +117,10 @@ class IFDSTaintAnalysisTest : public ::testing::Test { } EXPECT_EQ(FoundLeaks, GroundTruth); } + + void compareResults(const map> &GroundTruth) noexcept { + compare(TaintProblem->Leaks, GroundTruth); + } }; // Test Fixture TEST_F(IFDSTaintAnalysisTest, TaintTest_01) { @@ -257,6 +299,20 @@ TEST_F(IFDSTaintAnalysisTest, TaintTest_ExceptionHandling_10) { compareResults(GroundTruth); } +TEST_F(IFDSTaintAnalysisTest, TaintTest_DoubleFree_01) { + doAnalysis("double_free_01_c.ll", getDoubleFreeConfig(), + { + {6, {"5"}}, + }); +} + +TEST_F(IFDSTaintAnalysisTest, TaintTest_DoubleFree_02) { + doAnalysis("double_free_02_c.ll", getDoubleFreeConfig(), + { + {11, {"10"}}, + }); +} + int main(int Argc, char **Argv) { ::testing::InitGoogleTest(&Argc, Argv); return RUN_ALL_TESTS();