Phasar-IFDS框架學(xué)習(xí)筆記

IFDSAnalysis

Phasar框架中,為了解決IFDS / IDE問(wèn)題,主要分成了以下三個(gè)步驟:

  • IFDS / IDE 求解器(slover)。

  • 流函數(shù)的形式定義IFDS / IDE分析問(wèn)題(不同IFDS問(wèn)題定義不同的流函數(shù))。

  • 程序間控制流圖的實(shí)現(xiàn)(ICFG)。

1.phasar 主函數(shù)流程

AnalysisController

在處理完配置文件和命令行解析后,實(shí)例化一個(gè) AnalysisController 管理整個(gè)分析的流程:

    /********* 處理完參數(shù)解析后,下面的代碼開(kāi)始進(jìn)行實(shí)際的分析 *********/
    // At this point we have set-up all the parameters and can start the actual
    // analyses that have been choosen.
    AnalysisController Controller(
        [&lg](bool usingModules) {
          PAMM_GET_INSTANCE;
          START_TIMER("IRDB Construction", PAMM_SEVERITY_LEVEL::Full);
          LOG_IF_ENABLE(BOOST_LOG_SEV(lg, INFO) << "Set-up IR database.");
          IRDBOptions Opt = IRDBOptions::NONE;
          if (VariablesMap["wpa"].as<bool>()) {
            Opt |= IRDBOptions::WPA;
          }
          if (VariablesMap["mem2reg"].as<bool>()) {
            Opt |= IRDBOptions::MEM2REG;
          }
          if (usingModules) {
            ProjectIRDB IRDB(
                VariablesMap["module"].as<std::vector<std::string>>(), Opt);
            STOP_TIMER("IRDB Construction", PAMM_SEVERITY_LEVEL::Full);
            return IRDB;
          } else {
            // perform a little trick to make OptionsParser only responsible for
            // the project sources
            int OnlyTakeCareOfSources = 2;
            const char *ProjectSources =
                VariablesMap["project"].as<std::string>().c_str();
            const char *DummyProgName = "not_important";
            const char *DummyArgs[] = {DummyProgName, ProjectSources};
            clang::tooling::CommonOptionsParser OptionsParser(
                OnlyTakeCareOfSources, DummyArgs, StaticAnalysisCategory,
                OccurrencesFlag);
            clang::tooling::CompilationDatabase &CompileDB =
                OptionsParser.getCompilations();
            ProjectIRDB IRDB(CompileDB, Opt);
            STOP_TIMER("IRDB Construction", PAMM_SEVERITY_LEVEL::Full);
            return IRDB;
          }
        }(VariablesMap.count("module")),
        // IRDB 管理和維護(hù)llvm生成的IR代碼和相關(guān)信息
        ChosenDataFlowAnalyses,
        // 要執(zhí)行的數(shù)據(jù)流分析實(shí)例,如IFDSTaintAnalysis
        VariablesMap["wpa"].as<bool>(),  
        // wpa 是否分析整個(gè)程序 Whole-program analysis mode (1 or 0)
        VariablesMap["printedgerec"].as<bool>(),
        VariablesMap["graph-id"].as<std::string>());
        // 指定圖的id,其后由exportJson將圖轉(zhuǎn)化后發(fā)送到可視化的服務(wù)端

        /********* 使用一個(gè)ChosenDataFlowAnalyses來(lái)管理整個(gè)分析流程 *********/
  • 接下來(lái)看下 AnalysisController 的構(gòu)造方法,其主要是完成以下功能:
    • 檢查入口點(diǎn)合法性
    • 生成 ICFG
    • 調(diào)用用戶指定的 Analyzer
AnalysisController::AnalysisController(
    ProjectIRDB &&IRDB, std::vector<DataFlowAnalysisType> Analyses,
    bool WPA_MODE, bool PrintEdgeRecorder, std::string graph_id)
    : FinalResultsJson() {
  PAMM_GET_INSTANCE;
  auto &lg = lg::get();
  LOG_IF_ENABLE(BOOST_LOG_SEV(lg, INFO)
                << "Constructed the analysis controller.");
  LOG_IF_ENABLE(BOOST_LOG_SEV(lg, INFO)
                << "Found the following IR files for this project: ");
  for (auto file : IRDB.getAllSourceFiles()) {
    LOG_IF_ENABLE(BOOST_LOG_SEV(lg, INFO) << "\t" << file);
  }
  // 檢查入口點(diǎn)是否合法
  LOG_IF_ENABLE(BOOST_LOG_SEV(lg, INFO) << "Check for chosen entry points.");
  vector<string> EntryPoints = {"main"};
  if (VariablesMap.count("entry-points")) {
    std::vector<std::string> invalidEntryPoints;
    for (auto &entryPoint : VariablesMap["entry-points"].as<vector<string>>()) {
      if (IRDB.getFunction(entryPoint) == nullptr) {
        invalidEntryPoints.push_back(entryPoint);
      }
    }
    if (invalidEntryPoints.size()) {
      for (auto &invalidEntryPoint : invalidEntryPoints) {
        LOG_IF_ENABLE(BOOST_LOG_SEV(lg, ERROR)
                      << "Entry point '" << invalidEntryPoint
                      << "' is not valid.");
      }
      throw logic_error("invalid entry points");
    }
    if (VariablesMap["entry-points"].as<vector<string>>().size()) {
      EntryPoints = VariablesMap["entry-points"].as<vector<string>>();
    }
  }
  if (WPA_MODE) {
    // here we link every llvm module into a single module containing the entire
    // IR
    LOG_IF_ENABLE(
        BOOST_LOG_SEV(lg, INFO)
        << "link all llvm modules into a single module for WPA ...\n");
    START_TIMER("Link to WPA Module", PAMM_SEVERITY_LEVEL::Full);
    IRDB.linkForWPA();
    STOP_TIMER("Link to WPA Module", PAMM_SEVERITY_LEVEL::Full);
    LOG_IF_ENABLE(
        BOOST_LOG_SEV(lg, INFO)
        << "link all llvm modules into a single module for WPA ended\n");
  }
  IRDB.preprocessIR();

  // 重構(gòu) inter-modular 的類層次結(jié)構(gòu)和虛函數(shù)表
  LOG_IF_ENABLE(BOOST_LOG_SEV(lg, INFO) << "Reconstruct the class hierarchy.");
  START_TIMER("CH Construction", PAMM_SEVERITY_LEVEL::Core);
  LLVMTypeHierarchy CH(IRDB);
  STOP_TIMER("CH Construction", PAMM_SEVERITY_LEVEL::Core);
  LOG_IF_ENABLE(BOOST_LOG_SEV(lg, INFO)
                << "Reconstruction of class hierarchy completed.");

  // 生成 CG
  CallGraphAnalysisType CGType(
      (VariablesMap.count("callgraph-analysis"))
          ? StringToCallGraphAnalysisType.at(
                VariablesMap["callgraph-analysis"].as<string>())
          : CallGraphAnalysisType::OTF);
  // 執(zhí)行 WPA 分析
  if (WPA_MODE) {
    START_TIMER("CG Construction", PAMM_SEVERITY_LEVEL::Core);
    LLVMBasedICFG ICFG(CH, IRDB, CGType, EntryPoints);
    // 生成 ICFG
    if (VariablesMap.count("callgraph-plugin")) {
      throw runtime_error("callgraph plugin not found");
    }
    STOP_TIMER("CG Construction", PAMM_SEVERITY_LEVEL::Core);
    // ICFG.printAsDot("call_graph.dot");

    // CFG 只在 monotone 函數(shù)內(nèi)分析框架中使用
    LLVMBasedCFG CFG;
    START_TIMER("DFA Runtime", PAMM_SEVERITY_LEVEL::Core);
    /*
     * 根據(jù)選擇的分析模塊執(zhí)行
     */
    for (DataFlowAnalysisType analysis : Analyses) {
      LOG_IF_ENABLE(BOOST_LOG_SEV(lg, INFO)
                    << "Performing analysis: " << analysis);
      switch (analysis) {
      case DataFlowAnalysisType::IFDS_TaintAnalysis: {
        TaintSensitiveFunctions TSF;
        // 使用build-in的source和sink
        IFDSTaintAnalysis TaintAnalysisProblem(ICFG, TSF, EntryPoints);
        LLVMIFDSSolver<const llvm::Value *, LLVMBasedICFG &> LLVMTaintSolver(
            TaintAnalysisProblem, false);
        // 實(shí)例化一個(gè) solver 進(jìn)行數(shù)據(jù)流分析
        cout << "IFDS Taint Analysis ..." << endl;
        LLVMTaintSolver.solve();
        cout << "IFDS Taint Analysis ended" << endl;
        // FinalResultsJson += LLVMTaintSolver.getAsJson();
        if (PrintEdgeRecorder) {
          LLVMTaintSolver.exportJson(graph_id);
        }
        break;
      }
      case DataFlowAnalysisType::IDE_TaintAnalysis: {
        IDETaintAnalysis taintanalysisproblem(ICFG, EntryPoints);
        LLVMIDESolver<const llvm::Value *, const llvm::Value *, LLVMBasedICFG &>
            llvmtaintsolver(taintanalysisproblem, true);
        llvmtaintsolver.solve();
        FinalResultsJson += llvmtaintsolver.getAsJson();
        if (PrintEdgeRecorder) {
          llvmtaintsolver.exportJson(graph_id);
        }
        break;
      }
     …………
     …………
  }
  // 智能化模塊分析?
  // Perform module-wise (MW) analysis
  else {
    throw runtime_error(
        "This code will follow soon with an accompanying paper!");
  }
}

下邊代碼是調(diào)用具體的 IFDS 框架求解污點(diǎn)分析問(wèn)題的代碼,其中 LLVMIFDSSolver 繼承自 IFDSSolver ,最終的祖先類是 IDESolver ,最基本的函數(shù)基本上都封裝在這里。

        IFDSTaintAnalysis TaintAnalysisProblem(ICFG, TSF, EntryPoints);
        LLVMIFDSSolver<const llvm::Value *, LLVMBasedICFG &> LLVMTaintSolver(
            TaintAnalysisProblem, false);
        // 實(shí)例化一個(gè) solver 進(jìn)行數(shù)據(jù)流分析
        cout << "IFDS Taint Analysis ..." << endl;
        LLVMTaintSolver.solve();

2. IDESolver(IFDS的基類)

solve 函數(shù)實(shí)現(xiàn)流程如下:

  • 調(diào)用 submitInitalSeeds
    • 根據(jù) initseed 初始化
    • 生成 edge function 構(gòu)建 exploded supergraph
  • 調(diào)用 computeValues
    • 根據(jù) edge function 計(jì)算并保存函數(shù)節(jié)點(diǎn)的數(shù)據(jù)流值并保存到 Table<N, D, V> valtab 中
  • 調(diào)用 computeAndPrintStatistics
    • 計(jì)算并通過(guò)之前保存的valtab打印運(yùn)行中的一些信息
    • 通過(guò)從 computedIntraPathEdges 和 computedInterPathEdges 結(jié)構(gòu)中獲取信息
    • IntraPathEdges 和 InterPathEdges 由分析過(guò)程中調(diào)用 saveEdges 函數(shù)存儲(chǔ)
  virtual void solve() {
      ……
    // computations starting here
    START_TIMER("DFA Phase I", PAMM_SEVERITY_LEVEL::Full);
    // We start our analysis and construct exploded supergraph
    LOG_IF_ENABLE(BOOST_LOG_SEV(lg, INFO)
                  << "Submit initial seeds, construct exploded super graph");
    /* 初始化,構(gòu)建超級(jí)圖 */
    submitInitalSeeds();
    STOP_TIMER("DFA Phase I", PAMM_SEVERITY_LEVEL::Full);
    if (computevalues) {
      START_TIMER("DFA Phase II", PAMM_SEVERITY_LEVEL::Full);
      // 計(jì)算最終的值
      // Computing the final values for the edge functions
      LOG_IF_ENABLE(
          BOOST_LOG_SEV(lg, INFO)
          << "Compute the final values according to the edge functions");
      computeValues();
      STOP_TIMER("DFA Phase II", PAMM_SEVERITY_LEVEL::Full);
    }
    LOG_IF_ENABLE(BOOST_LOG_SEV(lg, INFO) << "Problem solved");
    if constexpr (PAMM_CURR_SEV_LEVEL >= PAMM_SEVERITY_LEVEL::Core) {
      computeAndPrintStatistics();
    }
  }

subitInitalSeeds

submitInitalSeeds 的主要功能是初始化種子和初始化分析。

  void submitInitalSeeds() {
    auto &lg = lg::get();
    PAMM_GET_INSTANCE;
    for (const auto &seed : initialSeeds) {
      N startPoint = seed.first;
      LOG_IF_ENABLE(BOOST_LOG_SEV(lg, DEBUG)
                    << "Start point: "
                    << ideTabulationProblem.NtoString(startPoint));
      for (const D &value : seed.second) {
        LOG_IF_ENABLE(BOOST_LOG_SEV(lg, DEBUG)
                      << "      Value: "
                      << ideTabulationProblem.DtoString(value));
        LOG_IF_ENABLE(BOOST_LOG_SEV(lg, DEBUG) << ' ');
        if (!ideTabulationProblem.isZeroValue(value)) {
          INC_COUNTER("Gen facts", 1, PAMM_SEVERITY_LEVEL::Core);
        }
        propagate(zeroValue, startPoint, value, EdgeIdentity<V>::getInstance(),
                  nullptr, false);
      }
      jumpFn->addFunction(zeroValue, startPoint, zeroValue,
                          EdgeIdentity<V>::getInstance()); // 添加邊函數(shù)
// jumpFn 為指向 std::shared_ptr<JumpFunctions<N, D, M, V, I>> 的智能指針
// addFunction 函數(shù)的功能為記錄數(shù)據(jù)流前向和后向的流向,存儲(chǔ)為{key=llvm::value,value=edgefunction}的形式
    }
  }
  • initialSeeds 變量來(lái)源于 tabulationProblem.initialSeeds() 方法,根據(jù)要分析的特定問(wèn)題的不同,其生成初始化種子的方法也不同。比如對(duì)于 IFDSTaintAnalysis 來(lái)說(shuō),EntryPoint 為 main 函數(shù)的時(shí)候,插入的初始值是命令行參數(shù)(標(biāo)記為污染的)和零值,對(duì)于其他入口點(diǎn),只插入零值。
map<IFDSTaintAnalysis::n_t, set<IFDSTaintAnalysis::d_t>>
IFDSTaintAnalysis::initialSeeds() {
  auto &lg = lg::get();
  LOG_IF_ENABLE(BOOST_LOG_SEV(lg, DEBUG)
                << "IFDSTaintAnalysis::initialSeeds()");
  // If main function is the entry point, commandline arguments have to be
  // tainted. Otherwise we just use the zero value to initialize the analysis.
  map<IFDSTaintAnalysis::n_t, set<IFDSTaintAnalysis::d_t>> SeedMap;
  for (auto &EntryPoint : EntryPoints) {
    if (EntryPoint == "main") {
      set<IFDSTaintAnalysis::d_t> CmdArgs;
      for (auto &Arg : icfg.getMethod(EntryPoint)->args()) {
        CmdArgs.insert(&Arg);
      }
      CmdArgs.insert(zeroValue());
      SeedMap.insert(
          make_pair(&icfg.getMethod(EntryPoint)->front().front(), CmdArgs));
    } else {
      SeedMap.insert(make_pair(&icfg.getMethod(EntryPoint)->front().front(),
                               set<IFDSTaintAnalysis::d_t>({zeroValue()})));
    }
  }
  return SeedMap;
}

propagate

  • propagate 函數(shù)負(fù)責(zé)將數(shù)據(jù)流值沿著超級(jí)圖傳播,合并已經(jīng)在目標(biāo)語(yǔ)句處計(jì)算出的邊函數(shù)
    • 參數(shù) sourceVal 為邊函數(shù)上的源 value
    • 參數(shù) target 為 目標(biāo)語(yǔ)句
    • 參數(shù) targetval 為目標(biāo)處的 value
    • 參數(shù) f 為 (s0,sourceVal) 到(target,targetVal) 計(jì)算出來(lái)的 edge function
void
  propagate(D sourceVal, N target, D targetVal,
            std::shared_ptr<EdgeFunction<V>> f,
            /* deliberately exposed to clients */ N relatedCallSite,
            /* deliberately exposed to clients */ bool isUnbalancedReturn) {
    auto &lg = lg::get();
    LOG_IF_ENABLE(BOOST_LOG_SEV(lg, DEBUG) << "Propagate flow");
    std::shared_ptr<EdgeFunction<V>> jumpFnE = nullptr;
    std::shared_ptr<EdgeFunction<V>> fPrime;
    if (!jumpFn->reverseLookup(target, targetVal).empty()) {
       // reverseLookup 對(duì)于給定的目標(biāo)語(yǔ)句和值,返回所有關(guān)聯(lián)的源值,返回值是從源值到edge函數(shù)的映射。
       // 如果不為空,返回 sourceVal 到 targetVal 的所有 edge function
      jumpFnE = jumpFn->reverseLookup(target, targetVal)[sourceVal];
    }
    if (jumpFnE == nullptr) {
      jumpFnE = allTop; // jump function is initialized to all-top
    }
    fPrime = jumpFnE->joinWith(f); // 合并?
    bool newFunction = !(fPrime->equal_to(jumpFnE));
    if (newFunction) { // 如果是一個(gè)新edge function的話,進(jìn)行傳播,
      jumpFn->addFunction(sourceVal, target, targetVal, fPrime);
      PathEdge<N, D> edge(sourceVal, target, targetVal); // 添加邊
      PathEdgeCount++;
      pathEdgeProcessingTask(edge);
      /*
      pathEdgeProcessingTask
      處理邊,根據(jù)其 target 的不同做以下處理(對(duì)應(yīng)于 tabulation 算法):
        processExit
        processNormalFlow
        processCall
      */
  }

pathEdgeProcessingTask

? 這個(gè)函數(shù)主要根據(jù)傳遞過(guò)來(lái)的邊,判斷 target node 是不是 call 或者 exit 節(jié)點(diǎn),分別做不同的處理傳播

 void pathEdgeProcessingTask(PathEdge<N, D> edge) {
    bool isCall = icfg.isCallStmt(edge.getTarget());
    if (!isCall) {   
      if (icfg.isExitStmt(edge.getTarget())) {
        processExit(edge);
      }
      if (!icfg.getSuccsOf(edge.getTarget()).empty()) {
        processNormalFlow(edge);
      }
    } else {
      processCall(edge);
    }
  }

? 調(diào)用的這三個(gè)函數(shù)實(shí)現(xiàn)的是論文中 IFDS 的基本算法

函數(shù)名 功能
processCall 對(duì)應(yīng)算法的13-20行,處理call
processExit 對(duì)應(yīng)算法的21-32行,處理exit
processNormalFlow 對(duì)應(yīng)算法的33-37行,處理普通情況下的數(shù)據(jù)流傳遞(除了 call 和 exit )
  • processCall 主要流程:
    • 對(duì)于每一個(gè)可能的 callee,首先會(huì)檢查是否存在 summary 邊,有的話直接當(dāng)作普通的數(shù)據(jù)流傳遞,否則就根據(jù)call 的函數(shù)計(jì)算數(shù)據(jù)流傳遞。
    • 然后處理 call2ret 節(jié)點(diǎn)間的數(shù)據(jù)流傳遞
  • processExit 主要流程:
    • 對(duì)于 caller 傳入的數(shù)據(jù)流并且能最終通過(guò) retsite 傳遞出來(lái)的,添加 summary 邊
    • 將 caller 函數(shù)開(kāi)始處的數(shù)據(jù)流值傳遞到 ret 處

computeValues

? 根據(jù) edge functions 計(jì)算最終值

  void computeValues() {
    auto &lg = lg::get();
    LOG_IF_ENABLE(BOOST_LOG_SEV(lg, DEBUG) << "Start computing values");
    // Phase II(i)
    std::map<N, std::set<D>> allSeeds(initialSeeds);
    for (N unbalancedRetSite : unbalancedRetSites) {
      if (allSeeds[unbalancedRetSite].empty()) {
        allSeeds.insert(make_pair(unbalancedRetSite, std::set<D>({zeroValue})));
      }
    }
    // do processing
      // 這里是設(shè)置節(jié)點(diǎn)的數(shù)據(jù)流值,并將其保存到 valtab 中
    for (const auto &seed : allSeeds) {
      N startPoint = seed.first;
      for (D val : seed.second) {
        // 設(shè)置起始節(jié)點(diǎn)數(shù)據(jù)流值
        setVal(startPoint, val, ideTabulationProblem.bottomElement());
        std::pair<N, D> superGraphNode(startPoint, val);
        valuePropagationTask(superGraphNode);
      }
    }
    // Phase II(ii)
    // we create an array of all nodes and then dispatch fractions of this array
    // to multiple threads
      
    std::set<N> allNonCallStartNodes = icfg.allNonCallStartNodes();
   // 返回既不是call也不是invoke節(jié)點(diǎn)的集合
    std::vector<N> nonCallStartNodesArray(allNonCallStartNodes.size());
    size_t i = 0;
    for (N n : allNonCallStartNodes) {
      nonCallStartNodesArray[i] = n;
      i++;
    }
    valueComputationTask(nonCallStartNodesArray);
  }

setVal

? 給 node 設(shè)置數(shù)據(jù)流值,如果 l 是 top value 的話就從 valtab 中移除

  void setVal(N nHashN, D nHashD, V l) {
    auto &lg = lg::get();
    // 如果是 top value,則不存儲(chǔ)
    // TOP is the implicit default value which we do not need to store.
    if (l == ideTabulationProblem.topElement()) {
      valtab.remove(nHashN, nHashD);
    } else {
      valtab.insert(nHashN, nHashD, l);
    }
  }

valuePropagationTask

  // should be made a callable at some point
  void valuePropagationTask(std::pair<N, D> nAndD) {
    N n = nAndD.first;
    // 初始化的種子不一定是函數(shù)的入口點(diǎn)
    // our initial seeds are not necessarily method-start points but here they
    // should be treated as such the same also for unbalanced return sites in
    // an unbalanced problem
    if (icfg.isStartPoint(n) || initialSeeds.count(n) ||
        unbalancedRetSites.count(n)) {
      propagateValueAtStart(nAndD, n);
    }
    if (icfg.isCallStmt(n)) {
      propagateValueAtCall(nAndD, n);
    }
  }

? 如果是函數(shù)入口點(diǎn)或者在 initialSeeds 中的話,執(zhí)行 propagateValueAtStart 進(jìn)行數(shù)據(jù)流傳遞。

propagateValueAtStart

  void propagateValueAtStart(std::pair<N, D> nAndD, N n) {
    PAMM_GET_INSTANCE;
    D d = nAndD.second;
    M p = icfg.getMethodOf(n); // 獲取 n 所在的函數(shù)
    for (N c : icfg.getCallsFromWithin(p)) {
      for (auto entry : jumpFn->forwardLookup(d, c)) { 
        // 這里的c最開(kāi)始是 initialSeeds 中的初始數(shù)據(jù)流值
        /* forwardLookup函數(shù)根據(jù)給定的source value和目標(biāo)語(yǔ)句,返回所有目標(biāo)語(yǔ)句中受到影響的
           目標(biāo)value和 edge function,(key=target value,value=edge function)      */ 
        D dPrime = entry.first;
        std::shared_ptr<EdgeFunction<V>> fPrime = entry.second;
        N sP = n;
        V value = val(sP, d);
        INC_COUNTER("Value Propagation", 1, PAMM_SEVERITY_LEVEL::Full);
        propagateValue(c, dPrime, fPrime->computeTarget(value)); // 傳遞數(shù)據(jù)流
      }
    }
  }
  • getCallsFromWithin 獲取給定函數(shù)的所有調(diào)用位置

propagateValueAtCall

  void propagateValueAtCall(std::pair<N, D> nAndD, N n) {
    PAMM_GET_INSTANCE;
    D d = nAndD.second;
    for (M q : icfg.getCalleesOfCallAt(n)) { // 獲取到 callee
      std::shared_ptr<FlowFunction<D>> callFlowFunction = // call指令的數(shù)據(jù)流傳遞函數(shù)
          cachedFlowEdgeFunctions.getCallFlowFunction(n, q); 
        // getCallFlowFunction 完成實(shí)參和形參的映射
      INC_COUNTER("FF Queries", 1, PAMM_SEVERITY_LEVEL::Full);
      for (D dPrime : callFlowFunction->computeTargets(d)) {
        std::shared_ptr<EdgeFunction<V>> edgeFn =
            cachedFlowEdgeFunctions.getCallEdgeFunction(n, d, q, dPrime); 
          // 獲取 edge function
        INC_COUNTER("EF Queries", 1, PAMM_SEVERITY_LEVEL::Full);
        for (N startPoint : icfg.getStartPointsOf(q)) {
          INC_COUNTER("Value Propagation", 1, PAMM_SEVERITY_LEVEL::Full);
          propagateValue(startPoint, dPrime, edgeFn->computeTarget(val(n, d))); 
        }
      }
    }
  }
  • 這里的 cachedFlowEdgeFunctions = tabulationProblem

propagateValue

 void propagateValue(N nHashN, D nHashD, V v) {
    V valNHash = val(nHashN, nHashD);
    V vPrime = joinValueAt(nHashN, nHashD, valNHash, v);
    if (!(vPrime == valNHash)) {
      setVal(nHashN, nHashD, vPrime);
      valuePropagationTask(std::pair<N, D>(nHashN, nHashD));
    }
  }

3.IFDS問(wèn)題分析實(shí)例

IFDSTaintAnalysis

這里以 IFDSTaintAnalysis 為例,這個(gè)類中主要實(shí)例化了 Tablution 算法中所定義的四個(gè)數(shù)據(jù)流傳遞函數(shù):

  • getNormalFlowFunction 普通節(jié)點(diǎn)
  • getCallFlowFunction 函數(shù)調(diào)用點(diǎn)到被調(diào)用函數(shù)
  • getRetFlowFunction ret 節(jié)點(diǎn),在污點(diǎn)分析問(wèn)題中,用于在函數(shù)調(diào)用返回后,處理返回值和參數(shù)是否被污染,只關(guān)注 pointer/reference 類型的指針
  • getCallToRetFlowFunction call結(jié)點(diǎn)其return結(jié)點(diǎn)

其實(shí)例化的 IFDSTaintAnalysis 類如下:

namespace psr {

class LLVMBasedICFG;

/**
 *
 * 主要處理的數(shù)據(jù)流向是從 source 點(diǎn)到 sink 點(diǎn),一旦發(fā)現(xiàn)有被標(biāo)記為污染的數(shù)據(jù)
 * 流到達(dá) sink 點(diǎn),就會(huì) leak 報(bào)告出來(lái)。
 * 
 * @see TaintSensitiveFunctions on how to specify your own
 * taint-sensitive source and sink functions.
 */
class IFDSTaintAnalysis : public DefaultIFDSTabulationProblem<
                              const llvm::Instruction *, const llvm::Value *,
                              const llvm::Function *, LLVMBasedICFG &> {
                                  // {N D M I}
public:
  typedef const llvm::Value *d_t;
  typedef const llvm::Instruction *n_t;
  typedef const llvm::Function *m_t;
  typedef LLVMBasedICFG &i_t;

private:
  TaintSensitiveFunctions SourceSinkFunctions   ; // 存儲(chǔ)SourceSink函數(shù)
  std::vector<std::string> EntryPoints;  // 函數(shù)入口點(diǎn)

public:
  /// Holds all leaks found during the analysis
  std::map<n_t, std::set<d_t>> Leaks;

  /**
   *
   * @param icfg
   * @param TSF
   * @param EntryPoints
   */
  IFDSTaintAnalysis(i_t icfg, TaintSensitiveFunctions TSF,
                    std::vector<std::string> EntryPoints = {"main"});                               
                                  
  virtual ~IFDSTaintAnalysis() = default;

  std::shared_ptr<FlowFunction<d_t>> getNormalFlowFunction(n_t curr,
                                                           n_t succ) override;
  std::shared_ptr<FlowFunction<d_t>> getCallFlowFunction(n_t callStmt,
                                                         m_t destMthd) override;

  std::shared_ptr<FlowFunction<d_t>> getRetFlowFunction(n_t callSite,
                                                        m_t calleeMthd,
                                                        n_t exitStmt,
                                                        n_t retSite) override;
  std::shared_ptr<FlowFunction<d_t>>
  getCallToRetFlowFunction(n_t callSite, n_t retSite,
                           std::set<m_t> callees) override;

// 計(jì)算summary的函數(shù),將一個(gè)函數(shù)內(nèi)部的數(shù)據(jù)流傳遞情況計(jì)算summary
  std::shared_ptr<FlowFunction<d_t>>
  getSummaryFlowFunction(n_t callStmt, m_t destMthd) override;

  std::map<n_t, std::set<d_t>> initialSeeds() override;
  d_t createZeroValue() override;
  bool isZeroValue(d_t d) const override;
  void printNode(std::ostream &os, n_t n) const override;
  void printDataFlowFact(std::ostream &os, d_t d) const override;
  void printMethod(std::ostream &os, m_t m) const override;
  void printIFDSReport(std::ostream &os,
                       SolverResults<n_t, d_t, BinaryDomain> &SR) override;
};
} // namespace psr
  • IFDSTaintAnalysis 在初始化的時(shí)候傳入了一個(gè) TaintSensitiveFunctions 對(duì)象,這個(gè)類主要是維護(hù)用于污點(diǎn)分析 source 函數(shù)和 sink 函數(shù),并且缺省定義了一些常見(jiàn)的函數(shù),并標(biāo)記了其 Critical argument

DefaultIFDSTabulationProblem

? 這是一個(gè) IFDS 問(wèn)題的默認(rèn)模板類,用于定義一個(gè) IFDS 問(wèn)題,設(shè)置其 solver 的參數(shù):

namespace psr {
template <typename N, typename D, typename M, typename I>
class DefaultIFDSTabulationProblem : public IFDSTabulationProblem<N, D, M, I> {
protected:
  I icfg;
  virtual D createZeroValue() = 0;
  D zerovalue;

public:
    // 進(jìn)行一些默認(rèn)的 solver 的配置
  DefaultIFDSTabulationProblem(I icfg) : icfg(icfg) {
    this->solver_config.followReturnsPastSeeds = false;
    this->solver_config.autoAddZero = true;
    this->solver_config.computeValues = true;
    this->solver_config.recordEdges = true;
    this->solver_config.computePersistedSummaries = true;
  }

  virtual ~DefaultIFDSTabulationProblem() = default;

  virtual std::shared_ptr<FlowFunction<D>>
  getSummaryFlowFunction(N callStmt, M destMthd) override {
    return nullptr;
  }
    
  I interproceduralCFG() override { return icfg; }
  D zeroValue() override { return zerovalue; }
};

} // namespace psr

#endif

因此在用戶實(shí)現(xiàn)自己的 IFDS分析問(wèn)題的時(shí)候,只需要 override 特定的數(shù)據(jù)流傳遞函數(shù)即可。

4. ICFG 實(shí)現(xiàn)

TODO

5. 代碼結(jié)構(gòu)

? Phasar使用AnalysisController管理整個(gè)分析流程,根據(jù)選擇的特定的數(shù)據(jù)流問(wèn)題進(jìn)行分析。并將IFDS問(wèn)題分別使用兩個(gè)類管理,一個(gè)是Problem類,用于定義要分析的問(wèn)題,另一類是Solver類,用于求解IFDS問(wèn)題:

  1. Problem類的繼承關(guān)系及基類簡(jiǎn)述
    • DefaultIFDSTabulationProblem:
      • IFDSTabulationProblem
      • FlowFunctions
      • NodePrinter
      • DataFlowFactPrinter
      • MethodPrinter
簡(jiǎn)述
DefaultIFDSTabulationProblem 所有特定 IFDS 問(wèn)題的基類
IFDSTabulationProblem 保存了zerovalue、配置信息和初始化值
FlowFunctions 定義基本的數(shù)據(jù)流傳遞函數(shù)
NodePrinter、DataFlowFactPrinter、MethodPrinter 打印特定數(shù)據(jù)結(jié)構(gòu)的模板類
  • FlowFunctions 類中實(shí)現(xiàn)的方法接口
方法名 功能
getNormalFlowFunction 普通節(jié)點(diǎn)的數(shù)據(jù)流傳遞函數(shù)
getCallFlowFunction 函數(shù)調(diào)用的數(shù)據(jù)流傳遞函數(shù),實(shí)參映射到形參
getRetFlowFunction 返回節(jié)點(diǎn)的數(shù)據(jù)流傳遞函數(shù),在被調(diào)函數(shù)的最后將數(shù)據(jù)流值傳回caller中其ret節(jié)點(diǎn)
getCallToRetFlowFunction call到ret的數(shù)據(jù)流傳遞函數(shù),處理沒(méi)有被callee影響到的數(shù)據(jù)流值
getSummaryFlowFunction 默認(rèn)是返回空指針,用于處理libc庫(kù)函數(shù)或者llvm生成的不能follow進(jìn)去的函數(shù)
  • IFDSTabulationProblem 類中的方法接口和成員變量

    名稱 簡(jiǎn)述
    initialSeeds 分析起始程序點(diǎn)和其的初始數(shù)據(jù)流值
    setSolverConfiguration 設(shè)置solver
    isZeroValue 判斷數(shù)據(jù)流值是否是0值
    interproceduralCFG 返回分析問(wèn)題的ICFG
  1. Slover類的繼承關(guān)系及簡(jiǎn)述

    LLVMIFDSSolver最終也是繼承自 IDESolver,對(duì)于特定的IFDS問(wèn)題的求解主要是調(diào)用solve( )方法。

    • LLVMIFDSSolver
      • IFDSSolver
        • IDESolver
  • LLVMIFDSSolver

    LLVMIFDSSolver這個(gè)類中實(shí)現(xiàn)了打印過(guò)程間、過(guò)程內(nèi) pathEdge 方法以及將結(jié)果可視化傳送給web端的接口。

  • IFDSSolver

    IFDSSolver實(shí)現(xiàn)了一個(gè)返回給定stmt處ifds結(jié)果的方法。

  • IDESolver

    這個(gè)類中slove( )函數(shù)實(shí)現(xiàn)的是對(duì)IFDS問(wèn)題的求解,solve函數(shù)主要有三個(gè)步驟:

    1. submitInitalSeeds 提交初始化數(shù)據(jù)流值的seed,構(gòu)建 exploded supergraph
    2. computeValues 根據(jù) edge functions 計(jì)算最終的數(shù)據(jù)流值
    3. 主要是計(jì)算和打印一些分析過(guò)程中的信息,比如gen/kill的數(shù)據(jù)流數(shù)量等。
函數(shù)名 功能
propagate 負(fù)責(zé)將數(shù)據(jù)流沿著exploded super graph向下傳遞,合并任何可能已經(jīng)在目標(biāo)語(yǔ)句處算出數(shù)據(jù)流值的edge function
setVal 保存每個(gè)程序點(diǎn)的數(shù)據(jù)流值
saveEdges 向computedInterPathEdges和computedIntraPathEdges存儲(chǔ)已經(jīng)計(jì)算出來(lái)的pathEdges
computeValues 根據(jù)邊函數(shù)計(jì)算每一個(gè)程序點(diǎn)的數(shù)據(jù)流值,調(diào)用setval函數(shù)存入valtab
addEndSummary 添加函數(shù)的摘要邊
變量名 功能
std::shared_ptr<JumpFunctions<N, D, M, V, I>> jumpFn; 存儲(chǔ)并管理exploded super graph中的數(shù)據(jù)流邊
Table<N, D, V> valtab; 存儲(chǔ)節(jié)點(diǎn)的數(shù)據(jù)流值
computedInterPathEdges 函數(shù)間的邊(打印信息用)
computedIntraPathEdges 函數(shù)內(nèi)的邊(打印信息用)
Table<N, D, std::map<N, std::set<D>>> incomingtab; 記錄所有的函數(shù)調(diào)用信息(處理exit節(jié)點(diǎn)的時(shí)候使用)
Table<N, D, Table<N, D, std::shared_ptr<EdgeFunction<V>>>> endsummarytab 存儲(chǔ)已經(jīng)計(jì)算出來(lái)的summay邊。

propagate函數(shù)

  • 從jumpFn中反向找能夠從source處傳遞到target處的edge function,如果找不到就用alltop
    • 如果是新的edgefuntion的話,說(shuō)明可以直接從最初點(diǎn)到當(dāng)前點(diǎn)添加一條邊(比如存在a-b的邊,然后當(dāng)前的數(shù)據(jù)流傳遞是b-c,會(huì)添加一條a-c的邊到j(luò)umpFunctions中)
    • 然后會(huì)調(diào)用pathEdgeProcessingTask處理從source到target的pathEdge
    • pathEdgeProcessingTask函數(shù)會(huì)根據(jù)target的類型做三種不同處理:
      • processExit
        • 首先找到該exit節(jié)點(diǎn)的所有caller
        • 根據(jù)這些caller傳遞過(guò)來(lái)的數(shù)據(jù)流值和從函數(shù)start節(jié)點(diǎn)傳遞到exit節(jié)點(diǎn)的數(shù)據(jù)流值,調(diào)用propagate將數(shù)據(jù)流值從callee的start處傳遞到ret site處
      • processNormalFlow
        • 處理正常的數(shù)據(jù)流傳遞,通過(guò)getNormalFlowFunction獲取數(shù)據(jù)流傳遞函數(shù),然后通過(guò)computeNormalFlowFunction計(jì)算傳遞后的數(shù)據(jù)流值
        • 計(jì)算出傳遞后的數(shù)據(jù)流值后再次調(diào)用propagate函數(shù)傳遞數(shù)據(jù)流值
      • processCall
        • 處理call語(yǔ)句的時(shí)候,首先看callee的函數(shù)是不是之前計(jì)算過(guò),即通過(guò)getSummaryFlowFunction獲取其summary flow function,如果存在就當(dāng)作普通的數(shù)據(jù)流傳遞
        • 否則就處理函數(shù)實(shí)參和形參的映射(記錄在incomingtab中)
        • 最后把能夠直接傳遞到ret-site的數(shù)據(jù)流值進(jìn)行傳遞

6.參考:

1. github

2. wiki

7.問(wèn)題

  1. Table<N, D, std::map<N, std::set<D>>> incomingtab
  • edges going along calls

  • incomingtab相關(guān)的方法接口

  std::map<N, std::set<D>> incoming(D d1, N sP) {
    return incomingtab.get(sP, d1);
  }

  void addIncoming(N sP, D d3, N n, D d2) {
    incomingtab.get(sP, d3)[n].insert(d2);
  }
  • 使用 incomingtab

    • processCall 函數(shù)中,遇到 call 節(jié)點(diǎn)的并找到其 call 的函數(shù)的時(shí)候,就注冊(cè)下 callee 的 startpoint 處的數(shù)據(jù)流值是來(lái)自哪里。

      for (N sP : startPointsOf) {
        saveEdges(n, sP, d2, res, true);
        // for each result node of the call-flow function
        for (D d3 : res) {
          // create initial self-loop
          propagate(d3, sP, d3, EdgeIdentity<V>::getInstance(), n,
                    false); // line 15
          // register the fact that <sp,d3> has an incoming edge from <n,d2>
          // line 15.1 of Naeem/Lhotak/Rodriguez
          addIncoming(sP, d3, n, d2);
      
    • processExit 函數(shù)中,處理 exit 節(jié)點(diǎn)的時(shí)候,先找到其對(duì)應(yīng)的 start point,然后對(duì)于 start point,添加一條從 start point 處到 exit 的邊(存儲(chǔ)在 fSummaryReuse 中),然后通過(guò)去尋找其 incoming calls,計(jì)算 call 到 ret 的數(shù)據(jù)流值,最后將數(shù)據(jù)流值從 call 傳遞到 retsite。

      D d1 = edge.factAtSource();
      D d2 = edge.factAtTarget();
      // for each of the method's start points, determine incoming calls
      std::set<N> startPointsOf = icfg.getStartPointsOf(methodThatNeedsSummary);
      std::map<N, std::set<D>> inc;
      for (N sP : startPointsOf) {
        // line 21.1 of Naeem/Lhotak/Rodriguez
        // register end-summary
        addEndSummary(sP, d1, n, d2, f);
        for (auto entry : incoming(d1, sP)) {
          inc[entry.first] = std::set<D>{entry.second};
        }
      }
      printEndSummaryTab();
      printIncomingTab();
      // for each incoming call edge already processed
      //(see processCall(..))
      for (auto entry : inc) {
        // line 22
        N c = entry.first;
        // for each return site
        for (N retSiteC : icfg.getReturnSitesOfCallAt(c)) {
          // compute return-flow function
          std::shared_ptr<FlowFunction<D>> retFunction =
              cachedFlowEdgeFunctions.getRetFlowFunction(
                  c, methodThatNeedsSummary, n, retSiteC);
          INC_COUNTER("FF Queries", 1, PAMM_SEVERITY_LEVEL::Full);
          // for each incoming-call value
          for (D d4 : entry.second) {
            std::set<D> targets =
                computeReturnFlowFunction(retFunction, d1, d2, c, entry.second);
            ADD_TO_HISTOGRAM("Data-flow facts", targets.size(), 1,
                             PAMM_SEVERITY_LEVEL::Full);
      
  1. 實(shí)參形參映射

    以污點(diǎn)分析為例,在 getCallFlowFunction 函數(shù)中實(shí)現(xiàn)參數(shù)的映射:

shared_ptr<FlowFunction<IFDSTaintAnalysis::d_t>>
IFDSTaintAnalysis::getCallFlowFunction(IFDSTaintAnalysis::n_t callStmt,
                                       IFDSTaintAnalysis::m_t destMthd) {
  auto &lg = lg::get();
  LOG_IF_ENABLE(BOOST_LOG_SEV(lg, DEBUG)
                << "IFDSTaintAnalysis::getCallFlowFunction()");
  string FunctionName = cxx_demangle(destMthd->getName().str());
  // Check if a source or sink function is called:
  // We then can kill all data-flow facts not following the called function.
  // The respective taints or leaks are then generated in the corresponding
  // call to return flow function.
  if (SourceSinkFunctions.isSource(FunctionName) ||
      (SourceSinkFunctions.isSink(FunctionName))) {
    return KillAll<IFDSTaintAnalysis::d_t>::getInstance();
  }
  // Map the actual into the formal parameters
  if (llvm::isa<llvm::CallInst>(callStmt) ||
      llvm::isa<llvm::InvokeInst>(callStmt)) {
    return make_shared<MapFactsToCallee>(llvm::ImmutableCallSite(callStmt),
                                         destMthd);
  }
  // Pass everything else as identity
  return Identity<IFDSTaintAnalysis::d_t>::getInstance();
}

? 具體實(shí)現(xiàn):

MapFactsToCallee::MapFactsToCallee(
    const llvm::ImmutableCallSite &callSite, const llvm::Function *destMthd,
    function<bool(const llvm::Value *)> predicate)
    : destMthd(destMthd), predicate(predicate) {
  // Set up the actual parameters
  for (unsigned idx = 0; idx < callSite.getNumArgOperands(); ++idx) {
    actuals.push_back(callSite.getArgOperand(idx));
  }
  // Set up the formal parameters
  for (unsigned idx = 0; idx < destMthd->arg_size(); ++idx) {
    formals.push_back(getNthFunctionArgument(destMthd, idx));
  }
}

? 在 processCall 函數(shù)中:

    D d1 = edge.factAtSource();
    N n = edge.getTarget(); // a call node; line 14...
    D d2 = edge.factAtTarget();
    std::shared_ptr<FlowFunction<D>> function =
            cachedFlowEdgeFunctions.getCallFlowFunction(n, sCalledProcN);
    std::set<D> res = computeCallFlowFunction(function, d1, d2);
    for (N sP : startPointsOf) {
          saveEdges(n, sP, d2, res, true);
          // for each result node of the call-flow function
          for (D d3 : res) {
            // create initial self-loop
            propagate(d3, sP, d3, EdgeIdentity<V>::getInstance(), n,
                      false); // line 15
            // register the fact that <sp,d3> has an incoming edge from <n,d2>
            // line 15.1 of Naeem/Lhotak/Rodriguez
            addIncoming(sP, d3, n, d2);
?著作權(quán)歸作者所有,轉(zhuǎn)載或內(nèi)容合作請(qǐng)聯(lián)系作者
【社區(qū)內(nèi)容提示】社區(qū)部分內(nèi)容疑似由AI輔助生成,瀏覽時(shí)請(qǐng)結(jié)合常識(shí)與多方信息審慎甄別。
平臺(tái)聲明:文章內(nèi)容(如有圖片或視頻亦包括在內(nèi))由作者上傳并發(fā)布,文章內(nèi)容僅代表作者本人觀點(diǎn),簡(jiǎn)書系信息發(fā)布平臺(tái),僅提供信息存儲(chǔ)服務(wù)。

相關(guān)閱讀更多精彩內(nèi)容

友情鏈接更多精彩內(nèi)容