IFDSAnalysis
Phasar框架中,為了解決IFDS / IDE問(wèn)題,主要分成了以下三個(gè)步驟:
IFDS / IDE 求解器(slover)。
流函數(shù)的形式定義IFDS / IDE分析問(wèn)題(不同IFDS問(wèn)題定義不同的流函數(shù))。
程序間控制流圖的實(shí)現(xiàn)(ICFG)。
1.phasar 主函數(shù)流程
AnalysisController
在處理完配置文件和命令行解析后,實(shí)例化一個(gè) AnalysisController 管理整個(gè)分析的流程:
/********* 處理完參數(shù)解析后,下面的代碼開(kāi)始進(jìn)行實(shí)際的分析 *********/
// At this point we have set-up all the parameters and can start the actual
// analyses that have been choosen.
AnalysisController Controller(
[&lg](bool usingModules) {
PAMM_GET_INSTANCE;
START_TIMER("IRDB Construction", PAMM_SEVERITY_LEVEL::Full);
LOG_IF_ENABLE(BOOST_LOG_SEV(lg, INFO) << "Set-up IR database.");
IRDBOptions Opt = IRDBOptions::NONE;
if (VariablesMap["wpa"].as<bool>()) {
Opt |= IRDBOptions::WPA;
}
if (VariablesMap["mem2reg"].as<bool>()) {
Opt |= IRDBOptions::MEM2REG;
}
if (usingModules) {
ProjectIRDB IRDB(
VariablesMap["module"].as<std::vector<std::string>>(), Opt);
STOP_TIMER("IRDB Construction", PAMM_SEVERITY_LEVEL::Full);
return IRDB;
} else {
// perform a little trick to make OptionsParser only responsible for
// the project sources
int OnlyTakeCareOfSources = 2;
const char *ProjectSources =
VariablesMap["project"].as<std::string>().c_str();
const char *DummyProgName = "not_important";
const char *DummyArgs[] = {DummyProgName, ProjectSources};
clang::tooling::CommonOptionsParser OptionsParser(
OnlyTakeCareOfSources, DummyArgs, StaticAnalysisCategory,
OccurrencesFlag);
clang::tooling::CompilationDatabase &CompileDB =
OptionsParser.getCompilations();
ProjectIRDB IRDB(CompileDB, Opt);
STOP_TIMER("IRDB Construction", PAMM_SEVERITY_LEVEL::Full);
return IRDB;
}
}(VariablesMap.count("module")),
// IRDB 管理和維護(hù)llvm生成的IR代碼和相關(guān)信息
ChosenDataFlowAnalyses,
// 要執(zhí)行的數(shù)據(jù)流分析實(shí)例,如IFDSTaintAnalysis
VariablesMap["wpa"].as<bool>(),
// wpa 是否分析整個(gè)程序 Whole-program analysis mode (1 or 0)
VariablesMap["printedgerec"].as<bool>(),
VariablesMap["graph-id"].as<std::string>());
// 指定圖的id,其后由exportJson將圖轉(zhuǎn)化后發(fā)送到可視化的服務(wù)端
/********* 使用一個(gè)ChosenDataFlowAnalyses來(lái)管理整個(gè)分析流程 *********/
- 接下來(lái)看下 AnalysisController 的構(gòu)造方法,其主要是完成以下功能:
- 檢查入口點(diǎn)合法性
- 生成 ICFG
- 調(diào)用用戶指定的 Analyzer
AnalysisController::AnalysisController(
ProjectIRDB &&IRDB, std::vector<DataFlowAnalysisType> Analyses,
bool WPA_MODE, bool PrintEdgeRecorder, std::string graph_id)
: FinalResultsJson() {
PAMM_GET_INSTANCE;
auto &lg = lg::get();
LOG_IF_ENABLE(BOOST_LOG_SEV(lg, INFO)
<< "Constructed the analysis controller.");
LOG_IF_ENABLE(BOOST_LOG_SEV(lg, INFO)
<< "Found the following IR files for this project: ");
for (auto file : IRDB.getAllSourceFiles()) {
LOG_IF_ENABLE(BOOST_LOG_SEV(lg, INFO) << "\t" << file);
}
// 檢查入口點(diǎn)是否合法
LOG_IF_ENABLE(BOOST_LOG_SEV(lg, INFO) << "Check for chosen entry points.");
vector<string> EntryPoints = {"main"};
if (VariablesMap.count("entry-points")) {
std::vector<std::string> invalidEntryPoints;
for (auto &entryPoint : VariablesMap["entry-points"].as<vector<string>>()) {
if (IRDB.getFunction(entryPoint) == nullptr) {
invalidEntryPoints.push_back(entryPoint);
}
}
if (invalidEntryPoints.size()) {
for (auto &invalidEntryPoint : invalidEntryPoints) {
LOG_IF_ENABLE(BOOST_LOG_SEV(lg, ERROR)
<< "Entry point '" << invalidEntryPoint
<< "' is not valid.");
}
throw logic_error("invalid entry points");
}
if (VariablesMap["entry-points"].as<vector<string>>().size()) {
EntryPoints = VariablesMap["entry-points"].as<vector<string>>();
}
}
if (WPA_MODE) {
// here we link every llvm module into a single module containing the entire
// IR
LOG_IF_ENABLE(
BOOST_LOG_SEV(lg, INFO)
<< "link all llvm modules into a single module for WPA ...\n");
START_TIMER("Link to WPA Module", PAMM_SEVERITY_LEVEL::Full);
IRDB.linkForWPA();
STOP_TIMER("Link to WPA Module", PAMM_SEVERITY_LEVEL::Full);
LOG_IF_ENABLE(
BOOST_LOG_SEV(lg, INFO)
<< "link all llvm modules into a single module for WPA ended\n");
}
IRDB.preprocessIR();
// 重構(gòu) inter-modular 的類層次結(jié)構(gòu)和虛函數(shù)表
LOG_IF_ENABLE(BOOST_LOG_SEV(lg, INFO) << "Reconstruct the class hierarchy.");
START_TIMER("CH Construction", PAMM_SEVERITY_LEVEL::Core);
LLVMTypeHierarchy CH(IRDB);
STOP_TIMER("CH Construction", PAMM_SEVERITY_LEVEL::Core);
LOG_IF_ENABLE(BOOST_LOG_SEV(lg, INFO)
<< "Reconstruction of class hierarchy completed.");
// 生成 CG
CallGraphAnalysisType CGType(
(VariablesMap.count("callgraph-analysis"))
? StringToCallGraphAnalysisType.at(
VariablesMap["callgraph-analysis"].as<string>())
: CallGraphAnalysisType::OTF);
// 執(zhí)行 WPA 分析
if (WPA_MODE) {
START_TIMER("CG Construction", PAMM_SEVERITY_LEVEL::Core);
LLVMBasedICFG ICFG(CH, IRDB, CGType, EntryPoints);
// 生成 ICFG
if (VariablesMap.count("callgraph-plugin")) {
throw runtime_error("callgraph plugin not found");
}
STOP_TIMER("CG Construction", PAMM_SEVERITY_LEVEL::Core);
// ICFG.printAsDot("call_graph.dot");
// CFG 只在 monotone 函數(shù)內(nèi)分析框架中使用
LLVMBasedCFG CFG;
START_TIMER("DFA Runtime", PAMM_SEVERITY_LEVEL::Core);
/*
* 根據(jù)選擇的分析模塊執(zhí)行
*/
for (DataFlowAnalysisType analysis : Analyses) {
LOG_IF_ENABLE(BOOST_LOG_SEV(lg, INFO)
<< "Performing analysis: " << analysis);
switch (analysis) {
case DataFlowAnalysisType::IFDS_TaintAnalysis: {
TaintSensitiveFunctions TSF;
// 使用build-in的source和sink
IFDSTaintAnalysis TaintAnalysisProblem(ICFG, TSF, EntryPoints);
LLVMIFDSSolver<const llvm::Value *, LLVMBasedICFG &> LLVMTaintSolver(
TaintAnalysisProblem, false);
// 實(shí)例化一個(gè) solver 進(jìn)行數(shù)據(jù)流分析
cout << "IFDS Taint Analysis ..." << endl;
LLVMTaintSolver.solve();
cout << "IFDS Taint Analysis ended" << endl;
// FinalResultsJson += LLVMTaintSolver.getAsJson();
if (PrintEdgeRecorder) {
LLVMTaintSolver.exportJson(graph_id);
}
break;
}
case DataFlowAnalysisType::IDE_TaintAnalysis: {
IDETaintAnalysis taintanalysisproblem(ICFG, EntryPoints);
LLVMIDESolver<const llvm::Value *, const llvm::Value *, LLVMBasedICFG &>
llvmtaintsolver(taintanalysisproblem, true);
llvmtaintsolver.solve();
FinalResultsJson += llvmtaintsolver.getAsJson();
if (PrintEdgeRecorder) {
llvmtaintsolver.exportJson(graph_id);
}
break;
}
…………
…………
}
// 智能化模塊分析?
// Perform module-wise (MW) analysis
else {
throw runtime_error(
"This code will follow soon with an accompanying paper!");
}
}
下邊代碼是調(diào)用具體的 IFDS 框架求解污點(diǎn)分析問(wèn)題的代碼,其中 LLVMIFDSSolver 繼承自 IFDSSolver ,最終的祖先類是 IDESolver ,最基本的函數(shù)基本上都封裝在這里。
IFDSTaintAnalysis TaintAnalysisProblem(ICFG, TSF, EntryPoints);
LLVMIFDSSolver<const llvm::Value *, LLVMBasedICFG &> LLVMTaintSolver(
TaintAnalysisProblem, false);
// 實(shí)例化一個(gè) solver 進(jìn)行數(shù)據(jù)流分析
cout << "IFDS Taint Analysis ..." << endl;
LLVMTaintSolver.solve();
2. IDESolver(IFDS的基類)
solve 函數(shù)實(shí)現(xiàn)流程如下:
- 調(diào)用 submitInitalSeeds
- 根據(jù) initseed 初始化
- 生成 edge function 構(gòu)建 exploded supergraph
- 調(diào)用 computeValues
- 根據(jù) edge function 計(jì)算并保存函數(shù)節(jié)點(diǎn)的數(shù)據(jù)流值并保存到 Table<N, D, V> valtab 中
- 調(diào)用 computeAndPrintStatistics
- 計(jì)算并通過(guò)之前保存的valtab打印運(yùn)行中的一些信息
- 通過(guò)從 computedIntraPathEdges 和 computedInterPathEdges 結(jié)構(gòu)中獲取信息
- IntraPathEdges 和 InterPathEdges 由分析過(guò)程中調(diào)用 saveEdges 函數(shù)存儲(chǔ)
virtual void solve() {
……
// computations starting here
START_TIMER("DFA Phase I", PAMM_SEVERITY_LEVEL::Full);
// We start our analysis and construct exploded supergraph
LOG_IF_ENABLE(BOOST_LOG_SEV(lg, INFO)
<< "Submit initial seeds, construct exploded super graph");
/* 初始化,構(gòu)建超級(jí)圖 */
submitInitalSeeds();
STOP_TIMER("DFA Phase I", PAMM_SEVERITY_LEVEL::Full);
if (computevalues) {
START_TIMER("DFA Phase II", PAMM_SEVERITY_LEVEL::Full);
// 計(jì)算最終的值
// Computing the final values for the edge functions
LOG_IF_ENABLE(
BOOST_LOG_SEV(lg, INFO)
<< "Compute the final values according to the edge functions");
computeValues();
STOP_TIMER("DFA Phase II", PAMM_SEVERITY_LEVEL::Full);
}
LOG_IF_ENABLE(BOOST_LOG_SEV(lg, INFO) << "Problem solved");
if constexpr (PAMM_CURR_SEV_LEVEL >= PAMM_SEVERITY_LEVEL::Core) {
computeAndPrintStatistics();
}
}
subitInitalSeeds
submitInitalSeeds 的主要功能是初始化種子和初始化分析。
void submitInitalSeeds() {
auto &lg = lg::get();
PAMM_GET_INSTANCE;
for (const auto &seed : initialSeeds) {
N startPoint = seed.first;
LOG_IF_ENABLE(BOOST_LOG_SEV(lg, DEBUG)
<< "Start point: "
<< ideTabulationProblem.NtoString(startPoint));
for (const D &value : seed.second) {
LOG_IF_ENABLE(BOOST_LOG_SEV(lg, DEBUG)
<< " Value: "
<< ideTabulationProblem.DtoString(value));
LOG_IF_ENABLE(BOOST_LOG_SEV(lg, DEBUG) << ' ');
if (!ideTabulationProblem.isZeroValue(value)) {
INC_COUNTER("Gen facts", 1, PAMM_SEVERITY_LEVEL::Core);
}
propagate(zeroValue, startPoint, value, EdgeIdentity<V>::getInstance(),
nullptr, false);
}
jumpFn->addFunction(zeroValue, startPoint, zeroValue,
EdgeIdentity<V>::getInstance()); // 添加邊函數(shù)
// jumpFn 為指向 std::shared_ptr<JumpFunctions<N, D, M, V, I>> 的智能指針
// addFunction 函數(shù)的功能為記錄數(shù)據(jù)流前向和后向的流向,存儲(chǔ)為{key=llvm::value,value=edgefunction}的形式
}
}
-
initialSeeds變量來(lái)源于tabulationProblem.initialSeeds()方法,根據(jù)要分析的特定問(wèn)題的不同,其生成初始化種子的方法也不同。比如對(duì)于 IFDSTaintAnalysis 來(lái)說(shuō),EntryPoint為 main 函數(shù)的時(shí)候,插入的初始值是命令行參數(shù)(標(biāo)記為污染的)和零值,對(duì)于其他入口點(diǎn),只插入零值。
map<IFDSTaintAnalysis::n_t, set<IFDSTaintAnalysis::d_t>>
IFDSTaintAnalysis::initialSeeds() {
auto &lg = lg::get();
LOG_IF_ENABLE(BOOST_LOG_SEV(lg, DEBUG)
<< "IFDSTaintAnalysis::initialSeeds()");
// If main function is the entry point, commandline arguments have to be
// tainted. Otherwise we just use the zero value to initialize the analysis.
map<IFDSTaintAnalysis::n_t, set<IFDSTaintAnalysis::d_t>> SeedMap;
for (auto &EntryPoint : EntryPoints) {
if (EntryPoint == "main") {
set<IFDSTaintAnalysis::d_t> CmdArgs;
for (auto &Arg : icfg.getMethod(EntryPoint)->args()) {
CmdArgs.insert(&Arg);
}
CmdArgs.insert(zeroValue());
SeedMap.insert(
make_pair(&icfg.getMethod(EntryPoint)->front().front(), CmdArgs));
} else {
SeedMap.insert(make_pair(&icfg.getMethod(EntryPoint)->front().front(),
set<IFDSTaintAnalysis::d_t>({zeroValue()})));
}
}
return SeedMap;
}
propagate
-
propagate函數(shù)負(fù)責(zé)將數(shù)據(jù)流值沿著超級(jí)圖傳播,合并已經(jīng)在目標(biāo)語(yǔ)句處計(jì)算出的邊函數(shù)- 參數(shù) sourceVal 為邊函數(shù)上的源 value
- 參數(shù) target 為 目標(biāo)語(yǔ)句
- 參數(shù) targetval 為目標(biāo)處的 value
- 參數(shù) f 為 (s0,sourceVal) 到(target,targetVal) 計(jì)算出來(lái)的 edge function
void
propagate(D sourceVal, N target, D targetVal,
std::shared_ptr<EdgeFunction<V>> f,
/* deliberately exposed to clients */ N relatedCallSite,
/* deliberately exposed to clients */ bool isUnbalancedReturn) {
auto &lg = lg::get();
LOG_IF_ENABLE(BOOST_LOG_SEV(lg, DEBUG) << "Propagate flow");
std::shared_ptr<EdgeFunction<V>> jumpFnE = nullptr;
std::shared_ptr<EdgeFunction<V>> fPrime;
if (!jumpFn->reverseLookup(target, targetVal).empty()) {
// reverseLookup 對(duì)于給定的目標(biāo)語(yǔ)句和值,返回所有關(guān)聯(lián)的源值,返回值是從源值到edge函數(shù)的映射。
// 如果不為空,返回 sourceVal 到 targetVal 的所有 edge function
jumpFnE = jumpFn->reverseLookup(target, targetVal)[sourceVal];
}
if (jumpFnE == nullptr) {
jumpFnE = allTop; // jump function is initialized to all-top
}
fPrime = jumpFnE->joinWith(f); // 合并?
bool newFunction = !(fPrime->equal_to(jumpFnE));
if (newFunction) { // 如果是一個(gè)新edge function的話,進(jìn)行傳播,
jumpFn->addFunction(sourceVal, target, targetVal, fPrime);
PathEdge<N, D> edge(sourceVal, target, targetVal); // 添加邊
PathEdgeCount++;
pathEdgeProcessingTask(edge);
/*
pathEdgeProcessingTask
處理邊,根據(jù)其 target 的不同做以下處理(對(duì)應(yīng)于 tabulation 算法):
processExit
processNormalFlow
processCall
*/
}
pathEdgeProcessingTask
? 這個(gè)函數(shù)主要根據(jù)傳遞過(guò)來(lái)的邊,判斷 target node 是不是 call 或者 exit 節(jié)點(diǎn),分別做不同的處理傳播
void pathEdgeProcessingTask(PathEdge<N, D> edge) {
bool isCall = icfg.isCallStmt(edge.getTarget());
if (!isCall) {
if (icfg.isExitStmt(edge.getTarget())) {
processExit(edge);
}
if (!icfg.getSuccsOf(edge.getTarget()).empty()) {
processNormalFlow(edge);
}
} else {
processCall(edge);
}
}
? 調(diào)用的這三個(gè)函數(shù)實(shí)現(xiàn)的是論文中 IFDS 的基本算法
| 函數(shù)名 | 功能 |
|---|---|
| processCall | 對(duì)應(yīng)算法的13-20行,處理call |
| processExit | 對(duì)應(yīng)算法的21-32行,處理exit |
| processNormalFlow | 對(duì)應(yīng)算法的33-37行,處理普通情況下的數(shù)據(jù)流傳遞(除了 call 和 exit ) |
- processCall 主要流程:
- 對(duì)于每一個(gè)可能的 callee,首先會(huì)檢查是否存在 summary 邊,有的話直接當(dāng)作普通的數(shù)據(jù)流傳遞,否則就根據(jù)call 的函數(shù)計(jì)算數(shù)據(jù)流傳遞。
- 然后處理 call2ret 節(jié)點(diǎn)間的數(shù)據(jù)流傳遞
- processExit 主要流程:
- 對(duì)于 caller 傳入的數(shù)據(jù)流并且能最終通過(guò) retsite 傳遞出來(lái)的,添加 summary 邊
- 將 caller 函數(shù)開(kāi)始處的數(shù)據(jù)流值傳遞到 ret 處
computeValues
? 根據(jù) edge functions 計(jì)算最終值
void computeValues() {
auto &lg = lg::get();
LOG_IF_ENABLE(BOOST_LOG_SEV(lg, DEBUG) << "Start computing values");
// Phase II(i)
std::map<N, std::set<D>> allSeeds(initialSeeds);
for (N unbalancedRetSite : unbalancedRetSites) {
if (allSeeds[unbalancedRetSite].empty()) {
allSeeds.insert(make_pair(unbalancedRetSite, std::set<D>({zeroValue})));
}
}
// do processing
// 這里是設(shè)置節(jié)點(diǎn)的數(shù)據(jù)流值,并將其保存到 valtab 中
for (const auto &seed : allSeeds) {
N startPoint = seed.first;
for (D val : seed.second) {
// 設(shè)置起始節(jié)點(diǎn)數(shù)據(jù)流值
setVal(startPoint, val, ideTabulationProblem.bottomElement());
std::pair<N, D> superGraphNode(startPoint, val);
valuePropagationTask(superGraphNode);
}
}
// Phase II(ii)
// we create an array of all nodes and then dispatch fractions of this array
// to multiple threads
std::set<N> allNonCallStartNodes = icfg.allNonCallStartNodes();
// 返回既不是call也不是invoke節(jié)點(diǎn)的集合
std::vector<N> nonCallStartNodesArray(allNonCallStartNodes.size());
size_t i = 0;
for (N n : allNonCallStartNodes) {
nonCallStartNodesArray[i] = n;
i++;
}
valueComputationTask(nonCallStartNodesArray);
}
setVal
? 給 node 設(shè)置數(shù)據(jù)流值,如果 l 是 top value 的話就從 valtab 中移除
void setVal(N nHashN, D nHashD, V l) {
auto &lg = lg::get();
// 如果是 top value,則不存儲(chǔ)
// TOP is the implicit default value which we do not need to store.
if (l == ideTabulationProblem.topElement()) {
valtab.remove(nHashN, nHashD);
} else {
valtab.insert(nHashN, nHashD, l);
}
}
valuePropagationTask
// should be made a callable at some point
void valuePropagationTask(std::pair<N, D> nAndD) {
N n = nAndD.first;
// 初始化的種子不一定是函數(shù)的入口點(diǎn)
// our initial seeds are not necessarily method-start points but here they
// should be treated as such the same also for unbalanced return sites in
// an unbalanced problem
if (icfg.isStartPoint(n) || initialSeeds.count(n) ||
unbalancedRetSites.count(n)) {
propagateValueAtStart(nAndD, n);
}
if (icfg.isCallStmt(n)) {
propagateValueAtCall(nAndD, n);
}
}
? 如果是函數(shù)入口點(diǎn)或者在 initialSeeds 中的話,執(zhí)行 propagateValueAtStart 進(jìn)行數(shù)據(jù)流傳遞。
propagateValueAtStart
void propagateValueAtStart(std::pair<N, D> nAndD, N n) {
PAMM_GET_INSTANCE;
D d = nAndD.second;
M p = icfg.getMethodOf(n); // 獲取 n 所在的函數(shù)
for (N c : icfg.getCallsFromWithin(p)) {
for (auto entry : jumpFn->forwardLookup(d, c)) {
// 這里的c最開(kāi)始是 initialSeeds 中的初始數(shù)據(jù)流值
/* forwardLookup函數(shù)根據(jù)給定的source value和目標(biāo)語(yǔ)句,返回所有目標(biāo)語(yǔ)句中受到影響的
目標(biāo)value和 edge function,(key=target value,value=edge function) */
D dPrime = entry.first;
std::shared_ptr<EdgeFunction<V>> fPrime = entry.second;
N sP = n;
V value = val(sP, d);
INC_COUNTER("Value Propagation", 1, PAMM_SEVERITY_LEVEL::Full);
propagateValue(c, dPrime, fPrime->computeTarget(value)); // 傳遞數(shù)據(jù)流
}
}
}
- getCallsFromWithin 獲取給定函數(shù)的所有調(diào)用位置
propagateValueAtCall
void propagateValueAtCall(std::pair<N, D> nAndD, N n) {
PAMM_GET_INSTANCE;
D d = nAndD.second;
for (M q : icfg.getCalleesOfCallAt(n)) { // 獲取到 callee
std::shared_ptr<FlowFunction<D>> callFlowFunction = // call指令的數(shù)據(jù)流傳遞函數(shù)
cachedFlowEdgeFunctions.getCallFlowFunction(n, q);
// getCallFlowFunction 完成實(shí)參和形參的映射
INC_COUNTER("FF Queries", 1, PAMM_SEVERITY_LEVEL::Full);
for (D dPrime : callFlowFunction->computeTargets(d)) {
std::shared_ptr<EdgeFunction<V>> edgeFn =
cachedFlowEdgeFunctions.getCallEdgeFunction(n, d, q, dPrime);
// 獲取 edge function
INC_COUNTER("EF Queries", 1, PAMM_SEVERITY_LEVEL::Full);
for (N startPoint : icfg.getStartPointsOf(q)) {
INC_COUNTER("Value Propagation", 1, PAMM_SEVERITY_LEVEL::Full);
propagateValue(startPoint, dPrime, edgeFn->computeTarget(val(n, d)));
}
}
}
}
- 這里的 cachedFlowEdgeFunctions = tabulationProblem
propagateValue
void propagateValue(N nHashN, D nHashD, V v) {
V valNHash = val(nHashN, nHashD);
V vPrime = joinValueAt(nHashN, nHashD, valNHash, v);
if (!(vPrime == valNHash)) {
setVal(nHashN, nHashD, vPrime);
valuePropagationTask(std::pair<N, D>(nHashN, nHashD));
}
}
3.IFDS問(wèn)題分析實(shí)例
IFDSTaintAnalysis
這里以 IFDSTaintAnalysis 為例,這個(gè)類中主要實(shí)例化了 Tablution 算法中所定義的四個(gè)數(shù)據(jù)流傳遞函數(shù):
- getNormalFlowFunction 普通節(jié)點(diǎn)
- getCallFlowFunction 函數(shù)調(diào)用點(diǎn)到被調(diào)用函數(shù)
- getRetFlowFunction ret 節(jié)點(diǎn),在污點(diǎn)分析問(wèn)題中,用于在函數(shù)調(diào)用返回后,處理返回值和參數(shù)是否被污染,只關(guān)注 pointer/reference 類型的指針
- getCallToRetFlowFunction call結(jié)點(diǎn)其return結(jié)點(diǎn)
其實(shí)例化的 IFDSTaintAnalysis 類如下:
namespace psr {
class LLVMBasedICFG;
/**
*
* 主要處理的數(shù)據(jù)流向是從 source 點(diǎn)到 sink 點(diǎn),一旦發(fā)現(xiàn)有被標(biāo)記為污染的數(shù)據(jù)
* 流到達(dá) sink 點(diǎn),就會(huì) leak 報(bào)告出來(lái)。
*
* @see TaintSensitiveFunctions on how to specify your own
* taint-sensitive source and sink functions.
*/
class IFDSTaintAnalysis : public DefaultIFDSTabulationProblem<
const llvm::Instruction *, const llvm::Value *,
const llvm::Function *, LLVMBasedICFG &> {
// {N D M I}
public:
typedef const llvm::Value *d_t;
typedef const llvm::Instruction *n_t;
typedef const llvm::Function *m_t;
typedef LLVMBasedICFG &i_t;
private:
TaintSensitiveFunctions SourceSinkFunctions ; // 存儲(chǔ)SourceSink函數(shù)
std::vector<std::string> EntryPoints; // 函數(shù)入口點(diǎn)
public:
/// Holds all leaks found during the analysis
std::map<n_t, std::set<d_t>> Leaks;
/**
*
* @param icfg
* @param TSF
* @param EntryPoints
*/
IFDSTaintAnalysis(i_t icfg, TaintSensitiveFunctions TSF,
std::vector<std::string> EntryPoints = {"main"});
virtual ~IFDSTaintAnalysis() = default;
std::shared_ptr<FlowFunction<d_t>> getNormalFlowFunction(n_t curr,
n_t succ) override;
std::shared_ptr<FlowFunction<d_t>> getCallFlowFunction(n_t callStmt,
m_t destMthd) override;
std::shared_ptr<FlowFunction<d_t>> getRetFlowFunction(n_t callSite,
m_t calleeMthd,
n_t exitStmt,
n_t retSite) override;
std::shared_ptr<FlowFunction<d_t>>
getCallToRetFlowFunction(n_t callSite, n_t retSite,
std::set<m_t> callees) override;
// 計(jì)算summary的函數(shù),將一個(gè)函數(shù)內(nèi)部的數(shù)據(jù)流傳遞情況計(jì)算summary
std::shared_ptr<FlowFunction<d_t>>
getSummaryFlowFunction(n_t callStmt, m_t destMthd) override;
std::map<n_t, std::set<d_t>> initialSeeds() override;
d_t createZeroValue() override;
bool isZeroValue(d_t d) const override;
void printNode(std::ostream &os, n_t n) const override;
void printDataFlowFact(std::ostream &os, d_t d) const override;
void printMethod(std::ostream &os, m_t m) const override;
void printIFDSReport(std::ostream &os,
SolverResults<n_t, d_t, BinaryDomain> &SR) override;
};
} // namespace psr
- IFDSTaintAnalysis 在初始化的時(shí)候傳入了一個(gè) TaintSensitiveFunctions 對(duì)象,這個(gè)類主要是維護(hù)用于污點(diǎn)分析 source 函數(shù)和 sink 函數(shù),并且缺省定義了一些常見(jiàn)的函數(shù),并標(biāo)記了其 Critical argument
DefaultIFDSTabulationProblem
? 這是一個(gè) IFDS 問(wèn)題的默認(rèn)模板類,用于定義一個(gè) IFDS 問(wèn)題,設(shè)置其 solver 的參數(shù):
namespace psr {
template <typename N, typename D, typename M, typename I>
class DefaultIFDSTabulationProblem : public IFDSTabulationProblem<N, D, M, I> {
protected:
I icfg;
virtual D createZeroValue() = 0;
D zerovalue;
public:
// 進(jìn)行一些默認(rèn)的 solver 的配置
DefaultIFDSTabulationProblem(I icfg) : icfg(icfg) {
this->solver_config.followReturnsPastSeeds = false;
this->solver_config.autoAddZero = true;
this->solver_config.computeValues = true;
this->solver_config.recordEdges = true;
this->solver_config.computePersistedSummaries = true;
}
virtual ~DefaultIFDSTabulationProblem() = default;
virtual std::shared_ptr<FlowFunction<D>>
getSummaryFlowFunction(N callStmt, M destMthd) override {
return nullptr;
}
I interproceduralCFG() override { return icfg; }
D zeroValue() override { return zerovalue; }
};
} // namespace psr
#endif
因此在用戶實(shí)現(xiàn)自己的 IFDS分析問(wèn)題的時(shí)候,只需要 override 特定的數(shù)據(jù)流傳遞函數(shù)即可。
4. ICFG 實(shí)現(xiàn)
TODO
5. 代碼結(jié)構(gòu)
? Phasar使用AnalysisController管理整個(gè)分析流程,根據(jù)選擇的特定的數(shù)據(jù)流問(wèn)題進(jìn)行分析。并將IFDS問(wèn)題分別使用兩個(gè)類管理,一個(gè)是Problem類,用于定義要分析的問(wèn)題,另一類是Solver類,用于求解IFDS問(wèn)題:
- Problem類的繼承關(guān)系及基類簡(jiǎn)述
- DefaultIFDSTabulationProblem:
- IFDSTabulationProblem
- FlowFunctions
- NodePrinter
- DataFlowFactPrinter
- MethodPrinter
- DefaultIFDSTabulationProblem:
| 類 | 簡(jiǎn)述 |
|---|---|
| DefaultIFDSTabulationProblem | 所有特定 IFDS 問(wèn)題的基類 |
| IFDSTabulationProblem | 保存了zerovalue、配置信息和初始化值 |
| FlowFunctions | 定義基本的數(shù)據(jù)流傳遞函數(shù) |
| NodePrinter、DataFlowFactPrinter、MethodPrinter | 打印特定數(shù)據(jù)結(jié)構(gòu)的模板類 |
- FlowFunctions 類中實(shí)現(xiàn)的方法接口
| 方法名 | 功能 |
|---|---|
| getNormalFlowFunction | 普通節(jié)點(diǎn)的數(shù)據(jù)流傳遞函數(shù) |
| getCallFlowFunction | 函數(shù)調(diào)用的數(shù)據(jù)流傳遞函數(shù),實(shí)參映射到形參 |
| getRetFlowFunction | 返回節(jié)點(diǎn)的數(shù)據(jù)流傳遞函數(shù),在被調(diào)函數(shù)的最后將數(shù)據(jù)流值傳回caller中其ret節(jié)點(diǎn) |
| getCallToRetFlowFunction | call到ret的數(shù)據(jù)流傳遞函數(shù),處理沒(méi)有被callee影響到的數(shù)據(jù)流值 |
| getSummaryFlowFunction | 默認(rèn)是返回空指針,用于處理libc庫(kù)函數(shù)或者llvm生成的不能follow進(jìn)去的函數(shù) |
-
IFDSTabulationProblem 類中的方法接口和成員變量
名稱 簡(jiǎn)述 initialSeeds 分析起始程序點(diǎn)和其的初始數(shù)據(jù)流值 setSolverConfiguration 設(shè)置solver isZeroValue 判斷數(shù)據(jù)流值是否是0值 interproceduralCFG 返回分析問(wèn)題的ICFG
-
Slover類的繼承關(guān)系及簡(jiǎn)述
LLVMIFDSSolver最終也是繼承自 IDESolver,對(duì)于特定的IFDS問(wèn)題的求解主要是調(diào)用solve( )方法。
- LLVMIFDSSolver
- IFDSSolver
- IDESolver
- IFDSSolver
- LLVMIFDSSolver
-
LLVMIFDSSolver
LLVMIFDSSolver這個(gè)類中實(shí)現(xiàn)了打印過(guò)程間、過(guò)程內(nèi) pathEdge 方法以及將結(jié)果可視化傳送給web端的接口。
-
IFDSSolver
IFDSSolver實(shí)現(xiàn)了一個(gè)返回給定stmt處ifds結(jié)果的方法。
-
IDESolver
這個(gè)類中slove( )函數(shù)實(shí)現(xiàn)的是對(duì)IFDS問(wèn)題的求解,solve函數(shù)主要有三個(gè)步驟:
- submitInitalSeeds 提交初始化數(shù)據(jù)流值的seed,構(gòu)建 exploded supergraph
- computeValues 根據(jù) edge functions 計(jì)算最終的數(shù)據(jù)流值
- 主要是計(jì)算和打印一些分析過(guò)程中的信息,比如gen/kill的數(shù)據(jù)流數(shù)量等。
| 函數(shù)名 | 功能 |
|---|---|
| propagate | 負(fù)責(zé)將數(shù)據(jù)流沿著exploded super graph向下傳遞,合并任何可能已經(jīng)在目標(biāo)語(yǔ)句處算出數(shù)據(jù)流值的edge function |
| setVal | 保存每個(gè)程序點(diǎn)的數(shù)據(jù)流值 |
| saveEdges | 向computedInterPathEdges和computedIntraPathEdges存儲(chǔ)已經(jīng)計(jì)算出來(lái)的pathEdges |
| computeValues | 根據(jù)邊函數(shù)計(jì)算每一個(gè)程序點(diǎn)的數(shù)據(jù)流值,調(diào)用setval函數(shù)存入valtab |
| addEndSummary | 添加函數(shù)的摘要邊 |
| 變量名 | 功能 |
|---|---|
| std::shared_ptr<JumpFunctions<N, D, M, V, I>> jumpFn; | 存儲(chǔ)并管理exploded super graph中的數(shù)據(jù)流邊 |
| Table<N, D, V> valtab; | 存儲(chǔ)節(jié)點(diǎn)的數(shù)據(jù)流值 |
| computedInterPathEdges | 函數(shù)間的邊(打印信息用) |
| computedIntraPathEdges | 函數(shù)內(nèi)的邊(打印信息用) |
| Table<N, D, std::map<N, std::set<D>>> incomingtab; | 記錄所有的函數(shù)調(diào)用信息(處理exit節(jié)點(diǎn)的時(shí)候使用) |
| Table<N, D, Table<N, D, std::shared_ptr<EdgeFunction<V>>>> endsummarytab | 存儲(chǔ)已經(jīng)計(jì)算出來(lái)的summay邊。 |
propagate函數(shù)
- 從jumpFn中反向找能夠從source處傳遞到target處的edge function,如果找不到就用alltop
- 如果是新的edgefuntion的話,說(shuō)明可以直接從最初點(diǎn)到當(dāng)前點(diǎn)添加一條邊(比如存在a-b的邊,然后當(dāng)前的數(shù)據(jù)流傳遞是b-c,會(huì)添加一條a-c的邊到j(luò)umpFunctions中)
- 然后會(huì)調(diào)用pathEdgeProcessingTask處理從source到target的pathEdge
- pathEdgeProcessingTask函數(shù)會(huì)根據(jù)target的類型做三種不同處理:
- processExit
- 首先找到該exit節(jié)點(diǎn)的所有caller
- 根據(jù)這些caller傳遞過(guò)來(lái)的數(shù)據(jù)流值和從函數(shù)start節(jié)點(diǎn)傳遞到exit節(jié)點(diǎn)的數(shù)據(jù)流值,調(diào)用propagate將數(shù)據(jù)流值從callee的start處傳遞到ret site處
- processNormalFlow
- 處理正常的數(shù)據(jù)流傳遞,通過(guò)getNormalFlowFunction獲取數(shù)據(jù)流傳遞函數(shù),然后通過(guò)computeNormalFlowFunction計(jì)算傳遞后的數(shù)據(jù)流值
- 計(jì)算出傳遞后的數(shù)據(jù)流值后再次調(diào)用propagate函數(shù)傳遞數(shù)據(jù)流值
- processCall
- 處理call語(yǔ)句的時(shí)候,首先看callee的函數(shù)是不是之前計(jì)算過(guò),即通過(guò)getSummaryFlowFunction獲取其summary flow function,如果存在就當(dāng)作普通的數(shù)據(jù)流傳遞
- 否則就處理函數(shù)實(shí)參和形參的映射(記錄在incomingtab中)
- 最后把能夠直接傳遞到ret-site的數(shù)據(jù)流值進(jìn)行傳遞
- processExit
6.參考:
1. github
2. wiki
7.問(wèn)題
- Table<N, D, std::map<N, std::set<D>>> incomingtab
edges going along calls
incomingtab相關(guān)的方法接口
std::map<N, std::set<D>> incoming(D d1, N sP) {
return incomingtab.get(sP, d1);
}
void addIncoming(N sP, D d3, N n, D d2) {
incomingtab.get(sP, d3)[n].insert(d2);
}
-
使用 incomingtab
-
processCall 函數(shù)中,遇到 call 節(jié)點(diǎn)的并找到其 call 的函數(shù)的時(shí)候,就注冊(cè)下 callee 的 startpoint 處的數(shù)據(jù)流值是來(lái)自哪里。
for (N sP : startPointsOf) { saveEdges(n, sP, d2, res, true); // for each result node of the call-flow function for (D d3 : res) { // create initial self-loop propagate(d3, sP, d3, EdgeIdentity<V>::getInstance(), n, false); // line 15 // register the fact that <sp,d3> has an incoming edge from <n,d2> // line 15.1 of Naeem/Lhotak/Rodriguez addIncoming(sP, d3, n, d2); -
processExit 函數(shù)中,處理 exit 節(jié)點(diǎn)的時(shí)候,先找到其對(duì)應(yīng)的 start point,然后對(duì)于 start point,添加一條從 start point 處到 exit 的邊(存儲(chǔ)在 fSummaryReuse 中),然后通過(guò)去尋找其 incoming calls,計(jì)算 call 到 ret 的數(shù)據(jù)流值,最后將數(shù)據(jù)流值從 call 傳遞到 retsite。
D d1 = edge.factAtSource(); D d2 = edge.factAtTarget(); // for each of the method's start points, determine incoming calls std::set<N> startPointsOf = icfg.getStartPointsOf(methodThatNeedsSummary); std::map<N, std::set<D>> inc; for (N sP : startPointsOf) { // line 21.1 of Naeem/Lhotak/Rodriguez // register end-summary addEndSummary(sP, d1, n, d2, f); for (auto entry : incoming(d1, sP)) { inc[entry.first] = std::set<D>{entry.second}; } } printEndSummaryTab(); printIncomingTab(); // for each incoming call edge already processed //(see processCall(..)) for (auto entry : inc) { // line 22 N c = entry.first; // for each return site for (N retSiteC : icfg.getReturnSitesOfCallAt(c)) { // compute return-flow function std::shared_ptr<FlowFunction<D>> retFunction = cachedFlowEdgeFunctions.getRetFlowFunction( c, methodThatNeedsSummary, n, retSiteC); INC_COUNTER("FF Queries", 1, PAMM_SEVERITY_LEVEL::Full); // for each incoming-call value for (D d4 : entry.second) { std::set<D> targets = computeReturnFlowFunction(retFunction, d1, d2, c, entry.second); ADD_TO_HISTOGRAM("Data-flow facts", targets.size(), 1, PAMM_SEVERITY_LEVEL::Full);
-
-
實(shí)參形參映射
以污點(diǎn)分析為例,在 getCallFlowFunction 函數(shù)中實(shí)現(xiàn)參數(shù)的映射:
shared_ptr<FlowFunction<IFDSTaintAnalysis::d_t>>
IFDSTaintAnalysis::getCallFlowFunction(IFDSTaintAnalysis::n_t callStmt,
IFDSTaintAnalysis::m_t destMthd) {
auto &lg = lg::get();
LOG_IF_ENABLE(BOOST_LOG_SEV(lg, DEBUG)
<< "IFDSTaintAnalysis::getCallFlowFunction()");
string FunctionName = cxx_demangle(destMthd->getName().str());
// Check if a source or sink function is called:
// We then can kill all data-flow facts not following the called function.
// The respective taints or leaks are then generated in the corresponding
// call to return flow function.
if (SourceSinkFunctions.isSource(FunctionName) ||
(SourceSinkFunctions.isSink(FunctionName))) {
return KillAll<IFDSTaintAnalysis::d_t>::getInstance();
}
// Map the actual into the formal parameters
if (llvm::isa<llvm::CallInst>(callStmt) ||
llvm::isa<llvm::InvokeInst>(callStmt)) {
return make_shared<MapFactsToCallee>(llvm::ImmutableCallSite(callStmt),
destMthd);
}
// Pass everything else as identity
return Identity<IFDSTaintAnalysis::d_t>::getInstance();
}
? 具體實(shí)現(xiàn):
MapFactsToCallee::MapFactsToCallee(
const llvm::ImmutableCallSite &callSite, const llvm::Function *destMthd,
function<bool(const llvm::Value *)> predicate)
: destMthd(destMthd), predicate(predicate) {
// Set up the actual parameters
for (unsigned idx = 0; idx < callSite.getNumArgOperands(); ++idx) {
actuals.push_back(callSite.getArgOperand(idx));
}
// Set up the formal parameters
for (unsigned idx = 0; idx < destMthd->arg_size(); ++idx) {
formals.push_back(getNthFunctionArgument(destMthd, idx));
}
}
? 在 processCall 函數(shù)中:
D d1 = edge.factAtSource();
N n = edge.getTarget(); // a call node; line 14...
D d2 = edge.factAtTarget();
std::shared_ptr<FlowFunction<D>> function =
cachedFlowEdgeFunctions.getCallFlowFunction(n, sCalledProcN);
std::set<D> res = computeCallFlowFunction(function, d1, d2);
for (N sP : startPointsOf) {
saveEdges(n, sP, d2, res, true);
// for each result node of the call-flow function
for (D d3 : res) {
// create initial self-loop
propagate(d3, sP, d3, EdgeIdentity<V>::getInstance(), n,
false); // line 15
// register the fact that <sp,d3> has an incoming edge from <n,d2>
// line 15.1 of Naeem/Lhotak/Rodriguez
addIncoming(sP, d3, n, d2);