005 Traverse Control flow graph
Learning Objectives
- learn how to traverse graph structure
- review SSA and Phi function
Traversing a Control Flow Graph(CFG) is a fundamental method in compiler optimization and program analysis.
#include "llvm/Pass.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Dominators.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Support/raw_ostream.h"
#include <unordered_map>
#include <stack>
using namespace llvm;
namespace{
struct YJ009RIV : public FunctionPass{
// Map values with addr of BB
using MapVecBlk = MapVector<BasicBlock const *, SmallPtrSet<Value *, 8>>;
using NodeTy = DomTreeNodeBase<BasicBlock> *;
MapVecBlk result;
static char ID;
YJ009RIV() : FunctionPass(ID) {}
MapVecBlk buildRIV(llvm::Function &F, llvm::DomTreeNodeBase<llvm::BasicBlock> *CFGRoot){
// DominatorTree *DT; //FunctionAnalysisManager.getResult<DominatroTreeAnalytsis>(F);
// NodeTy CFGRoot = DT->getRootNode();
MapVecBlk definedVarMap;
// for each basic block, map defined variables
for(auto& BB: F){
auto &Values = definedVarMap[&BB];
for(auto& I: BB)
if (I.getType()->isIntegerTy())
Values.insert(&I);
}
auto& EntryBBValues = result[&F.getEntryBlock()];
for(auto& Global : F.getParent()->globals())
if (Global.getValueType()->isIntegerTy())
EntryBBValues.insert(&Global);
for(Argument &Arg : F.args())
if (Arg.getType()->isIntegerTy())
EntryBBValues.insert(&Arg);
// traverse CFG
MapVecBlk reachableIntVarMap;
std::stack<NodeTy> BBsToProcess;
BBsToProcess.push(CFGRoot);
while(!BBsToProcess.empty()){
auto *Parent = BBsToProcess.top();
BBsToProcess.pop();
auto &ParentDefVar = definedVarMap[Parent->getBlock()];
SmallPtrSet<Value *, 8> ParentRIV = result[Parent->getBlock()];
// Iterate child nodes of the parent node and update its RIV
for(auto &Child : *Parent){
BBsToProcess.push(Child);
auto ChildBB = Child->getBlock();
result[ChildBB].insert(ParentDefVar.begin(), ParentDefVar.end());
result[ChildBB].insert(ParentRIV.begin(), ParentRIV.end());
}
}
// print
for(const auto& BI : result){
errs() << BI.first->getName() << '\n';
for(auto I : BI.second){
I->print(errs());
errs() << '\n';
}
}
return result;
}
bool runOnFunction(Function& F) override {
bool modified = false;
DominatorTree *DT = new DominatorTree(F);
MapVecBlk result = buildRIV(F, DT->getRootNode());
return modified;
}
};
}
char YJ009RIV::ID = 0;
static RegisterPass<YJ009RIV>X ("RIV", "Find Reachable Integer Values", true, false);
Why do we use DFS or BFS instead of for loop (e.g. for(auto& BB: F)
)?
Using a simple loop such as for(auto& BB: F)
does not guarantee the order in which the basic blocks are visited may not correspond to the logical or execution order. It is crucial for certain analysis and transformation performing live analysis, pointer analysis or other flow-sensitive optimization.
What is llvm::SmallPtrSet
?
llvm::SmallPtrSet
is a data structure in order to store a small number of pointers in a set. It is similar with std::unordered_set
but it is optimized for the small number of pointers as the name implies. It is usually used to track live variables, unique types, or etc.
Pass | Type |
---|---|
Writing HelloLLVM Pass | analysis |
Iterating over Module, Function, Basic block | analysis |
Count the number of insts, func calls | analysis |
Insert func call | transformation |
Change Insts (obfuscation) | transformation |
Control flow graph | transformation |
Reference
[1] Andrzej Warzyński. llvm-tutor. github
[2] Adrian Sampson. LLVM for Grad Students. blog
[3] Keshav Pingali. CS 380C: Advanced Topics in Compilers. blog
[4] Mathematical Optimization: Solving Problems using SCIP and Python. site
Enjoy Reading This Article?
Here are some more articles you might like to read next: