Learning Objectives

  • learn how to traverse graph structure
  • review SSA and Phi function

Traversing a Control Flow Graph(CFG) is a fundamental method in compiler optimization and program analysis.

#include "llvm/Pass.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Dominators.h"

#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallPtrSet.h"

#include "llvm/Support/raw_ostream.h"

#include <unordered_map>
#include <stack>

using namespace llvm;

namespace{
    struct YJ009RIV : public FunctionPass{
        // Map values with addr of BB
        using MapVecBlk = MapVector<BasicBlock const *, SmallPtrSet<Value *, 8>>;
        using NodeTy = DomTreeNodeBase<BasicBlock> *;

        MapVecBlk result;

        static char ID;
        YJ009RIV() : FunctionPass(ID) {}

        MapVecBlk buildRIV(llvm::Function &F, llvm::DomTreeNodeBase<llvm::BasicBlock> *CFGRoot){
            // DominatorTree *DT; //FunctionAnalysisManager.getResult<DominatroTreeAnalytsis>(F);
            // NodeTy CFGRoot = DT->getRootNode();
 
            MapVecBlk definedVarMap;
            // for each basic block, map defined variables
            for(auto& BB: F){
                auto &Values = definedVarMap[&BB];
                for(auto& I: BB)
                    if (I.getType()->isIntegerTy())
                        Values.insert(&I);
            }

            auto& EntryBBValues = result[&F.getEntryBlock()];

            for(auto& Global : F.getParent()->globals())
                if (Global.getValueType()->isIntegerTy())
                    EntryBBValues.insert(&Global);

            for(Argument &Arg : F.args())
                if (Arg.getType()->isIntegerTy())
                    EntryBBValues.insert(&Arg);

            // traverse CFG
            MapVecBlk reachableIntVarMap;
            std::stack<NodeTy> BBsToProcess;
            BBsToProcess.push(CFGRoot);

            while(!BBsToProcess.empty()){
                auto *Parent = BBsToProcess.top();
                BBsToProcess.pop();

                auto &ParentDefVar = definedVarMap[Parent->getBlock()];
                SmallPtrSet<Value *, 8> ParentRIV = result[Parent->getBlock()];

                // Iterate child nodes of the parent node and update its RIV
                for(auto &Child : *Parent){
                    BBsToProcess.push(Child);
                    auto ChildBB = Child->getBlock();

                    result[ChildBB].insert(ParentDefVar.begin(), ParentDefVar.end());
                    result[ChildBB].insert(ParentRIV.begin(), ParentRIV.end());
                }
            }

            // print
            for(const auto& BI : result){
                errs() << BI.first->getName() << '\n';

                for(auto I : BI.second){
                    I->print(errs());
                    errs() << '\n';
                }
            }

            return result;
        }

        bool runOnFunction(Function& F) override {
            bool modified = false;

            DominatorTree *DT = new DominatorTree(F);
            MapVecBlk result = buildRIV(F, DT->getRootNode());

            return modified;
        }
    };
}

char YJ009RIV::ID = 0;
static RegisterPass<YJ009RIV>X ("RIV", "Find Reachable Integer Values", true, false);

Why do we use DFS or BFS instead of for loop (e.g. for(auto& BB: F))?

Using a simple loop such as for(auto& BB: F) does not guarantee the order in which the basic blocks are visited may not correspond to the logical or execution order. It is crucial for certain analysis and transformation performing live analysis, pointer analysis or other flow-sensitive optimization.

What is llvm::SmallPtrSet?

llvm::SmallPtrSet is a data structure in order to store a small number of pointers in a set. It is similar with std::unordered_set but it is optimized for the small number of pointers as the name implies. It is usually used to track live variables, unique types, or etc.

Pass Type
Writing HelloLLVM Pass analysis
Iterating over Module, Function, Basic block analysis
Count the number of insts, func calls analysis
Insert func call transformation
Change Insts (obfuscation) transformation
Control flow graph transformation

Reference

[1] Andrzej Warzyński. llvm-tutor. github
[2] Adrian Sampson. LLVM for Grad Students. blog
[3] Keshav Pingali. CS 380C: Advanced Topics in Compilers. blog
[4] Mathematical Optimization: Solving Problems using SCIP and Python. site