3

I want to detect instructions such as mov dword ptr [rbp-0x28], 0x7 (that is, every instruction of the form mov dword ptr [rbp-0xxx], xxx) using Intel Pin, mainly to collect information about array writes. In unoptimized code, this should catch most stores to local variables.

I can do:

if (INS_Opcode(ins) == XED_ICLASS_MOV)
   instruction detection;

to detect the mov instruction. But along with that, it also detects other instructions such as mov eax, 0x0. I want to detect only the instructions that carry the dword ptr size directive.

I checked the pin instruction inspection API and pin xed-iclass-enum. Using that documentation I tried something like:

if ((INS_Opcode(ins) == XED_ICLASS_MOV) && INS_OperandIsMemory(ins, 0))
    instruction detection;

which gives me the desired result, but it also reports instructions like mov esi, eax (which I don't want).

My code:

#include <fstream>
#include <iostream>
#include "pin.H"
#include <stack>
#include <unordered_map>
// Additional library calls go here

// Stack allocation

// One entry of mainStack: a single integer payload.
// Deliberately a plain aggregate so that `Node{value}` keeps working.
struct Node {
    int value;
};

// Immediates taken from ADD/SUB instructions; printmaindisas() pushes one
// Node per matching instruction, in execution order.
std::stack<Node> mainStack;

// Ins object mapping

// Small holder for a Pin INS handle so that it can be kept as the mapped
// value of insstack. It only stores the handle and hands back a copy.
class Insr
{
public:
    // Remember the handle that was passed in.
    Insr(INS insob) : insobject(insob) {}

    // Return the stored handle by value.
    INS get_insobject() { return insobject; }

private:
    INS insobject;
};

// Maps an instruction address to the heap-allocated Insr that wraps its INS
// handle. Filled in by Instruction() and read by printmaindisas().
static std::unordered_map<ADDRINT, Insr*> insstack;

// Output file stream; main() opens it using the file name from the "-o" knob.
// Nothing in the visible code writes to it yet.
ofstream OutFile;

//static uint64_t counter = 0;

// NOTE(review): not referenced anywhere in the visible code — confirm it is
// still needed.
std::string rtin = "";
// Tracing gate tested by printmaindisas(): mutex_unlock() sets it to 1 and
// mutex_lock() resets it to 0. Starting it at a non-zero value would enable
// printing before main() is reached (e.g. from _start).
uint32_t key = 0;

void printmaindisas(uint64_t addr, std::string disassins)
{
    std::stringstream tempstream;
    tempstream << std::hex << addr;
    std::string address = tempstream.str();
    // if (addr > 0x700000000000)
    //  return;
    if (addr > 0x700000000000)
        return;
    if (!key)
        return;
    // if (insstack[addr]->get_opcode() == XED_ICLASS_ADD || insstack[addr]->get_opcode()
    //      == XED_ICLASS_SUB)
    INS ins = insstack[addr]->get_insobject();
    if((INS_Opcode(ins) == XED_ICLASS_ADD || INS_Opcode(ins) == XED_ICLASS_SUB)
            &&(INS_OperandIsImmediate(ins, 1)))
    {
      int value = INS_OperandImmediate(ins, 1);
        std::cout << "value: " << value << '\n';
        Node node{value};
        mainStack.push(node);
        std::cout << "stack top: " << mainStack.top().value << '\n';
    }
    if ((INS_Opcode(ins) == XED_ICLASS_MOV) && INS_OperandIsMemory(ins, 0))
    {
            std::cout << "yes!" << '\n';
    }


    std::cout<<address<<"\t"<<disassins<<std::endl;
}

// Called after main() (registered by Routine() at IPOINT_AFTER).
// Despite the name this is not a mutex: it clears the `key` gate so that
// printmaindisas() stops printing, and reports the transition with "out".
void mutex_lock()
{
    key = 0;
    std::cout << "out\n";
}
// Called on entry to main() (registered by Routine() at IPOINT_BEFORE).
// Sets the `key` gate so that printmaindisas() starts printing, and reports
// the transition with "in".
void mutex_unlock() {
    key = 1;
    std::cout << "in\n";
}

/**
 * Instruction-level instrumentation callback (registered in main()).
 *
 * Records the instruction in insstack under its address and inserts a call to
 * printmaindisas() before it, passing the address and a heap-allocated copy
 * of the disassembly text.
 *
 * @param ins  Instruction currently being instrumented.
 * @param v    Unused callback argument.
 *
 * NOTE(review): the stored INS handle is later queried from the analysis
 * routine; Pin documents INS handles as valid only during instrumentation,
 * so the predicates should ideally be evaluated here instead — confirm.
 */
void Instruction(INS ins, VOID *v)
{
    const ADDRINT addr = INS_Address(ins);

    // insert() never overwrites an existing key, so allocating
    // unconditionally leaked one Insr every time the same address was
    // instrumented again. Allocate only for addresses not seen before.
    if (insstack.find(addr) == insstack.end())
    {
        insstack.insert(std::make_pair(addr, new Insr(ins)));
    }

    // The disassembly string is intentionally never freed: the inserted call
    // may run for as long as the instrumented code exists.
    INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)printmaindisas, IARG_ADDRINT, addr,
                   IARG_PTR, new std::string(INS_Disassemble(ins)), IARG_END);
}

// Routine-level instrumentation callback (registered in main()).
// Only the routine named "main" is touched: mutex_unlock() is inserted at its
// entry and mutex_lock() at its exit, which switches the trace printing on
// and off around main().
void Routine(RTN rtn, VOID *V)
{
    if (RTN_Name(rtn) != "main")
    {
        return;
    }

    //std::cout<<"Loading: "<<RTN_Name(rtn) << endl;
    RTN_Open(rtn);
    RTN_InsertCall(rtn, IPOINT_BEFORE, (AFUNPTR)mutex_unlock, IARG_END);
    RTN_InsertCall(rtn, IPOINT_AFTER, (AFUNPTR)mutex_lock, IARG_END);
    RTN_Close(rtn);
}

// Command-line knob "-o": name of the output file (default "mytool.out").
// main() passes its value to OutFile.open().
KNOB<string> KnobOutputFile(KNOB_MODE_WRITEONCE, "pintool", "o", "mytool.out", "specify output file name");
/*
VOID Fini(INT32 code, VOID *v)
{
    // Write to a file since cout and cerr maybe closed by the application
    OutFile.setf(ios::showbase);
    OutFile << "Count " << count << endl;
    OutFile.close();
}
*/

// Prints a short banner followed by the summary of all registered knobs to
// standard error. Always returns -1, which main() uses as its exit code when
// PIN_Init() reports a failure.
int32_t Usage()
{
    std::cerr << "This is my custom tool" << std::endl
              << std::endl
              << KNOB_BASE::StringKnobSummary() << std::endl;
    return -1;
}

// Tool entry point: initialises Pin, registers the routine- and
// instruction-level instrumentation callbacks and starts the traced program.
int main(int argc, char * argv[])
{
    // Load symbol information first; Routine() matches routines by name.
    PIN_InitSymbols();

    // PIN_Init must run before PIN_StartProgram. A non-zero result is
    // answered with the usage text and its return value as exit code.
    if (PIN_Init(argc, argv))
    {
        return Usage();
    }

    // Open the output file named by the -o knob.
    OutFile.open(KnobOutputFile.Value().c_str());

    // Select Intel syntax for the disassembly text.
    PIN_SetSyntaxIntel();

    // Routine-level callback first, then the per-instruction callback.
    RTN_AddInstrumentFunction(Routine, 0);
    INS_AddInstrumentFunction(Instruction, 0);

    // Fini callback registration is currently disabled.
    //PIN_AddFiniFunction(Fini, 0);

    // Hand control to Pin (Pin documents this call as not returning).
    PIN_StartProgram();

    return 0;
}

And the output I'm getting:

in
40051e  push rbp
value: -128
stack top: -128
40051f  mov rbp, rsp
400522  add rsp, 0xffffffffffffff80
yes!
400526  mov dword ptr [rbp-0x28], 0x7
yes!
40052d  mov dword ptr [rbp-0x64], 0x9
400534  mov eax, 0x0
400539  call 0x4004e6
4004e6  push rbp
value: 64
stack top: 64
4004e7  mov rbp, rsp
4004ea  sub rsp, 0x40
yes!
4004ee  mov dword ptr [rbp-0xc], 0x4
4004f5  lea rax, ptr [rbp-0xc]
yes!
4004f9  mov qword ptr [rbp-0x8], rax
4004fd  mov rax, qword ptr [rbp-0x8]
400501  mov eax, dword ptr [rax]
yes!
400503  mov esi, eax
400505  mov edi, 0x4005d0
40050a  mov eax, 0x0
40050f  call 0x4003f0
4003f0  jmp qword ptr [rip+0x200c22]
4003f6  push 0x0
4003fb  jmp 0x4003e0
4003e0  push qword ptr [rip+0x200c22]
4003e6  jmp qword ptr [rip+0x200c24]
4
yes!
400514  mov dword ptr [rbp-0x3c], 0x3
40051b  nop
40051c  leave 
40051d  ret 
40053e  mov eax, 0x0
400543  leave 
out

Is this the correct way to do that (without any false positives)?

halfer
  • 19,824
  • 17
  • 99
  • 186
R4444
  • 2,016
  • 2
  • 19
  • 30
  • 1
    The part `INS_Opcode(ins) != REG_ESI` doesn't make sense. Did you mean something else? You can check the size of the memory operand using `INS_OperandWidth`. if I understand your question correctly, you want to check whether `INS_OperandWidth(ins, 0) == 32`. – Hadi Brais Mar 25 '19 at 01:40
  • Thanks for your reply. Without `INS_Opcode(ins) != REG_ESI` it detects the instructions like `mov esi, eax`. Also I tried your method by doing something like `if ((INS_Opcode(ins) == XED_ICLASS_MOV) && (INS_OperandWidth(ins, 0)==32))`, but it also detects other instructions like 'mov eax, 0x0' (which makes sense I believe). I want to detect only instructions like `mov dword ptr [rbp-xxx], xxx` – R4444 Mar 25 '19 at 01:50
  • 1
    But `INS_OperandIsMemory` should fail in these cases because the first operand is not a memory reference. – Hadi Brais Mar 25 '19 at 01:53
  • 2
    I think Hadi's point is that ESI is not the opcode, it's the operand. Therefore `INS_Opcode(ins) != REG_ESI` makes no sense. – Ben Voigt Mar 25 '19 at 02:10
  • You are correct. `INS_Opcode(ins) != REG_ESI` doesn't make any sense (my mistake). I have edited the question accordingly. I get what you are trying to say about `INS_OperandIsMemory(ins, 0)`, but I'm not sure how am I getting the desired result. I also added my code and the output I'm getting. – R4444 Mar 25 '19 at 02:12
  • Well I solved that using `if ((INS_Opcode(ins) == XED_ICLASS_MOV) && INS_OperandIsMemory(ins, 0) && (INS_OperandWidth(ins, 0) == 32))`. Thanks for your help @Ben and @ Hadi. @Hadi your first comment helped me, if you post this I can mark it as an answer. – R4444 Mar 25 '19 at 02:34

1 Answer

4

If you want to accept all of the following instructions:

mov [rbp + disp], reg/imm
mov [rbp*scale + disp], reg/imm
mov [reg + rbp*scale], reg/imm
mov [rbp + reg*scale + disp], reg/imm

then you need to perform the following checks:

// Matches a 32-bit MOV store whose memory operand uses RBP as base or index.
// On an Intel 64 target the 64-bit register is REG_RBP; REG_EBP names the
// 32-bit sub-register and is a different enumerator, so it would not match
// operands such as [rbp-0x28].
if (INS_Opcode(ins) == XED_ICLASS_MOV &&              // Check that the instruction is MOV.
    INS_OperandIsMemory(ins, 0) &&                    // Check that the destination operand is a memory operand.
    INS_OperandWidth(ins, 0) == 32 &&                 // Check that the size of the operand is 32 bits.
    (INS_OperandMemoryBaseReg(ins, 0) == REG_RBP ||
     INS_OperandMemoryIndexReg(ins, 0) == REG_RBP))  // Check that the base or index register is RBP.
{
    ...
}

Note that these checks accept both MOV instructions with displacement (including a displacement of zero) and MOV instructions without displacement (which is semantically equivalent to a displacement of zero but the encoding is different).

I assumed that you want to accept RBP both as a base register or as an index register (potentially with a scale larger than 1). Note that in case RBP is used as a base register, the encoding of the instruction will always include a displacement. See: Why are rbp and rsp called general purpose registers?.

If you want to accept all of the following instructions where RBP is used as the base register:

mov [rbp + disp], reg/imm
mov [rbp + reg*scale + disp], reg/imm

then you need to perform the following checks:

// Matches a 32-bit MOV store whose memory operand uses RBP as the base
// register (an index register may also be present).
// On an Intel 64 target the 64-bit register is REG_RBP; REG_EBP names the
// 32-bit sub-register and is a different enumerator, so it would not match
// operands such as [rbp-0x28].
if (INS_Opcode(ins) == XED_ICLASS_MOV &&              // Check that the instruction is MOV.
    INS_OperandIsMemory(ins, 0) &&                    // Check that the destination operand is a memory operand.
    INS_OperandWidth(ins, 0) == 32 &&                 // Check that the size of the operand is 32 bits.
    INS_OperandMemoryBaseReg(ins, 0) == REG_RBP)      // Check that the base is RBP.
{
    ...
}

If you want to accept only the following instruction:

mov [rbp + disp], reg/imm

then you need to perform the following checks:

// Matches only a 32-bit MOV store of the form [rbp + disp]: RBP as base and
// no index register.
// On an Intel 64 target the 64-bit register is REG_RBP; REG_EBP names the
// 32-bit sub-register and is a different enumerator, so it would not match
// operands such as [rbp-0x28].
if (INS_Opcode(ins) == XED_ICLASS_MOV &&                 // Check that the instruction is MOV.
    INS_OperandIsMemory(ins, 0) &&                       // Check that the destination operand is a memory operand.
    INS_OperandWidth(ins, 0) == 32 &&                    // Check that the size of the operand is 32 bits.
    INS_OperandMemoryBaseReg(ins, 0) == REG_RBP &&       // Check that the base is RBP.
    INS_OperandMemoryIndexReg(ins, 0) == REG_INVALID())  // Check that there is no index register.
{
    ...
}

If you want to check whether the displacement is a negative number, use the following check:

INS_OperandMemoryDisplacement(ins, 0) < 0

Note that INS_OperandMemoryDisplacement does not distinguish between a memory operand that has no displacement and one that has a displacement of zero. If there is no displacement, it just returns zero. If you want to determine whether the instruction encoding actually includes a displacement field, then you should use the XED API instead.

Hadi Brais
  • 22,259
  • 3
  • 54
  • 95
  • This is perfect! Thanks. – R4444 Mar 25 '19 at 02:42
  • 1
    @Ruturaj Your question is a little ambiguous actually. I've expanded the answer to cover all the possible interpretations that I could think of. – Hadi Brais Mar 25 '19 at 04:49
  • 2
    `[rbp*scale]` isn't possible (with any register), only `[reg*scale + disp32]`. The only encoding with an index but no base register uses a disp32: mod=00 (normally no displacement) SIB.Base=101=RBP https://wiki.osdev.org/X86-64_Instruction_Encoding#32.2F64-bit_addressing_2. So that special case effectively means use a disp32 *as* the base instead of a register. You can't just have no base at all, only reg or disp32. (This is unfortunate for LEA to copy-and-scale; it requires 4 bytes of zeros) – Peter Cordes Mar 25 '19 at 05:02
  • @Hadi yes, sorry for the inconvenience. But your answer covers it very well. Yes, I have done exactly this: `INS_OperandMemoryDisplacement(ins, 0) < 0` to keep it within the bounds. @Peter thanks for sharing that observation. – R4444 Mar 25 '19 at 13:07