diff --git a/Makefile b/Makefile index 9d5f54b..680fc6b 100644 --- a/Makefile +++ b/Makefile @@ -10,6 +10,7 @@ all: @cp -f src/tracker ./ check: all + @cp tracker test/ @cd test/ && $(MAKE) format: diff --git a/include/trace.h b/include/trace.h index b404fea..15fb379 100644 --- a/include/trace.h +++ b/include/trace.h @@ -13,39 +13,61 @@ #ifndef _TRACE_H #define _TRACE_H +#include "../graphviz/cgraph.h" +#include #include #include -#include #define DEFAULT_HASHTABLE_SIZE 65536 /* 2^16 */ /* A more convenient byte_t type */ typedef uint8_t byte_t; -/* ***** Handling assembly instructions ***** */ +/* All the possible types of instruction */ +typedef enum +{ + BASIC, + BRANCH, + CALL, + JUMP, + RET +} instr_type_t; +/* ***** Definitions of all the struct used ***** */ + +/* ***** Handling assembly instructions ***** */ typedef struct _instr_t instr_t; +/* ***** Hashtables to store cfg nodes ***** */ +typedef struct _hashtable_t hashtable_t; + +/* ***** cfg nodes keeping track of the executions ***** */ +typedef struct _cfg_t cfg_t; + +/* ***** Linked list to store a trace ***** */ +typedef struct _trace_t trace_t; + + +/* ***** instr_t functions ***** */ + /* Return a new instr_t struct, NULL otherwise (and set errno) */ -instr_t *instr_new (const uintptr_t addr, - const uint8_t size, - const uint8_t *opcodes); +instr_t *instr_new (const uintptr_t addr, const uint8_t size, + const uint8_t *opcodes, char *str_name); /* Delete the assembly instruction from memory */ void instr_delete (instr_t *instr); /* Get the address of the instruction */ -uintptr_t instr_get_addr (instr_t * const instr); +uintptr_t instr_get_addr (instr_t *const instr); /* Get the size (in bytes) of the instruction */ -size_t instr_get_size (instr_t * const instr); +size_t instr_get_size (instr_t *const instr); /* Get a pointer to the opcodes of the instruction */ -uint8_t * instr_get_opcodes (instr_t * const instr); +uint8_t * instr_get_opcodes (instr_t *const instr); -/* ***** Hashtables to store 
instructions ***** */ -typedef struct _hashtable_t hashtable_t; +/* ***** hashtable_t functions ***** */ /* Return an hash index for the instruction */ uint64_t hash_instr (const instr_t *instr); @@ -57,10 +79,10 @@ hashtable_t *hashtable_new (const size_t size); void hashtable_delete (hashtable_t *ht); /* Insert the instruction in the hashtable */ -bool hashtable_insert (hashtable_t * ht, instr_t * instr); +bool hashtable_insert (hashtable_t *ht, cfg_t *cfg); /* Look-up if current instruction is already in the hashtable */ -bool hashtable_lookup (hashtable_t *ht, instr_t *instr); +cfg_t *hashtable_lookup (hashtable_t *ht, instr_t *instr); /* Count the number of entries in the hashtable */ size_t hashtable_entries (hashtable_t *ht); @@ -68,4 +90,71 @@ size_t hashtable_entries (hashtable_t *ht); /* Count the number of collisions in the hashtable */ size_t hashtable_collisions (hashtable_t *ht); + +/* ***** trace_t functions ***** */ + +/* Creates a trace and initialize it with ins +Returns a pointer to the created trace, or NULL if an error occured */ +trace_t *trace_new (instr_t *ins); + +/* Creates an element initialized with ins and insert it after t +Returns a pointer to the created element or NULL if an error occured */ +trace_t *trace_insert (trace_t *t, instr_t *ins); + +/* Free every element in the trace t */ +void trace_delete (trace_t *t); + +/* Returns a pointer to the first element where t2 differs from t1 */ +trace_t *trace_compare (trace_t *t1, trace_t *t2); + + +/* ***** cfg_t functions ***** */ + +/* Creates a cfg and it with hash_index +Returns a pointer to the created trace, or NULL if an error occured */ +cfg_t *cfg_new (hashtable_t *ht, instr_t *ins, char *str); + +/* Auxiliary function for cfg_insert */ +cfg_t *aux_cfg_insert (cfg_t *CFG, cfg_t *new); + +/* Creates an element initialized with ins and insert it in CFG's succesors +Returns a pointer to the created element or NULL if an error occured*/ +cfg_t *cfg_insert (hashtable_t *ht, cfg_t 
*CFG, instr_t *ins, Agraph_t *g, char *str); + +/* Free every allocated field of CFG, as well as CFG itself */ +void cfg_delete (cfg_t *CFG); + +/* Get the instruction in CFG */ +instr_t *cfg_get_instr (cfg_t *CFG); + +/* Get the number of successors of CFG */ +uint16_t cfg_get_nb_out (cfg_t *CFG); + +/* Get the number of "parents" of CFG */ +uint16_t cfg_get_nb_in (cfg_t *CFG); + +/* Get the type of the instruction in CFG */ +instr_type_t cfg_get_type (cfg_t *CFG); + +/* Get the index of the function CFG is in */ +uint16_t cfg_get_name (cfg_t *CFG); + +/* Get a pointer array of every successor of CFG */ +cfg_t **cfg_get_successor (cfg_t *CFG); + +/* Get a pointer to successor number i of CFG */ +cfg_t *cfg_get_successor_i (cfg_t *CFG, uint16_t i); + +/* Get the total number of functions */ +size_t get_nb_name (void); + +/* Get the str with the address, the opcodes, the mnemonics and the operands */ +char *cfg_get_str (cfg_t *CFG); + +/* Get a pointer to the first node in the function number index */ +cfg_t *get_function_entry (size_t index); + +/* Adds the very first node to function_entry */ +void add_first_entry (cfg_t *CFG); + #endif /* _TRACE_H */ diff --git a/src/Makefile b/src/Makefile index db668f7..631aaa7 100644 --- a/src/Makefile +++ b/src/Makefile @@ -1,7 +1,7 @@ # Usual compilation flags CFLAGS = -Wall -Wextra -std=c11 -DDEBUG -g CPPFLAGS = -I../include -LDFLAGS = -lcapstone +LDFLAGS = -lcapstone -lcgraph # Special rules and targets .PHONY: all clean help diff --git a/src/trace.c b/src/trace.c index e96cbb2..f56f731 100644 --- a/src/trace.c +++ b/src/trace.c @@ -14,18 +14,19 @@ #include #include +#include struct _instr_t { - uintptr_t address; /* Address where lies the instruction */ + uintptr_t address; /* Address where lies the instruction */ // uintptr_t *next; /* List of addresses of the next instructions */ - // uint8_t type; /* Instr type: 0 = instr, 1 = branch, 2 = call, 3 = jmp */ + instr_type_t type; /* Instr type: 0 = instr, 1 = branch, 2 = 
call, 3 = jmp, 4 = ret */ uint8_t size; /* Opcode size */ uint8_t opcodes[]; /* Instruction opcode */ }; instr_t * -instr_new (const uintptr_t addr, const uint8_t size, const uint8_t *opcodes) +instr_new (const uintptr_t addr, const uint8_t size, const uint8_t *opcodes, char *str_name) { /* Check size != 0 and opcodes != NULL */ if (size == 0 || opcodes == NULL) @@ -41,7 +42,28 @@ instr_new (const uintptr_t addr, const uint8_t size, const uint8_t *opcodes) instr->address = addr; instr->size = size; memcpy (instr->opcodes, opcodes, size); - + /* Test opcodes to assign type to instruction */ + if ((opcodes[0] >= 0x70 && opcodes[0] <= 0x7F) + || (opcodes[0] == 0x0F && opcodes[1] >= 0x80 && opcodes[1] <= 0x8F)) + instr->type = BRANCH; + else if (opcodes[0] == 0xE8 + || opcodes[0] == 0x9A + || (opcodes[0] == 0xFF && (((size == 2 && opcodes[1] >= 0xD0 && opcodes[1] <= 0xDF) || size == 3) || opcodes[1] == 0x15)) + || (opcodes[0] == 0x41 && opcodes[1] == 0xFF && + ((opcodes[2] >= 0xD0 && opcodes[2] <= 0xD7) || size > 3))) + instr->type = CALL; + else if ((opcodes[0] >= 0xE9 && opcodes[0] <= 0xEB) + || (opcodes[0] == 0xFF && (((size == 2 && opcodes[1] >= 0xE0 && opcodes[1] <= 0xEF) || size == 4 || size == 5) || opcodes[1] == 0x25)) + || (opcodes[0] >= 0xE0 && opcodes[0] <= 0xE3) + || (opcodes[0] == 0x41 && opcodes[1] == 0xFF && + opcodes[2] >= 0xE0 && opcodes[2] <= 0xE7)) + instr->type = JUMP; + else if (((opcodes[0] == 0xC3 || opcodes[0] == 0xCB) && size == 1) + || ((opcodes[0] == 0xC2 || opcodes[0] == 0xCA) && size == 3) + || (opcodes[0] == 0xF3 && opcodes[1] == 0xC3 && size == 2)) + instr->type = RET; + else + instr->type = BASIC; return instr; } @@ -76,14 +98,36 @@ struct _hashtable_t size_t size; /* Hashtable size */ size_t collisions; /* Number of collisions encountered */ size_t entries; /* Number of entries registered */ - instr_t ** buckets[]; /* Hachtable buckets */ + cfg_t **buckets[]; /* Hachtable buckets */ }; + +struct _cfg_t +{ + instr_t *instruction; /* 
Pointer to instruction */ + uint16_t nb_in; /* Number of predecessor */ + uint16_t nb_out; /* Number of successor */ + uint16_t name; /* Current function name */ + char *str_graph; /* Address + opcodes + mnemonic + operand */ + cfg_t **successor; /* Array of pointers to successor */ +}; + +/* Represent the number of successive calls whithout rets */ +uint16_t depth = 0; +/* Keep track of the number of different function called */ +uint16_t nb_name = 0; +/* Array of caller indexed by depth */ +cfg_t *stack[256] = {NULL}; +/* Arry of function's entry */ +cfg_t *function_entry[256] = {NULL}; + /* Compression function for Merkle-Damgard construction */ -#define mix(h) ({ \ - (h) ^= (h) >> 23; \ - (h) *= 0x2127598bf4325c37ULL; \ - (h) ^= (h) >> 47; }) +#define mix(h) \ + ({ \ + (h) ^= (h) >> 23ULL; \ + (h) *= 0x2127598bf4325c37ULL; \ + (h) ^= (h) >> 47ULL; \ + }) uint64_t fasthash64 (const uint8_t *buf, size_t len, uint64_t seed) @@ -96,32 +140,34 @@ fasthash64 (const uint8_t *buf, size_t len, uint64_t seed) uint64_t h = seed ^ (len * m); uint64_t v; - while (pos != end) { - v = *pos++; - h ^= mix(v); - h *= m; - } + while (pos != end) + { + v = *pos++; + h ^= mix(v); + h *= m; + } pos2 = (const uint8_t *) pos; v = 0; - switch (len & 7) { - case 7: v ^= (uint64_t) pos2[6] << 48; - /* FALLTHROUGH */ - case 6: v ^= (uint64_t) pos2[5] << 40; - /* FALLTHROUGH */ - case 5: v ^= (uint64_t) pos2[4] << 32; - /* FALLTHROUGH */ - case 4: v ^= (uint64_t) pos2[3] << 24; - /* FALLTHROUGH */ - case 3: v ^= (uint64_t) pos2[2] << 16; - /* FALLTHROUGH */ - case 2: v ^= (uint64_t) pos2[1] << 8; - /* FALLTHROUGH */ - case 1: v ^= (uint64_t) pos2[0]; - h ^= mix(v); - h *= m; - } + switch (len & 7) + { + case 7: v ^= (uint64_t) pos2[6] << 48; + /* FALLTHROUGH */ + case 6: v ^= (uint64_t) pos2[5] << 40; + /* FALLTHROUGH */ + case 5: v ^= (uint64_t) pos2[4] << 32; + /* FALLTHROUGH */ + case 4: v ^= (uint64_t) pos2[3] << 24; + /* FALLTHROUGH */ + case 3: v ^= (uint64_t) pos2[2] << 16; + /* 
FALLTHROUGH */ + case 2: v ^= (uint64_t) pos2[1] << 8; + /* FALLTHROUGH */ + case 1: v ^= (uint64_t) pos2[0]; + h ^= mix(v); + h *= m; + } return mix(h); } @@ -141,16 +187,16 @@ hashtable_new (const size_t size) return NULL; } - hashtable_t *ht = malloc (sizeof (hashtable_t) + size * sizeof (instr_t *)); + hashtable_t *ht = malloc (sizeof (hashtable_t) + size * sizeof (cfg_t *)); if (!ht) return NULL; /* Initialize to zero */ - *ht = (hashtable_t) { 0 }; + *ht = (hashtable_t) {0}; ht->size = size; ht->collisions = 0; ht->entries = 0; - memset (ht->buckets, 0, size * sizeof (instr_t *)); + memset (ht->buckets, 0, size * sizeof (cfg_t *)); return ht; } @@ -159,30 +205,37 @@ void hashtable_delete (hashtable_t *ht) { for (size_t i = 0; i < ht->size; i++) - free (ht->buckets[i]); + { + size_t j = 0; + if (ht->buckets[i]) + { + while (ht->buckets[i][j] != NULL) + cfg_delete (ht->buckets[i][j++]); + free (ht->buckets[i]); + } + } free (ht); } -#include bool -hashtable_insert (hashtable_t * ht, instr_t * instr) +hashtable_insert (hashtable_t * ht, cfg_t *CFG) { - if (ht == NULL || instr == NULL) + if (ht == NULL || CFG->instruction == NULL) { errno = EINVAL; return false; } - size_t index = hash_instr(instr) % ht->size; + size_t index = hash_instr (CFG->instruction) % ht->size; /* Bucket is empty */ if (ht->buckets[index] == NULL) { - ht->buckets[index] = calloc (2, sizeof (instr_t *)); + ht->buckets[index] = calloc (2, sizeof (cfg_t *)); if (ht->buckets[index] == NULL) - return false; - ht->buckets[index][0] = instr; + return false; + ht->buckets[index][0] = CFG; ht->entries++; return true; } @@ -190,41 +243,44 @@ hashtable_insert (hashtable_t * ht, instr_t * instr) /* Bucket isn't NULL, scanning all entries to see if instr is already here */ size_t k = 0; while (ht->buckets[index][k] != NULL) - if (ht->buckets[index][k++]->address == instr->address) - return true; - - instr_t **new_bucket = calloc (k + 2, sizeof (instr_t *)); + if 
(ht->buckets[index][k++]->instruction->address + == CFG->instruction->address) + return true; /* No error but we need to delete the redundant one */ + cfg_t **new_bucket = calloc (k + 2, sizeof (cfg_t *)); if (!new_bucket) - return false; + return false; + ht->collisions++; ht->entries++; - memcpy (new_bucket, ht->buckets[index], k * sizeof (instr_t *)); - new_bucket[k] = instr; + memcpy (new_bucket, ht->buckets[index], k * sizeof (cfg_t *)); + new_bucket[k] = CFG; free (ht->buckets[index]); ht->buckets[index] = new_bucket; return true; } -bool +cfg_t * hashtable_lookup (hashtable_t *ht, instr_t *instr) { if (!ht) - return false; + return NULL; size_t index = hash_instr (instr) % ht->size; /* Bucket is empty */ if (ht->buckets[index] == NULL) - return false; + return NULL; /* Bucket is not empty, scanning all entries to see if instr is here */ size_t k = 0; while (ht->buckets[index][k] != NULL) - if (ht->buckets[index][k++]->address == instr->address) - return true; - - return false; + { + if (ht->buckets[index][k]->instruction->address == instr->address) + return ht->buckets[index][k]; + k++; + } + return NULL; } size_t @@ -238,3 +294,347 @@ hashtable_collisions (hashtable_t *ht) { return ht->collisions; } + +/*****************************************/ + + +struct _trace_t +{ + instr_t *instruction; /* Index for the hash value of the instruction */ + trace_t *next; /* Pointer to the next value in the list */ +}; + +trace_t * +trace_new (instr_t *ins) +{ + trace_t *t = malloc (sizeof (trace_t)); + if (!t) + return NULL; + /* Initialize trace */ + t->instruction = ins; + t->next = NULL; + return t; +} + +trace_t * +trace_insert (trace_t *t, instr_t *ins) +{ + if (!t) + return NULL; + trace_t *new = trace_new (ins); + if (!new) + return NULL; + if (t->next) + new->next = t->next; + t->next = new; + return new; +} + +void +trace_delete (trace_t *t) +{ + if (!t) + return; + trace_t *tmp = t; + while (tmp->next) + { + tmp = tmp->next; + free (t); + t = tmp; + } + 
free(t); + return; +} + +trace_t * +trace_compare (trace_t *t1, trace_t *t2) +{ + trace_t *tmp1 = t1; + trace_t *tmp2 = t2; + while (tmp1->instruction->address == tmp2->instruction->address) + { + tmp1 = tmp1->next; + tmp2 = tmp2->next; + if (!tmp1) + return tmp2; + if (!tmp2) + return NULL; + } + return tmp2; +} + +/********************************************************************/ + + +cfg_t * +cfg_new (hashtable_t *ht, instr_t *ins, char *str) +{ + cfg_t *CFG = calloc (1, sizeof (cfg_t)); + if (!CFG) + return NULL; + if (ins->type == BASIC) + /* If type is BASIC then we know for sure there can only be one successor */ + CFG->successor = calloc (1, sizeof (cfg_t)); + else + CFG->successor = calloc (2, sizeof (cfg_t)); + if (!CFG->successor) + { + cfg_delete (CFG); + return NULL; + } + /* Initializing the CFG structure */ + CFG->instruction = ins; + CFG->nb_in = 0; + CFG->nb_out = 0; + CFG->str_graph = calloc ((strlen (str) + 1), sizeof (char)); + if (!CFG->str_graph) + { + cfg_delete (CFG); + return NULL; + } + strcpy (CFG->str_graph, str); + /* Initializing the nmae if it is the first function */ + if (nb_name == 0) + CFG->name = 0; + hashtable_insert (ht, CFG); + return CFG; +} + +static bool +is_power_2 (uint16_t n) +{ + if (n == 0) + return false; + while (n % 2 == 0) + { + if (n == 2) + return true; + n = n / 2; + } + return false; +} + +cfg_t * +aux_cfg_insert (cfg_t *CFG, cfg_t *new) +{ + if (!new) + return NULL; + /* Checking if the parent already has a successor */ + if (CFG->instruction->type != RET && !CFG->successor[0]) + { + CFG->successor[0] = new; + CFG->nb_out++; + new->nb_in++; + new->name = CFG->name; + } + else + { + /* Inserting the new node in the parent's successors */ + switch (CFG->instruction->type) + { + // case BASIC: + // if (CFG->nb_out >= 1) + // return NULL; + // break; + case BRANCH: + if (CFG->nb_out >= 2) + return NULL; + if (!CFG->successor) + { + cfg_delete (CFG); + return NULL; + } + CFG->successor[1] = new; + 
CFG->nb_out++; + new->nb_in++; + new->name = CFG->name; + break; + // case CALL: + // stack[depth] = CFG; + // depth++; + // break; + case JUMP: + if (is_power_2 (CFG->nb_out)) + CFG->successor = realloc (CFG->successor, 2 * CFG->nb_out * sizeof (cfg_t *)); + if (!CFG->successor) + { + cfg_delete (CFG); + return NULL; + } + CFG->successor[CFG->nb_out] = new; + CFG->nb_out++; + new->nb_in++; + new->name = CFG->name; + break; + case RET: + /* Checking the call on the top of the stack */ + depth--; + if (new->instruction->address + == stack[depth]->instruction->address + stack[depth]->instruction->size) + { + CFG = stack[depth]; + stack[depth] = NULL; + bool flag = false; + /* Check if new is already a successor of CFG */ + for (size_t i = 0; i < CFG->nb_out; i++) + { + if (CFG->successor[i]->instruction->address + == new->instruction->address) + { + flag = true; + break; + } + } + if (flag) + break; + } + else + { + depth++; + } + if (is_power_2 (CFG->nb_out)) + { + CFG->successor = realloc (CFG->successor, 2 * CFG->nb_out * sizeof (cfg_t *)); + } + + if (!CFG->successor) + { + cfg_delete (CFG); + return NULL; + } + CFG->successor[CFG->nb_out] = new; + CFG->nb_out++; + new->nb_in++; + new->name = CFG->name; + break; + } + } + return new; +} + +cfg_t * +cfg_insert (hashtable_t *ht, cfg_t *CFG, instr_t *ins,Agraph_t *g, char *str) +{ + if (!CFG) + return NULL; + cfg_t *new = hashtable_lookup (ht, ins); + /* First time seeing this instruction */ + if (!new) + { + new = cfg_new (ht, ins, str); + /* Pushing the call on the stack */ + if (CFG->instruction->type == CALL) + { + nb_name++; + function_entry[nb_name] = new; + stack[depth] = CFG; + depth++; + } + + return aux_cfg_insert(CFG, new); + } +else + { + /* Pushing the call on the stack */ + if (CFG->instruction->type == CALL) + { + stack[depth] = CFG; + depth++; + } + instr_delete (ins); + /* Checking if new is already a successor of old */ + for (size_t i = 0; i < CFG->nb_out; i++) + { + if 
(CFG->successor[i]->instruction->address + == new->instruction->address) + return new; + } + + return aux_cfg_insert(CFG, new); + } +} + + +void +cfg_delete (cfg_t *CFG) +{ + if (CFG) + { + if (CFG->instruction) + { + instr_delete (CFG->instruction); + } + if (CFG->successor) + free (CFG->successor); + if (CFG->str_graph) + free (CFG->str_graph); + free (CFG); + } + return; +} + +instr_t * +cfg_get_instr (cfg_t *CFG) +{ + return CFG->instruction; +} + +uint16_t +cfg_get_nb_out (cfg_t *CFG) +{ + return CFG->nb_out; +} + +uint16_t +cfg_get_nb_in (cfg_t *CFG) +{ + return CFG->nb_in; +} + +instr_type_t +cfg_get_type (cfg_t *CFG) +{ + return CFG->instruction->type; +} + +uint16_t +cfg_get_name (cfg_t *CFG) +{ + return CFG->name; +} + +cfg_t ** +cfg_get_successor (cfg_t *CFG) +{ + return CFG->successor; +} + +cfg_t * +cfg_get_successor_i (cfg_t *CFG, uint16_t i) +{ + return CFG->successor[i]; +} + +/* to delete ? */ +size_t +get_nb_name (void) +{ + return nb_name; +} + +cfg_t * +get_function_entry (size_t index) +{ + return function_entry[index]; +} + +char * +cfg_get_str (cfg_t *CFG) +{ + return CFG->str_graph; +} + +void add_first_entry (cfg_t *CFG) +{ + function_entry[0] = CFG; +} diff --git a/src/tracker.c b/src/tracker.c index ea41651..2dd37a4 100644 --- a/src/tracker.c +++ b/src/tracker.c @@ -11,6 +11,7 @@ */ #include "tracker.h" +#include #define _POSIX_C_SOURCE 200809L @@ -48,20 +49,27 @@ typedef enum /* In amd64, maximum bytes for an opcode is 15 */ #define MAX_OPCODE_BYTES 16 +/* Maximum length of a line in input */ +#define MAX_LEN 1024 + /* Global variables for this module */ static bool debug = false; /* 'debug' option flag */ static bool verbose = false; /* 'verbose' option flag */ static FILE *output = NULL; /* output file (default: stdout) */ +/* input file containing executable's name and argument */ +static FILE *input = NULL; + +static FILE *fp = NULL; /* Get the architecture of the executable */ static arch_t check_execfile (char *execfilename) { 
struct stat exec_stats; - if (stat(execfilename, &exec_stats) == -1) + if (stat (execfilename, &exec_stats) == -1) err (EXIT_FAILURE, "error: '%s'", execfilename); - if (!S_ISREG(exec_stats.st_mode) || !(exec_stats.st_mode & S_IXUSR)) + if (!S_ISREG (exec_stats.st_mode) || !(exec_stats.st_mode & S_IXUSR)) errx (EXIT_FAILURE, "error: '%s' is not an executable file", execfilename); /* Check if given file is an executable and discover its architecture */ @@ -79,7 +87,7 @@ check_execfile (char *execfilename) errx (EXIT_FAILURE, "error: '%s' is not an ELF binary", execfilename); /* Extract executable architecture (byte at 0x12) */ - fseek(execfile, 0x12, SEEK_SET); + fseek (execfile, 0x12, SEEK_SET); if (fread (&buf, 1, 1, execfile) != 1) errx (EXIT_FAILURE, "error: cannot read '%s'", execfilename); @@ -99,7 +107,7 @@ check_execfile (char *execfilename) } /* Closing file after verifications */ - fclose(execfile); + fclose (execfile); return exec_arch; } @@ -117,6 +125,269 @@ get_current_ip (struct user_regs_struct *regs) #endif } +static void +get_text_info (const char *execfilename, uint64_t *text_addr, uint64_t *text_size) +{ + FILE *execfile = fopen (execfilename, "r"); + unsigned char buf[8]; + fseek (execfile, 0x28, SEEK_SET); + fread (&buf, 8, 1, execfile); + uint64_t e_shoff = 0; + for (int i = 7; i >= 0; i--) { + e_shoff = e_shoff << 8; + e_shoff += buf[i]; + } + fseek (execfile, 0x3a, SEEK_SET); + fread (&buf, 2, 1, execfile); + uint64_t e_shentsize = 0; + for (int i = 1; i >= 0; i--) { + e_shentsize = e_shentsize << 8; + e_shentsize += buf[i]; + } + fseek (execfile, 0x3c, SEEK_SET); + fread (&buf, 2, 1, execfile); + uint64_t e_shnum = 0; + for (int i = 1; i >= 0; i--) { + e_shnum = e_shnum << 8; + e_shnum += buf[i]; + } + fseek (execfile, 0x3e, SEEK_SET); + fread (&buf, 2, 1, execfile); + uint64_t e_shstrndx = 0; + for (int i = 1; i >= 0; i--) { + e_shstrndx = e_shstrndx << 8; + e_shstrndx += buf[i]; + } + fseek (execfile, e_shoff + (e_shentsize * e_shstrndx) 
+ 0x18, SEEK_SET); + fread (&buf, 8, 1, execfile); + uint64_t shstrtab = 0; + for (int i = 7; i >= 0; i--) { + shstrtab = shstrtab << 8; + shstrtab += buf[i]; + } + uint64_t index = 0; + while (true) { + uint64_t var = 0; + fseek (execfile, e_shoff + (e_shentsize * index), SEEK_SET); + fread (&buf, 4, 1, execfile); + for (int i = 3; i >= 0; i--) { + var = var << 8; + var += buf[i]; + } + fseek (execfile, shstrtab + var, SEEK_SET); + fread (&buf, 5, 1, execfile); + if (buf[0] == '.' && buf[1] == 't' && buf[2] == 'e' && buf[3] == 'x' && buf[4] == 't') + break; + index++; + } + fseek (execfile, e_shoff + (e_shentsize * index) + 0x18, SEEK_SET); + fread (&buf, 8, 1, execfile); + *text_addr = 0; + for (int i = 7; i >= 0; i--) { + *text_addr = *text_addr << 8; + *text_addr += buf[i]; + } + fseek (execfile, e_shoff + (e_shentsize * index) + 0x20, SEEK_SET); + fread (&buf, 8, 1, execfile); + *text_size = 0; + for (int i = 7; i >= 0; i--) { + *text_size = *text_size << 8; + *text_size += buf[i]; + } + fclose (execfile); + return; +} + +static char * +concat_str (char *dest, char *follow) +{ + if (!dest) + { + dest = calloc ((strlen (follow) + 1), sizeof (char)); + sprintf (dest, "%s", follow); + return dest; + } + dest = realloc (dest, (strlen (dest) + 1 + strlen (follow) + 1) * sizeof (char)); + sprintf (dest + strlen(dest), "\n%s", follow); + return dest; +} + +static Agraph_t * +graph_create_function (Agraph_t *g, cfg_t *entry, Agnode_t *n) +{ + Agnode_t *m = NULL; + cfg_t *old = entry; + cfg_t *new = NULL; + char *str_bb = NULL; + while (cfg_get_type (old) == BASIC || cfg_get_type (old) == CALL) + { + /* Not the begining of the function + more than 1 parent --> basic block */ + if (old != entry && cfg_get_nb_in (old) > 1) + { + m = agnode (g, str_bb, TRUE); + free(str_bb); + str_bb = NULL; + if (n) + if (!agedge (g, n, m, NULL, FALSE)) + agedge (g, n, m, NULL, TRUE); + graph_create_function (g, old, m); + return g; + } + if (cfg_get_type (old) == CALL) + { + /* 
Searching for a RET following the CALL */ + uint16_t i = 0; + while (i < cfg_get_nb_out (old)) + { + new = cfg_get_successor_i (old, i); + if (instr_get_addr (cfg_get_instr (old)) + instr_get_size (cfg_get_instr (old)) + == instr_get_addr (cfg_get_instr (new))) + break; + new = NULL; + i++; + } + str_bb = concat_str (str_bb, cfg_get_str(old)); + if (!new) + { + m = agnode (g, str_bb, TRUE); + free(str_bb); + str_bb = NULL; + if (n) + if (!agedge (g, n, m, NULL, FALSE)) + agedge (g, n, m, NULL, TRUE); + return g; + } + else + { + old = new; + new = NULL; + } + } + else + { + str_bb = concat_str (str_bb, cfg_get_str(old)); + if (cfg_get_nb_out (old) == 0) + { + m = agnode (g, str_bb, TRUE); + free(str_bb); + str_bb = NULL; + if (!agedge (g, n, m, NULL, FALSE)) + agedge (g, n, m, NULL, TRUE); + return g; + } + old = cfg_get_successor_i (old, 0); + /* Ugly trick to avoid infite loops if a instruction is its own parent */ + if (old == entry) + { + m = agnode (g, str_bb, TRUE); + free(str_bb); + str_bb = NULL; + if (!agedge (g, n, m, NULL, FALSE)) + agedge (g, n, m, NULL, TRUE); + Agnode_t *tmp = agnode (g, cfg_get_str (old), TRUE); + agedge (g, m, tmp, NULL, TRUE); + agedge (g, tmp, tmp, NULL, TRUE); + return g; + } + } + } + /* Enf of a basic block */ + str_bb = concat_str (str_bb, cfg_get_str(old)); + m = agnode (g, str_bb, TRUE); + free(str_bb); + str_bb = NULL; + if (n) + { + if (!agedge (g, n, m, NULL, FALSE)) + { + agedge (g, n, m, NULL, TRUE); + if (cfg_get_type (old) == BRANCH || cfg_get_type (old) == JUMP) + { + for (uint16_t i = 0; i < cfg_get_nb_out (old); i++) + graph_create_function (g, cfg_get_successor_i (old, i), m); + } + } + } + else + { + if (cfg_get_type (old) == BRANCH || cfg_get_type (old) == JUMP) + { + for (uint16_t i = 0; i < cfg_get_nb_out (old); i++) + graph_create_function (g, cfg_get_successor_i (old, i), m); + } + } + return g; +} + +static Agraph_t * +graph_create_function_2 (Agraph_t *g, cfg_t *entry) +{ + Agnode_t *n, *m; + cfg_t *old = 
entry; + cfg_t *new; + while (cfg_get_type (old) == BASIC) + { + new = cfg_get_successor_i(old, 0); + n = agnode (g, cfg_get_str(old), TRUE); + m = agnode (g, cfg_get_str(new), TRUE); + if (!agedge (g, n, m, NULL, FALSE)) + agedge (g, n, m, NULL, TRUE); + else + return g; + if (cfg_get_type (new) != RET) + { + new = old; + old = cfg_get_successor_i(old, 0); + } + else + { + return g; + } + } + if (cfg_get_type (old) == BRANCH || cfg_get_type (old) == JUMP) + { + uint16_t j = 0; + while (j < cfg_get_nb_out (old)) + { + new = cfg_get_successor_i(old, j); + n = agnode (g, cfg_get_str(old), TRUE); + m = agnode (g, cfg_get_str(new), TRUE); + if (!agedge (g, n, m, NULL, FALSE)) + { + agedge (g, n, m, NULL, TRUE); + g = graph_create_function_2(g, new); + } + j++; + } + } + else if (cfg_get_type (old) == CALL) + { + uint16_t i = 0; + while (i < cfg_get_nb_out (old)) + { + new = cfg_get_successor_i(old, i); + if (instr_get_addr (cfg_get_instr (old)) + instr_get_size (cfg_get_instr (old)) + == instr_get_addr ( cfg_get_instr (new))) + { + n = agnode (g, cfg_get_str(old), TRUE); + m = agnode (g, cfg_get_str(new), TRUE); + if (!agedge (g, n, m, NULL, FALSE)) + { + agedge (g, n, m, NULL, TRUE); + g = graph_create_function_2(g, new); + } + else + return g; + } + i++; + } + + } + return g; +} + + + int main (int argc, char *argv[], char *envp[]) { @@ -161,7 +432,7 @@ main (int argc, char *argv[], char *envp[]) case 'o': /* Output file */ output = fopen (optarg, "we"); if (!output) - err (EXIT_FAILURE, "error: cannot open file '%s'", optarg); + err (EXIT_FAILURE, "error: cannot open file '%s'", optarg); break; case 'i': /* intel syntax mode */ @@ -179,8 +450,7 @@ main (int argc, char *argv[], char *envp[]) case 'V': /* Display version number and exit */ fprintf (stdout, "%s %s\n", program_name, VERSION); - fputs ("Trace the execution of a program on the given input\n", - stdout); + fputs ("Trace the execution of a program on the given input\n", stdout); exit (EXIT_SUCCESS); break; 
@@ -198,158 +468,250 @@ main (int argc, char *argv[], char *envp[]) errx (EXIT_FAILURE, "error: missing argument: an executable is required!"); /* Extracting the complete argc/argv[] of the traced command */ - int exec_argc = argc - optind; - char *exec_argv[exec_argc + 1]; - for (int i = 0; i < exec_argc; i++) - { - exec_argv[i] = argv[optind + i]; - } - exec_argv[exec_argc] = NULL; - - /* Perfom various checks on the executable file */ - arch_t exec_arch = check_execfile (exec_argv[0]); - - /* Display the traced command */ - fprintf (output, "%s: starting to trace '", program_name); - for (int i = 0; i < exec_argc - 1; i++) - { - fprintf (output, "%s ", exec_argv[i]); - } - fprintf (output, "%s'\n\n", exec_argv[exec_argc - 1]); - - /* Forking and tracing */ - pid_t child = fork (); - if (child == -1) - errx (EXIT_FAILURE, "error: fork failed!"); - - /* Initialized and start the child */ - if (child == 0) - { - /* Disabling ASLR */ - personality (ADDR_NO_RANDOMIZE); - - /* Start tracing the process */ - if (ptrace(PTRACE_TRACEME, 0, NULL, NULL) == -1) - errx (EXIT_FAILURE, - "error: cannot operate from inside a ptrace() call!"); - - /* Starting the traced executable */ - execve (exec_argv[0], exec_argv, envp); - } - - /* Parent process */ - int status; - byte_t buf[MAX_OPCODE_BYTES]; - uintptr_t ip; - struct user_regs_struct regs; - - /* Initializing Capstone disassembler */ - csh handle; - cs_insn *insn; - size_t count; - - cs_mode exec_mode = 0; - switch (exec_arch) - { - case x86_32_arch: - exec_mode = CS_MODE_32; - break; - - case x86_64_arch: - exec_mode = CS_MODE_64; - break; - - default: - errx (EXIT_FAILURE, "error: '%s' unsupported architecture", exec_argv[0]); - } - - /* Initialize the assembly decoder */ - if (cs_open(CS_ARCH_X86, exec_mode, &handle) != CS_ERR_OK) - errx (EXIT_FAILURE, "error: cannot start capstone disassembler"); - - /* Set syntax flavor output */ - if (intel) - cs_option(handle, CS_OPT_SYNTAX, CS_OPT_SYNTAX_INTEL); - else - 
cs_option(handle, CS_OPT_SYNTAX, CS_OPT_SYNTAX_ATT); + input = fopen (argv[optind], "r"); + if (input == NULL) + errx (EXIT_FAILURE, "error: can't open the input file"); - /* Main disassembling loop */ - size_t instr_count = 0; - hashtable_t *ht = hashtable_new (DEFAULT_HASHTABLE_SIZE); - if (ht == NULL) - err (EXIT_FAILURE, "error: cannot create hashtable"); - - while (true) + int nb_line = 0; + char str[MAX_LEN]; + while (fgets (str, MAX_LEN, input) != NULL) { - /* Waiting for child process */ - wait(&status); - if (WIFEXITED(status)) - break; - - /* Get instruction pointer */ - ptrace(PTRACE_GETREGS, child, NULL, &regs); - - /* Printing instruction pointer */ - ip = get_current_ip (&regs); - fprintf (output, "0x%" PRIxPTR " ", ip); - - /* Get the opcode from memory */ - for (size_t i = 0; i < MAX_OPCODE_BYTES; i += 8) - { - long *ptr = (long *) &(buf[i]); - *ptr = ptrace (PTRACE_PEEKDATA, child, ip + i, NULL); - } - - /* Get the mnemonic from decoder */ - count = cs_disasm(handle, &(buf[0]), MAX_OPCODE_BYTES, 0x1000, 0, &insn); - if (count > 0) - { - /* Display the bytes */ - for (size_t i = 0; i < insn[0].size; i++) - fprintf (output, " %02x", buf[i]); - - /* Pretty printing and formating */ - if (insn[0].size != 8 && insn[0].size != 11) - fprintf (output, "\t"); - - for (int i = 0; i < 4 - (insn[0].size / 3); i++) - fprintf (output, "\t"); - - /* Display mnemonic and operand */ - fprintf (output, "%s %s", insn[0].mnemonic, insn[0].op_str); - fprintf(output, "\n"); - - /* Create the instr_t structure */ - instr_t *instr = instr_new (ip, insn[0].size, buf); - if (!instr) - err (EXIT_FAILURE, "error:"); - - if (!hashtable_insert (ht, instr)) - instr_delete (instr); - - /* Updating counters */ - instr_count++; - } - - /* Continue to next instruction... */ - /* Note that, sometimes, ptrace(PTRACE_SINGLESTEP) returns '-1' - * to notify that the child process did not respond quick enough, - * we have to wait for ptrace() to return '0'. 
*/ - while (ptrace(PTRACE_SINGLESTEP, child, NULL, NULL)); - } - - fprintf(output, - "\n" - "\tStatistics about this run\n" - "\t=========================\n" - "* #instructions executed: %zu\n" - "* #unique instructions: %zu\n" - "* #hashtable buckets: %zu\n" - "* #hashtable collisions: %zu\n", - instr_count, hashtable_entries (ht), - (size_t) DEFAULT_HASHTABLE_SIZE, hashtable_collisions (ht)); - - hashtable_delete (ht); - + if (str[0] != '\n') + nb_line++; + } + rewind (input); + cfg_t *cfg = NULL; + cfg_t *cfg_entry = NULL; + hashtable_t *ht = hashtable_new (DEFAULT_HASHTABLE_SIZE); + fp = fopen("toto.gv", "w+"); + char name_node[128]; + Agraph_t *g; + g = agopen ("G", Agstrictdirected, NULL); + Agsym_t *sym; + sym = agattr (g, AGNODE, "shape", "box"); + + if (ht == NULL) + err (EXIT_FAILURE, "error: cannot create hashtable"); + + while (fgets (str, MAX_LEN, input) != NULL) + { + if (str[0] != '\n') + { + size_t line_length = strlen (str); + char *exec_argv[line_length]; + char *token = strtok (str, " "); + int index = 0; + while (token != NULL) + { + size_t token_length = strlen (token); + if (token[token_length - 1] == '\n') + token[token_length - 1] = '\0'; /* Formatting trick */ + exec_argv[index] = token; + index++; + token = strtok (NULL, " "); + } + exec_argv[index] = NULL; + int exec_argc = index; + + /* Perfom various checks on the executable file */ + arch_t exec_arch = check_execfile (exec_argv[0]); + + /* Display the traced command */ + fprintf (output, "%s: starting to trace '", program_name); + for (int i = 0; i < exec_argc - 1; i++) + fprintf (output, "%s ", exec_argv[i]); + fprintf (output, "%s'\n\n", exec_argv[exec_argc - 1]); + + /* Forking and tracing */ + pid_t child = fork (); + if (child == -1) + errx (EXIT_FAILURE, "error: fork failed!"); + + /* Initialized and start the child */ + if (child == 0) + { + /* Disabling ASLR */ + personality (ADDR_NO_RANDOMIZE); + + /* Start tracing the process */ + if (ptrace (PTRACE_TRACEME, 0, NULL, NULL) == 
-1) + errx (EXIT_FAILURE, + "error: cannot operate from inside a ptrace() call!"); + + /* Starting the traced executable */ + execve (exec_argv[0], exec_argv, envp); + + } + + /* Parent process */ + int status; + byte_t buf[MAX_OPCODE_BYTES]; + uintptr_t ip; + struct user_regs_struct regs; + + /* Initializing Capstone disassembler */ + csh handle; + cs_insn *insn; + size_t count; + + cs_mode exec_mode = 0; + switch (exec_arch) + { + case x86_32_arch: + exec_mode = CS_MODE_32; + break; + + case x86_64_arch: + exec_mode = CS_MODE_64; + break; + + default: + errx (EXIT_FAILURE, + "error: '%s' unsupported architecture", exec_argv[0]); + } + + /* Initialize the assembly decoder */ + if (cs_open (CS_ARCH_X86, exec_mode, &handle) != CS_ERR_OK) + errx (EXIT_FAILURE, "error: cannot start capstone disassembler"); + + /* Set syntax flavor output */ + if (intel) + cs_option (handle, CS_OPT_SYNTAX, CS_OPT_SYNTAX_INTEL); + else + cs_option (handle, CS_OPT_SYNTAX, CS_OPT_SYNTAX_ATT); + + /* Main disassembling loop */ + size_t instr_count = 0; + + + while (true) + { + /* Waiting for child process */ + wait (&status); + if (WIFEXITED (status)) + break; + + /* Get instruction pointer */ + ptrace (PTRACE_GETREGS, child, NULL, &regs); + + /* Printing instruction pointer */ + ip = get_current_ip (&regs); + fprintf (output, "0x%" PRIxPTR " ", ip); + + /* Get the opcode from memory */ + for (size_t i = 0; i < MAX_OPCODE_BYTES; i += 8) + { + long *ptr = (long *) &(buf[i]); + *ptr = ptrace (PTRACE_PEEKDATA, child, ip + i, NULL); + } + + /* Get the mnemonic from decoder */ + count = cs_disasm (handle, &(buf[0]), MAX_OPCODE_BYTES, 0x1000, 0, &insn); + + if (count > 0) + { + /* Display the bytes */ + for (size_t i = 0; i < insn[0].size; i++) + fprintf (output, " %02x", buf[i]); + + /* Pretty printing and formating */ + if (insn[0].size != 8 && insn[0].size != 11) + fprintf (output, "\t"); + + for (int i = 0; i < 4 - (insn[0].size / 3); i++) + fprintf (output, "\t"); + + /* Display mnemonic and 
operand */ + fprintf (output, "%s %s", insn[0].mnemonic, insn[0].op_str); + fprintf (output, "\n"); + + + + sprintf(name_node,"0x%" PRIxPTR " ", ip); + for (size_t i = 0; i < insn[0].size; i++) + sprintf(name_node + strlen(name_node), "%02x ", buf[i]); + sprintf(name_node + strlen(name_node), " %s ",insn[0].mnemonic); + sprintf(name_node + strlen(name_node), "%s",insn[0].op_str); + + /* Create the instr_t structure */ + instr_t *instr = instr_new (ip, insn[0].size, buf, name_node); + if (!instr) + { + hashtable_delete (ht); + cs_free (insn, count); + cs_close (&handle); + fclose (input); + fclose (output); + err (EXIT_FAILURE, "error: cannot create instruction"); + } + cs_free (insn, count); + + if (!cfg) + { + /* Create a new trace and store it */ + cfg = cfg_new (ht, instr, name_node); + + if (!cfg) + { + hashtable_delete (ht); + cs_close (&handle); + fclose (input); + fclose (output); + err (EXIT_FAILURE, "error: cannot create a control flow graph"); + } + cfg_entry = cfg; + add_first_entry (cfg_entry); + } + else + { + /* Insert a new element in the cfg and update cfg to hold + * the new node */ + + cfg = cfg_insert (ht, cfg, instr, g,name_node); + + if (!cfg) + { + hashtable_delete (ht); + cs_close (&handle); + fclose (input); + fclose (output); + err (EXIT_FAILURE, "error: cannot create a control flow graph"); + } + } + + /* Updating counters */ + instr_count++; + } + + /* Continue to next instruction... */ + /* Note that, sometimes, ptrace(PTRACE_SINGLESTEP) returns '-1' + * to notify that the child process did not respond quick enough, + * we have to wait for ptrace() to return '0'. 
*/ + while (ptrace(PTRACE_SINGLESTEP, child, NULL, NULL)); + } + cs_close (&handle); + fprintf(output, + "\n" + "\tStatistics about this run\n" + "\t=========================\n" + "* #instructions executed: %zu\n" + "* #unique instructions: %zu\n" + "* #hashtable buckets: %zu\n" + "* #hashtable collisions: %zu\n\n\n", + instr_count, hashtable_entries (ht), + (size_t) DEFAULT_HASHTABLE_SIZE, hashtable_collisions (ht)); + } + } + + + graph_create_function_2(g, get_function_entry(90)); + + fclose (input); + fclose (output); + + hashtable_delete (ht); + agwrite(g, fp); + agclose(g); + fclose (fp); return EXIT_SUCCESS; } diff --git a/test/Makefile b/test/Makefile index a2dae76..247bd0d 100644 --- a/test/Makefile +++ b/test/Makefile @@ -10,7 +10,21 @@ LDFLAGS = -lcapstone all: tracker_tests tracker_tests: - @echo "TODO: Tests are not yet implemented!" + gcc -o if if.c $(CFLAGS) + gcc -o while while.c $(CFLAGS) + gcc -o switch switch.c $(CFLAGS) + gcc -o printf printf.c $(CFLAGS) + gcc -o call call.c $(CFLAGS) + @echo -e "if 0\nif 44\nif -44\n" > input_if.txt + @echo -e "while 12\nwhile 0\n" > input_while.txt + @echo -e "switch 3\nswitch 7\nswitch 11\n" > input_switch.txt + @echo -e "printf Neo\n" > input_printf.txt + @echo -e "call 1337\n" > input_call.txt + ./tracker -o output_if.txt input_if.txt + ./tracker -o output_while.txt input_while.txt + ./tracker -o output_switch.txt input_switch.txt + ./tracker -o output_printf.txt input_printf.txt + ./tracker -o output_call.txt input_call.txt clean: @echo "src: Cleaning..." 
diff --git a/test/call.c b/test/call.c new file mode 100644 index 0000000..2acc35e --- /dev/null +++ b/test/call.c @@ -0,0 +1,13 @@ +#include <stdlib.h> + +int foo (int x) +{ + return x + 42; +} + +int main (int argc, char *argv[]) +{ + int x = atoi (argv[1]); + foo(x); + return EXIT_SUCCESS; +} diff --git a/test/if.c b/test/if.c new file mode 100644 index 0000000..2ba3d1e --- /dev/null +++ b/test/if.c @@ -0,0 +1,13 @@ +#include <stdlib.h> + +int main (int argc, char *argv[]) +{ + int x = atoi(argv[1]); + if (x == 0) + x = x + 42; + else if (x < 0) + x = -x; + else + x++; + return EXIT_SUCCESS; +} diff --git a/test/printf.c b/test/printf.c new file mode 100644 index 0000000..535239a --- /dev/null +++ b/test/printf.c @@ -0,0 +1,8 @@ +#include <stdio.h> +#include <stdlib.h> + +int main (int argc, char *argv[]) +{ + printf("Bonjour %s\n", argv[1]); + return EXIT_SUCCESS; +} diff --git a/test/switch.c b/test/switch.c new file mode 100644 index 0000000..aa47439 --- /dev/null +++ b/test/switch.c @@ -0,0 +1,19 @@ +#include <stdlib.h> + +int main (int argc, char *argv[]) +{ + int x = atoi(argv[1]); + switch (x % 3) + { + case 0: + x++; + break; + case 1: + x--; + break; + case 2: + x *= 2; + break; + } + return EXIT_SUCCESS; +} diff --git a/test/while.c b/test/while.c new file mode 100644 index 0000000..137d5b4 --- /dev/null +++ b/test/while.c @@ -0,0 +1,9 @@ +#include <stdlib.h> + +int main (int argc, char *argv[]) +{ + int x = atoi(argv[1]); + while (x > 0) + x--; + return EXIT_SUCCESS; +}