From ae73926cae8ee9fcb89ae3748782f1aff8f587fa Mon Sep 17 00:00:00 2001 From: mrglm Date: Tue, 14 Jan 2020 20:54:38 +0100 Subject: [PATCH 01/31] Adding the trace struct in the trace module --- include/trace.h | 18 ++++++++++++++ src/trace.c | 64 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 82 insertions(+) diff --git a/include/trace.h b/include/trace.h index b404fea..ff6b9c8 100644 --- a/include/trace.h +++ b/include/trace.h @@ -68,4 +68,22 @@ size_t hashtable_entries (hashtable_t *ht); /* Count the number of collisions in the hashtable */ size_t hashtable_collisions (hashtable_t *ht); +/* ***** Linked list to store a trace ***** */ + +typedef struct _trace_t trace_t; + +/* Creates a trace and initialize the first element with hash_index +Returns a pointer to the created trace, or NULL if an error occured */ +trace_t *trace_new (uint64_t hash_index); + +/* Insert an element initialized with hash_index and insert it after t +Returns a pointer to the created element or NULL if an error occured*/ +trace_t *trace_insert (trace_t *t, uint64_t hash_value); + +/* Free every element in the trace t */ +void trace_delete (trace_t *t); + +/* Returns a pointer to the first element where t2 differs from t1 */ +trace_t *trace_compare (trace_t *t1, trace_t *t2); + #endif /* _TRACE_H */ diff --git a/src/trace.c b/src/trace.c index e96cbb2..5310e05 100644 --- a/src/trace.c +++ b/src/trace.c @@ -238,3 +238,67 @@ hashtable_collisions (hashtable_t *ht) { return ht->collisions; } + +struct _trace_t +{ + uint64_t h; /* Index for the hash value of the instruction */ + trace_t *next; /* Pointer to the next value in the list */ +}; + +trace_t * +trace_new (uint64_t hash_index) +{ + trace_t *t = malloc (sizeof (trace_t)); + if (!t) + return NULL; + t->h = hash_index; + t->next = NULL; + return t; +} + +trace_t * +trace_insert (trace_t *t, uint64_t hash_index) +{ + if (!t) + return NULL; + trace_t *new = trace_new (hash_index); + if (!new) + return NULL; + if (t->next) + 
new->next = t->next; + t->next = new; + return new; +} + +void +trace_delete (trace_t *t) +{ + if (!t) + return; + trace_t *tmp = t; + while (tmp->next) + { + tmp = tmp->next; + free(t); + t = tmp; + } + free(t); + return; +} + +trace_t * +trace_compare (trace_t *t1, trace_t *t2) +{ + trace_t *tmp1 = t1; + trace_t *tmp2 = t2; + while (tmp1->h == tmp2->h) + { + tmp1 = tmp1->next; + tmp2 = tmp2->next; + if (!tmp1) + return tmp2; + if (!tmp2) + return NULL; + } + return tmp2; +} From 75405dedc0efa9c70348003899009767252b4b0e Mon Sep 17 00:00:00 2001 From: mrglm Date: Tue, 14 Jan 2020 21:46:23 +0100 Subject: [PATCH 02/31] Modifying the input format tracker now expects a text file with on each line the executable's name and arguments, allowing it to produces multiple traces --- src/tracker.c | 335 +++++++++++++++++++++++++++----------------------- 1 file changed, 183 insertions(+), 152 deletions(-) diff --git a/src/tracker.c b/src/tracker.c index ea41651..cde1d14 100644 --- a/src/tracker.c +++ b/src/tracker.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -48,10 +49,15 @@ typedef enum /* In amd64, maximum bytes for an opcode is 15 */ #define MAX_OPCODE_BYTES 16 +/* Maximum length of a line in input */ +#define MAX_LEN 1024 + /* Global variables for this module */ static bool debug = false; /* 'debug' option flag */ static bool verbose = false; /* 'verbose' option flag */ static FILE *output = NULL; /* output file (default: stdout) */ +/* input file containing executable's name and argument */ +static FILE *input = NULL; /* Get the architecture of the executable */ static arch_t @@ -198,158 +204,183 @@ main (int argc, char *argv[], char *envp[]) errx (EXIT_FAILURE, "error: missing argument: an executable is required!"); /* Extracting the complete argc/argv[] of the traced command */ - int exec_argc = argc - optind; - char *exec_argv[exec_argc + 1]; - for (int i = 0; i < exec_argc; i++) - { - exec_argv[i] = argv[optind + i]; - } - 
exec_argv[exec_argc] = NULL; - - /* Perfom various checks on the executable file */ - arch_t exec_arch = check_execfile (exec_argv[0]); - - /* Display the traced command */ - fprintf (output, "%s: starting to trace '", program_name); - for (int i = 0; i < exec_argc - 1; i++) - { - fprintf (output, "%s ", exec_argv[i]); - } - fprintf (output, "%s'\n\n", exec_argv[exec_argc - 1]); - - /* Forking and tracing */ - pid_t child = fork (); - if (child == -1) - errx (EXIT_FAILURE, "error: fork failed!"); - - /* Initialized and start the child */ - if (child == 0) - { - /* Disabling ASLR */ - personality (ADDR_NO_RANDOMIZE); - - /* Start tracing the process */ - if (ptrace(PTRACE_TRACEME, 0, NULL, NULL) == -1) - errx (EXIT_FAILURE, - "error: cannot operate from inside a ptrace() call!"); - - /* Starting the traced executable */ - execve (exec_argv[0], exec_argv, envp); - } - - /* Parent process */ - int status; - byte_t buf[MAX_OPCODE_BYTES]; - uintptr_t ip; - struct user_regs_struct regs; - - /* Initializing Capstone disassembler */ - csh handle; - cs_insn *insn; - size_t count; - - cs_mode exec_mode = 0; - switch (exec_arch) - { - case x86_32_arch: - exec_mode = CS_MODE_32; - break; - - case x86_64_arch: - exec_mode = CS_MODE_64; - break; - - default: - errx (EXIT_FAILURE, "error: '%s' unsupported architecture", exec_argv[0]); - } - - /* Initialize the assembly decoder */ - if (cs_open(CS_ARCH_X86, exec_mode, &handle) != CS_ERR_OK) - errx (EXIT_FAILURE, "error: cannot start capstone disassembler"); - - /* Set syntax flavor output */ - if (intel) - cs_option(handle, CS_OPT_SYNTAX, CS_OPT_SYNTAX_INTEL); - else - cs_option(handle, CS_OPT_SYNTAX, CS_OPT_SYNTAX_ATT); - - /* Main disassembling loop */ - size_t instr_count = 0; - hashtable_t *ht = hashtable_new (DEFAULT_HASHTABLE_SIZE); - if (ht == NULL) - err (EXIT_FAILURE, "error: cannot create hashtable"); - - while (true) - { - /* Waiting for child process */ - wait(&status); - if (WIFEXITED(status)) - break; - - /* Get 
instruction pointer */ - ptrace(PTRACE_GETREGS, child, NULL, ®s); - - /* Printing instruction pointer */ - ip = get_current_ip (®s); - fprintf (output, "0x%" PRIxPTR " ", ip); - - /* Get the opcode from memory */ - for (size_t i = 0; i < MAX_OPCODE_BYTES; i += 8) - { - long *ptr = (long *) &(buf[i]); - *ptr = ptrace (PTRACE_PEEKDATA, child, ip + i, NULL); - } - - /* Get the mnemonic from decoder */ - count = cs_disasm(handle, &(buf[0]), MAX_OPCODE_BYTES, 0x1000, 0, &insn); - if (count > 0) - { - /* Display the bytes */ - for (size_t i = 0; i < insn[0].size; i++) - fprintf (output, " %02x", buf[i]); - - /* Pretty printing and formating */ - if (insn[0].size != 8 && insn[0].size != 11) - fprintf (output, "\t"); - - for (int i = 0; i < 4 - (insn[0].size / 3); i++) - fprintf (output, "\t"); - - /* Display mnemonic and operand */ - fprintf (output, "%s %s", insn[0].mnemonic, insn[0].op_str); - fprintf(output, "\n"); - - /* Create the instr_t structure */ - instr_t *instr = instr_new (ip, insn[0].size, buf); - if (!instr) - err (EXIT_FAILURE, "error:"); - - if (!hashtable_insert (ht, instr)) - instr_delete (instr); - - /* Updating counters */ - instr_count++; - } - - /* Continue to next instruction... */ - /* Note that, sometimes, ptrace(PTRACE_SINGLESTEP) returns '-1' - * to notify that the child process did not respond quick enough, - * we have to wait for ptrace() to return '0'. 
*/ - while (ptrace(PTRACE_SINGLESTEP, child, NULL, NULL)); - } - - fprintf(output, - "\n" - "\tStatistics about this run\n" - "\t=========================\n" - "* #instructions executed: %zu\n" - "* #unique instructions: %zu\n" - "* #hashtable buckets: %zu\n" - "* #hashtable collisions: %zu\n", - instr_count, hashtable_entries (ht), - (size_t) DEFAULT_HASHTABLE_SIZE, hashtable_collisions (ht)); - - hashtable_delete (ht); + input = fopen (argv[optind], "r"); + if (input == NULL) + errx (EXIT_FAILURE, "error: can't open the input file"); + + int nb_line = 0; + char str[MAX_LEN]; + while (fgets (str, MAX_LEN, input) != NULL) + nb_line++; + rewind(input); + + while (fgets (str, MAX_LEN, input) != NULL) + { + if (str[0] == '\n') + break; + + size_t line_length = strlen (str); + char *exec_argv[line_length]; + char *token = strtok (str, " "); + int index = 0; + while (token != NULL) + { + size_t token_length = strlen(token); + if (token[token_length - 1] == '\n') + token[token_length - 1] = '\0'; /* Formating trick */ + exec_argv[index] = token; + index++; + token = strtok (NULL, " "); + } + exec_argv[index] = NULL; + int exec_argc = index; + + /* Perfom various checks on the executable file */ + arch_t exec_arch = check_execfile (exec_argv[0]); + + /* Display the traced command */ + fprintf (output, "%s: starting to trace '", program_name); + for (int i = 0; i < exec_argc - 1; i++) + { + fprintf (output, "%s ", exec_argv[i]); + } + fprintf (output, "%s'\n\n", exec_argv[exec_argc - 1]); + + /* Forking and tracing */ + pid_t child = fork (); + if (child == -1) + errx (EXIT_FAILURE, "error: fork failed!"); + + /* Initialized and start the child */ + if (child == 0) + { + /* Disabling ASLR */ + personality (ADDR_NO_RANDOMIZE); + + /* Start tracing the process */ + if (ptrace(PTRACE_TRACEME, 0, NULL, NULL) == -1) + errx (EXIT_FAILURE, + "error: cannot operate from inside a ptrace() call!"); + + /* Starting the traced executable */ + execve (exec_argv[0], exec_argv, envp); + 
} + + /* Parent process */ + int status; + byte_t buf[MAX_OPCODE_BYTES]; + uintptr_t ip; + struct user_regs_struct regs; + + /* Initializing Capstone disassembler */ + csh handle; + cs_insn *insn; + size_t count; + + cs_mode exec_mode = 0; + switch (exec_arch) + { + case x86_32_arch: + exec_mode = CS_MODE_32; + break; + + case x86_64_arch: + exec_mode = CS_MODE_64; + break; + + default: + errx (EXIT_FAILURE, + "error: '%s' unsupported architecture", exec_argv[0]); + } + + /* Initialize the assembly decoder */ + if (cs_open(CS_ARCH_X86, exec_mode, &handle) != CS_ERR_OK) + errx (EXIT_FAILURE, "error: cannot start capstone disassembler"); + + /* Set syntax flavor output */ + if (intel) + cs_option(handle, CS_OPT_SYNTAX, CS_OPT_SYNTAX_INTEL); + else + cs_option(handle, CS_OPT_SYNTAX, CS_OPT_SYNTAX_ATT); + + /* Main disassembling loop */ + size_t instr_count = 0; + hashtable_t *ht = hashtable_new (DEFAULT_HASHTABLE_SIZE); + if (ht == NULL) + err (EXIT_FAILURE, "error: cannot create hashtable"); + + while (true) + { + /* Waiting for child process */ + wait(&status); + if (WIFEXITED(status)) + break; + + /* Get instruction pointer */ + ptrace(PTRACE_GETREGS, child, NULL, ®s); + + /* Printing instruction pointer */ + ip = get_current_ip (®s); + fprintf (output, "0x%" PRIxPTR " ", ip); + + /* Get the opcode from memory */ + for (size_t i = 0; i < MAX_OPCODE_BYTES; i += 8) + { + long *ptr = (long *) &(buf[i]); + *ptr = ptrace (PTRACE_PEEKDATA, child, ip + i, NULL); + } + + /* Get the mnemonic from decoder */ + count = cs_disasm(handle, &(buf[0]), MAX_OPCODE_BYTES, 0x1000, 0, &insn); + if (count > 0) + { + /* Display the bytes */ + for (size_t i = 0; i < insn[0].size; i++) + fprintf (output, " %02x", buf[i]); + + /* Pretty printing and formating */ + if (insn[0].size != 8 && insn[0].size != 11) + fprintf (output, "\t"); + + for (int i = 0; i < 4 - (insn[0].size / 3); i++) + fprintf (output, "\t"); + + /* Display mnemonic and operand */ + fprintf (output, "%s %s", 
insn[0].mnemonic, insn[0].op_str); + fprintf(output, "\n"); + + /* Create the instr_t structure */ + instr_t *instr = instr_new (ip, insn[0].size, buf); + if (!instr) + err (EXIT_FAILURE, "error:"); + + if (!hashtable_insert (ht, instr)) + instr_delete (instr); + + /* Updating counters */ + instr_count++; + } + + /* Continue to next instruction... */ + /* Note that, sometimes, ptrace(PTRACE_SINGLESTEP) returns '-1' + * to notify that the child process did not respond quick enough, + * we have to wait for ptrace() to return '0'. */ + while (ptrace(PTRACE_SINGLESTEP, child, NULL, NULL)); + } + + fprintf(output, + "\n" + "\tStatistics about this run\n" + "\t=========================\n" + "* #instructions executed: %zu\n" + "* #unique instructions: %zu\n" + "* #hashtable buckets: %zu\n" + "* #hashtable collisions: %zu\n\n\n", + instr_count, hashtable_entries (ht), + (size_t) DEFAULT_HASHTABLE_SIZE, hashtable_collisions (ht)); + + hashtable_delete (ht); + } return EXIT_SUCCESS; } From 0e01a2e5b06aaa10ce8f32da00d383b20d1ee73a Mon Sep 17 00:00:00 2001 From: mrglm Date: Tue, 14 Jan 2020 22:12:45 +0100 Subject: [PATCH 03/31] Tracker now creates a trace struct for each run --- src/tracker.c | 66 +++++++++++++++++++++++++++++++++++---------------- 1 file changed, 46 insertions(+), 20 deletions(-) diff --git a/src/tracker.c b/src/tracker.c index cde1d14..c9bd35d 100644 --- a/src/tracker.c +++ b/src/tracker.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include @@ -64,10 +63,10 @@ static arch_t check_execfile (char *execfilename) { struct stat exec_stats; - if (stat(execfilename, &exec_stats) == -1) + if (stat (execfilename, &exec_stats) == -1) err (EXIT_FAILURE, "error: '%s'", execfilename); - if (!S_ISREG(exec_stats.st_mode) || !(exec_stats.st_mode & S_IXUSR)) + if (!S_ISREG (exec_stats.st_mode) || !(exec_stats.st_mode & S_IXUSR)) errx (EXIT_FAILURE, "error: '%s' is not an executable file", execfilename); /* Check if given file is an executable and 
discover its architecture */ @@ -85,7 +84,7 @@ check_execfile (char *execfilename) errx (EXIT_FAILURE, "error: '%s' is not an ELF binary", execfilename); /* Extract executable architecture (byte at 0x12) */ - fseek(execfile, 0x12, SEEK_SET); + fseek (execfile, 0x12, SEEK_SET); if (fread (&buf, 1, 1, execfile) != 1) errx (EXIT_FAILURE, "error: cannot read '%s'", execfilename); @@ -105,7 +104,7 @@ check_execfile (char *execfilename) } /* Closing file after verifications */ - fclose(execfile); + fclose (execfile); return exec_arch; } @@ -167,7 +166,7 @@ main (int argc, char *argv[], char *envp[]) case 'o': /* Output file */ output = fopen (optarg, "we"); if (!output) - err (EXIT_FAILURE, "error: cannot open file '%s'", optarg); + err (EXIT_FAILURE, "error: cannot open file '%s'", optarg); break; case 'i': /* intel syntax mode */ @@ -185,8 +184,7 @@ main (int argc, char *argv[], char *envp[]) case 'V': /* Display version number and exit */ fprintf (stdout, "%s %s\n", program_name, VERSION); - fputs ("Trace the execution of a program on the given input\n", - stdout); + fputs ("Trace the execution of a program on the given input\n", stdout); exit (EXIT_SUCCESS); break; @@ -212,7 +210,9 @@ main (int argc, char *argv[], char *envp[]) char str[MAX_LEN]; while (fgets (str, MAX_LEN, input) != NULL) nb_line++; - rewind(input); + rewind (input); + trace_t *traces[nb_line]; + int index_trace = 0; while (fgets (str, MAX_LEN, input) != NULL) { @@ -225,7 +225,7 @@ main (int argc, char *argv[], char *envp[]) int index = 0; while (token != NULL) { - size_t token_length = strlen(token); + size_t token_length = strlen (token); if (token[token_length - 1] == '\n') token[token_length - 1] = '\0'; /* Formating trick */ exec_argv[index] = token; @@ -258,7 +258,7 @@ main (int argc, char *argv[], char *envp[]) personality (ADDR_NO_RANDOMIZE); /* Start tracing the process */ - if (ptrace(PTRACE_TRACEME, 0, NULL, NULL) == -1) + if (ptrace (PTRACE_TRACEME, 0, NULL, NULL) == -1) errx 
(EXIT_FAILURE, "error: cannot operate from inside a ptrace() call!"); @@ -294,14 +294,14 @@ main (int argc, char *argv[], char *envp[]) } /* Initialize the assembly decoder */ - if (cs_open(CS_ARCH_X86, exec_mode, &handle) != CS_ERR_OK) + if (cs_open (CS_ARCH_X86, exec_mode, &handle) != CS_ERR_OK) errx (EXIT_FAILURE, "error: cannot start capstone disassembler"); /* Set syntax flavor output */ if (intel) - cs_option(handle, CS_OPT_SYNTAX, CS_OPT_SYNTAX_INTEL); + cs_option (handle, CS_OPT_SYNTAX, CS_OPT_SYNTAX_INTEL); else - cs_option(handle, CS_OPT_SYNTAX, CS_OPT_SYNTAX_ATT); + cs_option (handle, CS_OPT_SYNTAX, CS_OPT_SYNTAX_ATT); /* Main disassembling loop */ size_t instr_count = 0; @@ -309,15 +309,17 @@ main (int argc, char *argv[], char *envp[]) if (ht == NULL) err (EXIT_FAILURE, "error: cannot create hashtable"); + trace_t *t = NULL; + while (true) { /* Waiting for child process */ - wait(&status); - if (WIFEXITED(status)) + wait (&status); + if (WIFEXITED (status)) break; /* Get instruction pointer */ - ptrace(PTRACE_GETREGS, child, NULL, ®s); + ptrace (PTRACE_GETREGS, child, NULL, ®s); /* Printing instruction pointer */ ip = get_current_ip (®s); @@ -331,7 +333,7 @@ main (int argc, char *argv[], char *envp[]) } /* Get the mnemonic from decoder */ - count = cs_disasm(handle, &(buf[0]), MAX_OPCODE_BYTES, 0x1000, 0, &insn); + count = cs_disasm (handle, &(buf[0]), MAX_OPCODE_BYTES, 0x1000, 0, &insn); if (count > 0) { /* Display the bytes */ @@ -347,12 +349,29 @@ main (int argc, char *argv[], char *envp[]) /* Display mnemonic and operand */ fprintf (output, "%s %s", insn[0].mnemonic, insn[0].op_str); - fprintf(output, "\n"); + fprintf (output, "\n"); /* Create the instr_t structure */ instr_t *instr = instr_new (ip, insn[0].size, buf); if (!instr) - err (EXIT_FAILURE, "error:"); + err (EXIT_FAILURE, "error: cannot create instruction"); + + if (!t) + { + /* Create a new trace and store it */ + t = trace_new (hash_instr (instr)); + if (!t) + err (EXIT_FAILURE, "error: 
cannot create trace"); + traces[index_trace] = t; + } + else + { + /* Insert a new element in the trace and update t to hold + * the new tail */ + t = trace_insert (t, hash_instr (instr)); + if (!t) + err (EXIT_FAILURE, "error: cannot create trace"); + } if (!hashtable_insert (ht, instr)) instr_delete (instr); @@ -380,7 +399,14 @@ main (int argc, char *argv[], char *envp[]) (size_t) DEFAULT_HASHTABLE_SIZE, hashtable_collisions (ht)); hashtable_delete (ht); + + index_trace++; } + fclose(input); + + for (int i = 0; i < nb_line; i++) + trace_delete (traces[i]); + return EXIT_SUCCESS; } From a3f59e0b17c9044a466ff64210b319df3e5fa37e Mon Sep 17 00:00:00 2001 From: MAROTTA Date: Wed, 15 Jan 2020 14:17:11 +0100 Subject: [PATCH 04/31] fixing the \n problem --- src/tracker.c | 351 +++++++++++++++++++++++++------------------------- 1 file changed, 177 insertions(+), 174 deletions(-) diff --git a/src/tracker.c b/src/tracker.c index c9bd35d..a60cded 100644 --- a/src/tracker.c +++ b/src/tracker.c @@ -209,198 +209,201 @@ main (int argc, char *argv[], char *envp[]) int nb_line = 0; char str[MAX_LEN]; while (fgets (str, MAX_LEN, input) != NULL) - nb_line++; + { + if (str[0] != '\n') + nb_line++; + } rewind (input); trace_t *traces[nb_line]; int index_trace = 0; while (fgets (str, MAX_LEN, input) != NULL) { - if (str[0] == '\n') - break; - - size_t line_length = strlen (str); - char *exec_argv[line_length]; - char *token = strtok (str, " "); - int index = 0; - while (token != NULL) + if (str[0] != '\n') { - size_t token_length = strlen (token); - if (token[token_length - 1] == '\n') - token[token_length - 1] = '\0'; /* Formating trick */ - exec_argv[index] = token; - index++; - token = strtok (NULL, " "); - } - exec_argv[index] = NULL; - int exec_argc = index; - - /* Perfom various checks on the executable file */ - arch_t exec_arch = check_execfile (exec_argv[0]); - - /* Display the traced command */ - fprintf (output, "%s: starting to trace '", program_name); - for (int i = 0; i < 
exec_argc - 1; i++) - { - fprintf (output, "%s ", exec_argv[i]); - } - fprintf (output, "%s'\n\n", exec_argv[exec_argc - 1]); - - /* Forking and tracing */ - pid_t child = fork (); - if (child == -1) - errx (EXIT_FAILURE, "error: fork failed!"); - - /* Initialized and start the child */ - if (child == 0) - { - /* Disabling ASLR */ - personality (ADDR_NO_RANDOMIZE); - - /* Start tracing the process */ - if (ptrace (PTRACE_TRACEME, 0, NULL, NULL) == -1) - errx (EXIT_FAILURE, - "error: cannot operate from inside a ptrace() call!"); - - /* Starting the traced executable */ - execve (exec_argv[0], exec_argv, envp); - } - - /* Parent process */ - int status; - byte_t buf[MAX_OPCODE_BYTES]; - uintptr_t ip; - struct user_regs_struct regs; - - /* Initializing Capstone disassembler */ - csh handle; - cs_insn *insn; - size_t count; - - cs_mode exec_mode = 0; - switch (exec_arch) - { - case x86_32_arch: - exec_mode = CS_MODE_32; - break; - - case x86_64_arch: - exec_mode = CS_MODE_64; - break; - - default: - errx (EXIT_FAILURE, - "error: '%s' unsupported architecture", exec_argv[0]); - } - - /* Initialize the assembly decoder */ - if (cs_open (CS_ARCH_X86, exec_mode, &handle) != CS_ERR_OK) - errx (EXIT_FAILURE, "error: cannot start capstone disassembler"); - - /* Set syntax flavor output */ - if (intel) - cs_option (handle, CS_OPT_SYNTAX, CS_OPT_SYNTAX_INTEL); - else - cs_option (handle, CS_OPT_SYNTAX, CS_OPT_SYNTAX_ATT); - - /* Main disassembling loop */ - size_t instr_count = 0; - hashtable_t *ht = hashtable_new (DEFAULT_HASHTABLE_SIZE); - if (ht == NULL) - err (EXIT_FAILURE, "error: cannot create hashtable"); - - trace_t *t = NULL; - - while (true) - { - /* Waiting for child process */ - wait (&status); - if (WIFEXITED (status)) - break; - - /* Get instruction pointer */ - ptrace (PTRACE_GETREGS, child, NULL, ®s); - - /* Printing instruction pointer */ - ip = get_current_ip (®s); - fprintf (output, "0x%" PRIxPTR " ", ip); - - /* Get the opcode from memory */ - for (size_t i 
= 0; i < MAX_OPCODE_BYTES; i += 8) + size_t line_length = strlen (str); + char *exec_argv[line_length]; + char *token = strtok (str, " "); + int index = 0; + while (token != NULL) { - long *ptr = (long *) &(buf[i]); - *ptr = ptrace (PTRACE_PEEKDATA, child, ip + i, NULL); + size_t token_length = strlen (token); + if (token[token_length - 1] == '\n') + token[token_length - 1] = '\0'; /* Formating trick */ + exec_argv[index] = token; + index++; + token = strtok (NULL, " "); } + exec_argv[index] = NULL; + int exec_argc = index; + + /* Perfom various checks on the executable file */ + arch_t exec_arch = check_execfile (exec_argv[0]); + + /* Display the traced command */ + fprintf (output, "%s: starting to trace '", program_name); + for (int i = 0; i < exec_argc - 1; i++) + { + fprintf (output, "%s ", exec_argv[i]); + } + fprintf (output, "%s'\n\n", exec_argv[exec_argc - 1]); + + /* Forking and tracing */ + pid_t child = fork (); + if (child == -1) + errx (EXIT_FAILURE, "error: fork failed!"); + + /* Initialized and start the child */ + if (child == 0) + { + /* Disabling ASLR */ + personality (ADDR_NO_RANDOMIZE); + + /* Start tracing the process */ + if (ptrace (PTRACE_TRACEME, 0, NULL, NULL) == -1) + errx (EXIT_FAILURE, + "error: cannot operate from inside a ptrace() call!"); + + /* Starting the traced executable */ + execve (exec_argv[0], exec_argv, envp); + } + + /* Parent process */ + int status; + byte_t buf[MAX_OPCODE_BYTES]; + uintptr_t ip; + struct user_regs_struct regs; + + /* Initializing Capstone disassembler */ + csh handle; + cs_insn *insn; + size_t count; + + cs_mode exec_mode = 0; + switch (exec_arch) + { + case x86_32_arch: + exec_mode = CS_MODE_32; + break; + + case x86_64_arch: + exec_mode = CS_MODE_64; + break; + + default: + errx (EXIT_FAILURE, + "error: '%s' unsupported architecture", exec_argv[0]); + } + + /* Initialize the assembly decoder */ + if (cs_open (CS_ARCH_X86, exec_mode, &handle) != CS_ERR_OK) + errx (EXIT_FAILURE, "error: cannot start 
capstone disassembler"); + + /* Set syntax flavor output */ + if (intel) + cs_option (handle, CS_OPT_SYNTAX, CS_OPT_SYNTAX_INTEL); + else + cs_option (handle, CS_OPT_SYNTAX, CS_OPT_SYNTAX_ATT); + + /* Main disassembling loop */ + size_t instr_count = 0; + hashtable_t *ht = hashtable_new (DEFAULT_HASHTABLE_SIZE); + if (ht == NULL) + err (EXIT_FAILURE, "error: cannot create hashtable"); + + trace_t *t = NULL; + + while (true) + { + /* Waiting for child process */ + wait (&status); + if (WIFEXITED (status)) + break; + + /* Get instruction pointer */ + ptrace (PTRACE_GETREGS, child, NULL, ®s); + + /* Printing instruction pointer */ + ip = get_current_ip (®s); + fprintf (output, "0x%" PRIxPTR " ", ip); + + /* Get the opcode from memory */ + for (size_t i = 0; i < MAX_OPCODE_BYTES; i += 8) + { + long *ptr = (long *) &(buf[i]); + *ptr = ptrace (PTRACE_PEEKDATA, child, ip + i, NULL); + } - /* Get the mnemonic from decoder */ - count = cs_disasm (handle, &(buf[0]), MAX_OPCODE_BYTES, 0x1000, 0, &insn); - if (count > 0) - { - /* Display the bytes */ - for (size_t i = 0; i < insn[0].size; i++) - fprintf (output, " %02x", buf[i]); + /* Get the mnemonic from decoder */ + count = cs_disasm (handle, &(buf[0]), MAX_OPCODE_BYTES, 0x1000, 0, &insn); + if (count > 0) + { + /* Display the bytes */ + for (size_t i = 0; i < insn[0].size; i++) + fprintf (output, " %02x", buf[i]); - /* Pretty printing and formating */ - if (insn[0].size != 8 && insn[0].size != 11) - fprintf (output, "\t"); + /* Pretty printing and formating */ + if (insn[0].size != 8 && insn[0].size != 11) + fprintf (output, "\t"); - for (int i = 0; i < 4 - (insn[0].size / 3); i++) - fprintf (output, "\t"); + for (int i = 0; i < 4 - (insn[0].size / 3); i++) + fprintf (output, "\t"); - /* Display mnemonic and operand */ - fprintf (output, "%s %s", insn[0].mnemonic, insn[0].op_str); - fprintf (output, "\n"); + /* Display mnemonic and operand */ + fprintf (output, "%s %s", insn[0].mnemonic, insn[0].op_str); + fprintf (output, 
"\n"); - /* Create the instr_t structure */ - instr_t *instr = instr_new (ip, insn[0].size, buf); - if (!instr) - err (EXIT_FAILURE, "error: cannot create instruction"); + /* Create the instr_t structure */ + instr_t *instr = instr_new (ip, insn[0].size, buf); + if (!instr) + err (EXIT_FAILURE, "error: cannot create instruction"); - if (!t) - { - /* Create a new trace and store it */ - t = trace_new (hash_instr (instr)); if (!t) - err (EXIT_FAILURE, "error: cannot create trace"); - traces[index_trace] = t; + { + /* Create a new trace and store it */ + t = trace_new (hash_instr (instr)); + if (!t) + err (EXIT_FAILURE, "error: cannot create trace"); + traces[index_trace] = t; + } + else + { + /* Insert a new element in the trace and update t to hold + * the new tail */ + t = trace_insert (t, hash_instr (instr)); + if (!t) + err (EXIT_FAILURE, "error: cannot create trace"); + } + + if (!hashtable_insert (ht, instr)) + instr_delete (instr); + + /* Updating counters */ + instr_count++; } - else - { - /* Insert a new element in the trace and update t to hold - * the new tail */ - t = trace_insert (t, hash_instr (instr)); - if (!t) - err (EXIT_FAILURE, "error: cannot create trace"); - } - - if (!hashtable_insert (ht, instr)) - instr_delete (instr); - /* Updating counters */ - instr_count++; - } - - /* Continue to next instruction... */ - /* Note that, sometimes, ptrace(PTRACE_SINGLESTEP) returns '-1' - * to notify that the child process did not respond quick enough, - * we have to wait for ptrace() to return '0'. 
*/ - while (ptrace(PTRACE_SINGLESTEP, child, NULL, NULL)); - } - - fprintf(output, - "\n" - "\tStatistics about this run\n" - "\t=========================\n" - "* #instructions executed: %zu\n" - "* #unique instructions: %zu\n" - "* #hashtable buckets: %zu\n" - "* #hashtable collisions: %zu\n\n\n", - instr_count, hashtable_entries (ht), - (size_t) DEFAULT_HASHTABLE_SIZE, hashtable_collisions (ht)); - - hashtable_delete (ht); - - index_trace++; + /* Continue to next instruction... */ + /* Note that, sometimes, ptrace(PTRACE_SINGLESTEP) returns '-1' + * to notify that the child process did not respond quick enough, + * we have to wait for ptrace() to return '0'. */ + while (ptrace(PTRACE_SINGLESTEP, child, NULL, NULL)); + } + + fprintf(output, + "\n" + "\tStatistics about this run\n" + "\t=========================\n" + "* #instructions executed: %zu\n" + "* #unique instructions: %zu\n" + "* #hashtable buckets: %zu\n" + "* #hashtable collisions: %zu\n\n\n", + instr_count, hashtable_entries (ht), + (size_t) DEFAULT_HASHTABLE_SIZE, hashtable_collisions (ht)); + + hashtable_delete (ht); + + index_trace++; + } } fclose(input); From 6655a8528fd46201455bb3005ad4c0e216354be5 Mon Sep 17 00:00:00 2001 From: MAROTTA Date: Mon, 20 Jan 2020 11:03:10 +0100 Subject: [PATCH 05/31] creation of CFG structures, some functions related and beginning of main/.text research --- src/trace.c | 23 +++++++- src/tracker.c | 144 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 166 insertions(+), 1 deletion(-) diff --git a/src/trace.c b/src/trace.c index 5310e05..9b8eebb 100644 --- a/src/trace.c +++ b/src/trace.c @@ -19,7 +19,7 @@ struct _instr_t { uintptr_t address; /* Address where lies the instruction */ // uintptr_t *next; /* List of addresses of the next instructions */ - // uint8_t type; /* Instr type: 0 = instr, 1 = branch, 2 = call, 3 = jmp */ + uint8_t type; /* Instr type: 0 = instr, 1 = branch, 2 = call, 3 = jmp */ uint8_t size; /* Opcode size */ uint8_t opcodes[]; /* 
Instruction opcode */ }; @@ -42,6 +42,17 @@ instr_new (const uintptr_t addr, const uint8_t size, const uint8_t *opcodes) instr->size = size; memcpy (instr->opcodes, opcodes, size); + if ((opcodes[0] >= 0x70 && opcodes[0] <= 0x7F) + || (opcodes[0] == 0x0F && opcodes[1] >= 0x80 && opcodes[1] <= 0x8F)) + instr->type = 1; + else if (opcodes[0] == 0xE8 || opcodes[0] == 0x9A + || (opcodes[0] == 0xFF && (size == 2 || size == 3))) + instr->type = 2; + else if ((opcodes[0] >= 0xE9 && opcodes[0] <= 0xEB) + || (opcodes[0] == 0xFF && (size == 4 || size == 5))) + instr->type = 3; + else + instr->type = 0; return instr; } @@ -302,3 +313,13 @@ trace_compare (trace_t *t1, trace_t *t2) } return tmp2; } + + +struct _cfg_t +{ + uint64_t index; + int type; + int nb_in; + int nb_out; + cfg_t *successor; +}; diff --git a/src/tracker.c b/src/tracker.c index a60cded..10d81d3 100644 --- a/src/tracker.c +++ b/src/tracker.c @@ -122,6 +122,148 @@ get_current_ip (struct user_regs_struct *regs) #endif } + +uint64_t +find_main (const char *execfilename) +{ + FILE *execfile = fopen (execfilename, "r"); + output = stdout; + if (!execfile) + err (EXIT_FAILURE, "error: '%s'", execfilename); + + int nb = 16384; + unsigned char buf0[nb]; + fread (&buf0, nb, 1, execfile); + /*for (int i = 0; i < nb; i++) + { + if (i!= 0 && i % 16 == 0) + printf("\n"); + printf("%#02x\t",buf0[i]); + } + printf("\n\n");*/ + + unsigned char buf[8]; + fseek (execfile, 0x28, SEEK_SET); + fread (&buf, 8, 1, execfile); + uint64_t shoff = 0; + for (int i = 7; i >= 0; i--) + { + shoff = shoff << 8; + shoff += buf[i]; + } +// printf("shoff = %" PRIu64 "\n", shoff); + + fseek (execfile, 0x3A, SEEK_SET); + fread (&buf, 2, 1, execfile); + uint64_t shentsize = 0; + for (int i = 1; i >= 0; i--) + { + shentsize = shentsize << 8; + shentsize += buf[i]; + } +// printf("shentsize = %" PRIu64 "\n", shentsize); + + fseek (execfile, 0x3C, SEEK_SET); + fread (&buf, 2, 1, execfile); + uint64_t shnum = 0; + for (int i = 1; i >= 0; i--) + { + 
shnum = shnum << 8; + shnum += buf[i]; + } +// printf("shnum = %" PRIu64 "\n", shnum); + + fseek (execfile, 0x3E, SEEK_SET); + fread (&buf, 2, 1, execfile); + uint64_t shstrndx = 0; + for (int i = 1; i >= 0; i--) + { + shstrndx = shstrndx << 8; + shstrndx += buf[i]; + } +// printf("shstrndx = %" PRIu64 "\n", shstrndx); + uint64_t truc = shoff + (shentsize * shstrndx) + 0x18; +// printf("truc = %" PRIu64 "\n", truc); + fseek(execfile, shoff + (shentsize * shstrndx) + 0x18, SEEK_SET); + fread (&buf0, nb, 1, execfile); + /*for (int i = 0; i < nb; i++) + { + if (i!= 0 && i % 16 == 0) + printf("\n"); + printf("%#02x\t",buf0[i]); + } + printf("\n");*/ + + fseek(execfile, shoff + (shentsize * shstrndx) + 0x18, SEEK_SET); + fread(&buf, 8, 1, execfile); + uint64_t shstrtab = 0; + for (int i = 1; i >= 0; i--) + { + shstrtab = shstrtab << 8; + shstrtab += buf[i]; + } +// printf("shstrtab = %" PRIu64 "\n", shstrtab); +/*__________________________________________________________________________*/ + + fseek(execfile, shstrtab, SEEK_SET); + fread(&buf0, 50, 1, execfile); + int count = 0; + for (int i = 0; i < 50; i++) + { + if (i!= 0 && i % 16 == 0) + printf("\n"); + printf("%#02x\t",buf0[i]); + } + + printf("\n\n"); + + fseek(execfile, shoff + (shentsize * 0x00), SEEK_SET); + fread(&buf, 4, 1, execfile); + uint64_t stuff = 0; + for (int i = 3; i >= 0; i--) + { + stuff = stuff << 8; + stuff += buf[i]; + } + //printf("stuff = %" PRIu64 "\n", stuff); + + fseek(execfile, shstrtab + stuff, SEEK_SET); + fread(&buf0, 16, 1, execfile); +for (int i = 0; i < 16; i++) + { + if (i!= 0 && i % 16 == 0) + printf("\n"); + printf("%#02x\t",buf0[i]); + } + printf("\n\n"); + + fseek(execfile, shoff + (shentsize * 0x00) + 0x18, SEEK_SET); + fread(&buf, 8, 1, execfile); + uint64_t offset = 0; + for (int i = 7; i >= 0; i--) + { + offset = offset << 8; + offset += buf[i]; + } + printf("offset = %" PRIu64 "\n", offset); + + fseek(execfile, offset, SEEK_SET); + fread(&buf0, nb, 1, execfile); + for (int 
i = 0; i < nb; i++) + { + if (buf0[i] == 0x6D) + printf("\n YES \n"); + if (i!= 0 && i % 16 == 0) + fprintf(output, "\n"); + fprintf(output, "%#02x\t",buf0[i]); + } + fprintf(output, "\n\n\n\n"); + + fclose(execfile); + return 0; +} + + int main (int argc, char *argv[], char *envp[]) { @@ -265,7 +407,9 @@ main (int argc, char *argv[], char *envp[]) "error: cannot operate from inside a ptrace() call!"); /* Starting the traced executable */ + // find_main(exec_argv[0]); execve (exec_argv[0], exec_argv, envp); + } /* Parent process */ From 0efb4284e4ae8f5656be9e67ba59dec274e75dfc Mon Sep 17 00:00:00 2001 From: Aurelien Plet Date: Mon, 20 Jan 2020 12:32:48 +0100 Subject: [PATCH 06/31] Change in the trace struct --- include/trace.h | 8 +- src/trace.c | 12 +-- src/tracker.c | 214 ++++++++++++++++-------------------------------- 3 files changed, 84 insertions(+), 150 deletions(-) diff --git a/include/trace.h b/include/trace.h index ff6b9c8..e8fc7dd 100644 --- a/include/trace.h +++ b/include/trace.h @@ -74,11 +74,11 @@ typedef struct _trace_t trace_t; /* Creates a trace and initialize the first element with hash_index Returns a pointer to the created trace, or NULL if an error occured */ -trace_t *trace_new (uint64_t hash_index); +trace_t *trace_new (instr_t *ins); /* Insert an element initialized with hash_index and insert it after t Returns a pointer to the created element or NULL if an error occured*/ -trace_t *trace_insert (trace_t *t, uint64_t hash_value); +trace_t *trace_insert (trace_t *t, instr_t *ins); /* Free every element in the trace t */ void trace_delete (trace_t *t); @@ -86,4 +86,8 @@ void trace_delete (trace_t *t); /* Returns a pointer to the first element where t2 differs from t1 */ trace_t *trace_compare (trace_t *t1, trace_t *t2); +/* ***** CFG ***** */ + +typedef struct _cfg_t cfg_t; + #endif /* _TRACE_H */ diff --git a/src/trace.c b/src/trace.c index 9b8eebb..4c49111 100644 --- a/src/trace.c +++ b/src/trace.c @@ -252,27 +252,27 @@ hashtable_collisions 
(hashtable_t *ht) struct _trace_t { - uint64_t h; /* Index for the hash value of the instruction */ + instr_t *instruction; /* Index for the hash value of the instruction */ trace_t *next; /* Pointer to the next value in the list */ }; trace_t * -trace_new (uint64_t hash_index) +trace_new (instr_t *ins) { trace_t *t = malloc (sizeof (trace_t)); if (!t) return NULL; - t->h = hash_index; + t->instruction = ins; t->next = NULL; return t; } trace_t * -trace_insert (trace_t *t, uint64_t hash_index) +trace_insert (trace_t *t, instr_t *ins) { if (!t) return NULL; - trace_t *new = trace_new (hash_index); + trace_t *new = trace_new (ins); if (!new) return NULL; if (t->next) @@ -302,7 +302,7 @@ trace_compare (trace_t *t1, trace_t *t2) { trace_t *tmp1 = t1; trace_t *tmp2 = t2; - while (tmp1->h == tmp2->h) + while (tmp1->instruction->address == tmp2->instruction->address) { tmp1 = tmp1->next; tmp2 = tmp2->next; diff --git a/src/tracker.c b/src/tracker.c index 10d81d3..69f6452 100644 --- a/src/tracker.c +++ b/src/tracker.c @@ -122,148 +122,79 @@ get_current_ip (struct user_regs_struct *regs) #endif } - -uint64_t -find_main (const char *execfilename) +void +get_text_info (const char *execfilename, uint64_t *text_addr, uint64_t *text_size) { - FILE *execfile = fopen (execfilename, "r"); - output = stdout; - if (!execfile) - err (EXIT_FAILURE, "error: '%s'", execfilename); - - int nb = 16384; - unsigned char buf0[nb]; - fread (&buf0, nb, 1, execfile); - /*for (int i = 0; i < nb; i++) - { - if (i!= 0 && i % 16 == 0) - printf("\n"); - printf("%#02x\t",buf0[i]); - } - printf("\n\n");*/ - - unsigned char buf[8]; - fseek (execfile, 0x28, SEEK_SET); - fread (&buf, 8, 1, execfile); - uint64_t shoff = 0; - for (int i = 7; i >= 0; i--) - { - shoff = shoff << 8; - shoff += buf[i]; - } -// printf("shoff = %" PRIu64 "\n", shoff); - - fseek (execfile, 0x3A, SEEK_SET); - fread (&buf, 2, 1, execfile); - uint64_t shentsize = 0; - for (int i = 1; i >= 0; i--) - { - shentsize = shentsize << 8; - 
shentsize += buf[i]; - } -// printf("shentsize = %" PRIu64 "\n", shentsize); - - fseek (execfile, 0x3C, SEEK_SET); - fread (&buf, 2, 1, execfile); - uint64_t shnum = 0; - for (int i = 1; i >= 0; i--) - { - shnum = shnum << 8; - shnum += buf[i]; - } -// printf("shnum = %" PRIu64 "\n", shnum); - - fseek (execfile, 0x3E, SEEK_SET); - fread (&buf, 2, 1, execfile); - uint64_t shstrndx = 0; - for (int i = 1; i >= 0; i--) - { - shstrndx = shstrndx << 8; - shstrndx += buf[i]; - } -// printf("shstrndx = %" PRIu64 "\n", shstrndx); - uint64_t truc = shoff + (shentsize * shstrndx) + 0x18; -// printf("truc = %" PRIu64 "\n", truc); - fseek(execfile, shoff + (shentsize * shstrndx) + 0x18, SEEK_SET); - fread (&buf0, nb, 1, execfile); - /*for (int i = 0; i < nb; i++) - { - if (i!= 0 && i % 16 == 0) - printf("\n"); - printf("%#02x\t",buf0[i]); - } - printf("\n");*/ - - fseek(execfile, shoff + (shentsize * shstrndx) + 0x18, SEEK_SET); - fread(&buf, 8, 1, execfile); - uint64_t shstrtab = 0; - for (int i = 1; i >= 0; i--) - { - shstrtab = shstrtab << 8; - shstrtab += buf[i]; - } -// printf("shstrtab = %" PRIu64 "\n", shstrtab); -/*__________________________________________________________________________*/ - - fseek(execfile, shstrtab, SEEK_SET); - fread(&buf0, 50, 1, execfile); - int count = 0; - for (int i = 0; i < 50; i++) - { - if (i!= 0 && i % 16 == 0) - printf("\n"); - printf("%#02x\t",buf0[i]); - } - - printf("\n\n"); - - fseek(execfile, shoff + (shentsize * 0x00), SEEK_SET); - fread(&buf, 4, 1, execfile); - uint64_t stuff = 0; - for (int i = 3; i >= 0; i--) - { - stuff = stuff << 8; - stuff += buf[i]; - } - //printf("stuff = %" PRIu64 "\n", stuff); - - fseek(execfile, shstrtab + stuff, SEEK_SET); - fread(&buf0, 16, 1, execfile); -for (int i = 0; i < 16; i++) - { - if (i!= 0 && i % 16 == 0) - printf("\n"); - printf("%#02x\t",buf0[i]); - } - printf("\n\n"); - - fseek(execfile, shoff + (shentsize * 0x00) + 0x18, SEEK_SET); - fread(&buf, 8, 1, execfile); - uint64_t offset = 0; - 
for (int i = 7; i >= 0; i--) - { - offset = offset << 8; - offset += buf[i]; - } - printf("offset = %" PRIu64 "\n", offset); - - fseek(execfile, offset, SEEK_SET); - fread(&buf0, nb, 1, execfile); - for (int i = 0; i < nb; i++) - { - if (buf0[i] == 0x6D) - printf("\n YES \n"); - if (i!= 0 && i % 16 == 0) - fprintf(output, "\n"); - fprintf(output, "%#02x\t",buf0[i]); - } - fprintf(output, "\n\n\n\n"); - - fclose(execfile); - return 0; + FILE *execfile = fopen (execfilename, "r"); + unsigned char buf[8]; + fseek (execfile, 0x28, SEEK_SET); + fread (&buf, 8, 1, execfile); + uint64_t e_shoff = 0; + for (int i = 7; i >= 0; i--) { + e_shoff = e_shoff << 8; + e_shoff += buf[i]; + } + fseek (execfile, 0x3a, SEEK_SET); + fread (&buf, 2, 1, execfile); + uint64_t e_shentsize = 0; + for (int i = 1; i >= 0; i--) { + e_shentsize = e_shentsize << 8; + e_shentsize += buf[i]; + } + fseek (execfile, 0x3c, SEEK_SET); + fread (&buf, 2, 1, execfile); + uint64_t e_shnum = 0; + for (int i = 1; i >= 0; i--) { + e_shnum = e_shnum << 8; + e_shnum += buf[i]; + } + fseek (execfile, 0x3e, SEEK_SET); + fread (&buf, 2, 1, execfile); + uint64_t e_shstrndx = 0; + for (int i = 1; i >= 0; i--) { + e_shstrndx = e_shstrndx << 8; + e_shstrndx += buf[i]; + } + fseek (execfile, e_shoff + (e_shentsize * e_shstrndx) + 0x18, SEEK_SET); + fread (&buf, 8, 1, execfile); + uint64_t shstrtab = 0; + for (int i = 7; i >= 0; i--) { + shstrtab = shstrtab << 8; + shstrtab += buf[i]; + } + uint64_t index = 0; + while (1) { + uint64_t var = 0; + fseek (execfile, e_shoff + (e_shentsize * index), SEEK_SET); + fread (&buf, 4, 1, execfile); + for (int i = 3; i >= 0; i--) { + var = var << 8; + var += buf[i]; + } + fseek (execfile, shstrtab + var, SEEK_SET); + fread (&buf, 5, 1, execfile); + if (buf[0] == '.' 
&& buf[1] == 't' && buf[2] == 'e' && buf[3] == 'x' && buf[4] == 't') + break; + index++; + } + fseek (execfile, e_shoff + (e_shentsize * index) + 0x18, SEEK_SET); + fread (&buf, 8, 1, execfile); + *text_addr = 0; + for (int i = 7; i >= 0; i--) { + *text_addr = *text_addr << 8; + *text_addr += buf[i]; + } + fseek (execfile, e_shoff + (e_shentsize * index) + 0x20, SEEK_SET); + fread (&buf, 8, 1, execfile); + *text_size = 0; + for (int i = 7; i >= 0; i--) { + *text_size = *text_size << 8; + *text_size += buf[i]; + } + fclose (execfile); + return; } - int main (int argc, char *argv[], char *envp[]) { @@ -407,7 +338,6 @@ main (int argc, char *argv[], char *envp[]) "error: cannot operate from inside a ptrace() call!"); /* Starting the traced executable */ - // find_main(exec_argv[0]); execve (exec_argv[0], exec_argv, envp); } @@ -505,7 +435,7 @@ main (int argc, char *argv[], char *envp[]) if (!t) { /* Create a new trace and store it */ - t = trace_new (hash_instr (instr)); + t = trace_new (instr); if (!t) err (EXIT_FAILURE, "error: cannot create trace"); traces[index_trace] = t; @@ -514,7 +444,7 @@ main (int argc, char *argv[], char *envp[]) { /* Insert a new element in the trace and update t to hold * the new tail */ - t = trace_insert (t, hash_instr (instr)); + t = trace_insert (t, instr); if (!t) err (EXIT_FAILURE, "error: cannot create trace"); } From 8bf24f9dbf0f79033cd8265dac2bdee2a36da482 Mon Sep 17 00:00:00 2001 From: mrglm Date: Mon, 20 Jan 2020 17:25:31 +0100 Subject: [PATCH 07/31] Adding functions for the cfg struct --- include/trace.h | 6 +++ src/trace.c | 133 +++++++++++++++++++++++++++++++++++++++++------- src/tracker.c | 39 ++++++-------- 3 files changed, 137 insertions(+), 41 deletions(-) diff --git a/include/trace.h b/include/trace.h index e8fc7dd..755fc75 100644 --- a/include/trace.h +++ b/include/trace.h @@ -90,4 +90,10 @@ trace_t *trace_compare (trace_t *t1, trace_t *t2); typedef struct _cfg_t cfg_t; +cfg_t *cfg_new (instr_t *ins); + +cfg_t 
*cfg_insert (cfg_t *CFG, instr_t *ins); + + + #endif /* _TRACE_H */ diff --git a/src/trace.c b/src/trace.c index 4c49111..b31490a 100644 --- a/src/trace.c +++ b/src/trace.c @@ -87,7 +87,7 @@ struct _hashtable_t size_t size; /* Hashtable size */ size_t collisions; /* Number of collisions encountered */ size_t entries; /* Number of entries registered */ - instr_t ** buckets[]; /* Hachtable buckets */ + cfg_t ** buckets[]; /* Hachtable buckets */ }; /* Compression function for Merkle-Damgard construction */ @@ -152,7 +152,7 @@ hashtable_new (const size_t size) return NULL; } - hashtable_t *ht = malloc (sizeof (hashtable_t) + size * sizeof (instr_t *)); + hashtable_t *ht = malloc (sizeof (hashtable_t) + size * sizeof (cfg_t *)); if (!ht) return NULL; @@ -161,7 +161,7 @@ hashtable_new (const size_t size) ht->size = size; ht->collisions = 0; ht->entries = 0; - memset (ht->buckets, 0, size * sizeof (instr_t *)); + memset (ht->buckets, 0, size * sizeof (cfg_t *)); return ht; } @@ -170,14 +170,22 @@ void hashtable_delete (hashtable_t *ht) { for (size_t i = 0; i < ht->size; i++) - free (ht->buckets[i]); + { + size_t j = 0; + while (buckets[i][j] != NULL) + { + free (buckets[i][j]->instruction); + free (buckets[i][j]->successor); + free (ht->buckets[i][j]); + } + } free (ht); } #include bool -hashtable_insert (hashtable_t * ht, instr_t * instr) +hashtable_insert (hashtable_t * ht, cfg_t *CFG) { if (ht == NULL || instr == NULL) { @@ -185,15 +193,15 @@ hashtable_insert (hashtable_t * ht, instr_t * instr) return false; } - size_t index = hash_instr(instr) % ht->size; + size_t index = hash_instr (CFG->instruction) % ht->size; /* Bucket is empty */ if (ht->buckets[index] == NULL) { - ht->buckets[index] = calloc (2, sizeof (instr_t *)); + ht->buckets[index] = calloc (2, sizeof (cfg_t *)); if (ht->buckets[index] == NULL) - return false; - ht->buckets[index][0] = instr; + return false; + ht->buckets[index][0] = CFG; ht->entries++; return true; } @@ -201,16 +209,16 @@ hashtable_insert 
(hashtable_t * ht, instr_t * instr) /* Bucket isn't NULL, scanning all entries to see if instr is already here */ size_t k = 0; while (ht->buckets[index][k] != NULL) - if (ht->buckets[index][k++]->address == instr->address) + if (ht->buckets[index][k++]->instruction->address == CFG->instruction->address) return true; - instr_t **new_bucket = calloc (k + 2, sizeof (instr_t *)); + instr_t **new_bucket = calloc (k + 2, sizeof (cfg_t *)); if (!new_bucket) return false; ht->collisions++; ht->entries++; - memcpy (new_bucket, ht->buckets[index], k * sizeof (instr_t *)); - new_bucket[k] = instr; + memcpy (new_bucket, ht->buckets[index], k * sizeof (cfg_t *)); + new_bucket[k] = CFG; free (ht->buckets[index]); ht->buckets[index] = new_bucket; @@ -250,6 +258,9 @@ hashtable_collisions (hashtable_t *ht) return ht->collisions; } +/*****************************************/ + + struct _trace_t { instr_t *instruction; /* Index for the hash value of the instruction */ @@ -314,12 +325,98 @@ trace_compare (trace_t *t1, trace_t *t2) return tmp2; } +/********************************************************************/ struct _cfg_t { - uint64_t index; - int type; - int nb_in; - int nb_out; - cfg_t *successor; + instr_t *instruction; + uint16_t nb_in; + uint16_t nb_out; + cfg_t **successor; }; + +cfg_t * +cfg_new (instr_t *ins) +{ + cfg_t *CFG = malloc (sizeof (cfg_t)); + if (!CFG) + return NULL; + CFG->instruction = ins; + CFG->nb_in = 0; + CFG->nb_out = 0; + switch (ins->type) + { + case 0: + CFG->successor = calloc (1, sizeof (cfg_t *)); + if (!CFG->successor) + return NULL; + break; + case 1: + CFG->successor = calloc (2, sizeof (cfg_t *)); + if (!CFG->successor) + return NULL; + break; + case 3: + CFG->successor = calloc (2, sizeof (cfg_t *)); + if (!CFG->successor) + return NULL; + break; + } + + return CFG; +} + +static bool +is_power_2 (uint16_t n) +{ + if (n == 0) + return false; + while (n % 2 == 0) + { + if (n == 2) + return true; + n = n / 2; + } + return false; +} + +cfg_t 
* +cfg_insert (cfg_t *CFG, instr_t *ins) +{ + if (!CFG) + return NULL; + cfg_t *new = cfg_new (ins); + if (!new) + return NULL; + if (!CFG->successor[0]) + { + CFG->successor[0] = new; + CFG->nb_out++; + new->nb_in++; + } + else if (CFG->instruction->type == 1 || CFG->instruction->type == 3) + { + if (CFG->instruction->type == 1) + { + if (!CFG->successor[1]) + return NULL; + else + { + CFG->successor[1] = new; + CFG->nb_out++; + new->nb_in++; + } + } + if (CFG->instruction->type == 3) + { + if (is_power_2 (CFG->nb_out)) + CFG->successor = realloc (CFG->successor, 2 * CFG->nb_out * sizeof (cfg_t *)); + if (!CFG->successor) + return NULL; + CFG->successor[CFG->nb_out] = new; + CFG->nb_out++; + new->nb_in++; + } + } + return new; +} diff --git a/src/tracker.c b/src/tracker.c index 69f6452..6d72c1c 100644 --- a/src/tracker.c +++ b/src/tracker.c @@ -163,7 +163,7 @@ get_text_info (const char *execfilename, uint64_t *text_addr, uint64_t *text_siz shstrtab += buf[i]; } uint64_t index = 0; - while (1) { + while (true) { uint64_t var = 0; fseek (execfile, e_shoff + (e_shentsize * index), SEEK_SET); fread (&buf, 4, 1, execfile); @@ -287,8 +287,11 @@ main (int argc, char *argv[], char *envp[]) nb_line++; } rewind (input); - trace_t *traces[nb_line]; - int index_trace = 0; + cfg_t *cfg = NULL; + cfg_t *cfg_entry = NULL; + hashtable_t *ht = hashtable_new (DEFAULT_HASHTABLE_SIZE); + if (ht == NULL) + err (EXIT_FAILURE, "error: cannot create hashtable"); while (fgets (str, MAX_LEN, input) != NULL) { @@ -381,10 +384,6 @@ main (int argc, char *argv[], char *envp[]) /* Main disassembling loop */ size_t instr_count = 0; - hashtable_t *ht = hashtable_new (DEFAULT_HASHTABLE_SIZE); - if (ht == NULL) - err (EXIT_FAILURE, "error: cannot create hashtable"); - trace_t *t = NULL; while (true) @@ -432,24 +431,24 @@ main (int argc, char *argv[], char *envp[]) if (!instr) err (EXIT_FAILURE, "error: cannot create instruction"); - if (!t) + if (!cfg) { /* Create a new trace and store it */ - t = 
trace_new (instr); - if (!t) - err (EXIT_FAILURE, "error: cannot create trace"); - traces[index_trace] = t; + cfg = cfg_new (instr); + if (!cfg) + err (EXIT_FAILURE, "error: cannot create a control flow graph"); + cfg_entry = cfg; } else { /* Insert a new element in the trace and update t to hold * the new tail */ - t = trace_insert (t, instr); - if (!t) - err (EXIT_FAILURE, "error: cannot create trace"); + cfg = cfg_insert (cfg, instr); + if (!cfg) + err (EXIT_FAILURE, "error: cannot create a control flow graph"); } - if (!hashtable_insert (ht, instr)) + if (!hashtable_insert (ht, cfg)) instr_delete (instr); /* Updating counters */ @@ -474,16 +473,10 @@ main (int argc, char *argv[], char *envp[]) instr_count, hashtable_entries (ht), (size_t) DEFAULT_HASHTABLE_SIZE, hashtable_collisions (ht)); - hashtable_delete (ht); - - index_trace++; } } fclose(input); - - for (int i = 0; i < nb_line; i++) - trace_delete (traces[i]); - + hashtable_delete (ht); return EXIT_SUCCESS; } From 74b22f8bddc4bd1e97be2a44810014a49efd9700 Mon Sep 17 00:00:00 2001 From: MAROTTA Date: Tue, 21 Jan 2020 11:41:31 +0100 Subject: [PATCH 08/31] segfault --- include/trace.h | 6 +++-- src/trace.c | 68 ++++++++++++++++++++++++++++++++++++------------- src/tracker.c | 18 ++++++++----- 3 files changed, 66 insertions(+), 26 deletions(-) diff --git a/include/trace.h b/include/trace.h index 755fc75..4950567 100644 --- a/include/trace.h +++ b/include/trace.h @@ -26,6 +26,8 @@ typedef uint8_t byte_t; typedef struct _instr_t instr_t; +typedef struct _cfg_t cfg_t; + /* Return a new instr_t struct, NULL otherwise (and set errno) */ instr_t *instr_new (const uintptr_t addr, const uint8_t size, @@ -57,7 +59,7 @@ hashtable_t *hashtable_new (const size_t size); void hashtable_delete (hashtable_t *ht); /* Insert the instruction in the hashtable */ -bool hashtable_insert (hashtable_t * ht, instr_t * instr); +bool hashtable_insert (hashtable_t * ht, cfg_t *cfg); /* Look-up if current instruction is already in the 
hashtable */ bool hashtable_lookup (hashtable_t *ht, instr_t *instr); @@ -88,7 +90,7 @@ trace_t *trace_compare (trace_t *t1, trace_t *t2); /* ***** CFG ***** */ -typedef struct _cfg_t cfg_t; + cfg_t *cfg_new (instr_t *ins); diff --git a/src/trace.c b/src/trace.c index b31490a..71668d6 100644 --- a/src/trace.c +++ b/src/trace.c @@ -14,6 +14,7 @@ #include #include +#include struct _instr_t { @@ -53,6 +54,7 @@ instr_new (const uintptr_t addr, const uint8_t size, const uint8_t *opcodes) instr->type = 3; else instr->type = 0; + //printf("type %d \n", instr->type); return instr; } @@ -90,6 +92,15 @@ struct _hashtable_t cfg_t ** buckets[]; /* Hachtable buckets */ }; + +struct _cfg_t +{ + instr_t *instruction; + uint16_t nb_in; + uint16_t nb_out; + cfg_t **successor; +}; + /* Compression function for Merkle-Damgard construction */ #define mix(h) ({ \ (h) ^= (h) >> 23; \ @@ -172,12 +183,18 @@ hashtable_delete (hashtable_t *ht) for (size_t i = 0; i < ht->size; i++) { size_t j = 0; - while (buckets[i][j] != NULL) + printf("i,j = %zu, %zu\n",i,j); + if (ht->buckets[i]) + printf("!!!\n"); + while (ht->buckets[i][j] != NULL) { - free (buckets[i][j]->instruction); - free (buckets[i][j]->successor); + printf("rentré %zu fois !", j); + free (ht->buckets[i][j]->instruction); + free (ht->buckets[i][j]->successor); free (ht->buckets[i][j]); + j++; } + free (ht->buckets[i]); } free (ht); } @@ -187,7 +204,7 @@ hashtable_delete (hashtable_t *ht) bool hashtable_insert (hashtable_t * ht, cfg_t *CFG) { - if (ht == NULL || instr == NULL) + if (ht == NULL || CFG->instruction == NULL) { errno = EINVAL; return false; @@ -198,9 +215,13 @@ hashtable_insert (hashtable_t * ht, cfg_t *CFG) /* Bucket is empty */ if (ht->buckets[index] == NULL) { + printf("création \n"); ht->buckets[index] = calloc (2, sizeof (cfg_t *)); if (ht->buckets[index] == NULL) + { + printf("coucou 1 \n"); return false; + } ht->buckets[index][0] = CFG; ht->entries++; return true; @@ -210,12 +231,18 @@ hashtable_insert 
(hashtable_t * ht, cfg_t *CFG) size_t k = 0; while (ht->buckets[index][k] != NULL) if (ht->buckets[index][k++]->instruction->address == CFG->instruction->address) - return true; - - instr_t **new_bucket = calloc (k + 2, sizeof (cfg_t *)); + { + return true; /* No error but we need to delete the redundant one */ + } + cfg_t **new_bucket = calloc (k + 2, sizeof (cfg_t *)); if (!new_bucket) - return false; + { + printf("coucou 3 \n"); + return false; + } + ht->collisions++; + printf("collisions \n"); ht->entries++; memcpy (new_bucket, ht->buckets[index], k * sizeof (cfg_t *)); new_bucket[k] = CFG; @@ -240,7 +267,7 @@ hashtable_lookup (hashtable_t *ht, instr_t *instr) /* Bucket is not empty, scanning all entries to see if instr is here */ size_t k = 0; while (ht->buckets[index][k] != NULL) - if (ht->buckets[index][k++]->address == instr->address) + if (ht->buckets[index][k++]->instruction->address == instr->address) return true; return false; @@ -327,13 +354,6 @@ trace_compare (trace_t *t1, trace_t *t2) /********************************************************************/ -struct _cfg_t -{ - instr_t *instruction; - uint16_t nb_in; - uint16_t nb_out; - cfg_t **successor; -}; cfg_t * cfg_new (instr_t *ins) @@ -348,21 +368,35 @@ cfg_new (instr_t *ins) { case 0: CFG->successor = calloc (1, sizeof (cfg_t *)); + CFG->successor[0] = calloc (1, sizeof (cfg_t)); + CFG->successor[0] = NULL; if (!CFG->successor) return NULL; break; case 1: CFG->successor = calloc (2, sizeof (cfg_t *)); + for (int i = 0; i < 2; i++) + CFG->successor[i] = calloc (1, sizeof (cfg_t)); + CFG->successor[0] = NULL; + if (!CFG->successor) + return NULL; + break; + case 2: + CFG->successor = calloc (1, sizeof (cfg_t *)); + CFG->successor[0] = calloc (1, sizeof (cfg_t)); + CFG->successor[0] = NULL; if (!CFG->successor) return NULL; break; case 3: CFG->successor = calloc (2, sizeof (cfg_t *)); + for (int i = 0; i < 2; i++) + CFG->successor[i] = calloc (1, sizeof (cfg_t)); + CFG->successor[0] = NULL; if 
(!CFG->successor) return NULL; break; } - return CFG; } diff --git a/src/tracker.c b/src/tracker.c index 6d72c1c..d6ddd35 100644 --- a/src/tracker.c +++ b/src/tracker.c @@ -384,7 +384,7 @@ main (int argc, char *argv[], char *envp[]) /* Main disassembling loop */ size_t instr_count = 0; - trace_t *t = NULL; + while (true) { @@ -430,26 +430,30 @@ main (int argc, char *argv[], char *envp[]) instr_t *instr = instr_new (ip, insn[0].size, buf); if (!instr) err (EXIT_FAILURE, "error: cannot create instruction"); + cs_free (insn, count); if (!cfg) - { + { /* Create a new trace and store it */ + printf("Premier noeud \n"); cfg = cfg_new (instr); if (!cfg) - err (EXIT_FAILURE, "error: cannot create a control flow graph"); - cfg_entry = cfg; + err (EXIT_FAILURE, "error: cannot create a control flow graph"); + cfg_entry = cfg; } else { - /* Insert a new element in the trace and update t to hold - * the new tail */ + /* Insert a new element in the cfg and update cfg to hold + * the new node */ cfg = cfg_insert (cfg, instr); if (!cfg) err (EXIT_FAILURE, "error: cannot create a control flow graph"); } if (!hashtable_insert (ht, cfg)) + { instr_delete (instr); + } /* Updating counters */ instr_count++; @@ -461,7 +465,7 @@ main (int argc, char *argv[], char *envp[]) * we have to wait for ptrace() to return '0'. 
*/ while (ptrace(PTRACE_SINGLESTEP, child, NULL, NULL)); } - + cs_close (&handle); fprintf(output, "\n" "\tStatistics about this run\n" From 52dab9623141ca9b46556f48898bab9374f645a6 Mon Sep 17 00:00:00 2001 From: MAROTTA Date: Tue, 21 Jan 2020 17:25:45 +0100 Subject: [PATCH 09/31] we broke it :( --- include/trace.h | 8 ++- src/trace.c | 168 +++++++++++++++++++++++++++++++++++------------- src/tracker.c | 17 ++++- 3 files changed, 145 insertions(+), 48 deletions(-) diff --git a/include/trace.h b/include/trace.h index 4950567..35b6c58 100644 --- a/include/trace.h +++ b/include/trace.h @@ -62,7 +62,7 @@ void hashtable_delete (hashtable_t *ht); bool hashtable_insert (hashtable_t * ht, cfg_t *cfg); /* Look-up if current instruction is already in the hashtable */ -bool hashtable_lookup (hashtable_t *ht, instr_t *instr); +cfg_t *hashtable_lookup (hashtable_t *ht, instr_t *instr); /* Count the number of entries in the hashtable */ size_t hashtable_entries (hashtable_t *ht); @@ -94,7 +94,11 @@ trace_t *trace_compare (trace_t *t1, trace_t *t2); cfg_t *cfg_new (instr_t *ins); -cfg_t *cfg_insert (cfg_t *CFG, instr_t *ins); +cfg_t *aux_cfg_insert (cfg_t *CFG, cfg_t *new); + +cfg_t *cfg_insert (hashtable_t *ht, cfg_t *CFG, instr_t *ins); + +void cfg_delete (cfg_t *CFG); diff --git a/src/trace.c b/src/trace.c index 71668d6..fa18393 100644 --- a/src/trace.c +++ b/src/trace.c @@ -54,7 +54,6 @@ instr_new (const uintptr_t addr, const uint8_t size, const uint8_t *opcodes) instr->type = 3; else instr->type = 0; - //printf("type %d \n", instr->type); return instr; } @@ -183,18 +182,15 @@ hashtable_delete (hashtable_t *ht) for (size_t i = 0; i < ht->size; i++) { size_t j = 0; - printf("i,j = %zu, %zu\n",i,j); if (ht->buckets[i]) - printf("!!!\n"); - while (ht->buckets[i][j] != NULL) { - printf("rentré %zu fois !", j); - free (ht->buckets[i][j]->instruction); - free (ht->buckets[i][j]->successor); - free (ht->buckets[i][j]); - j++; + while (ht->buckets[i][j] != NULL) + { + cfg_delete 
(ht->buckets[i][j]); + j++; + } + free (ht->buckets[i]); } - free (ht->buckets[i]); } free (ht); } @@ -215,13 +211,9 @@ hashtable_insert (hashtable_t * ht, cfg_t *CFG) /* Bucket is empty */ if (ht->buckets[index] == NULL) { - printf("création \n"); ht->buckets[index] = calloc (2, sizeof (cfg_t *)); if (ht->buckets[index] == NULL) - { - printf("coucou 1 \n"); return false; - } ht->buckets[index][0] = CFG; ht->entries++; return true; @@ -230,19 +222,16 @@ hashtable_insert (hashtable_t * ht, cfg_t *CFG) /* Bucket isn't NULL, scanning all entries to see if instr is already here */ size_t k = 0; while (ht->buckets[index][k] != NULL) - if (ht->buckets[index][k++]->instruction->address == CFG->instruction->address) - { + { + if (ht->buckets[index][k]->instruction->address == CFG->instruction->address) return true; /* No error but we need to delete the redundant one */ - } + k++; + } cfg_t **new_bucket = calloc (k + 2, sizeof (cfg_t *)); if (!new_bucket) - { - printf("coucou 3 \n"); - return false; - } + return false; ht->collisions++; - printf("collisions \n"); ht->entries++; memcpy (new_bucket, ht->buckets[index], k * sizeof (cfg_t *)); new_bucket[k] = CFG; @@ -252,25 +241,27 @@ hashtable_insert (hashtable_t * ht, cfg_t *CFG) return true; } -bool +cfg_t * hashtable_lookup (hashtable_t *ht, instr_t *instr) { if (!ht) - return false; + return NULL; size_t index = hash_instr (instr) % ht->size; /* Bucket is empty */ if (ht->buckets[index] == NULL) - return false; + return NULL; /* Bucket is not empty, scanning all entries to see if instr is here */ size_t k = 0; while (ht->buckets[index][k] != NULL) - if (ht->buckets[index][k++]->instruction->address == instr->address) - return true; - - return false; + { + if (ht->buckets[index][k]->instruction->address == instr->address) + return ht->buckets[index][k]; + k++; + } + return NULL; } size_t @@ -368,33 +359,75 @@ cfg_new (instr_t *ins) { case 0: CFG->successor = calloc (1, sizeof (cfg_t *)); - CFG->successor[0] = calloc (1, 
sizeof (cfg_t)); - CFG->successor[0] = NULL; if (!CFG->successor) + { + cfg_delete (CFG); + return NULL; + } + CFG->successor[0] = calloc (1, sizeof (cfg_t)); + if (!CFG->successor[0]) + { + cfg_delete (CFG); return NULL; + } + CFG->successor[0] = NULL; break; case 1: CFG->successor = calloc (2, sizeof (cfg_t *)); - for (int i = 0; i < 2; i++) - CFG->successor[i] = calloc (1, sizeof (cfg_t)); - CFG->successor[0] = NULL; if (!CFG->successor) + { + cfg_delete (CFG); return NULL; + } + for (int i = 0; i < 2; i++) + { + CFG->successor[i] = calloc (1, sizeof (cfg_t)); + if (!CFG->successor[i]) + { + cfg_delete (CFG); + return NULL; + } + } + CFG->successor[0] = NULL; break; case 2: CFG->successor = calloc (1, sizeof (cfg_t *)); + if (!CFG->successor) + { + cfg_delete (CFG); + return NULL; + } CFG->successor[0] = calloc (1, sizeof (cfg_t)); + if (!CFG->successor[0]) + { + cfg_delete (CFG); + return NULL; + } CFG->successor[0] = NULL; + break; if (!CFG->successor) + { + cfg_delete (CFG); return NULL; + } break; case 3: CFG->successor = calloc (2, sizeof (cfg_t *)); - for (int i = 0; i < 2; i++) - CFG->successor[i] = calloc (1, sizeof (cfg_t)); - CFG->successor[0] = NULL; if (!CFG->successor) + { + cfg_delete (CFG); return NULL; + } + for (int i = 0; i < 2; i++) + { + CFG->successor[i] = calloc (1, sizeof (cfg_t)); + if (!CFG->successor[i]) + { + cfg_delete (CFG); + return NULL; + } + } + CFG->successor[0] = NULL; break; } return CFG; @@ -415,11 +448,8 @@ is_power_2 (uint16_t n) } cfg_t * -cfg_insert (cfg_t *CFG, instr_t *ins) +aux_cfg_insert (cfg_t *CFG, cfg_t *new) { - if (!CFG) - return NULL; - cfg_t *new = cfg_new (ins); if (!new) return NULL; if (!CFG->successor[0]) @@ -427,7 +457,7 @@ cfg_insert (cfg_t *CFG, instr_t *ins) CFG->successor[0] = new; CFG->nb_out++; new->nb_in++; - } + } else if (CFG->instruction->type == 1 || CFG->instruction->type == 3) { if (CFG->instruction->type == 1) @@ -446,11 +476,63 @@ cfg_insert (cfg_t *CFG, instr_t *ins) if (is_power_2 
(CFG->nb_out)) CFG->successor = realloc (CFG->successor, 2 * CFG->nb_out * sizeof (cfg_t *)); if (!CFG->successor) + { + cfg_delete (CFG); return NULL; + } CFG->successor[CFG->nb_out] = new; CFG->nb_out++; new->nb_in++; } } - return new; + return new; +} + +cfg_t * +cfg_insert (hashtable_t *ht, cfg_t *CFG, instr_t *ins) +{ + if (!CFG) + return NULL; + cfg_t *new = hashtable_lookup (ht, ins); + if (!new) + { + new = cfg_new (ins); + return aux_cfg_insert(CFG, new); + } +else + { + + for (size_t i = 0; i < CFG->nb_out; i++) + { + if (CFG->successor[i]->instruction->address + == new->instruction->address) + return new; + } + return aux_cfg_insert(CFG, new); + } +} + + +void +cfg_delete (cfg_t *CFG) +{ + if (CFG) + { + if (CFG->instruction) + { + instr_delete (CFG->instruction); + } + if (CFG->successor) + { + size_t i = 0; + while (CFG->successor[i]) + { + free (CFG->successor[i]); + i++; + } + free (CFG->successor); + } + free (CFG); + } + return; } diff --git a/src/tracker.c b/src/tracker.c index d6ddd35..680dbb9 100644 --- a/src/tracker.c +++ b/src/tracker.c @@ -429,25 +429,35 @@ main (int argc, char *argv[], char *envp[]) /* Create the instr_t structure */ instr_t *instr = instr_new (ip, insn[0].size, buf); if (!instr) + { + hashtable_delete (ht); err (EXIT_FAILURE, "error: cannot create instruction"); + } cs_free (insn, count); if (!cfg) { /* Create a new trace and store it */ - printf("Premier noeud \n"); cfg = cfg_new (instr); if (!cfg) + { + cfg_delete (cfg); + hashtable_delete (ht); err (EXIT_FAILURE, "error: cannot create a control flow graph"); + } cfg_entry = cfg; } else { /* Insert a new element in the cfg and update cfg to hold * the new node */ - cfg = cfg_insert (cfg, instr); + cfg = cfg_insert (ht, cfg, instr); if (!cfg) + { + cfg_delete (cfg); + hashtable_delete (ht); err (EXIT_FAILURE, "error: cannot create a control flow graph"); + } } if (!hashtable_insert (ht, cfg)) @@ -480,7 +490,8 @@ main (int argc, char *argv[], char *envp[]) } } - 
fclose(input); + fclose (input); + fclose (output); hashtable_delete (ht); return EXIT_SUCCESS; } From b3a9a177a0ce4b826581c5a6c5141dd8e51c6eff Mon Sep 17 00:00:00 2001 From: mrglm Date: Wed, 22 Jan 2020 00:33:33 +0100 Subject: [PATCH 10/31] Fixing double free error and (some) memory leaks --- src/trace.c | 13 +++---------- src/tracker.c | 5 ----- 2 files changed, 3 insertions(+), 15 deletions(-) diff --git a/src/trace.c b/src/trace.c index fa18393..c50cf18 100644 --- a/src/trace.c +++ b/src/trace.c @@ -497,11 +497,12 @@ cfg_insert (hashtable_t *ht, cfg_t *CFG, instr_t *ins) if (!new) { new = cfg_new (ins); + hashtable_insert (ht, new); return aux_cfg_insert(CFG, new); } else { - + instr_delete (ins); for (size_t i = 0; i < CFG->nb_out; i++) { if (CFG->successor[i]->instruction->address @@ -523,15 +524,7 @@ cfg_delete (cfg_t *CFG) instr_delete (CFG->instruction); } if (CFG->successor) - { - size_t i = 0; - while (CFG->successor[i]) - { - free (CFG->successor[i]); - i++; - } - free (CFG->successor); - } + free (CFG->successor); free (CFG); } return; diff --git a/src/tracker.c b/src/tracker.c index 680dbb9..3ba4a2d 100644 --- a/src/tracker.c +++ b/src/tracker.c @@ -460,11 +460,6 @@ main (int argc, char *argv[], char *envp[]) } } - if (!hashtable_insert (ht, cfg)) - { - instr_delete (instr); - } - /* Updating counters */ instr_count++; } From 89a1040224e93055f22902ae3c5d3f2a3ecebe27 Mon Sep 17 00:00:00 2001 From: MAROTTA Date: Wed, 22 Jan 2020 10:43:53 +0100 Subject: [PATCH 11/31] all the memory leaks are fixed ! 
--- include/trace.h | 2 +- src/trace.c | 54 +++++++++---------------------------------------- src/tracker.c | 2 +- 3 files changed, 12 insertions(+), 46 deletions(-) diff --git a/include/trace.h b/include/trace.h index 35b6c58..7580977 100644 --- a/include/trace.h +++ b/include/trace.h @@ -92,7 +92,7 @@ trace_t *trace_compare (trace_t *t1, trace_t *t2); -cfg_t *cfg_new (instr_t *ins); +cfg_t *cfg_new (hashtable_t *ht, instr_t *ins); cfg_t *aux_cfg_insert (cfg_t *CFG, cfg_t *new); diff --git a/src/trace.c b/src/trace.c index c50cf18..eac61bb 100644 --- a/src/trace.c +++ b/src/trace.c @@ -347,7 +347,7 @@ trace_compare (trace_t *t1, trace_t *t2) cfg_t * -cfg_new (instr_t *ins) +cfg_new (hashtable_t *ht, instr_t *ins) { cfg_t *CFG = malloc (sizeof (cfg_t)); if (!CFG) @@ -364,13 +364,6 @@ cfg_new (instr_t *ins) cfg_delete (CFG); return NULL; } - CFG->successor[0] = calloc (1, sizeof (cfg_t)); - if (!CFG->successor[0]) - { - cfg_delete (CFG); - return NULL; - } - CFG->successor[0] = NULL; break; case 1: CFG->successor = calloc (2, sizeof (cfg_t *)); @@ -379,16 +372,6 @@ cfg_new (instr_t *ins) cfg_delete (CFG); return NULL; } - for (int i = 0; i < 2; i++) - { - CFG->successor[i] = calloc (1, sizeof (cfg_t)); - if (!CFG->successor[i]) - { - cfg_delete (CFG); - return NULL; - } - } - CFG->successor[0] = NULL; break; case 2: CFG->successor = calloc (1, sizeof (cfg_t *)); @@ -397,19 +380,6 @@ cfg_new (instr_t *ins) cfg_delete (CFG); return NULL; } - CFG->successor[0] = calloc (1, sizeof (cfg_t)); - if (!CFG->successor[0]) - { - cfg_delete (CFG); - return NULL; - } - CFG->successor[0] = NULL; - break; - if (!CFG->successor) - { - cfg_delete (CFG); - return NULL; - } break; case 3: CFG->successor = calloc (2, sizeof (cfg_t *)); @@ -418,18 +388,9 @@ cfg_new (instr_t *ins) cfg_delete (CFG); return NULL; } - for (int i = 0; i < 2; i++) - { - CFG->successor[i] = calloc (1, sizeof (cfg_t)); - if (!CFG->successor[i]) - { - cfg_delete (CFG); - return NULL; - } - } - CFG->successor[0] 
= NULL; break; } + hashtable_insert (ht, CFG); return CFG; } @@ -462,10 +423,16 @@ aux_cfg_insert (cfg_t *CFG, cfg_t *new) { if (CFG->instruction->type == 1) { - if (!CFG->successor[1]) + if (CFG->nb_out == 2) return NULL; else { + CFG->successor = realloc (CFG->successor, 2 * sizeof (cfg_t *)); + if (!CFG->successor) + { + cfg_delete (CFG); + return NULL; + } CFG->successor[1] = new; CFG->nb_out++; new->nb_in++; @@ -496,8 +463,7 @@ cfg_insert (hashtable_t *ht, cfg_t *CFG, instr_t *ins) cfg_t *new = hashtable_lookup (ht, ins); if (!new) { - new = cfg_new (ins); - hashtable_insert (ht, new); + new = cfg_new (ht, ins); return aux_cfg_insert(CFG, new); } else diff --git a/src/tracker.c b/src/tracker.c index 3ba4a2d..73f9e70 100644 --- a/src/tracker.c +++ b/src/tracker.c @@ -438,7 +438,7 @@ main (int argc, char *argv[], char *envp[]) if (!cfg) { /* Create a new trace and store it */ - cfg = cfg_new (instr); + cfg = cfg_new (ht, instr); if (!cfg) { cfg_delete (cfg); From ee746c87c60687c4a0b65f72262b6b49975efe84 Mon Sep 17 00:00:00 2001 From: MAROTTA Date: Wed, 22 Jan 2020 11:09:05 +0100 Subject: [PATCH 12/31] handling errors --- src/tracker.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/tracker.c b/src/tracker.c index 73f9e70..bf4a874 100644 --- a/src/tracker.c +++ b/src/tracker.c @@ -431,6 +431,10 @@ main (int argc, char *argv[], char *envp[]) if (!instr) { hashtable_delete (ht); + cs_free (insn, count); + cs_close (&handle); + fclose (input); + fclose (output); err (EXIT_FAILURE, "error: cannot create instruction"); } cs_free (insn, count); @@ -441,8 +445,10 @@ main (int argc, char *argv[], char *envp[]) cfg = cfg_new (ht, instr); if (!cfg) { - cfg_delete (cfg); hashtable_delete (ht); + cs_close (&handle); + fclose (input); + fclose (output); err (EXIT_FAILURE, "error: cannot create a control flow graph"); } cfg_entry = cfg; @@ -454,8 +460,10 @@ main (int argc, char *argv[], char *envp[]) cfg = cfg_insert (ht, cfg, instr); if 
(!cfg) { - cfg_delete (cfg); hashtable_delete (ht); + cs_close (&handle); + fclose (input); + fclose (output); err (EXIT_FAILURE, "error: cannot create a control flow graph"); } } From 4ed6f40f47708cf6653059bf3ec4e73c8aebfa37 Mon Sep 17 00:00:00 2001 From: mrglm Date: Wed, 22 Jan 2020 17:36:05 +0100 Subject: [PATCH 13/31] we broke it again :( function localisation is causing a segfault --- src/trace.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++----- src/tracker.c | 1 + 2 files changed, 50 insertions(+), 5 deletions(-) diff --git a/src/trace.c b/src/trace.c index eac61bb..5e95333 100644 --- a/src/trace.c +++ b/src/trace.c @@ -20,7 +20,7 @@ struct _instr_t { uintptr_t address; /* Address where lies the instruction */ // uintptr_t *next; /* List of addresses of the next instructions */ - uint8_t type; /* Instr type: 0 = instr, 1 = branch, 2 = call, 3 = jmp */ + uint8_t type; /* Instr type: 0 = instr, 1 = branch, 2 = call, 3 = jmp, 4 = ret */ uint8_t size; /* Opcode size */ uint8_t opcodes[]; /* Instruction opcode */ }; @@ -52,6 +52,9 @@ instr_new (const uintptr_t addr, const uint8_t size, const uint8_t *opcodes) else if ((opcodes[0] >= 0xE9 && opcodes[0] <= 0xEB) || (opcodes[0] == 0xFF && (size == 4 || size == 5))) instr->type = 3; + else if (((opcodes[0] == 0xC3 || opcodes[0] == 0xCB) && size == 1) || + ((opcodes[0] == 0xC2 || opcodes[0] == 0xCA) && size == 3)) + instr->type = 4; else instr->type = 0; return instr; @@ -97,9 +100,14 @@ struct _cfg_t instr_t *instruction; uint16_t nb_in; uint16_t nb_out; + uint16_t name; cfg_t **successor; }; +uint16_t depth = 0; +uint16_t nb_name = 0; +cfg_t *stack[256] = {NULL}; + /* Compression function for Merkle-Damgard construction */ #define mix(h) ({ \ (h) ^= (h) >> 23; \ @@ -355,6 +363,8 @@ cfg_new (hashtable_t *ht, instr_t *ins) CFG->instruction = ins; CFG->nb_in = 0; CFG->nb_out = 0; + if (nb_name == 0) + CFG->name = 0; switch (ins->type) { case 0: @@ -374,7 +384,7 @@ cfg_new (hashtable_t *ht, instr_t *ins) } break; 
case 2: - CFG->successor = calloc (1, sizeof (cfg_t *)); + CFG->successor = calloc (2, sizeof (cfg_t *)); if (!CFG->successor) { cfg_delete (CFG); @@ -389,6 +399,9 @@ cfg_new (hashtable_t *ht, instr_t *ins) return NULL; } break; + case 4: + CFG->successor = NULL; + break; } hashtable_insert (ht, CFG); return CFG; @@ -413,13 +426,15 @@ aux_cfg_insert (cfg_t *CFG, cfg_t *new) { if (!new) return NULL; - if (!CFG->successor[0]) + if (CFG->instruction->type != 4 && !CFG->successor[0]) { CFG->successor[0] = new; CFG->nb_out++; new->nb_in++; + new->name = CFG->name; } - else if (CFG->instruction->type == 1 || CFG->instruction->type == 3) + else if (CFG->instruction->type == 1 || CFG->instruction->type == 3 + || CFG->instruction->type == 4) { if (CFG->instruction->type == 1) { @@ -427,7 +442,7 @@ aux_cfg_insert (cfg_t *CFG, cfg_t *new) return NULL; else { - CFG->successor = realloc (CFG->successor, 2 * sizeof (cfg_t *)); + // CFG->successor = realloc (CFG->successor, 2 * sizeof (cfg_t *)); if (!CFG->successor) { cfg_delete (CFG); @@ -436,8 +451,29 @@ aux_cfg_insert (cfg_t *CFG, cfg_t *new) CFG->successor[1] = new; CFG->nb_out++; new->nb_in++; + new->name = CFG->name; } } + if (CFG->instruction->type == 4) + { + depth--; + CFG = stack[depth]; + stack[depth] = NULL; + + + if (is_power_2 (CFG->nb_out)) + CFG->successor = realloc (CFG->successor, 2 * CFG->nb_out * sizeof (cfg_t *)); + if (!CFG->successor) + { + cfg_delete (CFG); + return NULL; + } + CFG->successor[1] = new; + CFG->nb_out++; + new->nb_in++; + new->name = CFG->name; + + } if (CFG->instruction->type == 3) { if (is_power_2 (CFG->nb_out)) @@ -450,6 +486,7 @@ aux_cfg_insert (cfg_t *CFG, cfg_t *new) CFG->successor[CFG->nb_out] = new; CFG->nb_out++; new->nb_in++; + new->name = CFG->name; } } return new; @@ -464,10 +501,17 @@ cfg_insert (hashtable_t *ht, cfg_t *CFG, instr_t *ins) if (!new) { new = cfg_new (ht, ins); + if (CFG->instruction->type == 2) + { + stack[depth] = CFG; + depth++; + } return aux_cfg_insert(CFG, 
new); } else { + if (CFG->instruction->type == 2) + depth++; instr_delete (ins); for (size_t i = 0; i < CFG->nb_out; i++) { diff --git a/src/tracker.c b/src/tracker.c index bf4a874..9cd8d87 100644 --- a/src/tracker.c +++ b/src/tracker.c @@ -443,6 +443,7 @@ main (int argc, char *argv[], char *envp[]) { /* Create a new trace and store it */ cfg = cfg_new (ht, instr); + if (!cfg) { hashtable_delete (ht); From 97e1aece1e3ce4a06eeffb67cb92b119a4186192 Mon Sep 17 00:00:00 2001 From: mrglm Date: Thu, 23 Jan 2020 14:08:28 +0100 Subject: [PATCH 14/31] Formatting and adding comments in trace.h --- include/trace.h | 38 ++++++++++++++++++++++++++------------ 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/include/trace.h b/include/trace.h index 7580977..7f946f3 100644 --- a/include/trace.h +++ b/include/trace.h @@ -13,21 +13,33 @@ #ifndef _TRACE_H #define _TRACE_H +#include #include #include -#include #define DEFAULT_HASHTABLE_SIZE 65536 /* 2^16 */ /* A more convenient byte_t type */ typedef uint8_t byte_t; -/* ***** Handling assembly instructions ***** */ +/* ***** Definitions of all the struct used ***** */ + +/* ***** Handling assembly instructions ***** */ typedef struct _instr_t instr_t; +/* ***** Hashtables to store cfg nodes ***** */ +typedef struct _hashtable_t hashtable_t; + +/* ***** cfg nodes keeping track of the executions ***** */ typedef struct _cfg_t cfg_t; +/* ***** Linked list to store a trace ***** */ +typedef struct _trace_t trace_t; + + +/* ***** instr_t functions ***** */ + /* Return a new instr_t struct, NULL otherwise (and set errno) */ instr_t *instr_new (const uintptr_t addr, const uint8_t size, @@ -45,9 +57,8 @@ size_t instr_get_size (instr_t * const instr); /* Get a pointer to the opcodes of the instruction */ uint8_t * instr_get_opcodes (instr_t * const instr); -/* ***** Hashtables to store instructions ***** */ -typedef struct _hashtable_t hashtable_t; +/* ***** hashtable_t functions ***** */ /* Return an hash index for the 
instruction */ uint64_t hash_instr (const instr_t *instr); @@ -70,16 +81,15 @@ size_t hashtable_entries (hashtable_t *ht); /* Count the number of collisions in the hashtable */ size_t hashtable_collisions (hashtable_t *ht); -/* ***** Linked list to store a trace ***** */ -typedef struct _trace_t trace_t; +/* ***** trace_t functions ***** */ -/* Creates a trace and initialize the first element with hash_index +/* Creates a trace and initialize it with ins Returns a pointer to the created trace, or NULL if an error occured */ trace_t *trace_new (instr_t *ins); -/* Insert an element initialized with hash_index and insert it after t -Returns a pointer to the created element or NULL if an error occured*/ +/* Creates an element initialized with ins and insert it after t +Returns a pointer to the created element or NULL if an error occured */ trace_t *trace_insert (trace_t *t, instr_t *ins); /* Free every element in the trace t */ @@ -88,18 +98,22 @@ void trace_delete (trace_t *t); /* Returns a pointer to the first element where t2 differs from t1 */ trace_t *trace_compare (trace_t *t1, trace_t *t2); -/* ***** CFG ***** */ - +/* ***** cfg_t functions ***** */ +/* Creates a cfg and it with hash_index +Returns a pointer to the created trace, or NULL if an error occured */ cfg_t *cfg_new (hashtable_t *ht, instr_t *ins); +/* Auxiliary function for cfg_insert */ cfg_t *aux_cfg_insert (cfg_t *CFG, cfg_t *new); +/* Creates an element initialized with ins and insert it in CFG's succesors +Returns a pointer to the created element or NULL if an error occured*/ cfg_t *cfg_insert (hashtable_t *ht, cfg_t *CFG, instr_t *ins); +/* Free every allocated field of CFG, as well as CFG itself */ void cfg_delete (cfg_t *CFG); - #endif /* _TRACE_H */ From 6be0cb39a519f8406dba1358521874d80e7b51d4 Mon Sep 17 00:00:00 2001 From: mrglm Date: Thu, 23 Jan 2020 15:19:06 +0100 Subject: [PATCH 15/31] Enforcing coding style --- include/trace.h | 13 ++--- src/trace.c | 148 
++++++++++++++++++++++++------------------------ src/tracker.c | 34 ++++++----- 3 files changed, 96 insertions(+), 99 deletions(-) diff --git a/include/trace.h b/include/trace.h index 7f946f3..eddc42d 100644 --- a/include/trace.h +++ b/include/trace.h @@ -41,21 +41,20 @@ typedef struct _trace_t trace_t; /* ***** instr_t functions ***** */ /* Return a new instr_t struct, NULL otherwise (and set errno) */ -instr_t *instr_new (const uintptr_t addr, - const uint8_t size, - const uint8_t *opcodes); +instr_t *instr_new (const uintptr_t addr, const uint8_t size, + const uint8_t *opcodes); /* Delete the assembly instruction from memory */ void instr_delete (instr_t *instr); /* Get the address of the instruction */ -uintptr_t instr_get_addr (instr_t * const instr); +uintptr_t instr_get_addr (instr_t *const instr); /* Get the size (in bytes) of the instruction */ -size_t instr_get_size (instr_t * const instr); +size_t instr_get_size (instr_t *const instr); /* Get a pointer to the opcodes of the instruction */ -uint8_t * instr_get_opcodes (instr_t * const instr); +uint8_t * instr_get_opcodes (instr_t *const instr); /* ***** hashtable_t functions ***** */ @@ -70,7 +69,7 @@ hashtable_t *hashtable_new (const size_t size); void hashtable_delete (hashtable_t *ht); /* Insert the instruction in the hashtable */ -bool hashtable_insert (hashtable_t * ht, cfg_t *cfg); +bool hashtable_insert (hashtable_t *ht, cfg_t *cfg); /* Look-up if current instruction is already in the hashtable */ cfg_t *hashtable_lookup (hashtable_t *ht, instr_t *instr); diff --git a/src/trace.c b/src/trace.c index 5e95333..705e4e7 100644 --- a/src/trace.c +++ b/src/trace.c @@ -44,17 +44,18 @@ instr_new (const uintptr_t addr, const uint8_t size, const uint8_t *opcodes) memcpy (instr->opcodes, opcodes, size); if ((opcodes[0] >= 0x70 && opcodes[0] <= 0x7F) - || (opcodes[0] == 0x0F && opcodes[1] >= 0x80 && opcodes[1] <= 0x8F)) - instr->type = 1; - else if (opcodes[0] == 0xE8 || opcodes[0] == 0x9A - || (opcodes[0] == 
0xFF && (size == 2 || size == 3))) + || (opcodes[0] == 0x0F && opcodes[1] >= 0x80 && opcodes[1] <= 0x8F)) + instr->type = 1; + else if (opcodes[0] == 0xE8 + || opcodes[0] == 0x9A + || (opcodes[0] == 0xFF && (size == 2 || size == 3))) instr->type = 2; else if ((opcodes[0] >= 0xE9 && opcodes[0] <= 0xEB) - || (opcodes[0] == 0xFF && (size == 4 || size == 5))) + || (opcodes[0] == 0xFF && (size == 4 || size == 5))) instr->type = 3; - else if (((opcodes[0] == 0xC3 || opcodes[0] == 0xCB) && size == 1) || - ((opcodes[0] == 0xC2 || opcodes[0] == 0xCA) && size == 3)) - instr->type = 4; + else if (((opcodes[0] == 0xC3 || opcodes[0] == 0xCB) && size == 1) + || ((opcodes[0] == 0xC2 || opcodes[0] == 0xCA) && size == 3)) + instr->type = 4; else instr->type = 0; return instr; @@ -91,7 +92,7 @@ struct _hashtable_t size_t size; /* Hashtable size */ size_t collisions; /* Number of collisions encountered */ size_t entries; /* Number of entries registered */ - cfg_t ** buckets[]; /* Hachtable buckets */ + cfg_t ** buckets[]; /* Hachtable buckets */ }; @@ -109,10 +110,12 @@ uint16_t nb_name = 0; cfg_t *stack[256] = {NULL}; /* Compression function for Merkle-Damgard construction */ -#define mix(h) ({ \ - (h) ^= (h) >> 23; \ - (h) *= 0x2127598bf4325c37ULL; \ - (h) ^= (h) >> 47; }) +#define mix(h) \ + ({ \ + (h) ^= (h) >> 23ULL; \ + (h) *= 0x2127598bf4325c37ULL; \ + (h) ^= (h) >> 47ULL; \ + }) uint64_t fasthash64 (const uint8_t *buf, size_t len, uint64_t seed) @@ -125,32 +128,34 @@ fasthash64 (const uint8_t *buf, size_t len, uint64_t seed) uint64_t h = seed ^ (len * m); uint64_t v; - while (pos != end) { - v = *pos++; - h ^= mix(v); - h *= m; - } + while (pos != end) + { + v = *pos++; + h ^= mix(v); + h *= m; + } pos2 = (const uint8_t *) pos; v = 0; - switch (len & 7) { - case 7: v ^= (uint64_t) pos2[6] << 48; - /* FALLTHROUGH */ - case 6: v ^= (uint64_t) pos2[5] << 40; - /* FALLTHROUGH */ - case 5: v ^= (uint64_t) pos2[4] << 32; - /* FALLTHROUGH */ - case 4: v ^= (uint64_t) pos2[3] << 24; 
- /* FALLTHROUGH */ - case 3: v ^= (uint64_t) pos2[2] << 16; - /* FALLTHROUGH */ - case 2: v ^= (uint64_t) pos2[1] << 8; - /* FALLTHROUGH */ - case 1: v ^= (uint64_t) pos2[0]; - h ^= mix(v); - h *= m; - } + switch (len & 7) + { + case 7: v ^= (uint64_t) pos2[6] << 48; + /* FALLTHROUGH */ + case 6: v ^= (uint64_t) pos2[5] << 40; + /* FALLTHROUGH */ + case 5: v ^= (uint64_t) pos2[4] << 32; + /* FALLTHROUGH */ + case 4: v ^= (uint64_t) pos2[3] << 24; + /* FALLTHROUGH */ + case 3: v ^= (uint64_t) pos2[2] << 16; + /* FALLTHROUGH */ + case 2: v ^= (uint64_t) pos2[1] << 8; + /* FALLTHROUGH */ + case 1: v ^= (uint64_t) pos2[0]; + h ^= mix(v); + h *= m; + } return mix(h); } @@ -175,7 +180,7 @@ hashtable_new (const size_t size) return NULL; /* Initialize to zero */ - *ht = (hashtable_t) { 0 }; + *ht = (hashtable_t) {0}; ht->size = size; ht->collisions = 0; ht->entries = 0; @@ -193,17 +198,13 @@ hashtable_delete (hashtable_t *ht) if (ht->buckets[i]) { while (ht->buckets[i][j] != NULL) - { - cfg_delete (ht->buckets[i][j]); - j++; - } + cfg_delete (ht->buckets[i][j++]); free (ht->buckets[i]); } } free (ht); } -#include bool hashtable_insert (hashtable_t * ht, cfg_t *CFG) @@ -230,11 +231,9 @@ hashtable_insert (hashtable_t * ht, cfg_t *CFG) /* Bucket isn't NULL, scanning all entries to see if instr is already here */ size_t k = 0; while (ht->buckets[index][k] != NULL) - { - if (ht->buckets[index][k]->instruction->address == CFG->instruction->address) + if (ht->buckets[index][k++]->instruction->address + == CFG->instruction->address) return true; /* No error but we need to delete the redundant one */ - k++; - } cfg_t **new_bucket = calloc (k + 2, sizeof (cfg_t *)); if (!new_bucket) return false; @@ -327,7 +326,7 @@ trace_delete (trace_t *t) while (tmp->next) { tmp = tmp->next; - free(t); + free (t); t = tmp; } free(t); @@ -366,7 +365,7 @@ cfg_new (hashtable_t *ht, instr_t *ins) if (nb_name == 0) CFG->name = 0; switch (ins->type) - { + { case 0: CFG->successor = calloc (1, sizeof 
(cfg_t *)); if (!CFG->successor) @@ -378,31 +377,31 @@ cfg_new (hashtable_t *ht, instr_t *ins) case 1: CFG->successor = calloc (2, sizeof (cfg_t *)); if (!CFG->successor) - { - cfg_delete (CFG); - return NULL; - } + { + cfg_delete (CFG); + return NULL; + } break; case 2: CFG->successor = calloc (2, sizeof (cfg_t *)); if (!CFG->successor) - { - cfg_delete (CFG); - return NULL; - } + { + cfg_delete (CFG); + return NULL; + } break; case 3: CFG->successor = calloc (2, sizeof (cfg_t *)); if (!CFG->successor) - { - cfg_delete (CFG); - return NULL; - } + { + cfg_delete (CFG); + return NULL; + } break; case 4: CFG->successor = NULL; break; - } + } hashtable_insert (ht, CFG); return CFG; } @@ -433,8 +432,9 @@ aux_cfg_insert (cfg_t *CFG, cfg_t *new) new->nb_in++; new->name = CFG->name; } - else if (CFG->instruction->type == 1 || CFG->instruction->type == 3 - || CFG->instruction->type == 4) + else if (CFG->instruction->type == 1 + || CFG->instruction->type == 3 + || CFG->instruction->type == 4) { if (CFG->instruction->type == 1) { @@ -443,15 +443,15 @@ aux_cfg_insert (cfg_t *CFG, cfg_t *new) else { // CFG->successor = realloc (CFG->successor, 2 * sizeof (cfg_t *)); - if (!CFG->successor) - { - cfg_delete (CFG); - return NULL; - } - CFG->successor[1] = new; - CFG->nb_out++; - new->nb_in++; - new->name = CFG->name; + if (!CFG->successor) + { + cfg_delete (CFG); + return NULL; + } + CFG->successor[1] = new; + CFG->nb_out++; + new->nb_in++; + new->name = CFG->name; } } if (CFG->instruction->type == 4) @@ -530,9 +530,9 @@ cfg_delete (cfg_t *CFG) if (CFG) { if (CFG->instruction) - { - instr_delete (CFG->instruction); - } + { + instr_delete (CFG->instruction); + } if (CFG->successor) free (CFG->successor); free (CFG); diff --git a/src/tracker.c b/src/tracker.c index 9cd8d87..e1afd43 100644 --- a/src/tracker.c +++ b/src/tracker.c @@ -305,7 +305,7 @@ main (int argc, char *argv[], char *envp[]) { size_t token_length = strlen (token); if (token[token_length - 1] == '\n') - 
token[token_length - 1] = '\0'; /* Formating trick */ + token[token_length - 1] = '\0'; /* Formatting trick */ exec_argv[index] = token; index++; token = strtok (NULL, " "); @@ -319,9 +319,7 @@ main (int argc, char *argv[], char *envp[]) /* Display the traced command */ fprintf (output, "%s: starting to trace '", program_name); for (int i = 0; i < exec_argc - 1; i++) - { - fprintf (output, "%s ", exec_argv[i]); - } + fprintf (output, "%s ", exec_argv[i]); fprintf (output, "%s'\n\n", exec_argv[exec_argc - 1]); /* Forking and tracing */ @@ -445,13 +443,13 @@ main (int argc, char *argv[], char *envp[]) cfg = cfg_new (ht, instr); if (!cfg) - { - hashtable_delete (ht); - cs_close (&handle); - fclose (input); - fclose (output); - err (EXIT_FAILURE, "error: cannot create a control flow graph"); - } + { + hashtable_delete (ht); + cs_close (&handle); + fclose (input); + fclose (output); + err (EXIT_FAILURE, "error: cannot create a control flow graph"); + } cfg_entry = cfg; } else @@ -460,13 +458,13 @@ main (int argc, char *argv[], char *envp[]) * the new node */ cfg = cfg_insert (ht, cfg, instr); if (!cfg) - { - hashtable_delete (ht); - cs_close (&handle); - fclose (input); - fclose (output); - err (EXIT_FAILURE, "error: cannot create a control flow graph"); - } + { + hashtable_delete (ht); + cs_close (&handle); + fclose (input); + fclose (output); + err (EXIT_FAILURE, "error: cannot create a control flow graph"); + } } /* Updating counters */ From ee16253abc9719c5256c370429e3b8e15c002c83 Mon Sep 17 00:00:00 2001 From: mrglm Date: Thu, 23 Jan 2020 16:19:33 +0100 Subject: [PATCH 16/31] Improving aux_cfg_insert --- src/trace.c | 118 +++++++++++++++++++++++++--------------------------- 1 file changed, 57 insertions(+), 61 deletions(-) diff --git a/src/trace.c b/src/trace.c index 705e4e7..0b4e39e 100644 --- a/src/trace.c +++ b/src/trace.c @@ -425,71 +425,67 @@ aux_cfg_insert (cfg_t *CFG, cfg_t *new) { if (!new) return NULL; - if (CFG->instruction->type != 4 && 
!CFG->successor[0]) - { - CFG->successor[0] = new; + if (CFG->instruction->type != 4 && !CFG->successor[0]) + { + CFG->successor[0] = new; CFG->nb_out++; new->nb_in++; new->name = CFG->name; } - else if (CFG->instruction->type == 1 - || CFG->instruction->type == 3 - || CFG->instruction->type == 4) - { - if (CFG->instruction->type == 1) - { - if (CFG->nb_out == 2) - return NULL; - else - { - // CFG->successor = realloc (CFG->successor, 2 * sizeof (cfg_t *)); - if (!CFG->successor) - { - cfg_delete (CFG); - return NULL; - } - CFG->successor[1] = new; - CFG->nb_out++; - new->nb_in++; - new->name = CFG->name; - } - } - if (CFG->instruction->type == 4) - { - depth--; - CFG = stack[depth]; - stack[depth] = NULL; - - - if (is_power_2 (CFG->nb_out)) - CFG->successor = realloc (CFG->successor, 2 * CFG->nb_out * sizeof (cfg_t *)); - if (!CFG->successor) - { - cfg_delete (CFG); - return NULL; - } - CFG->successor[1] = new; - CFG->nb_out++; - new->nb_in++; - new->name = CFG->name; - - } - if (CFG->instruction->type == 3) - { - if (is_power_2 (CFG->nb_out)) - CFG->successor = realloc (CFG->successor, 2 * CFG->nb_out * sizeof (cfg_t *)); - if (!CFG->successor) - { - cfg_delete (CFG); - return NULL; - } - CFG->successor[CFG->nb_out] = new; - CFG->nb_out++; - new->nb_in++; - new->name = CFG->name; - } - } - return new; + else + { + switch (CFG->instruction->type) + { + case 0: + return NULL; + break; + case 1: + if (CFG->nb_out == 2) + return NULL; + if (!CFG->successor) + { + cfg_delete (CFG); + return NULL; + } + CFG->successor[1] = new; + CFG->nb_out++; + new->nb_in++; + new->name = CFG->name; + break; + case 2: + /* TODO */ + break; + case 3: + if (is_power_2 (CFG->nb_out)) + CFG->successor = realloc (CFG->successor, 2 * CFG->nb_out * sizeof (cfg_t *)); + if (!CFG->successor) + { + cfg_delete (CFG); + return NULL; + } + CFG->successor[CFG->nb_out] = new; + CFG->nb_out++; + new->nb_in++; + new->name = CFG->name; + break; + case 4: + depth--; + CFG = stack[depth]; + stack[depth] 
= NULL; + if (is_power_2 (CFG->nb_out)) + CFG->successor = realloc (CFG->successor, 2 * CFG->nb_out * sizeof (cfg_t *)); + if (!CFG->successor) + { + cfg_delete (CFG); + return NULL; + } + CFG->successor[1] = new; + CFG->nb_out++; + new->nb_in++; + new->name = CFG->name; + } + } + return new; } cfg_t * From 6e3af17043e0298546b22a6dbc42987dbc9a4943 Mon Sep 17 00:00:00 2001 From: mrglm Date: Thu, 23 Jan 2020 19:36:34 +0100 Subject: [PATCH 17/31] f3 c3 is now a valid ret (see comment for reference) http://repzret.org/p/repzret/ --- src/trace.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/trace.c b/src/trace.c index 0b4e39e..2f19e5e 100644 --- a/src/trace.c +++ b/src/trace.c @@ -54,7 +54,9 @@ instr_new (const uintptr_t addr, const uint8_t size, const uint8_t *opcodes) || (opcodes[0] == 0xFF && (size == 4 || size == 5))) instr->type = 3; else if (((opcodes[0] == 0xC3 || opcodes[0] == 0xCB) && size == 1) - || ((opcodes[0] == 0xC2 || opcodes[0] == 0xCA) && size == 3)) + || ((opcodes[0] == 0xC2 || opcodes[0] == 0xCA) && size == 3) + || (opcodes[0] == 0xF3 && opcodes[1] == 0xC3 && size == 3)) + instr->type = 4; else instr->type = 0; From 89b50aaf5bd72d634c17a8ceaf5f0b8afb10b6e0 Mon Sep 17 00:00:00 2001 From: mrglm Date: Thu, 23 Jan 2020 19:37:31 +0100 Subject: [PATCH 18/31] Slight fix to aux_cfg_insert --- src/trace.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/trace.c b/src/trace.c index 2f19e5e..f301929 100644 --- a/src/trace.c +++ b/src/trace.c @@ -438,9 +438,6 @@ aux_cfg_insert (cfg_t *CFG, cfg_t *new) { switch (CFG->instruction->type) { - case 0: - return NULL; - break; case 1: if (CFG->nb_out == 2) return NULL; From bf421727a43e3197deccdb7068bd6d591f4ce955 Mon Sep 17 00:00:00 2001 From: mrglm Date: Fri, 24 Jan 2020 12:22:41 +0100 Subject: [PATCH 19/31] Change in aux_cfg_insert --- src/trace.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/src/trace.c b/src/trace.c index 
f301929..eef5543 100644 --- a/src/trace.c +++ b/src/trace.c @@ -55,7 +55,7 @@ instr_new (const uintptr_t addr, const uint8_t size, const uint8_t *opcodes) instr->type = 3; else if (((opcodes[0] == 0xC3 || opcodes[0] == 0xCB) && size == 1) || ((opcodes[0] == 0xC2 || opcodes[0] == 0xCA) && size == 3) - || (opcodes[0] == 0xF3 && opcodes[1] == 0xC3 && size == 3)) + || (opcodes[0] == 0xF3 && opcodes[1] == 0xC3 && size == 2)) instr->type = 4; else @@ -401,7 +401,12 @@ cfg_new (hashtable_t *ht, instr_t *ins) } break; case 4: - CFG->successor = NULL; + CFG->successor = calloc (1, sizeof (cfg_t *)); + if (!CFG->successor) + { + cfg_delete (CFG); + return NULL; + } break; } hashtable_insert (ht, CFG); @@ -451,9 +456,6 @@ aux_cfg_insert (cfg_t *CFG, cfg_t *new) new->nb_in++; new->name = CFG->name; break; - case 2: - /* TODO */ - break; case 3: if (is_power_2 (CFG->nb_out)) CFG->successor = realloc (CFG->successor, 2 * CFG->nb_out * sizeof (cfg_t *)); @@ -469,6 +471,8 @@ aux_cfg_insert (cfg_t *CFG, cfg_t *new) break; case 4: depth--; + if (new->instruction->address == stack[depth]->instruction->address + stack[depth]->instruction->size) + { CFG = stack[depth]; stack[depth] = NULL; if (is_power_2 (CFG->nb_out)) @@ -478,10 +482,12 @@ aux_cfg_insert (cfg_t *CFG, cfg_t *new) cfg_delete (CFG); return NULL; } - CFG->successor[1] = new; + } + CFG->successor[CFG->nb_out] = new; CFG->nb_out++; new->nb_in++; new->name = CFG->name; + break; } } return new; @@ -506,7 +512,10 @@ cfg_insert (hashtable_t *ht, cfg_t *CFG, instr_t *ins) else { if (CFG->instruction->type == 2) - depth++; + { + stack[depth] = CFG; + depth++; + } instr_delete (ins); for (size_t i = 0; i < CFG->nb_out; i++) { From 0d07a914ad85d50302caf566ba419c6618fca04f Mon Sep 17 00:00:00 2001 From: MAROTTA Date: Fri, 24 Jan 2020 14:42:33 +0100 Subject: [PATCH 20/31] function calls and rets are now recognized --- src/trace.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/src/trace.c 
b/src/trace.c index eef5543..cac57d1 100644 --- a/src/trace.c +++ b/src/trace.c @@ -401,12 +401,13 @@ cfg_new (hashtable_t *ht, instr_t *ins) } break; case 4: - CFG->successor = calloc (1, sizeof (cfg_t *)); + CFG->successor = calloc (2, sizeof (cfg_t *)); if (!CFG->successor) { cfg_delete (CFG); return NULL; } + // CFG->successor = NULL; break; } hashtable_insert (ht, CFG); @@ -471,18 +472,27 @@ aux_cfg_insert (cfg_t *CFG, cfg_t *new) break; case 4: depth--; - if (new->instruction->address == stack[depth]->instruction->address + stack[depth]->instruction->size) + if (new->instruction->address + == stack[depth]->instruction->address + stack[depth]->instruction->size) { + // printf("entré ici \n"); CFG = stack[depth]; stack[depth] = NULL; + } if (is_power_2 (CFG->nb_out)) + { + CFG->successor = realloc (CFG->successor, 2 * CFG->nb_out * sizeof (cfg_t *)); + + } + if (!CFG->successor) { cfg_delete (CFG); return NULL; } - } + + // printf("coucou, %d \n",CFG->nb_out); CFG->successor[CFG->nb_out] = new; CFG->nb_out++; new->nb_in++; From a2bf751ac208aa5334dd4b1bee33c4609cc0c3ee Mon Sep 17 00:00:00 2001 From: MAROTTA Date: Fri, 24 Jan 2020 15:36:41 +0100 Subject: [PATCH 21/31] some tests added ;) --- Makefile | 1 + test/Makefile | 16 +++++++++++++++- test/call.c | 13 +++++++++++++ test/if.c | 13 +++++++++++++ test/printf.c | 8 ++++++++ test/switch.c | 19 +++++++++++++++++++ test/while.c | 9 +++++++++ 7 files changed, 78 insertions(+), 1 deletion(-) create mode 100644 test/call.c create mode 100644 test/if.c create mode 100644 test/printf.c create mode 100644 test/switch.c create mode 100644 test/while.c diff --git a/Makefile b/Makefile index 9d5f54b..680fc6b 100644 --- a/Makefile +++ b/Makefile @@ -10,6 +10,7 @@ all: @cp -f src/tracker ./ check: all + @cp tracker test/ @cd test/ && $(MAKE) format: diff --git a/test/Makefile b/test/Makefile index a2dae76..247bd0d 100644 --- a/test/Makefile +++ b/test/Makefile @@ -10,7 +10,21 @@ LDFLAGS = -lcapstone all: tracker_tests 
tracker_tests: - @echo "TODO: Tests are not yet implemented!" + gcc -o if if.c $(CFLAGS) + gcc -o while while.c $(CFLAGS) + gcc -o switch switch.c $(CFLAGS) + gcc -o printf printf.c $(CFLAGS) + gcc -o call call.c $(CFLAGS) + @echo -e "if 0\nif 44\nif -44\n" > input_if.txt + @echo -e "while 12\nwhile 0\n" > input_while.txt + @echo -e "switch 3\nswitch 7\nswitch 11\n" > input_switch.txt + @echo -e "printf Neo\n" > input_printf.txt + @echo -e "call 1337\n" > input_call.txt + ./tracker -o output_if.txt input_if.txt + ./tracker -o output_while.txt input_while.txt + ./tracker -o output_switch.txt input_switch.txt + ./tracker -o output_printf.txt input_printf.txt + ./tracker -o output_call.txt input_call.txt clean: @echo "src: Cleaning..." diff --git a/test/call.c b/test/call.c new file mode 100644 index 0000000..2acc35e --- /dev/null +++ b/test/call.c @@ -0,0 +1,13 @@ +#include <stdlib.h> + +int foo (int x) +{ + return x + 42; +} + +int main (int argc, char *argv[]) +{ + int x = atoi (argv[1]); + foo(x); + return EXIT_SUCCESS; +} diff --git a/test/if.c b/test/if.c new file mode 100644 index 0000000..2ba3d1e --- /dev/null +++ b/test/if.c @@ -0,0 +1,13 @@ +#include <stdlib.h> + +int main (int argc, char *argv[]) +{ + int x = atoi(argv[1]); + if (x == 0) + x = x + 42; + else if (x < 0) + x = -x; + else + x++; + return EXIT_SUCCESS; +} diff --git a/test/printf.c b/test/printf.c new file mode 100644 index 0000000..535239a --- /dev/null +++ b/test/printf.c @@ -0,0 +1,8 @@ +#include <stdio.h> +#include <stdlib.h> + +int main (int argc, char *argv[]) +{ + printf("Bonjour %s\n", argv[1]); + return EXIT_SUCCESS; +} diff --git a/test/switch.c b/test/switch.c new file mode 100644 index 0000000..aa47439 --- /dev/null +++ b/test/switch.c @@ -0,0 +1,19 @@ +#include <stdlib.h> + +int main (int argc, char *argv[]) +{ + int x = atoi(argv[1]); + switch (x % 3) + { + case 0: + x++; + break; + case 1: + x--; + break; + case 2: + x *= 2; + break; + } + return EXIT_SUCCESS; +} diff --git a/test/while.c b/test/while.c new file mode 100644 index
0000000..137d5b4 --- /dev/null +++ b/test/while.c @@ -0,0 +1,9 @@ +#include <stdlib.h> + +int main (int argc, char *argv[]) +{ + int x = atoi(argv[1]); + while (x > 0) + x--; + return EXIT_SUCCESS; +} From 8dc81f6607e94f8e2cd51dfd99ffe137af7b255e Mon Sep 17 00:00:00 2001 From: MAROTTA Date: Sat, 25 Jan 2020 17:41:58 +0100 Subject: [PATCH 22/31] we fixed one little mistake --- src/trace.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/trace.c b/src/trace.c index cac57d1..e7aac88 100644 --- a/src/trace.c +++ b/src/trace.c @@ -475,10 +475,13 @@ aux_cfg_insert (cfg_t *CFG, cfg_t *new) if (new->instruction->address == stack[depth]->instruction->address + stack[depth]->instruction->size) { - // printf("entré ici \n"); CFG = stack[depth]; stack[depth] = NULL; } + else + { + depth++; + } if (is_power_2 (CFG->nb_out)) { @@ -491,8 +494,6 @@ aux_cfg_insert (cfg_t *CFG, cfg_t *new) cfg_delete (CFG); return NULL; } - - // printf("coucou, %d \n",CFG->nb_out); CFG->successor[CFG->nb_out] = new; CFG->nb_out++; new->nb_in++; From 018f9d16370a2897c67a56dc6e3d090cbb1c596d Mon Sep 17 00:00:00 2001 From: mrglm Date: Mon, 27 Jan 2020 16:57:09 +0100 Subject: [PATCH 23/31] Using cgraph to draw CFG --- include/trace.h | 11 ++++++++++- src/Makefile | 2 +- src/trace.c | 28 +++++++++++++++++++++++++++- src/tracker.c | 40 ++++++++++++++++++++++++++++++++++++++-- 4 files changed, 76 insertions(+), 5 deletions(-) diff --git a/include/trace.h b/include/trace.h index eddc42d..c6b55b2 100644 --- a/include/trace.h +++ b/include/trace.h @@ -13,6 +13,7 @@ #ifndef _TRACE_H #define _TRACE_H +#include "../graphviz/cgraph.h" #include #include #include @@ -109,10 +110,18 @@ cfg_t *aux_cfg_insert (cfg_t *CFG, cfg_t *new); /* Creates an element initialized with ins and insert it in CFG's succesors Returns a pointer to the created element or NULL if an error occured*/ -cfg_t *cfg_insert (hashtable_t *ht, cfg_t *CFG, instr_t *ins); +cfg_t *cfg_insert (hashtable_t *ht, cfg_t *CFG, instr_t
*ins, char *name[],Agraph_t *g); /* Free every allocated field of CFG, as well as CFG itself */ void cfg_delete (cfg_t *CFG); +instr_t *cfg_get_instr (cfg_t *CFG); + +uint8_t cfg_get_type (cfg_t *CFG); + +uint16_t cfg_get_name (cfg_t *CFG); + +cfg_t **cfg_get_successor (cfg_t *CFG); + #endif /* _TRACE_H */ diff --git a/src/Makefile b/src/Makefile index db668f7..631aaa7 100644 --- a/src/Makefile +++ b/src/Makefile @@ -1,7 +1,7 @@ # Usual compilation flags CFLAGS = -Wall -Wextra -std=c11 -DDEBUG -g CPPFLAGS = -I../include -LDFLAGS = -lcapstone +LDFLAGS = -lcapstone -lcgraph # Special rules and targets .PHONY: all clean help diff --git a/src/trace.c b/src/trace.c index e7aac88..c22e51e 100644 --- a/src/trace.c +++ b/src/trace.c @@ -505,7 +505,7 @@ aux_cfg_insert (cfg_t *CFG, cfg_t *new) } cfg_t * -cfg_insert (hashtable_t *ht, cfg_t *CFG, instr_t *ins) +cfg_insert (hashtable_t *ht, cfg_t *CFG, instr_t *ins, char *name[],Agraph_t *g ) { if (!CFG) return NULL; @@ -518,6 +518,7 @@ cfg_insert (hashtable_t *ht, cfg_t *CFG, instr_t *ins) stack[depth] = CFG; depth++; } + return aux_cfg_insert(CFG, new); } else @@ -534,6 +535,7 @@ else == new->instruction->address) return new; } + return aux_cfg_insert(CFG, new); } } @@ -554,3 +556,27 @@ cfg_delete (cfg_t *CFG) } return; } + +instr_t * +cfg_get_instr (cfg_t *CFG) +{ + return CFG->instruction; +} + +uint8_t +cfg_get_type (cfg_t *CFG) +{ + return CFG->instruction->type; +} + +uint16_t +cfg_get_name (cfg_t *CFG) +{ + return CFG->name; +} + +cfg_t ** +cfg_get_successor (cfg_t *CFG) +{ + return CFG->successor; +} diff --git a/src/tracker.c b/src/tracker.c index e1afd43..61db685 100644 --- a/src/tracker.c +++ b/src/tracker.c @@ -11,6 +11,7 @@ */ #include "tracker.h" +#include "../graphviz/cgraph.h" #define _POSIX_C_SOURCE 200809L @@ -58,6 +59,8 @@ static FILE *output = NULL; /* output file (default: stdout) */ /* input file containing executable's name and argument */ static FILE *input = NULL; +static FILE *fp = NULL; + /* Get the 
architecture of the executable */ static arch_t check_execfile (char *execfilename) @@ -195,6 +198,20 @@ get_text_info (const char *execfilename, uint64_t *text_addr, uint64_t *text_siz return; } + +Agraph_t * +graph_create(Agraph_t *g, char *name_old, char *name_new) +{ + Agnode_t *n, *m; + Agedge_t *f; + n = agnode (g, name_old, TRUE); + m = agnode (g, name_new, TRUE); + f = agedge(g,n,m,NULL, TRUE); + return g; +} + + + int main (int argc, char *argv[], char *envp[]) { @@ -290,6 +307,11 @@ main (int argc, char *argv[], char *envp[]) cfg_t *cfg = NULL; cfg_t *cfg_entry = NULL; hashtable_t *ht = hashtable_new (DEFAULT_HASHTABLE_SIZE); + fp = fopen("toto.gv", "w+"); + Agraph_t *g; + g = agopen ("G", Agdirected, NULL); + char *to_keep[2]; + if (ht == NULL) err (EXIT_FAILURE, "error: cannot create hashtable"); @@ -411,7 +433,7 @@ main (int argc, char *argv[], char *envp[]) { /* Display the bytes */ for (size_t i = 0; i < insn[0].size; i++) - fprintf (output, " %02x", buf[i]); + fprintf (output, " %02x", buf[i]); /* Pretty printing and formating */ if (insn[0].size != 8 && insn[0].size != 11) @@ -424,6 +446,14 @@ main (int argc, char *argv[], char *envp[]) fprintf (output, "%s %s", insn[0].mnemonic, insn[0].op_str); fprintf (output, "\n"); + + char name_node[128]; + sprintf(name_node,"0x%" PRIxPTR " ", ip); + for (size_t i = 0; i < 8; i++) + sprintf(name_node + strlen(name_node), "%02x ", buf[i]); + sprintf(name_node + strlen(name_node), " %s ",insn[0].mnemonic); + sprintf(name_node + strlen(name_node), "%s ",insn[0].op_str); + /* Create the instr_t structure */ instr_t *instr = instr_new (ip, insn[0].size, buf); if (!instr) @@ -441,6 +471,7 @@ main (int argc, char *argv[], char *envp[]) { /* Create a new trace and store it */ cfg = cfg_new (ht, instr); + to_keep[0] = name_node; if (!cfg) { @@ -456,7 +487,10 @@ main (int argc, char *argv[], char *envp[]) { /* Insert a new element in the cfg and update cfg to hold * the new node */ - cfg = cfg_insert (ht, cfg, instr); 
+ to_keep[1] = name_node; + cfg = cfg_insert (ht, cfg, instr, to_keep,g); + graph_create (g, to_keep[0], to_keep[1]); + to_keep[0] = name_node; if (!cfg) { hashtable_delete (ht); @@ -495,5 +529,7 @@ main (int argc, char *argv[], char *envp[]) fclose (input); fclose (output); hashtable_delete (ht); + agwrite(g, fp); + agclose(g); return EXIT_SUCCESS; } From bc5625379d81ac5e7ca7ca16779f5bae54a8068b Mon Sep 17 00:00:00 2001 From: mrglm Date: Tue, 28 Jan 2020 17:39:22 +0100 Subject: [PATCH 24/31] drawing simple (and beautiful) traces ... still some problems with if instructions, to be continued --- include/trace.h | 15 +++++++++-- src/trace.c | 45 ++++++++++++++++++++++++++++++--- src/tracker.c | 66 ++++++++++++++++++++++++++++++++++++++----------- 3 files changed, 106 insertions(+), 20 deletions(-) diff --git a/include/trace.h b/include/trace.h index c6b55b2..50f00c2 100644 --- a/include/trace.h +++ b/include/trace.h @@ -103,14 +103,14 @@ trace_t *trace_compare (trace_t *t1, trace_t *t2); /* Creates a cfg and it with hash_index Returns a pointer to the created trace, or NULL if an error occured */ -cfg_t *cfg_new (hashtable_t *ht, instr_t *ins); +cfg_t *cfg_new (hashtable_t *ht, instr_t *ins, char *str); /* Auxiliary function for cfg_insert */ cfg_t *aux_cfg_insert (cfg_t *CFG, cfg_t *new); /* Creates an element initialized with ins and insert it in CFG's succesors Returns a pointer to the created element or NULL if an error occured*/ -cfg_t *cfg_insert (hashtable_t *ht, cfg_t *CFG, instr_t *ins, char *name[],Agraph_t *g); +cfg_t *cfg_insert (hashtable_t *ht, cfg_t *CFG, instr_t *ins, Agraph_t *g, char *str); /* Free every allocated field of CFG, as well as CFG itself */ void cfg_delete (cfg_t *CFG); @@ -118,10 +118,21 @@ void cfg_delete (cfg_t *CFG); instr_t *cfg_get_instr (cfg_t *CFG); +uint16_t cfg_get_nb_out (cfg_t *CFG); + uint8_t cfg_get_type (cfg_t *CFG); uint16_t cfg_get_name (cfg_t *CFG); cfg_t **cfg_get_successor (cfg_t *CFG); +cfg_t *cfg_get_successor_i 
(cfg_t *CFG, uint16_t i); + +uint16_t get_nb_name (void); + +char *cfg_get_str (cfg_t *CFG); + +cfg_t *get_function_entry (size_t index); + + #endif /* _TRACE_H */ diff --git a/src/trace.c b/src/trace.c index c22e51e..2b741ff 100644 --- a/src/trace.c +++ b/src/trace.c @@ -104,12 +104,14 @@ struct _cfg_t uint16_t nb_in; uint16_t nb_out; uint16_t name; + char *str_graph; cfg_t **successor; }; uint16_t depth = 0; uint16_t nb_name = 0; cfg_t *stack[256] = {NULL}; +cfg_t *function_entry[128] = {NULL}; /* Compression function for Merkle-Damgard construction */ #define mix(h) \ @@ -356,7 +358,7 @@ trace_compare (trace_t *t1, trace_t *t2) cfg_t * -cfg_new (hashtable_t *ht, instr_t *ins) +cfg_new (hashtable_t *ht, instr_t *ins, char *str) { cfg_t *CFG = malloc (sizeof (cfg_t)); if (!CFG) @@ -364,6 +366,9 @@ cfg_new (hashtable_t *ht, instr_t *ins) CFG->instruction = ins; CFG->nb_in = 0; CFG->nb_out = 0; + CFG->str_graph = calloc ((strlen (str) + 1), sizeof (char)); + strcpy (CFG->str_graph, str); + if (nb_name == 0) CFG->name = 0; switch (ins->type) @@ -505,16 +510,18 @@ aux_cfg_insert (cfg_t *CFG, cfg_t *new) } cfg_t * -cfg_insert (hashtable_t *ht, cfg_t *CFG, instr_t *ins, char *name[],Agraph_t *g ) +cfg_insert (hashtable_t *ht, cfg_t *CFG, instr_t *ins,Agraph_t *g, char *str) { if (!CFG) return NULL; cfg_t *new = hashtable_lookup (ht, ins); if (!new) { - new = cfg_new (ht, ins); + new = cfg_new (ht, ins, str); if (CFG->instruction->type == 2) { + nb_name++; + function_entry[nb_name] = new; stack[depth] = CFG; depth++; } @@ -563,6 +570,14 @@ cfg_get_instr (cfg_t *CFG) return CFG->instruction; } + + +uint16_t +cfg_get_nb_out (cfg_t *CFG) +{ + return CFG->nb_out; +} + uint8_t cfg_get_type (cfg_t *CFG) { @@ -580,3 +595,27 @@ cfg_get_successor (cfg_t *CFG) { return CFG->successor; } + +cfg_t * +cfg_get_successor_i (cfg_t *CFG, uint16_t i) +{ + return CFG->successor[i]; +} + +uint16_t +get_nb_name (void) +{ + return nb_name; +} + +cfg_t * +get_function_entry (size_t index) +{ + 
return function_entry[index]; +} + +char * +cfg_get_str (cfg_t *CFG) +{ + return CFG->str_graph; +} diff --git a/src/tracker.c b/src/tracker.c index 61db685..11858cb 100644 --- a/src/tracker.c +++ b/src/tracker.c @@ -11,7 +11,7 @@ */ #include "tracker.h" -#include "../graphviz/cgraph.h" +#include #define _POSIX_C_SOURCE 200809L @@ -200,13 +200,49 @@ get_text_info (const char *execfilename, uint64_t *text_addr, uint64_t *text_siz Agraph_t * -graph_create(Agraph_t *g, char *name_old, char *name_new) +graph_create_function (Agraph_t *g, cfg_t *entry) { Agnode_t *n, *m; Agedge_t *f; - n = agnode (g, name_old, TRUE); - m = agnode (g, name_new, TRUE); - f = agedge(g,n,m,NULL, TRUE); + uint16_t i = 0; + cfg_t *old = entry; + while (i < cfg_get_nb_out (entry)) + { + cfg_t *new = cfg_get_successor_i (old, i); + while (cfg_get_type (new) != 4) + { + if (cfg_get_type (new) == 1 || cfg_get_type (new) == 3) + graph_create_function (g, new); + else if (cfg_get_type (new) == 2) + { + if (instr_get_addr (cfg_get_instr (new)) != + instr_get_addr (cfg_get_instr (old)) + instr_get_size (cfg_get_instr (old))) + { + uint16_t j = 0; + while (j < cfg_get_nb_out (old)) + { + if (instr_get_addr (cfg_get_instr (cfg_get_successor_i (old, j))) + == instr_get_addr (cfg_get_instr (old)) + instr_get_size (cfg_get_instr (old))) + { + new = cfg_get_successor_i (old, j); + j = cfg_get_nb_out (old); + } + else + j++; + } + } + } + n = agnode (g, cfg_get_str(old), TRUE); + m = agnode (g, cfg_get_str(new), TRUE); + f = agedge(g,n,m,NULL, TRUE); + old = new; + new = cfg_get_successor_i(old, 0); + } + i++; + n = agnode (g, cfg_get_str(old), TRUE); + m = agnode (g, cfg_get_str(new), TRUE); + f = agedge(g,n,m,NULL, TRUE); + } return g; } @@ -308,9 +344,10 @@ main (int argc, char *argv[], char *envp[]) cfg_t *cfg_entry = NULL; hashtable_t *ht = hashtable_new (DEFAULT_HASHTABLE_SIZE); fp = fopen("toto.gv", "w+"); + char name_node[128]; Agraph_t *g; g = agopen ("G", Agdirected, NULL); - char *to_keep[2]; + 
if (ht == NULL) err (EXIT_FAILURE, "error: cannot create hashtable"); @@ -447,7 +484,7 @@ main (int argc, char *argv[], char *envp[]) fprintf (output, "\n"); - char name_node[128]; + sprintf(name_node,"0x%" PRIxPTR " ", ip); for (size_t i = 0; i < 8; i++) sprintf(name_node + strlen(name_node), "%02x ", buf[i]); @@ -465,13 +502,12 @@ main (int argc, char *argv[], char *envp[]) fclose (output); err (EXIT_FAILURE, "error: cannot create instruction"); } - cs_free (insn, count); + cs_free (insn, count); if (!cfg) { /* Create a new trace and store it */ - cfg = cfg_new (ht, instr); - to_keep[0] = name_node; + cfg = cfg_new (ht, instr, name_node); if (!cfg) { @@ -487,10 +523,9 @@ main (int argc, char *argv[], char *envp[]) { /* Insert a new element in the cfg and update cfg to hold * the new node */ - to_keep[1] = name_node; - cfg = cfg_insert (ht, cfg, instr, to_keep,g); - graph_create (g, to_keep[0], to_keep[1]); - to_keep[0] = name_node; + + cfg = cfg_insert (ht, cfg, instr, g,name_node); + if (!cfg) { hashtable_delete (ht); @@ -522,10 +557,11 @@ main (int argc, char *argv[], char *envp[]) "* #hashtable collisions: %zu\n\n\n", instr_count, hashtable_entries (ht), (size_t) DEFAULT_HASHTABLE_SIZE, hashtable_collisions (ht)); - } } + + graph_create_function(g, get_function_entry(99)); fclose (input); fclose (output); hashtable_delete (ht); From 0bb6e5453a20574bd1ad689f137624c9debda196 Mon Sep 17 00:00:00 2001 From: mrglm Date: Tue, 28 Jan 2020 22:21:31 +0100 Subject: [PATCH 25/31] Adding instr_type_t enum for cosmetic reason --- include/trace.h | 11 ++++++++++- src/trace.c | 36 ++++++++++++++++++------------------ src/tracker.c | 8 ++++---- 3 files changed, 32 insertions(+), 23 deletions(-) diff --git a/include/trace.h b/include/trace.h index 50f00c2..65dfb85 100644 --- a/include/trace.h +++ b/include/trace.h @@ -23,6 +23,15 @@ /* A more convenient byte_t type */ typedef uint8_t byte_t; +/* All the possible types of instruction */ +typedef enum +{ + BASIC, + BRANCH, + 
CALL, + JUMP, + RET +} instr_type_t; /* ***** Definitions of all the struct used ***** */ @@ -120,7 +129,7 @@ instr_t *cfg_get_instr (cfg_t *CFG); uint16_t cfg_get_nb_out (cfg_t *CFG); -uint8_t cfg_get_type (cfg_t *CFG); +instr_type_t cfg_get_type (cfg_t *CFG); uint16_t cfg_get_name (cfg_t *CFG); diff --git a/src/trace.c b/src/trace.c index 2b741ff..294b38c 100644 --- a/src/trace.c +++ b/src/trace.c @@ -18,9 +18,9 @@ struct _instr_t { - uintptr_t address; /* Address where lies the instruction */ + uintptr_t address; /* Address where lies the instruction */ // uintptr_t *next; /* List of addresses of the next instructions */ - uint8_t type; /* Instr type: 0 = instr, 1 = branch, 2 = call, 3 = jmp, 4 = ret */ + instr_type_t type; /* Instr type: 0 = instr, 1 = branch, 2 = call, 3 = jmp, 4 = ret */ uint8_t size; /* Opcode size */ uint8_t opcodes[]; /* Instruction opcode */ }; @@ -45,21 +45,21 @@ instr_new (const uintptr_t addr, const uint8_t size, const uint8_t *opcodes) if ((opcodes[0] >= 0x70 && opcodes[0] <= 0x7F) || (opcodes[0] == 0x0F && opcodes[1] >= 0x80 && opcodes[1] <= 0x8F)) - instr->type = 1; + instr->type = BRANCH; else if (opcodes[0] == 0xE8 - || opcodes[0] == 0x9A - || (opcodes[0] == 0xFF && (size == 2 || size == 3))) - instr->type = 2; + || opcodes[0] == 0x9A + || (opcodes[0] == 0xFF && (size == 2 || size == 3))) + instr->type = CALL; else if ((opcodes[0] >= 0xE9 && opcodes[0] <= 0xEB) || (opcodes[0] == 0xFF && (size == 4 || size == 5))) - instr->type = 3; + instr->type = JUMP; else if (((opcodes[0] == 0xC3 || opcodes[0] == 0xCB) && size == 1) - || ((opcodes[0] == 0xC2 || opcodes[0] == 0xCA) && size == 3) - || (opcodes[0] == 0xF3 && opcodes[1] == 0xC3 && size == 2)) + || ((opcodes[0] == 0xC2 || opcodes[0] == 0xCA) && size == 3) + || (opcodes[0] == 0xF3 && opcodes[1] == 0xC3 && size == 2)) - instr->type = 4; + instr->type = RET; else - instr->type = 0; + instr->type = BASIC; return instr; } @@ -438,7 +438,7 @@ aux_cfg_insert (cfg_t *CFG, cfg_t *new) { if 
(!new) return NULL; - if (CFG->instruction->type != 4 && !CFG->successor[0]) + if (CFG->instruction->type != RET && !CFG->successor[0]) { CFG->successor[0] = new; CFG->nb_out++; @@ -449,7 +449,7 @@ aux_cfg_insert (cfg_t *CFG, cfg_t *new) { switch (CFG->instruction->type) { - case 1: + case BRANCH: if (CFG->nb_out == 2) return NULL; if (!CFG->successor) @@ -462,7 +462,7 @@ aux_cfg_insert (cfg_t *CFG, cfg_t *new) new->nb_in++; new->name = CFG->name; break; - case 3: + case JUMP: if (is_power_2 (CFG->nb_out)) CFG->successor = realloc (CFG->successor, 2 * CFG->nb_out * sizeof (cfg_t *)); if (!CFG->successor) @@ -475,7 +475,7 @@ aux_cfg_insert (cfg_t *CFG, cfg_t *new) new->nb_in++; new->name = CFG->name; break; - case 4: + case RET: depth--; if (new->instruction->address == stack[depth]->instruction->address + stack[depth]->instruction->size) @@ -518,7 +518,7 @@ cfg_insert (hashtable_t *ht, cfg_t *CFG, instr_t *ins,Agraph_t *g, char *str) if (!new) { new = cfg_new (ht, ins, str); - if (CFG->instruction->type == 2) + if (CFG->instruction->type == CALL) { nb_name++; function_entry[nb_name] = new; @@ -530,7 +530,7 @@ cfg_insert (hashtable_t *ht, cfg_t *CFG, instr_t *ins,Agraph_t *g, char *str) } else { - if (CFG->instruction->type == 2) + if (CFG->instruction->type == CALL) { stack[depth] = CFG; depth++; @@ -578,7 +578,7 @@ cfg_get_nb_out (cfg_t *CFG) return CFG->nb_out; } -uint8_t +instr_type_t cfg_get_type (cfg_t *CFG) { return CFG->instruction->type; diff --git a/src/tracker.c b/src/tracker.c index 11858cb..13b9c4e 100644 --- a/src/tracker.c +++ b/src/tracker.c @@ -209,11 +209,11 @@ graph_create_function (Agraph_t *g, cfg_t *entry) while (i < cfg_get_nb_out (entry)) { cfg_t *new = cfg_get_successor_i (old, i); - while (cfg_get_type (new) != 4) + while (cfg_get_type (new) != CALL) { - if (cfg_get_type (new) == 1 || cfg_get_type (new) == 3) + if (cfg_get_type (new) == BRANCH || cfg_get_type (new) == JUMP) graph_create_function (g, new); - else if (cfg_get_type (new) == 2) 
+ else if (cfg_get_type (new) == CALL) { if (instr_get_addr (cfg_get_instr (new)) != instr_get_addr (cfg_get_instr (old)) + instr_get_size (cfg_get_instr (old))) @@ -561,7 +561,7 @@ main (int argc, char *argv[], char *envp[]) } - graph_create_function(g, get_function_entry(99)); + graph_create_function(g, get_function_entry(42)); fclose (input); fclose (output); hashtable_delete (ht); From 629cc8d946c1c6770cdca325af377514cdaf0b7a Mon Sep 17 00:00:00 2001 From: mrglm Date: Wed, 29 Jan 2020 00:29:15 +0100 Subject: [PATCH 26/31] Some more work on trace.c --- src/trace.c | 76 ++++++++++++++++++--------------------------------- src/tracker.c | 4 +-- 2 files changed, 28 insertions(+), 52 deletions(-) diff --git a/src/trace.c b/src/trace.c index 294b38c..bb2f577 100644 --- a/src/trace.c +++ b/src/trace.c @@ -51,7 +51,8 @@ instr_new (const uintptr_t addr, const uint8_t size, const uint8_t *opcodes) || (opcodes[0] == 0xFF && (size == 2 || size == 3))) instr->type = CALL; else if ((opcodes[0] >= 0xE9 && opcodes[0] <= 0xEB) - || (opcodes[0] == 0xFF && (size == 4 || size == 5))) + || (opcodes[0] == 0xFF && (size == 4 || size == 5)) + || (opcodes[0] >= 0xE0 && opcodes[0] <= 0xE3)) instr->type = JUMP; else if (((opcodes[0] == 0xC3 || opcodes[0] == 0xCB) && size == 1) || ((opcodes[0] == 0xC2 || opcodes[0] == 0xCA) && size == 3) @@ -94,7 +95,7 @@ struct _hashtable_t size_t size; /* Hashtable size */ size_t collisions; /* Number of collisions encountered */ size_t entries; /* Number of entries registered */ - cfg_t ** buckets[]; /* Hachtable buckets */ + cfg_t **buckets[]; /* Hachtable buckets */ }; @@ -360,61 +361,30 @@ trace_compare (trace_t *t1, trace_t *t2) cfg_t * cfg_new (hashtable_t *ht, instr_t *ins, char *str) { - cfg_t *CFG = malloc (sizeof (cfg_t)); + cfg_t *CFG = calloc (1, sizeof (cfg_t)); if (!CFG) return NULL; - CFG->instruction = ins; + if (ins->type == BASIC) + CFG->successor = calloc (1, sizeof (cfg_t)); + else + CFG->successor = calloc (2, sizeof (cfg_t)); + if 
(!CFG->successor) + { + cfg_delete (CFG); + return NULL; + } + CFG->instruction = ins; CFG->nb_in = 0; CFG->nb_out = 0; CFG->str_graph = calloc ((strlen (str) + 1), sizeof (char)); + if (!CFG->str_graph) + { + cfg_delete (CFG); + return NULL; + } strcpy (CFG->str_graph, str); - if (nb_name == 0) CFG->name = 0; - switch (ins->type) - { - case 0: - CFG->successor = calloc (1, sizeof (cfg_t *)); - if (!CFG->successor) - { - cfg_delete (CFG); - return NULL; - } - break; - case 1: - CFG->successor = calloc (2, sizeof (cfg_t *)); - if (!CFG->successor) - { - cfg_delete (CFG); - return NULL; - } - break; - case 2: - CFG->successor = calloc (2, sizeof (cfg_t *)); - if (!CFG->successor) - { - cfg_delete (CFG); - return NULL; - } - break; - case 3: - CFG->successor = calloc (2, sizeof (cfg_t *)); - if (!CFG->successor) - { - cfg_delete (CFG); - return NULL; - } - break; - case 4: - CFG->successor = calloc (2, sizeof (cfg_t *)); - if (!CFG->successor) - { - cfg_delete (CFG); - return NULL; - } - // CFG->successor = NULL; - break; - } hashtable_insert (ht, CFG); return CFG; } @@ -449,8 +419,12 @@ aux_cfg_insert (cfg_t *CFG, cfg_t *new) { switch (CFG->instruction->type) { + // case BASIC: + // if (CFG->nb_out >= 1) + // return NULL; + // break; case BRANCH: - if (CFG->nb_out == 2) + if (CFG->nb_out >= 2) return NULL; if (!CFG->successor) { @@ -559,6 +533,8 @@ cfg_delete (cfg_t *CFG) } if (CFG->successor) free (CFG->successor); + if (CFG->str_graph) + free (CFG->str_graph); free (CFG); } return; diff --git a/src/tracker.c b/src/tracker.c index 13b9c4e..535ffe8 100644 --- a/src/tracker.c +++ b/src/tracker.c @@ -125,7 +125,7 @@ get_current_ip (struct user_regs_struct *regs) #endif } -void +static void get_text_info (const char *execfilename, uint64_t *text_addr, uint64_t *text_size) { FILE *execfile = fopen (execfilename, "r"); @@ -199,7 +199,7 @@ get_text_info (const char *execfilename, uint64_t *text_addr, uint64_t *text_siz } -Agraph_t * +static Agraph_t * graph_create_function 
(Agraph_t *g, cfg_t *entry) { Agnode_t *n, *m; From c78ff3f507a4b0aa2002c4e785b9b07a99d90d7c Mon Sep 17 00:00:00 2001 From: MAROTTA Date: Wed, 29 Jan 2020 17:40:30 +0100 Subject: [PATCH 27/31] still not working but better ; too many calls to graph_create_function --- include/trace.h | 4 ++-- src/trace.c | 35 +++++++++++++++++++++++------------ src/tracker.c | 29 ++++++++++++++++++++--------- 3 files changed, 45 insertions(+), 23 deletions(-) diff --git a/include/trace.h b/include/trace.h index 65dfb85..2cccd61 100644 --- a/include/trace.h +++ b/include/trace.h @@ -52,7 +52,7 @@ typedef struct _trace_t trace_t; /* Return a new instr_t struct, NULL otherwise (and set errno) */ instr_t *instr_new (const uintptr_t addr, const uint8_t size, - const uint8_t *opcodes); + const uint8_t *opcodes, char *str_name); /* Delete the assembly instruction from memory */ void instr_delete (instr_t *instr); @@ -137,7 +137,7 @@ cfg_t **cfg_get_successor (cfg_t *CFG); cfg_t *cfg_get_successor_i (cfg_t *CFG, uint16_t i); -uint16_t get_nb_name (void); +size_t get_nb_name (void); char *cfg_get_str (cfg_t *CFG); diff --git a/src/trace.c b/src/trace.c index bb2f577..22a7583 100644 --- a/src/trace.c +++ b/src/trace.c @@ -26,7 +26,7 @@ struct _instr_t }; instr_t * -instr_new (const uintptr_t addr, const uint8_t size, const uint8_t *opcodes) +instr_new (const uintptr_t addr, const uint8_t size, const uint8_t *opcodes, char *str_name) { /* Check size != 0 and opcodes != NULL */ if (size == 0 || opcodes == NULL) @@ -42,17 +42,19 @@ instr_new (const uintptr_t addr, const uint8_t size, const uint8_t *opcodes) instr->address = addr; instr->size = size; memcpy (instr->opcodes, opcodes, size); - + /* Test opcodes to assign type to instruction */ if ((opcodes[0] >= 0x70 && opcodes[0] <= 0x7F) || (opcodes[0] == 0x0F && opcodes[1] >= 0x80 && opcodes[1] <= 0x8F)) instr->type = BRANCH; else if (opcodes[0] == 0xE8 || opcodes[0] == 0x9A - || (opcodes[0] == 0xFF && (size == 2 || size == 3))) + || (opcodes[0] 
== 0xFF && (size == 2 || size == 3)) + || (opcodes[0] == 0x41 && opcodes[1] == 0xFF && strstr (str_name, "call"))) instr->type = CALL; else if ((opcodes[0] >= 0xE9 && opcodes[0] <= 0xEB) || (opcodes[0] == 0xFF && (size == 4 || size == 5)) - || (opcodes[0] >= 0xE0 && opcodes[0] <= 0xE3)) + || (opcodes[0] >= 0xE0 && opcodes[0] <= 0xE3) + || (opcodes[0] == 0x41 && opcodes[1] == 0xFF && strstr (str_name, "jmp"))) instr->type = JUMP; else if (((opcodes[0] == 0xC3 || opcodes[0] == 0xCB) && size == 1) || ((opcodes[0] == 0xC2 || opcodes[0] == 0xCA) && size == 3) @@ -101,18 +103,22 @@ struct _hashtable_t struct _cfg_t { - instr_t *instruction; - uint16_t nb_in; - uint16_t nb_out; - uint16_t name; - char *str_graph; - cfg_t **successor; + instr_t *instruction; /* Pointer to instruction */ + uint16_t nb_in; /* Number of predecessor */ + uint16_t nb_out; /* Number of successor */ + uint16_t name; /* Current function name */ + char *str_graph; /* Address + opcodes + mnemonic + operand */ + cfg_t **successor; /* Array of pointers to successor */ }; +/* Represent the number of successive calls whithout rets */ uint16_t depth = 0; +/* Keep track of the number of different function called */ uint16_t nb_name = 0; +/* Array of caller indexed by depth */ cfg_t *stack[256] = {NULL}; -cfg_t *function_entry[128] = {NULL}; +/* Arry of function's entry */ +cfg_t *function_entry[256] = {NULL}; /* Compression function for Merkle-Damgard construction */ #define mix(h) \ @@ -436,6 +442,10 @@ aux_cfg_insert (cfg_t *CFG, cfg_t *new) new->nb_in++; new->name = CFG->name; break; + // case CALL: + // stack[depth] = CFG; + // depth++; + // break; case JUMP: if (is_power_2 (CFG->nb_out)) CFG->successor = realloc (CFG->successor, 2 * CFG->nb_out * sizeof (cfg_t *)); @@ -578,7 +588,8 @@ cfg_get_successor_i (cfg_t *CFG, uint16_t i) return CFG->successor[i]; } -uint16_t +/* to delete ? 
*/ +size_t get_nb_name (void) { return nb_name; diff --git a/src/tracker.c b/src/tracker.c index 535ffe8..3baf07a 100644 --- a/src/tracker.c +++ b/src/tracker.c @@ -198,21 +198,26 @@ get_text_info (const char *execfilename, uint64_t *text_addr, uint64_t *text_siz return; } +int tmp = 0; static Agraph_t * graph_create_function (Agraph_t *g, cfg_t *entry) { + printf("%d\n", tmp++); Agnode_t *n, *m; - Agedge_t *f; uint16_t i = 0; cfg_t *old = entry; - while (i < cfg_get_nb_out (entry)) + int truc = 1; + while (i < cfg_get_nb_out (old)) { + cfg_t *new = cfg_get_successor_i (old, i); - while (cfg_get_type (new) != CALL) + + while (cfg_get_type (new) != RET) { + if (cfg_get_type (new) == BRANCH || cfg_get_type (new) == JUMP) - graph_create_function (g, new); + return graph_create_function (g, new); else if (cfg_get_type (new) == CALL) { if (instr_get_addr (cfg_get_instr (new)) != @@ -234,15 +239,17 @@ graph_create_function (Agraph_t *g, cfg_t *entry) } n = agnode (g, cfg_get_str(old), TRUE); m = agnode (g, cfg_get_str(new), TRUE); - f = agedge(g,n,m,NULL, TRUE); + agedge(g,n,m,NULL, TRUE); old = new; new = cfg_get_successor_i(old, 0); + } i++; n = agnode (g, cfg_get_str(old), TRUE); m = agnode (g, cfg_get_str(new), TRUE); - f = agedge(g,n,m,NULL, TRUE); + agedge(g,n,m,NULL, TRUE); } + tmp--; return g; } @@ -466,7 +473,8 @@ main (int argc, char *argv[], char *envp[]) /* Get the mnemonic from decoder */ count = cs_disasm (handle, &(buf[0]), MAX_OPCODE_BYTES, 0x1000, 0, &insn); - if (count > 0) + + if (count > 0) { /* Display the bytes */ for (size_t i = 0; i < insn[0].size; i++) @@ -492,7 +500,7 @@ main (int argc, char *argv[], char *envp[]) sprintf(name_node + strlen(name_node), "%s ",insn[0].op_str); /* Create the instr_t structure */ - instr_t *instr = instr_new (ip, insn[0].size, buf); + instr_t *instr = instr_new (ip, insn[0].size, buf, name_node); if (!instr) { hashtable_delete (ht); @@ -561,11 +569,14 @@ main (int argc, char *argv[], char *envp[]) } - 
graph_create_function(g, get_function_entry(42)); + graph_create_function(g, get_function_entry(1)); + fclose (input); fclose (output); + hashtable_delete (ht); agwrite(g, fp); agclose(g); + fclose (fp); return EXIT_SUCCESS; } From de055f954c080ba6a9d9997d3bcb2dd2f00c5af0 Mon Sep 17 00:00:00 2001 From: MAROTTA Date: Thu, 30 Jan 2020 16:28:33 +0100 Subject: [PATCH 28/31] we did it ! we can draw graphs ! no more segfault ! --- include/trace.h | 2 + src/trace.c | 13 ++++-- src/tracker.c | 115 +++++++++++++++++++++++++++--------------------- 3 files changed, 76 insertions(+), 54 deletions(-) diff --git a/include/trace.h b/include/trace.h index 2cccd61..a08bd53 100644 --- a/include/trace.h +++ b/include/trace.h @@ -143,5 +143,7 @@ char *cfg_get_str (cfg_t *CFG); cfg_t *get_function_entry (size_t index); +void add_first_entry (cfg_t *CFG); + #endif /* _TRACE_H */ diff --git a/src/trace.c b/src/trace.c index 22a7583..02fb839 100644 --- a/src/trace.c +++ b/src/trace.c @@ -46,15 +46,15 @@ instr_new (const uintptr_t addr, const uint8_t size, const uint8_t *opcodes, cha if ((opcodes[0] >= 0x70 && opcodes[0] <= 0x7F) || (opcodes[0] == 0x0F && opcodes[1] >= 0x80 && opcodes[1] <= 0x8F)) instr->type = BRANCH; - else if (opcodes[0] == 0xE8 + else if (/*opcodes[0] == 0xE8 || opcodes[0] == 0x9A || (opcodes[0] == 0xFF && (size == 2 || size == 3)) - || (opcodes[0] == 0x41 && opcodes[1] == 0xFF && strstr (str_name, "call"))) + || (opcodes[0] == 0x41 && opcodes[1] == 0xFF && */strstr (str_name, "call") && !strstr(str_name, "syscall"))/*)*/ instr->type = CALL; - else if ((opcodes[0] >= 0xE9 && opcodes[0] <= 0xEB) + else if (/*(opcodes[0] >= 0xE9 && opcodes[0] <= 0xEB) || (opcodes[0] == 0xFF && (size == 4 || size == 5)) || (opcodes[0] >= 0xE0 && opcodes[0] <= 0xE3) - || (opcodes[0] == 0x41 && opcodes[1] == 0xFF && strstr (str_name, "jmp"))) + || (opcodes[0] == 0x41 && opcodes[1] == 0xFF && */strstr (str_name, "jmp"))/*)*/ instr->type = JUMP; else if (((opcodes[0] == 0xC3 || opcodes[0] == 
0xCB) && size == 1) || ((opcodes[0] == 0xC2 || opcodes[0] == 0xCA) && size == 3) @@ -606,3 +606,8 @@ cfg_get_str (cfg_t *CFG) { return CFG->str_graph; } + +void add_first_entry (cfg_t *CFG) +{ + function_entry[0] = CFG; +} diff --git a/src/tracker.c b/src/tracker.c index 3baf07a..6ff5cc3 100644 --- a/src/tracker.c +++ b/src/tracker.c @@ -198,59 +198,72 @@ get_text_info (const char *execfilename, uint64_t *text_addr, uint64_t *text_siz return; } -int tmp = 0; static Agraph_t * graph_create_function (Agraph_t *g, cfg_t *entry) { - printf("%d\n", tmp++); Agnode_t *n, *m; - uint16_t i = 0; cfg_t *old = entry; - int truc = 1; - while (i < cfg_get_nb_out (old)) - { - - cfg_t *new = cfg_get_successor_i (old, i); - - while (cfg_get_type (new) != RET) - { - - if (cfg_get_type (new) == BRANCH || cfg_get_type (new) == JUMP) - return graph_create_function (g, new); - else if (cfg_get_type (new) == CALL) - { - if (instr_get_addr (cfg_get_instr (new)) != - instr_get_addr (cfg_get_instr (old)) + instr_get_size (cfg_get_instr (old))) - { - uint16_t j = 0; - while (j < cfg_get_nb_out (old)) - { - if (instr_get_addr (cfg_get_instr (cfg_get_successor_i (old, j))) - == instr_get_addr (cfg_get_instr (old)) + instr_get_size (cfg_get_instr (old))) - { - new = cfg_get_successor_i (old, j); - j = cfg_get_nb_out (old); - } - else - j++; - } - } - } - n = agnode (g, cfg_get_str(old), TRUE); - m = agnode (g, cfg_get_str(new), TRUE); - agedge(g,n,m,NULL, TRUE); - old = new; - new = cfg_get_successor_i(old, 0); + cfg_t *new; + while (cfg_get_type (old) == BASIC) + { + new = cfg_get_successor_i(old, 0); + n = agnode (g, cfg_get_str(old), TRUE); + m = agnode (g, cfg_get_str(new), TRUE); + if (!agedge (g, n, m, NULL, FALSE)) + agedge (g, n, m, NULL, TRUE); + else + return g; + if (cfg_get_type (new) != RET) + { + new = old; + old = cfg_get_successor_i(old, 0); + } + else + { + return g; + } + } + if (cfg_get_type (old) == BRANCH || cfg_get_type (old) == JUMP) + { + uint16_t j = 0; + while (j < 
cfg_get_nb_out (old)) + { + new = cfg_get_successor_i(old, j); + n = agnode (g, cfg_get_str(old), TRUE); + m = agnode (g, cfg_get_str(new), TRUE); + if (!agedge (g, n, m, NULL, FALSE)) + { + agedge (g, n, m, NULL, TRUE); + g = graph_create_function(g, new); + } + j++; + } + } + else if (cfg_get_type (old) == CALL) + { + uint16_t i = 0; + while (i < cfg_get_nb_out (old)) + { + new = cfg_get_successor_i(old, i); + if (instr_get_addr (cfg_get_instr (old)) + instr_get_size (cfg_get_instr (old)) + == instr_get_addr ( cfg_get_instr (new))) + { + n = agnode (g, cfg_get_str(old), TRUE); + m = agnode (g, cfg_get_str(new), TRUE); + if (!agedge (g, n, m, NULL, FALSE)) + { + agedge (g, n, m, NULL, TRUE); + g = graph_create_function(g, new); + } + else + return g; + } + i++; + } - } - i++; - n = agnode (g, cfg_get_str(old), TRUE); - m = agnode (g, cfg_get_str(new), TRUE); - agedge(g,n,m,NULL, TRUE); - } - tmp--; - return g; + } + return g; } @@ -353,8 +366,9 @@ main (int argc, char *argv[], char *envp[]) fp = fopen("toto.gv", "w+"); char name_node[128]; Agraph_t *g; - g = agopen ("G", Agdirected, NULL); - + g = agopen ("G", Agstrictdirected, NULL); + Agsym_t *sym; + sym = agattr (g, AGNODE, "shape", "box"); if (ht == NULL) err (EXIT_FAILURE, "error: cannot create hashtable"); @@ -494,7 +508,7 @@ main (int argc, char *argv[], char *envp[]) sprintf(name_node,"0x%" PRIxPTR " ", ip); - for (size_t i = 0; i < 8; i++) + for (size_t i = 0; i < insn[0].size; i++) sprintf(name_node + strlen(name_node), "%02x ", buf[i]); sprintf(name_node + strlen(name_node), " %s ",insn[0].mnemonic); sprintf(name_node + strlen(name_node), "%s ",insn[0].op_str); @@ -526,6 +540,7 @@ main (int argc, char *argv[], char *envp[]) err (EXIT_FAILURE, "error: cannot create a control flow graph"); } cfg_entry = cfg; + add_first_entry (cfg_entry); } else { @@ -569,7 +584,7 @@ main (int argc, char *argv[], char *envp[]) } - graph_create_function(g, get_function_entry(1)); + graph_create_function(g, 
get_function_entry(98)); fclose (input); fclose (output); From 5f28699356aaafa498a46c1f2430ddaae453cb60 Mon Sep 17 00:00:00 2001 From: mrglm Date: Wed, 5 Feb 2020 01:24:47 +0100 Subject: [PATCH 29/31] Now drawing basic blocks (please don't judge) --- include/trace.h | 4 +- src/trace.c | 24 +++++- src/tracker.c | 195 +++++++++++++++++++++++++++++++----------------- 3 files changed, 150 insertions(+), 73 deletions(-) diff --git a/include/trace.h b/include/trace.h index a08bd53..d70072a 100644 --- a/include/trace.h +++ b/include/trace.h @@ -127,7 +127,9 @@ void cfg_delete (cfg_t *CFG); instr_t *cfg_get_instr (cfg_t *CFG); -uint16_t cfg_get_nb_out (cfg_t *CFG); +uint16_t cfg_get_nb_out (cfg_t *CFG); + +uint16_t cfg_get_nb_in (cfg_t *CFG); instr_type_t cfg_get_type (cfg_t *CFG); diff --git a/src/trace.c b/src/trace.c index 02fb839..9f5379f 100644 --- a/src/trace.c +++ b/src/trace.c @@ -464,9 +464,21 @@ aux_cfg_insert (cfg_t *CFG, cfg_t *new) if (new->instruction->address == stack[depth]->instruction->address + stack[depth]->instruction->size) { - CFG = stack[depth]; - stack[depth] = NULL; - } + CFG = stack[depth]; + stack[depth] = NULL; + bool flag = false; + for (size_t i = 0; i < CFG->nb_out; i++) + { + if (CFG->successor[i]->instruction->address + == new->instruction->address) + { + flag = true; + break; + } + } + if (flag) + break; + } else { depth++; @@ -564,6 +576,12 @@ cfg_get_nb_out (cfg_t *CFG) return CFG->nb_out; } +uint16_t +cfg_get_nb_in (cfg_t *CFG) +{ + return CFG->nb_in; +} + instr_type_t cfg_get_type (cfg_t *CFG) { diff --git a/src/tracker.c b/src/tracker.c index 6ff5cc3..01890fe 100644 --- a/src/tracker.c +++ b/src/tracker.c @@ -198,75 +198,134 @@ get_text_info (const char *execfilename, uint64_t *text_addr, uint64_t *text_siz return; } - -static Agraph_t * -graph_create_function (Agraph_t *g, cfg_t *entry) +static char * +concat_str (char *dest, char *follow) { - Agnode_t *n, *m; - cfg_t *old = entry; - cfg_t *new; - while (cfg_get_type (old) == BASIC) 
- { - new = cfg_get_successor_i(old, 0); - n = agnode (g, cfg_get_str(old), TRUE); - m = agnode (g, cfg_get_str(new), TRUE); - if (!agedge (g, n, m, NULL, FALSE)) - agedge (g, n, m, NULL, TRUE); - else - return g; - if (cfg_get_type (new) != RET) - { - new = old; - old = cfg_get_successor_i(old, 0); - } - else - { - return g; - } - } - if (cfg_get_type (old) == BRANCH || cfg_get_type (old) == JUMP) + if (!dest) { - uint16_t j = 0; - while (j < cfg_get_nb_out (old)) - { - new = cfg_get_successor_i(old, j); - n = agnode (g, cfg_get_str(old), TRUE); - m = agnode (g, cfg_get_str(new), TRUE); - if (!agedge (g, n, m, NULL, FALSE)) - { - agedge (g, n, m, NULL, TRUE); - g = graph_create_function(g, new); - } - j++; - } + dest = calloc ((strlen (follow) + 1), sizeof (char)); + sprintf (dest, "%s", follow); + return dest; } - else if (cfg_get_type (old) == CALL) - { - uint16_t i = 0; - while (i < cfg_get_nb_out (old)) - { - new = cfg_get_successor_i(old, i); - if (instr_get_addr (cfg_get_instr (old)) + instr_get_size (cfg_get_instr (old)) - == instr_get_addr ( cfg_get_instr (new))) - { - n = agnode (g, cfg_get_str(old), TRUE); - m = agnode (g, cfg_get_str(new), TRUE); - if (!agedge (g, n, m, NULL, FALSE)) - { - agedge (g, n, m, NULL, TRUE); - g = graph_create_function(g, new); - } - else - return g; - } - i++; - } - - } - return g; + dest = realloc (dest, (strlen (dest) + 1 + strlen (follow) + 1) * sizeof (char)); + sprintf (dest + strlen(dest), "\n%s", follow); + return dest; } - +static Agraph_t * +graph_create_function (Agraph_t *g, cfg_t *entry, Agnode_t *n) +{ + Agnode_t *m = NULL; + cfg_t *old = entry; + cfg_t *new = NULL; + char *str_bb = NULL; + while (cfg_get_type (old) == BASIC || cfg_get_type (old) == CALL) + { + /* Not the beginning of the function + more than 1 parent --> basic block */ + if (old != entry && cfg_get_nb_in (old) > 1) + { + m = agnode (g, str_bb, TRUE); + free(str_bb); + str_bb = NULL; + if (n) + { + if (!agedge (g, n, m, NULL, FALSE)) + { + agedge 
(g, n, m, NULL, TRUE); + graph_create_function (g, old, m); + } + } + else + graph_create_function (g, old, m); + return g; + } + if (cfg_get_type (old) == CALL) + { + /* Searching for a RET following the CALL */ + uint16_t i = 0; + while (i < cfg_get_nb_out (old)) + { + new = cfg_get_successor_i (old, i); + if (instr_get_addr (cfg_get_instr (old)) + instr_get_size (cfg_get_instr (old)) + == instr_get_addr (cfg_get_instr (new))) + break; + new = NULL; + i++; + } + str_bb = concat_str (str_bb, cfg_get_str(old)); + if (!new) + { + m = agnode (g, str_bb, TRUE); + free(str_bb); + str_bb = NULL; + if (n) + { + if (!agedge (g, n, m, NULL, FALSE)) + agedge (g, n, m, NULL, TRUE); + } + return g; + } + else + { + old = new; + new = NULL; + } + } + else + { + str_bb = concat_str (str_bb, cfg_get_str(old)); + if (cfg_get_nb_out (old) == 0) + { + m = agnode (g, str_bb, TRUE); + free(str_bb); + str_bb = NULL; + if (!agedge (g, n, m, NULL, FALSE)) + agedge (g, n, m, NULL, TRUE); + return g; + } + old = cfg_get_successor_i (old, 0); + /* Ugly trick to avoid infinite loops if an instruction is its own parent */ + if (old == entry) + { + m = agnode (g, str_bb, TRUE); + free(str_bb); + str_bb = NULL; + if (!agedge (g, n, m, NULL, FALSE)) + agedge (g, n, m, NULL, TRUE); + Agnode_t *tmp = agnode (g, cfg_get_str (old), TRUE); + agedge (g, m, tmp, NULL, TRUE); + agedge (g, tmp, tmp, NULL, TRUE); + return g; + } + } + } + /* End of a basic block */ + str_bb = concat_str (str_bb, cfg_get_str(old)); + m = agnode (g, str_bb, TRUE); + free(str_bb); + str_bb = NULL; + if (n) + { + if (!agedge (g, n, m, NULL, FALSE)) + { + agedge (g, n, m, NULL, TRUE); + if (cfg_get_type (old) == BRANCH || cfg_get_type (old) == JUMP) + { + for (uint16_t i = 0; i < cfg_get_nb_out (old); i++) + graph_create_function (g, cfg_get_successor_i (old, i), m); + } + } + } + else + { + if (cfg_get_type (old) == BRANCH || cfg_get_type (old) == JUMP) + { + for (uint16_t i = 0; i < cfg_get_nb_out (old); i++) + 
graph_create_function (g, cfg_get_successor_i (old, i), m); + } + } + return g; +} int main (int argc, char *argv[], char *envp[]) @@ -511,7 +570,7 @@ main (int argc, char *argv[], char *envp[]) for (size_t i = 0; i < insn[0].size; i++) sprintf(name_node + strlen(name_node), "%02x ", buf[i]); sprintf(name_node + strlen(name_node), " %s ",insn[0].mnemonic); - sprintf(name_node + strlen(name_node), "%s ",insn[0].op_str); + sprintf(name_node + strlen(name_node), "%s",insn[0].op_str); /* Create the instr_t structure */ instr_t *instr = instr_new (ip, insn[0].size, buf, name_node); @@ -583,10 +642,8 @@ main (int argc, char *argv[], char *envp[]) } } - - graph_create_function(g, get_function_entry(98)); - - fclose (input); + graph_create_function(g, get_function_entry(12), NULL); + fclose (input); fclose (output); hashtable_delete (ht); From 79ccdf07bcf323512ac84df0f05cae04b5c3ac15 Mon Sep 17 00:00:00 2001 From: MAROTTA Date: Thu, 13 Feb 2020 16:48:16 +0100 Subject: [PATCH 30/31] opcodes to determine type of instruction are finally correct --- src/trace.c | 17 ++++++---- src/tracker.c | 92 +++++++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 88 insertions(+), 21 deletions(-) diff --git a/src/trace.c b/src/trace.c index 9f5379f..0168e9d 100644 --- a/src/trace.c +++ b/src/trace.c @@ -46,20 +46,23 @@ instr_new (const uintptr_t addr, const uint8_t size, const uint8_t *opcodes, cha if ((opcodes[0] >= 0x70 && opcodes[0] <= 0x7F) || (opcodes[0] == 0x0F && opcodes[1] >= 0x80 && opcodes[1] <= 0x8F)) instr->type = BRANCH; - else if (/*opcodes[0] == 0xE8 + else if (opcodes[0] == 0xE8 || opcodes[0] == 0x9A - || (opcodes[0] == 0xFF && (size == 2 || size == 3)) - || (opcodes[0] == 0x41 && opcodes[1] == 0xFF && */strstr (str_name, "call") && !strstr(str_name, "syscall"))/*)*/ + || (opcodes[0] == 0xFF && (((size == 2 && opcodes[1] >= 0xD0 && opcodes[1] <= 0xDF) || size == 3) || opcodes[1] == 0x15)) + || (opcodes[0] == 0x41 && opcodes[1] == 0xFF && + ((opcodes[2] >= 0xD0 
&& opcodes[2] <= 0xD7) || size > 3))) + /*strstr (str_name, "call") && !strstr(str_name, "syscall")))*/ instr->type = CALL; - else if (/*(opcodes[0] >= 0xE9 && opcodes[0] <= 0xEB) - || (opcodes[0] == 0xFF && (size == 4 || size == 5)) + else if ((opcodes[0] >= 0xE9 && opcodes[0] <= 0xEB) + || (opcodes[0] == 0xFF && (((size == 2 && opcodes[1] >= 0xE0 && opcodes[1] <= 0xEF) || size == 4 || size == 5) || opcodes[1] == 0x25)) || (opcodes[0] >= 0xE0 && opcodes[0] <= 0xE3) - || (opcodes[0] == 0x41 && opcodes[1] == 0xFF && */strstr (str_name, "jmp"))/*)*/ + || (opcodes[0] == 0x41 && opcodes[1] == 0xFF && + opcodes[2] >= 0xE0 && opcodes[2] <= 0xE7)) + /*strstr (str_name, "jmp")))*/ instr->type = JUMP; else if (((opcodes[0] == 0xC3 || opcodes[0] == 0xCB) && size == 1) || ((opcodes[0] == 0xC2 || opcodes[0] == 0xCA) && size == 3) || (opcodes[0] == 0xF3 && opcodes[1] == 0xC3 && size == 2)) - instr->type = RET; else instr->type = BASIC; diff --git a/src/tracker.c b/src/tracker.c index 01890fe..86b13ab 100644 --- a/src/tracker.c +++ b/src/tracker.c @@ -228,15 +228,9 @@ graph_create_function (Agraph_t *g, cfg_t *entry, Agnode_t *n) free(str_bb); str_bb = NULL; if (n) - { - if (!agedge (g, n, m, NULL, FALSE)) - { - agedge (g, n, m, NULL, TRUE); - graph_create_function (g, old, m); - } - } - else - graph_create_function (g, old, m); + if (!agedge (g, n, m, NULL, FALSE)) + agedge (g, n, m, NULL, TRUE); + graph_create_function (g, old, m); return g; } if (cfg_get_type (old) == CALL) @@ -259,10 +253,8 @@ graph_create_function (Agraph_t *g, cfg_t *entry, Agnode_t *n) free(str_bb); str_bb = NULL; if (n) - { - if (!agedge (g, n, m, NULL, FALSE)) - agedge (g, n, m, NULL, TRUE); - } + if (!agedge (g, n, m, NULL, FALSE)) + agedge (g, n, m, NULL, TRUE); return g; } else @@ -327,6 +319,76 @@ graph_create_function (Agraph_t *g, cfg_t *entry, Agnode_t *n) return g; } + +static Agraph_t * +graph_create_function_2 (Agraph_t *g, cfg_t *entry) +{ + Agnode_t *n, *m; + cfg_t *old = entry; + cfg_t 
*new; + while (cfg_get_type (old) == BASIC) + { + new = cfg_get_successor_i(old, 0); + n = agnode (g, cfg_get_str(old), TRUE); + m = agnode (g, cfg_get_str(new), TRUE); + if (!agedge (g, n, m, NULL, FALSE)) + agedge (g, n, m, NULL, TRUE); + else + return g; + if (cfg_get_type (new) != RET) + { + new = old; + old = cfg_get_successor_i(old, 0); + } + else + { + return g; + } + } + if (cfg_get_type (old) == BRANCH || cfg_get_type (old) == JUMP) + { + uint16_t j = 0; + while (j < cfg_get_nb_out (old)) + { + new = cfg_get_successor_i(old, j); + n = agnode (g, cfg_get_str(old), TRUE); + m = agnode (g, cfg_get_str(new), TRUE); + if (!agedge (g, n, m, NULL, FALSE)) + { + agedge (g, n, m, NULL, TRUE); + g = graph_create_function_2(g, new); + } + j++; + } + } + else if (cfg_get_type (old) == CALL) + { + uint16_t i = 0; + while (i < cfg_get_nb_out (old)) + { + new = cfg_get_successor_i(old, i); + if (instr_get_addr (cfg_get_instr (old)) + instr_get_size (cfg_get_instr (old)) + == instr_get_addr ( cfg_get_instr (new))) + { + n = agnode (g, cfg_get_str(old), TRUE); + m = agnode (g, cfg_get_str(new), TRUE); + if (!agedge (g, n, m, NULL, FALSE)) + { + agedge (g, n, m, NULL, TRUE); + g = graph_create_function_2(g, new); + } + else + return g; + } + i++; + } + + } + return g; +} + + + int main (int argc, char *argv[], char *envp[]) { @@ -642,7 +704,9 @@ main (int argc, char *argv[], char *envp[]) } } - graph_create_function(g, get_function_entry(12), NULL); + + graph_create_function_2(g, get_function_entry(90)); + fclose (input); fclose (output); From 3f54caa7ca5d1afa6459a9254ab42074cdb3bc68 Mon Sep 17 00:00:00 2001 From: MAROTTA Date: Mon, 17 Feb 2020 15:02:36 +0100 Subject: [PATCH 31/31] adding some comments --- include/trace.h | 13 +++++++++++-- src/trace.c | 18 ++++++++++++------ src/tracker.c | 1 - 3 files changed, 23 insertions(+), 9 deletions(-) diff --git a/include/trace.h b/include/trace.h index d70072a..15fb379 100644 --- a/include/trace.h +++ b/include/trace.h @@ -124,28 
+124,37 @@ cfg_t *cfg_insert (hashtable_t *ht, cfg_t *CFG, instr_t *ins, Agraph_t *g, char /* Free every allocated field of CFG, as well as CFG itself */ void cfg_delete (cfg_t *CFG); - +/* Get the instruction in CFG */ instr_t *cfg_get_instr (cfg_t *CFG); +/* Get the number of successors of CFG */ uint16_t cfg_get_nb_out (cfg_t *CFG); +/* Get the number of "parents" of CFG */ uint16_t cfg_get_nb_in (cfg_t *CFG); +/* Get the type of the instruction in CFG */ instr_type_t cfg_get_type (cfg_t *CFG); +/* Get the index of the function CFG is in */ uint16_t cfg_get_name (cfg_t *CFG); +/* Get a pointer array of every successor of CFG */ cfg_t **cfg_get_successor (cfg_t *CFG); +/* Get a pointer to successor number i of CFG */ cfg_t *cfg_get_successor_i (cfg_t *CFG, uint16_t i); +/* Get the total number of functions */ size_t get_nb_name (void); +/* Get the str with the address, the opcodes, the mnemonics and the operands */ char *cfg_get_str (cfg_t *CFG); +/* Get a pointer to the first node in the function number index */ cfg_t *get_function_entry (size_t index); +/* Adds the very first node to function_entry */ void add_first_entry (cfg_t *CFG); - #endif /* _TRACE_H */ diff --git a/src/trace.c b/src/trace.c index 0168e9d..f56f731 100644 --- a/src/trace.c +++ b/src/trace.c @@ -51,14 +51,12 @@ instr_new (const uintptr_t addr, const uint8_t size, const uint8_t *opcodes, cha || (opcodes[0] == 0xFF && (((size == 2 && opcodes[1] >= 0xD0 && opcodes[1] <= 0xDF) || size == 3) || opcodes[1] == 0x15)) || (opcodes[0] == 0x41 && opcodes[1] == 0xFF && ((opcodes[2] >= 0xD0 && opcodes[2] <= 0xD7) || size > 3))) - /*strstr (str_name, "call") && !strstr(str_name, "syscall")))*/ instr->type = CALL; else if ((opcodes[0] >= 0xE9 && opcodes[0] <= 0xEB) || (opcodes[0] == 0xFF && (((size == 2 && opcodes[1] >= 0xE0 && opcodes[1] <= 0xEF) || size == 4 || size == 5) || opcodes[1] == 0x25)) || (opcodes[0] >= 0xE0 && opcodes[0] <= 0xE3) || (opcodes[0] == 0x41 && opcodes[1] == 0xFF && opcodes[2] >= 
0xE0 && opcodes[2] <= 0xE7)) - /*strstr (str_name, "jmp")))*/ instr->type = JUMP; else if (((opcodes[0] == 0xC3 || opcodes[0] == 0xCB) && size == 1) || ((opcodes[0] == 0xC2 || opcodes[0] == 0xCA) && size == 3) @@ -312,6 +310,7 @@ trace_new (instr_t *ins) trace_t *t = malloc (sizeof (trace_t)); if (!t) return NULL; + /* Initialize trace */ t->instruction = ins; t->next = NULL; return t; @@ -374,6 +373,7 @@ cfg_new (hashtable_t *ht, instr_t *ins, char *str) if (!CFG) return NULL; if (ins->type == BASIC) + /* If type is BASIC then we know for sure there can only be one successor */ CFG->successor = calloc (1, sizeof (cfg_t)); else CFG->successor = calloc (2, sizeof (cfg_t)); @@ -382,6 +382,7 @@ cfg_new (hashtable_t *ht, instr_t *ins, char *str) cfg_delete (CFG); return NULL; } + /* Initializing the CFG structure */ CFG->instruction = ins; CFG->nb_in = 0; CFG->nb_out = 0; @@ -392,6 +393,7 @@ cfg_new (hashtable_t *ht, instr_t *ins, char *str) return NULL; } strcpy (CFG->str_graph, str); + /* Initializing the name if it is the first function */ if (nb_name == 0) CFG->name = 0; hashtable_insert (ht, CFG); @@ -417,6 +419,7 @@ aux_cfg_insert (cfg_t *CFG, cfg_t *new) { if (!new) return NULL; + /* Checking if the parent already has a successor */ if (CFG->instruction->type != RET && !CFG->successor[0]) { CFG->successor[0] = new; @@ -426,6 +429,7 @@ aux_cfg_insert (cfg_t *CFG, cfg_t *new) } else { + /* Inserting the new node in the parent's successors */ switch (CFG->instruction->type) { // case BASIC: @@ -463,6 +467,7 @@ aux_cfg_insert (cfg_t *CFG, cfg_t *new) new->name = CFG->name; break; case RET: + /* Checking the call on the top of the stack */ depth--; if (new->instruction->address == stack[depth]->instruction->address + stack[depth]->instruction->size) @@ -470,6 +475,7 @@ aux_cfg_insert (cfg_t *CFG, cfg_t *new) CFG = stack[depth]; stack[depth] = NULL; bool flag = false; + /* Check if new is already a successor of CFG */ for (size_t i = 0; i < CFG->nb_out; i++) { if 
(CFG->successor[i]->instruction->address @@ -488,9 +494,7 @@ aux_cfg_insert (cfg_t *CFG, cfg_t *new) } if (is_power_2 (CFG->nb_out)) { - CFG->successor = realloc (CFG->successor, 2 * CFG->nb_out * sizeof (cfg_t *)); - } if (!CFG->successor) @@ -514,9 +518,11 @@ cfg_insert (hashtable_t *ht, cfg_t *CFG, instr_t *ins,Agraph_t *g, char *str) if (!CFG) return NULL; cfg_t *new = hashtable_lookup (ht, ins); + /* First time seeing this instruction */ if (!new) { new = cfg_new (ht, ins, str); + /* Pushing the call on the stack */ if (CFG->instruction->type == CALL) { nb_name++; @@ -529,12 +535,14 @@ cfg_insert (hashtable_t *ht, cfg_t *CFG, instr_t *ins,Agraph_t *g, char *str) } else { + /* Pushing the call on the stack */ if (CFG->instruction->type == CALL) { stack[depth] = CFG; depth++; } instr_delete (ins); + /* Checking if new is already a successor of old */ for (size_t i = 0; i < CFG->nb_out; i++) { if (CFG->successor[i]->instruction->address @@ -571,8 +579,6 @@ cfg_get_instr (cfg_t *CFG) return CFG->instruction; } - - uint16_t cfg_get_nb_out (cfg_t *CFG) { diff --git a/src/tracker.c b/src/tracker.c index 86b13ab..2dd37a4 100644 --- a/src/tracker.c +++ b/src/tracker.c @@ -319,7 +319,6 @@ graph_create_function (Agraph_t *g, cfg_t *entry, Agnode_t *n) return g; } - static Agraph_t * graph_create_function_2 (Agraph_t *g, cfg_t *entry) {