diff --git a/.gitignore b/.gitignore
index e469d1c..2ee9845 100644
--- a/.gitignore
+++ b/.gitignore
@@ -24,3 +24,6 @@ coverage.xml
# Test files
a.out
main.pdf
+
+# Log files
+*.log
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..fd07386
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,42 @@
+repos:
+ - repo: https://github.com/pre-commit/pre-commit-hooks
+ rev: v4.4.0
+ hooks:
+ - id: check-added-large-files
+ - id: check-ast
+ - id: check-builtin-literals
+ - id: check-case-conflict
+ - id: check-docstring-first
+ - id: check-executables-have-shebangs
+ - id: check-json
+ - id: check-merge-conflict
+ - id: check-shebang-scripts-are-executable
+ - id: check-symlinks
+ - id: check-toml
+ - id: check-vcs-permalinks
+ - id: check-xml
+ - id: check-yaml
+ args: [--allow-multiple-documents]
+ - id: debug-statements
+ - id: destroyed-symlinks
+ - id: detect-private-key
+ - id: end-of-file-fixer
+ - id: fix-byte-order-marker
+ - id: fix-encoding-pragma
+ args: [--remove]
+ - id: mixed-line-ending
+ - id: pretty-format-json
+ - id: trailing-whitespace
+ args: [--markdown-linebreak-ext=md]
+ - repo: https://github.com/psf/black
+ rev: 23.1.0
+ hooks:
+ - id: black
+ args: [-l 99]
+ - repo: https://github.com/doublify/pre-commit-clang-format
+ rev: 62302476d0da01515660132d76902359bed0f782
+ hooks:
+ - id: clang-format
+ types: [file]
+ files: \.(cpp|cc|cxx|c|h|hxx)$
+ args: [--style=file]
diff --git a/LICENSES/GTDGmbH.md b/LICENSES/GTDGmbH.md
new file mode 100644
index 0000000..120e287
--- /dev/null
+++ b/LICENSES/GTDGmbH.md
@@ -0,0 +1,24 @@
+Valid-License-Identifier: GTDGmbH
+License-Text:
+
+Copyright (c) 2023 GTD GmbH. All rights reserved.
+
+MIT License
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the “Software”), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/LICENSE b/LICENSES/MauriMustonen.md
similarity index 95%
rename from LICENSE
rename to LICENSES/MauriMustonen.md
index 4895137..e8969fd 100644
--- a/LICENSE
+++ b/LICENSES/MauriMustonen.md
@@ -1,3 +1,6 @@
+Valid-License-Identifier: MauriMustonen
+License-Text:
+
MIT License
Copyright (c) 2021 Mauri Mustonen
diff --git a/Pipfile b/Pipfile
index 3918b1b..a4ded50 100644
--- a/Pipfile
+++ b/Pipfile
@@ -12,3 +12,4 @@ flake8 = "*"
pytest = "*"
pytest-cov = "*"
pylint = "*"
+pre-commit = "*"
diff --git a/README.md b/README.md
index 360b555..2602e52 100644
--- a/README.md
+++ b/README.md
@@ -1,353 +1,28 @@
+# Object Code Graph (ocgraph)
-# asm2cfg
-
-[](https://codecov.io/gh/Kazhuu/asm2cfg)
+This repo contains *asm2cfg* (**Assembler to control-flow-graph**), a tool to
+read disassembler output and optional corresponding coverage data to produce
+control flow graphs including coverage analysis on assembler level.
-Python command-line tool and GDB extension to view and save x86, ARM and objdump
-assembly files as control-flow graph (CFG) pdf files. From GDB debugging session
-use `viewcfg` command to view CFG and use `savecfg` command to save it to the
-pdf file.
+The *asm2cfg* source code implemented by GTD GmbH is forked from the asm2cfg
+repository on [Github](https://github.com/Kazhuu/asm2cfg) and reworked to
+function with several architectures and disassemblers.
-
-
-
+The tool source code is subject to the MIT License as indicated by the headers
+of the corresponding source code files.
-Program has been developed to support X86, ARM and objdump assembly outputs.
-Program is mostly tested with x86 assembly. ARM and objdump formats might not be
-fully supported. If you have any suggestions or find bugs, please open an issue
-or create a pull request. If you want to contribute, check
-[Development](#development) how to get started.
+> [!WARNING]
+> **WARNING**:
+>
+> **This tool is currently under development and in beta state. It is not intended
+> to be complete, and using it is at your own risk.**
-## Table of Content
+## Documentation
-
+Due to the lack of full documentation, all previous information can be found
+in separate *Markdown* files in the *doc* folder:
-* [Install](#install)
-* [Usage From GDB](#usage-from-gdb)
-* [Usage as Standalone](#usage-as-standalone)
- * [Knowing Function Name](#knowing-function-name)
- * [Disassemble Function](#disassemble-function)
- * [Draw CFG](#draw-cfg)
- * [Examples](#examples)
-* [Development](#development)
- * [Python Environment](#python-environment)
- * [Testing](#testing)
- * [Code Linting](#code-linting)
- * [Command-Line Interface](#command-line-interface)
- * [GDB Integration](#gdb-integration)
- * [Current Development Goals](#current-development-goals)
-
-
-
-## Install
-
-Project can be installed with pip
-
-```
-pip install asm2cfg
-```
-
-To be able to view the dot files from GDB. External dot viewer is required. For
-this purpose [xdot](https://pypi.org/project/xdot/) can be used for example. Any
-other dot viewer will also do. To install this on Debian based distro run
-
-```
-sudo apt install xdot
-```
-
-Or Arch based
-
-```
-sudo pacman -S xdot
-```
-
-To add extension to GDB you need to source the pip installed plugin to it. To
-find where pip placed GDB extension run `which gdb_asm2cfg` or in case if you
-use pyenv use `pyenv which gdb_asm2cfg`. Copy the path to the clipboard.
-
-Then in you home directory if not already add `.gdbinit` file
-and place following line in it and replace path from the earlier step.
-
-```
-source
-```
-
-For example in my Linux machine line end up to be
-
-```
-source ~/.local/bin/gdb_asm2cfg.py
-```
-
-Now when you start GDB no errors should be displayed and you are ready to go.
-
-## Usage From GDB
-
-In GDB session this extension provides command `viewcfg` to view CFG with
-external dot viewer. Command `savecfg` saves the CFG to pdf file to current
-working directory with same name as the function being dumped. Both commands
-disassemble the current execution frame/function when the command is issued. To
-see help for these commands use `help` command like `help viewcfg`.
-
-For example let's view main function from you favorite non-stripped executable.
-First run GDB until main function
-
-```
-gdb -ex 'b main' -ex 'run'
-```
-
-Now run `viewcfg` to view CFG as a dot graph with external editor. Or run `savecfg`
-to save CFG to pdf file named `main.pdf` to current working directory. If
-function is stripped then memory address of the function will used as a name
-instead. For example `0x555555555faf-0x555555557008.pdf`.
-
-If assembly function is very large with a lot of jumps and calls to other
-functions. Then rendering the CFG can take a long time. So be patient or cancel
-rendering with Ctrl-C. To make the rendering faster you can skip function calls
-instructions from splitting the code to more blocks. To set this run `set
-skipcalls on` and then run earlier command again. Note that if function is long
-and has a lot of jumps inside itself, then rendering is still gonna take a long
-time. To have normal behavior again run `set skipcalls off`.
-
-## Usage as Standalone
-
-This method can be used with assembly files saved from ouput of objdump and GDB
-disassembly. Pip installation will come with `asm2cfg` command-line tool for
-this purpose.
-
-To use as standalone script you first need to dump assembly from GDB or objdump
-to the file which is explained below.
-
-### Knowing Function Name
-
-If you don't know the name of function you're looking for then you can also list
-all function names using GDB:
-
-```
-gdb -batch -ex 'b main' -ex r -ex 'info functions' ./test_executable
-```
-
-This will set breakpoint at function `main`, then
-run the program and print symbols from all loaded libraries.
-
-For functions which come from main executable you can avoid running the program
-and simply do
-
-```
-gdb -batch -ex 'info functions' ./test_executable
-```
-
-If you want to narrow the search down you can also use regexp
-
-```
-gdb ... -ex 'info functions ' ...
-```
-
-### Disassemble Function
-
-Once you have the function name, you can produce its disassembly via
-
-```
-gdb -batch -ex 'b main' -ex r -ex 'pipe disassemble test_function | tee test_function.asm' ./test_executable
-```
-
-or
-
-```
-gdb -batch -ex 'set breakpoints pending on' -ex 'b test_function' -ex r -ex 'pipe disassemble | tee test_function.asm' ./test_executable
-```
-
-(the `set breakpoint pending on` command enables pending breakpoints and
-could be added to your `.gdbinit` instead)
-
-For functions from main executable it's enough to do
-
-```
-gdb -batch -ex 'pipe disassemble test_function | tee test_function.asm' ./test_executable
-```
-
-You can also extract function's disassembly from `objdump` output:
-
-```
-objdump -d ./test_executable | sed -ne '/ test_executable.asm
-```
-
-(this may be useful for specific non-native targets which lack GDB support).
-
-### Draw CFG
-
-Now you have the assembly file. Time to turn that to CFG pdf file. Do that by giving it
-to `asm2cfg` command-line tool like so
-
-```
-asm2cfg test_function.asm
-```
-
-Asm2cfg by default expects x86 assembly files. If you want to use ARM assembly files,
-then provide `--target arm` command-line flag.
-
-Above command should output `test_function.pdf` file in the same directory where
-the executable was ran. If the assembly file is stripped then the function
-memory range is used as a name instead. For example
-`0x555555555faf-0x555555557008.pdf`.
-
-To view CFG instead of saving provide `-v` flag. And to skip function calls from
-splitting the code to further blocks provide `-c` flag. To show the help use
-`-h`.
-
-### Examples
-
-Repository includes examples which can be used to test the standalone
-functionality for x86, ARM and objdump.
-
-File `test_function.asm` is non-stripped assembly file and its
-corresponding output `test_function.pdf`.
-
-File `stripped_function.asm` contains
-stripped function and its corresponding output
-`stripped_function.pdf`.
-
-File `att_syntax.asm` is an example of non-stripped AT&T assembly.
-
-File `huge.asm` is a large stripped
-assembly function and its corresponding output `huge.pdf`. This can be used to
-test processing time of big functions.
-
-Files `objdump.asm` and `stripped_objdump.asm` are the regular and stripped
-objdump-based disassemblies of short functions.
-
-File `arm.asm` is ARM based assembly file and its corresponding pdf file is
-`arm.pdf`.
-
-## Development
-
-You want to contribute? You're very welcome to do so! This section will give you
-guidance how to setup development environment and test things locally.
-
-### Python Environment
-
-For development this project manages packages with pipenv. Pipenv is a tool to
-manage Python virtual environments and packages with much less pain compared to
-normal pip and virtualenv usage.
-
-Install pipenv for your system following the guide
-[here](https://pipenv.pypa.io/en/latest/).
-
-After installing pipenv. Create virtual environment and install all required
-packages to it. Run following at project root
-
-```
-pipenv install -d
-```
-
-Now you can activate the virtual environment with
-
-```
-pipenv shell
-```
-
-Now your `python` and `pip` commands will correspond to created virtual environment
-instead of your system's Python installation.
-
-To deactivate the environment, use
-
-```
-exit
-```
-
-### Testing
-
-This project uses [pytest](https://pypi.org/project/pytest/) for testing. Some
-test are written using Python's own unittest testing framework, but they work
-with pytest out of the box. Pytest style is preferred way to write tests.
-
-To run tests from project root, use `pytest` or
-
-```
-pipenv run pytest
-```
-
-During testing dot viewer might be opened if you have it installed. This is
-because GDB integration command `viewcfg` is tested, which will open external
-dot viewer. Just close it after it's opened. It should not affect the test run
-itself.
-
-### Code Linting
-
-Project uses [flake8](https://flake8.pycqa.org/en/latest/) and
-[pylint](https://pylint.org/) for code linting.
-
-To run flake8, use
-
-```
-flake8
-```
-
-And to run pylint use
-
-```
-pylint src test
-```
-
-Both commands should not print any errors.
-
-### Command-Line Interface
-
-To test command-line interface of asm2cfg wihtout installing the package. You
-can execute module directly. For example to print help
-
-```
-python -m src.asm2cfg -h
-```
-
-Standalone method can be used to try out the examples under `examples` folder as
-well. For example following command should generate `main.pdf` file to current
-working directory.
-
-```
-python -m src.asm2cfg -c examples/huge.asm
-```
-
-### GDB Integration
-
-Before testing GDB functionality, make sure asm2cfg is not installed with pip!
-This can lead to GDB using code from pip installed asm2cfg package instead of
-code from this repository!
-
-Also pipenv cannot be used with GDB. You need to install required packages to
-your system's Python pip. This is because your installed GDB is linked against
-system's Python interpreter and will use it, instead of active virtual
-environment. If packages are not installed to your system's pip. You are likely
-to receive following error messages when trying to use asm2cfg with GDB
-
-```
-ModuleNotFoundError: No module named 'graphviz'
-```
-
-To fix this, install required packages to your system's pip without active
-virtual environment. Currently GDB integration only requires graphviz.
-
-```
-pip install graphviz
-```
-
-To use asm2cfg GDB related functionality. Use following line from
-project root.
-
-```
-PYTHONPATH=${PWD}/src gdb -ex 'source src/gdb_asm2cfg.py'
-```
-
-This will set Python import path so that GDB can import code from this
-repository without installing the package. After this you should be able to use
-commands `viewcfg` and `savecfg`.
-
-### Current Development Goals
-
-There are might be cases asm2cfg will not fully support all x86 or ARM assembly
-lines. If you encounter such problems please open an issue.
-
-Current developed goals are best described in issues section. Please open a new
-one if existing one does not exist.
-
-If you want to talk to me, you can contact me at Discord with name
-`Kazhuu#3121`.
+1. [Installation](doc/1_Installation.md): Instructions to install the tool
+2. [How to Run](doc/2_HowToRun.md): Guideline to run the tool
+3. [Development](doc/3_Development.md): Information for developers
+4. [Github asm2cfg](doc/4_Github.md): Original Github documentation
diff --git a/asm2cfg b/asm2cfg
new file mode 100755
index 0000000..4f1b04a
--- /dev/null
+++ b/asm2cfg
@@ -0,0 +1,12 @@
+#!/bin/sh
+
+# SPDX-License-Identifier: GTDGmbH
+# Copyright 2023 by GTD GmbH.
+
+dir="$(dirname "$(readlink -f "$0")")"
+cd $dir
+export PATH=$dir/bin:$PATH
+if [ -d "venv" ]; then
+ . venv/bin/activate
+fi
+python3 -m ocgraph.__main__ "$@"
diff --git a/doc/1_Installation.md b/doc/1_Installation.md
new file mode 100644
index 0000000..1e90e8b
--- /dev/null
+++ b/doc/1_Installation.md
@@ -0,0 +1,7 @@
+# Installation
+
+## Dependencies for *asm2cfg*
+
+- Python >=3.10 (the code uses `int | None` annotations, which older versions reject)
+- The Python `graphviz` package for printing the graph; please install it e.g. with
+ `pip`. Preferably use a `virtualenv`.
diff --git a/doc/2_HowToRun.md b/doc/2_HowToRun.md
new file mode 100644
index 0000000..192c301
--- /dev/null
+++ b/doc/2_HowToRun.md
@@ -0,0 +1,41 @@
+# How to Run
+
+## As python module
+
+```cmd
+python3 -m ocgraph -f a.out -d objdump -a sparc -c cov.csv -o a.pdf
+```
+
+## As command line script
+
+```cmd
+./asm2cfg -f a.out -d objdump -a sparc -c cov.csv -o a.pdf
+```
+
+## Custom python script
+
+```python
+from ocgraph.interface.analyzer import Analyzer
+from ocgraph.interface.drawer import Drawer
+from ocgraph.interface.coverage_reader import CoverageReader
+
+from ocgraph.coverage_tracer import CoverageTracer
+from ocgraph.configuration.configuration import OcGraphConfiguration
+
+# Create configuration
+config = OcGraphConfiguration(disassembler="objdump", arch="sparc")
+
+# Read input text (read_lines() is the small helper defined in ocgraph/__main__.py)
+lines = read_lines("a.out")
+
+# Analyze input text
+analyser = Analyzer(config=config)
+analyser.parse_lines(lines=lines)
+
+# Update analyzed input with coverage data
+cov_reader = CoverageReader(instructions=analyser.instructions, config=config)
+cov_reader.update_by_csv("cov.csv")
+
+drawer = Drawer(analyser.configuration)
+drawer.draw_cfg(name=analyser.function_name, basic_blocks=analyser.basic_blocks, output="a.pdf")
+```
diff --git a/doc/3_Development.md b/doc/3_Development.md
new file mode 100644
index 0000000..e1cf37a
--- /dev/null
+++ b/doc/3_Development.md
@@ -0,0 +1,53 @@
+# Development
+
+## Design
+
+```mermaid
+---
+title: OcGraph design
+---
+classDiagram
+
+ class Configuration {
+ __init__(arch, disassembler, logging):
+ +dict disassembler_option
+ +dict architecture_option
+ +dict preset_logging
+ }
+ class Disassembler {
+ Name
+ parse_line()
+ ...()
+ }
+ class Architecture {
+ is_branch()
+ ...()
+ }
+ class Logger { Name }
+
+ Configuration --* Disassembler
+ Configuration --* Architecture
+ Configuration --* Logger
+
+ class Analyzer {
+ __init__(config)
+ parse_file(file_path): basic_blocks
+ }
+ class CoverageReader {
+ __init__(instructions, config)
+ update_by_csv(file_path)
+ }
+ class Drawer {
+ __init__(config)
+ draw_cfg(name, basic_blocks, output)
+ }
+ class __main__ {
+ main()
+ }
+
+ __main__ --> Configuration
+ __main__ --> Analyzer
+ __main__ --> CoverageReader
+ __main__ --> Drawer
+
+```
diff --git a/doc/4_Github.md b/doc/4_Github.md
new file mode 100644
index 0000000..b3a81ff
--- /dev/null
+++ b/doc/4_Github.md
@@ -0,0 +1,354 @@
+
+# asm2cfg
+
+
+[](https://codecov.io/gh/Kazhuu/asm2cfg)
+
+Python command-line tool and GDB extension to view and save x86, ARM and objdump
+assembly files as control-flow graph (CFG) pdf files. From GDB debugging session
+use `viewcfg` command to view CFG and use `savecfg` command to save it to the
+pdf file.
+
+
+
+
+
+Program has been developed to support X86, ARM and objdump assembly outputs.
+Program is mostly tested with x86 assembly. ARM and objdump formats might not be
+fully supported. If you have any suggestions or find bugs, please open an issue
+or create a pull request. If you want to contribute, check
+[Development](#development) how to get started.
+
+## Table of Content
+
+
+
+* [Install](#install)
+* [Usage From GDB](#usage-from-gdb)
+* [Usage as Standalone](#usage-as-standalone)
+ * [Knowing Function Name](#knowing-function-name)
+ * [Disassemble Function](#disassemble-function)
+ * [Draw CFG](#draw-cfg)
+ * [Examples](#examples)
+* [Development](#development)
+ * [Python Environment](#python-environment)
+ * [Testing](#testing)
+ * [Code Linting](#code-linting)
+ * [Command-Line Interface](#command-line-interface)
+ * [GDB Integration](#gdb-integration)
+ * [Current Development Goals](#current-development-goals)
+
+
+
+## Install
+
+Project can be installed with pip
+
+```
+pip install asm2cfg
+```
+
+To be able to view the dot files from GDB, an external dot viewer is required. For
+this purpose [xdot](https://pypi.org/project/xdot/) can be used for example. Any
+other dot viewer will also do. To install this on Debian based distro run
+
+```
+sudo apt install xdot
+```
+
+Or Arch based
+
+```
+sudo pacman -S xdot
+```
+
+To add extension to GDB you need to source the pip installed plugin to it. To
+find where pip placed GDB extension run `which gdb_asm2cfg` or in case if you
+use pyenv use `pyenv which gdb_asm2cfg`. Copy the path to the clipboard.
+
+Then, in your home directory, add a `.gdbinit` file if it does not already exist,
+place the following line in it, and replace the path with the one from the earlier step.
+
+```
+source <path-from-earlier-step>
+```
+
+For example, on my Linux machine the line ends up being
+
+```
+source ~/.local/bin/gdb_asm2cfg.py
+```
+
+Now when you start GDB no errors should be displayed and you are ready to go.
+
+## Usage From GDB
+
+In GDB session this extension provides command `viewcfg` to view CFG with
+external dot viewer. Command `savecfg` saves the CFG to pdf file to current
+working directory with same name as the function being dumped. Both commands
+disassemble the current execution frame/function when the command is issued. To
+see help for these commands use `help` command like `help viewcfg`.
+
+For example let's view main function from your favorite non-stripped executable.
+First run GDB until main function
+
+```
+gdb -ex 'b main' -ex 'run'
+```
+
+Now run `viewcfg` to view CFG as a dot graph with external editor. Or run `savecfg`
+to save CFG to pdf file named `main.pdf` to current working directory. If
+function is stripped then memory address of the function will be used as a name
+instead. For example `0x555555555faf-0x555555557008.pdf`.
+
+If the assembly function is very large, with a lot of jumps and calls to other
+functions, then rendering the CFG can take a long time. So be patient or cancel
+rendering with Ctrl-C. To make the rendering faster you can skip function calls
+instructions from splitting the code to more blocks. To set this run `set
+skipcalls on` and then run earlier command again. Note that if function is long
+and has a lot of jumps inside itself, then rendering is still gonna take a long
+time. To have normal behavior again run `set skipcalls off`.
+
+## Usage as Standalone
+
+This method can be used with assembly files saved from output of objdump and GDB
+disassembly. Pip installation will come with `asm2cfg` command-line tool for
+this purpose.
+
+To use as standalone script you first need to dump assembly from GDB or objdump
+to the file which is explained below.
+
+### Knowing Function Name
+
+If you don't know the name of function you're looking for then you can also list
+all function names using GDB:
+
+```
+gdb -batch -ex 'b main' -ex r -ex 'info functions' ./test_executable
+```
+
+This will set breakpoint at function `main`, then
+run the program and print symbols from all loaded libraries.
+
+For functions which come from main executable you can avoid running the program
+and simply do
+
+```
+gdb -batch -ex 'info functions' ./test_executable
+```
+
+If you want to narrow the search down you can also use regexp
+
+```
+gdb ... -ex 'info functions <regexp>' ...
+```
+
+### Disassemble Function
+
+Once you have the function name, you can produce its disassembly via
+
+```
+gdb -batch -ex 'b main' -ex r -ex 'pipe disassemble test_function | tee test_function.asm' ./test_executable
+```
+
+or
+
+```
+gdb -batch -ex 'set breakpoint pending on' -ex 'b test_function' -ex r -ex 'pipe disassemble | tee test_function.asm' ./test_executable
+```
+
+(the `set breakpoint pending on` command enables pending breakpoints and
+could be added to your `.gdbinit` instead)
+
+For functions from main executable it's enough to do
+
+```
+gdb -batch -ex 'pipe disassemble test_function | tee test_function.asm' ./test_executable
+```
+
+You can also extract function's disassembly from `objdump` output:
+
+```
+objdump -d ./test_executable | sed -ne '/<test_function/,/^$/p' > test_executable.asm
+```
+
+(this may be useful for specific non-native targets which lack GDB support).
+
+### Draw CFG
+
+Now you have the assembly file. Time to turn that to CFG pdf file. Do that by giving it
+to `asm2cfg` command-line tool like so
+
+```
+asm2cfg test_function.asm
+```
+
+Asm2cfg by default expects x86 assembly files. If you want to use ARM assembly files,
+then provide `--target arm` command-line flag.
+
+Above command should output `test_function.pdf` file in the same directory where
+the executable was run. If the assembly file is stripped then the function
+memory range is used as a name instead. For example
+`0x555555555faf-0x555555557008.pdf`.
+
+To view CFG instead of saving provide `-v` flag. And to skip function calls from
+splitting the code to further blocks provide `-c` flag. To show the help use
+`-h`.
+
+### Examples
+
+Repository includes examples which can be used to test the standalone
+functionality for x86, ARM and objdump.
+
+File `test_function.asm` is non-stripped assembly file and its
+corresponding output `test_function.pdf`.
+
+File `stripped_function.asm` contains
+stripped function and its corresponding output
+`stripped_function.pdf`.
+
+File `att_syntax.asm` is an example of non-stripped AT&T assembly.
+
+File `huge.asm` is a large stripped
+assembly function and its corresponding output `huge.pdf`. This can be used to
+test processing time of big functions.
+
+Files `objdump.asm` and `stripped_objdump.asm` are the regular and stripped
+objdump-based disassemblies of short functions.
+
+File `arm.asm` is ARM based assembly file and its corresponding pdf file is
+`arm.pdf`.
+
+## Development
+
+You want to contribute? You're very welcome to do so! This section will give you
+guidance how to setup development environment and test things locally.
+
+### Python Environment
+
+For development this project manages packages with pipenv. Pipenv is a tool to
+manage Python virtual environments and packages with much less pain compared to
+normal pip and virtualenv usage.
+
+Install pipenv for your system following the guide
+[here](https://pipenv.pypa.io/en/latest/).
+
+After installing pipenv, create a virtual environment and install all required
+packages to it. Run following at project root
+
+```
+pipenv install -d
+```
+
+Now you can activate the virtual environment with
+
+```
+pipenv shell
+```
+
+Now your `python` and `pip` commands will correspond to created virtual environment
+instead of your system's Python installation.
+
+To deactivate the environment, use
+
+```
+exit
+```
+
+### Testing
+
+This project uses [pytest](https://pypi.org/project/pytest/) for testing. Some
+tests are written using Python's own unittest testing framework, but they work
+with pytest out of the box. Pytest style is preferred way to write tests.
+
+To run tests from project root, use `pytest` or
+
+```
+pipenv run pytest
+```
+
+During testing dot viewer might be opened if you have it installed. This is
+because GDB integration command `viewcfg` is tested, which will open external
+dot viewer. Just close it after it's opened. It should not affect the test run
+itself.
+
+### Code Linting
+
+Project uses [flake8](https://flake8.pycqa.org/en/latest/) and
+[pylint](https://pylint.org/) for code linting.
+
+To run flake8, use
+
+```
+flake8
+```
+
+And to run pylint use
+
+```
+pylint src test
+```
+
+Both commands should not print any errors.
+
+### Command-Line Interface
+
+To test the command-line interface of asm2cfg without installing the package, you
+can execute the module directly. For example, to print help
+
+```
+python -m src.asm2cfg -h
+```
+
+Standalone method can be used to try out the examples under `examples` folder as
+well. For example following command should generate `main.pdf` file to current
+working directory.
+
+```
+python -m src.asm2cfg -c examples/huge.asm
+```
+
+### GDB Integration
+
+Before testing GDB functionality, make sure asm2cfg is not installed with pip!
+This can lead to GDB using code from pip installed asm2cfg package instead of
+code from this repository!
+
+Also pipenv cannot be used with GDB. You need to install required packages to
+your system's Python pip. This is because your installed GDB is linked against
+system's Python interpreter and will use it, instead of active virtual
+environment. If packages are not installed to your system's pip, you are likely
+to receive following error messages when trying to use asm2cfg with GDB
+
+```
+ModuleNotFoundError: No module named 'graphviz'
+```
+
+To fix this, install required packages to your system's pip without active
+virtual environment. Currently GDB integration only requires graphviz.
+
+```
+pip install graphviz
+```
+
+To use asm2cfg's GDB-related functionality, use the following line from the
+project root.
+
+```
+PYTHONPATH=${PWD}/src gdb -ex 'source src/gdb_asm2cfg.py'
+```
+
+This will set Python import path so that GDB can import code from this
+repository without installing the package. After this you should be able to use
+commands `viewcfg` and `savecfg`.
+
+### Current Development Goals
+
+There might be cases where asm2cfg will not fully support all x86 or ARM assembly
+lines. If you encounter such problems please open an issue.
+
+Current development goals are best described in the issues section. Please open a new
+one if a matching one does not exist.
+
+If you want to talk to me, you can contact me at Discord with name
+`Kazhuu#3121`.
diff --git a/images/example.png b/doc/images/example.png
similarity index 100%
rename from images/example.png
rename to doc/images/example.png
diff --git a/images/example.svg b/doc/images/example.svg
similarity index 100%
rename from images/example.svg
rename to doc/images/example.svg
diff --git a/src/asm2cfg/__init__.py b/ocgraph/__init__.py
similarity index 100%
rename from src/asm2cfg/__init__.py
rename to ocgraph/__init__.py
diff --git a/ocgraph/__main__.py b/ocgraph/__main__.py
new file mode 100755
index 0000000..fee92c2
--- /dev/null
+++ b/ocgraph/__main__.py
@@ -0,0 +1,85 @@
+#!/usr/bin/env python
+# SPDX-License-Identifier: GTDGmbH
+"""
+Let this module be executed from the command line with python -m ocgraph
+from root of the project
+"""
+import argparse
+
+from .interface.analyzer import Analyzer
+from .interface.drawer import Drawer
+from .interface.coverage_reader import CoverageReader
+
+from .configuration.configuration import OcGraphConfiguration
+
+
+def print_assembly(basic_blocks):
+ """Debug function to print the assembly."""
+ for basic_block in basic_blocks.values():
+ print(basic_block)
+
+
+def read_lines(file_path) -> list[str]:
+ """Read lines from the file and return them as a list."""
+ with open(file_path, "r", encoding="utf8") as asm_file:
+ lines = asm_file.readlines()
+ return lines
+
+
+def main():
+ """Command-line entry point to the program."""
+ parser = argparse.ArgumentParser(description="Assembly to Control-Flow-Graph rendering.")
+
+ parser.add_argument(
+ "-f",
+ "--file",
+ help="Disassembled object file",
+ required=True,
+ )
+ parser.add_argument(
+ "-d",
+ "--diss",
+ help="Disassembler option",
+ required=True,
+ choices=OcGraphConfiguration.disassemblers(),
+ )
+ parser.add_argument(
+ "-a",
+ "--arch",
+ help="Architecture option",
+ required=True,
+ choices=OcGraphConfiguration.architectures(),
+ )
+
+ parser.add_argument("-c", "--coverage", help="Coverage file for printing coverage")
+ parser.add_argument("-v", "--view", action="store_true", help="View as a dot graph")
+ parser.add_argument("-o", "--output", help="Target output filename")
+ parser.add_argument(
+ "-l",
+ "--logger",
+ choices=OcGraphConfiguration.loggers(),
+ default="default",
+ help="Logging mechanism preset",
+ )
+ args = parser.parse_args()
+
+ # Create configuration
+ config = OcGraphConfiguration(disassembler=args.diss, arch=args.arch, preset=args.logger)
+
+ lines = read_lines(args.file)
+
+ analyser = Analyzer(config=config)
+ analyser.parse_lines(lines=lines)
+
+ if args.coverage:
+ cov_reader = CoverageReader(instructions=analyser.instructions, config=config)
+ cov_reader.update_by_csv(args.coverage)
+
+ drawer = Drawer(analyser.configuration)
+ drawer.draw_cfg(
+ name=analyser.function_name, basic_blocks=analyser.basic_blocks, output=args.output
+ )
+
+
+if __name__ == "__main__":
+ main()
diff --git a/ocgraph/configuration/__init__.py b/ocgraph/configuration/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/ocgraph/configuration/architecture/__init__.py b/ocgraph/configuration/architecture/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/ocgraph/configuration/architecture/architecture.py b/ocgraph/configuration/architecture/architecture.py
new file mode 100755
index 0000000..511c5a1
--- /dev/null
+++ b/ocgraph/configuration/architecture/architecture.py
@@ -0,0 +1,48 @@
+#!/usr/bin/env python
+# SPDX-License-Identifier: GTDGmbH
+"""Contains all necessary functions for a TargetInfo class."""
+
+from abc import ABC, abstractmethod
+
+from ...data.instruction import Instruction
+
+
+class Architecture(ABC):
+ """TargetInfo Class defining the target specific instruction set characteristics"""
+
+ def __init__(self):
+ pass
+
+ @abstractmethod
+ def comment(self) -> str:
+ """Return how comments starts in the disassembly"""
+ raise NotImplementedError()
+
+ @abstractmethod
+ def is_call(self, instruction: Instruction) -> bool:
+ """Return if disassembled instruction is a subroutine call"""
+ raise NotImplementedError()
+
+ @abstractmethod
+ def is_unconditional_branch(self, instruction: Instruction) -> bool:
+ """Return if disassembled instruction is an unconditional branch"""
+ raise NotImplementedError()
+
+ def get_branch_delay(self, instruction: Instruction) -> int | None:
+ """Return the branch delay of an instruction or None if not a branch"""
+ return 1 if self.is_branch(instruction) else None
+
+ @abstractmethod
+ def is_direct_branch(self, instruction: Instruction) -> bool:
+ """Return if disassembled instruction is a direct branch"""
+ raise NotImplementedError()
+
+ @abstractmethod
+ def is_branch(self, instruction: Instruction) -> bool:
+ """Return if disassembled instruction is a branch instruction (conditional or unconditional)"""
+ raise NotImplementedError()
+
+ @abstractmethod
+ def is_sink(self, instruction: Instruction) -> bool:
+ """Return if disassembled instruction serves as sink (e.g. ret)"""
+ raise NotImplementedError()
diff --git a/ocgraph/configuration/architecture/arm.py b/ocgraph/configuration/architecture/arm.py
new file mode 100755
index 0000000..920b626
--- /dev/null
+++ b/ocgraph/configuration/architecture/arm.py
@@ -0,0 +1,53 @@
+#!/usr/bin/env python
+# SPDX-License-Identifier: GTDGmbH
+""" Contains instruction info for ARM-compatible targets. """
+
+import re
+
+from .architecture import Architecture
+from ...data.instruction import Instruction
+
+# Common regexes
+HEX_PATTERN = r"[0-9a-fA-F]+"
+HEX_LONG_PATTERN = r"(?:0x0*)?" + HEX_PATTERN
+
+
+class ArmArchitecture(Architecture):
+ """ArmArchitecture Class"""
+
+ def comment(self):
+ return ";"
+
+ def is_call(self, instruction: Instruction):
+ # Various flavors of call:
+ # bl 0x19d90 <_IO_vtable_check>
+ # Note that we should be careful to not mix it with conditional
+ # branches like 'ble'.
+ return instruction.opcode.startswith("bl") and instruction.opcode not in (
+ "blt",
+ "ble",
+ "bls",
+ )
+
+ def is_branch(self, instruction: Instruction):
+ return instruction.opcode[0] == "b" and not self.is_call(instruction)
+
+ def is_direct_branch(self, instruction: Instruction):
+ return self.is_branch(instruction) and re.match(rf"{HEX_LONG_PATTERN}", instruction.ops[0])
+
+ def is_unconditional_branch(self, instruction: Instruction):
+ return instruction.opcode == "b"
+
+ def is_sink(self, instruction: Instruction):
+ """
+ Is this an instruction which terminates function execution e.g. return?
+ Detect various flavors of return like
+ bx lr
+ pop {r2-r6,pc}
+ Note that we do not consider conditional branches (e.g. 'bxle') to sink.
+ """
+ return (
+ re.search(r"\bpop\b.*\bpc\b", instruction.body)
+ or (instruction.opcode == "bx" and instruction.ops[0] == "lr")
+ or instruction.opcode == "udf"
+ )
diff --git a/ocgraph/configuration/architecture/ppc.py b/ocgraph/configuration/architecture/ppc.py
new file mode 100755
index 0000000..b446120
--- /dev/null
+++ b/ocgraph/configuration/architecture/ppc.py
@@ -0,0 +1,713 @@
+#!/usr/bin/env python
+# SPDX-License-Identifier: GTDGmbH
+"""Contains instruction info for PPC-compatible targets."""
+
+import re
+
+from .architecture import Architecture
+from ...data.instruction import Instruction
+
+
+# Common regexes
+HEX_PATTERN = r"[0-9a-fA-F]+"
+HEX_LONG_PATTERN = r"(?:0x0*)?" + HEX_PATTERN
+
+# fmt: off
+# Opcodes classified as subroutine calls (see PpcArchitecture.is_call).
+ppc_call_opcodes = [
+ "bl",
+]
+
+# Opcodes classified as sinks (see PpcArchitecture.is_sink).
+ppc_sink_opcodes = [
+ "blr",
+]
+
+# Opcodes classified as unconditional branches
+# (see PpcArchitecture.is_unconditional_branch).
+ppc_unconditional_branch_opcodes = [
+ "b", "ba", "bla",
+ "bctr", "bctrl", "blrl",
+]
+
+ppc_conditional_branch_opcodes = [
+ "bdnz-",
+ "bdnz+",
+ "bdnz",
+ "bdn",
+ "bdnzl-",
+ "bdnzl+",
+ "bdnzl",
+ "bdnl",
+ "bdnza-",
+ "bdnza+",
+ "bdnza",
+ "bdna",
+ "bdnzla-",
+ "bdnzla+",
+ "bdnzla",
+ "bdnla",
+ "bdz-",
+ "bdz+",
+ "bdz",
+ "bdzl-",
+ "bdzl+",
+ "bdzl",
+ "bdza-",
+ "bdza+",
+ "bdza",
+ "bdzla-",
+ "bdzla+",
+ "bdzla",
+
+ "bge-",
+ "bge+",
+ "bge",
+ "bnl-",
+ "bnl+",
+ "bnl",
+ "bgel-",
+ "bgel+",
+ "bgel",
+ "bnll-",
+ "bnll+",
+ "bnll",
+ "bgea-",
+ "bgea+",
+ "bgea",
+ "bnla-",
+ "bnla+",
+ "bnla",
+ "bgela-",
+ "bgela+",
+ "bgela",
+ "bnlla-",
+ "bnlla+",
+ "bnlla",
+ "ble-",
+ "ble+",
+ "ble",
+ "bng-",
+ "bng+",
+ "bng",
+ "blel-",
+ "blel+",
+ "blel",
+ "bngl-",
+ "bngl+",
+ "bngl",
+ "blea-",
+ "blea+",
+ "blea",
+ "bnga-",
+ "bnga+",
+ "bnga",
+ "blela-",
+ "blela+",
+ "blela",
+ "bngla-",
+ "bngla+",
+ "bngla",
+ "bne-",
+ "bne+",
+ "bne",
+ "bnel-",
+ "bnel+",
+ "bnel",
+ "bnea-",
+ "bnea+",
+ "bnea",
+ "bnela-",
+ "bnela+",
+ "bnela",
+ "bns-",
+ "bns+",
+ "bns",
+ "bnu-",
+ "bnu+",
+ "bnu",
+ "bnsl-",
+ "bnsl+",
+ "bnsl",
+ "bnul-",
+ "bnul+",
+ "bnul",
+ "bnsa-",
+ "bnsa+",
+ "bnsa",
+ "bnua-",
+ "bnua+",
+ "bnua",
+ "bnsla-",
+ "bnsla+",
+ "bnsla",
+ "bnula-",
+ "bnula+",
+ "bnula",
+
+ "blt-",
+ "blt+",
+ "blt",
+ "bltl-",
+ "bltl+",
+ "bltl",
+ "blta-",
+ "blta+",
+ "blta",
+ "bltla-",
+ "bltla+",
+ "bltla",
+ "bgt-",
+ "bgt+",
+ "bgt",
+ "bgtl-",
+ "bgtl+",
+ "bgtl",
+ "bgta-",
+ "bgta+",
+ "bgta",
+ "bgtla-",
+ "bgtla+",
+ "bgtla",
+ "beq-",
+ "beq+",
+ "beq",
+ "beql-",
+ "beql+",
+ "beql",
+ "beqa-",
+ "beqa+",
+ "beqa",
+ "beqla-",
+ "beqla+",
+ "beqla",
+ "bso-",
+ "bso+",
+ "bso",
+ "bun-",
+ "bun+",
+ "bun",
+ "bsol-",
+ "bsol+",
+ "bsol",
+ "bunl-",
+ "bunl+",
+ "bunl",
+ "bsoa-",
+ "bsoa+",
+ "bsoa",
+ "buna-",
+ "buna+",
+ "buna",
+ "bsola-",
+ "bsola+",
+ "bsola",
+ "bunla-",
+ "bunla+",
+ "bunla",
+
+ "bdnzf-",
+ "bdnzf+",
+ "bdnzf",
+ "bdnzfl-",
+ "bdnzfl+",
+ "bdnzfl",
+ "bdnzfa-",
+ "bdnzfa+",
+ "bdnzfa",
+ "bdnzfla-",
+ "bdnzfla+",
+ "bdnzfla",
+ "bdzf-",
+ "bdzf+",
+ "bdzf",
+ "bdzfl-",
+ "bdzfl+",
+ "bdzfl",
+ "bdzfa-",
+ "bdzfa+",
+ "bdzfa",
+ "bdzfla-",
+ "bdzfla+",
+ "bdzfla",
+
+ "bf-",
+ "bf+",
+ "bf",
+ "bbf",
+ "bfl-",
+ "bfl+",
+ "bfl",
+ "bbfl",
+ "bfa-",
+ "bfa+",
+ "bfa",
+ "bbfa",
+ "bfla-",
+ "bfla+",
+ "bfla",
+ "bbfla",
+
+ "bdnzt-",
+ "bdnzt+",
+ "bdnzt",
+ "bdnztl-",
+ "bdnztl+",
+ "bdnztl",
+ "bdnzta-",
+ "bdnzta+",
+ "bdnzta",
+ "bdnztla-",
+ "bdnztla+",
+ "bdnztla",
+ "bdzt-",
+ "bdzt+",
+ "bdzt",
+ "bdztl-",
+ "bdztl+",
+ "bdztl",
+ "bdzta-",
+ "bdzta+",
+ "bdzta",
+ "bdztla-",
+ "bdztla+",
+ "bdztla",
+
+ "bt-",
+ "bt+",
+ "bt",
+ "bbt",
+ "btl-",
+ "btl+",
+ "btl",
+ "bbtl",
+ "bta-",
+ "bta+",
+ "bta",
+ "bbta",
+ "btla-",
+ "btla+",
+ "btla",
+ "bbtla",
+
+ "bc-",
+ "bc+",
+ "bc",
+ "bcl-",
+ "bcl+",
+ "bcl",
+ "bca-",
+ "bca+",
+ "bca",
+ "bcla-",
+ "bcla+",
+ "bcla",
+
+ "bdnzlr",
+ "bdnzlr-",
+ "bdnzlrl",
+ "bdnzlrl-",
+ "bdnzlr+",
+ "bdnzlrl+",
+ "bdzlr",
+ "bdzlr-",
+ "bdzlrl",
+ "bdzlrl-",
+ "bdzlr+",
+ "bdzlrl+",
+ #"blr",
+ "br",
+ #"blrl",
+ "brl",
+ "bdnzlr-",
+ "bdnzlrl-",
+ "bdnzlr+",
+ "bdnzlrl+",
+ "bdzlr-",
+ "bdzlrl-",
+ "bdzlr+",
+ "bdzlrl+",
+
+ "bgelr",
+ "bgelr-",
+ "bger",
+ "bnllr",
+ "bnllr-",
+ "bnlr",
+ "bgelrl",
+ "bgelrl-",
+ "bgerl",
+ "bnllrl",
+ "bnllrl-",
+ "bnlrl",
+ "blelr",
+ "blelr-",
+ "bler",
+ "bnglr",
+ "bnglr-",
+ "bngr",
+ "blelrl",
+ "blelrl-",
+ "blerl",
+ "bnglrl",
+ "bnglrl-",
+ "bngrl",
+ "bnelr",
+ "bnelr-",
+ "bner",
+ "bnelrl",
+ "bnelrl-",
+ "bnerl",
+ "bnslr",
+ "bnslr-",
+ "bnsr",
+ "bnulr",
+ "bnulr-",
+ "bnslrl",
+ "bnslrl-",
+ "bnsrl",
+ "bnulrl",
+ "bnulrl-",
+ "bgelr+",
+ "bnllr+",
+ "bgelrl+",
+ "bnllrl+",
+ "blelr+",
+ "bnglr+",
+ "blelrl+",
+ "bnglrl+",
+ "bnelr+",
+ "bnelrl+",
+ "bnslr+",
+ "bnulr+",
+ "bnslrl+",
+ "bnulrl+",
+ "bgelr-",
+ "bnllr-",
+ "bgelrl-",
+ "bnllrl-",
+ "blelr-",
+ "bnglr-",
+ "blelrl-",
+ "bnglrl-",
+ "bnelr-",
+ "bnelrl-",
+ "bnslr-",
+ "bnulr-",
+ "bnslrl-",
+ "bnulrl-",
+ "bgelr+",
+ "bnllr+",
+ "bgelrl+",
+ "bnllrl+",
+ "blelr+",
+ "bnglr+",
+ "blelrl+",
+ "bnglrl+",
+ "bnelr+",
+ "bnelrl+",
+ "bnslr+",
+ "bnulr+",
+ "bnslrl+",
+ "bnulrl+",
+ "bltlr",
+ "bltlr-",
+ "bltr",
+ "bltlrl",
+ "bltlrl-",
+ "bltrl",
+ "bgtlr",
+ "bgtlr-",
+ "bgtr",
+ "bgtlrl",
+ "bgtlrl-",
+ "bgtrl",
+ "beqlr",
+ "beqlr-",
+ "beqr",
+ "beqlrl",
+ "beqlrl-",
+ "beqrl",
+ "bsolr",
+ "bsolr-",
+ "bsor",
+ "bunlr",
+ "bunlr-",
+ "bsolrl",
+ "bsolrl-",
+ "bsorl",
+ "bunlrl",
+ "bunlrl-",
+ "bltlr+",
+ "bltlrl+",
+ "bgtlr+",
+ "bgtlrl+",
+ "beqlr+",
+ "beqlrl+",
+ "bsolr+",
+ "bunlr+",
+ "bsolrl+",
+ "bunlrl+",
+ "bltlr-",
+ "bltlrl-",
+ "bgtlr-",
+ "bgtlrl-",
+ "beqlr-",
+ "beqlrl-",
+ "bsolr-",
+ "bunlr-",
+ "bsolrl-",
+ "bunlrl-",
+ "bltlr+",
+ "bltlrl+",
+ "bgtlr+",
+ "bgtlrl+",
+ "beqlr+",
+ "beqlrl+",
+ "bsolr+",
+ "bunlr+",
+ "bsolrl+",
+ "bunlrl+",
+
+ "bdnzflr",
+ "bdnzflr-",
+ "bdnzflrl",
+ "bdnzflrl-",
+ "bdnzflr+",
+ "bdnzflrl+",
+ "bdzflr",
+ "bdzflr-",
+ "bdzflrl",
+ "bdzflrl-",
+ "bdzflr+",
+ "bdzflrl+",
+ "bflr",
+ "bflr-",
+ "bbfr",
+ "bflrl",
+ "bflrl-",
+ "bbfrl",
+ "bflr+",
+ "bflrl+",
+ "bflr-",
+ "bflrl-",
+ "bflr+",
+ "bflrl+",
+ "bdnztlr",
+ "bdnztlr-",
+ "bdnztlrl",
+ "bdnztlrl-",
+ "bdnztlr+",
+ "bdnztlrl+",
+ "bdztlr",
+ "bdztlr-",
+ "bdztlrl",
+ "bdztlrl-",
+ "bdztlr+",
+ "bdztlrl+",
+ "btlr",
+ "btlr-",
+ "bbtr",
+ "btlrl",
+ "btlrl-",
+ "bbtrl",
+ "btlr+",
+ "btlrl+",
+ "btlr-",
+ "btlrl-",
+ "btlr+",
+ "btlrl+",
+
+ "bclr-",
+ "bclrl-",
+ "bclr+",
+ "bclrl+",
+ "bclr",
+ "bcr",
+ "bclrl",
+ "bcrl",
+
+ #"bctr",
+ #"bctrl",
+
+ "bgectr",
+ "bgectr-",
+ "bnlctr",
+ "bnlctr-",
+ "bgectrl",
+ "bgectrl-",
+ "bnlctrl",
+ "bnlctrl-",
+ "blectr",
+ "blectr-",
+ "bngctr",
+ "bngctr-",
+ "blectrl",
+ "blectrl-",
+ "bngctrl",
+ "bngctrl-",
+ "bnectr",
+ "bnectr-",
+ "bnectrl",
+ "bnectrl-",
+ "bnsctr",
+ "bnsctr-",
+ "bnuctr",
+ "bnuctr-",
+ "bnsctrl",
+ "bnsctrl-",
+ "bnuctrl",
+ "bnuctrl-",
+ "bgectr+",
+ "bnlctr+",
+ "bgectrl+",
+ "bnlctrl+",
+ "blectr+",
+ "bngctr+",
+ "blectrl+",
+ "bngctrl+",
+ "bnectr+",
+ "bnectrl+",
+ "bnsctr+",
+ "bnuctr+",
+ "bnsctrl+",
+ "bnuctrl+",
+ "bgectr-",
+ "bnlctr-",
+ "bgectrl-",
+ "bnlctrl-",
+ "blectr-",
+ "bngctr-",
+ "blectrl-",
+ "bngctrl-",
+ "bnectr-",
+ "bnectrl-",
+ "bnsctr-",
+ "bnuctr-",
+ "bnsctrl-",
+ "bnuctrl-",
+ "bgectr+",
+ "bnlctr+",
+ "bgectrl+",
+ "bnlctrl+",
+ "blectr+",
+ "bngctr+",
+ "blectrl+",
+ "bngctrl+",
+ "bnectr+",
+ "bnectrl+",
+ "bnsctr+",
+ "bnuctr+",
+ "bnsctrl+",
+ "bnuctrl+",
+ "bltctr",
+ "bltctr-",
+ "bltctrl",
+ "bltctrl-",
+ "bgtctr",
+ "bgtctr-",
+ "bgtctrl",
+ "bgtctrl-",
+ "beqctr",
+ "beqctr-",
+ "beqctrl",
+ "beqctrl-",
+ "bsoctr",
+ "bsoctr-",
+ "bunctr",
+ "bunctr-",
+ "bsoctrl",
+ "bsoctrl-",
+ "bunctrl",
+ "bunctrl-",
+ "bltctr+",
+ "bltctrl+",
+ "bgtctr+",
+ "bgtctrl+",
+ "beqctr+",
+ "beqctrl+",
+ "bsoctr+",
+ "bunctr+",
+ "bsoctrl+",
+ "bunctrl+",
+ "bltctr-",
+ "bltctrl-",
+ "bgtctr-",
+ "bgtctrl-",
+ "beqctr-",
+ "beqctrl-",
+ "bsoctr-",
+ "bunctr-",
+ "bsoctrl-",
+ "bunctrl-",
+ "bltctr+",
+ "bltctrl+",
+ "bgtctr+",
+ "bgtctrl+",
+ "beqctr+",
+ "beqctrl+",
+ "bsoctr+",
+ "bunctr+",
+ "bsoctrl+",
+ "bunctrl+",
+
+ "bfctr",
+ "bfctr-",
+ "bfctrl",
+ "bfctrl-",
+ "bfctr+",
+ "bfctrl+",
+ "bfctr-",
+ "bfctrl-",
+ "bfctr+",
+ "bfctrl+",
+ "btctr",
+ "btctr-",
+ "btctrl",
+ "btctrl-",
+ "btctr+",
+ "btctrl+",
+ "btctr-",
+ "btctrl-",
+ "btctr+",
+ "btctrl+",
+
+ "bcctr-",
+ "bcctrl-",
+ "bcctr+",
+ "bcctrl+",
+ "bcctr",
+ "bcc",
+ "bcctrl",
+ "bccl",
+
+ "bctar-",
+ "bctarl-",
+ "bctar+",
+ "bctarl+",
+ "bctar",
+ "bctarl",
+]
+# fmt: on
+
+
+class PpcArchitecture(Architecture):
+ """PpcArchitecture Class"""
+
+ def comment(self):
+ return "#"
+
+ def is_call(self, instruction: Instruction):
+ return instruction.opcode in ppc_call_opcodes
+
+ def is_branch(self, instruction: Instruction):
+ return instruction.opcode in (
+ ppc_conditional_branch_opcodes + ppc_unconditional_branch_opcodes
+ ) and not self.is_call(instruction)
+
+ def is_unconditional_branch(self, instruction: Instruction):
+ return instruction.opcode in ppc_unconditional_branch_opcodes
+
+ def is_sink(self, instruction: Instruction):
+ return instruction.opcode in ppc_sink_opcodes
+
+ def is_direct_branch(self, instruction: Instruction):
+ return self.is_branch(instruction) and (
+ re.search(rf"{HEX_LONG_PATTERN}", "|".join(instruction.ops))
+ )
diff --git a/ocgraph/configuration/architecture/sparc.py b/ocgraph/configuration/architecture/sparc.py
new file mode 100755
index 0000000..174577c
--- /dev/null
+++ b/ocgraph/configuration/architecture/sparc.py
@@ -0,0 +1,101 @@
+#!/usr/bin/env python
+# SPDX-License-Identifier: GTDGmbH
+"""Contains instruction info for Sparc-compatible targets."""
+
+from .architecture import Architecture
+from ...data.instruction import Instruction
+
+
+# fmt: off
+sparc_v8_call_opcodes = [
+ "call",
+]
+
+sparc_v8_sink_opcodes = [
+ # ret: return from subroutine
+ # retl: return from leaf subroutine
+ "ret", "retl",
+]
+
+sparc_v8_Bicc_opcodes = [
+ # conditional icc branch opcodes
+ "ba", "bn", "bne", "be", "bg", "ble", "bge", "bl", "bgu", "bleu", "bcc",
+ "bcs", "bpos", "bneg", "bvc", "bvs",
+]
+
+sparc_v8_FBfcc_opcodes = [
+ # conditional fcc branch opcodes
+ "fba", "fbn", "fbu", "fbg", "fbug", "fbl", "fbul", "fblg", "fbne", "fbe",
+ "fbue", "fbge", "fbuge", "fble", "fbule", "fbo",
+]
+
+sparc_v8_CBfcc_opcodes = [
+ # conditional coprocessor opcodes
+ "cba", "cbn", "cb3", "cb2", "cb23", "cb1", "cb13", "cb12", "cb123", "cb0",
+ "cb03", "cb02", "cb023", "cb01", "cb013", "cb012",
+]
+
+sparc_v8_Ticc_opcodes = [
+ # conditional traps on icc
+ "ta", "tn", "tne", "te", "tg", "tle", "tge", "tl", "tgu", "tleu", "tcc",
+ "tcs", "tpos", "tneg", "tvc", "tvs",
+]
+
+sparc_v8_branch_cond_delay_opcodes = [
+ f"{x},a" for x in
+ sparc_v8_Bicc_opcodes +
+ sparc_v8_FBfcc_opcodes +
+ sparc_v8_CBfcc_opcodes
+]
+
+sparc_v8_unconditional_branch_opcodes = [
+ "jmpl", "jmp", "b", "b,a"
+]
+
+sparc_v8_delayed_opcodes = sparc_v8_Bicc_opcodes + \
+ sparc_v8_FBfcc_opcodes + \
+ sparc_v8_CBfcc_opcodes + \
+ sparc_v8_branch_cond_delay_opcodes + \
+ sparc_v8_unconditional_branch_opcodes
+
+sparc_v8_conditional_branch_opcodes = sparc_v8_Bicc_opcodes + \
+ sparc_v8_FBfcc_opcodes + \
+ sparc_v8_CBfcc_opcodes + \
+ sparc_v8_Ticc_opcodes + \
+ sparc_v8_branch_cond_delay_opcodes
+# fmt: on
+
+
+class SparcArchitecture(Architecture):
+ """SparcArchitecture Class"""
+
+ def comment(self):
+ return "!"
+
+ def is_call(self, instruction: Instruction):
+ return instruction.opcode in sparc_v8_call_opcodes
+
+ def is_branch(self, instruction: Instruction):
+ return instruction.opcode in (
+ sparc_v8_conditional_branch_opcodes + sparc_v8_unconditional_branch_opcodes
+ )
+
+ def get_branch_delay(self, instruction: Instruction) -> int | None:
+ delay = None
+ if instruction.opcode in sparc_v8_delayed_opcodes:
+ delay = 2
+ elif self.is_sink(instruction):
+ delay = 2
+ else:
+ delay = 1
+ return delay
+
+ def is_direct_branch(self, instruction: Instruction):
+ # every branch is disassembled with the complete offset
+ return self.is_branch(instruction)
+
+ def is_unconditional_branch(self, instruction: Instruction):
+ return instruction.opcode in sparc_v8_unconditional_branch_opcodes
+
+ def is_sink(self, instruction: Instruction):
+ return instruction.opcode in sparc_v8_sink_opcodes
diff --git a/ocgraph/configuration/architecture/x86.py b/ocgraph/configuration/architecture/x86.py
new file mode 100755
index 0000000..fb12212
--- /dev/null
+++ b/ocgraph/configuration/architecture/x86.py
@@ -0,0 +1,38 @@
+#!/usr/bin/env python
+# SPDX-License-Identifier: GTDGmbH
+"""Contains instruction info for X86-compatible targets."""
+
+import re
+
+from .architecture import Architecture
+from ...data.instruction import Instruction
+
+# Common regexes
+HEX_PATTERN = r"[0-9a-fA-F]+"
+HEX_LONG_PATTERN = r"(?:0x0*)?" + HEX_PATTERN
+
+
+class X86Architecture(Architecture):
+ """X86Architecture Class"""
+
+ def comment(self):
+ return "#"
+
+ def is_call(self, instruction: Instruction):
+ # Various flavors of call:
+ # call *0x26a16(%rip)
+ # call 0x555555555542
+ # addr32 call 0x55555558add0
+ return "call" in instruction.opcode
+
+ def is_branch(self, instruction: Instruction):
+ return instruction.opcode[0] == "j"
+
+ def is_direct_branch(self, instruction: Instruction):
+ return self.is_branch(instruction) and re.match(rf"{HEX_LONG_PATTERN}", instruction.ops[0])
+
+ def is_unconditional_branch(self, instruction: Instruction):
+ return instruction.opcode.startswith("jmp")
+
+ def is_sink(self, instruction: Instruction):
+ return instruction.opcode.startswith("ret")
diff --git a/ocgraph/configuration/configuration.py b/ocgraph/configuration/configuration.py
new file mode 100755
index 0000000..34630ef
--- /dev/null
+++ b/ocgraph/configuration/configuration.py
@@ -0,0 +1,96 @@
+#!/usr/bin/env python
+# SPDX-License-Identifier: GTDGmbH
+"""Module for configuration of the ocgraph package."""
+
+from .logger import OCGraphLogger, logging_preset
+
+from .architecture.architecture import Architecture
+from .architecture.x86 import X86Architecture
+from .architecture.arm import ArmArchitecture
+from .architecture.sparc import SparcArchitecture
+from .architecture.ppc import PpcArchitecture
+
+from .disassembler.disassembler import Disassembler
+from .disassembler.objdump_sparc import ObjDumpSparcDisassembler
+from .disassembler.objdump_ppc import ObjDumpPpcDisassembler
+from .disassembler.gdb_default import GdbDisassembler
+from .disassembler.objdump_x86 import ObjDumpx86Disassembler
+from .disassembler.objdump_arm import ObjDumpArmDisassembler
+
+# fmt: off
+disassembler_option: dict[str, dict] = {
+ "OBJDUMP": {
+ "sparc": ObjDumpSparcDisassembler(),
+ "ppc": ObjDumpPpcDisassembler(),
+ "x86": ObjDumpx86Disassembler(),
+ "arm": ObjDumpArmDisassembler(),
+ },
+ "GDB": {
+ "sparc": GdbDisassembler(),
+ "ppc": GdbDisassembler(),
+ "x86": GdbDisassembler(),
+ "arm": GdbDisassembler(),
+ },
+}
+
+architecture_option: dict[str, dict] = {
+ "x86": {
+ "platform": "X86",
+ "architecture": X86Architecture(),
+ },
+ "arm": {
+ "platform": "ARM",
+ "architecture": ArmArchitecture(),
+ },
+ "sparc": {
+ "platform": "SPARC",
+ "architecture": SparcArchitecture(),
+ },
+ "ppc": {
+ "platform": "PPC",
+ "architecture": PpcArchitecture(),
+ },
+}
+# fmt: on
+
+
+class OcGraphConfiguration:
+ """Implement configuration presets for the ASM2CFG tool."""
+
+ logger: OCGraphLogger
+ """Logging mechanism for module"""
+ architecture: Architecture
+ """Target architecture instance"""
+ disassembler: Disassembler
+ """Target disassembler tool like OBJDump, GDB, ..."""
+
+ def __init__(self, arch: str = "sparc", disassembler: str = "OBJDUMP", preset="default"):
+ if architecture_option.get(arch) is None:
+ raise NotImplementedError("Architecture option not supported!")
+ if disassembler_option.get(disassembler) is None:
+ raise NotImplementedError("Disassembler option not supported!")
+ if logging_preset.get(preset) is None:
+ raise NotImplementedError("Logging preset not supported!")
+
+ # load module preset
+ _preset = architecture_option[arch]
+ _preset["disassembler"] = disassembler_option[disassembler][arch]
+ self.__dict__ = _preset
+
+ # configure logging
+ self.logger = OCGraphLogger("OcGraph", preset, "asm2cfg.log")
+
+ @staticmethod
+ def architectures():
+ """Return all available architectures options"""
+ return architecture_option.keys()
+
+ @staticmethod
+ def disassemblers():
+ """Return all available disassemblers options"""
+ return disassembler_option.keys()
+
+ @staticmethod
+ def loggers():
+ """Return all available disassemblers options"""
+ return logging_preset.keys()
diff --git a/ocgraph/configuration/disassembler/__init__.py b/ocgraph/configuration/disassembler/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/ocgraph/configuration/disassembler/disassembler.py b/ocgraph/configuration/disassembler/disassembler.py
new file mode 100755
index 0000000..ff118ea
--- /dev/null
+++ b/ocgraph/configuration/disassembler/disassembler.py
@@ -0,0 +1,49 @@
+#!/usr/bin/env python3
+"""Class configuring the used disassembler tool."""
+
+from abc import ABC, abstractmethod
+
+from ..architecture.sparc import SparcArchitecture
+from ..architecture.ppc import PpcArchitecture
+from ...data.instruction import Instruction
+
+
+class DisassemblerError(Exception):
+ """Raised when the extract_information method was not successful."""
+
+
+class Disassembler(ABC):
+ """Disassembler Class"""
+
+ def __init__(self):
+ self.architecture = SparcArchitecture()
+
+ name: str = ""
+ """ Disassembler tool identification like SparcV8Objdump, GDB, ..."""
+
+ @abstractmethod
+ def extract_information(self, str_input: str) -> dict[str, str]:
+ """Specification of the extracted information. Required attributes are:
+ * address = instruction location in the binary
+ * location: instruction address location
+ * instr_d: instruction in disassembled format
+ * instr_h: instruction in hex-notation
+ * opcode: instruction opcode
+ * printable: a printable line of the collected information
+ """
+ raise NotImplementedError()
+
+ @abstractmethod
+ def parse_function_header(self, line: str) -> str | None:
+ """Return function name of memory range from the given string line."""
+ raise NotImplementedError()
+
+ @abstractmethod
+ def parse_line(self, line: str, lineno, function_name: str) -> Instruction | None:
+ """Parses a single line of assembly to create Instruction instance"""
+
+ @abstractmethod
+ def parse_jump_target(self, str_input: str) -> int | None:
+ """Parses a string (e.g., coma separated operands) and returns
+ the jump target value
+ """
diff --git a/ocgraph/configuration/disassembler/gdb_default.py b/ocgraph/configuration/disassembler/gdb_default.py
new file mode 100755
index 0000000..beecbbf
--- /dev/null
+++ b/ocgraph/configuration/disassembler/gdb_default.py
@@ -0,0 +1,218 @@
+#!/usr/bin/env python3
+"""Class for parsing the input"""
+
+import re
+from typing import List
+
+from ...data.address import Address
+from ...data.encoding import Encoding
+from ...data.instruction import Instruction
+
+from .disassembler import Disassembler, DisassemblerError
+
+# Common regexes
+HEX_PATTERN = r"[0-9a-fA-F]+"
+HEX_LONG_PATTERN = r"(?:0x0*)?" + HEX_PATTERN
+
+
+class GdbDisassembler(Disassembler):
+ """x86 GDB disassembler"""
+
+ name: str = "Default GDB Disassembler (x86 Binutils)"
+
+ # Expected format: <