Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions .github/workflows/prerelease.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,30 @@ permissions:
contents: read

jobs:
versioning:
name: Update Version
runs-on: ubuntu-24.04
steps:
- name: Checkout
uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- name: Run TinySemVer
uses: ashvardanian/tinysemver@v2.1.1
with:
verbose: "true"
version-file: "VERSION"
update-version-in: |
CMakeLists.txt:VERSION (\d+\.\d+\.\d+)
update-major-version-in: |
libsee.c:^#define LIBSEE_VERSION_MAJOR (\d+)
update-minor-version-in: |
libsee.c:^#define LIBSEE_VERSION_MINOR (\d+)
update-patch-version-in: |
libsee.c:^#define LIBSEE_VERSION_PATCH (\d+)
dry-run: "true"

test_ubuntu_gcc:
name: Ubuntu (GCC)
runs-on: ubuntu-22.04
Expand Down
28 changes: 21 additions & 7 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,17 +18,31 @@ permissions:

jobs:
versioning:
name: Semantic Release
runs-on: ubuntu-22.04
name: Update Version
runs-on: ubuntu-24.04
steps:
- uses: actions/checkout@v4
- name: Checkout
uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- name: Set up Node.js
uses: actions/setup-node@v4
- name: Run TinySemVer
uses: ashvardanian/tinysemver@v2.1.1
with:
node-version: 20
- run: npm install --ignore-scripts --save-dev --prefix ./package-ci @semantic-release/exec @semantic-release/git conventional-changelog-eslint semantic-release && npx --prefix ./package-ci semantic-release
verbose: "true"
version-file: "VERSION"
update-version-in: |
CMakeLists.txt:VERSION (\d+\.\d+\.\d+)
update-major-version-in: |
libsee.c:^#define LIBSEE_VERSION_MAJOR (\d+)
update-minor-version-in: |
libsee.c:^#define LIBSEE_VERSION_MINOR (\d+)
update-patch-version-in: |
libsee.c:^#define LIBSEE_VERSION_PATCH (\d+)
dry-run: "false"
push: "true"
create-release: "true"
github-token: ${{ secrets.SEMANTIC_RELEASE_TOKEN }}

rebase:
name: Rebase Dev. Branch
Expand Down
7 changes: 0 additions & 7 deletions .github/workflows/update_version.sh

This file was deleted.

17 changes: 15 additions & 2 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,19 @@
{
"files.associations": {
"string.h": "c",
"cstdio": "c"
}
"cstdio": "c",
"wchar.h": "c"
},
"cSpell.words": [
"BLAS",
"dlsym",
"dlvsym",
"hsearch",
"LAPACK",
"libsee",
"memcpy",
"memset",
"PCRE",
"tsearch"
]
}
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
cmake_minimum_required(VERSION 3.14)
project(libsee
project(libsee
VERSION 1.0.2
DESCRIPTION "See where you use LibC the most. Trace calls failing tests. Then - roast!")

Expand Down
2 changes: 1 addition & 1 deletion CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ To compile the library, run:
```bash
cmake -B build_release
cmake --build build_release --config Release
test -e build_release/libsee.so && echo "Success" || echo "Failure"
test -e build_release/libsee.* && echo "Success" || echo "Failure"
```

Want to try it out? Here's how to use it:
Expand Down
17 changes: 12 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,12 @@ To download and compile the script and run your favorite query:
gcc -g -O2 -fno-builtin -fPIC -nostdlib -nostartfiles -shared -o libsee.so libsee.c
```

LibSee overrides LibC symbols using `LD_PRELOAD`, profiling the most commonly used functions, and, optionally, fuzzing their behaviour for testing.
LibSee overrides LibC symbols using `LD_PRELOAD`, profiling the most commonly used functions, and, optionally, fuzzing their behavior for testing.
The library yields a few binaries when compiled:

```bash
libsee.so # Profiles LibC calls
libsee_and_knee.so # Correct LibC behaviour, but fuzzed!
libsee_and_knee.so # Correct LibC behavior, but fuzzed!
```

## Tricks Used
Expand All @@ -25,9 +25,10 @@ There are several things worth knowing, that came handy implementing this.
- One way to implement this library would be to override the `_start` symbol, but implementing the correct loading sequence for a binary is tricky, so I use the conventional `dlsym` to look up the symbols on the first function invocation.
- On the `x86_64` architecture, the `rdtscp` instruction yields both the CPU cycle counter and the unique identifier of the core. Very handy if you are profiling a multi-threaded application.
- Once the unloading sequence reaches `libsee.so`, the `STDOUT` is already closed. So if you want to print to the console, you may want to reopen the `/dev/tty` device before printing usage stats.
- The calling conventions for system calls on Aarch64 and x86 differ significantly. On Aarch64 I use the [generalized `openat`](https://github.com/torvalds/linux/blob/bf3a69c6861ff4dc7892d895c87074af7bc1c400/include/uapi/asm-generic/unistd.h#L158-L159) with system call number 56. On [x86 it's system call number 2](https://github.com/torvalds/linux/blob/0dd3ee31125508cd67f7e7172247f05b7fd1753a/arch/x86/entry/syscalls/syscall_64.tbl#L13).
- On macOS, `sprintf`, `vsprintf`, `snprintf`, and `vsnprintf` are macros. You have to `#undef` them.
- In `Release` builds, compilers love replacing your code with `memset` and `memcpy` calls. As those symbols can't be resolved from inside LibSee, the process will `SEGFAULT`, so don't forget to disable such built-in optimizations with `-fno-builtin`.
- Aarch64 doesn't seem to have an `open` system call, but it [has the generalized `openat`](https://github.com/torvalds/linux/blob/bf3a69c6861ff4dc7892d895c87074af7bc1c400/include/uapi/asm-generic/unistd.h#L158-L159) number 56.
- No symbol versioning is implemented: vanilla `dlsym` is used instead of `dlvsym`.

## Coverage

Expand All @@ -39,14 +40,20 @@ Feel free to suggest PRs covering the rest:
- [x] [algorithms](https://en.cppreference.com/w/c/algorithm)
- [x] [date and time](https://en.cppreference.com/w/c/chrono)
- [x] [input/output](https://en.cppreference.com/w/c/io)
- [x] [wide-character strings](https://en.cppreference.com/w/c/string/wide)
- [ ] [concurrency and atomics](https://en.cppreference.com/w/c/thread)
- [ ] retrieving error numbers
- [ ] [numerics](https://en.cppreference.com/w/c/numeric)
- [ ] [wide-character strings](https://en.cppreference.com/w/c/string/wide)
- [ ] [multibyte strings](https://en.cppreference.com/w/c/string/multibyte)
- [ ] [multi-byte strings](https://en.cppreference.com/w/c/string/multibyte)
- [ ] [wide-character IO](https://en.cppreference.com/w/c/io)
- [ ] [localization](https://en.cppreference.com/w/c/locale)
- [ ] anything newer than C11

There are a few other C libraries that most of the world reuses, rather than implementing from scratch in other languages:

- [ ] BLAS and LAPACK
- [ ] PCRE RegEx
- [ ] `hsearch`, `tsearch`, and pattern matching [extensions](https://ftp.gnu.org/old-gnu/Manuals/glibc-2.2.3/html_node/libc_toc.html)

[Program support](https://en.cppreference.com/w/c/program) utilities aren't intended.

95 changes: 67 additions & 28 deletions libsee.c
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,8 @@ typedef int errno_t;
typedef size_t rsize_t;
#endif

#define LIBSEE_MAX_SYMBOLS 97

/**
* @brief Contains the number of times each function was called.
*
Expand Down Expand Up @@ -113,6 +115,10 @@ typedef union thread_local_counters {
size_t memmem;
size_t memrchr;

size_t wcstombs;
size_t wcswidth;
size_t wcwidth;

size_t malloc;
size_t calloc;
size_t realloc;
Expand Down Expand Up @@ -179,7 +185,7 @@ typedef union thread_local_counters {
size_t mktime;
} named;

size_t indexed[94];
size_t indexed[LIBSEE_MAX_SYMBOLS];
} thread_local_counters;

#pragma region Function Pointers
Expand Down Expand Up @@ -219,6 +225,10 @@ typedef errno_t (*api_strerror_s_t)(char *buf, rsize_t bufsz, errno_t errnum);
typedef void *(*api_memmem_t)(void const *haystack, size_t haystacklen, void const *needle, size_t needlelen);
typedef void *(*api_memrchr_t)(void const *s, int c, size_t n);

typedef size_t (*api_wcstombs_t)(char *dest, wchar_t const *src, size_t max);
typedef int (*api_wcswidth_t)(wchar_t const *wcs, size_t n);
typedef int (*api_wcwidth_t)(wchar_t wc);

typedef void *(*api_malloc_t)(size_t);
typedef void *(*api_calloc_t)(size_t, size_t);
typedef void *(*api_realloc_t)(void *, size_t);
Expand Down Expand Up @@ -345,6 +355,10 @@ typedef struct real_apis {
api_memmem_t memmem;
api_memrchr_t memrchr;

api_wcstombs_t wcstombs;
api_wcswidth_t wcswidth;
api_wcwidth_t wcwidth;

api_malloc_t malloc;
api_calloc_t calloc;
api_realloc_t realloc;
Expand Down Expand Up @@ -466,25 +480,27 @@ void syscall_print(char const *buf, size_t count) {
// The system call number is passed in x8, and the arguments are in x0, x1, and x2.
long syscall_write = (long)64; // System call number for write in AArch64 Linux
long file_descriptor = (long)1;
asm volatile("mov x0, %1\n" // First argument: file descriptor
"mov x1, %2\n" // Second argument: buffer address
"mov x2, %3\n" // Third argument: buffer size
"mov x8, %4\n" // System call number: SYS_write (64)
"svc #0\n" // Make the system call
"mov %0, x0" // Store the return value
: "=r"(ret)
: "r"(file_descriptor), "r"(buf), "r"((long)count), "r"(syscall_write)
: "x0", "x1", "x2", "x8", "memory");
asm volatile( //
"mov x0, %1\n" // First argument: file descriptor
"mov x1, %2\n" // Second argument: buffer address
"mov x2, %3\n" // Third argument: buffer size
"mov x8, %4\n" // System call number: SYS_write (64)
"svc #0\n" // Make the system call
"mov %0, x0" // Store the return value
: "=r"(ret)
: "r"(file_descriptor), "r"(buf), "r"((long)count), "r"(syscall_write)
: "x0", "x1", "x2", "x8", "memory");
#elif defined(__x86_64__) || defined(__i386__)
// Inline assembly syntax for making a system call in x86-64 Linux.
// Uses the syscall instruction, passing the system call number in rax,
// and the call arguments in rdi, rsi, and rdx, respectively.
long syscall_write = (long)1; // System call number for write in x86-64 Linux
unsigned int file_descriptor = (unsigned int)1;
asm volatile("syscall"
: "=a"(ret)
: "a"(syscall_write), "D"(file_descriptor), "S"(buf), "d"(count)
: "rcx", "r11", "memory");
asm volatile( //
"syscall"
: "=a"(ret)
: "a"(syscall_write), "D"(file_descriptor), "S"(buf), "d"(count)
: "rcx", "r11", "memory");
(void)ret;
#endif
(void)buf;
Expand Down Expand Up @@ -525,18 +541,20 @@ void reopen_stdout(void) {
void close_stdout(void) {
long ret;
#ifdef __aarch64__
asm volatile("mov x0, 1\n" // File descriptor for stdout
"mov x8, 57\n" // Syscall number for 'close' in AArch64
"svc #0\n"
"mov %0, x0"
: "=r"(ret)
: // No inputs besides the syscall number and FD
: "x0", "x8", "memory");
asm volatile( //
"mov x0, 1\n" // File descriptor for stdout
"mov x8, 57\n" // Syscall number for 'close' in AArch64
"svc #0\n"
"mov %0, x0"
: "=r"(ret)
: // No inputs besides the syscall number and FD
: "x0", "x8", "memory");
#elif defined(__x86_64__)
asm volatile("syscall"
: "=a"(ret)
: "a"(3), "D"(1) // Inputs: syscall number for 'close', FD for stdout
: "rcx", "r11", "memory");
asm volatile( //
"syscall"
: "=a"(ret)
: "a"(3), "D"(1) // Inputs: syscall number for 'close', FD for stdout
: "rcx", "r11", "memory");
#endif
(void)ret;
}
Expand Down Expand Up @@ -779,6 +797,20 @@ libsee_export void *memrchr(void const *s, int c, size_t n) { libsee_return(memr

#pragma endregion

#pragma region Wide Characters // Contents of `wchar.h`

#include <wchar.h>

/**
 * @brief Exported `wcstombs` override carrying the standard signature.
 *
 * Passes `dst`, `src`, and `len` to the `libsee_return` macro together with the
 * symbol name and the `size_t` result type.
 * NOTE(review): presumably the macro records the call and forwards it to the
 * implementation resolved with `dlsym(RTLD_NEXT, "wcstombs")` - confirm against
 * the macro definition.
 */
libsee_export size_t wcstombs(char *dst, wchar_t const *src, size_t len) {
libsee_return(wcstombs, size_t, dst, src, len);
}

/**
 * @brief Exported `wcwidth` override; hands the wide character `c` to the
 * `libsee_return` macro with an `int` result type.
 * NOTE(review): presumably counted and forwarded to the `wcwidth` symbol
 * resolved via `dlsym(RTLD_NEXT, ...)` - confirm against the macro definition.
 */
libsee_export int wcwidth(wchar_t c) { libsee_return(wcwidth, int, c); }

/**
 * @brief Exported `wcswidth` override; hands the wide string `s` and the limit
 * `n` to the `libsee_return` macro with an `int` result type.
 * NOTE(review): presumably counted and forwarded to the `wcswidth` symbol
 * resolved via `dlsym(RTLD_NEXT, ...)` - confirm against the macro definition.
 */
libsee_export int wcswidth(wchar_t const *s, size_t n) { libsee_return(wcswidth, int, s, n); }

#pragma endregion

#pragma region Numerics // Contents of `stdlib.h`

libsee_export void srand(unsigned seed) { libsee_noreturn(srand, seed); }
Expand Down Expand Up @@ -1150,11 +1182,12 @@ typedef struct libsee_name_stats {

void libsee_initialize(void) {

// Initialize all the counters to zeros, without using `memset`
size_t *counters = libsee_thread_cycles[0].indexed;
// Initialize all the cycles to zeros, without using `memset`
size_t *cycles = libsee_thread_cycles[0].indexed;
size_t *calls = libsee_thread_calls[0].indexed;
size_t total_counters_per_thread = sizeof(thread_local_counters) / sizeof(size_t);
size_t total_counters_across_threads = LIBSEE_MAX_THREADS * total_counters_per_thread;
for (size_t i = 0; i < total_counters_across_threads; i++) { counters[i] = 0; }
for (size_t i = 0; i < total_counters_across_threads; i++) cycles[i] = calls[i] = 0;

// Load the symbols from the underlying implementation
real_apis *apis = &libsee_apis;
Expand Down Expand Up @@ -1184,6 +1217,10 @@ void libsee_initialize(void) {
apis->memmem = (api_memmem_t)dlsym(RTLD_NEXT, "memmem");
apis->memrchr = (api_memrchr_t)dlsym(RTLD_NEXT, "memrchr");

apis->wcstombs = (api_wcstombs_t)dlsym(RTLD_NEXT, "wcstombs");
apis->wcswidth = (api_wcswidth_t)dlsym(RTLD_NEXT, "wcswidth");
apis->wcwidth = (api_wcwidth_t)dlsym(RTLD_NEXT, "wcwidth");

apis->malloc = (api_malloc_t)dlsym(RTLD_NEXT, "malloc");
apis->calloc = (api_calloc_t)dlsym(RTLD_NEXT, "calloc");
apis->realloc = (api_realloc_t)dlsym(RTLD_NEXT, "realloc");
Expand Down Expand Up @@ -1358,6 +1395,8 @@ void libsee_finalize(void) {
{"strspn"}, {"strcspn"}, {"strpbrk"}, {"strstr"}, {"strtok"}, {"strtok_s"}, {"memchr"}, {"memcmp"}, {"memset"},
{"memset_s"}, {"memcpy"}, {"memcpy_s"}, {"memmove"}, {"memmove_s"}, {"strerror"}, {"strerror_s"}, {"memmem"},
{"memrchr"},
// Wide strings: entries are positional and must mirror the `wcstombs`,
// `wcswidth`, `wcwidth` counter fields, in that order, so each count is
// reported under the function that was actually intercepted.
{"wcstombs"}, {"wcswidth"}, {"wcwidth"},
// Heap
{"malloc"}, {"calloc"}, {"realloc"}, {"free"}, {"aligned_alloc"},
// Algorithms
Expand Down
10 changes: 0 additions & 10 deletions package-ci.json

This file was deleted.