From 21ffd6a4df43c2e1fe679312fc618a8d5b48d4b3 Mon Sep 17 00:00:00 2001 From: Ylarod Date: Sun, 5 Apr 2026 12:24:51 +0800 Subject: [PATCH] kernel: trim non-exported symbols (#3397) --- .github/workflows/ddk-lkm.yml | 4 +- kernel/.gitignore | 3 + kernel/Makefile | 13 +- kernel/build-all.sh | 3 +- kernel/tools/gen_keep_globals.c | 445 +++++++++++++++++++++++++++++ kernel/tools/localize_symbols.sh | 120 ++++++++ kernel/tools/trim_local_symnames.c | 284 ++++++++++++++++++ 7 files changed, 865 insertions(+), 7 deletions(-) create mode 100644 kernel/tools/gen_keep_globals.c create mode 100755 kernel/tools/localize_symbols.sh create mode 100644 kernel/tools/trim_local_symnames.c diff --git a/.github/workflows/ddk-lkm.yml b/.github/workflows/ddk-lkm.yml index 0b1b43f155c1..2d42384e118e 100644 --- a/.github/workflows/ddk-lkm.yml +++ b/.github/workflows/ddk-lkm.yml @@ -58,7 +58,7 @@ jobs: if [ -n "$EXPECTED_SIZE2" ]; then EXTRA_ARGS="KSU_EXPECTED_SIZE2=$EXPECTED_SIZE2 KSU_EXPECTED_HASH2=$EXPECTED_HASH2" fi - CONFIG_KSU=m CC=clang make $EXTRA_ARGS + CONFIG_KSU=m CC=clang make release $EXTRA_ARGS echo "=== Build completed ===" @@ -72,8 +72,6 @@ jobs: echo "Copied to: /github/workspace/out/$OUTPUT_NAME" ls -la "/github/workspace/out/$OUTPUT_NAME" echo "Size: $(du -h "/github/workspace/out/$OUTPUT_NAME" | cut -f1)" - llvm-strip -d "/github/workspace/out/$OUTPUT_NAME" - echo "Size after stripping: $(du -h "/github/workspace/out/$OUTPUT_NAME" | cut -f1)" - name: Upload kernelsu.ko artifact uses: actions/upload-artifact@v7 diff --git a/kernel/.gitignore b/kernel/.gitignore index b661888af7c6..3c8c6223bc01 100644 --- a/kernel/.gitignore +++ b/kernel/.gitignore @@ -21,4 +21,7 @@ CLAUDE.md .ddk-version .vscode/settings.json check_symbol +gen_keep_globals +trim_local_symnames ksu_uapi.h +.codex diff --git a/kernel/Makefile b/kernel/Makefile index 068476e00ce0..8c0b25f9f1cc 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -4,19 +4,28 @@ MDIR := $(realpath $(dir $(abspath $(lastword 
$(MAKEFILE_LIST))))) $(info -- KDIR: $(KDIR)) $(info -- MDIR: $(MDIR)) -.PHONY: all compdb clean format check-format +.PHONY: all release tools compdb clean format check-format all: check_symbol make -C $(KDIR) M=$(MDIR) modules -j$(shell nproc) ./check_symbol kernelsu.ko $(KDIR)/vmlinux +release: all tools + ./tools/localize_symbols.sh kernelsu.ko kernelsu.ko + +tools: check_symbol gen_keep_globals trim_local_symnames + compdb: python3 $(MDIR)/.vscode/generate_compdb.py -O $(KDIR) $(MDIR) clean: make -C $(KDIR) M=$(MDIR) clean - rm check_symbol + rm -f check_symbol gen_keep_globals trim_local_symnames check_symbol: tools/check_symbol.c $(CC) tools/check_symbol.c -o check_symbol +gen_keep_globals: tools/gen_keep_globals.c + $(CC) -O2 -Wall -Wextra -std=c11 tools/gen_keep_globals.c -o gen_keep_globals +trim_local_symnames: tools/trim_local_symnames.c + $(CC) -O2 -Wall -Wextra -std=c11 tools/trim_local_symnames.c -o trim_local_symnames format: find . \( -name "*.c" -o -name "*.h" \) -print0 | xargs -0 clang-format -i check-format: diff --git a/kernel/build-all.sh b/kernel/build-all.sh index 3eef2614e415..7e2c030526b1 100755 --- a/kernel/build-all.sh +++ b/kernel/build-all.sh @@ -10,10 +10,9 @@ mv .ddk-version .ddk-version.bak || true for kmi in $KMIS; do echo "========== Building $kmi ==========" export DDK_TARGET=$kmi - if ddk build -e CONFIG_KSU=m; then + if ddk build -e CONFIG_KSU=m -- release; then if [ -f kernelsu.ko ]; then cp kernelsu.ko "kernelsu-${kmi}.ko" - llvm-objcopy --strip-unneeded --discard-locals "kernelsu-${kmi}.ko" echo "✓ Built kernelsu-${kmi}.ko" fi else diff --git a/kernel/tools/gen_keep_globals.c b/kernel/tools/gen_keep_globals.c new file mode 100644 index 000000000000..0f16f825c9d5 --- /dev/null +++ b/kernel/tools/gen_keep_globals.c @@ -0,0 +1,445 @@ +#define _XOPEN_SOURCE 700 + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +typedef struct { + char **items; + size_t 
len;
+    size_t cap;
+} strset_t;
+
+/* A mapped ELF64 file together with pointers to its header tables. */
+typedef struct {
+    void *data;
+    size_t size;
+    int fd;
+    Elf64_Ehdr *ehdr;
+    Elf64_Shdr *shdr;
+} elf_file_t;
+
+/* Scratch keep-set shared with the nftw() callback, which has no user-data argument. */
+static strset_t g_keep;
+
+/* Print "error: <msg>" on stderr and exit with status 1. */
+static void die(const char *msg)
+{
+    fprintf(stderr, "error: %s\n", msg);
+    exit(1);
+}
+
+/* Like die(), with the description of the current errno appended. */
+static void die_perror(const char *msg)
+{
+    const char *reason = strerror(errno);
+
+    fprintf(stderr, "error: %s: %s\n", msg, reason);
+    exit(1);
+}
+
+/* Put a set into the empty state (no storage attached). */
+static void strset_init(strset_t *s)
+{
+    *s = (strset_t){ 0 };
+}
+
+/* Release every stored string and the slot array, leaving the set empty. */
+static void strset_free(strset_t *s)
+{
+    size_t idx = 0;
+
+    while (idx < s->len) {
+        free(s->items[idx++]);
+    }
+    free(s->items);
+    *s = (strset_t){ 0 };
+}
+
+/* Linear membership test. */
+static bool strset_contains(const strset_t *s, const char *v)
+{
+    for (size_t k = s->len; k > 0; k--) {
+        if (!strcmp(s->items[k - 1], v)) {
+            return true;
+        }
+    }
+    return false;
+}
+
+/*
+ * Insert a private copy of `v`. NULL, empty and already-present strings are
+ * ignored; allocation failure terminates the program.
+ */
+static void strset_add(strset_t *s, const char *v)
+{
+    if (v == NULL || *v == '\0') {
+        return;
+    }
+    if (strset_contains(s, v)) {
+        return;
+    }
+
+    /* Capacity doubles on demand, starting at 64 slots. */
+    if (s->cap == s->len) {
+        size_t grown = (s->cap != 0) ? 2 * s->cap : 64;
+        char **resized = realloc(s->items, grown * sizeof(*resized));
+        if (resized == NULL) {
+            die("out of memory");
+        }
+        s->items = resized;
+        s->cap = grown;
+    }
+
+    char *copy = strdup(v);
+    if (copy == NULL) {
+        die("out of memory");
+    }
+    s->items[s->len++] = copy;
+}
+
+/* qsort() comparator for an array of C strings. */
+static int cmp_strptr(const void *a, const void *b)
+{
+    const char *lhs = *(const char *const *)a;
+    const char *rhs = *(const char *const *)b;
+
+    return strcmp(lhs, rhs);
+}
+
+/* True when `s` ends with `suffix`. */
+static bool ends_with(const char *s, const char *suffix)
+{
+    size_t s_len = strlen(s);
+    size_t suffix_len = strlen(suffix);
+
+    return s_len >= suffix_len && memcmp(s + (s_len - suffix_len), suffix, suffix_len) == 0;
+}
+
+/* ASCII letter or underscore: a valid first character of a C identifier. */
+static bool is_ident_start(char c)
+{
+    bool upper = c >= 'A' && c <= 'Z';
+    bool lower = c >= 'a' && c <= 'z';
+
+    return upper || lower || c == '_';
+}
+
+/* ASCII letter, digit or underscore. */
+static bool is_ident_char(char c)
+{
+    bool digit = c >= '0' && c <= '9';
+
+    return digit || is_ident_start(c);
+}
+
+/*
+ * Read a whole file into a freshly allocated, NUL-terminated buffer.
+ * Returns NULL on any failure. On success the caller frees the buffer and,
+ * when out_size is non-NULL, receives the byte count (NUL excluded).
+ */
+static char *read_whole_file(const char *path, size_t *out_size)
+{
+    int fd = open(path, O_RDONLY);
+    if (fd < 0) {
+        return NULL;
+    }
+    struct stat st;
+    if (fstat(fd, &st) < 0 || st.st_size < 0) {
+        close(fd);
+
return NULL;
+    }
+    size_t sz = (size_t)st.st_size;
+    char *buf = malloc(sz + 1);
+    if (!buf) {
+        close(fd);
+        return NULL;
+    }
+    size_t off = 0;
+    while (off < sz) {
+        ssize_t n = read(fd, buf + off, sz - off);
+        if (n <= 0) {
+            free(buf);
+            close(fd);
+            return NULL;
+        }
+        off += (size_t)n;
+    }
+    close(fd);
+    buf[sz] = '\0';
+    if (out_size) {
+        *out_size = sz;
+    }
+    return buf;
+}
+
+/* Return the index of the first non-whitespace byte in buf[pos..sz), or sz. */
+static size_t skip_space(const char *buf, size_t sz, size_t pos)
+{
+    while (pos < sz && isspace((unsigned char)buf[pos])) {
+        pos++;
+    }
+    return pos;
+}
+
+/*
+ * Scan C source text for EXPORT_SYMBOL-family invocations and add each
+ * exported identifier to `out`.
+ *
+ * Recognised forms (whitespace allowed around the parentheses):
+ *   EXPORT_SYMBOL(sym)          EXPORT_SYMBOL_GPL(sym)
+ *   EXPORT_SYMBOL_NS(sym, ns)   EXPORT_SYMBOL_NS_GPL(sym, ns)
+ *
+ * The scan is purely textual, so invocations inside comments or strings are
+ * collected too; that can only make the keep list larger.
+ */
+static void parse_export_macros(const char *buf, size_t sz, strset_t *out)
+{
+    static const char needle[] = "EXPORT_SYMBOL";
+    /* Longest first so "_NS_GPL" is not mistaken for "_NS". */
+    static const char *const suffixes[] = { "_NS_GPL", "_NS", "_GPL" };
+    const size_t nlen = sizeof(needle) - 1;
+    size_t i = 0;
+
+    /* A match needs at least one byte after the macro name. */
+    while (i + nlen < sz) {
+        if (memcmp(buf + i, needle, nlen) != 0 || (i > 0 && is_ident_char(buf[i - 1]))) {
+            i++;
+            continue;
+        }
+
+        size_t j = i + nlen;
+        bool takes_ns = false;
+        for (size_t k = 0; k < sizeof(suffixes) / sizeof(suffixes[0]); k++) {
+            size_t slen = strlen(suffixes[k]);
+            if (slen <= sz - j && memcmp(buf + j, suffixes[k], slen) == 0) {
+                j += slen;
+                takes_ns = suffixes[k][1] == 'N';
+                break;
+            }
+        }
+
+        j = skip_space(buf, sz, j);
+        if (j >= sz || buf[j] != '(') {
+            i++;
+            continue;
+        }
+        j = skip_space(buf, sz, j + 1);
+        if (j >= sz || !is_ident_start(buf[j])) {
+            i++;
+            continue;
+        }
+        size_t st = j++;
+        while (j < sz && is_ident_char(buf[j])) {
+            j++;
+        }
+        size_t len = j - st;
+        j = skip_space(buf, sz, j);
+
+        /* The _NS forms carry a namespace argument after the symbol. */
+        char terminator = takes_ns ? ',' : ')';
+        char sym[512];
+        if (j >= sz || buf[j] != terminator || len >= sizeof(sym)) {
+            i++;
+            continue;
+        }
+        memcpy(sym, buf + st, len);
+        sym[len] = '\0';
+        strset_add(out, sym);
+        i = j + 1;
+    }
+}
+
+/*
+ * nftw() callback: feed every regular *.c / *.h file to parse_export_macros().
+ * Unreadable files are skipped on purpose; the walk always continues.
+ */
+static int collect_src_cb(const char *fpath, const struct stat *sb, int typeflag, struct FTW *ftwbuf)
+{
+    (void)sb;
+    (void)ftwbuf;
+    if (typeflag != FTW_F) {
+        return 0;
+    }
+    if (!ends_with(fpath, ".c") && !ends_with(fpath, ".h")) {
+        return 0;
+    }
+    size_t sz = 0;
+    char *buf = read_whole_file(fpath, &sz);
+    if (!buf) {
+        return 0;
+    }
+    parse_export_macros(buf, sz, &g_keep);
+    free(buf);
+    return 0;
+}
+
+/*
+ * Walk `src_root` (symlinks are not followed) and add every symbol exported
+ * by the sources to `keep`. Exits the program if the walk itself fails.
+ */
+static void collect_exported_from_source(const char *src_root, strset_t *keep)
+{
+    /* The callback has no context pointer, so the set travels through g_keep. */
+    g_keep = *keep;
+    if (nftw(src_root, collect_src_cb, 16, FTW_PHYS) != 0) {
+        die_perror("nftw");
+    }
+    *keep = g_keep;
+}
+
+/* True when the byte range [off, off + len) lies inside the mapped file. */
+static bool elf_range_ok(const elf_file_t *elf, Elf64_Off off, Elf64_Xword len)
+{
+    return off <= elf->size && len <= elf->size - off;
+}
+
+/*
+ * Map `path` read-only and check that it is an ELF64 file whose section
+ * header table lies inside the file. Returns false (with *elf zeroed) on
+ * any failure; on success release the mapping with elf_close().
+ */
+static bool elf_open_ro(const char *path, elf_file_t *elf)
+{
+    memset(elf, 0, sizeof(*elf));
+    elf->fd = open(path, O_RDONLY);
+    if (elf->fd < 0) {
+        return false;
+    }
+    struct stat st;
+    /* Anything shorter than an ELF header cannot be parsed safely. */
+    if (fstat(elf->fd, &st) < 0 || st.st_size < (off_t)sizeof(Elf64_Ehdr)) {
+        close(elf->fd);
+        memset(elf, 0, sizeof(*elf));
+        return false;
+    }
+    elf->size = (size_t)st.st_size;
+    elf->data = mmap(NULL, elf->size, PROT_READ, MAP_PRIVATE, elf->fd, 0);
+    if (elf->data == MAP_FAILED) {
+        close(elf->fd);
+        memset(elf, 0, sizeof(*elf));
+        return false;
+    }
+    elf->ehdr = (Elf64_Ehdr *)elf->data;
+
+    const Elf64_Ehdr *eh = elf->ehdr;
+    bool ok = memcmp(eh->e_ident, ELFMAG, SELFMAG) == 0 && eh->e_ident[EI_CLASS] == ELFCLASS64 &&
+              (eh->e_shnum == 0 || eh->e_shentsize == sizeof(Elf64_Shdr)) &&
+              elf_range_ok(elf, eh->e_shoff, (Elf64_Xword)eh->e_shnum * sizeof(Elf64_Shdr));
+    if (!ok) {
+        munmap(elf->data, elf->size);
+        close(elf->fd);
+        memset(elf, 0, sizeof(*elf));
+        return false;
+    }
+    elf->shdr = (Elf64_Shdr *)((char *)elf->data + eh->e_shoff);
+    return true;
+}
+
+/* Unmap and close a file opened by elf_open_ro(); safe on a zeroed struct. */
+static void elf_close(elf_file_t *elf)
+{
+    if (!elf || !elf->data) {
+        return;
+    }
+    munmap(elf->data, elf->size);
+    close(elf->fd);
+    memset(elf, 0, sizeof(*elf));
+}
+
+/* Return the first SHT_SYMTAB section header, or NULL if there is none. */
+static Elf64_Shdr *find_symtab(elf_file_t *elf)
+{
+    for (int i = 0; i < elf->ehdr->e_shnum; i++) {
+        if (elf->shdr[i].sh_type == SHT_SYMTAB) {
+            return &elf->shdr[i];
+        }
+    }
+    return NULL;
+}
+
+/*
+ * Add to `keep` the name of every symbol the module exports, found by
+ * looking for "__ksymtab_<name>" entries in the module's symbol table.
+ * Exits the program if the file cannot be opened or its tables are malformed.
+ */
+static void collect_exported_from_elf(const char *ko_path, strset_t *keep)
+{
+    static const char prefix[] = "__ksymtab_";
+    const size_t prefix_len = sizeof(prefix) - 1;
+
+    elf_file_t elf;
+    if (!elf_open_ro(ko_path, &elf)) {
+        die("failed to open ELF");
+    }
+    Elf64_Shdr *symtab = find_symtab(&elf);
+    if (!symtab) {
+        elf_close(&elf);
+        die("no .symtab");
+    }
+    if (symtab->sh_link >= elf.ehdr->e_shnum || !elf_range_ok(&elf, symtab->sh_offset, symtab->sh_size)) {
+        elf_close(&elf);
+        die("malformed .symtab");
+    }
+    Elf64_Shdr *strsec = &elf.shdr[symtab->sh_link];
+    if (strsec->sh_size == 0 || !elf_range_ok(&elf, strsec->sh_offset, strsec->sh_size)) {
+        elf_close(&elf);
+        die("malformed .strtab");
+    }
+
+    const Elf64_Sym *syms = (const Elf64_Sym *)((const char *)elf.data + symtab->sh_offset);
+    const char *strtab = (const char *)elf.data + strsec->sh_offset;
+    size_t str_size = (size_t)strsec->sh_size;
+    /* A terminated table bounds every string read below. */
+    if (strtab[str_size - 1] != '\0') {
+        elf_close(&elf);
+        die("malformed .strtab");
+    }
+
+    size_t n = symtab->sh_size / sizeof(Elf64_Sym);
+    for (size_t i = 0; i < n; i++) {
+        /* Unnamed symbols and offsets outside the table carry no usable name. */
+        if (syms[i].st_name == 0 || syms[i].st_name >= str_size) {
+            continue;
+        }
+        const char *name = strtab + syms[i].st_name;
+        if (strncmp(name, prefix, prefix_len) == 0 && name[prefix_len] != '\0') {
+            strset_add(keep, name + prefix_len);
+        }
+    }
+    elf_close(&elf);
+}
+
+/* Add every comma-separated, whitespace-trimmed name in `csv` to `keep`. */
+static void split_csv_keep(strset_t *keep, const char
*csv) +{ + if (!csv || csv[0] == '\0') { + return; + } + const char *p = csv; + while (*p) { + while (*p == ',' || isspace((unsigned char)*p)) { + p++; + } + const char *s = p; + while (*p && *p != ',') { + p++; + } + const char *e = p; + while (e > s && isspace((unsigned char)e[-1])) { + e--; + } + if (e > s) { + size_t len = (size_t)(e - s); + if (len < 512) { + char tmp[512]; + memcpy(tmp, s, len); + tmp[len] = '\0'; + strset_add(keep, tmp); + } + } + if (*p == ',') { + p++; + } + } +} + +static void add_keep_file(strset_t *keep, const char *path) +{ + FILE *fp = fopen(path, "r"); + if (!fp) { + die_perror("open keep file"); + } + char line[1024]; + while (fgets(line, sizeof(line), fp)) { + char *hash = strchr(line, '#'); + if (hash) { + *hash = '\0'; + } + char *p = line; + while (*p && isspace((unsigned char)*p)) { + p++; + } + char *e = p + strlen(p); + while (e > p && isspace((unsigned char)e[-1])) { + *--e = '\0'; + } + if (*p) { + strset_add(keep, p); + } + } + fclose(fp); +} + +static void usage(const char *argv0) +{ + fprintf(stderr, + "Usage: %s [options]\n" + "Options:\n" + " --src-root \n" + " --keep \n" + " --keep-file \n" + " --no-default-keep\n" + " --out (default stdout)\n", + argv0); +} + +int main(int argc, char *argv[]) +{ + if (argc < 2) { + usage(argv[0]); + return 1; + } + const char *input_ko = argv[1]; + const char *src_root = "."; + const char *extra_keep = NULL; + const char *keep_file = NULL; + const char *out_path = NULL; + bool default_keep = true; + + for (int i = 2; i < argc; i++) { + if (strcmp(argv[i], "--src-root") == 0 && i + 1 < argc) { + src_root = argv[++i]; + } else if (strcmp(argv[i], "--keep") == 0 && i + 1 < argc) { + extra_keep = argv[++i]; + } else if (strcmp(argv[i], "--keep-file") == 0 && i + 1 < argc) { + keep_file = argv[++i]; + } else if (strcmp(argv[i], "--no-default-keep") == 0) { + default_keep = false; + } else if (strcmp(argv[i], "--out") == 0 && i + 1 < argc) { + out_path = argv[++i]; + } else { + 
usage(argv[0]); + return 1; + } + } + + strset_t keep; + strset_init(&keep); + collect_exported_from_elf(input_ko, &keep); + collect_exported_from_source(src_root, &keep); + if (default_keep) { + strset_add(&keep, "init_module"); + strset_add(&keep, "cleanup_module"); + strset_add(&keep, "__this_module"); + } + split_csv_keep(&keep, extra_keep); + if (keep_file) { + add_keep_file(&keep, keep_file); + } + + qsort(keep.items, keep.len, sizeof(char *), cmp_strptr); + + FILE *out = stdout; + if (out_path) { + out = fopen(out_path, "w"); + if (!out) { + strset_free(&keep); + die_perror("open output"); + } + } + for (size_t i = 0; i < keep.len; i++) { + fprintf(out, "%s\n", keep.items[i]); + } + if (out_path) { + fclose(out); + } + strset_free(&keep); + return 0; +} diff --git a/kernel/tools/localize_symbols.sh b/kernel/tools/localize_symbols.sh new file mode 100755 index 000000000000..3f6a25d91ed5 --- /dev/null +++ b/kernel/tools/localize_symbols.sh @@ -0,0 +1,120 @@ +#!/usr/bin/env bash +set -euo pipefail + +usage() { + cat <<'USAGE' +Usage: + tools/localize_symbols.sh [options] + +Options: + --keep + --keep-file + --no-default-keep + --no-anon-locals +USAGE +} + +if [[ $# -lt 2 ]]; then + usage + exit 1 +fi + +in_ko="$1" +out_ko="$2" +shift 2 + +extra_keep="" +extra_keep_file="" +no_default_keep=0 +anon_locals=1 +script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +repo_root="$(cd "${script_dir}/.." && pwd)" +gen_bin="${repo_root}/gen_keep_globals" +trim_bin="${repo_root}/trim_local_symnames" + +while [[ $# -gt 0 ]]; do + case "$1" in + --keep) + extra_keep="$2" + shift 2 + ;; + --keep-file) + extra_keep_file="$2" + shift 2 + ;; + --no-default-keep) + no_default_keep=1 + shift + ;; + --no-anon-locals) + anon_locals=0 + shift + ;; + -h|--help) + usage + exit 0 + ;; + *) + echo "unknown arg: $1" >&2 + usage + exit 1 + ;; + esac +done + +if [[ ! -f "$in_ko" ]]; then + echo "input file not found: $in_ko" >&2 + exit 1 +fi + +if ! 
command -v llvm-objcopy >/dev/null 2>&1; then
+  echo "llvm-objcopy not found" >&2
+  exit 1
+fi
+
+if [[ ! -x "${gen_bin}" || ! -x "${trim_bin}" ]]; then
+  echo "missing tools: ${gen_bin} or ${trim_bin}" >&2
+  echo "run: make tools" >&2
+  exit 1
+fi
+
+tmp_keep="$(mktemp "${TMPDIR:-/tmp}/ksu-keep.XXXXXX")"
+tmp_out=""
+# Remove scratch files on every exit path; tmp_out is only set for in-place runs.
+cleanup() {
+  rm -f "$tmp_keep"
+  if [[ -n "$tmp_out" ]]; then
+    rm -f "$tmp_out"
+  fi
+}
+trap cleanup EXIT
+
+# Scan the sources next to this script instead of the caller's working
+# directory, which is what the tool falls back to without --src-root.
+gen_args=("$in_ko" "--src-root" "$repo_root" "--out" "$tmp_keep")
+if [[ -n "$extra_keep" ]]; then
+  gen_args+=("--keep" "$extra_keep")
+fi
+if [[ -n "$extra_keep_file" ]]; then
+  gen_args+=("--keep-file" "$extra_keep_file")
+fi
+if [[ "$no_default_keep" -eq 1 ]]; then
+  gen_args+=("--no-default-keep")
+fi
+
+"${gen_bin}" "${gen_args[@]}"
+
+work_ko="$out_ko"
+if [[ "$in_ko" == "$out_ko" ]]; then
+  # In-place run: work on a sibling temp file so the final mv is a rename on
+  # the same filesystem. mktemp creates the file with mode 0600, so copy with
+  # -p to carry the module's own mode over to the result.
+  tmp_out="$(mktemp "${out_ko}.XXXXXX")"
+  cp -pf "$in_ko" "$tmp_out"
+  work_ko="$tmp_out"
+else
+  cp -f "$in_ko" "$work_ko"
+fi
+
+llvm-objcopy --keep-global-symbols="$tmp_keep" "$work_ko"
+llvm-objcopy --strip-unneeded "$work_ko"
+
+if [[ "$anon_locals" -eq 1 ]]; then
+  "${trim_bin}" "$work_ko" --keep-file "$tmp_keep"
+fi
+
+if [[ "$in_ko" == "$out_ko" ]]; then
+  mv -f "$work_ko" "$out_ko"
+  tmp_out=""
+fi
+
+echo "final stats:"
+# The statistics are informational: a missing readelf must not fail a run
+# whose output has already been written.
+readelf_bin=""
+for candidate in readelf llvm-readelf; do
+  if command -v "$candidate" >/dev/null 2>&1; then
+    readelf_bin="$candidate"
+    break
+  fi
+done
+if [[ -n "$readelf_bin" ]]; then
+  "$readelf_bin" -Ws "$out_ko" | awk 'NR>3{all++; if($5=="LOCAL")l++; if($5=="GLOBAL")g++; if($5=="WEAK")w++} END{printf("all=%d local=%d global=%d weak=%d\n",all,l,g,w)}'
+else
+  echo "(readelf not available, skipping)"
+fi
diff --git a/kernel/tools/trim_local_symnames.c b/kernel/tools/trim_local_symnames.c
new file mode 100644
index 000000000000..597ebf00395b
--- /dev/null
+++ b/kernel/tools/trim_local_symnames.c
@@ -0,0 +1,284 @@
+#define _XOPEN_SOURCE 700
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+/* Growable set of unique, owned strings. */
+typedef struct {
+    char **items;
+    size_t len;
+    size_t cap;
+} strset_t;
+
+/* A mapped ELF64 file together with pointers to its header tables. */
+typedef struct {
+    void *data;
+    size_t size;
+    int fd;
+    Elf64_Ehdr *ehdr;
+    Elf64_Shdr *shdr;
+} elf_file_t;
+
+/* Print "error: <msg>" on stderr and exit with status 1. */
+static void die(const char *msg)
+{
+    fprintf(stderr, "error: %s\n", msg);
+    exit(1);
+}
+
+/* Like die(), with the description of the current errno appended. */
+static void die_perror(const
char *msg)
+{
+    fprintf(stderr, "error: %s: %s\n", msg, strerror(errno));
+    exit(1);
+}
+
+/* Put a set into the empty state (no storage attached). */
+static void strset_init(strset_t *s)
+{
+    memset(s, 0, sizeof(*s));
+}
+
+/* Release every stored string and the slot array, leaving the set empty. */
+static void strset_free(strset_t *s)
+{
+    for (size_t i = 0; i < s->len; i++) {
+        free(s->items[i]);
+    }
+    free(s->items);
+    memset(s, 0, sizeof(*s));
+}
+
+/* Linear membership test. */
+static bool strset_contains(const strset_t *s, const char *v)
+{
+    for (size_t i = 0; i < s->len; i++) {
+        if (strcmp(s->items[i], v) == 0) {
+            return true;
+        }
+    }
+    return false;
+}
+
+/*
+ * Insert a private copy of `v`. NULL, empty and already-present strings are
+ * ignored; allocation failure terminates the program.
+ */
+static void strset_add(strset_t *s, const char *v)
+{
+    if (!v || v[0] == '\0' || strset_contains(s, v)) {
+        return;
+    }
+    if (s->len == s->cap) {
+        /* Capacity doubles on demand, starting at 64 slots. */
+        size_t ncap = s->cap ? s->cap * 2 : 64;
+        char **n = realloc(s->items, ncap * sizeof(*n));
+        if (!n) {
+            die("out of memory");
+        }
+        s->items = n;
+        s->cap = ncap;
+    }
+    s->items[s->len] = strdup(v);
+    if (!s->items[s->len]) {
+        die("out of memory");
+    }
+    s->len++;
+}
+
+/* Space, tab, newline or carriage return. */
+static bool is_blank_char(char c)
+{
+    return c == ' ' || c == '\t' || c == '\n' || c == '\r';
+}
+
+/*
+ * Add one symbol per line from `path` to `keep`. Text after '#' is a comment;
+ * surrounding blanks and empty lines are ignored. Lines may be of any length.
+ * Exits the program if the file cannot be opened or read.
+ */
+static void add_keep_file(strset_t *keep, const char *path)
+{
+    FILE *fp = fopen(path, "r");
+    if (!fp) {
+        die_perror("open keep file");
+    }
+    char *line = NULL;
+    size_t cap = 0;
+    /* getline() grows the buffer, so a long line is never split into two entries. */
+    while (getline(&line, &cap, fp) != -1) {
+        char *hash = strchr(line, '#');
+        if (hash) {
+            *hash = '\0';
+        }
+        char *p = line;
+        while (is_blank_char(*p)) {
+            p++;
+        }
+        char *e = p + strlen(p);
+        while (e > p && is_blank_char(e[-1])) {
+            *--e = '\0';
+        }
+        if (*p) {
+            strset_add(keep, p);
+        }
+    }
+    bool read_failed = ferror(fp) != 0;
+    free(line);
+    fclose(fp);
+    if (read_failed) {
+        die("failed to read keep file");
+    }
+}
+
+/*
+ * Map `path` read-write (shared, so stores reach the file) and check that it
+ * is an ELF64 file. Returns false on any failure.
+ */
+static bool elf_open_rw(const char *path, elf_file_t *elf)
+{
+    memset(elf, 0, sizeof(*elf));
+    elf->fd = open(path, O_RDWR);
+    if (elf->fd < 0) {
+        return false;
+    }
+    struct stat st;
+    /* Anything shorter than an ELF header cannot be parsed safely. */
+    if (fstat(elf->fd, &st) < 0 || st.st_size < (off_t)sizeof(Elf64_Ehdr)) {
+        close(elf->fd);
+        return false;
+    }
+    elf->size = (size_t)st.st_size;
+    elf->data = mmap(NULL, elf->size, PROT_READ | PROT_WRITE, MAP_SHARED, elf->fd, 0);
+    if (elf->data == MAP_FAILED) {
+        close(elf->fd);
+        return false;
+    }
+    elf->ehdr = (Elf64_Ehdr *)elf->data;
+    if
(memcmp(elf->ehdr->e_ident, ELFMAG, SELFMAG) != 0 || elf->ehdr->e_ident[EI_CLASS] != ELFCLASS64) {
+        munmap(elf->data, elf->size);
+        close(elf->fd);
+        return false;
+    }
+    elf->shdr = (Elf64_Shdr *)((char *)elf->data + elf->ehdr->e_shoff);
+    return true;
+}
+
+/*
+ * Flush, unmap and close a file opened by elf_open_rw(); safe on a zeroed
+ * struct. Returns false when the flush to disk failed.
+ */
+static bool elf_close(elf_file_t *elf)
+{
+    if (!elf || !elf->data) {
+        return true;
+    }
+    bool flushed = msync(elf->data, elf->size, MS_SYNC) == 0;
+    munmap(elf->data, elf->size);
+    close(elf->fd);
+    memset(elf, 0, sizeof(*elf));
+    return flushed;
+}
+
+/* Return the first SHT_SYMTAB section header, or NULL if there is none. */
+static Elf64_Shdr *find_symtab(elf_file_t *elf)
+{
+    for (int i = 0; i < elf->ehdr->e_shnum; i++) {
+        if (elf->shdr[i].sh_type == SHT_SYMTAB) {
+            return &elf->shdr[i];
+        }
+    }
+    return NULL;
+}
+
+/* Print the command-line synopsis on stderr. */
+static void usage(const char *argv0)
+{
+    fprintf(stderr,
+            "Usage: %s <target.ko> [--keep-file <path>]\n"
+            "Anonymize names of defined LOCAL symbols and compact .strtab\n",
+            argv0);
+}
+
+/* True when the byte range [off, off + len) lies inside the mapped file. */
+static bool elf_range_ok(const elf_file_t *elf, uint64_t off, uint64_t len)
+{
+    return off <= elf->size && len <= elf->size - off;
+}
+
+/*
+ * Blank the names of defined LOCAL symbols that are not in `keep`, then
+ * rebuild .strtab with only the names still referenced.
+ *
+ * All validation and allocation happens before the first store into the
+ * mapping, so an error return leaves the file untouched. If the rebuilt table
+ * cannot be used safely, the table is left as it is and only the
+ * anonymisation is applied; no symbol ever loses a name it was meant to keep.
+ *
+ * Returns NULL on success or a static error message.
+ */
+static const char *trim_symbols(elf_file_t *elf, const strset_t *keep)
+{
+    const Elf64_Ehdr *eh = elf->ehdr;
+
+    /* elf_open_rw() does not bound the section header table; do it before indexing. */
+    if (eh->e_shentsize != sizeof(Elf64_Shdr) ||
+        !elf_range_ok(elf, eh->e_shoff, (uint64_t)eh->e_shnum * sizeof(Elf64_Shdr))) {
+        return "malformed section header table";
+    }
+    Elf64_Shdr *symtab = find_symtab(elf);
+    if (!symtab) {
+        return "no .symtab found";
+    }
+    if (symtab->sh_link >= eh->e_shnum || !elf_range_ok(elf, symtab->sh_offset, symtab->sh_size)) {
+        return "malformed .symtab";
+    }
+    Elf64_Shdr *strsec = &elf->shdr[symtab->sh_link];
+    /* st_name is 32 bits wide and UINT32_MAX is the "unmapped" marker below. */
+    if (strsec->sh_type != SHT_STRTAB || strsec->sh_size == 0 || strsec->sh_size >= UINT32_MAX ||
+        !elf_range_ok(elf, strsec->sh_offset, strsec->sh_size)) {
+        return "malformed .strtab";
+    }
+
+    Elf64_Sym *syms = (Elf64_Sym *)((char *)elf->data + symtab->sh_offset);
+    char *strtab = (char *)elf->data + strsec->sh_offset;
+    size_t sym_count = symtab->sh_size / sizeof(Elf64_Sym);
+    size_t str_size = (size_t)strsec->sh_size;
+
+    /* A terminated table bounds every string read below. */
+    if (strtab[str_size - 1] != '\0') {
+        return "unterminated .strtab";
+    }
+    for (size_t i = 0; i < sym_count; i++) {
+        if (syms[i].st_name >= str_size) {
+            return "symbol name offset outside .strtab";
+        }
+    }
+
+    uint32_t *map = malloc(str_size * sizeof(*map));
+    uint32_t *new_names = malloc((sym_count ? sym_count : 1) * sizeof(*new_names));
+    char *newtab = malloc(str_size);
+    if (!map || !new_names || !newtab) {
+        free(map);
+        free(new_names);
+        free(newtab);
+        return "out of memory";
+    }
+
+    /* Step 1: drop the names of defined local symbols that are not kept. */
+    size_t anonymized = 0;
+    for (size_t i = 0; i < sym_count; i++) {
+        unsigned bind = ELF64_ST_BIND(syms[i].st_info);
+        unsigned type = ELF64_ST_TYPE(syms[i].st_info);
+        if (bind != STB_LOCAL || syms[i].st_name == 0 || syms[i].st_shndx == SHN_UNDEF || type == STT_FILE ||
+            type == STT_SECTION) {
+            continue;
+        }
+        const char *name = strtab + syms[i].st_name;
+        if (name[0] == '\0' || strset_contains(keep, name)) {
+            continue;
+        }
+        syms[i].st_name = 0;
+        anonymized++;
+    }
+
+    /*
+     * Step 2: plan the rebuilt table without touching the file. A table that
+     * also holds the section names must not be rebuilt from symbols alone.
+     */
+    bool compact = symtab->sh_link != eh->e_shstrndx;
+    size_t new_size = 1;
+    if (compact) {
+        for (size_t i = 0; i < str_size; i++) {
+            map[i] = UINT32_MAX;
+        }
+        map[0] = 0;
+        newtab[0] = '\0';
+        for (size_t i = 0; i < sym_count; i++) {
+            uint32_t old = syms[i].st_name;
+            if (map[old] == UINT32_MAX) {
+                size_t len = strlen(strtab + old);
+                /* Names stored as shared tails can need more room than the original table. */
+                if (len + 1 > str_size - new_size) {
+                    compact = false;
+                    break;
+                }
+                memcpy(newtab + new_size, strtab + old, len + 1);
+                map[old] = (uint32_t)new_size;
+                new_size += len + 1;
+            }
+            new_names[i] = map[old];
+        }
+    }
+
+    /* Step 3: commit the plan, or keep the existing table. */
+    if (compact) {
+        for (size_t i = 0; i < sym_count; i++) {
+            syms[i].st_name = new_names[i];
+        }
+        memcpy(strtab, newtab, new_size);
+        memset(strtab + new_size, 0, str_size - new_size);
+        strsec->sh_size = new_size;
+    } else {
+        fprintf(stderr, "warning: .strtab left uncompacted\n");
+        new_size = str_size;
+    }
+
+    printf("trimmed: anonymized=%zu strtab=%zu->%zu\n", anonymized, str_size, new_size);
+
+    free(map);
+    free(new_names);
+    free(newtab);
+    return NULL;
+}
+
+/*
+ * Entry point: trim the symbol names of the ELF file named by argv[1] in
+ * place, keeping the names listed in the optional --keep-file.
+ * Returns 0 on success, 1 on a usage error; other failures exit with 1.
+ */
+int main(int argc, char *argv[])
+{
+    if (argc < 2) {
+        usage(argv[0]);
+        return 1;
+    }
+    const char *target = argv[1];
+    const char *keep_file = NULL;
+    for (int i = 2; i < argc; i++) {
+        if (strcmp(argv[i], "--keep-file") == 0 && i + 1 < argc) {
+            keep_file = argv[++i];
+        } else {
+            usage(argv[0]);
+            return 1;
+        }
+    }
+
+    strset_t keep;
+    strset_init(&keep);
+    if (keep_file) {
+        add_keep_file(&keep, keep_file);
+    }
+
+    elf_file_t elf;
+    if (!elf_open_rw(target, &elf)) {
+        strset_free(&keep);
+        die("failed to open target ELF");
+    }
+
+    const char *err = trim_symbols(&elf, &keep);
+    bool flushed = elf_close(&elf);
+    strset_free(&keep);
+    if (err) {
+        die(err);
+    }
+    if (!flushed) {
+        die("failed to flush changes to target ELF");
+    }
+    return 0;
+}