From 83484a5b454e49853c05d7cc0fc01d4a2b8a2761 Mon Sep 17 00:00:00 2001 From: nishant-sachdeva Date: Fri, 7 Mar 2025 14:09:14 +0530 Subject: [PATCH 01/15] updating to v4 for artifact actions --- .github/workflows/upload-pypi.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/upload-pypi.yml b/.github/workflows/upload-pypi.yml index 1448d00cd..d8a25932b 100644 --- a/.github/workflows/upload-pypi.yml +++ b/.github/workflows/upload-pypi.yml @@ -25,7 +25,7 @@ jobs: container: ghcr.io/iith-compilers/manylinux2014-llvm/manylinux2014-llvm:x86-llvm20 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Build IR2Vec run: bash Manylinux2014_Compliant_Source/pkg/build.sh @@ -33,7 +33,7 @@ jobs: - name: Build sdist run: cd Manylinux2014_Compliant_Source/pkg && pipx run build --sdist - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: path: Manylinux2014_Compliant_Source/pkg/dist/*.tar.gz From 9f9022fcedba885844c5ea843f3f995147d5ba37 Mon Sep 17 00:00:00 2001 From: nishant_sachdeva Date: Fri, 7 Mar 2025 19:06:02 +0530 Subject: [PATCH 02/15] adding version 20 for clang and clang++ From 3e131f5d1d60095969c1d07219d866282e443dea Mon Sep 17 00:00:00 2001 From: nishant_sachdeva Date: Fri, 7 Mar 2025 19:35:44 +0530 Subject: [PATCH 03/15] test install yum packages in build.sh --- Manylinux2014_Compliant_Source/pkg/build.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Manylinux2014_Compliant_Source/pkg/build.sh b/Manylinux2014_Compliant_Source/pkg/build.sh index c252eb7b8..d9a4e4f02 100644 --- a/Manylinux2014_Compliant_Source/pkg/build.sh +++ b/Manylinux2014_Compliant_Source/pkg/build.sh @@ -4,6 +4,8 @@ set -eu export CXX=clang++ export CC=clang +yum -y install gcc gcc-c++ glibc-devel glibc-static libgcc glibc glibc-devel libstdc++-devel + rm -rf build || true mkdir build cd build From 0a15d9e278aac9d04fb19c55d78cc908177d8749 Mon Sep 17 00:00:00 2001 From: nishant-sachdeva Date: Sat, 8 Mar 2025 00:50:35 +0530 Subject: [PATCH 04/15] manylinux_2_28 addition to Dockerfile --- .devcontainer/devcontainer.json | 2 +- .github/workflows/upload-pypi.yml | 2 +- .github/workflows/wheel.yml | 2 +- Manylinux2014_Compliant_Source/.gitignore | 1 - .../manylinux-llvm/Dockerfile | 45 ++++++++++--------- 5 files changed, 26 insertions(+), 26 deletions(-) diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 2ebd5ed16..64d6ce1c5 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -3,7 +3,7 @@ { "name": "LLVM Manylinux", // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile - "image": "ghcr.io/iith-compilers/manylinux2014-llvm/manylinux2014-llvm:x86-llvm20" + "image": "ghcr.io/iith-compilers/manylinux2014-llvm/manylinux2014-llvm:x86_2_28_llvm20" // Features to add to the dev container. More info: https://containers.dev/features. // "features": {}, diff --git a/.github/workflows/upload-pypi.yml b/.github/workflows/upload-pypi.yml index d8a25932b..14b37d82e 100644 --- a/.github/workflows/upload-pypi.yml +++ b/.github/workflows/upload-pypi.yml @@ -22,7 +22,7 @@ jobs: build_sdist: runs-on: ubuntu-latest - container: ghcr.io/iith-compilers/manylinux2014-llvm/manylinux2014-llvm:x86-llvm20 + container: ghcr.io/iith-compilers/manylinux2014-llvm/manylinux2014-llvm:x86_2_28_llvm20 steps: - uses: actions/checkout@v4 diff --git a/.github/workflows/wheel.yml b/.github/workflows/wheel.yml index e050fe9aa..acdca4ac0 100644 --- a/.github/workflows/wheel.yml +++ b/.github/workflows/wheel.yml @@ -20,7 +20,7 @@ jobs: env: CIBW_SKIP: "pp* *-musllinux_*" CIBW_ARCHS: "x86_64" - CIBW_MANYLINUX_X86_64_IMAGE: "ghcr.io/iith-compilers/manylinux2014-llvm/manylinux2014-llvm:x86-llvm20" + CIBW_MANYLINUX_X86_64_IMAGE: "ghcr.io/iith-compilers/manylinux2014-llvm/manylinux2014-llvm:x86_2_28_llvm20" CIBW_BEFORE_ALL: "bash Manylinux2014_Compliant_Source/pkg/build.sh" CIBW_TEST_REQUIRES: pytest CIBW_TEST_COMMAND: "pytest {project}/Manylinux2014_Compliant_Source/pkg/tests" diff --git a/Manylinux2014_Compliant_Source/.gitignore b/Manylinux2014_Compliant_Source/.gitignore index 8a7b2a565..e69de29bb 100644 --- a/Manylinux2014_Compliant_Source/.gitignore +++ b/Manylinux2014_Compliant_Source/.gitignore @@ -1 +0,0 @@ -manylinux2014-LLVM/ diff --git a/Manylinux2014_Compliant_Source/manylinux-llvm/Dockerfile b/Manylinux2014_Compliant_Source/manylinux-llvm/Dockerfile index 97c9a6624..cbde416b2 100644 --- a/Manylinux2014_Compliant_Source/manylinux-llvm/Dockerfile +++ b/Manylinux2014_Compliant_Source/manylinux-llvm/Dockerfile @@ -1,7 +1,8 @@ -FROM quay.io/pypa/manylinux2014_x86_64 as builder +FROM quay.io/pypa/manylinux_2_28_x86_64 AS builder LABEL maintainer="Shamil K (noteness@riseup.net)" RUN yum -y install cmake wget openssl-devel +RUN yum -y install gcc gcc-c++ glibc-devel glibc-static libgcc glibc glibc-devel libstdc++-devel RUN mkdir /root/destdir @@ -10,36 +11,36 @@ ARG CMAKE_VERSION="3.26.4" RUN wget -q "https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}.tar.gz" \ && tar -xf "cmake-${CMAKE_VERSION}.tar.gz" WORKDIR "/root/cmake/cmake-${CMAKE_VERSION}" -RUN cmake -DCMAKE_BUILD_TYPE=Release . \ - && make -j "$(nproc)" \ - && cmake --install . \ - && cmake --install . --prefix /root/destdir +RUN cmake -DCMAKE_BUILD_TYPE=Release . +RUN make -j "$(nproc)" +RUN cmake --install . +RUN cmake --install . --prefix /root/destdir WORKDIR /root/ninja -ARG NINJA_VERSION="1.11.1" +ARG NINJA_VERSION="1.12.1" RUN wget -q "https://github.com/ninja-build/ninja/archive/refs/tags/v${NINJA_VERSION}.tar.gz" \ && tar -xf "v${NINJA_VERSION}.tar.gz" WORKDIR "/root/ninja/ninja-${NINJA_VERSION}" -RUN cmake -DCMAKE_BUILD_TYPE=Release -B build \ - && cmake --build build -j"$(nproc)" \ - && cmake --install build \ - && cmake --install build --prefix /root/destdir +RUN cmake -DCMAKE_BUILD_TYPE=Release -B build +RUN cmake --build build -j"$(nproc)" +RUN cmake --install build +RUN cmake --install build --prefix /root/destdir WORKDIR /root/mold -ARG MOLD_VERSION="1.11.0" +ARG MOLD_VERSION="2.37.0" RUN wget -q "https://github.com/rui314/mold/archive/refs/tags/v${MOLD_VERSION}.tar.gz" \ && tar -xf "v${MOLD_VERSION}.tar.gz" WORKDIR "/root/mold/mold-${MOLD_VERSION}" -RUN cmake -DCMAKE_BUILD_TYPE=Release -G Ninja -B build \ - && cmake --build build -j"$(nproc)" \ - && cmake --install build \ - && cmake --install build --prefix /root/destdir +RUN cmake -DCMAKE_BUILD_TYPE=Release -G Ninja -B build +RUN cmake --build build -j"$(nproc)" +RUN cmake --install build +RUN cmake --install build --prefix /root/destdir WORKDIR /root/llvm ARG LLVM_VERSION="20.1.0" -RUN wget -q "https://github.com/llvm/llvm-project/releases/download/llvmorg-${LLVM_VERSION}-rc2/llvm-project-${LLVM_VERSION}-rc2.src.tar.xz" \ - && tar -xf "llvm-project-${LLVM_VERSION}-rc2.src.tar.xz" -WORKDIR /root/llvm/llvm-project-${LLVM_VERSION}-rc2.src/build +RUN wget -q "https://github.com/llvm/llvm-project/releases/download/llvmorg-${LLVM_VERSION}/llvm-project-${LLVM_VERSION}.src.tar.xz" \ + && tar -xf "llvm-project-${LLVM_VERSION}.src.tar.xz" +WORKDIR /root/llvm/llvm-project-${LLVM_VERSION}.src/build RUN cmake -G Ninja -DCMAKE_BUILD_TYPE=Release \ -DLLVM_ENABLE_PROJECTS='clang' \ -DCLANG_ENABLE_BOOTSTRAP=On \ @@ -49,11 +50,11 @@ RUN cmake -G Ninja -DCMAKE_BUILD_TYPE=Release \ -DBOOTSTRAP_LLVM_USE_SPLIT_DWARF=On \ -DBOOTSTRAP_LLVM_USE_LINKER=mold \ -DCMAKE_INSTALL_PREFIX=/root/destdir \ - ../llvm \ - && ninja stage2 \ - && ninja stage2-install + ../llvm +RUN ninja stage2 -j "$(nproc)" +RUN ninja stage2-install -j "$(nproc)" -FROM quay.io/pypa/manylinux2014_x86_64 +FROM quay.io/pypa/manylinux_2_28_x86_64 COPY --from=builder /root/destdir /usr/local/ COPY entrypoint /usr/local/bin/entrypoint From 39e7dcb60d93dd09df844734f79a08e02d968890 Mon Sep 17 00:00:00 2001 From: nishant-sachdeva Date: Sat, 8 Mar 2025 00:53:37 +0530 Subject: [PATCH 05/15] nit - precommit format --- Manylinux2014_Compliant_Source/manylinux-llvm/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Manylinux2014_Compliant_Source/manylinux-llvm/Dockerfile b/Manylinux2014_Compliant_Source/manylinux-llvm/Dockerfile index cbde416b2..4a596174f 100644 --- a/Manylinux2014_Compliant_Source/manylinux-llvm/Dockerfile +++ b/Manylinux2014_Compliant_Source/manylinux-llvm/Dockerfile @@ -51,7 +51,7 @@ RUN cmake -G Ninja -DCMAKE_BUILD_TYPE=Release \ -DBOOTSTRAP_LLVM_USE_LINKER=mold \ -DCMAKE_INSTALL_PREFIX=/root/destdir \ ../llvm -RUN ninja stage2 -j "$(nproc)" +RUN ninja stage2 -j "$(nproc)" RUN ninja stage2-install -j "$(nproc)" FROM quay.io/pypa/manylinux_2_28_x86_64 From 8790cf0e1ffba61639379ffbfaa0b2bb8d03235e Mon Sep 17 00:00:00 2001 From: nishant_sachdeva Date: Sat, 8 Mar 2025 01:07:21 +0530 Subject: [PATCH 06/15] test commit - test_ir2vec --- .../pkg/tests/test_ir2vec.py | 190 +++++++++--------- 1 file changed, 95 insertions(+), 95 deletions(-) diff --git a/Manylinux2014_Compliant_Source/pkg/tests/test_ir2vec.py b/Manylinux2014_Compliant_Source/pkg/tests/test_ir2vec.py index 19165c480..3ee8a4d10 100644 --- a/Manylinux2014_Compliant_Source/pkg/tests/test_ir2vec.py +++ b/Manylinux2014_Compliant_Source/pkg/tests/test_ir2vec.py @@ -120,131 +120,131 @@ def test_fa_p(): assert vec == pytest.approx(p_vectors[idx], abs=ABS_ACCURACY) -def test_sym_p(): - p_vectors = [] - for file in ll_files: - full_path = str((TEST_SUITE_DIR / file).resolve()).strip() +# def test_sym_p(): +# p_vectors = [] +# for file in ll_files: +# full_path = str((TEST_SUITE_DIR / file).resolve()).strip() - initObj = ir2vec.initEmbedding(full_path, "sym", "p") - assert initObj is not None +# initObj = ir2vec.initEmbedding(full_path, "sym", "p") +# assert initObj is not None - progVector1 = ir2vec.getProgramVector(initObj) - assert_valid_progVector(progVector1) +# progVector1 = ir2vec.getProgramVector(initObj) +# assert_valid_progVector(progVector1) - progVector2 = initObj.getProgramVector() - assert_valid_progVector(progVector2) +# progVector2 = initObj.getProgramVector() +# assert_valid_progVector(progVector2) - instVecList = ir2vec.getInstructionVectors(initObj) - assert_valid_instructionVectors(instVecList) +# instVecList = ir2vec.getInstructionVectors(initObj) +# assert_valid_instructionVectors(instVecList) - instVecList2 = initObj.getInstructionVectors() - assert_valid_instructionVectors(instVecList2) +# instVecList2 = initObj.getInstructionVectors() +# assert_valid_instructionVectors(instVecList2) - for idx, vec in enumerate(progVector1): - assert vec == pytest.approx(progVector2[idx], abs=ABS_ACCURACY) +# for idx, vec in enumerate(progVector1): +# assert vec == pytest.approx(progVector2[idx], abs=ABS_ACCURACY) - p_vectors.append(progVector1) +# p_vectors.append(progVector1) - print(TEST_SUITE_DIR) - p_vectors_oracle = read_p_file( - TEST_SUITE_DIR / "oracle" / f"SYM_{SEED_VERSION}_p" / "ir2vec.txt" - ) +# print(TEST_SUITE_DIR) +# p_vectors_oracle = read_p_file( +# TEST_SUITE_DIR / "oracle" / f"SYM_{SEED_VERSION}_p" / "ir2vec.txt" +# ) - for idx, vec in enumerate(p_vectors_oracle): - assert vec == pytest.approx(p_vectors[idx], abs=ABS_ACCURACY) +# for idx, vec in enumerate(p_vectors_oracle): +# assert vec == pytest.approx(p_vectors[idx], abs=ABS_ACCURACY) -def test_fa_f(): - f_vecs = defaultdict(dict) - for file in ll_files: - path = (TEST_SUITE_DIR / file).resolve() - full_path = str(path).strip() +# def test_fa_f(): +# f_vecs = defaultdict(dict) +# for file in ll_files: +# path = (TEST_SUITE_DIR / file).resolve() +# full_path = str(path).strip() - initObj = ir2vec.initEmbedding(full_path, "fa", "f", 300) - assert initObj is not None +# initObj = ir2vec.initEmbedding(full_path, "fa", "f", 300) +# assert initObj is not None - functionVectorMap = ir2vec.getFunctionVectors(initObj) - assert_valid_functionVector(functionVectorMap) +# functionVectorMap = ir2vec.getFunctionVectors(initObj) +# assert_valid_functionVector(functionVectorMap) - functionVectorMap2 = initObj.getFunctionVectors() - assert_valid_functionVector(functionVectorMap2) +# functionVectorMap2 = initObj.getFunctionVectors() +# assert_valid_functionVector(functionVectorMap2) - for fun, funcObj in functionVectorMap.items(): - assert fun == funcObj["demangledName"] +# for fun, funcObj in functionVectorMap.items(): +# assert fun == funcObj["demangledName"] - f_vecs[path.name.strip()][fun] = funcObj["vector"] +# f_vecs[path.name.strip()][fun] = funcObj["vector"] - functionOutput1 = ir2vec.getFunctionVectors( - initObj, - funcObj["actualName"], - ) - assert_valid_functionVector(functionOutput1) +# functionOutput1 = ir2vec.getFunctionVectors( +# initObj, +# funcObj["actualName"], +# ) +# assert_valid_functionVector(functionOutput1) - functionOutput2 = initObj.getFunctionVectors(funcObj["actualName"]) - assert_valid_functionVector(functionOutput2) +# functionOutput2 = initObj.getFunctionVectors(funcObj["actualName"]) +# assert_valid_functionVector(functionOutput2) - assert functionOutput1[fun]["vector"] == pytest.approx( - functionOutput2[fun]["vector"], abs=ABS_ACCURACY - ) +# assert functionOutput1[fun]["vector"] == pytest.approx( +# functionOutput2[fun]["vector"], abs=ABS_ACCURACY +# ) - assert funcObj["vector"] == pytest.approx( - functionOutput1[fun]["vector"], abs=ABS_ACCURACY - ) +# assert funcObj["vector"] == pytest.approx( +# functionOutput1[fun]["vector"], abs=ABS_ACCURACY +# ) - print(TEST_SUITE_DIR) - f_vecs_oracle = read_f_file( - TEST_SUITE_DIR / "oracle" / f"FA_{SEED_VERSION}_f" / "ir2vec.txt" - ) - for pname, funs in f_vecs_oracle.items(): - for fname, vec in funs.items(): - assert vec == pytest.approx( - f_vecs[pname][fname], abs=ABS_ACCURACY - ), f"Checking {pname}: {fname}" +# print(TEST_SUITE_DIR) +# f_vecs_oracle = read_f_file( +# TEST_SUITE_DIR / "oracle" / f"FA_{SEED_VERSION}_f" / "ir2vec.txt" +# ) +# for pname, funs in f_vecs_oracle.items(): +# for fname, vec in funs.items(): +# assert vec == pytest.approx( +# f_vecs[pname][fname], abs=ABS_ACCURACY +# ), f"Checking {pname}: {fname}" -def test_sym_f(): - f_vecs = defaultdict(dict) - for file in ll_files: - path = (TEST_SUITE_DIR / file).resolve() - full_path = str(path).strip() +# def test_sym_f(): +# f_vecs = defaultdict(dict) +# for file in ll_files: +# path = (TEST_SUITE_DIR / file).resolve() +# full_path = str(path).strip() - initObj = ir2vec.initEmbedding(full_path, "sym", "f") - assert initObj is not None +# initObj = ir2vec.initEmbedding(full_path, "sym", "f") +# assert initObj is not None - functionVectorMap = ir2vec.getFunctionVectors(initObj) - assert_valid_functionVector(functionVectorMap) +# functionVectorMap = ir2vec.getFunctionVectors(initObj) +# assert_valid_functionVector(functionVectorMap) - functionVectorMap2 = initObj.getFunctionVectors() - assert_valid_functionVector(functionVectorMap2) +# functionVectorMap2 = initObj.getFunctionVectors() +# assert_valid_functionVector(functionVectorMap2) - for fun, funcObj in functionVectorMap.items(): - assert fun == funcObj["demangledName"] +# for fun, funcObj in functionVectorMap.items(): +# assert fun == funcObj["demangledName"] - f_vecs[path.name.strip()][fun] = funcObj["vector"] +# f_vecs[path.name.strip()][fun] = funcObj["vector"] - functionOutput1 = ir2vec.getFunctionVectors( - initObj, - funcObj["actualName"], - ) - assert_valid_functionVector(functionOutput1) +# functionOutput1 = ir2vec.getFunctionVectors( +# initObj, +# funcObj["actualName"], +# ) +# assert_valid_functionVector(functionOutput1) - functionOutput2 = initObj.getFunctionVectors(funcObj["actualName"]) - assert_valid_functionVector(functionOutput2) +# functionOutput2 = initObj.getFunctionVectors(funcObj["actualName"]) +# assert_valid_functionVector(functionOutput2) - assert functionOutput1[fun]["vector"] == pytest.approx( - functionOutput2[fun]["vector"], abs=ABS_ACCURACY - ) +# assert functionOutput1[fun]["vector"] == pytest.approx( +# functionOutput2[fun]["vector"], abs=ABS_ACCURACY +# ) - assert funcObj["vector"] == pytest.approx( - functionOutput1[fun]["vector"], abs=ABS_ACCURACY - ) +# assert funcObj["vector"] == pytest.approx( +# functionOutput1[fun]["vector"], abs=ABS_ACCURACY +# ) - print(TEST_SUITE_DIR) - f_vecs_oracle = read_f_file( - TEST_SUITE_DIR / "oracle" / f"SYM_{SEED_VERSION}_f" / "ir2vec.txt" - ) - for pname, funs in f_vecs_oracle.items(): - for fname, vec in funs.items(): - assert vec == pytest.approx( - f_vecs[pname][fname], abs=ABS_ACCURACY - ), f"Checking {pname}: {fname}" +# print(TEST_SUITE_DIR) +# f_vecs_oracle = read_f_file( +# TEST_SUITE_DIR / "oracle" / f"SYM_{SEED_VERSION}_f" / "ir2vec.txt" +# ) +# for pname, funs in f_vecs_oracle.items(): +# for fname, vec in funs.items(): +# assert vec == pytest.approx( +# f_vecs[pname][fname], abs=ABS_ACCURACY +# ), f"Checking {pname}: {fname}" From 9848999528a6ab106f76e7b4e695d9fa0aa66b9b Mon Sep 17 00:00:00 2001 From: nishant_sachdeva Date: Sat, 8 Mar 2025 01:12:54 +0530 Subject: [PATCH 07/15] debug commit - test sym p added for testing --- .../pkg/tests/test_ir2vec.py | 46 +++++++++---------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/Manylinux2014_Compliant_Source/pkg/tests/test_ir2vec.py b/Manylinux2014_Compliant_Source/pkg/tests/test_ir2vec.py index 3ee8a4d10..039bbe4b8 100644 --- a/Manylinux2014_Compliant_Source/pkg/tests/test_ir2vec.py +++ b/Manylinux2014_Compliant_Source/pkg/tests/test_ir2vec.py @@ -120,38 +120,38 @@ def test_fa_p(): assert vec == pytest.approx(p_vectors[idx], abs=ABS_ACCURACY) -# def test_sym_p(): -# p_vectors = [] -# for file in ll_files: -# full_path = str((TEST_SUITE_DIR / file).resolve()).strip() +def test_sym_p(): + p_vectors = [] + for file in ll_files: + full_path = str((TEST_SUITE_DIR / file).resolve()).strip() -# initObj = ir2vec.initEmbedding(full_path, "sym", "p") -# assert initObj is not None + initObj = ir2vec.initEmbedding(full_path, "sym", "p") + assert initObj is not None -# progVector1 = ir2vec.getProgramVector(initObj) -# assert_valid_progVector(progVector1) + progVector1 = ir2vec.getProgramVector(initObj) + assert_valid_progVector(progVector1) -# progVector2 = initObj.getProgramVector() -# assert_valid_progVector(progVector2) + progVector2 = initObj.getProgramVector() + assert_valid_progVector(progVector2) -# instVecList = ir2vec.getInstructionVectors(initObj) -# assert_valid_instructionVectors(instVecList) + instVecList = ir2vec.getInstructionVectors(initObj) + assert_valid_instructionVectors(instVecList) -# instVecList2 = initObj.getInstructionVectors() -# assert_valid_instructionVectors(instVecList2) + instVecList2 = initObj.getInstructionVectors() + assert_valid_instructionVectors(instVecList2) -# for idx, vec in enumerate(progVector1): -# assert vec == pytest.approx(progVector2[idx], abs=ABS_ACCURACY) + for idx, vec in enumerate(progVector1): + assert vec == pytest.approx(progVector2[idx], abs=ABS_ACCURACY) -# p_vectors.append(progVector1) + p_vectors.append(progVector1) -# print(TEST_SUITE_DIR) -# p_vectors_oracle = read_p_file( -# TEST_SUITE_DIR / "oracle" / f"SYM_{SEED_VERSION}_p" / "ir2vec.txt" -# ) + print(TEST_SUITE_DIR) + p_vectors_oracle = read_p_file( + TEST_SUITE_DIR / "oracle" / f"SYM_{SEED_VERSION}_p" / "ir2vec.txt" + ) -# for idx, vec in enumerate(p_vectors_oracle): -# assert vec == pytest.approx(p_vectors[idx], abs=ABS_ACCURACY) + for idx, vec in enumerate(p_vectors_oracle): + assert vec == pytest.approx(p_vectors[idx], abs=ABS_ACCURACY) # def test_fa_f(): From 0efb0c04c7b69e005f69bc7d8131dc89385490b2 Mon Sep 17 00:00:00 2001 From: nishant_sachdeva Date: Sat, 8 Mar 2025 01:17:57 +0530 Subject: [PATCH 08/15] debug commit - added test fa f added to test_ir2vec --- .../pkg/tests/test_ir2vec.py | 72 +++++++++---------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/Manylinux2014_Compliant_Source/pkg/tests/test_ir2vec.py b/Manylinux2014_Compliant_Source/pkg/tests/test_ir2vec.py index 039bbe4b8..bcf3a6fb0 100644 --- a/Manylinux2014_Compliant_Source/pkg/tests/test_ir2vec.py +++ b/Manylinux2014_Compliant_Source/pkg/tests/test_ir2vec.py @@ -154,52 +154,52 @@ def test_sym_p(): assert vec == pytest.approx(p_vectors[idx], abs=ABS_ACCURACY) -# def test_fa_f(): -# f_vecs = defaultdict(dict) -# for file in ll_files: -# path = (TEST_SUITE_DIR / file).resolve() -# full_path = str(path).strip() +def test_fa_f(): + f_vecs = defaultdict(dict) + for file in ll_files: + path = (TEST_SUITE_DIR / file).resolve() + full_path = str(path).strip() -# initObj = ir2vec.initEmbedding(full_path, "fa", "f", 300) -# assert initObj is not None + initObj = ir2vec.initEmbedding(full_path, "fa", "f", 300) + assert initObj is not None -# functionVectorMap = ir2vec.getFunctionVectors(initObj) -# assert_valid_functionVector(functionVectorMap) + functionVectorMap = ir2vec.getFunctionVectors(initObj) + assert_valid_functionVector(functionVectorMap) -# functionVectorMap2 = initObj.getFunctionVectors() -# assert_valid_functionVector(functionVectorMap2) + functionVectorMap2 = initObj.getFunctionVectors() + assert_valid_functionVector(functionVectorMap2) -# for fun, funcObj in functionVectorMap.items(): -# assert fun == funcObj["demangledName"] + for fun, funcObj in functionVectorMap.items(): + assert fun == funcObj["demangledName"] -# f_vecs[path.name.strip()][fun] = funcObj["vector"] + f_vecs[path.name.strip()][fun] = funcObj["vector"] -# functionOutput1 = ir2vec.getFunctionVectors( -# initObj, -# funcObj["actualName"], -# ) -# assert_valid_functionVector(functionOutput1) + functionOutput1 = ir2vec.getFunctionVectors( + initObj, + funcObj["actualName"], + ) + assert_valid_functionVector(functionOutput1) -# functionOutput2 = initObj.getFunctionVectors(funcObj["actualName"]) -# assert_valid_functionVector(functionOutput2) + functionOutput2 = initObj.getFunctionVectors(funcObj["actualName"]) + assert_valid_functionVector(functionOutput2) -# assert functionOutput1[fun]["vector"] == pytest.approx( -# functionOutput2[fun]["vector"], abs=ABS_ACCURACY -# ) + assert functionOutput1[fun]["vector"] == pytest.approx( + functionOutput2[fun]["vector"], abs=ABS_ACCURACY + ) -# assert funcObj["vector"] == pytest.approx( -# functionOutput1[fun]["vector"], abs=ABS_ACCURACY -# ) + assert funcObj["vector"] == pytest.approx( + functionOutput1[fun]["vector"], abs=ABS_ACCURACY + ) -# print(TEST_SUITE_DIR) -# f_vecs_oracle = read_f_file( -# TEST_SUITE_DIR / "oracle" / f"FA_{SEED_VERSION}_f" / "ir2vec.txt" -# ) -# for pname, funs in f_vecs_oracle.items(): -# for fname, vec in funs.items(): -# assert vec == pytest.approx( -# f_vecs[pname][fname], abs=ABS_ACCURACY -# ), f"Checking {pname}: {fname}" + print(TEST_SUITE_DIR) + f_vecs_oracle = read_f_file( + TEST_SUITE_DIR / "oracle" / f"FA_{SEED_VERSION}_f" / "ir2vec.txt" + ) + for pname, funs in f_vecs_oracle.items(): + for fname, vec in funs.items(): + assert vec == pytest.approx( + f_vecs[pname][fname], abs=ABS_ACCURACY + ), f"Checking {pname}: {fname}" # def test_sym_f(): From a11779cb45af5372fbc463b992653657e079897c Mon Sep 17 00:00:00 2001 From: nishant_sachdeva Date: Sat, 8 Mar 2025 01:21:48 +0530 Subject: [PATCH 09/15] debug commit - test sym f for test_ir2vec --- .../pkg/tests/test_ir2vec.py | 102 +++++++++--------- 1 file changed, 51 insertions(+), 51 deletions(-) diff --git a/Manylinux2014_Compliant_Source/pkg/tests/test_ir2vec.py b/Manylinux2014_Compliant_Source/pkg/tests/test_ir2vec.py index bcf3a6fb0..2d1408f4a 100644 --- a/Manylinux2014_Compliant_Source/pkg/tests/test_ir2vec.py +++ b/Manylinux2014_Compliant_Source/pkg/tests/test_ir2vec.py @@ -154,61 +154,13 @@ def test_sym_p(): assert vec == pytest.approx(p_vectors[idx], abs=ABS_ACCURACY) -def test_fa_f(): - f_vecs = defaultdict(dict) - for file in ll_files: - path = (TEST_SUITE_DIR / file).resolve() - full_path = str(path).strip() - - initObj = ir2vec.initEmbedding(full_path, "fa", "f", 300) - assert initObj is not None - - functionVectorMap = ir2vec.getFunctionVectors(initObj) - assert_valid_functionVector(functionVectorMap) - - functionVectorMap2 = initObj.getFunctionVectors() - assert_valid_functionVector(functionVectorMap2) - - for fun, funcObj in functionVectorMap.items(): - assert fun == funcObj["demangledName"] - - f_vecs[path.name.strip()][fun] = funcObj["vector"] - - functionOutput1 = ir2vec.getFunctionVectors( - initObj, - funcObj["actualName"], - ) - assert_valid_functionVector(functionOutput1) - - functionOutput2 = initObj.getFunctionVectors(funcObj["actualName"]) - assert_valid_functionVector(functionOutput2) - - assert functionOutput1[fun]["vector"] == pytest.approx( - functionOutput2[fun]["vector"], abs=ABS_ACCURACY - ) - - assert funcObj["vector"] == pytest.approx( - functionOutput1[fun]["vector"], abs=ABS_ACCURACY - ) - - print(TEST_SUITE_DIR) - f_vecs_oracle = read_f_file( - TEST_SUITE_DIR / "oracle" / f"FA_{SEED_VERSION}_f" / "ir2vec.txt" - ) - for pname, funs in f_vecs_oracle.items(): - for fname, vec in funs.items(): - assert vec == pytest.approx( - f_vecs[pname][fname], abs=ABS_ACCURACY - ), f"Checking {pname}: {fname}" - - -# def test_sym_f(): +# def test_fa_f(): # f_vecs = defaultdict(dict) # for file in ll_files: # path = (TEST_SUITE_DIR / file).resolve() # full_path = str(path).strip() -# initObj = ir2vec.initEmbedding(full_path, "sym", "f") +# initObj = ir2vec.initEmbedding(full_path, "fa", "f", 300) # assert initObj is not None # functionVectorMap = ir2vec.getFunctionVectors(initObj) @@ -241,10 +193,58 @@ def test_fa_f(): # print(TEST_SUITE_DIR) # f_vecs_oracle = read_f_file( -# TEST_SUITE_DIR / "oracle" / f"SYM_{SEED_VERSION}_f" / "ir2vec.txt" +# TEST_SUITE_DIR / "oracle" / f"FA_{SEED_VERSION}_f" / "ir2vec.txt" # ) # for pname, funs in f_vecs_oracle.items(): # for fname, vec in funs.items(): # assert vec == pytest.approx( # f_vecs[pname][fname], abs=ABS_ACCURACY # ), f"Checking {pname}: {fname}" + + +def test_sym_f(): + f_vecs = defaultdict(dict) + for file in ll_files: + path = (TEST_SUITE_DIR / file).resolve() + full_path = str(path).strip() + + initObj = ir2vec.initEmbedding(full_path, "sym", "f") + assert initObj is not None + + functionVectorMap = ir2vec.getFunctionVectors(initObj) + assert_valid_functionVector(functionVectorMap) + + functionVectorMap2 = initObj.getFunctionVectors() + assert_valid_functionVector(functionVectorMap2) + + for fun, funcObj in functionVectorMap.items(): + assert fun == funcObj["demangledName"] + + f_vecs[path.name.strip()][fun] = funcObj["vector"] + + functionOutput1 = ir2vec.getFunctionVectors( + initObj, + funcObj["actualName"], + ) + assert_valid_functionVector(functionOutput1) + + functionOutput2 = initObj.getFunctionVectors(funcObj["actualName"]) + assert_valid_functionVector(functionOutput2) + + assert functionOutput1[fun]["vector"] == pytest.approx( + functionOutput2[fun]["vector"], abs=ABS_ACCURACY + ) + + assert funcObj["vector"] == pytest.approx( + functionOutput1[fun]["vector"], abs=ABS_ACCURACY + ) + + print(TEST_SUITE_DIR) + f_vecs_oracle = read_f_file( + TEST_SUITE_DIR / "oracle" / f"SYM_{SEED_VERSION}_f" / "ir2vec.txt" + ) + for pname, funs in f_vecs_oracle.items(): + for fname, vec in funs.items(): + assert vec == pytest.approx( + f_vecs[pname][fname], abs=ABS_ACCURACY + ), f"Checking {pname}: {fname}" From 2511eba4e3610d4d69c273fdc14513deca8f89e5 Mon Sep 17 00:00:00 2001 From: nishant_sachdeva Date: Sat, 8 Mar 2025 01:26:16 +0530 Subject: [PATCH 10/15] debug commit - test sym f --- .../pkg/tests/test_ir2vec.py | 84 +++++++++---------- 1 file changed, 42 insertions(+), 42 deletions(-) diff --git a/Manylinux2014_Compliant_Source/pkg/tests/test_ir2vec.py b/Manylinux2014_Compliant_Source/pkg/tests/test_ir2vec.py index 2d1408f4a..63a5f9e78 100644 --- a/Manylinux2014_Compliant_Source/pkg/tests/test_ir2vec.py +++ b/Manylinux2014_Compliant_Source/pkg/tests/test_ir2vec.py @@ -92,66 +92,66 @@ def assert_valid_functionVector(functionVectorMap): return True -def test_fa_p(): - p_vectors = [] - for file in ll_files: - full_path = str((TEST_SUITE_DIR / file).resolve()).strip() +# def test_fa_p(): +# p_vectors = [] +# for file in ll_files: +# full_path = str((TEST_SUITE_DIR / file).resolve()).strip() - initObj = ir2vec.initEmbedding(full_path, "fa", "p") - assert initObj is not None +# initObj = ir2vec.initEmbedding(full_path, "fa", "p") +# assert initObj is not None - progVector1 = ir2vec.getProgramVector(initObj) - assert_valid_progVector(progVector1) +# progVector1 = ir2vec.getProgramVector(initObj) +# assert_valid_progVector(progVector1) - progVector2 = initObj.getProgramVector() - assert_valid_progVector(progVector2) +# progVector2 = initObj.getProgramVector() +# assert_valid_progVector(progVector2) - for idx, vec in enumerate(progVector1): - assert vec == pytest.approx(progVector2[idx], abs=ABS_ACCURACY) +# for idx, vec in enumerate(progVector1): +# assert vec == pytest.approx(progVector2[idx], abs=ABS_ACCURACY) - p_vectors.append(progVector1) +# p_vectors.append(progVector1) - print(TEST_SUITE_DIR) - p_vectors_oracle = read_p_file( - TEST_SUITE_DIR / "oracle" / f"FA_{SEED_VERSION}_p" / "ir2vec.txt" - ) +# print(TEST_SUITE_DIR) +# p_vectors_oracle = read_p_file( +# TEST_SUITE_DIR / "oracle" / f"FA_{SEED_VERSION}_p" / "ir2vec.txt" +# ) - for idx, vec in enumerate(p_vectors_oracle): - assert vec == pytest.approx(p_vectors[idx], abs=ABS_ACCURACY) +# for idx, vec in enumerate(p_vectors_oracle): +# assert vec == pytest.approx(p_vectors[idx], abs=ABS_ACCURACY) -def test_sym_p(): - p_vectors = [] - for file in ll_files: - full_path = str((TEST_SUITE_DIR / file).resolve()).strip() +# def test_sym_p(): +# p_vectors = [] +# for file in ll_files: +# full_path = str((TEST_SUITE_DIR / file).resolve()).strip() - initObj = ir2vec.initEmbedding(full_path, "sym", "p") - assert initObj is not None +# initObj = ir2vec.initEmbedding(full_path, "sym", "p") +# assert initObj is not None - progVector1 = ir2vec.getProgramVector(initObj) - assert_valid_progVector(progVector1) +# progVector1 = ir2vec.getProgramVector(initObj) +# assert_valid_progVector(progVector1) - progVector2 = initObj.getProgramVector() - assert_valid_progVector(progVector2) +# progVector2 = initObj.getProgramVector() +# assert_valid_progVector(progVector2) - instVecList = ir2vec.getInstructionVectors(initObj) - assert_valid_instructionVectors(instVecList) +# instVecList = ir2vec.getInstructionVectors(initObj) +# assert_valid_instructionVectors(instVecList) - instVecList2 = initObj.getInstructionVectors() - assert_valid_instructionVectors(instVecList2) +# instVecList2 = initObj.getInstructionVectors() +# assert_valid_instructionVectors(instVecList2) - for idx, vec in enumerate(progVector1): - assert vec == pytest.approx(progVector2[idx], abs=ABS_ACCURACY) +# for idx, vec in enumerate(progVector1): +# assert vec == pytest.approx(progVector2[idx], abs=ABS_ACCURACY) - p_vectors.append(progVector1) +# p_vectors.append(progVector1) - print(TEST_SUITE_DIR) - p_vectors_oracle = read_p_file( - TEST_SUITE_DIR / "oracle" / f"SYM_{SEED_VERSION}_p" / "ir2vec.txt" - ) +# print(TEST_SUITE_DIR) +# p_vectors_oracle = read_p_file( +# TEST_SUITE_DIR / "oracle" / f"SYM_{SEED_VERSION}_p" / "ir2vec.txt" +# ) - for idx, vec in enumerate(p_vectors_oracle): - assert vec == pytest.approx(p_vectors[idx], abs=ABS_ACCURACY) +# for idx, vec in enumerate(p_vectors_oracle): +# assert vec == pytest.approx(p_vectors[idx], abs=ABS_ACCURACY) # def test_fa_f(): From e8719f599631cee5eac9376f2a1e754ae2715cc5 Mon Sep 17 00:00:00 2001 From: nishant_sachdeva Date: Sat, 8 Mar 2025 02:09:05 +0530 Subject: [PATCH 11/15] debug commit - segfault --- Manylinux2014_Compliant_Source/pkg/ir2vec/core.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Manylinux2014_Compliant_Source/pkg/ir2vec/core.cpp b/Manylinux2014_Compliant_Source/pkg/ir2vec/core.cpp index 565bd775f..84da9db8e 100644 --- a/Manylinux2014_Compliant_Source/pkg/ir2vec/core.cpp +++ b/Manylinux2014_Compliant_Source/pkg/ir2vec/core.cpp @@ -305,7 +305,7 @@ IR2VecHandlerObject *createIR2VECObject(const char *filename, if (!ir2vecHandlerObj) { return nullptr; } - ir2vecHandlerObj->ir2vecObj = ir2vecObj; + ir2vecHandlerObj->ir2vecObj = std::move(ir2vecObj); return ir2vecHandlerObj; } From a31d43203eda928808d806262cdbfd5beb22561c Mon Sep 17 00:00:00 2001 From: nishant_sachdeva Date: Sat, 8 Mar 2025 02:22:53 +0530 Subject: [PATCH 12/15] debug commit - segfault --- Manylinux2014_Compliant_Source/pkg/tests/test_ir2vec.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Manylinux2014_Compliant_Source/pkg/tests/test_ir2vec.py b/Manylinux2014_Compliant_Source/pkg/tests/test_ir2vec.py index 63a5f9e78..228da8755 100644 --- a/Manylinux2014_Compliant_Source/pkg/tests/test_ir2vec.py +++ b/Manylinux2014_Compliant_Source/pkg/tests/test_ir2vec.py @@ -211,12 +211,12 @@ def test_sym_f(): initObj = ir2vec.initEmbedding(full_path, "sym", "f") assert initObj is not None - functionVectorMap = ir2vec.getFunctionVectors(initObj) - assert_valid_functionVector(functionVectorMap) - functionVectorMap2 = initObj.getFunctionVectors() assert_valid_functionVector(functionVectorMap2) + functionVectorMap = ir2vec.getFunctionVectors(initObj) + assert_valid_functionVector(functionVectorMap) + for fun, funcObj in functionVectorMap.items(): assert fun == funcObj["demangledName"] From 52ea55c97fac44d2f5a95aa765aef0ae6a61f751 Mon Sep 17 00:00:00 2001 From: nishant_sachdeva Date: Sat, 8 Mar 2025 02:35:59 +0530 Subject: [PATCH 13/15] debug commit - minimal versioning updates in wheel yml --- .github/workflows/wheel.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/wheel.yml b/.github/workflows/wheel.yml index acdca4ac0..5fdbf0ddb 100644 --- a/.github/workflows/wheel.yml +++ b/.github/workflows/wheel.yml @@ -8,10 +8,10 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - os: [ubuntu-20.04] + os: [ubuntu-latest] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Build wheels uses: pypa/cibuildwheel@v2.13.1 From 26d1cc129de6bdf62212614839de62e03aee457c Mon Sep 17 00:00:00 2001 From: nishant_sachdeva Date: Sat, 8 Mar 2025 03:01:42 +0530 Subject: [PATCH 14/15] debug commit - segfault --- src/Symbolic.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Symbolic.cpp b/src/Symbolic.cpp index d383513a3..98276a081 100644 --- a/src/Symbolic.cpp +++ b/src/Symbolic.cpp @@ -237,7 +237,7 @@ Vector IR2Vec_Symbolic::bb2Vec(BasicBlock &B, std::transform(instVector.begin(), instVector.end(), vec.begin(), instVector.begin(), std::plus()); for (unsigned i = 0; i < I.getNumOperands(); i++) { - Vector vec; + Vector vec(DIM, 0); if (isa(I.getOperand(i))) { vec = getValue("function"); } else if (isa(I.getOperand(i)->getType())) { From 58ebb190dee157492664faa3a53693ab7a8154b7 Mon Sep 17 00:00:00 2001 From: nishant_sachdeva Date: Sat, 15 Mar 2025 00:20:20 +0530 Subject: [PATCH 15/15] test commit --- Manylinux2014_Compliant_Source/pkg/ir2vec/core.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Manylinux2014_Compliant_Source/pkg/ir2vec/core.cpp b/Manylinux2014_Compliant_Source/pkg/ir2vec/core.cpp index 84da9db8e..cf21e2515 100644 --- a/Manylinux2014_Compliant_Source/pkg/ir2vec/core.cpp +++ b/Manylinux2014_Compliant_Source/pkg/ir2vec/core.cpp @@ -127,7 +127,7 @@ class IR2VecHandler { PyDict_SetItemString(FuncVecDict, demangledName.c_str(), funcDict); - Py_DECREF(funcDict); + Py_INCREF(funcDict); } return FuncVecDict; }