From 7409c9df772de6f41a36711d95f7593fab88e479 Mon Sep 17 00:00:00 2001 From: yuki-brook Date: Fri, 13 Mar 2026 14:53:24 +0800 Subject: [PATCH] feat: add PyPI upload in publish.yml --- .github/workflows/publish.yml | 45 +++++++++++++++++++++++++++++++++++ README.md | 2 +- 2 files changed, 46 insertions(+), 1 deletion(-) diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 62d4391..73f495e 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -48,6 +48,15 @@ jobs: run: | ./build.sh --mode=release --clean --py_ver ${{ env.PYTHON_VERSION }} + - name: Install auditwheel + run: pip install auditwheel patchelf + + - name: Repair wheel (linux → manylinux) + run: | + auditwheel repair python/dist/*-linux_x86_64.whl \ + --wheel-dir python/dist/ + rm -f python/dist/*-linux_x86_64.whl + - name: Upload wheel artifact uses: actions/upload-artifact@v4 with: @@ -154,3 +163,39 @@ jobs: packages-dir: python/dist/ verbose: true skip-existing: true + + # ───────────────────────────────────────────────────────────── + # Job 4: upload wheel to PyPI + # run when push to tag(v*.*.*) + # requires publish-pypi-test to succeed first + # ───────────────────────────────────────────────────────────── + publish-pypi: + name: Publish to PyPI + runs-on: ubuntu-22.04 + needs: publish-pypi-test + if: startsWith(github.ref, 'refs/tags/v') + + environment: + name: pypi + url: https://pypi.org/project/simm/ + + permissions: + id-token: write + + steps: + - name: Download wheel artifact + uses: actions/download-artifact@v4 + with: + name: simm-wheel-py${{ env.PYTHON_VERSION }} + path: python/dist/ + + - name: List wheel files + run: ls -lh python/dist/ + + - name: Upload to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 + with: + password: ${{ secrets.PYPI_TOKEN }} + packages-dir: python/dist/ + verbose: true + skip-existing: true diff --git a/README.md b/README.md index 3318ecc..703a37b 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ 
Seamlessly integrated with leading inference engines like SGLang and vLLM, enabl - **High Bandwidth**: Maximizes I/O bandwidth by fully utilizing **ALL RDMA NICs** of client nodes (effectively eliminating the bottlenecks exposed in [DualPath](https://arxiv.org/abs/2602.21548)) - **Ease of Use**: Offers seamless integration with popular inference engines, with deployment orchestrated via **Kubernetes (K8s)** for production-grade reliability -Under multi-turn long-context LLM workloads with significant KV cache reuse, **SiMM drastically reduces prefill latency (TTFT)** by transforming the prefill phase from a compute-heavy task into a high-speed I/O retrieval operation. Under 32K context length, SiMM achieves **3.1x** speedup over "No Cache" configuration and **2.1x** speedup over local CPU caching, **1.2x** outperforming industry-leading alternatives [[details](#integration-with-vllmlmcache)]. +Under multi-turn long-context LLM workloads with significant KV cache reuse, **SiMM drastically reduces prefill latency (TTFT)** by transforming the prefill phase from a compute-heavy task into a high-speed I/O retrieval operation. Under 32K context length, SiMM achieves **3.1x** speedup over "No Cache" configuration and **2.1x** speedup over local CPU caching, outperforming industry-leading alternatives by **1.2x** [[details](#benchmark-with-vllmlmcache)].
SiMM LLM Benchmark results