-
Notifications
You must be signed in to change notification settings - Fork 1.2k
Description
Reminder
- I have read the above rules and searched the existing issues.
System Info
KTransformers 环境诊断
┏━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ Check ┃ Status ┃ Value ┃
┡━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
│ Python 版本 │ 正常 │ 3.12.3 │
│ CUDA 可用性 │ 正常 │ 13.0 │
│ GPU 检测 │ 正常 │ 发现 8 个 GPU: NVIDIA GeForce RTX 4090, NVIDIA GeForce RTX 4090, NVIDIA GeForce RTX 4090, NVIDIA GeForce RTX 4090, NVIDIA GeForce RTX 4090… │
│ │ │ NVIDIA GeForce RTX 4090, NVIDIA GeForce RTX 4090, NVIDIA GeForce RTX 4090 │
│ CPU │ 正常 │ INTEL(R) XEON(R) PLATINUM 8558P (96 核心 / 192 线程) │
│ CPU 指令集 │ 正常 │ AMX-INT8, AMX-BF16, AMX-TILE, AVX512BF16, AVX512F, AVX512BW, AVX512VL, AVX2 (+14 more) │
│ NUMA 拓扑 │ 正常 │ 2 个节点 │
│ kt-kernel │ 正常 │ v0.5.0.post1 (AMX) │
│ 系统内存 │ 正常 │ 964.0GB 可用 / 1007.5GB 总计 │
│ 磁盘空间 │ 正常 │ /data/models 有 8896.0GB 可用空间 │
│ SGLang Source │ 正常 │ Source (GitHub: kvcache-ai/sglang, branch: kimi_k2) │
│ SGLang kt-kernel │ 正常 │ SGLang kt-kernel 支持已验证 │
│ Environment Managers │ 正常 │ uv 0.9.7, venv builtin, docker 28.5.1 │
└──────────────────────┴──────────┴─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
✓ 所有检查通过!您的环境已就绪。
Reproduction
[2025-12-31 12:24:48 TP0] Prefill batch, #new-seq: 1, #new-token: 5806, #cached-token: 545, token usage: 0.01, #running-req: 0, #queue-req: 0,
[2025-12-31 12:24:49 TP1] JQ Warning: create_cpu_buffers called......
[2025-12-31 12:24:49 TP0] JQ Warning: create_cpu_buffers called......
[2025-12-31 12:24:52 TP1] JQ Warning 1: wrapper=None
[2025-12-31 12:24:52 TP0] JQ Warning 1: wrapper=<kt_kernel.utils.amx.NativeMoEWrapper object at 0x7c92be172a80>
[2025-12-31 12:24:52 TP0] JQ Warning 2: wrapper=<kt_kernel.utils.amx.NativeMoEWrapper object at 0x7c92be172a80>
Fatal Python error: Segmentation fault
Fatal Python error: Segmentation fault
Thread 0x00007b3e43ffe6c0 (most recent call first):
File "/data/cppsrc/sglang-ktransformers/sglang/python/sglang/srt/managers/scheduler_runtime_checker_mixin.py", line 250 in watchdog_thread
File "/usr/lib/python3.12/threading.py", line 1010 in run
File "/usr/lib/python3.12/threading.py", line 1073 in _bootstrap_inner
File "/usr/lib/python3.12/threading.py", line 1030 in _bootstrap
Thread 0x00007bfa97fff6c0 (most recent call first):
File "/usr/lib/python3.12/threading.py", line 359 in wait
File "/usr/lib/python3.12/threading.py", line 655 in wait
File "/root/.virtualenvs/ktsglang/lib/python3.12/site-packages/tqdm/_monitor.py", line 60 in run
File "/usr/lib/python3.12/threading.py", line 1073 in _bootstrap_inner
File "/usr/lib/python3.12/threading.py", line 1030 in _bootstrap
Thread 0x00007bfb91fff6c0 (most recent call first):
File "/usr/lib/python3.12/threading.py", line 359 in wait
File "/usr/lib/python3.12/threading.py", line 655 in wait
File "/root/.virtualenvs/ktsglang/lib/python3.12/site-packages/tqdm/_monitor.py", line 60 in run
File "/usr/lib/python3.12/threading.py", line 1073 in _bootstrap_inner
File "/usr/lib/python3.12/threading.py", line 1030 in _bootstrap
Extension modules: numpy._core._multiarray_umath, numpy.linalg._umath_linalg
Thread 0x00007c9334d606c0 (most recent call first):
File "/root/.virtualenvs/ktsglang/lib/python3.12/site-packages/torch/_inductor/compile_worker/subproc_pool.py", line 61 in _recv_msg
File "/root/.virtualenvs/ktsglang/lib/python3.12/site-packages/torch/_inductor/compile_worker/subproc_pool.py", line 195 in _read_thread
File "/usr/lib/python3.12/threading.py", line 1010 in run
File "/usr/lib/python3.12/threading.py", line 1073 in _bootstrap_inner
File "/usr/lib/python3.12/threading.py", line 1030 in _bootstrap
, pybase64._pybase64, charset_normalizer.md, requests.packages.charset_normalizer.md, requests.packages.chardet.md
Thread 0x00007c9774f5a140 (most recent call first):
File "/root/.virtualenvs/ktsglang/lib/python3.12/site-packages/kt_kernel/utils/amx.py", line 512 in sync_write_weight_scale_to_buffer
File "/data/cppsrc/sglang-ktransformers/sglang/python/sglang/srt/layers/moe/kt_ep_wrapper.py", line 388 in load
File "/data/cppsrc/sglang-ktransformers/sglang/python/sglang/srt/layers/moe/kt_ep_wrapper.py", line 774 in _build_full_context
File "/data/cppsrc/sglang-ktransformers/sglang/python/sglang/srt/layers/moe/kt_ep_wrapper.py", line 701 in apply
File "/data/cppsrc/sglang-ktransformers/sglang/python/sglang/srt/layers/moe/fused_moe_triton/layer.py", line 867 in run_moe_core
File "/data/cppsrc/sglang-ktransformers/sglang/python/sglang/srt/layers/moe/fused_moe_triton/layer.py", line 845 in forward
File "/root/.virtualenvs/ktsglang/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1784 in _call_impl
File "/root/.virtualenvs/ktsglang/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1773 in _wrapped_call_impl
File "/data/cppsrc/sglang-ktransformers/sglang/python/sglang/srt/models/deepseek_v2.py", line 837 in forward_normal
File "/data/cppsrc/sglang-ktransformers/sglang/python/sglang/srt/models/deepseek_v2.py", line 756 in forward
File "/root/.virtualenvs/ktsglang/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1784 in _call_impl
File "/root/.virtualenvs/ktsglang/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1773 in _wrapped_call_impl
File "/data/cppsrc/sglang-ktransformers/sglang/python/sglang/srt/models/deepseek_v2.py", line 2716 in forward
File "/root/.virtualenvs/ktsglang/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1784 in _call_impl
File "/root/.virtualenvs/ktsglang/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1773 in _wrapped_call_impl
File "/data/cppsrc/sglang-ktransformers/sglang/python/sglang/srt/models/deepseek_v2.py", line 2965 in forward
File "/root/.virtualenvs/ktsglang/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1784 in _call_impl
File "/root/.virtualenvs/ktsglang/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1773 in _wrapped_call_impl
File "/data/cppsrc/sglang-ktransformers/sglang/python/sglang/srt/models/deepseek_v2.py", line 3104 in forward
File "/root/.virtualenvs/ktsglang/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120 in decorate_context
File "/data/cppsrc/sglang-ktransformers/sglang/python/sglang/srt/model_executor/model_runner.py", line 2100 in forward_extend
File "/data/cppsrc/sglang-ktransformers/sglang/python/sglang/srt/model_executor/model_runner.py", line 2212 in _forward_raw
File "/data/cppsrc/sglang-ktransformers/sglang/python/sglang/srt/model_executor/model_runner.py", line 2155 in forward
File "/data/cppsrc/sglang-ktransformers/sglang/python/sglang/srt/managers/tp_worker.py", line 371 in forward_batch_generation
File "/data/cppsrc/sglang-ktransformers/sglang/python/sglang/srt/managers/scheduler.py", line 1995 in run_batch
File "/data/cppsrc/sglang-ktransformers/sglang/python/sglang/srt/managers/scheduler.py", line 993 in event_loop_overlap
File "/root/.virtualenvs/ktsglang/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120 in decorate_context
File "/data/cppsrc/sglang-ktransformers/sglang/python/sglang/srt/managers/scheduler.py", line 2698 in run_scheduler_process
File "/usr/lib/python3.12/multiprocessing/process.py", line 108 in run
File "/usr/lib/python3.12/multiprocessing/process.py", line 314 in _bootstrap
File "/usr/lib/python3.12/multiprocessing/spawn.py", line 135 in _main
File "/usr/lib/python3.12/multiprocessing/spawn.py", line 122 in spawn_main
File "<string>", line 1 in <module>
, multidict._multidict, yarl._quoting_c, propcache._helpers_c, aiohttp._http_writer, aiohttp._http_parser, aiohttp._websocket.mask, aiohttp._websocket.reader_c, frozenlist._frozenlist, torch._C, torch._C._dynamo.autograd_compiler, torch._C._dynamo.eval_frame, torch._C._dynamo.guards, torch._C._dynamo.utils, torch._C._fft, torch._C._linalg, torch._C._nested, torch._C._nn, torch._C._sparse, torch._C._special, psutil._psutil_linux, zmq.backend.cython._zmq, PIL._imaging, sentencepiece._sentencepiece, yaml._yaml, regex._regex, markupsafe._speedups, PIL._imagingft, numpy.random._common, numpy.random.bit_generator, numpy.random._bounded_integers, numpy.random._pcg64, numpy.random._mt19937, numpy.random._generator, numpy.random._philox, numpy.random._sfc64
, numpy.random.mtrand, _cffi_backend, scipy._lib._ccallback_c, scipy.linalg._fblas, scipy.linalg._flapack, _cyutility, scipy._cyutility, scipy.linalg.cython_lapack, scipy.linalg._cythonized_array_utils, scipy.linalg._solve_toeplitz, scipy.linalg._decomp_lu_cython, scipy.linalg._matfuncs_schur_sqrtm, scipy.linalg._matfuncs_expm, scipy.linalg._linalg_pythran, scipy.linalg.cython_blas
Extension modules: , numpy._core._multiarray_umathscipy.linalg._decomp_update, scipy.sparse._sparsetools, numpy.linalg._umath_linalg, _csparsetools, scipy.sparse._csparsetools, scipy.sparse.linalg._dsolve._superlu, scipy.sparse.linalg._eigen.arpack._arpack, scipy.sparse.linalg._propack._spropack, , scipy.sparse.linalg._propack._dpropackpybase64._pybase64, scipy.sparse.linalg._propack._cpropack, scipy.sparse.linalg._propack._zpropack, charset_normalizer.md, scipy.optimize._group_columns, requests.packages.charset_normalizer.md, , requests.packages.chardet.mdscipy._lib.messagestream, scipy.optimize._trlib._trlib, scipy.optimize._lbfgsb, _moduleTNC, scipy.optimize._moduleTNC, , scipy.optimize._slsqplibmultidict._multidict, scipy.optimize._minpack, yarl._quoting_c, scipy.optimize._lsq.givens_elimination, , propcache._helpers_cscipy.optimize._zeros, aiohttp._http_writer, scipy._lib._uarray._uarray, aiohttp._http_parser, scipy.special._ufuncs_cxx, , aiohttp._websocket.maskscipy.special._ellip_harm_2, , scipy.special._special_ufuncsaiohttp._websocket.reader_c, , scipy.special._gufuncsfrozenlist._frozenlist, , scipy.special._ufuncstorch._C, scipy.special._specfun, torch._C._dynamo.autograd_compiler, scipy.special._comb, torch._C._dynamo.eval_frame, , scipy.linalg._decomp_interpolativetorch._C._dynamo.guards, scipy.optimize._bglu_dense, torch._C._dynamo.utils, scipy.optimize._lsap, torch._C._fft, scipy.spatial._ckdtree, torch._C._linalg, scipy.spatial._qhull, torch._C._nested, scipy.spatial._voronoi, torch._C._nn, scipy.spatial._hausdorff, , torch._C._sparsescipy.spatial._distance_wrap, , torch._C._specialscipy.spatial.transform._rotation, scipy.spatial.transform._rigid_transform, scipy.optimize._direct, psutil._psutil_linux, zmq.backend.cython._zmq, setproctitle._setproctitle, PIL._imaging, sentencepiece._sentencepiece, yaml._yaml, cuda.bindings._bindings.cydriver, , regex._regexcuda.bindings.cydriver, , cuda.bindings.drivermarkupsafe._speedups, tvm_ffi.core, 
PIL._imagingft, msgspec._core, numpy.random._common, , cuda.bindings._bindings.cynvrtcnumpy.random.bit_generator, cuda.bindings.cynvrtc, numpy.random._bounded_integers, cuda.bindings.nvrtc, numpy.random._pcg64, cuda.bindings._bindings.cyruntime_ptds, numpy.random._mt19937, cuda.bindings._bindings.cyruntime, , numpy.random._generatorcuda.bindings.cyruntime, , cuda.bindings.runtimenumpy.random._philox, numpy.random._sfc64, , numpy.random.mtrandcuda_utils, _cffi_backend, scipy._lib._ccallback_c, , scipy.linalg._fblas__triton_launcher, scipy.linalg._flapack (total: 110)
, _cyutility, scipy._cyutility, scipy.linalg.cython_lapack, scipy.linalg._cythonized_array_utils, scipy.linalg._solve_toeplitz, scipy.linalg._decomp_lu_cython, scipy.linalg._matfuncs_schur_sqrtm, scipy.linalg._matfuncs_expm, scipy.linalg._linalg_pythran, scipy.linalg.cython_blas, scipy.linalg._decomp_update, scipy.sparse._sparsetools, _csparsetools, scipy.sparse._csparsetools, scipy.sparse.linalg._dsolve._superlu, scipy.sparse.linalg._eigen.arpack._arpack, scipy.sparse.linalg._propack._spropack, scipy.sparse.linalg._propack._dpropack, scipy.sparse.linalg._propack._cpropack, scipy.sparse.linalg._propack._zpropack, scipy.optimize._group_columns, scipy._lib.messagestream, scipy.optimize._trlib._trlib, scipy.optimize._lbfgsb, _moduleTNC, scipy.optimize._moduleTNC, scipy.optimize._slsqplib, scipy.optimize._minpack, scipy.optimize._lsq.givens_elimination, scipy.optimize._zeros, scipy._lib._uarray._uarray, scipy.special._ufuncs_cxx, scipy.special._ellip_harm_2, scipy.special._special_ufuncs, scipy.special._gufuncs, scipy.special._ufuncs, scipy.special._specfun, scipy.special._comb, scipy.linalg._decomp_interpolative, scipy.optimize._bglu_dense, scipy.optimize._lsap, scipy.spatial._ckdtree, scipy.spatial._qhull, scipy.spatial._voronoi, scipy.spatial._hausdorff, scipy.spatial._distance_wrap, scipy.spatial.transform._rotation, scipy.spatial.transform._rigid_transform, scipy.optimize._direct, setproctitle._setproctitle, cuda.bindings._bindings.cydriver, cuda.bindings.cydriver, cuda.bindings.driver, tvm_ffi.core, msgspec._core, cuda.bindings._bindings.cynvrtc, cuda.bindings.cynvrtc, cuda.bindings.nvrtc, cuda.bindings._bindings.cyruntime_ptds, cuda.bindings._bindings.cyruntime, cuda.bindings.cyruntime, cuda.bindings.runtime, cuda_utils, __triton_launcher (total: 110)
TP MOE layer 51, pool: 0x50c12b10, expert num: 384, num_experts_per_tok: 8
From per-expert pointers (gate_projs)
TP 0 load weight done.
TP 1 load weight done.
TP MOE layer 52, pool: 0x50c12b10, expert num: 384, num_experts_per_tok: 8
From per-expert pointers (gate_projs)
TP 0 load weight done.
TP 1 load weight done.
TP MOE layer 53, pool: 0x50c12b10, expert num: 384, num_experts_per_tok: 8
From per-expert pointers (gate_projs)
TP 0 load weight done.
TP 1 load weight done.
TP MOE layer 54, pool: 0x50c12b10, expert num: 384, num_experts_per_tok: 8
From per-expert pointers (gate_projs)
TP 0 load weight done.
TP 1 load weight done.
TP MOE layer 55, pool: 0x50c12b10, expert num: 384, num_experts_per_tok: 8
From per-expert pointers (gate_projs)
TP 0 load weight done.
TP 1 load weight done.
TP MOE layer 56, pool: 0x50c12b10, expert num: 384, num_experts_per_tok: 8
From per-expert pointers (gate_projs)
TP 0 load weight done.
TP 1 load weight done.
TP MOE layer 57, pool: 0x50c12b10, expert num: 384, num_experts_per_tok: 8
From per-expert pointers (gate_projs)
TP 0 load weight done.
TP 1 load weight done.
TP MOE layer 58, pool: 0x50c12b10, expert num: 384, num_experts_per_tok: 8
From per-expert pointers (gate_projs)
TP 0 load weight done.
TP 1 load weight done.
TP MOE layer 59, pool: 0x50c12b10, expert num: 384, num_experts_per_tok: 8
From per-expert pointers (gate_projs)
TP 0 load weight done.
TP 1 load weight done.
TP MOE layer 60, pool: 0x50c12b10, expert num: 384, num_experts_per_tok: 8
From per-expert pointers (gate_projs)
TP 0 load weight done.
TP 1 load weight done.
!!!!!!! Segfault encountered !!!!!!!
File "./signal/../sysdeps/unix/sysv/linux/x86_64/libc_sigaction.c", line 0, in 0x00007c9774c4532f
File "./nptl/pthread_create.c", line 447, in start_thread
File "../sysdeps/unix/sysv/linux/x86_64/clone3.S", line 78, in clone3
File "<unknown>", line 0, in 0xffffffffffffffff
!!!!!!! Segfault encountered !!!!!!!
File "./signal/../sysdeps/unix/sysv/linux/x86_64/libc_sigaction.c", line 0, in 0x00007c9774c4532f
File "./nptl/pthread_create.c", line 447, in start_thread
File "../sysdeps/unix/sysv/linux/x86_64/clone3.S", line 78, in clone3
File "<unknown>", line 0, in 0xffffffffffffffff
Others
No response