From 141401b8a51eb810885cb2853bc5b726f632ba98 Mon Sep 17 00:00:00 2001
From: n1ck-guo
Date: Wed, 7 Jan 2026 23:44:03 -0500
Subject: [PATCH 1/6] fix cuda ut fail

Signed-off-by: n1ck-guo
---
 test/helpers.py                            |  1 +
 test/test_cpu/test_cli_usage.py            | 17 +++++++++------
 test/test_cpu/test_gguf_format.py          | 10 +++++-----
 test/test_cuda/test_alg_ext.py             |  4 ++--
 test/test_cuda/test_fp8_input.py           | 24 ++++++++++----------
 test/test_cuda/test_gguf.py                |  8 ++++----
 test/test_cuda/test_marlin_backend.py      |  3 ++-
 test/test_cuda/test_mix_bits.py            |  1 -
 test/test_cuda/test_multiple_card.py       |  2 +-
 test/test_cuda/test_multiple_card_calib.py |  4 ++--
 test/test_cuda/test_scheme.py              |  2 +-
 test/test_cuda/test_support_vlms.py        | 11 ++++++-----
 12 files changed, 45 insertions(+), 42 deletions(-)

diff --git a/test/helpers.py b/test/helpers.py
index 89b832c6d..6bd5ad30c 100644
--- a/test/helpers.py
+++ b/test/helpers.py
@@ -10,6 +10,7 @@
 
 # Automatic choose local path or model name.
 def get_model_path(model_name: str) -> str:
+    model_name = model_name.rstrip("/")
     ut_path = f"/tf_dataset/auto_round/models/{model_name}"
     local_path = f"/models/{model_name.split('/')[-1]}"
 
diff --git a/test/test_cpu/test_cli_usage.py b/test/test_cpu/test_cli_usage.py
index b3aecf2f1..984fe5176 100644
--- a/test/test_cpu/test_cli_usage.py
+++ b/test/test_cpu/test_cli_usage.py
@@ -6,6 +6,7 @@
 
 
 class TestAutoRoundCmd:
+
     @classmethod
     def setup_class(self):
         pass
@@ -21,24 +22,24 @@ def test_auto_round_cmd(self, tiny_opt_model_path, tiny_qwen_vl_model_path):
         python_path = sys.executable
 
         # Test llm script
-        res = os.system(f"cd .. && {python_path} -m auto_round -h")
+        res = os.system(f"PYTHONPATH='../..:$PYTHONPATH' {python_path} -m auto_round -h")
         if res > 0 or res == -1:
             assert False, "cmd line test fail, please have a check"
 
         res = os.system(
-            f"cd .. && {python_path} -m auto_round --model {tiny_opt_model_path} --seqlen 32 --iter 2 --nsamples 1 --format auto_gptq,auto_round --output_dir ./saved --tasks piqa"
+            f"PYTHONPATH='../..:$PYTHONPATH' {python_path} -m auto_round --model {tiny_opt_model_path} --seqlen 32 --iter 2 --nsamples 1 --format auto_gptq,auto_round --output_dir ./saved --tasks piqa"
         )
         if res > 0 or res == -1:
             assert False, "cmd line test fail, please have a check"
 
         res = os.system(
-            f"cd .. && {python_path} -m auto_round --model {tiny_opt_model_path} --seqlen 8 --iter 1 --nsamples 1 --eval_task_by_task --tasks openbookqa --bs 32"
+            f"PYTHONPATH='../..:$PYTHONPATH' {python_path} -m auto_round --model {tiny_opt_model_path} --seqlen 8 --iter 1 --nsamples 1 --eval_task_by_task --tasks openbookqa --bs 32"
         )
         if res > 0 or res == -1:
             assert False, "cmd line test fail, please have a check"
 
         res = os.system(
-            f"cd .. && {python_path} -c 'from auto_round.__main__ import run_light; run_light()' --seqlen 8 --iter 2 --nsamples 8 --output_dir ./saved --tasks lambada_openai"
+            f"PYTHONPATH='../..:$PYTHONPATH' {python_path} -c 'from auto_round.__main__ import run_light; run_light()' --seqlen 8 --iter 2 --nsamples 8 --output_dir ./saved --tasks lambada_openai"
         )
         if res > 0 or res == -1:
             assert False, "cmd line test fail, please have a check"
@@ -46,23 +47,23 @@ def test_auto_round_cmd(self, tiny_opt_model_path, tiny_qwen_vl_model_path):
 
         # test mllm script
         # test auto_round_mllm --eval help
-        res = os.system(f"cd .. && {python_path} -m auto_round --eval -h")
+        res = os.system(f"PYTHONPATH='../..:$PYTHONPATH' {python_path} -m auto_round --eval -h")
         if res > 0 or res == -1:
             assert False, "cmd line test fail, please have a check"
 
         # test auto_round_mllm --lmms help
-        res = os.system(f"cd .. && {python_path} -m auto_round --eval --lmms -h")
+        res = os.system(f"PYTHONPATH='../..:$PYTHONPATH' {python_path} -m auto_round --eval --lmms -h")
         if res > 0 or res == -1:
             assert False, "cmd line test fail, please have a check"
 
         res = os.system(
-            f"cd .. && {python_path} -m auto_round --mllm --model {tiny_qwen_vl_model_path} --iter 2 --nsamples 2 --seqlen 32 --format auto_round --output_dir ./saved"
+            f"PYTHONPATH='../..:$PYTHONPATH' {python_path} -m auto_round --mllm --model {tiny_qwen_vl_model_path} --iter 2 --nsamples 2 --seqlen 32 --format auto_round --output_dir ./saved"
         )
         if res > 0 or res == -1:
             assert False, "cmd line test fail, please have a check"
 
         res = os.system(
-            f"cd .. && {python_path} -m auto_round --mllm --iter 2 --nsamples 2 --model {tiny_qwen_vl_model_path} --seqlen 32 --format auto_round"
+            f"PYTHONPATH='../..:$PYTHONPATH' {python_path} -m auto_round --mllm --iter 2 --nsamples 2 --model {tiny_qwen_vl_model_path} --seqlen 32 --format auto_round"
             " --quant_nontext_module --output_dir ./saved "
         )
         if res > 0 or res == -1:
diff --git a/test/test_cpu/test_gguf_format.py b/test/test_cpu/test_gguf_format.py
index 92e9d620e..b83d909e8 100644
--- a/test/test_cpu/test_gguf_format.py
+++ b/test/test_cpu/test_gguf_format.py
@@ -26,7 +26,7 @@ def teardown_class(self):
     def test_basic_usage(self, tiny_gemma_model_path, tiny_qwen_model_path):
         python_path = sys.executable
         res = os.system(
-            f"cd .. && {python_path} -m auto_round --model {tiny_gemma_model_path} "
+            f"PYTHONPATH='../..:$PYTHONPATH' {python_path} -m auto_round --model {tiny_gemma_model_path} "
            f" --bs 16 --iters 0 --nsamples 1 --format gguf:q4_k_m"
         )
         if res > 0 or res == -1:
@@ -34,7 +34,7 @@ def test_basic_usage(self, tiny_gemma_model_path, tiny_qwen_model_path):
         shutil.rmtree("./saved", ignore_errors=True)
 
         res = os.system(
-            f"cd .. && {python_path} -m auto_round --model {tiny_qwen_model_path}"
+            f"PYTHONPATH='../..:$PYTHONPATH' {python_path} -m auto_round --model {tiny_qwen_model_path}"
             f" --bs 16 --iters 1 --nsamples 1 --format fake,gguf:q4_0"
         )
         if res > 0 or res == -1:
@@ -162,7 +162,7 @@ def test_all_format(self, tiny_qwen_model_path):
         # for gguf_format in ["gguf:q4_0", "gguf:q4_1", "gguf:q4_k_m", "gguf:q6_k"]:
         for gguf_format in ["gguf:q4_k_m"]:
             res = os.system(
-                f"cd .. && {python_path} -m auto_round --model {model_name} "
+                f"PYTHONPATH='../..:$PYTHONPATH' {python_path} -m auto_round --model {model_name} "
                 f" --bs 16 --iters 1 --nsamples 1 --seqlen 16 --format {gguf_format}"
             )
             if res > 0 or res == -1:
@@ -170,7 +170,7 @@ def test_all_format(self, tiny_qwen_model_path):
             shutil.rmtree("../../tmp_autoround", ignore_errors=True)
 
             res = os.system(
-                f"cd .. && {python_path} -m auto_round --model {model_name}"
+                f"PYTHONPATH='../..:$PYTHONPATH' {python_path} -m auto_round --model {model_name}"
                 f" --bs 16 --iters 0 --nsamples 1 --seqlen 16 --format fake,{gguf_format}"
             )
             if res > 0 or res == -1:
@@ -179,7 +179,7 @@ def test_all_format(self, tiny_qwen_model_path):
 
         # test mixed q2_k_s
         res = os.system(
-            f"cd .. && {python_path} -m auto_round --model {model_name}"
+            f"PYTHONPATH='../..:$PYTHONPATH' {python_path} -m auto_round --model {model_name}"
             f" --bs 16 --iters 0 --nsamples 1 --seqlen 16 --scheme GGUF:Q2_K_MIXED"
         )
         if res > 0 or res == -1:
diff --git a/test/test_cuda/test_alg_ext.py b/test/test_cuda/test_alg_ext.py
index 6cdbc82ab..f42cab035 100644
--- a/test/test_cuda/test_alg_ext.py
+++ b/test/test_cuda/test_alg_ext.py
@@ -49,13 +49,13 @@ def test_cli(self, tiny_opt_model_path):
         python_path = sys.executable
 
         res = os.system(
-            f"cd .. && CUDA_VISIBLE_DEVICES=0 {python_path} -m auto_round --model {tiny_opt_model_path} --iters 1 --device auto --enable_alg_ext --avg_bits 2 --options=W2A16,W4A16 --ignore_scale_zp_bits --nsamples 1 --seqlen 32"
+            f"PYTHONPATH='../..:$PYTHONPATH' CUDA_VISIBLE_DEVICES=0 {python_path} -m auto_round --model {tiny_opt_model_path} --iters 1 --device auto --enable_alg_ext --avg_bits 2 --options=W2A16,W4A16 --ignore_scale_zp_bits --nsamples 1 --seqlen 32"
         )
         if res > 0 or res == -1:
             assert False, "cmd line test fail, please have a check"
 
         res = os.system(
-            f"cd .. && CUDA_VISIBLE_DEVICES=0 {python_path} -m auto_round --model {tiny_opt_model_path} --iters 1 --device auto --enable_alg_ext --avg_bits 5.5 --options=mxfp4,mxfp8 --ignore_scale_zp_bits --enable_torch_compile --nsamples 1 --seqlen 32"
+            f"PYTHONPATH='../..:$PYTHONPATH' CUDA_VISIBLE_DEVICES=0 {python_path} -m auto_round --model {tiny_opt_model_path} --iters 1 --device auto --enable_alg_ext --avg_bits 5.5 --options=mxfp4,mxfp8 --ignore_scale_zp_bits --enable_torch_compile --nsamples 1 --seqlen 32"
         )
         if res > 0 or res == -1:
             assert False, "cmd line test fail, please have a check"
diff --git a/test/test_cuda/test_fp8_input.py b/test/test_cuda/test_fp8_input.py
index 9e1c1cc3a..d899f0434 100644
--- a/test/test_cuda/test_fp8_input.py
+++ b/test/test_cuda/test_fp8_input.py
@@ -64,8 +64,8 @@ def test_gguf_imatrix(self):
         # print(tokenizer.decode(model.generate(**inputs, max_new_tokens=50)[0]))
 
     def test_small_model_rtn(self):
-        model, tokenizer = self.tiny_fp8_model()
-        ar = AutoRound(model=model, tokenizer=tokenizer, iters=0)
+        model_name = get_model_path("qwen/Qwen3-0.6B-FP8")
+        ar = AutoRound(model=model_name, iters=0)
         _, folder = ar.quantize_and_save(output_dir=self.save_dir)
         model_args = f"pretrained={self.save_dir}"
         result = simple_evaluate(model="hf", model_args=model_args, tasks="lambada_openai", batch_size="auto")
@@ -75,8 +75,8 @@ def test_small_model_iters1(self):
-        model, tokenizer = self.tiny_fp8_model()
-        ar = AutoRound(model=model, tokenizer=tokenizer, iters=1)
+        model_name = get_model_path("qwen/Qwen3-0.6B-FP8")
+        ar = AutoRound(model=model_name, iters=1)
         _, folder = ar.quantize_and_save(output_dir=self.save_dir)
         model_args = f"pretrained={self.save_dir}"
         result = simple_evaluate(model="hf", model_args=model_args, tasks="lambada_openai", batch_size="auto")
@@ -86,25 +86,25 @@ def test_medium_model_rtn(self):
-        model, tokenizer = self.tiny_fp8_model()
-        ar = AutoRound(model=model, tokenizer=tokenizer, iters=0)
+        model_name = get_model_path("qwen/Qwen3-0.6B-FP8")
+        ar = AutoRound(model=model_name, iters=0)
         _, folder = ar.quantize_and_save(output_dir=self.save_dir)
         model_args = f"pretrained={self.save_dir}"
         result = simple_evaluate(model="hf", model_args=model_args, tasks="lambada_openai", batch_size="auto")
         print(result["results"]["lambada_openai"]["acc,none"])
-        assert result["results"]["lambada_openai"]["acc,none"] > 0.55
+        assert result["results"]["lambada_openai"]["acc,none"] > 0.33
 
         shutil.rmtree(self.save_dir, ignore_errors=True)
 
     def test_medium_model_rtn_with_lm_head(self):
-        model, tokenizer = self.tiny_fp8_model()
+        model_name = get_model_path("qwen/Qwen3-0.6B-FP8")
         layer_config = {"lm_head": {"bits": 4}}
-        ar = AutoRound(model=model, tokenizer=tokenizer, iters=0, layer_config=layer_config)
+        ar = AutoRound(model=model_name, iters=0, layer_config=layer_config)
         _, folder = ar.quantize_and_save(output_dir=self.save_dir)
         model_args = f"pretrained={self.save_dir}"
         result = simple_evaluate(model="hf", model_args=model_args, tasks="lambada_openai", batch_size="auto")
         print(result["results"]["lambada_openai"]["acc,none"])
-        assert result["results"]["lambada_openai"]["acc,none"] > 0.55
+        assert result["results"]["lambada_openai"]["acc,none"] > 0.33
 
         shutil.rmtree(self.save_dir, ignore_errors=True)
 
@@ -135,9 +135,9 @@ def test_fp8_model_gguf(self):
 
     def test_diff_datatype(self):
         for scheme in ["NVFP4", "MXFP4"]:
-            model, tokenizer = self.tiny_fp8_model()
+            model_name = get_model_path("qwen/Qwen3-0.6B-FP8")
             for iters in [0, 1]:
                 print(f"Testing scheme: {scheme}, iters: {iters}")
-                ar = AutoRound(model=model, tokenizer=tokenizer, iters=iters, scheme=scheme)
+                ar = AutoRound(model_name, iters=iters, scheme=scheme)
                 ar.quantize_and_save(output_dir=self.save_dir)
         shutil.rmtree(self.save_dir, ignore_errors=True)
diff --git a/test/test_cuda/test_gguf.py b/test/test_cuda/test_gguf.py
index 395969493..d5a4473e4 100644
--- a/test/test_cuda/test_gguf.py
+++ b/test/test_cuda/test_gguf.py
@@ -56,7 +56,7 @@ def test_gguf_format(self, tiny_qwen_model_path, dataloader):
         save_dir = os.path.join(os.path.dirname(__file__), "saved")
 
         res = os.system(
-            f"cd .. && {sys.executable} -m auto_round --model {tiny_qwen_model_path} --iter 2 "
+            f"PYTHONPATH='../..:$PYTHONPATH' {sys.executable} -m auto_round --model {tiny_qwen_model_path} --iter 2 "
             f"--output_dir {save_dir} --nsample 2 --format gguf:q4_0 --device 0"
         )
         print(save_dir)
@@ -64,8 +64,8 @@
 
         from llama_cpp import Llama
 
-        gguf_file = os.listdir(f"{save_dir}/tmp_tiny_qwen_model_path-gguf")[0]
-        llm = Llama(f"{save_dir}/tmp_tiny_qwen_model_path-gguf/{gguf_file}", n_gpu_layers=-1)
+        gguf_file = os.listdir(f"{save_dir}/tiny_qwen_model_path-gguf")[0]
+        llm = Llama(f"{save_dir}/tiny_qwen_model_path-gguf/{gguf_file}", n_gpu_layers=-1)
         output = llm("There is a girl who likes adventure,", max_tokens=32)
         print(output)
         shutil.rmtree(save_dir, ignore_errors=True)
@@ -155,7 +155,7 @@ def test_vlm_gguf(self):
         autoround.quantize_and_save(output_dir=quantized_model_path, format="gguf:q4_0")
         assert "mmproj-model.gguf" in os.listdir("./saved")
         file_size = os.path.getsize("./saved/Qwen2-VL-2B-Instruct-Q4_0.gguf") / 1024**2
-        assert abs(file_size - 4242) < 5.0
+        assert abs(file_size - 894) < 5.0
         file_size = os.path.getsize("./saved/mmproj-model.gguf") / 1024**2
         assert abs(file_size - 2580) < 5.0
         shutil.rmtree("./saved", ignore_errors=True)
diff --git a/test/test_cuda/test_marlin_backend.py b/test/test_cuda/test_marlin_backend.py
index 8d7594086..d6128e0b9 100644
--- a/test/test_cuda/test_marlin_backend.py
+++ b/test/test_cuda/test_marlin_backend.py
@@ -7,11 +7,12 @@
 from auto_round import AutoRound, AutoRoundConfig
 from auto_round.eval.evaluation import simple_evaluate_user_model
 
-from ..helpers import model_infer
+from ..helpers import get_model_path, model_infer
 
 
 class TestAutoRoundMarlinBackend:
     save_dir = "./saved"
+    model_name = get_model_path("facebook/opt-125m")
 
     @pytest.fixture(autouse=True, scope="class")
     def setup_and_teardown_class(self):
diff --git a/test/test_cuda/test_mix_bits.py b/test/test_cuda/test_mix_bits.py
index 6988709d5..177439108 100644
--- a/test/test_cuda/test_mix_bits.py
+++ b/test/test_cuda/test_mix_bits.py
@@ -242,7 +242,6 @@ def test_mixed_autoround_format_vllm(self, tiny_opt_model_path, dataloader):
         }
         autoround = AutoRound(
             tiny_opt_model_path,
-            self.tokenizer,
             scheme="W4A16",
             iters=2,
             seqlen=2,
diff --git a/test/test_cuda/test_multiple_card.py b/test/test_cuda/test_multiple_card.py
index e09975a19..1901bee1d 100644
--- a/test/test_cuda/test_multiple_card.py
+++ b/test/test_cuda/test_multiple_card.py
@@ -354,7 +354,7 @@ def test_device_map_for_triton(self):
 
     @multi_card
     def test_mllm_device_map(self):
-        model_name = get_model_path("qwen/Qwen2-VL-2B-Instruct/")
+        model_name = get_model_path("qwen/Qwen2-VL-2B-Instruct")
         from auto_round import AutoRoundMLLM
 
         device_map = "0,1"
diff --git a/test/test_cuda/test_multiple_card_calib.py b/test/test_cuda/test_multiple_card_calib.py
index fedb3f328..b19911659 100644
--- a/test/test_cuda/test_multiple_card_calib.py
+++ b/test/test_cuda/test_multiple_card_calib.py
@@ -41,7 +41,7 @@ def test_multiple_card_calib(self):
 
         ##test llm script
         res = os.system(
-            f"cd .. && {python_path} -m auto_round --model /models/Meta-Llama-3.1-8B-Instruct --devices '0,1' --quant_lm_head --iters 1 --nsamples 1 --output_dir None"
+            f"PYTHONPATH='../..:$PYTHONPATH' {python_path} -m auto_round --model /models/Meta-Llama-3.1-8B-Instruct --devices '0,1' --quant_lm_head --iters 1 --nsamples 1 --output_dir None"
         )
         if res > 0 or res == -1:
             assert False, "cmd line test fail, please have a check"
@@ -52,7 +52,7 @@ def test_multiple_card_nvfp4(self):
 
         ##test llm script
         res = os.system(
-            f"cd .. && {python_path} -m auto_round --model facebook/opt-125m --scheme NVFP4 --devices '0,1' --iters 1 --nsamples 1 --enable_torch_compile --low_gpu_mem_usage"
+            f"PYTHONPATH='../..:$PYTHONPATH' {python_path} -m auto_round --model facebook/opt-125m --scheme NVFP4 --devices '0,1' --iters 1 --nsamples 1 --enable_torch_compile --low_gpu_mem_usage"
         )
         if res > 0 or res == -1:
             assert False, "cmd line test fail, please have a check"
diff --git a/test/test_cuda/test_scheme.py b/test/test_cuda/test_scheme.py
index 08156c927..f15077823 100644
--- a/test/test_cuda/test_scheme.py
+++ b/test/test_cuda/test_scheme.py
@@ -42,7 +42,7 @@ def test_w2a16(self, tiny_opt_model_path):
         ar.quantize()
 
     def test_mxfp4(self, tiny_opt_model_path):
-        ar = AutoRound(tiny_opt_model_path, scheme="MXFP4", nsamples=1, iters=1)
+        ar = AutoRound(tiny_opt_model_path, scheme="MXFP8_RCEIL", nsamples=1, iters=1)
         assert ar.bits == 4
         assert ar.act_bits == 4
         assert ar.data_type == "mx_fp"
diff --git a/test/test_cuda/test_support_vlms.py b/test/test_cuda/test_support_vlms.py
index 3358c8226..b9efe459e 100644
--- a/test/test_cuda/test_support_vlms.py
+++ b/test/test_cuda/test_support_vlms.py
@@ -11,6 +11,7 @@
 
 
 class TestSupportVLMS:
+
     @classmethod
     def setup_class(self):
         self.save_dir = os.path.join(os.path.dirname(__file__), "ut_saved")
@@ -26,7 +27,7 @@ def test_qwen2(self):
         model_path = "/models/Qwen2-VL-2B-Instruct/"
         # test tune
         res = os.system(
-            f"cd .. && {self.python_path} -m auto_round --mllm "
+            f"PYTHONPATH='../..:$PYTHONPATH' {self.python_path} -m auto_round --mllm "
             f"--model {model_path} --iter 2 --output_dir {self.save_dir} --device {self.device}"
         )
         assert not (res > 0 or res == -1), "qwen2 tuning fail"
@@ -81,7 +82,7 @@ def test_phi3(self):
         model_path = "/models/Phi-3.5-vision-instruct/"
         ## test tune
         res = os.system(
-            f"cd .. && {self.python_path} -m auto_round --mllm "
+            f"PYTHONPATH='../..:$PYTHONPATH' {self.python_path} -m auto_round --mllm "
             f"--model {model_path} --iter 2 --output_dir {self.save_dir} --device {self.device}"
         )
         assert not (res > 0 or res == -1), "Phi-3.5 tuning fail"
@@ -129,7 +130,7 @@ def test_phi3_vision_awq(self):
         model_path = "/models/Phi-3.5-vision-instruct/"
         ## test tune
         res = os.system(
-            f"cd .. && {self.python_path} -m auto_round --mllm "
+            f"PYTHONPATH='../..:$PYTHONPATH' {self.python_path} -m auto_round --mllm "
             f"--model {model_path} --iter 2 --quant_nontext_module "
             f"--nsample 64 --seqlen 32 "
             f"--format auto_awq --output_dir {self.save_dir} --device {self.device}"
@@ -177,7 +178,7 @@ def test_glm(self):
         model_path = "/models/glm-4v-9b/"
         ## test tune
         res = os.system(
-            f"cd .. && {self.python_path} -m auto_round "
+            f"PYTHONPATH='../..:$PYTHONPATH' {self.python_path} -m auto_round "
             f"--model {model_path} --iter 1 --output_dir {self.save_dir} --device {self.device}"
         )
         assert not (res > 0 or res == -1), "glm-4v-9b tuning fail"
@@ -186,7 +187,7 @@ def test_granite_vision(self):
         model_path = "/models/granite-vision-3.2-2b"
         ## test tune
         res = os.system(
-            f"cd .. && {self.python_path} -m auto_round "
+            f"PYTHONPATH='../..:$PYTHONPATH' {self.python_path} -m auto_round "
             f"--model {model_path} --iter 1 --output_dir {self.save_dir} --device {self.device}"
         )
         assert not (res > 0 or res == -1), "granite-vision-3.2-2b tuning fail"

From 317208425c2180bacd9036a1b4482eab1f911053 Mon Sep 17 00:00:00 2001
From: n1ck-guo
Date: Thu, 8 Jan 2026 01:13:41 -0500
Subject: [PATCH 2/6] fix auto_awq related

Signed-off-by: n1ck-guo
---
 auto_round/formats.py | 2 +-
 docs/step_by_step.md  | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/auto_round/formats.py b/auto_round/formats.py
index 6254f8e39..b2f172f40 100644
--- a/auto_round/formats.py
+++ b/auto_round/formats.py
@@ -493,7 +493,7 @@ def save_quantized(
 
 @OutputFormat.register("auto_awq")
 class AutoAWQFormat(OutputFormat):
-    support_schemes = ["W4A16", "W2A16", "W3A16", "W8A16", "BF16", "W2A16G64", "W2A16G32"]
+    support_schemes = ["W4A16"]
     format_name = "auto_awq"
 
     @staticmethod
diff --git a/docs/step_by_step.md b/docs/step_by_step.md
index 8163ac394..66b7ec9a3 100644
--- a/docs/step_by_step.md
+++ b/docs/step_by_step.md
@@ -152,11 +152,10 @@ adopted within the community, **only 4-bits quantization is supported**. Please
 |export format | supported scheme |
 |--------------|------------------|
 |**auto_round** | W4A16, W2A16, W3A16, W8A16, MXFP4, MXFP8, NVFP4, FPW8A16, W2A16G64, W2A16G32, FP8_STATIC, BF16|
-|**auto_awq / auto_round:auto_awq** | W4A16, W2A16, W3A16, W8A16, BF16, W2A16G64, W2A16G32 |
+|**auto_awq / auto_round:auto_awq** | W4A16|
 |**auto_gptq / auto_round:auto_gptq / auto_round:gptqmodel**|W4A16, W2A16, W3A16, W8A16, BF16, W2A16G64, W2A16G32|
 |**llm_compressor / auto_round:llm_compressor** | MXFP4, MXFP8, NVFP4, FPW8A16, FP8_STATIC |
 |**gguf** | GGUF:Q4_0, GGUF:Q4_1, GGUF:Q5_0, GGUF:Q5_1, GGUF:Q2_K_S, GGUF:Q3_K_S, GGUF:Q3_K_M, GGUF:Q3_K_L, GGUF:Q4_K_S, GGUF:Q4_K_M, GGUF:Q5_K_S, GGUF:Q5_K_M, GGUF:Q6_K, GGUF:Q8_0 |
-|**itrex / itrex_xpu** | W4A16, W2A16, W3A16, W8A16, BF16, W2A16G64, W2A16G32 |
 |**fake** | all scheme|
 
 ### Hardware Compatibility

From b192e87c838f34564bc90e3cd554579e444df9a5 Mon Sep 17 00:00:00 2001
From: Xin He
Date: Thu, 8 Jan 2026 14:20:13 +0800
Subject: [PATCH 3/6] Update test/test_cuda/test_scheme.py

---
 test/test_cuda/test_scheme.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/test_cuda/test_scheme.py b/test/test_cuda/test_scheme.py
index f15077823..39ff3ba13 100644
--- a/test/test_cuda/test_scheme.py
+++ b/test/test_cuda/test_scheme.py
@@ -42,7 +42,7 @@ def test_w2a16(self, tiny_opt_model_path):
         ar.quantize()
 
     def test_mxfp4(self, tiny_opt_model_path):
-        ar = AutoRound(tiny_opt_model_path, scheme="MXFP8_RCEIL", nsamples=1, iters=1)
+        ar = AutoRound(tiny_opt_model_path, scheme="MXFP4_RCEIL", nsamples=1, iters=1)
         assert ar.bits == 4
         assert ar.act_bits == 4
         assert ar.data_type == "mx_fp"

From 6f9b78e11b48c5c07a358c154d4b2cdcc2e9443a Mon Sep 17 00:00:00 2001
From: n1ck-guo
Date: Fri, 9 Jan 2026 00:43:25 -0500
Subject: [PATCH 4/6] fix

Signed-off-by: n1ck-guo
---
 test/test_cpu/test_gguf_format.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/test_cpu/test_gguf_format.py b/test/test_cpu/test_gguf_format.py
index b83d909e8..fc3c6b1c5 100644
--- a/test/test_cpu/test_gguf_format.py
+++ b/test/test_cpu/test_gguf_format.py
@@ -206,7 +206,7 @@ def test_vlm_gguf(self):
         for file_name in os.listdir(quantized_model_path):
             file_size = os.path.getsize(os.path.join(quantized_model_path, file_name)) / 1024**2
             if file_name == "mmproj-model.gguf":
-                assert abs(file_size - 2537) < 5.0
+                assert abs(file_size - 2173) < 5.0
             else:
                 assert abs(file_size - 892) < 5.0
         shutil.rmtree("./saved", ignore_errors=True)

From 0dd26deaa7e3bdfc0b22d2939c9e8d2f74d2a67c Mon Sep 17 00:00:00 2001
From: n1ck-guo
Date: Fri, 9 Jan 2026 01:51:11 -0500
Subject: [PATCH 5/6] fix merge

Signed-off-by: n1ck-guo
---
 test/test_cpu/export/test_gguf_format.py     | 12 ++++++------
 test/test_cpu/utils/test_cli_usage.py        | 16 ++++++++--------
 .../advanced/test_multiple_card_calib.py     |  4 ++--
 test/test_cuda/export/test_gguf.py           |  2 +-
 test/test_cuda/models/test_support_vlms.py   | 10 +++++-----
 test/test_cuda/utils/test_alg_ext.py         |  4 ++--
 6 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/test/test_cpu/export/test_gguf_format.py b/test/test_cpu/export/test_gguf_format.py
index d5accc019..6b878f48f 100644
--- a/test/test_cpu/export/test_gguf_format.py
+++ b/test/test_cpu/export/test_gguf_format.py
@@ -26,7 +26,7 @@ def teardown_class(self):
     def test_basic_usage(self, tiny_gemma_model_path, tiny_qwen_model_path):
         python_path = sys.executable
         res = os.system(
-            f"PYTHONPATH='../..:$PYTHONPATH' {python_path} -m auto_round --model {tiny_gemma_model_path} "
+            f"PYTHONPATH='../../..:$PYTHONPATH' {python_path} -m auto_round --model {tiny_gemma_model_path} "
             f" --bs 16 --iters 0 --nsamples 1 --format gguf:q4_k_m"
         )
         if res > 0 or res == -1:
@@ -34,7 +34,7 @@ def test_basic_usage(self, tiny_gemma_model_path, tiny_qwen_model_path):
         shutil.rmtree("./saved", ignore_errors=True)
 
         res = os.system(
-            f"PYTHONPATH='../..:$PYTHONPATH' {python_path} -m auto_round --model {tiny_qwen_model_path}"
+            f"PYTHONPATH='../../..:$PYTHONPATH' {python_path} -m auto_round --model {tiny_qwen_model_path}"
             f" --bs 16 --iters 1 --nsamples 1 --format fake,gguf:q4_0"
         )
         if res > 0 or res == -1:
@@ -162,7 +162,7 @@ def test_all_format(self, tiny_qwen_model_path):
         # for gguf_format in ["gguf:q4_0", "gguf:q4_1", "gguf:q4_k_m", "gguf:q6_k"]:
         for gguf_format in ["gguf:q4_k_m"]:
             res = os.system(
-                f"PYTHONPATH='../..:$PYTHONPATH' {python_path} -m auto_round --model {model_name} "
+                f"PYTHONPATH='../../..:$PYTHONPATH' {python_path} -m auto_round --model {model_name} "
                 f" --bs 16 --iters 1 --nsamples 1 --seqlen 16 --format {gguf_format}"
             )
             if res > 0 or res == -1:
@@ -170,7 +170,7 @@ def test_all_format(self, tiny_qwen_model_path):
             shutil.rmtree("../../tmp_autoround", ignore_errors=True)
 
             res = os.system(
-                f"PYTHONPATH='../..:$PYTHONPATH' {python_path} -m auto_round --model {model_name}"
+                f"PYTHONPATH='../../..:$PYTHONPATH' {python_path} -m auto_round --model {model_name}"
                 f" --bs 16 --iters 0 --nsamples 1 --seqlen 16 --format fake,{gguf_format}"
             )
             if res > 0 or res == -1:
@@ -179,7 +179,7 @@ def test_all_format(self, tiny_qwen_model_path):
 
         # test mixed q2_k_s
         res = os.system(
-            f"PYTHONPATH='../..:$PYTHONPATH' {python_path} -m auto_round --model {model_name}"
+            f"PYTHONPATH='../../..:$PYTHONPATH' {python_path} -m auto_round --model {model_name}"
             f" --bs 16 --iters 0 --nsamples 1 --seqlen 16 --scheme GGUF:Q2_K_MIXED"
         )
         if res > 0 or res == -1:
@@ -206,7 +206,7 @@ def test_vlm_gguf(self):
         for file_name in os.listdir(quantized_model_path):
             file_size = os.path.getsize(os.path.join(quantized_model_path, file_name)) / 1024**2
             if file_name == "mmproj-model.gguf":
-                assert abs(file_size - 2173) < 5.0
+                assert abs(file_size - 2537) < 5.0
             else:
                 assert abs(file_size - 892) < 5.0
         shutil.rmtree("./saved", ignore_errors=True)
diff --git a/test/test_cpu/utils/test_cli_usage.py b/test/test_cpu/utils/test_cli_usage.py
index ebd4c523f..7dad502d2 100644
--- a/test/test_cpu/utils/test_cli_usage.py
+++ b/test/test_cpu/utils/test_cli_usage.py
@@ -22,24 +22,24 @@ def test_auto_round_cmd(self, tiny_opt_model_path, tiny_qwen_vl_model_path):
         python_path = sys.executable
 
         # Test llm script
-        res = os.system(f"PYTHONPATH='../..:$PYTHONPATH' {python_path} -m auto_round -h")
+        res = os.system(f"PYTHONPATH='../../..:$PYTHONPATH' {python_path} -m auto_round -h")
         if res > 0 or res == -1:
             assert False, "cmd line test fail, please have a check"
 
         res = os.system(
-            f"PYTHONPATH='../..:$PYTHONPATH' {python_path} -m auto_round --model {tiny_opt_model_path} --seqlen 32 --iter 2 --nsamples 1 --format auto_gptq,auto_round --output_dir ./saved --tasks piqa"
+            f"PYTHONPATH='../../..:$PYTHONPATH' {python_path} -m auto_round --model {tiny_opt_model_path} --seqlen 32 --iter 2 --nsamples 1 --format auto_gptq,auto_round --output_dir ./saved --tasks piqa"
         )
         if res > 0 or res == -1:
             assert False, "cmd line test fail, please have a check"
 
         res = os.system(
-            f"PYTHONPATH='../..:$PYTHONPATH' {python_path} -m auto_round --model {tiny_opt_model_path} --seqlen 8 --iter 1 --nsamples 1 --eval_task_by_task --tasks openbookqa --bs 32"
+            f"PYTHONPATH='../../..:$PYTHONPATH' {python_path} -m auto_round --model {tiny_opt_model_path} --seqlen 8 --iter 1 --nsamples 1 --eval_task_by_task --tasks openbookqa --bs 32"
         )
         if res > 0 or res == -1:
             assert False, "cmd line test fail, please have a check"
 
         res = os.system(
-            f"PYTHONPATH='../..:$PYTHONPATH' {python_path} -c 'from auto_round.__main__ import run_light; run_light()' --seqlen 8 --iter 2 --nsamples 8 --output_dir ./saved --tasks lambada_openai"
+            f"PYTHONPATH='../../..:$PYTHONPATH' {python_path} -c 'from auto_round.__main__ import run_light; run_light()' --seqlen 8 --iter 2 --nsamples 8 --output_dir ./saved --tasks lambada_openai"
         )
         if res > 0 or res == -1:
             assert False, "cmd line test fail, please have a check"
@@ -47,23 +47,23 @@ def test_auto_round_cmd(self, tiny_opt_model_path, tiny_qwen_vl_model_path):
 
         # test mllm script
         # test auto_round_mllm --eval help
-        res = os.system(f"PYTHONPATH='../..:$PYTHONPATH' {python_path} -m auto_round --eval -h")
+        res = os.system(f"PYTHONPATH='../../..:$PYTHONPATH' {python_path} -m auto_round --eval -h")
         if res > 0 or res == -1:
             assert False, "cmd line test fail, please have a check"
 
         # test auto_round_mllm --lmms help
-        res = os.system(f"PYTHONPATH='../..:$PYTHONPATH' {python_path} -m auto_round --eval --lmms -h")
+        res = os.system(f"PYTHONPATH='../../..:$PYTHONPATH' {python_path} -m auto_round --eval --lmms -h")
         if res > 0 or res == -1:
             assert False, "cmd line test fail, please have a check"
 
         res = os.system(
-            f"PYTHONPATH='../..:$PYTHONPATH' {python_path} -m auto_round --mllm --model {tiny_qwen_vl_model_path} --iter 2 --nsamples 2 --seqlen 32 --format auto_round --output_dir ./saved"
+            f"PYTHONPATH='../../..:$PYTHONPATH' {python_path} -m auto_round --mllm --model {tiny_qwen_vl_model_path} --iter 2 --nsamples 2 --seqlen 32 --format auto_round --output_dir ./saved"
         )
         if res > 0 or res == -1:
             assert False, "cmd line test fail, please have a check"
 
         res = os.system(
-            f"PYTHONPATH='../..:$PYTHONPATH' {python_path} -m auto_round --mllm --iter 2 --nsamples 2 --model {tiny_qwen_vl_model_path} --seqlen 32 --format auto_round"
+            f"PYTHONPATH='../../..:$PYTHONPATH' {python_path} -m auto_round --mllm --iter 2 --nsamples 2 --model {tiny_qwen_vl_model_path} --seqlen 32 --format auto_round"
             " --quant_nontext_module --output_dir ./saved "
) if res > 0 or res == -1: diff --git a/test/test_cuda/advanced/test_multiple_card_calib.py b/test/test_cuda/advanced/test_multiple_card_calib.py index b19911659..2c29ddf0a 100644 --- a/test/test_cuda/advanced/test_multiple_card_calib.py +++ b/test/test_cuda/advanced/test_multiple_card_calib.py @@ -41,7 +41,7 @@ def test_multiple_card_calib(self): ##test llm script res = os.system( - f"PYTHONPATH='../..:$PYTHONPATH' {python_path} -m auto_round --model /models/Meta-Llama-3.1-8B-Instruct --devices '0,1' --quant_lm_head --iters 1 --nsamples 1 --output_dir None" + f"PYTHONPATH='../../..:$PYTHONPATH' {python_path} -m auto_round --model /models/Meta-Llama-3.1-8B-Instruct --devices '0,1' --quant_lm_head --iters 1 --nsamples 1 --output_dir None" ) if res > 0 or res == -1: assert False, "cmd line test fail, please have a check" @@ -52,7 +52,7 @@ def test_multiple_card_nvfp4(self): ##test llm script res = os.system( - f"PYTHONPATH='../..:$PYTHONPATH' {python_path} -m auto_round --model facebook/opt-125m --scheme NVFP4 --devices '0,1' --iters 1 --nsamples 1 --enable_torch_compile --low_gpu_mem_usage" + f"PYTHONPATH='../../..:$PYTHONPATH' {python_path} -m auto_round --model facebook/opt-125m --scheme NVFP4 --devices '0,1' --iters 1 --nsamples 1 --enable_torch_compile --low_gpu_mem_usage" ) if res > 0 or res == -1: assert False, "cmd line test fail, please have a check" diff --git a/test/test_cuda/export/test_gguf.py b/test/test_cuda/export/test_gguf.py index 3c9ed062d..daa57e26b 100644 --- a/test/test_cuda/export/test_gguf.py +++ b/test/test_cuda/export/test_gguf.py @@ -56,7 +56,7 @@ def test_gguf_format(self, tiny_qwen_model_path, dataloader): save_dir = os.path.join(os.path.dirname(__file__), "saved") res = os.system( - f"PYTHONPATH='../..:$PYTHONPATH' {sys.executable} -m auto_round --model {tiny_qwen_model_path} --iter 2 " + f"PYTHONPATH='../../..:$PYTHONPATH' {sys.executable} -m auto_round --model {tiny_qwen_model_path} --iter 2 " f"--output_dir {save_dir} --nsample 2 --format gguf:q4_0 --device 0" ) print(save_dir) diff --git a/test/test_cuda/models/test_support_vlms.py b/test/test_cuda/models/test_support_vlms.py index b9efe459e..c2f12a207 100644 --- a/test/test_cuda/models/test_support_vlms.py +++ b/test/test_cuda/models/test_support_vlms.py @@ -27,7 +27,7 @@ def test_qwen2(self): model_path = "/models/Qwen2-VL-2B-Instruct/" # test tune res = os.system( - f"PYTHONPATH='../..:$PYTHONPATH' {self.python_path} -m auto_round --mllm " + f"PYTHONPATH='../../..:$PYTHONPATH' {self.python_path} -m auto_round --mllm " f"--model {model_path} --iter 2 --output_dir {self.save_dir} --device {self.device}" ) assert not (res > 0 or res == -1), "qwen2 tuning fail" @@ -82,7 +82,7 @@ def test_phi3(self): model_path = "/models/Phi-3.5-vision-instruct/" ## test tune res = os.system( - f"PYTHONPATH='../..:$PYTHONPATH' {self.python_path} -m auto_round --mllm " + f"PYTHONPATH='../../..:$PYTHONPATH' {self.python_path} -m auto_round --mllm " f"--model {model_path} --iter 2 --output_dir {self.save_dir} --device {self.device}" ) assert not (res > 0 or res == -1), "Phi-3.5 tuning fail" @@ -130,7 +130,7 @@ def test_phi3_vision_awq(self): model_path = "/models/Phi-3.5-vision-instruct/" ## test tune res = os.system( - f"PYTHONPATH='../..:$PYTHONPATH' {self.python_path} -m auto_round --mllm " + f"PYTHONPATH='../../..:$PYTHONPATH' {self.python_path} -m auto_round --mllm " f"--model {model_path} --iter 2 --quant_nontext_module " f"--nsample 64 --seqlen 32 " f"--format auto_awq --output_dir {self.save_dir} --device {self.device}" 
@@ -178,7 +178,7 @@ def test_glm(self):
         model_path = "/models/glm-4v-9b/"
         ## test tune
         res = os.system(
-            f"PYTHONPATH='../..:$PYTHONPATH' {self.python_path} -m auto_round "
+            f"PYTHONPATH='../../..:$PYTHONPATH' {self.python_path} -m auto_round "
             f"--model {model_path} --iter 1 --output_dir {self.save_dir} --device {self.device}"
         )
         assert not (res > 0 or res == -1), "glm-4v-9b tuning fail"
@@ -187,7 +187,7 @@ def test_granite_vision(self):
         model_path = "/models/granite-vision-3.2-2b"
         ## test tune
         res = os.system(
-            f"PYTHONPATH='../..:$PYTHONPATH' {self.python_path} -m auto_round "
+            f"PYTHONPATH='../../..:$PYTHONPATH' {self.python_path} -m auto_round "
             f"--model {model_path} --iter 1 --output_dir {self.save_dir} --device {self.device}"
         )
         assert not (res > 0 or res == -1), "granite-vision-3.2-2b tuning fail"
diff --git a/test/test_cuda/utils/test_alg_ext.py b/test/test_cuda/utils/test_alg_ext.py
index 1978da1f8..81f112720 100644
--- a/test/test_cuda/utils/test_alg_ext.py
+++ b/test/test_cuda/utils/test_alg_ext.py
@@ -49,13 +49,13 @@ def test_cli(self, tiny_opt_model_path):
         python_path = sys.executable
 
         res = os.system(
-            f"PYTHONPATH='../..:$PYTHONPATH' CUDA_VISIBLE_DEVICES=0 {python_path} -m auto_round --model {tiny_opt_model_path} --iters 1 --device auto --enable_alg_ext --avg_bits 2 --options=W2A16,W4A16 --ignore_scale_zp_bits --nsamples 1 --seqlen 32"
+            f"PYTHONPATH='../../..:$PYTHONPATH' CUDA_VISIBLE_DEVICES=0 {python_path} -m auto_round --model {tiny_opt_model_path} --iters 1 --device auto --enable_alg_ext --avg_bits 2 --options=W2A16,W4A16 --ignore_scale_zp_bits --nsamples 1 --seqlen 32"
         )
         if res > 0 or res == -1:
             assert False, "cmd line test fail, please have a check"
 
         res = os.system(
-            f"PYTHONPATH='../..:$PYTHONPATH' CUDA_VISIBLE_DEVICES=0 {python_path} -m auto_round --model {tiny_opt_model_path} --iters 1 --device auto --enable_alg_ext --avg_bits 5.5 --options=mxfp4,mxfp8 --ignore_scale_zp_bits --enable_torch_compile --nsamples 1 --seqlen 32"
+            f"PYTHONPATH='../../..:$PYTHONPATH' CUDA_VISIBLE_DEVICES=0 {python_path} -m auto_round --model {tiny_opt_model_path} --iters 1 --device auto --enable_alg_ext --avg_bits 5.5 --options=mxfp4,mxfp8 --ignore_scale_zp_bits --enable_torch_compile --nsamples 1 --seqlen 32"
         )
         if res > 0 or res == -1:
             assert False, "cmd line test fail, please have a check"

From 7dfcc8490c56ccf3328326a1eb072d9a563a01c1 Mon Sep 17 00:00:00 2001
From: n1ck-guo
Date: Fri, 9 Jan 2026 03:28:25 -0500
Subject: [PATCH 6/6] fix

Signed-off-by: n1ck-guo
---
 test/test_cpu/export/test_gguf_format.py     | 13 ++++++++-----
 test/test_cpu/utils/test_cli_usage.py        | 19 +++++++++++--------
 .../advanced/test_multiple_card_calib.py     |  7 +++++--
 test/test_cuda/export/test_gguf.py           |  5 ++++-
 .../integrations/test_transformers.py        |  7 ++++++-
 test/test_cuda/models/test_support_vlms.py   | 13 ++++++++-----
 test/test_cuda/utils/test_alg_ext.py         |  7 +++++--
 7 files changed, 47 insertions(+), 24 deletions(-)

diff --git a/test/test_cpu/export/test_gguf_format.py b/test/test_cpu/export/test_gguf_format.py
index 6b878f48f..7304f22c7 100644
--- a/test/test_cpu/export/test_gguf_format.py
+++ b/test/test_cpu/export/test_gguf_format.py
@@ -10,6 +10,9 @@
 
 from ...helpers import get_model_path, get_tiny_model
 
+AUTO_ROUND_PATH = __file__.split("/")
+AUTO_ROUND_PATH = "/".join(AUTO_ROUND_PATH[: AUTO_ROUND_PATH.index("test")])
+
 
 class TestGGUF:
 
@@ -26,7 +29,7 @@ def teardown_class(self):
     def test_basic_usage(self, tiny_gemma_model_path, tiny_qwen_model_path):
         python_path = sys.executable
         res = os.system(
-            f"PYTHONPATH='../../..:$PYTHONPATH' {python_path} -m auto_round --model {tiny_gemma_model_path} "
+            f"PYTHONPATH='{AUTO_ROUND_PATH}:$PYTHONPATH' {python_path} -m auto_round --model {tiny_gemma_model_path} "
             f" --bs 16 --iters 0 --nsamples 1 --format gguf:q4_k_m"
         )
         if res > 0 or res == -1:
@@ -34,7 +37,7 @@ def test_basic_usage(self, tiny_gemma_model_path, tiny_qwen_model_path):
         shutil.rmtree("./saved", ignore_errors=True)
 
         res = os.system(
-            f"PYTHONPATH='../../..:$PYTHONPATH' {python_path} -m auto_round --model {tiny_qwen_model_path}"
+            f"PYTHONPATH='{AUTO_ROUND_PATH}:$PYTHONPATH' {python_path} -m auto_round --model {tiny_qwen_model_path}"
             f" --bs 16 --iters 1 --nsamples 1 --format fake,gguf:q4_0"
         )
         if res > 0 or res == -1:
@@ -162,7 +165,7 @@ def test_all_format(self, tiny_qwen_model_path):
         # for gguf_format in ["gguf:q4_0", "gguf:q4_1", "gguf:q4_k_m", "gguf:q6_k"]:
         for gguf_format in ["gguf:q4_k_m"]:
             res = os.system(
-                f"PYTHONPATH='../../..:$PYTHONPATH' {python_path} -m auto_round --model {model_name} "
+                f"PYTHONPATH='{AUTO_ROUND_PATH}:$PYTHONPATH' {python_path} -m auto_round --model {model_name} "
                 f" --bs 16 --iters 1 --nsamples 1 --seqlen 16 --format {gguf_format}"
             )
             if res > 0 or res == -1:
@@ -170,7 +173,7 @@ def test_all_format(self, tiny_qwen_model_path):
             shutil.rmtree("../../tmp_autoround", ignore_errors=True)
 
             res = os.system(
-                f"PYTHONPATH='../../..:$PYTHONPATH' {python_path} -m auto_round --model {model_name}"
+                f"PYTHONPATH='{AUTO_ROUND_PATH}:$PYTHONPATH' {python_path} -m auto_round --model {model_name}"
                 f" --bs 16 --iters 0 --nsamples 1 --seqlen 16 --format fake,{gguf_format}"
             )
             if res > 0 or res == -1:
@@ -179,7 +182,7 @@ def test_all_format(self, tiny_qwen_model_path):
 
         # test mixed q2_k_s
         res = os.system(
-            f"PYTHONPATH='../../..:$PYTHONPATH' {python_path} -m auto_round --model {model_name}"
+            f"PYTHONPATH='{AUTO_ROUND_PATH}:$PYTHONPATH' {python_path} -m auto_round --model {model_name}"
             f" --bs 16 --iters 0 --nsamples 1 --seqlen 16 --scheme GGUF:Q2_K_MIXED"
         )
         if res > 0 or res == -1:
diff --git a/test/test_cpu/utils/test_cli_usage.py b/test/test_cpu/utils/test_cli_usage.py
index 7dad502d2..6ba676936 100644
--- a/test/test_cpu/utils/test_cli_usage.py
+++ b/test/test_cpu/utils/test_cli_usage.py
@@ -4,6 +4,9 @@
 
 from ...helpers import get_model_path
 
+AUTO_ROUND_PATH = __file__.split("/")
+AUTO_ROUND_PATH = "/".join(AUTO_ROUND_PATH[: AUTO_ROUND_PATH.index("test")])
+
 
 class TestAutoRoundCmd:
 
@@ -22,24 +25,24 @@ def test_auto_round_cmd(self, tiny_opt_model_path, tiny_qwen_vl_model_path):
         python_path = sys.executable
 
         # Test llm script
-        res = os.system(f"PYTHONPATH='../../..:$PYTHONPATH' {python_path} -m auto_round -h")
+        res = os.system(f"PYTHONPATH='{AUTO_ROUND_PATH}:$PYTHONPATH' {python_path} -m auto_round -h")
         if res > 0 or res == -1:
             assert False, "cmd line test fail, please have a check"
 
         res = os.system(
-            f"PYTHONPATH='../../..:$PYTHONPATH' {python_path} -m auto_round --model {tiny_opt_model_path} --seqlen 32 --iter 2 --nsamples 1 --format auto_gptq,auto_round --output_dir ./saved --tasks piqa"
+            f"PYTHONPATH='{AUTO_ROUND_PATH}:$PYTHONPATH' {python_path} -m auto_round --model {tiny_opt_model_path} --seqlen 32 --iter 2 --nsamples 1 --format auto_gptq,auto_round --output_dir ./saved --tasks piqa"
         )
         if res > 0 or res == -1:
             assert False, "cmd line test fail, please have a check"
 
         res = os.system(
-            f"PYTHONPATH='../../..:$PYTHONPATH' {python_path} -m auto_round --model {tiny_opt_model_path} --seqlen 8 --iter 1 --nsamples 1 --eval_task_by_task --tasks openbookqa --bs 32"
+            f"PYTHONPATH='{AUTO_ROUND_PATH}:$PYTHONPATH' {python_path} -m auto_round --model {tiny_opt_model_path} --seqlen 8 --iter 1 --nsamples 1 --eval_task_by_task --tasks openbookqa --bs 32"
         )
         if res > 0 or res == -1:
             assert False, "cmd line test fail, please have a check"
 
         res = os.system(
-            f"PYTHONPATH='../../..:$PYTHONPATH' {python_path} -c 'from auto_round.__main__ import run_light; run_light()' --seqlen 8 --iter 2 --nsamples 8 --output_dir ./saved --tasks lambada_openai"
+            f"PYTHONPATH='{AUTO_ROUND_PATH}:$PYTHONPATH' {python_path} -c 'from auto_round.__main__ import run_light; run_light()' --seqlen 8 --iter 2 --nsamples 8 --output_dir ./saved --tasks lambada_openai"
         )
         if res > 0 or res == -1:
             assert False, "cmd line test fail, please have a check"
@@ -47,23 +50,23 @@ def test_auto_round_cmd(self, tiny_opt_model_path, tiny_qwen_vl_model_path):
 
         # test mllm script
         # test auto_round_mllm --eval help
-        res = os.system(f"PYTHONPATH='../../..:$PYTHONPATH' {python_path} -m auto_round --eval -h")
+        res = os.system(f"PYTHONPATH='{AUTO_ROUND_PATH}:$PYTHONPATH' {python_path} -m auto_round --eval -h")
         if res > 0 or res == -1:
             assert False, "cmd line test fail, please have a check"
 
         # test auto_round_mllm --lmms help
-        res = os.system(f"PYTHONPATH='../../..:$PYTHONPATH' {python_path} -m auto_round --eval --lmms -h")
+        res = os.system(f"PYTHONPATH='{AUTO_ROUND_PATH}:$PYTHONPATH' {python_path} -m auto_round --eval --lmms -h")
         if res > 0 or res == -1:
             assert False, "cmd line test fail, please have a check"
 
         res = os.system(
-            f"PYTHONPATH='../../..:$PYTHONPATH' {python_path} -m auto_round --mllm --model {tiny_qwen_vl_model_path} --iter 2 --nsamples 2 --seqlen 32 --format auto_round --output_dir ./saved"
+            f"PYTHONPATH='{AUTO_ROUND_PATH}:$PYTHONPATH' {python_path} -m auto_round --mllm --model {tiny_qwen_vl_model_path} --iter 2 --nsamples 2 --seqlen 32 --format auto_round --output_dir ./saved"
         )
         if res > 0 or res == -1:
             assert False, "cmd line test fail, please have a check"
 
         res = os.system(
-            f"PYTHONPATH='../../..:$PYTHONPATH' {python_path} -m auto_round --mllm --iter 2 --nsamples 2 --model {tiny_qwen_vl_model_path} --seqlen 32 --format auto_round"
+            f"PYTHONPATH='{AUTO_ROUND_PATH}:$PYTHONPATH' {python_path} -m auto_round --mllm --iter 2 --nsamples 2 --model {tiny_qwen_vl_model_path} --seqlen 32 --format auto_round"
             " --quant_nontext_module --output_dir ./saved "
         )
         if res > 0 or res == -1:
diff --git a/test/test_cuda/advanced/test_multiple_card_calib.py b/test/test_cuda/advanced/test_multiple_card_calib.py
index 2c29ddf0a..06c869a86 100644
--- a/test/test_cuda/advanced/test_multiple_card_calib.py
+++ b/test/test_cuda/advanced/test_multiple_card_calib.py
@@ -7,6 +7,9 @@
 
 from auto_round.testing_utils import multi_card
 
+AUTO_ROUND_PATH = __file__.split("/")
+AUTO_ROUND_PATH = "/".join(AUTO_ROUND_PATH[: AUTO_ROUND_PATH.index("test")])
+
 
 def get_accuracy(data):
     match = re.search(r"\|acc\s+\|[↑↓]\s+\|\s+([\d.]+)\|", data)
@@ -41,7 +44,7 @@ def test_multiple_card_calib(self):
 
         ##test llm script
         res = os.system(
-            f"PYTHONPATH='../../..:$PYTHONPATH' {python_path} -m auto_round --model /models/Meta-Llama-3.1-8B-Instruct --devices '0,1' --quant_lm_head --iters 1 --nsamples 1 --output_dir None"
+            f"PYTHONPATH='{AUTO_ROUND_PATH}:$PYTHONPATH' {python_path} -m auto_round --model /models/Meta-Llama-3.1-8B-Instruct --devices '0,1' --quant_lm_head --iters 1 --nsamples 1 --output_dir None"
         )
         if res > 0 or res == -1:
             assert False, "cmd line test fail, please have a check"
@@ -52,7 +55,7 @@ def test_multiple_card_nvfp4(self):
 
         ##test llm script
         res = os.system(
-            f"PYTHONPATH='../../..:$PYTHONPATH' {python_path} -m auto_round --model facebook/opt-125m --scheme NVFP4 --devices '0,1' --iters 1 --nsamples 1 --enable_torch_compile --low_gpu_mem_usage"
+            f"PYTHONPATH='{AUTO_ROUND_PATH}:$PYTHONPATH' {python_path} -m auto_round --model facebook/opt-125m --scheme NVFP4 --devices '0,1' --iters 1 --nsamples 1 --enable_torch_compile --low_gpu_mem_usage"
         )
         if res > 0 or res == -1:
             assert False, "cmd line test fail, please have a check"
diff --git a/test/test_cuda/export/test_gguf.py b/test/test_cuda/export/test_gguf.py
index daa57e26b..4146cb938 100644
--- a/test/test_cuda/export/test_gguf.py
+++ b/test/test_cuda/export/test_gguf.py
@@ -12,6 +12,9 @@
 
 from ...helpers import get_model_path, get_tiny_model, save_tiny_model
 
+AUTO_ROUND_PATH = __file__.split("/")
+AUTO_ROUND_PATH = "/".join(AUTO_ROUND_PATH[: AUTO_ROUND_PATH.index("test")])
+
 
 class TestAutoRound:
     save_dir = "./saved"
@@ -56,7 +59,7 @@ def test_gguf_format(self, tiny_qwen_model_path, dataloader):
         save_dir = os.path.join(os.path.dirname(__file__), "saved")
 
         res = os.system(
-            f"PYTHONPATH='../../..:$PYTHONPATH' {sys.executable} -m auto_round --model {tiny_qwen_model_path} --iter 2 "
+            f"PYTHONPATH='{AUTO_ROUND_PATH}:$PYTHONPATH' {sys.executable} -m auto_round --model {tiny_qwen_model_path} --iter 2 "
             f"--output_dir {save_dir} --nsample 2 --format gguf:q4_0 --device 0"
         )
         print(save_dir)
diff --git a/test/test_cuda/integrations/test_transformers.py b/test/test_cuda/integrations/test_transformers.py
index feb2516f7..638cb7396 100644
--- a/test/test_cuda/integrations/test_transformers.py
+++ b/test/test_cuda/integrations/test_transformers.py
@@ -36,7 +36,7 @@
 # @slow
 @require_torch_gpu
 @require_accelerate
-class AutoRoundTest:
+class TestAutoRound:
     model_name = "OPEA/Qwen2.5-1.5B-Instruct-int4-sym-inc"
     input_text = "There is a girl who likes adventure,"
     EXPECTED_OUTPUTS = set()
@@ -205,3 +205,8 @@ def test_mixed_bits(self):
         text = "There is a girl who likes adventure,"
         inputs = tokenizer(text, return_tensors="pt").to(model.device)
         tokenizer.decode(model.generate(**inputs, max_new_tokens=5)[0])
+
+
+# FAILED export/test_gguf.py::TestAutoRound::test_gguf_format - AssertionError: qwen2 tuning fail
+# FAILED export/test_gguf.py::TestAutoRound::test_all_format - SystemExit: 1
+# FAILED export/test_gguf.py::TestAutoRound::test_vlm_gguf - AttributeError: 'Qwen2VLForConditionalGeneration' object has no attribute 'last_layer_name_to_block_name'
diff --git a/test/test_cuda/models/test_support_vlms.py b/test/test_cuda/models/test_support_vlms.py
index c2f12a207..4f9373ca5 100644
--- a/test/test_cuda/models/test_support_vlms.py
+++ b/test/test_cuda/models/test_support_vlms.py
@@ -9,6 +9,9 @@
 from auto_round import AutoRoundConfig  # # must import for auto-round format
 from auto_round.testing_utils import require_gptqmodel, require_package_version_ut, require_vlm_env
 
+AUTO_ROUND_PATH = __file__.split("/")
+AUTO_ROUND_PATH = "/".join(AUTO_ROUND_PATH[: AUTO_ROUND_PATH.index("test")])
+
 
 class TestSupportVLMS:
 
@@ -27,7 +30,7 @@ def test_qwen2(self):
         model_path = "/models/Qwen2-VL-2B-Instruct/"
         # test tune
         res = os.system(
-            f"PYTHONPATH='../../..:$PYTHONPATH' {self.python_path} -m auto_round --mllm "
+            f"PYTHONPATH='{AUTO_ROUND_PATH}:$PYTHONPATH' {self.python_path} -m auto_round --mllm "
             f"--model {model_path} --iter 2 --output_dir {self.save_dir} --device {self.device}"
         )
         assert not (res > 0 or res == -1), "qwen2 tuning fail"
@@ -82,7 +85,7 @@ def test_phi3(self):
         model_path = "/models/Phi-3.5-vision-instruct/"
         ## test tune
         res = os.system(
-            f"PYTHONPATH='../../..:$PYTHONPATH' {self.python_path} -m auto_round --mllm "
+            f"PYTHONPATH='{AUTO_ROUND_PATH}:$PYTHONPATH' {self.python_path} -m auto_round --mllm "
             f"--model {model_path} --iter 2 --output_dir {self.save_dir} --device {self.device}"
         )
         assert not (res > 0 or res == -1), "Phi-3.5 tuning fail"
@@ -130,7 +133,7 @@ def test_phi3_vision_awq(self):
         model_path = "/models/Phi-3.5-vision-instruct/"
         ## test tune
         res = os.system(
-            f"PYTHONPATH='../../..:$PYTHONPATH' {self.python_path} -m auto_round --mllm "
+            f"PYTHONPATH='{AUTO_ROUND_PATH}:$PYTHONPATH' {self.python_path} -m auto_round --mllm "
             f"--model {model_path} --iter 2 --quant_nontext_module "
             f"--nsample 64 --seqlen 32 "
             f"--format auto_awq --output_dir {self.save_dir} --device {self.device}"
@@ -178,7 +181,7 @@ def test_glm(self):
         model_path = "/models/glm-4v-9b/"
         ## test tune
         res = os.system(
-            f"PYTHONPATH='../../..:$PYTHONPATH' {self.python_path} -m auto_round "
+            f"PYTHONPATH='{AUTO_ROUND_PATH}:$PYTHONPATH' {self.python_path} -m auto_round "
             f"--model {model_path} --iter 1 --output_dir {self.save_dir} --device {self.device}"
         )
         assert not (res > 0 or res == -1), "glm-4v-9b tuning fail"
@@ -187,7 +190,7 @@ def test_granite_vision(self):
         model_path = "/models/granite-vision-3.2-2b"
         ## test tune
         res = os.system(
-            f"PYTHONPATH='../../..:$PYTHONPATH' {self.python_path} -m auto_round "
+            f"PYTHONPATH='{AUTO_ROUND_PATH}:$PYTHONPATH' {self.python_path} -m auto_round "
             f"--model {model_path} --iter 1 --output_dir {self.save_dir} --device {self.device}"
         )
         assert not (res > 0 or res == -1), "granite-vision-3.2-2b tuning fail"
diff --git a/test/test_cuda/utils/test_alg_ext.py b/test/test_cuda/utils/test_alg_ext.py
index 81f112720..d2e8e4bfb 100644
--- a/test/test_cuda/utils/test_alg_ext.py
+++ b/test/test_cuda/utils/test_alg_ext.py
@@ -10,6 +10,9 @@
 
 from ...helpers import get_model_path
 
+AUTO_ROUND_PATH = __file__.split("/")
+AUTO_ROUND_PATH = "/".join(AUTO_ROUND_PATH[: AUTO_ROUND_PATH.index("test")])
+
 
 class TestAlgExt:
     save_folder = "./saved"
@@ -49,13 +52,13 @@ def test_cli(self, tiny_opt_model_path):
         python_path = sys.executable
 
         res = os.system(
-            f"PYTHONPATH='../../..:$PYTHONPATH' CUDA_VISIBLE_DEVICES=0 {python_path} -m auto_round --model {tiny_opt_model_path} --iters 1 --device auto --enable_alg_ext --avg_bits 2 --options=W2A16,W4A16 --ignore_scale_zp_bits --nsamples 1 --seqlen 32"
+            f"PYTHONPATH='{AUTO_ROUND_PATH}:$PYTHONPATH' CUDA_VISIBLE_DEVICES=0 {python_path} -m auto_round --model {tiny_opt_model_path} --iters 1 --device auto --enable_alg_ext --avg_bits 2 --options=W2A16,W4A16 --ignore_scale_zp_bits --nsamples 1 --seqlen 32"
         )
         if res > 0 or res == -1:
             assert False, "cmd line test fail, please have a check"
 
         res = os.system(
-            f"PYTHONPATH='../../..:$PYTHONPATH' CUDA_VISIBLE_DEVICES=0 {python_path} -m auto_round --model {tiny_opt_model_path} --iters 1 --device auto --enable_alg_ext --avg_bits 5.5 --options=mxfp4,mxfp8 --ignore_scale_zp_bits --enable_torch_compile --nsamples 1 --seqlen 32"
+            f"PYTHONPATH='{AUTO_ROUND_PATH}:$PYTHONPATH' CUDA_VISIBLE_DEVICES=0 {python_path} -m auto_round --model {tiny_opt_model_path} --iters 1 --device auto --enable_alg_ext --avg_bits 5.5 --options=mxfp4,mxfp8 --ignore_scale_zp_bits --enable_torch_compile --nsamples 1 --seqlen 32"
         )
         if res > 0 or res == -1:
             assert False, "cmd line test fail, please have a check"