============================= test session starts ==============================
platform linux -- Python 3.11.13, pytest-8.4.2, pluggy-1.6.0 -- /home/HDCharles/rhdev/bin/python3
cachedir: .pytest_cache
rootdir: /home/HDCharles/repos/llm-compressor
configfile: pyproject.toml
plugins: anyio-4.11.0
collecting ... collected 1 item

tests/e2e/vLLM/test_vllm.py::TestvLLM::test_vllm[/home/HDCharles/repos/llm-compressor/tests/e2e/vLLM/configs/fp8_dynamic_per_tensor_moe.yaml] 2025-10-24T02:28:38.126651+0000 | set_up | INFO - ========== RUNNING ==============
2025-10-24T02:28:38.126768+0000 | set_up | INFO - Qwen3-VL-30B-A3B-Instruct-FP8_DYNAMIC
`torch_dtype` is deprecated! Use `dtype` instead!
Loading checkpoint shards: 100%|██████████| 13/13 [00:00<00:00, 113.92it/s]
2025-10-24T02:28:42.118085+0000 | run_oneshot_for_e2e_testing | INFO - ONESHOT KWARGS
2025-10-24T02:28:44.584874+0000 | reset | INFO - Compression lifecycle reset
2025-10-24T02:28:44.591658+0000 | _create_default_logger | INFO - Logging all LLM Compressor modifier-level logs to sparse_logs/24-10-2025_02.28.44.log
2025-10-24T02:28:44.592077+0000 | from_modifiers | INFO - Creating recipe from modifiers
2025-10-24T02:28:44.625401+0000 | initialize | INFO - Compression lifecycle initialized for 1 modifiers
2025-10-24T02:28:44.625634+0000 | IndependentPipeline | INFO - Inferred `DataFreePipeline` for `QuantizationModifier`
Updating global scales: 100%|██████████| 356/356 [00:00<00:00, 716837.36it/s]
Fusing global scales: 1333it [00:00, 591453.21it/s]
Calibrating weights: 100%|██████████| 356/356 [00:00<00:00, 2891.78it/s]
2025-10-24T02:28:59.001972+0000 | finalize | INFO - Compression lifecycle finalized for 1 modifiers
2025-10-24T02:29:13.583921+0000 | post_process | WARNING - Optimized model is not saved. To save, please provide`output_dir` as input arg.Ex. `oneshot(..., output_dir=...)`
2025-10-24T02:29:13.606836+0000 | test_vllm | INFO - ================= SAVING TO DISK ======================
2025-10-24T02:29:13.607390+0000 | get_model_compressor | INFO - skip_sparsity_compression_stats set to True. Skipping sparsity compression statistic calculations. No sparsity compressor will be applied.
Compressing model: 356it [00:01, 231.87it/s]
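
The post_process WARNING above is expected in this run: the test leaves output_dir unset on purpose and saves explicitly in the SAVING TO DISK step. Outside the harness, passing output_dir directly silences it; a minimal sketch, with the model stub and recipe path as placeholder assumptions:

    from llmcompressor import oneshot

    # Placeholder arguments -- only output_dir matters here; with it set,
    # oneshot() saves the optimized model itself and skips the warning.
    oneshot(
        model="Qwen/Qwen3-VL-30B-A3B-Instruct",
        recipe="recipe.yaml",
        output_dir="Qwen3-VL-30B-A3B-Instruct-FP8_DYNAMIC",
    )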
2025-10-24T02:34:43.735488+0000 | reset | INFO - Compression lifecycle reset
2025-10-24T02:34:43.735697+0000 | _run_vllm | INFO - Run vllm in subprocess.Popen() using python env:
2025-10-24T02:34:43.735762+0000 | _run_vllm | INFO - /home/HDCharles/rhdev/bin/python3
2025-10-24T02:34:46.263243+0000 | _run_vllm | INFO - INFO 10-24 02:34:45 [__init__.py:225] Automatically detected platform cuda.

FAILED

=================================== FAILURES ===================================
_ TestvLLM.test_vllm[/home/HDCharles/repos/llm-compressor/tests/e2e/vLLM/configs/fp8_dynamic_per_tensor_moe.yaml] _

self = <tests.e2e.vLLM.test_vllm.TestvLLM object at 0x7fdd7886c1d0>
test_data_file = '/home/HDCharles/repos/llm-compressor/tests/e2e/vLLM/configs/fp8_dynamic_per_tensor_moe.yaml'

    def test_vllm(self, test_data_file: str):
        # Run vLLM with saved model

        self.set_up(test_data_file)
        if not self.save_dir:
            self.save_dir = self.model.split("/")[1] + f"-{self.scheme}"
        oneshot_model, tokenizer = run_oneshot_for_e2e_testing(
            model=self.model,
            model_class=self.model_class,
            num_calibration_samples=self.num_calibration_samples,
            max_seq_length=self.max_seq_length,
            scheme=self.scheme,
            dataset_id=self.dataset_id,
            dataset_config=self.dataset_config,
            dataset_split=self.dataset_split,
            recipe=self.recipe,
            quant_type=self.quant_type,
        )

        # check that session contains recipe
        self._check_session_contains_recipe()

        logger.info("================= SAVING TO DISK ======================")
        self._save_compressed_model(oneshot_model=oneshot_model, tokenizer=tokenizer)

        recipe_path = os.path.join(self.save_dir, "recipe.yaml")

        # check that expected files exist
        self._check_save_dir_has_expected_files()

        # Use the session to fetch the recipe;
        # Reset session for next test case
        session = active_session()
        recipe_yaml_str = session.get_serialized_recipe()
        with open(recipe_path, "w") as fp:
            fp.write(recipe_yaml_str)
        session.reset()

        # if SKIP_HF_UPLOAD.lower() != "yes":
        #     logger.info("================= UPLOADING TO HUB ======================")

        #     stub = f"{HF_MODEL_HUB_NAME}/{self.save_dir}-e2e"

        #     self.api.create_repo(
        #         repo_id=stub,
        #         exist_ok=True,
        #         repo_type="model",
        #         private=False,
        #     )

        #     self.api.upload_folder(
        #         repo_id=stub,
        #         folder_path=self.save_dir,
        #     )

        # if VLLM_PYTHON_ENV.lower() == "same":
        #     logger.info("========== RUNNING vLLM in the same python env ==========")
        # else:
        #     logger.info("========== RUNNING vLLM in a separate python env ==========")

>       self._run_vllm(logger)

tests/e2e/vLLM/test_vllm.py:159:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
tests/test_timer/timer_utils.py:33: in wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

self = <tests.e2e.vLLM.test_vllm.TestvLLM object at 0x7fdd7886c1d0>
logger = <loguru.logger handlers=[(id=2, level=20, sink=<stdout>), (id=3, level=10, sink='sparse_logs/oneshot_2025-10-24_02-28-42.log'), (id=4, level=10, sink='sparse_logs/24-10-2025_02.28.44.log')]>

    @log_time
    def _run_vllm(self, logger):
        import json
        import subprocess

        llm_kwargs = {"model": self.save_dir}

        if self.gpu_memory_utilization is not None:
            llm_kwargs["gpu_memory_utilization"] = self.gpu_memory_utilization

        json_scheme = json.dumps(self.scheme)
        json_llm_kwargs = json.dumps(llm_kwargs)
        json_prompts = json.dumps(self.prompts)

        test_file_dir = os.path.dirname(os.path.abspath(__file__))
        run_file_path = os.path.join(test_file_dir, "run_vllm.py")

        logger.info("Run vllm in subprocess.Popen() using python env:")
        logger.info(self.vllm_env)

        result = subprocess.Popen(
            [self.vllm_env, run_file_path, json_scheme, json_llm_kwargs, json_prompts],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
        )
        stdout, stderr = result.communicate()
        logger.info(stdout)

        error_msg = f"ERROR: vLLM failed with exit code {result.returncode}: {stderr}"
>       assert result.returncode == 0, error_msg
E       AssertionError: ERROR: vLLM failed with exit code 1: Traceback (most recent call last):
E         File "/home/HDCharles/repos/llm-compressor/tests/e2e/vLLM/run_vllm.py", line 5, in <module>
E           from vllm import LLM, SamplingParams
E         File "/home/HDCharles/repos/vllm/vllm/__init__.py", line 74, in __getattr__
E           module = import_module(module_name, __package__)
E                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
E         File "/home/HDCharles/.local/share/uv/python/cpython-3.11.13-linux-x86_64-gnu/lib/python3.11/importlib/__init__.py", line 126, in import_module
E           return _bootstrap._gcd_import(name[level:], package, level)
E                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
E         File "/home/HDCharles/repos/vllm/vllm/entrypoints/llm.py", line 20, in <module>
E           from vllm.config import (
E         File "/home/HDCharles/repos/vllm/vllm/config/__init__.py", line 5, in <module>
E           from vllm.config.compilation import (
E         File "/home/HDCharles/repos/vllm/vllm/config/compilation.py", line 18, in <module>
E           from vllm.platforms import current_platform
E         File "/home/HDCharles/repos/vllm/vllm/platforms/__init__.py", line 255, in __getattr__
E           _current_platform = resolve_obj_by_qualname(platform_cls_qualname)()
E                               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
E         File "/home/HDCharles/repos/vllm/vllm/utils/import_utils.py", line 46, in resolve_obj_by_qualname
E           module = importlib.import_module(module_name)
E                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
E         File "/home/HDCharles/.local/share/uv/python/cpython-3.11.13-linux-x86_64-gnu/lib/python3.11/importlib/__init__.py", line 126, in import_module
E           return _bootstrap._gcd_import(name[level:], package, level)
E                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
E         File "/home/HDCharles/repos/vllm/vllm/platforms/cuda.py", line 16, in <module>
E           import vllm._C  # noqa
E           ^^^^^^^^^^^^^^
E       ImportError: /home/HDCharles/repos/vllm/vllm/_C.abi3.so: undefined symbol: _ZNK3c106SymInt22maybe_as_int_slow_pathEv
E
E       assert 1 == 0
E        +  where 1 = <Popen: returncode: 1 args: ['/home/HDCharles/rhdev/bin/python3', '/home/HDC...>.returncode

tests/e2e/vLLM/test_vllm.py:216: AssertionError
=========================== short test summary info ============================
FAILED tests/e2e/vLLM/test_vllm.py::TestvLLM::test_vllm[/home/HDCharles/repos/llm-compressor/tests/e2e/vLLM/configs/fp8_dynamic_per_tensor_moe.yaml]
======================== 1 failed in 372.96s (0:06:12) =========================
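
The ImportError captured from the subprocess explains the failure: vllm/_C.abi3.so references _ZNK3c106SymInt22maybe_as_int_slow_pathEv, which demangles to c10::SymInt::maybe_as_int_slow_path() const, a symbol exported by libtorch (libc10.so). An undefined libtorch symbol in vLLM's compiled extension almost always means the source checkout at /home/HDCharles/repos/vllm was built against a different torch than the one installed in the env that launches run_vllm.py. A minimal diagnostic sketch, assuming the standard Linux wheel layout (torch/lib/libc10.so); the rebuild command at the end is likewise an assumption for an editable source build:

    # abi_check.py -- run with /home/HDCharles/rhdev/bin/python3, the same
    # interpreter the test passes to subprocess.Popen().
    import ctypes
    import os

    import torch

    print("torch:", torch.__version__)            # torch currently installed
    print("built for CUDA:", torch.version.cuda)  # None on CPU-only builds

    # Check whether this env's libtorch exports the missing symbol.
    libc10_path = os.path.join(os.path.dirname(torch.__file__), "lib", "libc10.so")
    libc10 = ctypes.CDLL(libc10_path)
    try:
        libc10["_ZNK3c106SymInt22maybe_as_int_slow_pathEv"]
        print("symbol present: mismatch lies elsewhere; check which torch the")
        print("subprocess env actually resolves")
    except AttributeError:
        print("symbol missing: installed torch predates the vLLM build;")
        print("upgrade torch, or rebuild vLLM against this torch, e.g.")
        print("  pip install -e /home/HDCharles/repos/vllm --no-build-isolation")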
|