diff --git a/Jenkinsfile b/Jenkinsfile index ff5fd03945..dc15599e0e 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -8,9 +8,6 @@ properties([ booleanParam(defaultValue: true, description: 'Whether to propagate commit status to GitHub', name: 'propagateStatus'), - booleanParam(defaultValue: false, - description: 'If true, forces running pre-commit scope', - name: 'forceRunPrecommitScope'), string(defaultValue: '', description: 'Pipeline shared library version (branch/tag/commit). Determined automatically if empty', name: 'library_version') diff --git a/pyproject.toml b/pyproject.toml index d203f17947..d962982d24 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -59,8 +59,3 @@ requires = [ "cmake~=3.24.0; platform_system == 'Darwin' and platform_machine == 'arm64'", ] build-backend = "py_build_cmake.build" - -[tool.pytest.ini_options] -markers = [ - "precommit: (deselect with '-m \"precommit\"')", -] diff --git a/src/cpp/src/lora/adapter.cpp b/src/cpp/src/lora/adapter.cpp index 5335f15378..75655d4cde 100644 --- a/src/cpp/src/lora/adapter.cpp +++ b/src/cpp/src/lora/adapter.cpp @@ -100,10 +100,9 @@ struct AutoSafetensor: public safetensors_File { ConstantMap safetensor_to_constant_map(const ov::Tensor& safetensor) { AutoSafetensor safe_tensors_file{}; - // Intentionally discard constness as safetensors_file_init requires a non-const pointer (used as read-only) - auto data_ptr = const_cast(safetensor.data()); - OPENVINO_ASSERT(safetensors_file_init(data_ptr, safetensor.get_byte_size(), &safe_tensors_file) == nullptr, - "Cannot parse safetensor as a Safetensors file format. Safetensors file format is supported only"); + OPENVINO_ASSERT(safetensors_file_init(safetensor.data(), safetensor.get_byte_size(), &safe_tensors_file) == nullptr, + "Cannot parse safetensor as a Safetensors file format. Safetensors file format is supported only" + ); ConstantMap tensors; for (int i = 0; i < safe_tensors_file.num_tensors; i++) { diff --git a/src/cpp/src/speculative_decoding/speculative_decoding_stateful.cpp b/src/cpp/src/speculative_decoding/speculative_decoding_stateful.cpp index 51dea61bec..9ee58ec62d 100644 --- a/src/cpp/src/speculative_decoding/speculative_decoding_stateful.cpp +++ b/src/cpp/src/speculative_decoding/speculative_decoding_stateful.cpp @@ -338,7 +338,7 @@ std::variant> auto sample_token = [&](const ov::Tensor& logits, std::size_t idx) { size_t sequence_offset = idx * vocab_size; - const float* logits_data = logits.data() + sequence_offset; + float* logits_data = logits.data() + sequence_offset; return std::max_element(logits_data, logits_data + vocab_size) - logits_data; }; diff --git a/tests/python_tests/README.md b/tests/python_tests/README.md index b15737c2d2..de7b236652 100644 --- a/tests/python_tests/README.md +++ b/tests/python_tests/README.md @@ -14,29 +14,29 @@ pip install -r tests/python_tests/requirements.txt ## Run Tests ```sh -python -m pytest tests/python_tests/ -m precommit +python -m pytest tests/python_tests/ ``` If you have built GenAI library by yourself instead of using wheel please set `PYTHONPATH` so that test could find library, e.g. ```sh -PYTHONPATH=$PYTHONPATH:.../openvino.genai/build-Release/ python -m pytest tests/python_tests/ -m precommit +PYTHONPATH=$PYTHONPATH:.../openvino.genai/build-Release/ python -m pytest tests/python_tests/ ``` ## Customize tests run -Tests have `precommit` set of models. `precommit` contains lightweight models which can be quickly inferred. 
If you wish to run specific tests, you can use `-k` option, for example to run only multibatch and chat tests: +Tests use different sets of models for different purposes: lightweight models by default, and larger real models for tests marked `real_models` or `nightly`. If you wish to run specific tests, you can use the `-k` option, for example to run only multibatch and chat tests: ```sh -python -m pytest tests/python_tests/ -m precommit -k "test_multibatch and test_chat" +python -m pytest tests/python_tests/ -k "test_multibatch and test_chat" ``` If you wish to run all tests except beam search do the following: ```sh -python -m pytest tests/python_tests/ -m precommit -k "not test_beam_search" +python -m pytest tests/python_tests/ -k "not test_beam_search" ``` Argument `--model_ids` can be used to run tests selectively only for specific models. HF model ids should be separated by space, e.g: ```sh -python -m pytest tests/python_tests/ -m precommit -k "test_multibatch" --model_ids "TinyLlama/TinyLlama-1.1B-Chat-v1.0 Qwen/Qwen2-0.5B-Instruct" +python -m pytest tests/python_tests/ -k "test_multibatch" --model_ids "TinyLlama/TinyLlama-1.1B-Chat-v1.0 Qwen/Qwen2-0.5B-Instruct" ``` List of currently supported models can be found in tests/python_tests/models.py:get_models_list diff --git a/tests/python_tests/conftest.py b/tests/python_tests/conftest.py index 4044b61209..680715b6f2 100644 --- a/tests/python_tests/conftest.py +++ b/tests/python_tests/conftest.py @@ -53,6 +53,4 @@ def pytest_addoption(parser): def pytest_configure(config: pytest.Config): - marker = "precommit" if config.getoption("-m") == "precommit" else None - pytest.run_marker = marker pytest.selected_model_ids = config.getoption("--model_ids", default=None) diff --git a/tests/python_tests/models/precommit b/tests/python_tests/models/lightweight similarity index 100% rename from tests/python_tests/models/precommit rename to tests/python_tests/models/lightweight diff --git a/tests/python_tests/pytest.ini b/tests/python_tests/pytest.ini index 60b5cc39d8..f0807854c7 100644 --- a/tests/python_tests/pytest.ini +++ b/tests/python_tests/pytest.ini @@ -2,8 +2,8 @@ markers = ; The following markers are defined for categorizing tests: - ; precommit - Tests that should be run before committing code. ; real_models - Tests that involve execution of the models from models/real_models file + ; nightly - Tests that should only run in nightly builds (use large models or are long-running) ; samples - Tests related to the sample models. ; llm - Tests related to large language models. ; whisper - Tests related to the Whisper model. @@ -12,8 +12,8 @@ markers = ; vlm - Tests related to the VLM model. ; rag - Tests related to the RAG components.
; speech_generation - Tests related to text-to-speech generation - precommit real_models + nightly samples llm whisper @@ -24,4 +24,4 @@ markers = rag speech_generation -addopts = -m precommit +addopts = -m "not real_models and not nightly" diff --git a/tests/python_tests/samples/test_text2speech.py b/tests/python_tests/samples/test_text2speech.py index 8b05c6530c..c497e3d65d 100644 --- a/tests/python_tests/samples/test_text2speech.py +++ b/tests/python_tests/samples/test_text2speech.py @@ -32,7 +32,6 @@ def teardown_class(self): @pytest.mark.speech_generation @pytest.mark.samples - @pytest.mark.precommit @pytest.mark.parametrize("convert_model", ["tiny-random-SpeechT5ForTextToSpeech"], indirect=True) @pytest.mark.parametrize("input_prompt", ["Hello everyone"]) def test_sample_text_to_speech(self, convert_model, input_prompt): @@ -54,7 +53,6 @@ def test_sample_text_to_speech(self, convert_model, input_prompt): @pytest.mark.speech_generation @pytest.mark.samples - @pytest.mark.precommit @pytest.mark.parametrize("convert_model", ["tiny-random-SpeechT5ForTextToSpeech"], indirect=True) @pytest.mark.parametrize("input_prompt", ["Test text to speech without speaker embedding file"]) def test_sample_text_to_speech_no_speaker_embedding_file(self, convert_model, input_prompt): diff --git a/tests/python_tests/test_continuous_batching.py b/tests/python_tests/test_continuous_batching.py index 90ea7ac316..b32ba5e8e7 100644 --- a/tests/python_tests/test_continuous_batching.py +++ b/tests/python_tests/test_continuous_batching.py @@ -36,9 +36,8 @@ def read_models_list(file_name: str): models.append(model_name) return models -@pytest.mark.precommit -@pytest.mark.parametrize("model_id", read_models_list(os.path.join(os.path.dirname(os.path.realpath(__file__)), "models", "precommit"))) -def test_e2e_precommit(model_id): +@pytest.mark.parametrize("model_id", read_models_list(os.path.join(os.path.dirname(os.path.realpath(__file__)), "models", "lightweight"))) +def test_e2e_lightweight_models(model_id): prompts, generation_configs = get_test_dataset() generate_and_compare(prompts=prompts, generation_config=generation_configs, @@ -73,7 +72,6 @@ def test_e2e_real_models(model_id): ] @pytest.mark.parametrize("generation_config", test_configs) @pytest.mark.parametrize("prompt", batched_prompts[1:]) # num_beams=15 diverges on the first prompt. 
-@pytest.mark.precommit @pytest.mark.skip(reason="CVS-162891: Fix test_continuous_batching_vs_stateful tests after we started to compare cb vs sdpa") def test_continuous_batching_vs_stateful(prompt, generation_config): model_id = "facebook/opt-125m" @@ -93,7 +91,6 @@ def test_continuous_batching_vs_stateful(prompt, generation_config): prompts = ['The Sun is yellow because', 'Difference between Jupiter and Mars is that', 'table is made of'] @pytest.mark.parametrize("prompt", prompts) -@pytest.mark.precommit def test_cb_streamer_vs_return_vs_stateful(prompt): model_id = "facebook/opt-125m" _, _, models_path = download_and_convert_model(model_id) @@ -124,7 +121,6 @@ def test_cb_streamer_vs_return_vs_stateful(prompt): @pytest.mark.parametrize("input_type", [ GenerationChatInputsType.STRING, GenerationChatInputsType.CHAT_HISTORY]) -@pytest.mark.precommit def test_chat_scenario_vs_stateful(model_id, generation_config_kwargs: dict, pipeline_type, input_type: GenerationChatInputsType): _, _, models_path = download_and_convert_model(model_id) @@ -175,7 +171,6 @@ def test_chat_scenario_vs_stateful(model_id, generation_config_kwargs: dict, pip @pytest.mark.parametrize("generation_config_kwargs", generation_configs) @pytest.mark.parametrize("model_id", get_chat_models_list()) @pytest.mark.parametrize("pipeline_type", [PipelineType.CONTINUOUS_BATCHING, PipelineType.SPECULATIVE_DECODING, PipelineType.PROMPT_LOOKUP_DECODING,]) -@pytest.mark.precommit def test_continuous_batching_add_request_health_check(model_id, generation_config_kwargs: dict, pipeline_type): _, _, models_path = download_and_convert_model(model_id) @@ -206,7 +201,6 @@ def test_continuous_batching_add_request_health_check(model_id, generation_confi @pytest.mark.parametrize("generation_config_kwargs", invalid_generation_configs) @pytest.mark.parametrize("model_id", get_chat_models_list()) @pytest.mark.parametrize("pipeline_type", [PipelineType.CONTINUOUS_BATCHING, PipelineType.SPECULATIVE_DECODING, PipelineType.PROMPT_LOOKUP_DECODING,]) -@pytest.mark.precommit def test_continuous_batching_add_request_fails(model_id, generation_config_kwargs: dict, pipeline_type): _, _, models_path = download_and_convert_model(model_id) @@ -228,7 +222,6 @@ def test_continuous_batching_add_request_fails(model_id, generation_config_kwarg # # todo: iefode: bug reproducer!!! -@pytest.mark.precommit @pytest.mark.parametrize("sampling_config", [get_greedy(), get_beam_search(), get_multinomial_all_parameters()], ids=["greedy", "beam_search", "multinomial_all_parameters"]) def test_post_oom_health(sampling_config): @@ -289,7 +282,6 @@ def get_beam_search_seq_len_300() -> GenerationConfig: ({"num_kv_blocks": 100, "dynamic_split_fuse": True}, get_beam_search_seq_len_300()), ({"num_kv_blocks": 100, "dynamic_split_fuse": False}, get_beam_search_seq_len_300())] @pytest.mark.parametrize("params", scheduler_params_list) -@pytest.mark.precommit def test_preemption(params): model_id = "facebook/opt-125m" scheduler_params = params[0] @@ -342,7 +334,6 @@ def test_preemption(params): # todo: Anastasiia Pnevskaya: fix the test because it is hanging according max_new_tokens = std::numeric_limits::max() @pytest.mark.parametrize("dynamic_split_fuse", [True, False]) -@pytest.mark.precommit @pytest.mark.skip(reason="Random sampling results are non deterministic due to: discrete_distribution impl depends on platform, model inference results may depend on CPU. 
Test passes on CI but fails locally.") def test_preemption_with_multinomial(dynamic_split_fuse): generation_configs = multinomial_params.generation_config @@ -425,7 +416,6 @@ def test_preemption_with_multinomial(dynamic_split_fuse): @pytest.mark.parametrize("dynamic_split_fuse", [True, False]) -@pytest.mark.precommit @pytest.mark.skip(reason="Random sampling results are non deterministic due to: discrete_distribution impl depends on platform, model inference results may depend on CPU. Test passes on CI but fails locally.") def test_preemption_with_multinomial_n_seq(dynamic_split_fuse): model_id : str = "facebook/opt-125m" @@ -442,7 +432,6 @@ def test_preemption_with_multinomial_n_seq(dynamic_split_fuse): @pytest.mark.parametrize("pipeline_type", [PipelineType.PROMPT_LOOKUP_DECODING]) -@pytest.mark.precommit def test_dynamic_split_fuse_doesnt_affect_generated_text(pipeline_type): model_id : str = "TinyLlama/TinyLlama-1.1B-Chat-v1.0" _, _, models_path = download_and_convert_model(model_id) @@ -496,7 +485,6 @@ def run_extended_perf_metrics_collection(model_id, generation_config: Generation @pytest.mark.parametrize("pipeline_type", [PipelineType.PAGED_ATTENTION, PipelineType.SPECULATIVE_DECODING]) -@pytest.mark.precommit def test_speculative_decoding_extended_perf_metrics(pipeline_type): import time start_time = time.perf_counter() diff --git a/tests/python_tests/test_generation_config.py b/tests/python_tests/test_generation_config.py index 4acaa37648..2a5b1fa28a 100644 --- a/tests/python_tests/test_generation_config.py +++ b/tests/python_tests/test_generation_config.py @@ -61,7 +61,6 @@ def verify_set_values(generation_config, kwargs): dict(max_new_tokens=1, apply_chat_template=False), ] @pytest.mark.parametrize("generation_config_kwargs", configs) -@pytest.mark.precommit def test_valid_configs(generation_config_kwargs): config = GenerationConfig(**generation_config_kwargs) verify_set_values(config, generation_config_kwargs) @@ -107,7 +106,6 @@ def test_valid_configs(generation_config_kwargs): # TODO: add tests for invalid properties ] @pytest.mark.parametrize("generation_config_kwargs", invalid_configs) -@pytest.mark.precommit def test_invalid_generation_configs_throws(generation_config_kwargs): config = GenerationConfig(**generation_config_kwargs) with pytest.raises(RuntimeError): @@ -123,7 +121,6 @@ def test_invalid_generation_configs_throws(generation_config_kwargs): dict(eos_token_id=1), # 'stop_token_ids' does not contain 'eos_token_id' dict(eos_token_id=1, stop_token_ids={2}), # 'stop_token_ids' is not empty, but does not contain 'eos_token_id' ]) -@pytest.mark.precommit def test_invalid_fields_assinment_rises(fields): config = GenerationConfig() for key, val in fields.items(): @@ -147,7 +144,6 @@ def load_genai_generation_config_from_file(configs: list[tuple], temp_path): return ov_generation_config -@pytest.mark.precommit def test_multiple_eos_are_read_as_stop_token_ids(tmp_path): generation_config_json = { "eos_token_id": [ diff --git a/tests/python_tests/test_gguf_reader.py b/tests/python_tests/test_gguf_reader.py index 63a69694d0..f91855773c 100644 --- a/tests/python_tests/test_gguf_reader.py +++ b/tests/python_tests/test_gguf_reader.py @@ -18,7 +18,6 @@ @pytest.mark.parametrize("pipeline_type", get_gguf_pipeline_types()) @pytest.mark.parametrize("model_ids", get_gguf_model_list()) -@pytest.mark.precommit @pytest.mark.skipif(sys.platform == "win32", reason="CVS-174065") def test_pipelines_with_gguf_generate(pipeline_type, model_ids): if sys.platform == 'darwin': @@ -69,7 +68,6 @@ 
def test_pipelines_with_gguf_generate(pipeline_type, model_ids): '<|endoftext|><|endoftext|><|im_end|>', '<|endoftext|> Why the Sky is Blue? <|im_end|>', ]) -@pytest.mark.precommit @pytest.mark.skipif(sys.platform == "win32", reason="CVS-174065") def test_full_gguf_pipeline(pipeline_type, model_ids, enable_save_ov_model, prompt): if sys.platform == 'darwin': @@ -129,7 +127,6 @@ def test_full_gguf_pipeline(pipeline_type, model_ids, enable_save_ov_model, prom @pytest.mark.parametrize("pipeline_type", get_gguf_pipeline_types()) @pytest.mark.parametrize("model_ids", [{"gguf_model_id": "Qwen/Qwen3-0.6B-GGUF", "gguf_filename": "Qwen3-0.6B-Q8_0.gguf"}]) @pytest.mark.xfail(condition=(sys.platform == "darwin"), reason="Ticket - 172335") -@pytest.mark.precommit @pytest.mark.skipif(sys.platform == "win32", reason="CVS-174065") def test_full_gguf_qwen3_pipeline(pipeline_type, model_ids): # Temporal testing solution until transformers starts to support qwen3 in GGUF format diff --git a/tests/python_tests/test_kv_cache_eviction/test_kv_cache_eviction_1.py b/tests/python_tests/test_kv_cache_eviction/test_kv_cache_eviction_1.py index 2e96c4a096..7f2b3a60eb 100644 --- a/tests/python_tests/test_kv_cache_eviction/test_kv_cache_eviction_1.py +++ b/tests/python_tests/test_kv_cache_eviction/test_kv_cache_eviction_1.py @@ -42,7 +42,6 @@ class CacheOptTestStruct: SHORT_CACHE_EVICTION_CONFIG = CacheEvictionConfig(start_size=32, recent_size=32, max_cache_size=96, aggregation_mode=AggregationMode.NORM_SUM) LONGBENCH_CACHE_EVICTION_CONFIG = CacheEvictionConfig(start_size=32, recent_size=128, max_cache_size=672, aggregation_mode=AggregationMode.NORM_SUM) -@pytest.mark.precommit @pytest.mark.skipif( sys.platform in ("win32", "darwin"), reason=( @@ -164,7 +163,6 @@ def get_beam_search_seq_len_300() -> GenerationConfig: ({"num_kv_blocks": 0, "cache_size": 0, "dynamic_split_fuse": False, "max_num_batched_tokens": 600, "enable_prefix_caching": False}, get_beam_search_seq_len_300()), ({"num_kv_blocks": 0, "cache_size": 0, "dynamic_split_fuse": False, "max_num_batched_tokens": 600, "use_cache_eviction": True, "cache_eviction_config": SHORT_CACHE_EVICTION_CONFIG}, get_greedy_seq_len_300())] @pytest.mark.parametrize("params", scheduler_params_list) -@pytest.mark.precommit def test_dynamic_memory_allocation(params): prompts, _ = get_test_dataset() generate_and_compare(prompts=prompts, @@ -182,7 +180,6 @@ class LongBenchTestData: avg_cache_usage_optimization_ratio: float -@pytest.mark.precommit @pytest.mark.parametrize("test_struct", [ LongBenchTestData("samsum", 4, 1.6, 2.5), LongBenchTestData("trec", 3.2, 2.0, 3.3), diff --git a/tests/python_tests/test_kv_cache_eviction/test_kv_cache_eviction_2.py b/tests/python_tests/test_kv_cache_eviction/test_kv_cache_eviction_2.py index 7ab1a98f91..1b9240c901 100644 --- a/tests/python_tests/test_kv_cache_eviction/test_kv_cache_eviction_2.py +++ b/tests/python_tests/test_kv_cache_eviction/test_kv_cache_eviction_2.py @@ -33,7 +33,6 @@ } -@pytest.mark.precommit @pytest.mark.parametrize("subset", ["samsum", "trec", "qasper"]) def test_kvcrush_vs_snapkv_baseline_longbench(subset): """Test that KVCrush performs equal or better than SnapKV baseline on LongBench datasets.""" diff --git a/tests/python_tests/test_llm_pipeline.py b/tests/python_tests/test_llm_pipeline.py index 36b4688bcf..282fc07d1d 100644 --- a/tests/python_tests/test_llm_pipeline.py +++ b/tests/python_tests/test_llm_pipeline.py @@ -40,7 +40,6 @@ def assert_hf_equals_genai(hf_reference, genai_output): 
@pytest.mark.parametrize("generation_config_dict,prompt", test_cases) @pytest.mark.parametrize("model_id", get_models_list()) @pytest.mark.parametrize("pipeline_type", get_main_pipeline_types()) -@pytest.mark.precommit def test_string_inputs(model_id, generation_config_dict, prompt, pipeline_type): generate_and_compare(model=model_id, prompts=[prompt], generation_config=generation_config_dict, pipeline_type=pipeline_type) @@ -52,7 +51,6 @@ def test_string_inputs(model_id, generation_config_dict, prompt, pipeline_type): ] @pytest.mark.parametrize("inputs", input_tensors_list) @pytest.mark.parametrize("model_id", get_models_list()) -@pytest.mark.precommit def test_encoded_inputs(model_id, inputs): opt_model, hf_tokenizer, models_path = download_and_convert_model(model_id) ov_pipe = create_ov_pipeline(models_path) @@ -92,12 +90,10 @@ def test_encoded_inputs(model_id, inputs): @pytest.mark.parametrize("prompts", batched_prompts) @pytest.mark.parametrize("model_id", get_models_list()) @pytest.mark.parametrize("pipeline_type", get_main_pipeline_types()) -@pytest.mark.precommit def test_batch_string_inputs(model_id, generation_config_dict, prompts, pipeline_type): generate_and_compare(model=model_id, prompts=prompts, generation_config=generation_config_dict, pipeline_type=pipeline_type) -@pytest.mark.precommit def test_batch_size_switch(): model_id = 'katuni4ka/tiny-random-phi3' _, _, models_path = download_and_convert_model(model_id) @@ -108,7 +104,6 @@ def test_batch_size_switch(): ov_pipe.generate(["a"], max_new_tokens=2) -@pytest.mark.precommit def test_empty_encoded_inputs_throw(): model_id = 'katuni4ka/tiny-random-phi3' _, _, models_path = download_and_convert_model(model_id) @@ -118,7 +113,6 @@ def test_empty_encoded_inputs_throw(): ov_pipe.generate(ov.Tensor(np.array([[]], dtype=np.int64)), max_new_tokens=2) -@pytest.mark.precommit @pytest.mark.parametrize("model_id", get_chat_models_list()) def test_different_input_types_works_same_and_change_nothing(model_id): opt_model, hf_tokenizer, models_path = download_and_convert_model(model_id) @@ -164,7 +158,6 @@ def test_different_input_types_works_same_and_change_nothing(model_id): GenerationChatInputsType.STRING, GenerationChatInputsType.ENCODED_INPUTS, GenerationChatInputsType.CHAT_HISTORY]) -@pytest.mark.precommit def test_chat_scenario(model_id, inputs, input_type): chat_history_hf = [] chat_history_ov = ov_genai.ChatHistory() if input_type == GenerationChatInputsType.CHAT_HISTORY else [] @@ -236,7 +229,6 @@ def test_chat_scenario(model_id, inputs, input_type): assert_hf_equals_genai(chat_history_hf, chat_history_messages_ov) -@pytest.mark.precommit def test_chat_scenario_several_chats_in_series(): opt_model, hf_tokenizer, models_path = download_and_convert_model(get_chat_models_list()[0]) ov_pipe = create_ov_pipeline(models_path) @@ -269,7 +261,6 @@ def test_chat_scenario_several_chats_in_series(): assert_hf_equals_genai(chat_history_hf, chat_history_ov) -@pytest.mark.precommit @pytest.mark.parametrize("model_id", get_chat_models_list()) def test_chat_scenario_several_start(model_id): opt_model, hf_tokenizer, models_path = download_and_convert_model(model_id) @@ -284,7 +275,6 @@ def test_chat_scenario_several_start(model_id): ov_pipe.finish_chat() -@pytest.mark.precommit @pytest.mark.parametrize("model_id", get_chat_models_list()) def test_generate_works_same_before_and_after_chat(model_id): opt_model, hf_tokenizer, models_path = download_and_convert_model(model_id) @@ -319,7 +309,6 @@ def user_defined_status_callback(subword): 
@pytest.mark.parametrize("callback", [print, user_defined_callback, user_defined_status_callback, lambda subword: print(subword)]) @pytest.mark.parametrize("model_id", get_models_list()) -@pytest.mark.precommit def test_callback_one_string(callback, model_id): _, _, models_path = download_and_convert_model(model_id) ov_pipe = create_ov_pipeline(models_path) @@ -330,7 +319,6 @@ def test_callback_one_string(callback, model_id): @pytest.mark.parametrize("callback", [print, user_defined_callback, user_defined_status_callback, lambda subword: print(subword)]) @pytest.mark.parametrize("model_id", get_models_list()) -@pytest.mark.precommit def test_callback_batch_throws(callback, model_id): _, _, models_path = download_and_convert_model(model_id) ov_pipe = create_ov_pipeline(models_path) @@ -340,7 +328,6 @@ def test_callback_batch_throws(callback, model_id): @pytest.mark.parametrize("callback", [print, user_defined_callback, user_defined_status_callback, lambda subword: print(subword)]) @pytest.mark.parametrize("model_id", get_models_list()) -@pytest.mark.precommit def test_callback_kwargs_one_string(callback, model_id): _, _, models_path = download_and_convert_model(model_id) ov_pipe = create_ov_pipeline(models_path) @@ -349,7 +336,6 @@ def test_callback_kwargs_one_string(callback, model_id): @pytest.mark.parametrize("callback", [print, user_defined_callback, user_defined_status_callback, lambda subword: print(subword)]) @pytest.mark.parametrize("model_id", get_models_list()) -@pytest.mark.precommit def test_callback_decoding_metallama(model_id, callback): # On metallama this prompt generates output which can shorten after adding new tokens. # Test that streamer correctly handles such cases. @@ -363,7 +349,6 @@ def test_callback_decoding_metallama(model_id, callback): @pytest.mark.parametrize("callback", [print, user_defined_callback, user_defined_status_callback, lambda subword: print(subword)]) @pytest.mark.parametrize("model_id", get_models_list()) -@pytest.mark.precommit def test_callback_kwargs_batch_throws(callback, model_id): _, _, models_path = download_and_convert_model(model_id) ov_pipe = create_ov_pipeline(models_path) @@ -371,7 +356,6 @@ def test_callback_kwargs_batch_throws(callback, model_id): ov_pipe.generate(['1', '2'], max_new_tokens=10, streamer=callback) -@pytest.mark.precommit @pytest.mark.parametrize("model_id", get_models_list()) def test_callback_terminate_by_bool(model_id): _, _, models_path = download_and_convert_model(model_id) @@ -395,7 +379,6 @@ def callback(subword): assert len(ov_output.tokens[0]) < max_new_tokens -@pytest.mark.precommit @pytest.mark.parametrize("model_id", get_models_list()) def test_callback_terminate_by_status(model_id): _, _, models_path = download_and_convert_model(model_id) @@ -420,7 +403,6 @@ def callback(subword): @pytest.mark.parametrize("model_id", get_chat_models_list()) -@pytest.mark.precommit def test_chat_scenario_callback_cancel(model_id): callback_questions = [ '1+1=', @@ -517,7 +499,6 @@ def end(self): @pytest.mark.parametrize("streamer_base", [PrinterNone, PrinterBool, PrinterStatus]) @pytest.mark.parametrize("model_id", get_models_list()) -@pytest.mark.precommit def test_streamer_one_string(streamer_base, model_id): _, _, models_path = download_and_convert_model(model_id) ov_pipe = create_ov_pipeline(models_path) @@ -527,7 +508,6 @@ def test_streamer_one_string(streamer_base, model_id): ov_pipe.generate('table is made of', generation_config, printer) -@pytest.mark.precommit @pytest.mark.parametrize("model_id", get_models_list()) 
def test_streamer_batch_throws(model_id): _, _, models_path = download_and_convert_model(model_id) @@ -537,7 +517,6 @@ def test_streamer_batch_throws(model_id): ov_pipe.generate(['1', '2'], ov_pipe.get_generation_config(), printer) -@pytest.mark.precommit @pytest.mark.parametrize("model_id", get_models_list()) def test_streamer_kwargs_one_string(model_id): _, _, models_path = download_and_convert_model(model_id) @@ -546,7 +525,6 @@ def test_streamer_kwargs_one_string(model_id): ov_pipe.generate('table is made of', max_new_tokens=10, do_sample=False, streamer=printer) -@pytest.mark.precommit @pytest.mark.parametrize("model_id", get_models_list()) def test_streamer_kwargs_batch_throws(model_id): _, _, models_path = download_and_convert_model(model_id) @@ -556,7 +534,6 @@ def test_streamer_kwargs_batch_throws(model_id): ov_pipe.generate('', num_beams=2, streamer=printer) -@pytest.mark.precommit @pytest.mark.parametrize("callback", [print, user_defined_callback, user_defined_status_callback, lambda subword: print(subword)]) @pytest.mark.parametrize("model_id", get_models_list()) def test_operator_with_callback_one_string(callback, model_id): @@ -567,7 +544,6 @@ def test_operator_with_callback_one_string(callback, model_id): ov_pipe('talbe is made of', ten_tokens, callback) -@pytest.mark.precommit @pytest.mark.parametrize("callback", [print, user_defined_callback, user_defined_status_callback, lambda subword: print(subword)]) @pytest.mark.parametrize("model_id", get_models_list()) def test_operator_with_callback_batch_throws(callback, model_id): @@ -579,7 +555,6 @@ def test_operator_with_callback_batch_throws(callback, model_id): @pytest.mark.parametrize("streamer_base", [PrinterNone, PrinterBool, PrinterStatus]) @pytest.mark.parametrize("model_id", get_models_list()) -@pytest.mark.precommit def test_operator_with_streamer_kwargs_one_string(streamer_base, model_id): _, _, models_path = download_and_convert_model(model_id) ov_pipe = create_ov_pipeline(models_path) @@ -587,7 +562,6 @@ def test_operator_with_streamer_kwargs_one_string(streamer_base, model_id): ov_pipe('hi', max_new_tokens=10, do_sample=True, streamer=printer) -@pytest.mark.precommit @pytest.mark.parametrize("model_id", get_models_list()) def test_operator_with_streamer_kwargs_batch_throws(model_id): _, _, models_path = download_and_convert_model(model_id) @@ -620,7 +594,6 @@ def load_genai_pipe_with_configs(configs: list[tuple], temp_path): return ov_pipe -@pytest.mark.precommit def test_eos_token_is_inherited_from_default_generation_config(model_tmp_path): _, temp_path = model_tmp_path ov_pipe = load_genai_pipe_with_configs([({"eos_token_id": 37}, "config.json")], temp_path) @@ -632,7 +605,6 @@ def test_eos_token_is_inherited_from_default_generation_config(model_tmp_path): assert 37 == ov_pipe.get_generation_config().eos_token_id -@pytest.mark.precommit @pytest.mark.parametrize("model_id", get_models_list()) def test_pipeline_validates_generation_config(model_id): _, _, models_path = download_and_convert_model(model_id) @@ -645,7 +617,6 @@ def test_pipeline_validates_generation_config(model_id): # Work with Unicode in Python API # -@pytest.mark.precommit @pytest.mark.parametrize("model_id", get_models_list()) def test_unicode_pybind_decoding_one_string(model_id): # On this model this prompt generates unfinished utf string. 
@@ -656,7 +627,6 @@ def test_unicode_pybind_decoding_one_string(model_id): assert '�' == res_str[-1] -@pytest.mark.precommit @pytest.mark.parametrize("model_id", get_models_list()) def test_unicode_pybind_decoding_batched(model_id): # On this model this prompt generates unfinished utf string. @@ -667,7 +637,6 @@ def test_unicode_pybind_decoding_batched(model_id): assert '�' == res_str.texts[0][-1] -@pytest.mark.precommit @pytest.mark.parametrize("model_id", get_models_list()) def test_unicode_pybind_decoding_one_string_streamer(model_id): # On this model this prompt generates unfinished utf-8 string @@ -692,7 +661,6 @@ def run_perf_metrics_collection(model_id, generation_config_dict: dict, prompt: (dict(max_new_tokens=20), 'table is made of'), ] @pytest.mark.parametrize("generation_config,prompt", test_cases) -@pytest.mark.precommit def test_perf_metrics(generation_config, prompt): import time start_time = time.perf_counter() @@ -769,7 +737,6 @@ def test_perf_metrics(generation_config, prompt): (dict(max_new_tokens=20), 'Generate json of a person'), ] @pytest.mark.parametrize("generation_config,prompt", test_cases) -@pytest.mark.precommit def test_perf_metrics_with_structured_output(generation_config, prompt): class Person(BaseModel): name: str = Field(pattern=r"^[A-Z][a-z]{1,20}$") @@ -804,7 +771,6 @@ class Person(BaseModel): @pytest.mark.parametrize("pipeline_type", get_main_pipeline_types()) @pytest.mark.parametrize("stop_str", {True, False}) -@pytest.mark.precommit def test_pipelines_generate_with_streaming(pipeline_type, stop_str): # streamer it_cnt = 0 diff --git a/tests/python_tests/test_llm_pipeline_static.py b/tests/python_tests/test_llm_pipeline_static.py index e6a7cdc623..772cc5c8bd 100644 --- a/tests/python_tests/test_llm_pipeline_static.py +++ b/tests/python_tests/test_llm_pipeline_static.py @@ -65,7 +65,6 @@ def generate_with_chat_history(model_path, device, pipeline_config, questions) - get_greedy(), get_greedy_with_penalties() ] -@pytest.mark.precommit @pytest.mark.parametrize("generation_config", generation_configs) @pytest.mark.parametrize("config", pipeline_configs) @pytest.mark.parametrize("model_id", get_models_list()) @@ -83,7 +82,6 @@ def test_generation_compare_with_stateful(generation_config, config, model_id, i assert ref_out.texts[0] == actual_out.texts[0] -@pytest.mark.precommit @pytest.mark.parametrize("config", pipeline_configs) @pytest.mark.parametrize("with_weights", blob_with_weights) @pytest.mark.parametrize("model_id", get_models_list()) @@ -118,7 +116,6 @@ def test_pipeline_from_blob(model_tmp_path, config, with_weights, model_id): assert ref_out == actual_out -@pytest.mark.precommit @pytest.mark.parametrize("config", pipeline_configs) @pytest.mark.parametrize("with_weights", blob_with_weights) @pytest.mark.parametrize("model_id", get_models_list()) @@ -162,7 +159,6 @@ def test_pipeline_cache_dir(model_tmp_path, config, with_weights, model_id): generation_configs = [ get_multinomial_temperature_and_presence_penalty() ] -@pytest.mark.precommit @pytest.mark.parametrize("generation_config", generation_configs) @pytest.mark.parametrize("config", pipeline_configs) @pytest.mark.parametrize("model_id", get_models_list()) @@ -179,7 +175,6 @@ def test_multinomial_sampling(generation_config, config, model_id): actual_out = static_pipe.generate(prompt, generation_config) -@pytest.mark.precommit @pytest.mark.parametrize("config", pipeline_configs) @pytest.mark.parametrize("model_id", get_models_list()) def test_length_properties_set_no_exception(config, model_id): 
@@ -199,7 +194,6 @@ def test_length_properties_set_no_exception(config, model_id): @pytest.mark.parametrize("length_config", length_configs) @pytest.mark.parametrize("config", pipeline_configs) @pytest.mark.parametrize("model_id", get_models_list()) -@pytest.mark.precommit def test_invalid_length_properties_raise_error(length_config, config, model_id): _, _, model_path = download_and_convert_model(model_id) length_config |= config @@ -207,7 +201,6 @@ def test_invalid_length_properties_raise_error(length_config, config, model_id): pipe = LLMPipeline(model_path, "NPU", **length_config) -@pytest.mark.precommit @pytest.mark.parametrize("config", pipeline_configs) @pytest.mark.parametrize("model_id", get_models_list()) def test_batch_one_no_exception(config, model_id): @@ -219,7 +212,6 @@ def test_batch_one_no_exception(config, model_id): # TODO: For the further batch support -@pytest.mark.precommit @pytest.mark.parametrize("config", pipeline_configs) @pytest.mark.parametrize("model_id", get_models_list()) def test_batch_raise_error(config, model_id): @@ -239,7 +231,6 @@ def test_batch_raise_error(config, model_id): @pytest.mark.parametrize("generation_config", generation_configs) @pytest.mark.parametrize("config", pipeline_configs) @pytest.mark.parametrize("model_id", get_models_list()) -@pytest.mark.precommit def test_unsupported_sampling_raise_error(generation_config, config, model_id): _, _, model_path = download_and_convert_model(model_id) prompt = 'What is OpenVINO?' @@ -249,7 +240,6 @@ def test_unsupported_sampling_raise_error(generation_config, config, model_id): pipe.generate(prompt, generation_config) -@pytest.mark.precommit @pytest.mark.parametrize("config", pipeline_configs) @pytest.mark.parametrize("model_id", get_models_list()) def test_terminate_by_max_number_of_tokens(config, model_id): @@ -265,7 +255,6 @@ def test_terminate_by_max_number_of_tokens(config, model_id): assert len(encoded_results.tokens[0]) == num_tokens -@pytest.mark.precommit @pytest.mark.parametrize("config", pipeline_configs) @pytest.mark.parametrize("model_id", get_models_list()) def test_terminate_by_out_of_memory(config, model_id): @@ -285,7 +274,6 @@ def test_terminate_by_out_of_memory(config, model_id): assert len(encoded_results.tokens[0]) == (kv_cache_size - input_len + 1) -@pytest.mark.precommit @pytest.mark.parametrize("config", pipeline_configs) @pytest.mark.parametrize("model_id", get_models_list()) def test_terminate_by_sampler(config, model_id): @@ -318,7 +306,6 @@ def end(self): @pytest.mark.skip(reason="JIRA-144780: Output differs from stateful pipeline") @pytest.mark.parametrize("config", pipeline_configs) @pytest.mark.parametrize("model_id", get_models_list()) -@pytest.mark.precommit def test_chat_generation(config, model_id): questions = [ '1+1=', diff --git a/tests/python_tests/test_parsers.py b/tests/python_tests/test_parsers.py index 3155e17fe1..1282b63b24 100644 --- a/tests/python_tests/test_parsers.py +++ b/tests/python_tests/test_parsers.py @@ -23,7 +23,6 @@ def hf_ov_genai_models(request, tmp_path_factory): return hf_tokenizer, genai_tokenizer -@pytest.mark.precommit @pytest.mark.parametrize( "hf_ov_genai_models", ["katuni4ka/tiny-random-phi3"], # this tokenizer is used as a stub only @@ -63,7 +62,6 @@ def write(self, message): assert msg['content'] == content -@pytest.mark.precommit @pytest.mark.parametrize( "hf_ov_genai_models", ["katuni4ka/tiny-random-phi3"], # this tokenizer is used as a stub only @@ -92,7 +90,6 @@ def write(self, message): assert msg['content'] == content 
-@pytest.mark.precommit @pytest.mark.parametrize( "hf_ov_genai_models", ["katuni4ka/tiny-random-phi3"], # this tokenizer is used as a stub only @@ -145,7 +142,6 @@ def write(self, message): assert msg['content'] == " The answer to 2 + 1 is 3." -@pytest.mark.precommit @pytest.mark.parametrize( "hf_ov_genai_models", ["katuni4ka/tiny-random-phi3"], @@ -180,7 +176,6 @@ def write(self, message): assert msg['content'] == content -@pytest.mark.precommit @pytest.mark.parametrize("answer", [ "\nOkay, the user is asking for the answer to 2 + 1.\n\nThe answer to 2 + 1 is \boxed{3}.", ]) @@ -203,7 +198,6 @@ def test_incremental_phi4_reason_parser_nostreamer(answer): assert msg['content'] == content -@pytest.mark.precommit @pytest.mark.parametrize("keep_original_content", [True, False]) @pytest.mark.parametrize("do_reset", [False]) @pytest.mark.parametrize( @@ -270,7 +264,6 @@ def test_incremental_deepseek_parser(): assert msg['content'] == content -@pytest.mark.precommit @pytest.mark.parametrize( "hf_ov_genai_models", ["katuni4ka/tiny-random-phi3"], @@ -313,7 +306,6 @@ def write(self, message): assert msg['main_text'] == " world " -@pytest.mark.precommit @pytest.mark.parametrize( "hf_ov_genai_models", ["katuni4ka/tiny-random-phi3"], diff --git a/tests/python_tests/test_rag.py b/tests/python_tests/test_rag.py index 3facb41112..47b87fb259 100644 --- a/tests/python_tests/test_rag.py +++ b/tests/python_tests/test_rag.py @@ -290,7 +290,6 @@ def run_text_rerank_pipeline_with_ref( @pytest.mark.parametrize("download_and_convert_embeddings_models", ["BAAI/bge-small-en-v1.5"], indirect=True) -@pytest.mark.precommit def test_embedding_constructors(download_and_convert_embeddings_models): _, _, models_path = download_and_convert_embeddings_models @@ -325,7 +324,6 @@ def test_embedding_constructors(download_and_convert_embeddings_models): TextEmbeddingPipeline.Config(normalize=False, pooling_type=TextEmbeddingPipeline.PoolingType.LAST_TOKEN), ], ) -@pytest.mark.precommit @pytest.mark.xfail(condition=(sys.platform == "darwin"), reason="Ticket - 174635") def test_qwen3_embedding(download_and_convert_embeddings_models, dataset_documents, config): opt_model, hf_tokenizer, models_path = download_and_convert_embeddings_models @@ -355,7 +353,6 @@ def test_qwen3_embedding(download_and_convert_embeddings_models, dataset_documen "embed_instruction", ], ) -@pytest.mark.precommit def test_embed_documents(download_and_convert_embeddings_models, dataset_documents, config): if ( sys.platform == "linux" @@ -389,7 +386,6 @@ def test_embed_documents(download_and_convert_embeddings_models, dataset_documen "query_instruction", ], ) -@pytest.mark.precommit def test_embed_query(download_and_convert_embeddings_models, dataset_documents, config): _, _, models_path = download_and_convert_embeddings_models run_text_embedding_pipeline_with_ref(models_path, dataset_documents[:1], config, "embed_query") @@ -414,7 +410,6 @@ def dataset_embeddings_genai_default_config_refs(download_and_convert_embeddings TextEmbeddingPipeline.Config(max_length=64, pad_to_max_length=True, batch_size=1), ], ) -@pytest.mark.precommit def test_fixed_shapes_configs(download_and_convert_embeddings_models, dataset_documents, config, dataset_embeddings_genai_default_config_refs): _, _, models_path = download_and_convert_embeddings_models @@ -438,7 +433,6 @@ def test_fixed_shapes_configs(download_and_convert_embeddings_models, dataset_do ], ) @pytest.mark.xfail() -@pytest.mark.precommit def test_fixed_shapes_configs_xfail(download_and_convert_embeddings_models, 
dataset_documents, config, dataset_embeddings_genai_default_config_refs): _, _, models_path = download_and_convert_embeddings_models @@ -457,7 +451,6 @@ def test_fixed_shapes_configs_xfail(download_and_convert_embeddings_models, data TextEmbeddingPipeline.Config(max_length=50, pad_to_max_length=True, batch_size=4), ], ) -@pytest.mark.precommit @pytest.mark.skipif( sys.platform == "darwin" or platform.machine() in ["aarch64", "arm64", "ARM64"], reason="NPU plugin is available only on Linux and Windows x86_64", @@ -476,7 +469,6 @@ def test_npu_fallback(download_and_convert_embeddings_models, dataset_documents, @pytest.mark.parametrize("download_and_convert_rerank_model", [RERANK_TEST_MODELS[0]], indirect=True) -@pytest.mark.precommit def test_rerank_constructors(download_and_convert_rerank_model): _, _, models_path = download_and_convert_rerank_model @@ -514,7 +506,6 @@ def test_rerank_constructors(download_and_convert_rerank_model): "top_n=10", ], ) -@pytest.mark.precommit def test_rerank_documents(download_and_convert_rerank_model, dataset_documents, query, config): _, _, models_path = download_and_convert_rerank_model run_text_rerank_pipeline_with_ref(models_path, query, dataset_documents, config) @@ -544,7 +535,6 @@ def test_rerank_documents(download_and_convert_rerank_model, dataset_documents, "top_n=4", ], ) -@pytest.mark.precommit @pytest.mark.xfail(condition=(sys.platform == "darwin"), reason="Ticket - 174635") def test_qwen3_seq_cls_rerank_documents(download_and_convert_rerank_model, query, task, documents, config): opt_model, hf_tokenizer, models_path = download_and_convert_rerank_model @@ -581,7 +571,6 @@ def test_qwen3_seq_cls_rerank_documents(download_and_convert_rerank_model, query "top_n=4", ], ) -@pytest.mark.precommit @pytest.mark.xfail(condition=(sys.platform == "darwin"), reason="Ticket - 174635") def test_qwen3_rerank_documents(download_and_convert_model_fixture, query, task, documents, config): opt_model, hf_tokenizer, models_path = download_and_convert_model_fixture diff --git a/tests/python_tests/test_sampling.py b/tests/python_tests/test_sampling.py index fbd296bfcb..8e73a36bb4 100644 --- a/tests/python_tests/test_sampling.py +++ b/tests/python_tests/test_sampling.py @@ -12,7 +12,6 @@ from utils.ov_genai_pipelines import generate_and_compare, run_ov_pipeline, get_main_pipeline_types from utils.hugging_face import download_and_convert_model -@pytest.mark.precommit @pytest.mark.parametrize("generation_config,prompt", [(dict(max_new_tokens=30), 'table is made of'), (dict(max_new_tokens=30, min_new_tokens=30), '你好! 
你好嗎?'), @@ -33,7 +32,6 @@ def test_basic_stop_criteria(generation_config, prompt): generate_and_compare(model_id, [prompt], generation_config) -@pytest.mark.precommit @pytest.mark.parametrize("generation_config,model_id", [(dict(max_new_tokens=50, min_new_tokens=15, stop_strings={"anag"}, include_stop_str_in_output=True), 'facebook/opt-125m'), # expected match on "manage" (dict(max_new_tokens=50, min_new_tokens=1, stop_strings={".", "software", "Intel"}, include_stop_str_in_output=True), 'facebook/opt-125m'), @@ -57,7 +55,6 @@ def test_stop_strings(generation_config, model_id, pipeline_type): generate_and_compare(model_id, prompts, generation_config, pipeline_type=pipeline_type) -@pytest.mark.precommit @pytest.mark.parametrize("generation_config", [dict(max_new_tokens=30), dict(max_new_tokens=30, repetition_penalty=2.0), @@ -79,7 +76,6 @@ def test_greedy(generation_config, prompt): generation_config=generation_config) -@pytest.mark.precommit @pytest.mark.parametrize("generation_config", [dict(max_new_tokens=30, num_beams=2), dict(max_new_tokens=30, num_beams=2, stop_criteria=StopCriteria.NEVER), @@ -105,7 +101,6 @@ def test_beam_search(generation_config): generate_and_compare(model_id, prompts, generation_config) -@pytest.mark.precommit @pytest.mark.xfail( raises=AssertionError, reason="Stop strings do not seem to work as expected with beam search in HF, so comparison will fail. If it changes, these cases shall be merged to the test above.", @@ -121,7 +116,6 @@ def test_beam_search_with_stop_string(generation_config): generate_and_compare(model_id, prompts, generation_config) -@pytest.mark.precommit @pytest.mark.parametrize("generation_config", [dict(max_new_tokens=1, min_new_tokens=0, echo=True), dict(max_new_tokens=30, num_beams=2, echo=True),], @@ -312,7 +306,6 @@ class RandomSamplingTestStruct: ] -@pytest.mark.precommit @pytest.mark.parametrize("test_struct", RANDOM_SAMPLING_TEST_CASES, ids=["multinomial_temperature", "multinomial_temperature_and_top_p", diff --git a/tests/python_tests/test_stateful_speculative_decoding.py b/tests/python_tests/test_stateful_speculative_decoding.py index 9aa90836e9..f4be11f328 100644 --- a/tests/python_tests/test_stateful_speculative_decoding.py +++ b/tests/python_tests/test_stateful_speculative_decoding.py @@ -29,7 +29,6 @@ def get_npu_llm_properties_for_test(): ] @pytest.mark.parametrize("main_model,draft_model,prompt", models_and_input) @pytest.mark.parametrize("main_device,draft_device", devices) -@pytest.mark.precommit def test_string_inputs(main_model, main_device, draft_model, draft_device, prompt): # Download and convert model: main_opt_model, main_hf_tokenizer, main_model_path = download_and_convert_model(main_model) @@ -67,7 +66,6 @@ def test_string_inputs(main_model, main_device, draft_model, draft_device, promp compare_generation_results([prompt], ref_gen_results, ov_chat_history_gen_results, ov_generation_config) -@pytest.mark.precommit def test_perf_metrics(): import time start_time = time.perf_counter() @@ -147,7 +145,6 @@ def test_perf_metrics(): assert len(raw_metrics.m_batch_sizes) > 0 assert len(raw_metrics.m_durations) > 0 -@pytest.mark.precommit def test_extended_perf_metrics(): import time start_time = time.perf_counter() diff --git a/tests/python_tests/test_structured_output.py b/tests/python_tests/test_structured_output.py index 98b704095e..9cc1b74691 100644 --- a/tests/python_tests/test_structured_output.py +++ b/tests/python_tests/test_structured_output.py @@ -39,7 +39,6 @@ class RESTAPIResponse(BaseModel): ] 
-@pytest.mark.precommit @pytest.mark.parametrize("ov_pipe", structured_id_models, indirect=True) @pytest.mark.parametrize( "prompt_and_scheme", @@ -70,7 +69,6 @@ def test_structured_json(ov_pipe, prompt_and_scheme, use_compound_grammar, capfd pytest.fail(f"Output {res_str} is not valid json schema {SchemeType.model_json_schema()}: {e}") -@pytest.mark.precommit @pytest.mark.parametrize("ov_pipe", structured_id_models, indirect=True) @pytest.mark.parametrize( "prompt_and_regex", @@ -98,7 +96,6 @@ def test_structured_regex(ov_pipe, prompt_and_regex, use_compound_grammar): assert re.match(regex_str, res_str), f"Output {res_str} does not match regex {regex_str}" -@pytest.mark.precommit @pytest.mark.parametrize("ov_pipe", structured_id_models, indirect=True) @pytest.mark.parametrize( "prompt_and_ebnf", @@ -138,7 +135,6 @@ def test_structured_ebnf(ov_pipe, prompt_and_ebnf, use_compound_grammar): assert re.match(r"^\d{4}-\d{2}-\d{2}$", res_str), f"Output {res_str} does not match date format" -@pytest.mark.precommit @pytest.mark.parametrize( "ov_pipe", [model_id for model_id in structured_id_models if "random" not in model_id], indirect=True ) @@ -173,7 +169,6 @@ def test_structural_tags_old(ov_pipe, prompt_and_structural_tag): RESTAPIResponse.model_validate_json(match.group(1)) -@pytest.mark.precommit # use only non-random model for stable output in TriggeredTags test @pytest.mark.parametrize("ov_pipe", ["TinyLlama/TinyLlama-1.1B-Chat-v1.0"], indirect=True) @pytest.mark.parametrize( diff --git a/tests/python_tests/test_text_streamer.py b/tests/python_tests/test_text_streamer.py index 75804256b1..a8df543da9 100644 --- a/tests/python_tests/test_text_streamer.py +++ b/tests/python_tests/test_text_streamer.py @@ -52,7 +52,6 @@ def chunks(arr: list, n: int): ])] @pytest.mark.parametrize("model_id", tokenizer_model_ids) -@pytest.mark.precommit @pytest.mark.parametrize("prompt", [*eng_prompts, *unicode_prompts]) def test_text_prompts(tmp_path, prompt, model_id): prompt = prompt.decode('unicode_escape') if isinstance(prompt, bytes) else prompt @@ -93,7 +92,6 @@ def test_text_prompts(tmp_path, prompt, model_id): [167, 96, 227, 169, 232, 250, 167, 96, 227, 169, 232, 250, 167] ] @pytest.mark.parametrize("model_id", tokenizer_model_ids) -@pytest.mark.precommit @pytest.mark.parametrize("encoded_prompt", encoded_prompts) def test_encoded_prompts(tmp_path, encoded_prompt, model_id): model_id, hf_tok_load_params = (model_id[0], model_id[1]) if isinstance(model_id, tuple) else (model_id, {}) diff --git a/tests/python_tests/test_tokenizer.py b/tests/python_tests/test_tokenizer.py index eeba864d5a..0b6065af3c 100644 --- a/tests/python_tests/test_tokenizer.py +++ b/tests/python_tests/test_tokenizer.py @@ -70,7 +70,6 @@ def ov_hf_tokenizers(request): @pytest.mark.parametrize("ov_hf_tokenizers", get_models_list(), indirect=True) @pytest.mark.parametrize("prompt", prompts) -@pytest.mark.precommit def test_encode(ov_hf_tokenizers, prompt): ov_tokenizer, hf_tokenizer = ov_hf_tokenizers @@ -102,7 +101,6 @@ def test_encode(ov_hf_tokenizers, prompt): @pytest.mark.parametrize("ov_hf_tokenizers", get_models_list(), indirect=True) @pytest.mark.parametrize("encoded_prompt", encoded_prompts) -@pytest.mark.precommit def test_decode(ov_hf_tokenizers, encoded_prompt): ov_tokenizer, hf_tokenizer = ov_hf_tokenizers decoded_ov = ov_tokenizer.decode(encoded_prompt) @@ -130,7 +128,6 @@ def test_decode(ov_hf_tokenizers, encoded_prompt): ] -@pytest.mark.precommit @pytest.mark.parametrize("chat_config", get_chat_templates()) 
@pytest.mark.parametrize("ov_hf_tokenizers", get_models_list(), indirect=True) def test_apply_chat_template(model_tmp_path, chat_config: tuple[str, dict], ov_hf_tokenizers): @@ -152,7 +149,6 @@ def test_apply_chat_template(model_tmp_path, chat_config: tuple[str, dict], ov_h assert_hf_equals_genai(hf_full_history_str, ov_full_history_str) -@pytest.mark.precommit @pytest.mark.parametrize("ov_hf_tokenizers", get_models_list(), indirect=True) @pytest.mark.parametrize("tokenizer_config_model_id", ["google/gemma-3-1b-it"]) def test_apply_chat_template_nested_content(model_tmp_path, ov_hf_tokenizers, tokenizer_config_model_id): @@ -192,7 +188,6 @@ def test_apply_chat_template_nested_content(model_tmp_path, ov_hf_tokenizers, to assert genai_templated_chat_history == ov_full_history_str -@pytest.mark.precommit @pytest.mark.parametrize("ov_hf_tokenizers", get_models_list(), indirect=True) @pytest.mark.parametrize("tokenizer_config_model_id", ["Qwen/Qwen3-8B-Base"]) def test_apply_chat_template_with_tools_and_extra_context(model_tmp_path, ov_hf_tokenizers, tokenizer_config_model_id): @@ -247,7 +242,6 @@ def test_apply_chat_template_with_tools_and_extra_context(model_tmp_path, ov_hf_ assert_hf_equals_genai(genai_templated_chat_history, ov_full_history_str) -@pytest.mark.precommit @pytest.mark.parametrize( "hf_ov_genai_models", [("Xenova/c4ai-command-r-v01-tokenizer", { "padding_side": None })], @@ -265,7 +259,6 @@ def test_non_string_chat_template(hf_ov_genai_models): assert_hf_equals_genai(hf_full_history_str, ov_full_history_str) -@pytest.mark.precommit @pytest.mark.parametrize("ov_hf_tokenizers", get_models_list(), indirect=True) def test_set_chat_template(ov_hf_tokenizers): ov_tokenizer, hf_tokenizer = ov_hf_tokenizers @@ -314,7 +307,6 @@ def test_set_chat_template(ov_hf_tokenizers): ], indirect=True, ) -@pytest.mark.precommit @pytest.mark.parametrize("prompt", [*eng_prompts, *unicode_prompts]) def test_special_tokens(prompt, ov_hf_tokenizers): prompt = prompt.decode("unicode_escape") if isinstance(prompt, bytes) else prompt @@ -346,7 +338,6 @@ def test_special_tokens(prompt, ov_hf_tokenizers): assert decoded_hf_skip_spec != decoded_hf_no_skip -@pytest.mark.precommit def test_multiple_infer_request_state(tmp_path): hf_tokenizer = retry_request(lambda: AutoTokenizer.from_pretrained("llamafactory/tiny-random-Llama-3")) ov_tokenizer = convert_tokenizer(hf_tokenizer) @@ -410,7 +401,6 @@ def hf_ov_genai_models(request, tmp_path_factory): ] -@pytest.mark.precommit @pytest.mark.parametrize("add_special_tokens", [True, False]) @pytest.mark.parametrize("max_length", [None, 16, 103, 512, 1024]) @pytest.mark.parametrize("pad_to_max_length", [None, True, False]) @@ -506,7 +496,6 @@ def make_model_params(): models_with_pair_input = make_model_params() @pytest.mark.parametrize("hf_ov_genai_models", models_with_pair_input, indirect=True) -@pytest.mark.precommit @pytest.mark.parametrize("input_pair", [[ ["hi", "sun in yellow"], ["Eng... test, string?!" * 100, "Multiline\nstring!\nWow!"], @@ -522,7 +511,6 @@ def test_two_inputs_string_list_of_lists_batched(hf_ov_genai_models, input_pair) assert np.all(ov_encoded == hf_encoded) @pytest.mark.parametrize("hf_ov_genai_models", models_with_pair_input, indirect=True) -@pytest.mark.precommit @pytest.mark.parametrize("input_pair", [ [["hi", "sun in yellow"]], [["Eng... test, string?!" 
* 100, "Multiline\nstring!\nWow!"]], @@ -539,7 +527,6 @@ def test_two_inputs_string_list_of_lists(hf_ov_genai_models, input_pair): @pytest.mark.parametrize("hf_ov_genai_models", models_with_pair_input, indirect=True) -@pytest.mark.precommit @pytest.mark.parametrize("input_pair", [ [["Eng... test, string?!" * 100], ["Multiline\nstring!\nWow!"]], [["hi" * 20], ["buy" * 90]], @@ -562,7 +549,6 @@ def test_two_inputs_string(hf_ov_genai_models, input_pair): assert np.all(ov_encoded == hf_encoded) -@pytest.mark.precommit def test_load_special_tokens_from_config_json(model_tmp_path): # test when there is an available config.json config_json = { @@ -576,7 +562,6 @@ def test_load_special_tokens_from_config_json(model_tmp_path): assert tok.get_eos_token_id() == config_json["eos_token_id"] -@pytest.mark.precommit def test_load_special_tokens_from_special_tokens_map_json(model_tmp_path): # test with special_tokens_map special_tokens_map_json = { @@ -590,7 +575,6 @@ def test_load_special_tokens_from_special_tokens_map_json(model_tmp_path): assert tok.get_eos_token() == special_tokens_map_json["eos_token"]["content"] -@pytest.mark.precommit def test_load_special_tokens_from_tokenizer_config_json(model_tmp_path): # special_tokens_map is not available # but tokenize_config.json exists @@ -616,7 +600,6 @@ def test_load_special_tokens_from_tokenizer_config_json(model_tmp_path): assert tok.get_eos_token_id() == 42 -@pytest.mark.precommit def test_load_special_tokens_from_tokenizer_config_and_config_json(model_tmp_path): # both config.json is available and tokenizer_config.json available # check that it does not read int values from tokenizer_config.json if they are in config.json @@ -647,7 +630,6 @@ def test_load_special_tokens_from_tokenizer_config_and_config_json(model_tmp_pat assert tok.get_eos_token() == tok_config_json["eos_token"] -@pytest.mark.precommit @pytest.mark.xfail( raises=AssertionError, reason="CVS-143410 ov tokenizer should be aligned with hf", @@ -721,14 +703,12 @@ def generate_tokenizer(tmp_path, chat_templates): SIMPLIFIED_QWEN2_VL_2B = "{% for message in messages %}{{ message['content'] }}{% endfor %}" -@pytest.mark.precommit def test_set_special_runtime_template(tmp_path): tokenizer = generate_tokenizer(tmp_path, ChatTemplates(None, None, None, None, None)) tokenizer.chat_template = QWEN2_VL_2B assert tokenizer.chat_template == SIMPLIFIED_QWEN2_VL_2B -@pytest.mark.precommit @pytest.mark.parametrize( "chat_templates", [ @@ -745,7 +725,6 @@ def test_template_priorities(tmp_path, chat_templates): assert tokenizer.chat_template == chat_templates.reference -@pytest.mark.precommit def test_chat_template_with_empty_output(tmp_path): tokenizer = generate_tokenizer(tmp_path, ChatTemplates(None, None, None, None, None)) # Test throwing exception for empty rendered chat template (e.g. 
Qwen2-VL) diff --git a/tests/python_tests/test_vlm_pipeline.py b/tests/python_tests/test_vlm_pipeline.py index d72051c35c..c9a567ce16 100644 --- a/tests/python_tests/test_vlm_pipeline.py +++ b/tests/python_tests/test_vlm_pipeline.py @@ -420,7 +420,6 @@ def test_images(request: pytest.FixtureRequest): return [request.getfixturevalue(image) for image in request.param] -@pytest.mark.precommit @parametrize_all_models def test_vlm_pipeline(ov_pipe_model: VlmModelInfo, test_images: list[openvino.Tensor]): ov_pipe = ov_pipe_model.pipeline @@ -442,7 +441,6 @@ def streamer(word: str) -> bool: assert res.texts[0] == "".join(result_from_streamer) -@pytest.mark.precommit @pytest.mark.parametrize( "config", [ @@ -506,7 +504,6 @@ def test_vlm_continuous_batching_generate_vs_add_request( ) -@pytest.mark.precommit @pytest.mark.parametrize( "config", [ @@ -618,7 +615,6 @@ def iteration_images_and_videos(request): return params -@pytest.mark.precommit @parametrize_all_models @pytest.mark.parametrize("system_message", ["", "You are a helpful assistant."]) def test_vlm_pipeline_chat( @@ -679,7 +675,6 @@ def iteration_images_npu(request): return [[request.getfixturevalue(image) for image in bundle] for bundle in request.param] -@pytest.mark.precommit @pytest.mark.parametrize("model_id", MODEL_IDS) @pytest.mark.parametrize("system_message", ["", "You are a helpful assistant."]) def test_vlm_pipeline_chat_npu(model_id, system_message, iteration_images_npu): @@ -721,7 +716,6 @@ def streamer(word: str) -> bool: run_chat(npu_pipe, system_message, iteration_images_npu) -@pytest.mark.precommit @parametrize_all_models_with_video @pytest.mark.parametrize("system_message", ["", "You are a helpful assistant."]) def test_vlm_pipeline_chat_with_video( @@ -771,7 +765,6 @@ def streamer(word: str) -> bool: ov_pipe.finish_chat() -@pytest.mark.precommit @parametrize_one_model_backends def test_vlm_get_tokenizer(ov_pipe_model: VlmModelInfo): ov_pipe = ov_pipe_model.pipeline @@ -779,7 +772,6 @@ def test_vlm_get_tokenizer(ov_pipe_model: VlmModelInfo): tokenizer.encode("") -@pytest.mark.precommit @pytest.mark.parametrize( "config", [ @@ -797,7 +789,6 @@ def test_sampling( ov_pipe.generate(PROMPTS[0], image=cat_tensor, generation_config=config) -@pytest.mark.precommit @pytest.mark.parametrize("backend", ATTENTION_BACKEND) def test_perf_metrics( backend: str, @@ -865,7 +856,6 @@ def test_perf_metrics( assert np.allclose(std_dur, np.std(raw_dur)) -@pytest.mark.precommit @pytest.mark.parametrize("model_id", MODEL_IDS) @pytest.mark.parametrize("backend", ATTENTION_BACKEND) @pytest.mark.skipif( @@ -901,7 +891,6 @@ def image_sequence(request): return [request.getfixturevalue(image) for image in request.param] -@pytest.mark.precommit @pytest.mark.skipif( sys.platform == "darwin" or platform.machine() in ["aarch64", "arm64", "ARM64"], reason="NPU plugin is available only on Linux and Windows x86_64", @@ -923,7 +912,6 @@ def test_vlm_npu_no_image(): ) -@pytest.mark.precommit @parametrize_all_models def test_vlm_pipeline_chat_streamer_cancel_second_generate( request: pytest.FixtureRequest, @@ -998,7 +986,6 @@ def streamer(subword): assert results_with_cancel == results -@pytest.mark.precommit @parametrize_one_model_backends def test_start_chat_clears_history( ov_pipe_model: VlmModelInfo, @@ -1025,7 +1012,6 @@ def test_start_chat_clears_history( assert results_first_generate == results_second_generate -@pytest.mark.precommit def test_start_chat_clears_history_cb_api( ov_continious_batching_pipe: ContinuousBatchingPipeline, image_sequence: 
list[openvino.Tensor] @@ -1051,7 +1037,6 @@ def test_start_chat_clears_history_cb_api( assert results_first_generate == results_second_generate -@pytest.mark.precommit @parametrize_all_models def test_vlm_pipeline_chat_streamer_cancel_first_generate( request: pytest.FixtureRequest, @@ -1182,7 +1167,6 @@ def model_and_tag_parametrize( ) -@pytest.mark.precommit @model_and_tag_parametrize(TAG_INSERTED_BY_TEMPLATE) def test_model_tags_representation(ov_pipe_model: VlmModelInfo, cat_tensor: openvino.Tensor): ov_pipe = ov_pipe_model.pipeline @@ -1235,7 +1219,6 @@ def workaround_inconsistent_inference(): retry(workaround_inconsistent_inference) -@pytest.mark.precommit @model_and_tag_parametrize() def test_model_tags_prepend_native( ov_pipe_model: VlmModelInfo, @@ -1264,7 +1247,6 @@ def workaround_inconsistent_inference(): retry(workaround_inconsistent_inference) -@pytest.mark.precommit @model_and_tag_parametrize() def test_model_tags_prepend_universal( ov_pipe_model: VlmModelInfo, @@ -1297,7 +1279,6 @@ def workaround_inconsistent_inference(): def cat_image_384x384(cat_image): return cat_image.resize((384, 384)) -@pytest.mark.precommit @model_and_tag_parametrize() def test_model_tags_append( ov_pipe_model: VlmModelInfo, @@ -1338,7 +1319,6 @@ def workaround_inconsistent_inference(): retry(workaround_inconsistent_inference) -@pytest.mark.precommit @model_and_tag_parametrize(IMAGE_ID_IGNORANT_MODELS_TO_TAG) def test_model_tags_same_reference(ov_pipe_model: VlmModelInfo, cat_tensor: openvino.Tensor): ov_pipe = ov_pipe_model.pipeline @@ -1358,7 +1338,6 @@ def workaround_inconsistent_inference(): retry(workaround_inconsistent_inference) -@pytest.mark.precommit @model_and_tag_parametrize() def test_model_tags_older(ov_pipe_model: VlmModelInfo, car_tensor: openvino.Tensor): ov_pipe = ov_pipe_model.pipeline @@ -1372,7 +1351,6 @@ def test_model_tags_older(ov_pipe_model: VlmModelInfo, car_tensor: openvino.Tens ov_pipe.finish_chat() -@pytest.mark.precommit @model_and_tag_parametrize() def test_model_tags_missing_universal(ov_pipe_model: VlmModelInfo): ov_pipe = ov_pipe_model.pipeline @@ -1381,7 +1359,6 @@ def test_model_tags_missing_universal(ov_pipe_model: VlmModelInfo): ov_pipe.generate("") -@pytest.mark.precommit @model_and_tag_parametrize() def test_model_tags_missing_native(ov_pipe_model: VlmModelInfo): ov_pipe = ov_pipe_model.pipeline @@ -1391,7 +1368,6 @@ def test_model_tags_missing_native(ov_pipe_model: VlmModelInfo): ov_pipe.generate(image_tag(0)) -@pytest.mark.precommit @pytest.mark.parametrize( "ov_pipe_model,has_image,has_video", [ diff --git a/tests/python_tests/test_whisper_pipeline.py b/tests/python_tests/test_whisper_pipeline.py index 655f527852..66c08335a9 100644 --- a/tests/python_tests/test_whisper_pipeline.py +++ b/tests/python_tests/test_whisper_pipeline.py @@ -275,7 +275,6 @@ def compare_results(hf_result, genai_result): @pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True)) @pytest.mark.parametrize("sample_from_dataset", [{"language": "en", "sample_id": 0}], indirect=True) -@pytest.mark.precommit def test_smoke(model_descr, sample_from_dataset): run_pipeline_with_ref( model_id=model_descr[0], @@ -285,7 +284,6 @@ def test_smoke(model_descr, sample_from_dataset): @pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True)) -@pytest.mark.precommit def test_whisper_config_constructor(model_descr): model_id, path = model_descr @@ -325,7 +323,6 @@ def test_whisper_config_constructor(model_descr): @pytest.mark.parametrize("model_descr", 
get_whisper_models_list(tiny_only=True)) @pytest.mark.parametrize("sample_from_dataset", [{"language" : "en", "sample_id": 0}], indirect=True) -@pytest.mark.precommit def test_whisper_constructors(model_descr, sample_from_dataset): model_id, path, hf_pipe, genai_pipe = read_whisper_model(model_descr) @@ -345,7 +342,6 @@ def test_whisper_constructors(model_descr, sample_from_dataset): @pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True)) @pytest.mark.parametrize("sample_from_dataset", [{"sample_id": 0}], indirect=True) -@pytest.mark.precommit def test_max_new_tokens(model_descr, sample_from_dataset): model_id, path, hf_pipe, genai_pipe = read_whisper_model(model_descr) @@ -363,7 +359,6 @@ def test_max_new_tokens(model_descr, sample_from_dataset): @pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True)) @pytest.mark.parametrize("language", ["fr", "de"]) -@pytest.mark.precommit def test_language_mode(model_descr, language): model_id, path, hf_pipe, genai_pipe = read_whisper_model(model_descr) sample = get_whisper_dataset(language, long_form=False)[0] @@ -388,7 +383,6 @@ def test_language_mode(model_descr, language): @pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True)) @pytest.mark.parametrize("sample_from_dataset", get_fixture_params_for_n_whisper_dataset_samples(n=1, language="fr"), indirect=True) -@pytest.mark.precommit def test_task_mode(model_descr, sample_from_dataset): model_id, path, hf_pipe, genai_pipe = read_whisper_model(model_descr) @@ -438,7 +432,6 @@ def test_task_mode(model_descr, sample_from_dataset): @pytest.mark.parametrize("sample_from_dataset", [*get_fixture_params_for_n_whisper_dataset_samples(n=1, language="fr"), *get_fixture_params_for_n_whisper_dataset_samples(n=1, language="de"), *get_fixture_params_for_n_whisper_dataset_samples(n=1, language="es")], indirect=True) -@pytest.mark.precommit def test_language_autodetect(model_descr, sample_from_dataset): model_id, path, hf_pipe, genai_pipe = read_whisper_model(model_descr) @@ -457,7 +450,6 @@ def test_language_autodetect(model_descr, sample_from_dataset): @pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True)) @pytest.mark.parametrize("sample_from_dataset", [*get_fixture_params_for_n_whisper_dataset_samples(n=1)], indirect=True) -@pytest.mark.precommit @pytest.mark.xfail(condition=(sys.platform == "darwin"), reason="Ticket - 173169") def test_return_timestamps_short_form(model_descr, sample_from_dataset): run_pipeline_with_ref( @@ -470,7 +462,6 @@ def test_return_timestamps_short_form(model_descr, sample_from_dataset): @pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True)) @pytest.mark.parametrize("sample_from_dataset", [*get_fixture_params_for_n_whisper_dataset_samples(n=1)], indirect=True) -@pytest.mark.precommit @pytest.mark.xfail(condition=(sys.platform == "darwin"), reason="Ticket - 173169") def test_return_timestamps_max_new_tokens_short_form(model_descr, sample_from_dataset): run_pipeline_with_ref( @@ -485,7 +476,6 @@ def test_return_timestamps_max_new_tokens_short_form(model_descr, sample_from_da @pytest.mark.parametrize("model_descr", get_whisper_models_list()) @pytest.mark.parametrize("sample_from_dataset", [*get_fixture_params_for_n_whisper_dataset_samples(n=10, long_form=True)], indirect=True) -@pytest.mark.precommit @pytest.mark.xfail(condition=(sys.platform == "darwin"), reason="Ticket - 173169") def test_longform_audio(model_descr, sample_from_dataset): _, _, hf_pipe, genai_pipe 
= read_whisper_model(model_descr) @@ -512,7 +502,6 @@ def test_longform_audio(model_descr, sample_from_dataset): @pytest.mark.parametrize("model_descr", get_whisper_models_list()) @pytest.mark.parametrize("sample_from_dataset", [*get_fixture_params_for_n_whisper_dataset_samples(n=2, long_form=True)], indirect=True) -@pytest.mark.precommit @pytest.mark.xfail(condition=(sys.platform == "darwin"), reason="Ticket - 173169") def test_longform_audio_with_past(model_descr, sample_from_dataset): _, _, hf_pipe, genai_pipe = read_whisper_model(model_descr, stateful=True) @@ -538,7 +527,6 @@ def test_longform_audio_with_past(model_descr, sample_from_dataset): @pytest.mark.parametrize("model_descr", get_whisper_models_list()) -@pytest.mark.precommit @pytest.mark.xfail(condition=(sys.platform == "darwin"), reason="Ticket - 173169") def test_shortform(model_descr): samples = [] @@ -558,7 +546,6 @@ def test_shortform(model_descr): @pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True)) @pytest.mark.parametrize("sample_from_dataset", [*get_fixture_params_for_n_whisper_dataset_samples(n=2, long_form=True)], indirect=True) -@pytest.mark.precommit @pytest.mark.xfail(condition=(sys.platform == "darwin"), reason="Ticket - 173169") def test_beam_search(model_descr, sample_from_dataset): # use only 30 seconds of audio due to beam search results wrong with enabled timestamps @@ -576,7 +563,6 @@ def test_beam_search(model_descr, sample_from_dataset): @pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True)) @pytest.mark.parametrize("sample_from_dataset", [{"language" : "en", "sample_id": 0}], indirect=True) -@pytest.mark.precommit def test_initial_prompt_hotwords(model_descr, sample_from_dataset): model_id, path, hf_pipe, genai_pipe = read_whisper_model(model_descr) @@ -598,7 +584,6 @@ def test_initial_prompt_hotwords(model_descr, sample_from_dataset): @pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True)) @pytest.mark.parametrize("sample_from_dataset", [{"language" : "en", "sample_id": 0}], indirect=True) -@pytest.mark.precommit def test_random_sampling(model_descr, sample_from_dataset): _, _, hf_pipe, genai_pipe = read_whisper_model(model_descr) @@ -637,7 +622,6 @@ def test_random_sampling(model_descr, sample_from_dataset): @pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True)) @pytest.mark.parametrize("sample_from_dataset", [{"language" : "en", "sample_id": 0}], indirect=True) -@pytest.mark.precommit @pytest.mark.xfail(condition=(sys.platform == "darwin"), reason="Ticket - 173169") def test_perf_metrics(model_descr, sample_from_dataset): model_id, path, hf_pipe, genai_pipe = read_whisper_model(model_descr) @@ -785,7 +769,6 @@ def streamer_bool_callback(subword): @pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True)) @pytest.mark.parametrize("sample_from_dataset", [{"language" : "en", "sample_id": 0}], indirect=True) -@pytest.mark.precommit def test_streamers(model_descr, sample_from_dataset, streamer_for_test): _, _, _, genai_pipe = read_whisper_model(model_descr) diff --git a/tests/python_tests/test_whisper_pipeline_static.py b/tests/python_tests/test_whisper_pipeline_static.py index 86e22bae60..08afe6215d 100644 --- a/tests/python_tests/test_whisper_pipeline_static.py +++ b/tests/python_tests/test_whisper_pipeline_static.py @@ -76,7 +76,6 @@ def compare_results_with_assert(expected, actual_out): @pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True)) 
@pytest.mark.parametrize("sample_from_dataset", [{"language": "en", "sample_id": 0}], indirect=True) -@pytest.mark.precommit def test_static_whisper_generation_compare_with_cpu(model_descr, sample_from_dataset): model_id, model_path = load_and_save_whisper_model(model_descr) @@ -89,7 +88,6 @@ def test_static_whisper_generation_compare_with_cpu(model_descr, sample_from_dat @pytest.mark.parametrize("sample_from_dataset", [*get_fixture_params_for_n_whisper_dataset_samples(n=2, language="fr"), *get_fixture_params_for_n_whisper_dataset_samples(n=2, language="de"), *get_fixture_params_for_n_whisper_dataset_samples(n=2, language="es")], indirect=True) -@pytest.mark.precommit def test_static_whisper_autodetect(model_descr, sample_from_dataset): model_id, model_path = load_and_save_whisper_model(model_descr) @@ -100,7 +98,6 @@ def test_static_whisper_autodetect(model_descr, sample_from_dataset): @pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True)) @pytest.mark.parametrize("sample_from_dataset", [*get_fixture_params_for_n_whisper_dataset_samples(language='de', n=3)], indirect=True) -@pytest.mark.precommit def test_static_whisper_language_de(model_descr, sample_from_dataset): model_id, model_path = load_and_save_whisper_model(model_descr) @@ -111,7 +108,6 @@ def test_static_whisper_language_de(model_descr, sample_from_dataset): @pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True)) @pytest.mark.parametrize("sample_from_dataset", [*get_fixture_params_for_n_whisper_dataset_samples(language='fr', n=3)], indirect=True) -@pytest.mark.precommit def test_static_whisper_language_fr(model_descr, sample_from_dataset): model_id, model_path = load_and_save_whisper_model(model_descr) @@ -122,7 +118,6 @@ def test_static_whisper_language_fr(model_descr, sample_from_dataset): @pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True)) @pytest.mark.parametrize("sample_from_dataset", [*get_fixture_params_for_n_whisper_dataset_samples(language='ru', n=3)], indirect=True) -@pytest.mark.precommit def test_static_whisper_language_ru(model_descr, sample_from_dataset): model_id, model_path = load_and_save_whisper_model(model_descr) @@ -133,7 +128,6 @@ def test_static_whisper_language_ru(model_descr, sample_from_dataset): @pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True)) @pytest.mark.parametrize("sample_from_dataset", [{"language": "en", "sample_id": 0, "long_form": True}], indirect=True) -@pytest.mark.precommit def test_static_whisper_generation_long(model_descr, sample_from_dataset): model_id, model_path = load_and_save_whisper_model(model_descr) @@ -144,7 +138,6 @@ def test_static_whisper_generation_long(model_descr, sample_from_dataset): @pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True)) @pytest.mark.parametrize("sample_from_dataset", [{"language": "en", "sample_id": 0}], indirect=True) -@pytest.mark.precommit def test_static_whisper_stateful_generation_compare_with_cpu(model_descr, sample_from_dataset): model_id, model_path = load_and_save_whisper_model(model_descr, stateful=True) @@ -157,7 +150,6 @@ def test_static_whisper_stateful_generation_compare_with_cpu(model_descr, sample @pytest.mark.parametrize("sample_from_dataset", [*get_fixture_params_for_n_whisper_dataset_samples(n=2, language="fr"), *get_fixture_params_for_n_whisper_dataset_samples(n=2, language="de"), *get_fixture_params_for_n_whisper_dataset_samples(n=2, language="es")], indirect=True) -@pytest.mark.precommit def 
test_static_whisper_stateful_autodetect(model_descr, sample_from_dataset): model_id, model_path = load_and_save_whisper_model(model_descr, stateful=True) @@ -168,7 +160,6 @@ def test_static_whisper_stateful_autodetect(model_descr, sample_from_dataset): @pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True)) @pytest.mark.parametrize("sample_from_dataset", [*get_fixture_params_for_n_whisper_dataset_samples(language='de', n=3)], indirect=True) -@pytest.mark.precommit def test_static_whisper_stateful_language_de(model_descr, sample_from_dataset): model_id, model_path = load_and_save_whisper_model(model_descr, stateful=True) @@ -179,7 +170,6 @@ def test_static_whisper_stateful_language_de(model_descr, sample_from_dataset): @pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True)) @pytest.mark.parametrize("sample_from_dataset", [*get_fixture_params_for_n_whisper_dataset_samples(language='fr', n=3)], indirect=True) -@pytest.mark.precommit def test_static_whisper_stateful_language_fr(model_descr, sample_from_dataset): model_id, model_path = load_and_save_whisper_model(model_descr, stateful=True) @@ -190,7 +180,6 @@ def test_static_whisper_stateful_language_fr(model_descr, sample_from_dataset): @pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True)) @pytest.mark.parametrize("sample_from_dataset", [*get_fixture_params_for_n_whisper_dataset_samples(language='ru', n=3)], indirect=True) -@pytest.mark.precommit def test_static_whisper_stateful_language_ru(model_descr, sample_from_dataset): model_id, model_path = load_and_save_whisper_model(model_descr, stateful=True) @@ -201,7 +190,6 @@ def test_static_whisper_stateful_language_ru(model_descr, sample_from_dataset): @pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True)) @pytest.mark.parametrize("sample_from_dataset", [{"language": "en", "sample_id": 0, "long_form": True}], indirect=True) -@pytest.mark.precommit def test_static_whisper_stateful_generation_long(model_descr, sample_from_dataset): model_id, model_path = load_and_save_whisper_model(model_descr, stateful=True)