3 changes: 0 additions & 3 deletions Jenkinsfile
@@ -8,9 +8,6 @@ properties([
booleanParam(defaultValue: true,
description: 'Whether to propagate commit status to GitHub',
name: 'propagateStatus'),
booleanParam(defaultValue: false,
description: 'If true, forces running pre-commit scope',
name: 'forceRunPrecommitScope'),
string(defaultValue: '',
description: 'Pipeline shared library version (branch/tag/commit). Determined automatically if empty',
name: 'library_version')
5 changes: 0 additions & 5 deletions pyproject.toml
@@ -59,8 +59,3 @@ requires = [
"cmake~=3.24.0; platform_system == 'Darwin' and platform_machine == 'arm64'",
]
build-backend = "py_build_cmake.build"

[tool.pytest.ini_options]
markers = [
"precommit: (deselect with '-m \"precommit\"')",
]
7 changes: 3 additions & 4 deletions src/cpp/src/lora/adapter.cpp
@@ -100,10 +100,9 @@ struct AutoSafetensor: public safetensors_File {
ConstantMap safetensor_to_constant_map(const ov::Tensor& safetensor) {
AutoSafetensor safe_tensors_file{};

// Intentionally discard constness as safetensors_file_init requires a non-const pointer (used as read-only)
auto data_ptr = const_cast<char*>(safetensor.data<char>());
OPENVINO_ASSERT(safetensors_file_init(data_ptr, safetensor.get_byte_size(), &safe_tensors_file) == nullptr,
"Cannot parse safetensor as a Safetensors file format. Safetensors file format is supported only");
OPENVINO_ASSERT(safetensors_file_init(safetensor.data<char>(), safetensor.get_byte_size(), &safe_tensors_file) == nullptr,
"Cannot parse safetensor as a Safetensors file format. Safetensors file format is supported only"
);

ConstantMap tensors;
for (int i = 0; i < safe_tensors_file.num_tensors; i++) {
@@ -338,7 +338,7 @@ std::variant<int64_t, std::vector<int64_t>>

auto sample_token = [&](const ov::Tensor& logits, std::size_t idx) {
size_t sequence_offset = idx * vocab_size;
const float* logits_data = logits.data<float>() + sequence_offset;
float* logits_data = logits.data<float>() + sequence_offset;
return std::max_element(logits_data, logits_data + vocab_size) - logits_data;
};

12 changes: 6 additions & 6 deletions tests/python_tests/README.md
@@ -14,29 +14,29 @@ pip install -r tests/python_tests/requirements.txt
## Run Tests

```sh
python -m pytest tests/python_tests/ -m precommit
python -m pytest tests/python_tests/
```

If you have built the GenAI library yourself instead of installing the wheel, please set `PYTHONPATH` so that the tests can find the library, e.g.
```sh
PYTHONPATH=$PYTHONPATH:.../openvino.genai/build-Release/ python -m pytest tests/python_tests/ -m precommit
PYTHONPATH=$PYTHONPATH:.../openvino.genai/build-Release/ python -m pytest tests/python_tests/
```

## Customize tests run

Tests have `precommit` set of models. `precommit` contains lightweight models which can be quickly inferred. If you wish to run specific tests, you can use `-k` option, for example to run only multibatch and chat tests:
Tests have different sets of models for different purposes. If you wish to run specific tests, you can use `-k` option, for example to run only multibatch and chat tests:
```sh
python -m pytest tests/python_tests/ -m precommit -k "test_multibatch and test_chat"
python -m pytest tests/python_tests/ -k "test_multibatch and test_chat"
```

If you wish to run all tests except beam search do the following:
```sh
python -m pytest tests/python_tests/ -m precommit -k "not test_beam_search"
python -m pytest tests/python_tests/ -k "not test_beam_search"
```

Argument `--model_ids` can be used to run tests selectively only for specific models. HF model ids should be separated by space, e.g:
```sh
python -m pytest tests/python_tests/ -m precommit -k "test_multibatch" --model_ids "TinyLlama/TinyLlama-1.1B-Chat-v1.0 Qwen/Qwen2-0.5B-Instruct"
python -m pytest tests/python_tests/ -k "test_multibatch" --model_ids "TinyLlama/TinyLlama-1.1B-Chat-v1.0 Qwen/Qwen2-0.5B-Instruct"
```

List of currently supported models can be found in tests/python_tests/models.py:get_models_list
2 changes: 0 additions & 2 deletions tests/python_tests/conftest.py
@@ -53,6 +53,4 @@ def pytest_addoption(parser):


def pytest_configure(config: pytest.Config):
marker = "precommit" if config.getoption("-m") == "precommit" else None
pytest.run_marker = marker
pytest.selected_model_ids = config.getoption("--model_ids", default=None)
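For context, a minimal sketch of how the remaining `pytest.selected_model_ids` hook could be consumed by the test helpers. The filtering logic below is an assumption for illustration; only `pytest.selected_model_ids` and the space-separated `--model_ids` format come from this PR and the README.

```python
import pytest

def filter_models(all_models: list[str]) -> list[str]:
    # Hypothetical helper: narrow a model list (e.g. the output of
    # tests/python_tests/models.py:get_models_list) to the ids passed via
    # --model_ids, which pytest_configure above stores on the pytest module.
    selected = getattr(pytest, "selected_model_ids", None)
    if not selected:
        return all_models           # no --model_ids given: keep everything
    wanted = set(selected.split())  # ids are space-separated per the README
    return [m for m in all_models if m in wanted]
```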
6 changes: 3 additions & 3 deletions tests/python_tests/pytest.ini
@@ -2,8 +2,8 @@

markers =
; The following markers are defined for categorizing tests:
; precommit - Tests that should be run before committing code.
; real_models - Tests that involve execution of the models from models/real_models file
; nightly - Tests that should only run in nightly builds (uses large models or long running)
; samples - Tests related to the sample models.
; llm - Tests related to large language models.
; whisper - Tests related to the Whisper model.
@@ -12,8 +12,8 @@ markers =
; vlm - Tests related to the VLM model.
; rag - Tests related to the RAG components.
; speech_generation - Tests related to text-to-speech generation
precommit
real_models
nightly
samples
llm
whisper
@@ -24,4 +24,4 @@ markers =
rag
speech_generation

addopts = -m precommit
addopts = -m "not real_models and not nightly"
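The practical effect of the new `addopts` default is easiest to see on a pair of tests. A minimal sketch follows; the test names are invented for illustration, while the markers are the ones registered above.

```python
import pytest

@pytest.mark.llm
def test_runs_in_default_scope():
    # Carries none of the excluded markers, so a plain
    # `python -m pytest tests/python_tests/` selects it.
    assert True

@pytest.mark.nightly
def test_heavy_nightly_scenario():
    # Deselected by default via addopts = -m "not real_models and not nightly";
    # run it explicitly with `python -m pytest tests/python_tests/ -m nightly`.
    assert True
```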
2 changes: 0 additions & 2 deletions tests/python_tests/samples/test_text2speech.py
@@ -32,7 +32,6 @@ def teardown_class(self):

@pytest.mark.speech_generation
@pytest.mark.samples
@pytest.mark.precommit
@pytest.mark.parametrize("convert_model", ["tiny-random-SpeechT5ForTextToSpeech"], indirect=True)
@pytest.mark.parametrize("input_prompt", ["Hello everyone"])
def test_sample_text_to_speech(self, convert_model, input_prompt):
@@ -54,7 +53,6 @@ def test_sample_text_to_speech(self, convert_model, input_prompt):

@pytest.mark.speech_generation
@pytest.mark.samples
@pytest.mark.precommit
@pytest.mark.parametrize("convert_model", ["tiny-random-SpeechT5ForTextToSpeech"], indirect=True)
@pytest.mark.parametrize("input_prompt", ["Test text to speech without speaker embedding file"])
def test_sample_text_to_speech_no_speaker_embedding_file(self, convert_model, input_prompt):
16 changes: 2 additions & 14 deletions tests/python_tests/test_continuous_batching.py
@@ -36,9 +36,8 @@ def read_models_list(file_name: str):
models.append(model_name)
return models

@pytest.mark.precommit
@pytest.mark.parametrize("model_id", read_models_list(os.path.join(os.path.dirname(os.path.realpath(__file__)), "models", "precommit")))
def test_e2e_precommit(model_id):
@pytest.mark.parametrize("model_id", read_models_list(os.path.join(os.path.dirname(os.path.realpath(__file__)), "models", "lightweight")))
def test_e2e_lightweight_models(model_id):
prompts, generation_configs = get_test_dataset()
generate_and_compare(prompts=prompts,
generation_config=generation_configs,
@@ -73,7 +72,6 @@ def test_e2e_real_models(model_id):
]
@pytest.mark.parametrize("generation_config", test_configs)
@pytest.mark.parametrize("prompt", batched_prompts[1:]) # num_beams=15 diverges on the first prompt.
@pytest.mark.precommit
@pytest.mark.skip(reason="CVS-162891: Fix test_continuous_batching_vs_stateful tests after we started to compare cb vs sdpa")
def test_continuous_batching_vs_stateful(prompt, generation_config):
model_id = "facebook/opt-125m"
@@ -93,7 +91,6 @@ def test_continuous_batching_vs_stateful(prompt, generation_config):

prompts = ['The Sun is yellow because', 'Difference between Jupiter and Mars is that', 'table is made of']
@pytest.mark.parametrize("prompt", prompts)
@pytest.mark.precommit
def test_cb_streamer_vs_return_vs_stateful(prompt):
model_id = "facebook/opt-125m"
_, _, models_path = download_and_convert_model(model_id)
@@ -124,7 +121,6 @@ def test_cb_streamer_vs_return_vs_stateful(prompt):
@pytest.mark.parametrize("input_type", [
GenerationChatInputsType.STRING,
GenerationChatInputsType.CHAT_HISTORY])
@pytest.mark.precommit
def test_chat_scenario_vs_stateful(model_id, generation_config_kwargs: dict, pipeline_type, input_type: GenerationChatInputsType):
_, _, models_path = download_and_convert_model(model_id)

@@ -175,7 +171,6 @@ def test_chat_scenario_vs_stateful(model_id, generation_config_kwargs: dict, pip
@pytest.mark.parametrize("generation_config_kwargs", generation_configs)
@pytest.mark.parametrize("model_id", get_chat_models_list())
@pytest.mark.parametrize("pipeline_type", [PipelineType.CONTINUOUS_BATCHING, PipelineType.SPECULATIVE_DECODING, PipelineType.PROMPT_LOOKUP_DECODING,])
@pytest.mark.precommit
def test_continuous_batching_add_request_health_check(model_id, generation_config_kwargs: dict, pipeline_type):
_, _, models_path = download_and_convert_model(model_id)

@@ -206,7 +201,6 @@ def test_continuous_batching_add_request_health_check(model_id, generation_confi
@pytest.mark.parametrize("generation_config_kwargs", invalid_generation_configs)
@pytest.mark.parametrize("model_id", get_chat_models_list())
@pytest.mark.parametrize("pipeline_type", [PipelineType.CONTINUOUS_BATCHING, PipelineType.SPECULATIVE_DECODING, PipelineType.PROMPT_LOOKUP_DECODING,])
@pytest.mark.precommit
def test_continuous_batching_add_request_fails(model_id, generation_config_kwargs: dict, pipeline_type):
_, _, models_path = download_and_convert_model(model_id)

@@ -228,7 +222,6 @@ def test_continuous_batching_add_request_fails(model_id, generation_config_kwarg
#

# todo: iefode: bug reproducer!!!
@pytest.mark.precommit
@pytest.mark.parametrize("sampling_config", [get_greedy(), get_beam_search(), get_multinomial_all_parameters()],
ids=["greedy", "beam_search", "multinomial_all_parameters"])
def test_post_oom_health(sampling_config):
@@ -289,7 +282,6 @@ def get_beam_search_seq_len_300() -> GenerationConfig:
({"num_kv_blocks": 100, "dynamic_split_fuse": True}, get_beam_search_seq_len_300()),
({"num_kv_blocks": 100, "dynamic_split_fuse": False}, get_beam_search_seq_len_300())]
@pytest.mark.parametrize("params", scheduler_params_list)
@pytest.mark.precommit
def test_preemption(params):
model_id = "facebook/opt-125m"
scheduler_params = params[0]
@@ -342,7 +334,6 @@ def test_preemption(params):

# todo: Anastasiia Pnevskaya: fix the test because it is hanging according max_new_tokens = std::numeric_limits<std::size_t>::max()
@pytest.mark.parametrize("dynamic_split_fuse", [True, False])
@pytest.mark.precommit
@pytest.mark.skip(reason="Random sampling results are non deterministic due to: discrete_distribution impl depends on platform, model inference results may depend on CPU. Test passes on CI but fails locally.")
def test_preemption_with_multinomial(dynamic_split_fuse):
generation_configs = multinomial_params.generation_config
@@ -425,7 +416,6 @@ def test_preemption_with_multinomial(dynamic_split_fuse):


@pytest.mark.parametrize("dynamic_split_fuse", [True, False])
@pytest.mark.precommit
@pytest.mark.skip(reason="Random sampling results are non deterministic due to: discrete_distribution impl depends on platform, model inference results may depend on CPU. Test passes on CI but fails locally.")
def test_preemption_with_multinomial_n_seq(dynamic_split_fuse):
model_id : str = "facebook/opt-125m"
@@ -442,7 +432,6 @@ def test_preemption_with_multinomial_n_seq(dynamic_split_fuse):


@pytest.mark.parametrize("pipeline_type", [PipelineType.PROMPT_LOOKUP_DECODING])
@pytest.mark.precommit
def test_dynamic_split_fuse_doesnt_affect_generated_text(pipeline_type):
model_id : str = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
_, _, models_path = download_and_convert_model(model_id)
@@ -496,7 +485,6 @@ def run_extended_perf_metrics_collection(model_id, generation_config: Generation


@pytest.mark.parametrize("pipeline_type", [PipelineType.PAGED_ATTENTION, PipelineType.SPECULATIVE_DECODING])
@pytest.mark.precommit
def test_speculative_decoding_extended_perf_metrics(pipeline_type):
import time
start_time = time.perf_counter()
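Since the models file is renamed from `models/precommit` to `models/lightweight`, here is a hedged reconstruction of the truncated `read_models_list` helper it feeds. The comment and blank-line handling is an assumption; only the `models.append(...)`/`return models` lines and the file path are visible in the hunks above.

```python
def read_models_list(file_name: str) -> list[str]:
    # Assumed format: one Hugging Face model id per line; blank lines and
    # '#'-prefixed lines are skipped before the visible models.append(...) call.
    models = []
    with open(file_name) as f:
        for line in f:
            model_name = line.strip()
            if not model_name or model_name.startswith("#"):
                continue
            models.append(model_name)
    return models

# Used by the renamed parametrization above, roughly as:
# read_models_list(os.path.join(os.path.dirname(os.path.realpath(__file__)), "models", "lightweight"))
```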
4 changes: 0 additions & 4 deletions tests/python_tests/test_generation_config.py
@@ -61,7 +61,6 @@ def verify_set_values(generation_config, kwargs):
dict(max_new_tokens=1, apply_chat_template=False),
]
@pytest.mark.parametrize("generation_config_kwargs", configs)
@pytest.mark.precommit
def test_valid_configs(generation_config_kwargs):
config = GenerationConfig(**generation_config_kwargs)
verify_set_values(config, generation_config_kwargs)
@@ -107,7 +106,6 @@ def test_valid_configs(generation_config_kwargs):
# TODO: add tests for invalid properties
]
@pytest.mark.parametrize("generation_config_kwargs", invalid_configs)
@pytest.mark.precommit
def test_invalid_generation_configs_throws(generation_config_kwargs):
config = GenerationConfig(**generation_config_kwargs)
with pytest.raises(RuntimeError):
@@ -123,7 +121,6 @@ def test_invalid_generation_configs_throws(generation_config_kwargs):
dict(eos_token_id=1), # 'stop_token_ids' does not contain 'eos_token_id'
dict(eos_token_id=1, stop_token_ids={2}), # 'stop_token_ids' is not empty, but does not contain 'eos_token_id'
])
@pytest.mark.precommit
def test_invalid_fields_assinment_rises(fields):
config = GenerationConfig()
for key, val in fields.items():
@@ -147,7 +144,6 @@ def load_genai_generation_config_from_file(configs: list[tuple], temp_path):

return ov_generation_config

@pytest.mark.precommit
def test_multiple_eos_are_read_as_stop_token_ids(tmp_path):
generation_config_json = {
"eos_token_id": [
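As a reference for the now-unmarked config tests, a minimal sketch of the valid-config pattern they exercise. The `openvino_genai` import path is assumed; the keyword arguments are taken from the `configs` list in the hunk above.

```python
from openvino_genai import GenerationConfig

# Keyword arguments passed to GenerationConfig become fields on the object,
# which is what verify_set_values() checks for each entry in `configs`.
config = GenerationConfig(max_new_tokens=1, apply_chat_template=False)
assert config.max_new_tokens == 1
assert config.apply_chat_template is False
```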
3 changes: 0 additions & 3 deletions tests/python_tests/test_gguf_reader.py
@@ -18,7 +18,6 @@

@pytest.mark.parametrize("pipeline_type", get_gguf_pipeline_types())
@pytest.mark.parametrize("model_ids", get_gguf_model_list())
@pytest.mark.precommit
@pytest.mark.skipif(sys.platform == "win32", reason="CVS-174065")
def test_pipelines_with_gguf_generate(pipeline_type, model_ids):
if sys.platform == 'darwin':
@@ -69,7 +68,6 @@ def test_pipelines_with_gguf_generate(pipeline_type, model_ids):
'<|endoftext|><|endoftext|><|im_end|>',
'<|endoftext|> Why the Sky is Blue? <|im_end|>',
])
@pytest.mark.precommit
@pytest.mark.skipif(sys.platform == "win32", reason="CVS-174065")
def test_full_gguf_pipeline(pipeline_type, model_ids, enable_save_ov_model, prompt):
if sys.platform == 'darwin':
@@ -129,7 +127,6 @@ def test_full_gguf_pipeline(pipeline_type, model_ids, enable_save_ov_model, prom
@pytest.mark.parametrize("pipeline_type", get_gguf_pipeline_types())
@pytest.mark.parametrize("model_ids", [{"gguf_model_id": "Qwen/Qwen3-0.6B-GGUF", "gguf_filename": "Qwen3-0.6B-Q8_0.gguf"}])
@pytest.mark.xfail(condition=(sys.platform == "darwin"), reason="Ticket - 172335")
@pytest.mark.precommit
@pytest.mark.skipif(sys.platform == "win32", reason="CVS-174065")
def test_full_gguf_qwen3_pipeline(pipeline_type, model_ids):
# Temporal testing solution until transformers starts to support qwen3 in GGUF format
@@ -42,7 +42,6 @@ class CacheOptTestStruct:
SHORT_CACHE_EVICTION_CONFIG = CacheEvictionConfig(start_size=32, recent_size=32, max_cache_size=96, aggregation_mode=AggregationMode.NORM_SUM)
LONGBENCH_CACHE_EVICTION_CONFIG = CacheEvictionConfig(start_size=32, recent_size=128, max_cache_size=672, aggregation_mode=AggregationMode.NORM_SUM)

@pytest.mark.precommit
@pytest.mark.skipif(
sys.platform in ("win32", "darwin"),
reason=(
@@ -164,7 +163,6 @@ def get_beam_search_seq_len_300() -> GenerationConfig:
({"num_kv_blocks": 0, "cache_size": 0, "dynamic_split_fuse": False, "max_num_batched_tokens": 600, "enable_prefix_caching": False}, get_beam_search_seq_len_300()),
({"num_kv_blocks": 0, "cache_size": 0, "dynamic_split_fuse": False, "max_num_batched_tokens": 600, "use_cache_eviction": True, "cache_eviction_config": SHORT_CACHE_EVICTION_CONFIG}, get_greedy_seq_len_300())]
@pytest.mark.parametrize("params", scheduler_params_list)
@pytest.mark.precommit
def test_dynamic_memory_allocation(params):
prompts, _ = get_test_dataset()
generate_and_compare(prompts=prompts,
@@ -182,7 +180,6 @@ class LongBenchTestData:
avg_cache_usage_optimization_ratio: float


@pytest.mark.precommit
@pytest.mark.parametrize("test_struct", [
LongBenchTestData("samsum", 4, 1.6, 2.5),
LongBenchTestData("trec", 3.2, 2.0, 3.3),
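For readers unfamiliar with the eviction configs used above, a minimal sketch of constructing one. The constructor arguments are copied from SHORT_CACHE_EVICTION_CONFIG in the hunk; the `openvino_genai` import path is assumed.

```python
from openvino_genai import CacheEvictionConfig, AggregationMode

# Mirrors SHORT_CACHE_EVICTION_CONFIG above: keep the first 32 tokens and the
# 32 most recent ones, evicting down to a 96-token KV-cache budget using
# NORM_SUM-aggregated attention scores.
short_eviction = CacheEvictionConfig(
    start_size=32,
    recent_size=32,
    max_cache_size=96,
    aggregation_mode=AggregationMode.NORM_SUM,
)
```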
@@ -33,7 +33,6 @@
}


@pytest.mark.precommit
@pytest.mark.parametrize("subset", ["samsum", "trec", "qasper"])
def test_kvcrush_vs_snapkv_baseline_longbench(subset):
"""Test that KVCrush performs equal or better than SnapKV baseline on LongBench datasets."""