@@ -36,9 +36,8 @@ def read_models_list(file_name: str):
         models.append(model_name)
     return models
 
-@pytest.mark.precommit
-@pytest.mark.parametrize("model_id", read_models_list(os.path.join(os.path.dirname(os.path.realpath(__file__)), "models", "precommit")))
-def test_e2e_precommit(model_id):
+@pytest.mark.parametrize("model_id", read_models_list(os.path.join(os.path.dirname(os.path.realpath(__file__)), "models", "lightweight")))
+def test_e2e_lightweight_models(model_id):
     prompts, generation_configs = get_test_dataset()
     generate_and_compare(prompts=prompts,
                          generation_config=generation_configs,
@@ -73,7 +72,6 @@ def test_e2e_real_models(model_id):
 ]
 @pytest.mark.parametrize("generation_config", test_configs)
 @pytest.mark.parametrize("prompt", batched_prompts[1:])  # num_beams=15 diverges on the first prompt.
-@pytest.mark.precommit
 @pytest.mark.skip(reason="CVS-162891: Fix test_continuous_batching_vs_stateful tests after we started to compare cb vs sdpa")
 def test_continuous_batching_vs_stateful(prompt, generation_config):
     model_id = "facebook/opt-125m"
@@ -93,7 +91,6 @@ def test_continuous_batching_vs_stateful(prompt, generation_config):
 
 prompts = ['The Sun is yellow because', 'Difference between Jupiter and Mars is that', 'table is made of']
 @pytest.mark.parametrize("prompt", prompts)
-@pytest.mark.precommit
 def test_cb_streamer_vs_return_vs_stateful(prompt):
     model_id = "facebook/opt-125m"
     _, _, models_path = download_and_convert_model(model_id)
@@ -124,7 +121,6 @@ def test_cb_streamer_vs_return_vs_stateful(prompt):
 @pytest.mark.parametrize("input_type", [
     GenerationChatInputsType.STRING,
     GenerationChatInputsType.CHAT_HISTORY])
-@pytest.mark.precommit
 def test_chat_scenario_vs_stateful(model_id, generation_config_kwargs: dict, pipeline_type, input_type: GenerationChatInputsType):
     _, _, models_path = download_and_convert_model(model_id)
 
@@ -175,7 +171,6 @@ def test_chat_scenario_vs_stateful(model_id, generation_config_kwargs: dict, pip
 @pytest.mark.parametrize("generation_config_kwargs", generation_configs)
 @pytest.mark.parametrize("model_id", get_chat_models_list())
 @pytest.mark.parametrize("pipeline_type", [PipelineType.CONTINUOUS_BATCHING, PipelineType.SPECULATIVE_DECODING, PipelineType.PROMPT_LOOKUP_DECODING,])
-@pytest.mark.precommit
 def test_continuous_batching_add_request_health_check(model_id, generation_config_kwargs: dict, pipeline_type):
     _, _, models_path = download_and_convert_model(model_id)
 
@@ -206,7 +201,6 @@ def test_continuous_batching_add_request_health_check(model_id, generation_confi
 @pytest.mark.parametrize("generation_config_kwargs", invalid_generation_configs)
 @pytest.mark.parametrize("model_id", get_chat_models_list())
 @pytest.mark.parametrize("pipeline_type", [PipelineType.CONTINUOUS_BATCHING, PipelineType.SPECULATIVE_DECODING, PipelineType.PROMPT_LOOKUP_DECODING,])
-@pytest.mark.precommit
 def test_continuous_batching_add_request_fails(model_id, generation_config_kwargs: dict, pipeline_type):
     _, _, models_path = download_and_convert_model(model_id)
 
@@ -228,7 +222,6 @@ def test_continuous_batching_add_request_fails(model_id, generation_config_kwarg
 #
 
 # todo: iefode: bug reproducer!!!
-@pytest.mark.precommit
 @pytest.mark.parametrize("sampling_config", [get_greedy(), get_beam_search(), get_multinomial_all_parameters()],
                          ids=["greedy", "beam_search", "multinomial_all_parameters"])
 def test_post_oom_health(sampling_config):
@@ -289,7 +282,6 @@ def get_beam_search_seq_len_300() -> GenerationConfig:
                          ({"num_kv_blocks": 100, "dynamic_split_fuse": True}, get_beam_search_seq_len_300()),
                          ({"num_kv_blocks": 100, "dynamic_split_fuse": False}, get_beam_search_seq_len_300())]
 @pytest.mark.parametrize("params", scheduler_params_list)
-@pytest.mark.precommit
 def test_preemption(params):
     model_id = "facebook/opt-125m"
     scheduler_params = params[0]
@@ -342,7 +334,6 @@ def test_preemption(params):
 
 # todo: Anastasiia Pnevskaya: fix the test because it is hanging according max_new_tokens = std::numeric_limits<std::size_t>::max()
 @pytest.mark.parametrize("dynamic_split_fuse", [True, False])
-@pytest.mark.precommit
 @pytest.mark.skip(reason="Random sampling results are non deterministic due to: discrete_distribution impl depends on platform, model inference results may depend on CPU. Test passes on CI but fails locally.")
 def test_preemption_with_multinomial(dynamic_split_fuse):
     generation_configs = multinomial_params.generation_config
@@ -425,7 +416,6 @@ def test_preemption_with_multinomial(dynamic_split_fuse):
 
 
 @pytest.mark.parametrize("dynamic_split_fuse", [True, False])
-@pytest.mark.precommit
 @pytest.mark.skip(reason="Random sampling results are non deterministic due to: discrete_distribution impl depends on platform, model inference results may depend on CPU. Test passes on CI but fails locally.")
 def test_preemption_with_multinomial_n_seq(dynamic_split_fuse):
     model_id: str = "facebook/opt-125m"
@@ -442,7 +432,6 @@ def test_preemption_with_multinomial_n_seq(dynamic_split_fuse):
 
 
 @pytest.mark.parametrize("pipeline_type", [PipelineType.PROMPT_LOOKUP_DECODING])
-@pytest.mark.precommit
 def test_dynamic_split_fuse_doesnt_affect_generated_text(pipeline_type):
     model_id: str = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
     _, _, models_path = download_and_convert_model(model_id)
@@ -496,7 +485,6 @@ def run_extended_perf_metrics_collection(model_id, generation_config: Generation
 
 
 @pytest.mark.parametrize("pipeline_type", [PipelineType.PAGED_ATTENTION, PipelineType.SPECULATIVE_DECODING])
-@pytest.mark.precommit
 def test_speculative_decoding_extended_perf_metrics(pipeline_type):
     import time
     start_time = time.perf_counter()