3 changes: 0 additions & 3 deletions Jenkinsfile
@@ -8,9 +8,6 @@ properties([
booleanParam(defaultValue: true,
description: 'Whether to propagate commit status to GitHub',
name: 'propagateStatus'),
booleanParam(defaultValue: false,
description: 'If true, forces running pre-commit scope',
name: 'forceRunPrecommitScope'),
string(defaultValue: '',
description: 'Pipeline shared library version (branch/tag/commit). Determined automatically if empty',
name: 'library_version')
5 changes: 0 additions & 5 deletions pyproject.toml
@@ -59,8 +59,3 @@ requires = [
"cmake~=3.24.0; platform_system == 'Darwin' and platform_machine == 'arm64'",
]
build-backend = "py_build_cmake.build"

[tool.pytest.ini_options]
markers = [
"precommit: (deselect with '-m \"precommit\"')",
]
7 changes: 3 additions & 4 deletions src/cpp/src/lora/adapter.cpp
@@ -100,10 +100,9 @@ struct AutoSafetensor: public safetensors_File {
ConstantMap safetensor_to_constant_map(const ov::Tensor& safetensor) {
AutoSafetensor safe_tensors_file{};

// Intentionally discard constness as safetensors_file_init requires a non-const pointer (used as read-only)
auto data_ptr = const_cast<char*>(safetensor.data<char>());
OPENVINO_ASSERT(safetensors_file_init(data_ptr, safetensor.get_byte_size(), &safe_tensors_file) == nullptr,
"Cannot parse safetensor as a Safetensors file format. Safetensors file format is supported only");
OPENVINO_ASSERT(safetensors_file_init(safetensor.data<char>(), safetensor.get_byte_size(), &safe_tensors_file) == nullptr,
"Cannot parse safetensor as a Safetensors file format. Safetensors file format is supported only"
);

ConstantMap tensors;
for (int i = 0; i < safe_tensors_file.num_tensors; i++) {
@@ -338,7 +338,7 @@ std::variant<int64_t, std::vector<int64_t>>

auto sample_token = [&](const ov::Tensor& logits, std::size_t idx) {
size_t sequence_offset = idx * vocab_size;
const float* logits_data = logits.data<float>() + sequence_offset;
float* logits_data = logits.data<float>() + sequence_offset;
return std::max_element(logits_data, logits_data + vocab_size) - logits_data;
};

12 changes: 6 additions & 6 deletions tests/python_tests/README.md
@@ -14,29 +14,29 @@ pip install -r tests/python_tests/requirements.txt
## Run Tests

```sh
python -m pytest tests/python_tests/ -m precommit
python -m pytest tests/python_tests/
```

If you have built the GenAI library yourself instead of installing the wheel, please set `PYTHONPATH` so that the tests can find the library, e.g.
```sh
PYTHONPATH=$PYTHONPATH:.../openvino.genai/build-Release/ python -m pytest tests/python_tests/ -m precommit
PYTHONPATH=$PYTHONPATH:.../openvino.genai/build-Release/ python -m pytest tests/python_tests/
```

## Customize tests run

Tests have `precommit` set of models. `precommit` contains lightweight models which can be quickly inferred. If you wish to run specific tests, you can use `-k` option, for example to run only multibatch and chat tests:
Tests have different sets of models for different purposes. If you wish to run specific tests, you can use `-k` option, for example to run only multibatch and chat tests:
```sh
python -m pytest tests/python_tests/ -m precommit -k "test_multibatch and test_chat"
python -m pytest tests/python_tests/ -k "test_multibatch and test_chat"
```

If you wish to run all tests except beam search do the following:
```sh
python -m pytest tests/python_tests/ -m precommit -k "not test_beam_search"
python -m pytest tests/python_tests/ -k "not test_beam_search"
```

Argument `--model_ids` can be used to run tests selectively only for specific models. HF model ids should be separated by space, e.g:
```sh
python -m pytest tests/python_tests/ -m precommit -k "test_multibatch" --model_ids "TinyLlama/TinyLlama-1.1B-Chat-v1.0 Qwen/Qwen2-0.5B-Instruct"
python -m pytest tests/python_tests/ -k "test_multibatch" --model_ids "TinyLlama/TinyLlama-1.1B-Chat-v1.0 Qwen/Qwen2-0.5B-Instruct"
```

List of currently supported models can be found in tests/python_tests/models.py:get_models_list
2 changes: 0 additions & 2 deletions tests/python_tests/conftest.py
@@ -53,6 +53,4 @@ def pytest_addoption(parser):


def pytest_configure(config: pytest.Config):
marker = "precommit" if config.getoption("-m") == "precommit" else None
pytest.run_marker = marker
pytest.selected_model_ids = config.getoption("--model_ids", default=None)
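For context, a minimal sketch of how the remaining `pytest.selected_model_ids` hook could be consumed by the test helpers. The filtering logic below is an assumption for illustration; only `pytest.selected_model_ids` and the space-separated `--model_ids` format come from this PR and the README.

```python
import pytest

def filter_models(all_models: list[str]) -> list[str]:
    # Hypothetical helper: narrow a model list (e.g. the output of
    # tests/python_tests/models.py:get_models_list) to the ids passed via
    # --model_ids, which pytest_configure above stores on the pytest module.
    selected = getattr(pytest, "selected_model_ids", None)
    if not selected:
        return all_models           # no --model_ids given: keep everything
    wanted = set(selected.split())  # ids are space-separated per the README
    return [m for m in all_models if m in wanted]
```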
6 changes: 3 additions & 3 deletions tests/python_tests/pytest.ini
@@ -2,8 +2,8 @@

markers =
; The following markers are defined for categorizing tests:
; precommit - Tests that should be run before committing code.
; real_models - Tests that involve execution of the models from models/real_models file
; nightly - Tests that should only run in nightly builds (uses large models or long running)
; samples - Tests related to the sample models.
; llm - Tests related to large language models.
; whisper - Tests related to the Whisper model.
@@ -12,8 +12,8 @@ markers =
; vlm - Tests related to the VLM model.
; rag - Tests related to the RAG components.
; speech_generation - Tests related to text-to-speech generation
precommit
real_models
nightly
samples
llm
whisper
@@ -24,4 +24,4 @@ markers =
rag
speech_generation

addopts = -m precommit
addopts = -m "not real_models and not nightly"
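The practical effect of the new `addopts` default is easiest to see on a pair of tests. A minimal sketch follows; the test names are invented for illustration, while the markers are the ones registered above.

```python
import pytest

@pytest.mark.llm
def test_runs_in_default_scope():
    # Carries none of the excluded markers, so a plain
    # `python -m pytest tests/python_tests/` selects it.
    assert True

@pytest.mark.nightly
def test_heavy_nightly_scenario():
    # Deselected by default via addopts = -m "not real_models and not nightly";
    # run it explicitly with `python -m pytest tests/python_tests/ -m nightly`.
    assert True
```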
2 changes: 0 additions & 2 deletions tests/python_tests/samples/test_text2speech.py
@@ -32,7 +32,6 @@ def teardown_class(self):

@pytest.mark.speech_generation
@pytest.mark.samples
@pytest.mark.precommit
@pytest.mark.parametrize("convert_model", ["tiny-random-SpeechT5ForTextToSpeech"], indirect=True)
@pytest.mark.parametrize("input_prompt", ["Hello everyone"])
def test_sample_text_to_speech(self, convert_model, input_prompt):
@@ -54,7 +53,6 @@ def test_sample_text_to_speech(self, convert_model, input_prompt):

@pytest.mark.speech_generation
@pytest.mark.samples
@pytest.mark.precommit
@pytest.mark.parametrize("convert_model", ["tiny-random-SpeechT5ForTextToSpeech"], indirect=True)
@pytest.mark.parametrize("input_prompt", ["Test text to speech without speaker embedding file"])
def test_sample_text_to_speech_no_speaker_embedding_file(self, convert_model, input_prompt):
16 changes: 2 additions & 14 deletions tests/python_tests/test_continuous_batching.py
@@ -36,9 +36,8 @@ def read_models_list(file_name: str):
models.append(model_name)
return models

@pytest.mark.precommit
@pytest.mark.parametrize("model_id", read_models_list(os.path.join(os.path.dirname(os.path.realpath(__file__)), "models", "precommit")))
def test_e2e_precommit(model_id):
@pytest.mark.parametrize("model_id", read_models_list(os.path.join(os.path.dirname(os.path.realpath(__file__)), "models", "lightweight")))
def test_e2e_lightweight_models(model_id):
prompts, generation_configs = get_test_dataset()
generate_and_compare(prompts=prompts,
generation_config=generation_configs,
@@ -73,7 +72,6 @@ def test_e2e_real_models(model_id):
]
@pytest.mark.parametrize("generation_config", test_configs)
@pytest.mark.parametrize("prompt", batched_prompts[1:]) # num_beams=15 diverges on the first prompt.
@pytest.mark.precommit
@pytest.mark.skip(reason="CVS-162891: Fix test_continuous_batching_vs_stateful tests after we started to compare cb vs sdpa")
def test_continuous_batching_vs_stateful(prompt, generation_config):
model_id = "facebook/opt-125m"
@@ -93,7 +91,6 @@ def test_continuous_batching_vs_stateful(prompt, generation_config):

prompts = ['The Sun is yellow because', 'Difference between Jupiter and Mars is that', 'table is made of']
@pytest.mark.parametrize("prompt", prompts)
@pytest.mark.precommit
def test_cb_streamer_vs_return_vs_stateful(prompt):
model_id = "facebook/opt-125m"
_, _, models_path = download_and_convert_model(model_id)
@@ -124,7 +121,6 @@ def test_cb_streamer_vs_return_vs_stateful(prompt):
@pytest.mark.parametrize("input_type", [
GenerationChatInputsType.STRING,
GenerationChatInputsType.CHAT_HISTORY])
@pytest.mark.precommit
def test_chat_scenario_vs_stateful(model_id, generation_config_kwargs: dict, pipeline_type, input_type: GenerationChatInputsType):
_, _, models_path = download_and_convert_model(model_id)

@@ -175,7 +171,6 @@ def test_chat_scenario_vs_stateful(model_id, generation_config_kwargs: dict, pip
@pytest.mark.parametrize("generation_config_kwargs", generation_configs)
@pytest.mark.parametrize("model_id", get_chat_models_list())
@pytest.mark.parametrize("pipeline_type", [PipelineType.CONTINUOUS_BATCHING, PipelineType.SPECULATIVE_DECODING, PipelineType.PROMPT_LOOKUP_DECODING,])
@pytest.mark.precommit
def test_continuous_batching_add_request_health_check(model_id, generation_config_kwargs: dict, pipeline_type):
_, _, models_path = download_and_convert_model(model_id)

@@ -206,7 +201,6 @@ def test_continuous_batching_add_request_health_check(model_id, generation_confi
@pytest.mark.parametrize("generation_config_kwargs", invalid_generation_configs)
@pytest.mark.parametrize("model_id", get_chat_models_list())
@pytest.mark.parametrize("pipeline_type", [PipelineType.CONTINUOUS_BATCHING, PipelineType.SPECULATIVE_DECODING, PipelineType.PROMPT_LOOKUP_DECODING,])
@pytest.mark.precommit
def test_continuous_batching_add_request_fails(model_id, generation_config_kwargs: dict, pipeline_type):
_, _, models_path = download_and_convert_model(model_id)

@@ -228,7 +222,6 @@ def test_continuous_batching_add_request_fails(model_id, generation_config_kwarg
#

# todo: iefode: bug reproducer!!!
@pytest.mark.precommit
@pytest.mark.parametrize("sampling_config", [get_greedy(), get_beam_search(), get_multinomial_all_parameters()],
ids=["greedy", "beam_search", "multinomial_all_parameters"])
def test_post_oom_health(sampling_config):
@@ -289,7 +282,6 @@ def get_beam_search_seq_len_300() -> GenerationConfig:
({"num_kv_blocks": 100, "dynamic_split_fuse": True}, get_beam_search_seq_len_300()),
({"num_kv_blocks": 100, "dynamic_split_fuse": False}, get_beam_search_seq_len_300())]
@pytest.mark.parametrize("params", scheduler_params_list)
@pytest.mark.precommit
def test_preemption(params):
model_id = "facebook/opt-125m"
scheduler_params = params[0]
@@ -342,7 +334,6 @@ def test_preemption(params):

# todo: Anastasiia Pnevskaya: fix the test because it is hanging according max_new_tokens = std::numeric_limits<std::size_t>::max()
@pytest.mark.parametrize("dynamic_split_fuse", [True, False])
@pytest.mark.precommit
@pytest.mark.skip(reason="Random sampling results are non deterministic due to: discrete_distribution impl depends on platform, model inference results may depend on CPU. Test passes on CI but fails locally.")
def test_preemption_with_multinomial(dynamic_split_fuse):
generation_configs = multinomial_params.generation_config
@@ -425,7 +416,6 @@ def test_preemption_with_multinomial(dynamic_split_fuse):


@pytest.mark.parametrize("dynamic_split_fuse", [True, False])
@pytest.mark.precommit
@pytest.mark.skip(reason="Random sampling results are non deterministic due to: discrete_distribution impl depends on platform, model inference results may depend on CPU. Test passes on CI but fails locally.")
def test_preemption_with_multinomial_n_seq(dynamic_split_fuse):
model_id : str = "facebook/opt-125m"
@@ -442,7 +432,6 @@ def test_preemption_with_multinomial_n_seq(dynamic_split_fuse):


@pytest.mark.parametrize("pipeline_type", [PipelineType.PROMPT_LOOKUP_DECODING])
@pytest.mark.precommit
def test_dynamic_split_fuse_doesnt_affect_generated_text(pipeline_type):
model_id : str = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
_, _, models_path = download_and_convert_model(model_id)
@@ -496,7 +485,6 @@ def run_extended_perf_metrics_collection(model_id, generation_config: Generation


@pytest.mark.parametrize("pipeline_type", [PipelineType.PAGED_ATTENTION, PipelineType.SPECULATIVE_DECODING])
@pytest.mark.precommit
def test_speculative_decoding_extended_perf_metrics(pipeline_type):
import time
start_time = time.perf_counter()
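Since the models file is renamed from `models/precommit` to `models/lightweight`, here is a hedged reconstruction of the truncated `read_models_list` helper it feeds. The comment and blank-line handling is an assumption; only the `models.append(...)`/`return models` lines and the file path are visible in the hunks above.

```python
def read_models_list(file_name: str) -> list[str]:
    # Assumed format: one Hugging Face model id per line; blank lines and
    # '#'-prefixed lines are skipped before the visible models.append(...) call.
    models = []
    with open(file_name) as f:
        for line in f:
            model_name = line.strip()
            if not model_name or model_name.startswith("#"):
                continue
            models.append(model_name)
    return models

# Used by the renamed parametrization above, roughly as:
# read_models_list(os.path.join(os.path.dirname(os.path.realpath(__file__)), "models", "lightweight"))
```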
4 changes: 0 additions & 4 deletions tests/python_tests/test_generation_config.py
@@ -61,7 +61,6 @@ def verify_set_values(generation_config, kwargs):
dict(max_new_tokens=1, apply_chat_template=False),
]
@pytest.mark.parametrize("generation_config_kwargs", configs)
@pytest.mark.precommit
def test_valid_configs(generation_config_kwargs):
config = GenerationConfig(**generation_config_kwargs)
verify_set_values(config, generation_config_kwargs)
@@ -107,7 +106,6 @@ def test_valid_configs(generation_config_kwargs):
# TODO: add tests for invalid properties
]
@pytest.mark.parametrize("generation_config_kwargs", invalid_configs)
@pytest.mark.precommit
def test_invalid_generation_configs_throws(generation_config_kwargs):
config = GenerationConfig(**generation_config_kwargs)
with pytest.raises(RuntimeError):
@@ -123,7 +121,6 @@ def test_invalid_generation_configs_throws(generation_config_kwargs):
dict(eos_token_id=1), # 'stop_token_ids' does not contain 'eos_token_id'
dict(eos_token_id=1, stop_token_ids={2}), # 'stop_token_ids' is not empty, but does not contain 'eos_token_id'
])
@pytest.mark.precommit
def test_invalid_fields_assinment_rises(fields):
config = GenerationConfig()
for key, val in fields.items():
@@ -147,7 +144,6 @@ def load_genai_generation_config_from_file(configs: list[tuple], temp_path):

return ov_generation_config

@pytest.mark.precommit
def test_multiple_eos_are_read_as_stop_token_ids(tmp_path):
generation_config_json = {
"eos_token_id": [
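As a reference for the now-unmarked config tests, a minimal sketch of the valid-config pattern they exercise. The `openvino_genai` import path is assumed; the keyword arguments are taken from the `configs` list in the hunk above.

```python
from openvino_genai import GenerationConfig

# Keyword arguments passed to GenerationConfig become fields on the object,
# which is what verify_set_values() checks for each entry in `configs`.
config = GenerationConfig(max_new_tokens=1, apply_chat_template=False)
assert config.max_new_tokens == 1
assert config.apply_chat_template is False
```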
3 changes: 0 additions & 3 deletions tests/python_tests/test_gguf_reader.py
@@ -18,7 +18,6 @@

@pytest.mark.parametrize("pipeline_type", get_gguf_pipeline_types())
@pytest.mark.parametrize("model_ids", get_gguf_model_list())
@pytest.mark.precommit
@pytest.mark.skipif(sys.platform == "win32", reason="CVS-174065")
def test_pipelines_with_gguf_generate(pipeline_type, model_ids):
if sys.platform == 'darwin':
@@ -69,7 +68,6 @@ def test_pipelines_with_gguf_generate(pipeline_type, model_ids):
'<|endoftext|><|endoftext|><|im_end|>',
'<|endoftext|> Why the Sky is Blue? <|im_end|>',
])
@pytest.mark.precommit
@pytest.mark.skipif(sys.platform == "win32", reason="CVS-174065")
def test_full_gguf_pipeline(pipeline_type, model_ids, enable_save_ov_model, prompt):
if sys.platform == 'darwin':
@@ -129,7 +127,6 @@ def test_full_gguf_pipeline(pipeline_type, model_ids, enable_save_ov_model, prom
@pytest.mark.parametrize("pipeline_type", get_gguf_pipeline_types())
@pytest.mark.parametrize("model_ids", [{"gguf_model_id": "Qwen/Qwen3-0.6B-GGUF", "gguf_filename": "Qwen3-0.6B-Q8_0.gguf"}])
@pytest.mark.xfail(condition=(sys.platform == "darwin"), reason="Ticket - 172335")
@pytest.mark.precommit
@pytest.mark.skipif(sys.platform == "win32", reason="CVS-174065")
def test_full_gguf_qwen3_pipeline(pipeline_type, model_ids):
# Temporal testing solution until transformers starts to support qwen3 in GGUF format
@@ -42,7 +42,6 @@ class CacheOptTestStruct:
SHORT_CACHE_EVICTION_CONFIG = CacheEvictionConfig(start_size=32, recent_size=32, max_cache_size=96, aggregation_mode=AggregationMode.NORM_SUM)
LONGBENCH_CACHE_EVICTION_CONFIG = CacheEvictionConfig(start_size=32, recent_size=128, max_cache_size=672, aggregation_mode=AggregationMode.NORM_SUM)

@pytest.mark.precommit
@pytest.mark.skipif(
sys.platform in ("win32", "darwin"),
reason=(
@@ -164,7 +163,6 @@ def get_beam_search_seq_len_300() -> GenerationConfig:
({"num_kv_blocks": 0, "cache_size": 0, "dynamic_split_fuse": False, "max_num_batched_tokens": 600, "enable_prefix_caching": False}, get_beam_search_seq_len_300()),
({"num_kv_blocks": 0, "cache_size": 0, "dynamic_split_fuse": False, "max_num_batched_tokens": 600, "use_cache_eviction": True, "cache_eviction_config": SHORT_CACHE_EVICTION_CONFIG}, get_greedy_seq_len_300())]
@pytest.mark.parametrize("params", scheduler_params_list)
@pytest.mark.precommit
def test_dynamic_memory_allocation(params):
prompts, _ = get_test_dataset()
generate_and_compare(prompts=prompts,
@@ -182,7 +180,6 @@ class LongBenchTestData:
avg_cache_usage_optimization_ratio: float


@pytest.mark.precommit
@pytest.mark.parametrize("test_struct", [
LongBenchTestData("samsum", 4, 1.6, 2.5),
LongBenchTestData("trec", 3.2, 2.0, 3.3),
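For readers unfamiliar with the eviction configs used above, a minimal sketch of constructing one. The constructor arguments are copied from SHORT_CACHE_EVICTION_CONFIG in the hunk; the `openvino_genai` import path is assumed.

```python
from openvino_genai import CacheEvictionConfig, AggregationMode

# Mirrors SHORT_CACHE_EVICTION_CONFIG above: keep the first 32 tokens and the
# 32 most recent ones, evicting down to a 96-token KV-cache budget using
# NORM_SUM-aggregated attention scores.
short_eviction = CacheEvictionConfig(
    start_size=32,
    recent_size=32,
    max_cache_size=96,
    aggregation_mode=AggregationMode.NORM_SUM,
)
```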
@@ -33,7 +33,6 @@
}


@pytest.mark.precommit
@pytest.mark.parametrize("subset", ["samsum", "trec", "qasper"])
def test_kvcrush_vs_snapkv_baseline_longbench(subset):
"""Test that KVCrush performs equal or better than SnapKV baseline on LongBench datasets."""