diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index c031b22b87..2ae88e1be6 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -508,49 +508,49 @@ jobs: fail-fast: false matrix: test: - - name: 'Whisper' - # TODO: skip some tests temporary untill https://github.com/huggingface/datasets/issues/7647 dataset is fixed - cmd: 'python -m pytest -v ./tests/python_tests/test_whisper_pipeline.py ./tests/python_tests/test_whisper_pipeline_static.py -k "not test_smoke[sample_from_dataset0 and not test_whisper_constructors[sample_from_dataset0 and not test_max_new_tokens[sample_from_dataset0 and not test_language_mode[language and not test_task_mode[sample_from_dataset0 and not test_language_autodetect[sample_from_dataset0 and not test_whisper_config_constructor and not test_language_autodetect[sample_from_dataset1 and not test_language_autodetect[sample_from_dataset2 and not test_initial_prompt_hotwords[sample_from_dataset0 and not test_random_sampling[sample_from_dataset0"' - run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).whisper.test }} - timeout: 45 - - name: 'Cacheopt E2E (Part 1)' - cmd: 'python -m pytest -v ./tests/python_tests/test_kv_cache_eviction/test_kv_cache_eviction_1.py' - run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).continuous_batching.test }} - timeout: 180 - - name: 'Cacheopt E2E (Part 2)' - cmd: 'python -m pytest -v ./tests/python_tests/test_kv_cache_eviction/test_kv_cache_eviction_2.py' - run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).continuous_batching.test }} - timeout: 360 - - name: 'LLM & VLM' - cmd: 'python -m pytest -v ./tests/python_tests/test_llm_pipeline.py tests/python_tests/test_llm_pipeline_static.py ./tests/python_tests/test_vlm_pipeline.py tests/python_tests/test_structured_output.py --override-ini cache_dir=/mount/caches/pytest/' - run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).visual_language.test || fromJSON(needs.smart_ci.outputs.affected_components).LLM.test }} - timeout: 180 - - name: 'GGUF Reader tests' - cmd: 'python -m pytest -v ./tests/python_tests/test_gguf_reader.py' - run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).GGUF.test || fromJSON(needs.smart_ci.outputs.affected_components).LLM.test }} - timeout: 360 - - name: 'Tokenizer tests' - cmd: 'python -m pytest -v ./tests/python_tests/test_tokenizer.py' - run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).tokenizers.test }} - timeout: 60 - - name: 'API tests' - cmd: 'python -m pytest -v ./tests/python_tests/test_continuous_batching.py ./tests/python_tests/test_generation_config.py ./tests/python_tests/test_sampling.py ./tests/python_tests/test_text_streamer.py' - run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).continuous_batching.test || fromJSON(needs.smart_ci.outputs.affected_components).sampling.test || fromJSON(needs.smart_ci.outputs.affected_components).text_streamer.test }} - timeout: 60 + # - name: 'Whisper' + # # TODO: skip some tests temporary untill https://github.com/huggingface/datasets/issues/7647 dataset is fixed + # cmd: 'python -m pytest -v ./tests/python_tests/test_whisper_pipeline.py ./tests/python_tests/test_whisper_pipeline_static.py -k "not test_smoke[sample_from_dataset0 and not test_whisper_constructors[sample_from_dataset0 and not test_max_new_tokens[sample_from_dataset0 and not test_language_mode[language and not test_task_mode[sample_from_dataset0 and not test_language_autodetect[sample_from_dataset0 and not test_whisper_config_constructor and not test_language_autodetect[sample_from_dataset1 and not test_language_autodetect[sample_from_dataset2 and not test_initial_prompt_hotwords[sample_from_dataset0 and not test_random_sampling[sample_from_dataset0"' + # run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).whisper.test }} + # timeout: 45 + # - name: 'Cacheopt E2E (Part 1)' + # cmd: 'python -m pytest -v ./tests/python_tests/test_kv_cache_eviction/test_kv_cache_eviction_1.py' + # run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).continuous_batching.test }} + # timeout: 180 + # - name: 'Cacheopt E2E (Part 2)' + # cmd: 'python -m pytest -v ./tests/python_tests/test_kv_cache_eviction/test_kv_cache_eviction_2.py' + # run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).continuous_batching.test }} + # timeout: 360 + # - name: 'LLM & VLM' + # cmd: 'python -m pytest -v ./tests/python_tests/test_llm_pipeline.py tests/python_tests/test_llm_pipeline_static.py ./tests/python_tests/test_vlm_pipeline.py tests/python_tests/test_structured_output.py --override-ini cache_dir=/mount/caches/pytest/' + # run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).visual_language.test || fromJSON(needs.smart_ci.outputs.affected_components).LLM.test }} + # timeout: 180 + # - name: 'GGUF Reader tests' + # cmd: 'python -m pytest -v ./tests/python_tests/test_gguf_reader.py' + # run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).GGUF.test || fromJSON(needs.smart_ci.outputs.affected_components).LLM.test }} + # timeout: 360 + # - name: 'Tokenizer tests' + # cmd: 'python -m pytest -v ./tests/python_tests/test_tokenizer.py' + # run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).tokenizers.test }} + # timeout: 60 + # - name: 'API tests' + # cmd: 'python -m pytest -v ./tests/python_tests/test_continuous_batching.py ./tests/python_tests/test_generation_config.py ./tests/python_tests/test_sampling.py ./tests/python_tests/test_text_streamer.py' + # run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).continuous_batching.test || fromJSON(needs.smart_ci.outputs.affected_components).sampling.test || fromJSON(needs.smart_ci.outputs.affected_components).text_streamer.test }} + # timeout: 60 - name: 'Rag tests' - cmd: 'python -m pytest -v ./tests/python_tests/test_rag.py' + cmd: 'pip list && python -m pytest -s -v ./tests/python_tests/test_rag.py -k test_qwen3_rerank_documents' run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).RAG.test }} timeout: 30 - - name: 'WWB tests' - cmd: 'python -m pytest -v ./tools/who_what_benchmark/tests -m "not nanollava"' - run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).WWB.test }} - timeout: 120 - - name: 'WWB tests (nanollava)' - cmd: | - python -m pip install transformers==4.48.0 - python -m pytest -v ./tools/who_what_benchmark/tests -m nanollava - run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).WWB.test }} - timeout: 90 + # - name: 'WWB tests' + # cmd: 'python -m pytest -v ./tools/who_what_benchmark/tests -m "not nanollava"' + # run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).WWB.test }} + # timeout: 120 + # - name: 'WWB tests (nanollava)' + # cmd: | + # python -m pip install transformers==4.48.0 + # python -m pytest -v ./tools/who_what_benchmark/tests -m nanollava + # run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).WWB.test }} + # timeout: 90 defaults: run: shell: bash diff --git a/.github/workflows/manylinux_2_28.yml b/.github/workflows/manylinux_2_28.yml index 38fcd6d4e8..3f9e13cad1 100644 --- a/.github/workflows/manylinux_2_28.yml +++ b/.github/workflows/manylinux_2_28.yml @@ -446,49 +446,49 @@ jobs: fail-fast: false matrix: test: - - name: 'Whisper' - # TODO: skip some tests temporary untill https://github.com/huggingface/datasets/issues/7647 dataset is fixed - cmd: 'python -m pytest -v ./tests/python_tests/test_whisper_pipeline.py ./tests/python_tests/test_whisper_pipeline_static.py -k "not test_smoke[sample_from_dataset0 and not test_whisper_constructors[sample_from_dataset0 and not test_max_new_tokens[sample_from_dataset0 and not test_language_mode[language and not test_task_mode[sample_from_dataset0 and not test_language_autodetect[sample_from_dataset0 and not test_whisper_config_constructor and not test_language_autodetect[sample_from_dataset1 and not test_language_autodetect[sample_from_dataset2 and not test_initial_prompt_hotwords[sample_from_dataset0 and not test_random_sampling[sample_from_dataset0"' - run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).whisper.test }} - timeout: 120 - - name: 'Cacheopt E2E (Part 1)' - cmd: 'python -m pytest -v ./tests/python_tests/test_kv_cache_eviction/test_kv_cache_eviction_1.py' - run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).continuous_batching.test }} - timeout: 180 - - name: 'Cacheopt E2E (Part 2)' - cmd: 'python -m pytest -v ./tests/python_tests/test_kv_cache_eviction/test_kv_cache_eviction_2.py' - run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).continuous_batching.test }} - timeout: 360 - - name: 'LLM & VLM' - cmd: 'python -m pytest -v ./tests/python_tests/test_llm_pipeline.py ./tests/python_tests/test_llm_pipeline_static.py ./tests/python_tests/test_vlm_pipeline.py ./tests/python_tests/test_structured_output.py --override-ini cache_dir=/mount/caches/pytest/' - run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).visual_language.test || fromJSON(needs.smart_ci.outputs.affected_components).LLM.test }} - timeout: 180 - - name: 'GGUF Reader tests' - cmd: 'python -m pytest -v ./tests/python_tests/test_gguf_reader.py' - run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).GGUF.test || fromJSON(needs.smart_ci.outputs.affected_components).LLM.test }} - timeout: 360 - - name: 'Tokenizer tests' - cmd: 'python -m pytest -v ./tests/python_tests/test_tokenizer.py' - run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).tokenizers.test }} - timeout: 60 - - name: 'API tests' - cmd: 'python -m pytest -v ./tests/python_tests/test_continuous_batching.py ./tests/python_tests/test_generation_config.py ./tests/python_tests/test_sampling.py ./tests/python_tests/test_text_streamer.py' - run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).continuous_batching.test || fromJSON(needs.smart_ci.outputs.affected_components).sampling.test || fromJSON(needs.smart_ci.outputs.affected_components).text_streamer.test }} - timeout: 60 + # - name: 'Whisper' + # # TODO: skip some tests temporary untill https://github.com/huggingface/datasets/issues/7647 dataset is fixed + # cmd: 'python -m pytest -v ./tests/python_tests/test_whisper_pipeline.py ./tests/python_tests/test_whisper_pipeline_static.py -k "not test_smoke[sample_from_dataset0 and not test_whisper_constructors[sample_from_dataset0 and not test_max_new_tokens[sample_from_dataset0 and not test_language_mode[language and not test_task_mode[sample_from_dataset0 and not test_language_autodetect[sample_from_dataset0 and not test_whisper_config_constructor and not test_language_autodetect[sample_from_dataset1 and not test_language_autodetect[sample_from_dataset2 and not test_initial_prompt_hotwords[sample_from_dataset0 and not test_random_sampling[sample_from_dataset0"' + # run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).whisper.test }} + # timeout: 120 + # - name: 'Cacheopt E2E (Part 1)' + # cmd: 'python -m pytest -v ./tests/python_tests/test_kv_cache_eviction/test_kv_cache_eviction_1.py' + # run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).continuous_batching.test }} + # timeout: 180 + # - name: 'Cacheopt E2E (Part 2)' + # cmd: 'python -m pytest -v ./tests/python_tests/test_kv_cache_eviction/test_kv_cache_eviction_2.py' + # run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).continuous_batching.test }} + # timeout: 360 + # - name: 'LLM & VLM' + # cmd: 'python -m pytest -v ./tests/python_tests/test_llm_pipeline.py ./tests/python_tests/test_llm_pipeline_static.py ./tests/python_tests/test_vlm_pipeline.py ./tests/python_tests/test_structured_output.py --override-ini cache_dir=/mount/caches/pytest/' + # run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).visual_language.test || fromJSON(needs.smart_ci.outputs.affected_components).LLM.test }} + # timeout: 180 + # - name: 'GGUF Reader tests' + # cmd: 'python -m pytest -v ./tests/python_tests/test_gguf_reader.py' + # run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).GGUF.test || fromJSON(needs.smart_ci.outputs.affected_components).LLM.test }} + # timeout: 360 + # - name: 'Tokenizer tests' + # cmd: 'python -m pytest -v ./tests/python_tests/test_tokenizer.py' + # run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).tokenizers.test }} + # timeout: 60 + # - name: 'API tests' + # cmd: 'python -m pytest -v ./tests/python_tests/test_continuous_batching.py ./tests/python_tests/test_generation_config.py ./tests/python_tests/test_sampling.py ./tests/python_tests/test_text_streamer.py' + # run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).continuous_batching.test || fromJSON(needs.smart_ci.outputs.affected_components).sampling.test || fromJSON(needs.smart_ci.outputs.affected_components).text_streamer.test }} + # timeout: 60 - name: 'Rag tests' - cmd: 'python -m pytest -v ./tests/python_tests/test_rag.py' + cmd: 'python -m pytest -s -v ./tests/python_tests/test_rag.py -k test_qwen3_rerank_documents' run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).RAG.test }} timeout: 30 - - name: 'WWB tests' - cmd: 'python -m pytest -v ./tools/who_what_benchmark/tests -m "not nanollava"' - run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).WWB.test }} - timeout: 120 - - name: 'WWB tests (nanollava)' - cmd: | - python -m pip install transformers==4.48.0 - python -m pytest -v ./tools/who_what_benchmark/tests -m nanollava - run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).WWB.test }} - timeout: 90 + # - name: 'WWB tests' + # cmd: 'python -m pytest -v ./tools/who_what_benchmark/tests -m "not nanollava"' + # run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).WWB.test }} + # timeout: 120 + # - name: 'WWB tests (nanollava)' + # cmd: | + # python -m pip install transformers==4.48.0 + # python -m pytest -v ./tools/who_what_benchmark/tests -m nanollava + # run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).WWB.test }} + # timeout: 90 defaults: run: shell: bash diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index b0587ec758..d32af33ce6 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -597,49 +597,49 @@ jobs: fail-fast: false matrix: test: - - name: 'Whisper' - # TODO: skip some tests temporary untill https://github.com/huggingface/datasets/issues/7647 dataset is fixed - cmd: 'python -m pytest -s -v tests/python_tests/test_whisper_pipeline.py tests/python_tests/test_whisper_pipeline_static.py -k "not test_smoke[sample_from_dataset0 and not test_whisper_constructors[sample_from_dataset0 and not test_max_new_tokens[sample_from_dataset0 and not test_language_mode[language and not test_task_mode[sample_from_dataset0 and not test_language_autodetect[sample_from_dataset0 and not test_whisper_config_constructor and not test_language_autodetect[sample_from_dataset1 and not test_language_autodetect[sample_from_dataset2 and not test_initial_prompt_hotwords[sample_from_dataset0 and not test_random_sampling[sample_from_dataset0"' - run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).whisper.test }} - timeout: 120 - - name: 'Cacheopt E2E (Part 1)' - cmd: 'python -m pytest -s -v tests/python_tests/test_kv_cache_eviction/test_kv_cache_eviction_1.py' - run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).continuous_batching.test }} - timeout: 180 - - name: 'Cacheopt E2E (Part 2)' - cmd: 'python -m pytest -s -v tests/python_tests/test_kv_cache_eviction/test_kv_cache_eviction_2.py' - run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).continuous_batching.test }} - timeout: 360 - - name: 'LLM & VLM' - cmd: 'python -m pytest -s -v tests/python_tests/test_llm_pipeline.py tests/python_tests/test_llm_pipeline_static.py tests/python_tests/test_vlm_pipeline.py tests/python_tests/test_structured_output.py --override-ini cache_dir=/mount/caches/pytest/' - run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).visual_language.test || fromJSON(needs.smart_ci.outputs.affected_components).LLM.test }} - timeout: 180 - - name: 'GGUF Reader tests' - cmd: 'python -m pytest -s -v tests/python_tests/test_gguf_reader.py' - run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).GGUF.test || fromJSON(needs.smart_ci.outputs.affected_components).LLM.test }} - timeout: 360 - - name: 'Tokenizer tests' - cmd: 'python -m pytest -s -v tests/python_tests/test_tokenizer.py' - run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).tokenizers.test }} - timeout: 60 - - name: 'API tests' - cmd: 'python -m pytest -s -v tests/python_tests/test_continuous_batching.py tests/python_tests/test_generation_config.py tests/python_tests/test_sampling.py tests/python_tests/test_text_streamer.py' - run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).continuous_batching.test || fromJSON(needs.smart_ci.outputs.affected_components).sampling.test || fromJSON(needs.smart_ci.outputs.affected_components).text_streamer.test }} - timeout: 60 + # - name: 'Whisper' + # # TODO: skip some tests temporary untill https://github.com/huggingface/datasets/issues/7647 dataset is fixed + # cmd: 'python -m pytest -s -v tests/python_tests/test_whisper_pipeline.py tests/python_tests/test_whisper_pipeline_static.py -k "not test_smoke[sample_from_dataset0 and not test_whisper_constructors[sample_from_dataset0 and not test_max_new_tokens[sample_from_dataset0 and not test_language_mode[language and not test_task_mode[sample_from_dataset0 and not test_language_autodetect[sample_from_dataset0 and not test_whisper_config_constructor and not test_language_autodetect[sample_from_dataset1 and not test_language_autodetect[sample_from_dataset2 and not test_initial_prompt_hotwords[sample_from_dataset0 and not test_random_sampling[sample_from_dataset0"' + # run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).whisper.test }} + # timeout: 120 + # - name: 'Cacheopt E2E (Part 1)' + # cmd: 'python -m pytest -s -v tests/python_tests/test_kv_cache_eviction/test_kv_cache_eviction_1.py' + # run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).continuous_batching.test }} + # timeout: 180 + # - name: 'Cacheopt E2E (Part 2)' + # cmd: 'python -m pytest -s -v tests/python_tests/test_kv_cache_eviction/test_kv_cache_eviction_2.py' + # run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).continuous_batching.test }} + # timeout: 360 + # - name: 'LLM & VLM' + # cmd: 'python -m pytest -s -v tests/python_tests/test_llm_pipeline.py tests/python_tests/test_llm_pipeline_static.py tests/python_tests/test_vlm_pipeline.py tests/python_tests/test_structured_output.py --override-ini cache_dir=/mount/caches/pytest/' + # run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).visual_language.test || fromJSON(needs.smart_ci.outputs.affected_components).LLM.test }} + # timeout: 180 + # - name: 'GGUF Reader tests' + # cmd: 'python -m pytest -s -v tests/python_tests/test_gguf_reader.py' + # run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).GGUF.test || fromJSON(needs.smart_ci.outputs.affected_components).LLM.test }} + # timeout: 360 + # - name: 'Tokenizer tests' + # cmd: 'python -m pytest -s -v tests/python_tests/test_tokenizer.py' + # run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).tokenizers.test }} + # timeout: 60 + # - name: 'API tests' + # cmd: 'python -m pytest -s -v tests/python_tests/test_continuous_batching.py tests/python_tests/test_generation_config.py tests/python_tests/test_sampling.py tests/python_tests/test_text_streamer.py' + # run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).continuous_batching.test || fromJSON(needs.smart_ci.outputs.affected_components).sampling.test || fromJSON(needs.smart_ci.outputs.affected_components).text_streamer.test }} + # timeout: 60 - name: 'Rag tests' - cmd: 'python -m pytest -s -v tests/python_tests/test_rag.py' + cmd: 'pip list; python -m pytest -s -v tests/python_tests/test_rag.py -k test_qwen3_rerank_documents' run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).RAG.test }} timeout: 30 - - name: 'WWB tests' - cmd: 'python -m pytest -s -v tools/who_what_benchmark/tests -m "not nanollava"' - run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).WWB.test }} - timeout: 120 - - name: 'WWB tests (nanollava)' - cmd: | - python -m pip install transformers==4.48.0 - python -m pytest -v ./tools/who_what_benchmark/tests -m nanollava - run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).WWB.test }} - timeout: 90 + # - name: 'WWB tests' + # cmd: 'python -m pytest -s -v tools/who_what_benchmark/tests -m "not nanollava"' + # run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).WWB.test }} + # timeout: 120 + # - name: 'WWB tests (nanollava)' + # cmd: | + # python -m pip install transformers==4.48.0 + # python -m pytest -v ./tools/who_what_benchmark/tests -m nanollava + # run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).WWB.test }} + # timeout: 90 defaults: run: shell: pwsh diff --git a/src/cpp/include/openvino/genai/rag/text_rerank_pipeline.hpp b/src/cpp/include/openvino/genai/rag/text_rerank_pipeline.hpp index 751c1af1ed..3f9b6dbf99 100644 --- a/src/cpp/include/openvino/genai/rag/text_rerank_pipeline.hpp +++ b/src/cpp/include/openvino/genai/rag/text_rerank_pipeline.hpp @@ -21,6 +21,11 @@ class OPENVINO_GENAI_EXPORTS TextRerankPipeline { */ std::optional max_length; + /** + * @brief Side to use for padding "left" or "right" + */ + std::optional padding_side; + /** * @brief Constructs text rerank pipeline configuration */ diff --git a/src/cpp/src/debug_utils.hpp b/src/cpp/src/debug_utils.hpp index 80879ee9b3..9403c5f1aa 100644 --- a/src/cpp/src/debug_utils.hpp +++ b/src/cpp/src/debug_utils.hpp @@ -18,12 +18,12 @@ void print_array(T* array, size_t size) { } template -void print_tensor(ov::Tensor tensor) { +void print_tensor(ov::Tensor tensor, bool print_full = false) { const auto shape = tensor.get_shape(); const size_t rank = shape.size(); const auto* data = tensor.data(); - if (rank > 3) { + if (rank != 2) { print_array(data, tensor.get_size()); return; } @@ -37,7 +37,8 @@ void print_tensor(ov::Tensor tensor) { const size_t batch_offset = batch * seq_length; if (rank == 2) { - for (size_t j = 0; j < std::min(seq_length, size_t(10)); ++j) { + size_t max_length = print_full ? seq_length : std::min(seq_length, size_t(10)); + for (size_t j = 0; j < max_length; ++j) { std::cout << data[batch_offset + j] << " "; } std::cout << "]\n"; @@ -60,19 +61,19 @@ void print_tensor(ov::Tensor tensor) { std::cout << " ]" << std::endl; } -inline void print_tensor(std::string name, ov::Tensor tensor) { +inline void print_tensor(std::string name, ov::Tensor tensor, bool print_full = false) { std::cout << name; std::cout << " " << tensor.get_shape().to_string(); if (tensor.get_element_type() == ov::element::i32) { - print_tensor(tensor); + print_tensor(tensor, print_full); } else if (tensor.get_element_type() == ov::element::i64) { - print_tensor(tensor); + print_tensor(tensor, print_full); } else if (tensor.get_element_type() == ov::element::f32) { - print_tensor(tensor); + print_tensor(tensor, print_full); } else if (tensor.get_element_type() == ov::element::boolean) { - print_tensor(tensor); + print_tensor(tensor, print_full); } else if (tensor.get_element_type() == ov::element::f16) { - print_tensor(tensor); + print_tensor(tensor, print_full); } } diff --git a/src/cpp/src/rag/text_rerank_pipeline.cpp b/src/cpp/src/rag/text_rerank_pipeline.cpp index a10e48244e..e52d4e4192 100644 --- a/src/cpp/src/rag/text_rerank_pipeline.cpp +++ b/src/cpp/src/rag/text_rerank_pipeline.cpp @@ -23,6 +23,7 @@ ov::AnyMap remove_config_properties(const ov::AnyMap& properties) { properties_copy.erase(top_n.name()); properties_copy.erase(max_length.name()); + properties_copy.erase(padding_side.name()); return properties_copy; } @@ -138,6 +139,7 @@ using utils::read_anymap_param; TextRerankPipeline::Config::Config(const ov::AnyMap& properties) { read_anymap_param(properties, ov::genai::top_n.name(), top_n); read_anymap_param(properties, ov::genai::max_length.name(), max_length); + read_anymap_param(properties, ov::genai::padding_side.name(), padding_side); }; class TextRerankPipeline::TextRerankPipelineImpl { @@ -154,6 +156,10 @@ class TextRerankPipeline::TextRerankPipelineImpl { m_tokenization_params.insert({max_length.name(), *m_config.max_length}); } + if (m_config.padding_side) { + m_tokenization_params.insert({padding_side.name(), *m_config.padding_side}); + } + // qwen3 tokenizer doesn't support add_second_input(true) m_tokenizer = Tokenizer(models_path, ov::genai::add_second_input(!is_qwen3)); @@ -185,11 +191,20 @@ class TextRerankPipeline::TextRerankPipelineImpl { } void start_rerank_async(const std::string& query, const std::vector& texts) { + std::cout << "Start reranking..." << std::endl; + std::cout << "Query: " << query << std::endl; + for (size_t i = 0; i < texts.size(); i++) { + std::cout << "Text[" << i << "]: " << texts[i] << std::endl; + } + const TokenizedInputs& encoded = tokenize(query, texts); m_request.set_tensor("input_ids", encoded.input_ids); m_request.set_tensor("attention_mask", encoded.attention_mask); + print_tensor("input_ids", encoded.input_ids, true); + print_tensor("attention_mask", encoded.attention_mask, true); + if (encoded.token_type_ids.has_value()) { m_request.set_tensor("token_type_ids", *encoded.token_type_ids); } @@ -220,6 +235,8 @@ class TextRerankPipeline::TextRerankPipelineImpl { auto scores_data = scores_tensor.data(); + print_tensor("scores", scores_tensor); + std::vector> results; results.reserve(batch_size); @@ -267,6 +284,10 @@ class TextRerankPipeline::TextRerankPipelineImpl { for (auto& text : texts) { concatenated.push_back(query + text); } + std::string padding_side_val; + ov::genai::utils::read_anymap_param(m_tokenization_params, padding_side.name(), padding_side_val); + + std::cout << "Requested padding_side: " << (padding_side_val.empty() ? "not set" : padding_side_val) << std::endl; return m_tokenizer.encode(concatenated, m_tokenization_params); } diff --git a/src/python/openvino_genai/py_openvino_genai.pyi b/src/python/openvino_genai/py_openvino_genai.pyi index 7002f5e24b..bf467089f3 100644 --- a/src/python/openvino_genai/py_openvino_genai.pyi +++ b/src/python/openvino_genai/py_openvino_genai.pyi @@ -3448,7 +3448,10 @@ class TextRerankPipeline: Number of documents to return sorted by score. max_length (int, optional): Maximum length of tokens passed to the embedding model. + padding_side (str, optional): + Side to use for padding "left" or "right" """ + padding_side: str | None @typing.overload def __init__(self) -> None: ... diff --git a/src/python/py_rag.cpp b/src/python/py_rag.cpp index a2556975e5..ec38331713 100644 --- a/src/python/py_rag.cpp +++ b/src/python/py_rag.cpp @@ -53,6 +53,8 @@ Structure to keep TextRerankPipeline configuration parameters. Number of documents to return sorted by score. max_length (int, optional): Maximum length of tokens passed to the embedding model. + padding_side (str, optional): + Side to use for padding "left" or "right" )"; } // namespace @@ -226,7 +228,8 @@ kwargs: Plugin and/or config properties return ov::genai::TextRerankPipeline::Config(pyutils::kwargs_to_any_map(kwargs)); })) .def_readwrite("top_n", &ov::genai::TextRerankPipeline::Config::top_n) - .def_readwrite("max_length", &ov::genai::TextRerankPipeline::Config::max_length); + .def_readwrite("max_length", &ov::genai::TextRerankPipeline::Config::max_length) + .def_readwrite("padding_side", &ov::genai::TextRerankPipeline::Config::padding_side); text_rerank_pipeline.def( py::init([](const std::filesystem::path& models_path, diff --git a/tests/python_tests/test_rag.py b/tests/python_tests/test_rag.py index 3facb41112..489e1933c9 100644 --- a/tests/python_tests/test_rag.py +++ b/tests/python_tests/test_rag.py @@ -233,6 +233,7 @@ def run_qwen3_rerank_optimum( truncation=True, return_tensors="pt", ) + print(inputs["attention_mask"]) logits = model(**inputs).logits # support seq-cls reranker @@ -539,9 +540,13 @@ def test_rerank_documents(download_and_convert_rerank_model, dataset_documents, "config", [ TextRerankPipeline.Config(top_n=4), + TextRerankPipeline.Config(top_n=4, padding_side="left"), + TextRerankPipeline.Config(top_n=4, padding_side="right"), ], ids=[ "top_n=4", + "top_n=4,padding_side=left", + "top_n=4,padding_side=right", ], ) @pytest.mark.precommit @@ -555,6 +560,11 @@ def test_qwen3_seq_cls_rerank_documents(download_and_convert_rerank_model, query opt_result = run_qwen3_rerank_optimum(opt_model, hf_tokenizer, formatted_query, formatted_documents, config) genai_result = run_text_rerank_genai(models_path, formatted_query, formatted_documents, config) + for opt, genai in zip(opt_result, genai_result): + print(f"Optimum: {opt}") + print(f"GenAI: {genai}") + print("-----") + assert_rerank_results(opt_result, genai_result) @@ -576,9 +586,13 @@ def test_qwen3_seq_cls_rerank_documents(download_and_convert_rerank_model, query "config", [ TextRerankPipeline.Config(top_n=4), + TextRerankPipeline.Config(top_n=4, padding_side="left"), + TextRerankPipeline.Config(top_n=4, padding_side="right"), ], ids=[ "top_n=4", + "top_n=4, padding_side=left", + "top_n=4, padding_side=right", ], ) @pytest.mark.precommit @@ -592,4 +606,9 @@ def test_qwen3_rerank_documents(download_and_convert_model_fixture, query, task, opt_result = run_qwen3_rerank_optimum(opt_model, hf_tokenizer, formatted_query, formatted_documents, config) genai_result = run_text_rerank_genai(models_path, formatted_query, formatted_documents, config) + for opt, genai in zip(opt_result, genai_result): + print(f"Optimum: {opt}") + print(f"GenAI: {genai}") + print("-----") + assert_rerank_results(opt_result, genai_result)