From 7b3ad318100f8223cce755b63bd97ef80e5d2e8e Mon Sep 17 00:00:00 2001 From: "xiping.yan" Date: Mon, 3 Nov 2025 14:15:18 +0800 Subject: [PATCH 01/38] Introduce add_extension to genai. Signed-off-by: xiping.yan --- src/cpp/include/openvino/genai/generation_config.hpp | 7 +++++++ src/cpp/src/generation_config.cpp | 4 ++++ src/python/openvino_genai/__init__.py | 3 ++- src/python/openvino_genai/__init__.pyi | 1 + src/python/openvino_genai/py_openvino_genai.pyi | 4 ++++ src/python/py_openvino_genai.cpp | 6 ++++++ 6 files changed, 24 insertions(+), 1 deletion(-) diff --git a/src/cpp/include/openvino/genai/generation_config.hpp b/src/cpp/include/openvino/genai/generation_config.hpp index e592cb36ff..da2fccbfa0 100644 --- a/src/cpp/include/openvino/genai/generation_config.hpp +++ b/src/cpp/include/openvino/genai/generation_config.hpp @@ -706,6 +706,13 @@ class OPENVINO_GENAI_EXPORTS GenerationConfig { void validate() const; }; +/** + * @brief Registers an extension to ov::Core object for model with custom op. + * @param library_path Path to the library with ov::Extension. + * @{ + */ +void OPENVINO_GENAI_EXPORTS add_extension(const std::string& library_path); + /* * utils that allow to use generate and operator() in the following way: * pipe.generate(input_ids, ov::genai::max_new_tokens(200), ov::genai::temperature(1.0f),...) diff --git a/src/cpp/src/generation_config.cpp b/src/cpp/src/generation_config.cpp index 44fc616c1e..739e7cf4c2 100644 --- a/src/cpp/src/generation_config.cpp +++ b/src/cpp/src/generation_config.cpp @@ -490,5 +490,9 @@ GenerationConfig multinomial() { return multinomial_config; } +void add_extension(const std::string& library_path) { + utils::singleton_core().add_extension(library_path); +} + } // namespace genai } // namespace ov diff --git a/src/python/openvino_genai/__init__.py b/src/python/openvino_genai/__init__.py index b10aadd062..997cb5cf19 100644 --- a/src/python/openvino_genai/__init__.py +++ b/src/python/openvino_genai/__init__.py @@ -19,7 +19,8 @@ get_version, StreamingStatus, TextStreamer, - TextParserStreamer + TextParserStreamer, + add_extension ) from .py_openvino_genai import ( diff --git a/src/python/openvino_genai/__init__.pyi b/src/python/openvino_genai/__init__.pyi index c1d1f1dc30..fe8fea17dd 100644 --- a/src/python/openvino_genai/__init__.pyi +++ b/src/python/openvino_genai/__init__.pyi @@ -74,6 +74,7 @@ from openvino_genai.py_openvino_genai import WhisperPipeline from openvino_genai.py_openvino_genai import WhisperRawPerfMetrics from openvino_genai.py_openvino_genai import draft_model from openvino_genai.py_openvino_genai import get_version +from openvino_genai.py_openvino_genai import add_extension import os as os from . 
import py_openvino_genai __all__: list[str] = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChatHistory', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'DeepSeekR1ReasoningIncrementalParser', 'DeepSeekR1ReasoningParser', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'ImageGenerationPerfMetrics', 'IncrementalParser', 'InpaintingPipeline', 'KVCrushAnchorPointMode', 'KVCrushConfig', 'LLMPipeline', 'Llama3JsonToolParser', 'Llama3PythonicToolParser', 'Parser', 'PerfMetrics', 'Phi4ReasoningIncrementalParser', 'Phi4ReasoningParser', 'RawImageGenerationPerfMetrics', 'RawPerfMetrics', 'ReasoningIncrementalParser', 'ReasoningParser', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'SparseAttentionConfig', 'SparseAttentionMode', 'SpeechGenerationConfig', 'SpeechGenerationPerfMetrics', 'StopCriteria', 'StreamerBase', 'StreamingStatus', 'StructuralTagItem', 'StructuralTagsConfig', 'StructuredOutputConfig', 'T5EncoderModel', 'Text2ImagePipeline', 'Text2SpeechDecodedResults', 'Text2SpeechPipeline', 'TextEmbeddingPipeline', 'TextParserStreamer', 'TextRerankPipeline', 'TextStreamer', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMPipeline', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version', 'openvino', 'os', 'py_openvino_genai'] diff --git a/src/python/openvino_genai/py_openvino_genai.pyi b/src/python/openvino_genai/py_openvino_genai.pyi index 0444146334..b0b13e0c26 100644 --- a/src/python/openvino_genai/py_openvino_genai.pyi +++ b/src/python/openvino_genai/py_openvino_genai.pyi @@ -4406,3 +4406,7 @@ def get_version() -> str: """ OpenVINO GenAI version """ +def add_extension(str) -> None: + """ + OpenVINO GenAI version + """ \ No newline at end of file diff --git a/src/python/py_openvino_genai.cpp b/src/python/py_openvino_genai.cpp index ed010a7581..ae7e85ceb6 100644 --- a/src/python/py_openvino_genai.cpp +++ b/src/python/py_openvino_genai.cpp @@ -13,6 +13,7 @@ #include "openvino/genai/llm_pipeline.hpp" #include "openvino/genai/text_streamer.hpp" #include "openvino/genai/version.hpp" +#include "openvino/genai/generation_config.hpp" #include "py_utils.hpp" @@ -28,6 +29,7 @@ using ov::genai::StreamingStatus; using ov::genai::TextStreamer; using ov::genai::Tokenizer; using ov::genai::get_version; +using ov::genai::add_extension; void init_lora_adapter(py::module_& m); void init_perf_metrics(py::module_& m); @@ -88,6 +90,10 @@ PYBIND11_MODULE(py_openvino_genai, m) { return get_version().buildNumber; }, get_version().description); + m.def("add_extension", [] (py::str library_path) { + return add_extension(library_path); + }); + init_perf_metrics(m); py::class_(m, "DecodedResults", decoded_results_docstring) From 2f603013849b337c07f7cb3b4c6c2b5e999c9a92 Mon Sep 17 00:00:00 2001 From: "xiping.yan" Date: Wed, 5 Nov 2025 12:03:45 +0800 Subject: [PATCH 02/38] add draft test. 
Signed-off-by: xiping.yan --- tests/python_tests/test_add_extension.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 tests/python_tests/test_add_extension.py diff --git a/tests/python_tests/test_add_extension.py b/tests/python_tests/test_add_extension.py new file mode 100644 index 0000000000..6625f5d3ce --- /dev/null +++ b/tests/python_tests/test_add_extension.py @@ -0,0 +1,16 @@ +# Copyright (C) 2023-2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + + +import pytest +import openvino_genai as ov_genai + +@pytest.mark.precommit +def test_add_extension(): + print(ov_genai.get_version()) + # I don't know how to get tokenizer path. + tokenizer_path = "" + try: + ov_genai.add_extension(tokenizer_path) + except: + assert(False) \ No newline at end of file From 80efe54e0a29669c003ccbf3edb26b745d2faf81 Mon Sep 17 00:00:00 2001 From: Xiping Yan Date: Wed, 5 Nov 2025 12:56:12 +0800 Subject: [PATCH 03/38] Update src/python/openvino_genai/py_openvino_genai.pyi Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- src/python/openvino_genai/py_openvino_genai.pyi | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/python/openvino_genai/py_openvino_genai.pyi b/src/python/openvino_genai/py_openvino_genai.pyi index b0b13e0c26..d6e11da73d 100644 --- a/src/python/openvino_genai/py_openvino_genai.pyi +++ b/src/python/openvino_genai/py_openvino_genai.pyi @@ -4408,5 +4408,7 @@ def get_version() -> str: """ def add_extension(str) -> None: """ - OpenVINO GenAI version + Registers an OpenVINO extension from a library path to enable support for models with custom operations. + + :param str: Path to the extension library to register. """ \ No newline at end of file From 4a02c8ef4917d3da2a8b60550c4c6be123dc9aed Mon Sep 17 00:00:00 2001 From: Xiping Yan Date: Wed, 5 Nov 2025 12:56:51 +0800 Subject: [PATCH 04/38] Update src/python/openvino_genai/__init__.pyi Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- src/python/openvino_genai/__init__.pyi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/python/openvino_genai/__init__.pyi b/src/python/openvino_genai/__init__.pyi index fe8fea17dd..6a2ffd2d2f 100644 --- a/src/python/openvino_genai/__init__.pyi +++ b/src/python/openvino_genai/__init__.pyi @@ -77,5 +77,5 @@ from openvino_genai.py_openvino_genai import get_version from openvino_genai.py_openvino_genai import add_extension import os as os from . 
import py_openvino_genai -__all__: list[str] = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChatHistory', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'DeepSeekR1ReasoningIncrementalParser', 'DeepSeekR1ReasoningParser', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'ImageGenerationPerfMetrics', 'IncrementalParser', 'InpaintingPipeline', 'KVCrushAnchorPointMode', 'KVCrushConfig', 'LLMPipeline', 'Llama3JsonToolParser', 'Llama3PythonicToolParser', 'Parser', 'PerfMetrics', 'Phi4ReasoningIncrementalParser', 'Phi4ReasoningParser', 'RawImageGenerationPerfMetrics', 'RawPerfMetrics', 'ReasoningIncrementalParser', 'ReasoningParser', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'SparseAttentionConfig', 'SparseAttentionMode', 'SpeechGenerationConfig', 'SpeechGenerationPerfMetrics', 'StopCriteria', 'StreamerBase', 'StreamingStatus', 'StructuralTagItem', 'StructuralTagsConfig', 'StructuredOutputConfig', 'T5EncoderModel', 'Text2ImagePipeline', 'Text2SpeechDecodedResults', 'Text2SpeechPipeline', 'TextEmbeddingPipeline', 'TextParserStreamer', 'TextRerankPipeline', 'TextStreamer', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMPipeline', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version', 'openvino', 'os', 'py_openvino_genai'] +__all__: list[str] = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChatHistory', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'DeepSeekR1ReasoningIncrementalParser', 'DeepSeekR1ReasoningParser', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'ImageGenerationPerfMetrics', 'IncrementalParser', 'InpaintingPipeline', 'KVCrushAnchorPointMode', 'KVCrushConfig', 'LLMPipeline', 'Llama3JsonToolParser', 'Llama3PythonicToolParser', 'Parser', 'PerfMetrics', 'Phi4ReasoningIncrementalParser', 'Phi4ReasoningParser', 'RawImageGenerationPerfMetrics', 'RawPerfMetrics', 'ReasoningIncrementalParser', 'ReasoningParser', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'SparseAttentionConfig', 'SparseAttentionMode', 'SpeechGenerationConfig', 'SpeechGenerationPerfMetrics', 'StopCriteria', 'StreamerBase', 'StreamingStatus', 'StructuralTagItem', 'StructuralTagsConfig', 'StructuredOutputConfig', 'T5EncoderModel', 'Text2ImagePipeline', 'Text2SpeechDecodedResults', 'Text2SpeechPipeline', 'TextEmbeddingPipeline', 'TextParserStreamer', 'TextRerankPipeline', 'TextStreamer', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMPipeline', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'add_extension', 'draft_model', 'get_version', 'openvino', 'os', 'py_openvino_genai'] __version__: str From 31078ab20e888a9f0e179a761c75c8b905411480 Mon Sep 17 00:00:00 2001 From: Xiping Yan Date: Wed, 5 Nov 2025 12:57:16 +0800 Subject: [PATCH 05/38] Update tests/python_tests/test_add_extension.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- 
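Note (reviewer comment, not part of the commit): a bare "except:" also swallows SystemExit and KeyboardInterrupt, so narrowing it to "except Exception:" is the safer pattern. For reference, a minimal usage sketch of the API this series introduces; the library and model paths below are placeholders, not paths from this repository:

    import openvino_genai as ov_genai

    # add_extension() registers a shared library exposing ov::Extension entry
    # points (e.g. custom ops) on the ov::Core shared by all GenAI pipelines,
    # so it must be called before a pipeline reads the model.
    ov_genai.add_extension("/path/to/libcustom_ops.so")  # placeholder path
    pipe = ov_genai.LLMPipeline("/path/to/model_dir", "CPU")  # placeholder path
    print(pipe.generate("Hello", max_new_tokens=16))
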
tests/python_tests/test_add_extension.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/python_tests/test_add_extension.py b/tests/python_tests/test_add_extension.py index 6625f5d3ce..addc3f13f5 100644 --- a/tests/python_tests/test_add_extension.py +++ b/tests/python_tests/test_add_extension.py @@ -12,5 +12,5 @@ def test_add_extension(): tokenizer_path = "" try: ov_genai.add_extension(tokenizer_path) - except: + except Exception: assert(False) \ No newline at end of file From 281fe8ceebad212c5c48493b20b70bbb5b101f74 Mon Sep 17 00:00:00 2001 From: Xiping Yan Date: Wed, 5 Nov 2025 12:57:25 +0800 Subject: [PATCH 06/38] Update tests/python_tests/test_add_extension.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- tests/python_tests/test_add_extension.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/python_tests/test_add_extension.py b/tests/python_tests/test_add_extension.py index addc3f13f5..8b699650bc 100644 --- a/tests/python_tests/test_add_extension.py +++ b/tests/python_tests/test_add_extension.py @@ -8,9 +8,9 @@ @pytest.mark.precommit def test_add_extension(): print(ov_genai.get_version()) - # I don't know how to get tokenizer path. - tokenizer_path = "" + # Path to the OpenVINO extension shared library (update as needed). + extension_path = "" try: - ov_genai.add_extension(tokenizer_path) + ov_genai.add_extension(extension_path) except Exception: assert(False) \ No newline at end of file From e449070a0a81b94ad845ff3f4d01cbbc668f4a86 Mon Sep 17 00:00:00 2001 From: Xiping Yan Date: Wed, 5 Nov 2025 13:06:49 +0800 Subject: [PATCH 07/38] Update src/python/openvino_genai/py_openvino_genai.pyi Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- src/python/openvino_genai/py_openvino_genai.pyi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/python/openvino_genai/py_openvino_genai.pyi b/src/python/openvino_genai/py_openvino_genai.pyi index 279979bed2..ef054740ca 100644 --- a/src/python/openvino_genai/py_openvino_genai.pyi +++ b/src/python/openvino_genai/py_openvino_genai.pyi @@ -4406,9 +4406,9 @@ def get_version() -> str: """ OpenVINO GenAI version """ -def add_extension(str) -> None: +def add_extension(library_path: str) -> None: """ Registers an OpenVINO extension from a library path to enable support for models with custom operations. - :param str: Path to the extension library to register. + :param library_path: Path to the extension library to register. 
""" \ No newline at end of file From 4f60a60c7bbb8e6e93e40d89e0ca0c3b82c057ac Mon Sep 17 00:00:00 2001 From: Xiping Yan Date: Wed, 5 Nov 2025 13:07:16 +0800 Subject: [PATCH 08/38] Update src/cpp/include/openvino/genai/generation_config.hpp Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- src/cpp/include/openvino/genai/generation_config.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/cpp/include/openvino/genai/generation_config.hpp b/src/cpp/include/openvino/genai/generation_config.hpp index da2fccbfa0..5bb7ce587a 100644 --- a/src/cpp/include/openvino/genai/generation_config.hpp +++ b/src/cpp/include/openvino/genai/generation_config.hpp @@ -712,6 +712,7 @@ class OPENVINO_GENAI_EXPORTS GenerationConfig { * @{ */ void OPENVINO_GENAI_EXPORTS add_extension(const std::string& library_path); +/** @} */ /* * utils that allow to use generate and operator() in the following way: From 698e5ec74058038cdfa4087b538e159dc9f4f385 Mon Sep 17 00:00:00 2001 From: Xiping Yan Date: Wed, 5 Nov 2025 13:09:22 +0800 Subject: [PATCH 09/38] Update generation_config.hpp --- src/cpp/include/openvino/genai/generation_config.hpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/cpp/include/openvino/genai/generation_config.hpp b/src/cpp/include/openvino/genai/generation_config.hpp index 5bb7ce587a..20ae3937dd 100644 --- a/src/cpp/include/openvino/genai/generation_config.hpp +++ b/src/cpp/include/openvino/genai/generation_config.hpp @@ -709,10 +709,9 @@ class OPENVINO_GENAI_EXPORTS GenerationConfig { /** * @brief Registers an extension to ov::Core object for model with custom op. * @param library_path Path to the library with ov::Extension. - * @{ */ void OPENVINO_GENAI_EXPORTS add_extension(const std::string& library_path); -/** @} */ + /* * utils that allow to use generate and operator() in the following way: From 8c56ca83ca07a01540ba037acf4a79dbd2af4e57 Mon Sep 17 00:00:00 2001 From: Xiping Yan Date: Wed, 5 Nov 2025 13:11:17 +0800 Subject: [PATCH 10/38] Update tests/python_tests/test_add_extension.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- tests/python_tests/test_add_extension.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/python_tests/test_add_extension.py b/tests/python_tests/test_add_extension.py index 8b699650bc..76193627b0 100644 --- a/tests/python_tests/test_add_extension.py +++ b/tests/python_tests/test_add_extension.py @@ -10,7 +10,5 @@ def test_add_extension(): print(ov_genai.get_version()) # Path to the OpenVINO extension shared library (update as needed). 
extension_path = "" - try: - ov_genai.add_extension(extension_path) - except Exception: - assert(False) \ No newline at end of file + with pytest.raises(Exception): + ov_genai.add_extension(extension_path) \ No newline at end of file From c1e0a7c5a6d0a75b2adb7db494f58e4e823eea4a Mon Sep 17 00:00:00 2001 From: Xiping Yan Date: Wed, 5 Nov 2025 13:19:40 +0800 Subject: [PATCH 11/38] Update src/python/py_openvino_genai.cpp Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- src/python/py_openvino_genai.cpp | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/src/python/py_openvino_genai.cpp b/src/python/py_openvino_genai.cpp index ae7e85ceb6..ebc4ee2cc1 100644 --- a/src/python/py_openvino_genai.cpp +++ b/src/python/py_openvino_genai.cpp @@ -92,8 +92,21 @@ PYBIND11_MODULE(py_openvino_genai, m) { m.def("add_extension", [] (py::str library_path) { return add_extension(library_path); - }); - + }, + R"doc( + Adds a custom extension library to the OpenVINO GenAI backend. + + Parameters + ---------- + library_path : str + Path to the shared library containing the extension. + + Raises + ------ + RuntimeError + If the extension cannot be loaded. + )doc" + ); init_perf_metrics(m); py::class_(m, "DecodedResults", decoded_results_docstring) From 57bd6a155bc5edb9b8a35b82a80fa1ffe2890712 Mon Sep 17 00:00:00 2001 From: Xiping Yan Date: Wed, 5 Nov 2025 13:30:47 +0800 Subject: [PATCH 12/38] Update tests/python_tests/test_add_extension.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- tests/python_tests/test_add_extension.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/python_tests/test_add_extension.py b/tests/python_tests/test_add_extension.py index 76193627b0..9cad8af3d2 100644 --- a/tests/python_tests/test_add_extension.py +++ b/tests/python_tests/test_add_extension.py @@ -10,5 +10,5 @@ def test_add_extension(): print(ov_genai.get_version()) # Path to the OpenVINO extension shared library (update as needed). extension_path = "" - with pytest.raises(Exception): + with pytest.raises(RuntimeError): ov_genai.add_extension(extension_path) \ No newline at end of file From a4ef9c804832978379d685446b2c74a46744e6b3 Mon Sep 17 00:00:00 2001 From: Xiping Yan Date: Wed, 5 Nov 2025 13:36:24 +0800 Subject: [PATCH 13/38] Update tests/python_tests/test_add_extension.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- tests/python_tests/test_add_extension.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/python_tests/test_add_extension.py b/tests/python_tests/test_add_extension.py index 9cad8af3d2..352f62e29b 100644 --- a/tests/python_tests/test_add_extension.py +++ b/tests/python_tests/test_add_extension.py @@ -7,7 +7,6 @@ @pytest.mark.precommit def test_add_extension(): - print(ov_genai.get_version()) # Path to the OpenVINO extension shared library (update as needed). 
extension_path = "" with pytest.raises(RuntimeError): From 7260493950961430e4b81fc68da986d615a2c6e9 Mon Sep 17 00:00:00 2001 From: Xiping Yan Date: Mon, 10 Nov 2025 13:15:32 +0800 Subject: [PATCH 14/38] Update src/cpp/include/openvino/genai/generation_config.hpp Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- src/cpp/include/openvino/genai/generation_config.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cpp/include/openvino/genai/generation_config.hpp b/src/cpp/include/openvino/genai/generation_config.hpp index 20ae3937dd..c9853afe86 100644 --- a/src/cpp/include/openvino/genai/generation_config.hpp +++ b/src/cpp/include/openvino/genai/generation_config.hpp @@ -707,7 +707,7 @@ class OPENVINO_GENAI_EXPORTS GenerationConfig { }; /** - * @brief Registers an extension to ov::Core object for model with custom op. + * @brief Registers an extension to ov::Core object for models with custom ops. * @param library_path Path to the library with ov::Extension. */ void OPENVINO_GENAI_EXPORTS add_extension(const std::string& library_path); From 689b08c9b9599ef9ce22e8ba648088ec103719ab Mon Sep 17 00:00:00 2001 From: xipingya Date: Mon, 10 Nov 2025 15:57:25 +0800 Subject: [PATCH 15/38] Get tokenizer so path. Signed-off-by: xipingya --- tests/python_tests/test_add_extension.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/tests/python_tests/test_add_extension.py b/tests/python_tests/test_add_extension.py index 352f62e29b..8478cf651c 100644 --- a/tests/python_tests/test_add_extension.py +++ b/tests/python_tests/test_add_extension.py @@ -4,10 +4,23 @@ import pytest import openvino_genai as ov_genai +import platform +import os +import openvino_tokenizers; @pytest.mark.precommit def test_add_extension(): # Path to the OpenVINO extension shared library (update as needed). - extension_path = "" - with pytest.raises(RuntimeError): - ov_genai.add_extension(extension_path) \ No newline at end of file + os_name = platform.system() + if os_name == "Windows": + ov_tokenizer_path = os.path.dirname(openvino_tokenizers.__file__) + "\\lib\\openvino_tokenizers.dll" + elif os_name == "Linux": + ov_tokenizer_path = os.path.dirname(openvino_tokenizers.__file__) + "/lib/libopenvino_tokenizers.so" + else: + print(f"Skipped. Current test only support Windows and Linux") + return + + try: + ov_genai.add_extension(ov_tokenizer_path) + except RuntimeError as e: + raise RuntimeError(f"Add extension fail, maybe tokenizers' version mismatch. Original error: {e}") \ No newline at end of file From 072b0b0ca822e7e97a9adfd82224e9a7c102774f Mon Sep 17 00:00:00 2001 From: Xiping Yan Date: Mon, 10 Nov 2025 16:16:44 +0800 Subject: [PATCH 16/38] Update tests/python_tests/test_add_extension.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- tests/python_tests/test_add_extension.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/python_tests/test_add_extension.py b/tests/python_tests/test_add_extension.py index 8478cf651c..636ed09943 100644 --- a/tests/python_tests/test_add_extension.py +++ b/tests/python_tests/test_add_extension.py @@ -13,9 +13,9 @@ def test_add_extension(): # Path to the OpenVINO extension shared library (update as needed). 
os_name = platform.system() if os_name == "Windows": - ov_tokenizer_path = os.path.dirname(openvino_tokenizers.__file__) + "\\lib\\openvino_tokenizers.dll" + ov_tokenizer_path = os.path.join(os.path.dirname(openvino_tokenizers.__file__), "lib", "openvino_tokenizers.dll") elif os_name == "Linux": - ov_tokenizer_path = os.path.dirname(openvino_tokenizers.__file__) + "/lib/libopenvino_tokenizers.so" + ov_tokenizer_path = os.path.join(os.path.dirname(openvino_tokenizers.__file__), "lib", "libopenvino_tokenizers.so") else: print(f"Skipped. Current test only support Windows and Linux") return From c05b09203f67f2132a33030842f4128a653e4c28 Mon Sep 17 00:00:00 2001 From: Xiping Yan Date: Mon, 10 Nov 2025 16:17:00 +0800 Subject: [PATCH 17/38] Update tests/python_tests/test_add_extension.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- tests/python_tests/test_add_extension.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/python_tests/test_add_extension.py b/tests/python_tests/test_add_extension.py index 636ed09943..955dd1dd6a 100644 --- a/tests/python_tests/test_add_extension.py +++ b/tests/python_tests/test_add_extension.py @@ -17,8 +17,7 @@ def test_add_extension(): elif os_name == "Linux": ov_tokenizer_path = os.path.join(os.path.dirname(openvino_tokenizers.__file__), "lib", "libopenvino_tokenizers.so") else: - print(f"Skipped. Current test only support Windows and Linux") - return + pytest.skip("Skipped. Current test only supports Windows and Linux") try: ov_genai.add_extension(ov_tokenizer_path) except RuntimeError as e: raise RuntimeError(f"Add extension fail, maybe tokenizers' version mismatch. Original error: {e}") \ No newline at end of file From 7e1ac0f58ac781da18aa3bb128b254053bd6197a Mon Sep 17 00:00:00 2001 From: Xiping Yan Date: Mon, 10 Nov 2025 16:43:23 +0800 Subject: [PATCH 18/38] Update tests/python_tests/test_add_extension.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- tests/python_tests/test_add_extension.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/python_tests/test_add_extension.py b/tests/python_tests/test_add_extension.py index 955dd1dd6a..d27e04b224 100644 --- a/tests/python_tests/test_add_extension.py +++ b/tests/python_tests/test_add_extension.py @@ -6,7 +6,7 @@ import openvino_genai as ov_genai import platform import os -import openvino_tokenizers; +import openvino_tokenizers @pytest.mark.precommit def test_add_extension(): From ba86cc40c01f12ef480ec498987d9c6b69120d16 Mon Sep 17 00:00:00 2001 From: xipingya Date: Tue, 11 Nov 2025 13:18:16 +0800 Subject: [PATCH 19/38] enable add_extension to VLM pipeline with properties.
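With this change, extension libraries are passed through pipeline properties under the "EXTENSIONS" key and registered on the shared ov::Core before the model is read, instead of through a global free function. A sketch of the intended usage, mirroring the test added below (the paths are placeholders):

    import openvino_genai as ov_genai

    # Each entry is a path to a shared library with ov::Extension entry points.
    properties = {"EXTENSIONS": ["/path/to/libcustom_ops.so"]}  # placeholder path
    pipe = ov_genai.VLMPipeline("/path/to/model_dir", "CPU", properties)
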
Signed-off-by: xipingya --- .../openvino/genai/generation_config.hpp | 6 ----- src/cpp/src/generation_config.cpp | 4 --- src/cpp/src/utils.cpp | 16 ++++++++++++ src/cpp/src/utils.hpp | 2 ++ src/cpp/src/visual_language/pipeline.cpp | 15 ++++++++--- src/python/openvino_genai/__init__.py | 3 +-- src/python/openvino_genai/__init__.pyi | 3 +-- .../openvino_genai/py_openvino_genai.pyi | 6 ----- src/python/py_openvino_genai.cpp | 18 ------------- tests/python_tests/test_add_extension.py | 26 ------------------- tests/python_tests/test_vlm_pipeline.py | 13 ++++++++++ 11 files changed, 45 insertions(+), 67 deletions(-) delete mode 100644 tests/python_tests/test_add_extension.py diff --git a/src/cpp/include/openvino/genai/generation_config.hpp b/src/cpp/include/openvino/genai/generation_config.hpp index c9853afe86..dbc6060c36 100644 --- a/src/cpp/include/openvino/genai/generation_config.hpp +++ b/src/cpp/include/openvino/genai/generation_config.hpp @@ -706,12 +706,6 @@ class OPENVINO_GENAI_EXPORTS GenerationConfig { void validate() const; }; -/** - * @brief Registers an extension to ov::Core object for models with custom ops. - * @param library_path Path to the library with ov::Extension. - */ -void OPENVINO_GENAI_EXPORTS add_extension(const std::string& library_path); - /* * utils that allow to use generate and operator() in the following way: diff --git a/src/cpp/src/generation_config.cpp b/src/cpp/src/generation_config.cpp index 739e7cf4c2..44fc616c1e 100644 --- a/src/cpp/src/generation_config.cpp +++ b/src/cpp/src/generation_config.cpp @@ -490,9 +490,5 @@ GenerationConfig multinomial() { return multinomial_config; } -void add_extension(const std::string& library_path) { - utils::singleton_core().add_extension(library_path); -} - } // namespace genai } // namespace ov diff --git a/src/cpp/src/utils.cpp b/src/cpp/src/utils.cpp index 8948832f26..8b6fce7640 100644 --- a/src/cpp/src/utils.cpp +++ b/src/cpp/src/utils.cpp @@ -718,6 +718,22 @@ std::pair extract_attention_backend(const ov::AnyMap& e return {properties, attention_backend}; }; +std::pair> extract_extensions(const ov::AnyMap& external_properties) { + std::vector extensions; + ov::AnyMap properties = external_properties; + + auto it = properties.find("EXTENSIONS"); + if (it != properties.end()) { + extensions = it->second.as>(); + for (auto ext : extensions) { + std::cout << "ext = " << ext.c_str() << std::endl; + } + properties.erase(it); + } + + return {properties, extensions}; +} + void release_core_plugin(const std::string& device) { try { singleton_core().unload_plugin(device); diff --git a/src/cpp/src/utils.hpp b/src/cpp/src/utils.hpp index 14106ef8f7..edc4adde21 100644 --- a/src/cpp/src/utils.hpp +++ b/src/cpp/src/utils.hpp @@ -286,6 +286,8 @@ bool explicitly_requires_paged_attention(const ov::AnyMap& properties, bool is_n std::pair extract_attention_backend(const ov::AnyMap& external_properties, bool is_npu_requested = false); +std::pair> extract_extensions(const ov::AnyMap& external_properties); + void save_openvino_model(const std::shared_ptr& model, const std::string& save_path, bool compress_to_fp16); ov::Tensor merge_text_and_image_embeddings_llava(const ov::Tensor& input_ids, ov::Tensor& text_embeds, const std::vector& image_embeds, int64_t image_token_id); diff --git a/src/cpp/src/visual_language/pipeline.cpp b/src/cpp/src/visual_language/pipeline.cpp index f6f8b07f7b..df4ec16eb7 100644 --- a/src/cpp/src/visual_language/pipeline.cpp +++ b/src/cpp/src/visual_language/pipeline.cpp @@ -80,9 +80,12 @@ class 
VLMPipeline::VLMPipelineImpl : public VLMPipelineBase{ } { m_is_npu = device.find("NPU") != std::string::npos; - auto properties_copy = properties; auto language_model_path = models_dir / "openvino_language_model.xml"; - auto language_model = utils::singleton_core().read_model(language_model_path, {}, properties_copy); + auto [properties_copy, extensions] = utils::extract_extensions(properties); + for (auto extension : extensions) { + utils::singleton_core().add_extension(extension); + } + auto language_model = utils::singleton_core().read_model(language_model_path, {}, properties_copy); auto kv_pos = ov::genai::utils::get_kv_axes_pos(language_model); // In case user provided properties per-device @@ -157,8 +160,14 @@ class VLMPipeline::VLMPipelineImpl : public VLMPipelineBase{ m_embedding = m_inputs_embedder->get_embedding_model(); auto m_language_pair = utils::get_model_weights_pair(models_map, "language"); + + auto [properties_copy, extensions] = utils::extract_extensions(properties); + for (auto extension : extensions) { + utils::singleton_core().add_extension(extension); + } + m_language = utils::singleton_core().compile_model( - m_language_pair.first, m_language_pair.second, device, properties + m_language_pair.first, m_language_pair.second, device, properties_copy ).create_infer_request(); m_language.get_tensor("attention_mask").set_shape({1, 0}); diff --git a/src/python/openvino_genai/__init__.py b/src/python/openvino_genai/__init__.py index 997cb5cf19..b10aadd062 100644 --- a/src/python/openvino_genai/__init__.py +++ b/src/python/openvino_genai/__init__.py @@ -19,8 +19,7 @@ get_version, StreamingStatus, TextStreamer, - TextParserStreamer, - add_extension + TextParserStreamer ) from .py_openvino_genai import ( diff --git a/src/python/openvino_genai/__init__.pyi b/src/python/openvino_genai/__init__.pyi index 6a2ffd2d2f..c1d1f1dc30 100644 --- a/src/python/openvino_genai/__init__.pyi +++ b/src/python/openvino_genai/__init__.pyi @@ -74,8 +74,7 @@ from openvino_genai.py_openvino_genai import WhisperPipeline from openvino_genai.py_openvino_genai import WhisperRawPerfMetrics from openvino_genai.py_openvino_genai import draft_model from openvino_genai.py_openvino_genai import get_version -from openvino_genai.py_openvino_genai import add_extension import os as os from . 
import py_openvino_genai -__all__: list[str] = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChatHistory', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'DeepSeekR1ReasoningIncrementalParser', 'DeepSeekR1ReasoningParser', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'ImageGenerationPerfMetrics', 'IncrementalParser', 'InpaintingPipeline', 'KVCrushAnchorPointMode', 'KVCrushConfig', 'LLMPipeline', 'Llama3JsonToolParser', 'Llama3PythonicToolParser', 'Parser', 'PerfMetrics', 'Phi4ReasoningIncrementalParser', 'Phi4ReasoningParser', 'RawImageGenerationPerfMetrics', 'RawPerfMetrics', 'ReasoningIncrementalParser', 'ReasoningParser', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'SparseAttentionConfig', 'SparseAttentionMode', 'SpeechGenerationConfig', 'SpeechGenerationPerfMetrics', 'StopCriteria', 'StreamerBase', 'StreamingStatus', 'StructuralTagItem', 'StructuralTagsConfig', 'StructuredOutputConfig', 'T5EncoderModel', 'Text2ImagePipeline', 'Text2SpeechDecodedResults', 'Text2SpeechPipeline', 'TextEmbeddingPipeline', 'TextParserStreamer', 'TextRerankPipeline', 'TextStreamer', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMPipeline', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'add_extension', 'draft_model', 'get_version', 'openvino', 'os', 'py_openvino_genai'] +__all__: list[str] = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChatHistory', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'DeepSeekR1ReasoningIncrementalParser', 'DeepSeekR1ReasoningParser', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'ImageGenerationPerfMetrics', 'IncrementalParser', 'InpaintingPipeline', 'KVCrushAnchorPointMode', 'KVCrushConfig', 'LLMPipeline', 'Llama3JsonToolParser', 'Llama3PythonicToolParser', 'Parser', 'PerfMetrics', 'Phi4ReasoningIncrementalParser', 'Phi4ReasoningParser', 'RawImageGenerationPerfMetrics', 'RawPerfMetrics', 'ReasoningIncrementalParser', 'ReasoningParser', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'SparseAttentionConfig', 'SparseAttentionMode', 'SpeechGenerationConfig', 'SpeechGenerationPerfMetrics', 'StopCriteria', 'StreamerBase', 'StreamingStatus', 'StructuralTagItem', 'StructuralTagsConfig', 'StructuredOutputConfig', 'T5EncoderModel', 'Text2ImagePipeline', 'Text2SpeechDecodedResults', 'Text2SpeechPipeline', 'TextEmbeddingPipeline', 'TextParserStreamer', 'TextRerankPipeline', 'TextStreamer', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMPipeline', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version', 'openvino', 'os', 'py_openvino_genai'] __version__: str diff --git a/src/python/openvino_genai/py_openvino_genai.pyi b/src/python/openvino_genai/py_openvino_genai.pyi index ef054740ca..bec0eacdbf 100644 --- a/src/python/openvino_genai/py_openvino_genai.pyi +++ b/src/python/openvino_genai/py_openvino_genai.pyi @@ -4405,10 +4405,4 @@ def 
draft_model(models_path: os.PathLike | str | bytes, device: str = '', **kwar def get_version() -> str: """ OpenVINO GenAI version - """ -def add_extension(library_path: str) -> None: - """ - Registers an OpenVINO extension from a library path to enable support for models with custom operations. - - :param library_path: Path to the extension library to register. """ \ No newline at end of file diff --git a/src/python/py_openvino_genai.cpp b/src/python/py_openvino_genai.cpp index ebc4ee2cc1..92ab5d15e1 100644 --- a/src/python/py_openvino_genai.cpp +++ b/src/python/py_openvino_genai.cpp @@ -29,7 +29,6 @@ using ov::genai::StreamingStatus; using ov::genai::TextStreamer; using ov::genai::Tokenizer; using ov::genai::get_version; -using ov::genai::add_extension; void init_lora_adapter(py::module_& m); void init_perf_metrics(py::module_& m); @@ -90,23 +89,6 @@ PYBIND11_MODULE(py_openvino_genai, m) { return get_version().buildNumber; }, get_version().description); - m.def("add_extension", [] (py::str library_path) { - return add_extension(library_path); - }, - R"doc( - Adds a custom extension library to the OpenVINO GenAI backend. - - Parameters - ---------- - library_path : str - Path to the shared library containing the extension. - - Raises - ------ - RuntimeError - If the extension cannot be loaded. - )doc" - ); init_perf_metrics(m); py::class_(m, "DecodedResults", decoded_results_docstring) diff --git a/tests/python_tests/test_add_extension.py b/tests/python_tests/test_add_extension.py deleted file mode 100644 index 8478cf651c..0000000000 --- a/tests/python_tests/test_add_extension.py +++ /dev/null @@ -1,26 +0,0 @@ -# Copyright (C) 2023-2025 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - - -import pytest -import openvino_genai as ov_genai -import platform -import os -import openvino_tokenizers; - -@pytest.mark.precommit -def test_add_extension(): - # Path to the OpenVINO extension shared library (update as needed). - os_name = platform.system() - if os_name == "Windows": - ov_tokenizer_path = os.path.dirname(openvino_tokenizers.__file__) + "\\lib\\openvino_tokenizers.dll" - elif os_name == "Linux": - ov_tokenizer_path = os.path.dirname(openvino_tokenizers.__file__) + "/lib/libopenvino_tokenizers.so" - else: - print(f"Skipped. Current test only support Windows and Linux") - return - - try: - ov_genai.add_extension(ov_tokenizer_path) - except RuntimeError as e: - raise RuntimeError(f"Add extension fail, maybe tokenizers' version mismatch. 
Original error: {e}") \ No newline at end of file diff --git a/tests/python_tests/test_vlm_pipeline.py b/tests/python_tests/test_vlm_pipeline.py index 973574ba64..211d076eab 100644 --- a/tests/python_tests/test_vlm_pipeline.py +++ b/tests/python_tests/test_vlm_pipeline.py @@ -1516,3 +1516,16 @@ def get_nanollava_processor(): genai_text = genai_output.texts[0] assert optimum_text == genai_text + +@pytest.mark.precommit +def test_vlm_pipeline_add_extension(): + model_id = MODEL_IDS[6] + models_path = _get_ov_model(model_id) + + properties = {} + properties["EXTENSIONS"] = ["fake_path"] + + try: + pipe = VLMPipeline(models_path, "CPU", properties) + except RuntimeError as e: + assert("Cannot find entry point to the extension library" in str(e)) \ No newline at end of file From 323e4c2a028728e8e1058b76f299bf462c027af3 Mon Sep 17 00:00:00 2001 From: xipingya Date: Tue, 11 Nov 2025 13:27:51 +0800 Subject: [PATCH 20/38] remove unecessary file --- src/python/py_openvino_genai.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/python/py_openvino_genai.cpp b/src/python/py_openvino_genai.cpp index 92ab5d15e1..ed010a7581 100644 --- a/src/python/py_openvino_genai.cpp +++ b/src/python/py_openvino_genai.cpp @@ -13,7 +13,6 @@ #include "openvino/genai/llm_pipeline.hpp" #include "openvino/genai/text_streamer.hpp" #include "openvino/genai/version.hpp" -#include "openvino/genai/generation_config.hpp" #include "py_utils.hpp" From 82b65bc33d473c716ea2c9e8b1a6a8e57ecb1af6 Mon Sep 17 00:00:00 2001 From: Xiping Yan Date: Tue, 11 Nov 2025 13:46:43 +0800 Subject: [PATCH 21/38] Update src/cpp/src/utils.cpp Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- src/cpp/src/utils.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cpp/src/utils.cpp b/src/cpp/src/utils.cpp index 8b6fce7640..c5f87e0e12 100644 --- a/src/cpp/src/utils.cpp +++ b/src/cpp/src/utils.cpp @@ -725,7 +725,7 @@ std::pair> extract_extensions(const ov::Any auto it = properties.find("EXTENSIONS"); if (it != properties.end()) { extensions = it->second.as>(); - for (auto ext : extensions) { + for (const auto& ext : extensions) { std::cout << "ext = " << ext.c_str() << std::endl; } properties.erase(it); From 3b6391d4c56eb79eba29b0da85fb6fdcd9a2a929 Mon Sep 17 00:00:00 2001 From: Xiping Yan Date: Tue, 11 Nov 2025 13:47:12 +0800 Subject: [PATCH 22/38] Update src/cpp/src/visual_language/pipeline.cpp Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- src/cpp/src/visual_language/pipeline.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cpp/src/visual_language/pipeline.cpp b/src/cpp/src/visual_language/pipeline.cpp index 7bbcdf0823..460f592eb8 100644 --- a/src/cpp/src/visual_language/pipeline.cpp +++ b/src/cpp/src/visual_language/pipeline.cpp @@ -82,7 +82,7 @@ class VLMPipeline::VLMPipelineImpl : public VLMPipelineBase{ auto language_model_path = models_dir / "openvino_language_model.xml"; auto [properties_copy, extensions] = utils::extract_extensions(properties); - for (auto extension : extensions) { + for (const auto& extension : extensions) { utils::singleton_core().add_extension(extension); } auto language_model = utils::singleton_core().read_model(language_model_path, {}, properties_copy); From 0542ae33fa819eb04105beb80655be7398b58836 Mon Sep 17 00:00:00 2001 From: Xiping Yan Date: Tue, 11 Nov 2025 13:48:06 +0800 Subject: [PATCH 23/38] Update tests/python_tests/test_vlm_pipeline.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- 
tests/python_tests/test_vlm_pipeline.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/python_tests/test_vlm_pipeline.py b/tests/python_tests/test_vlm_pipeline.py index a9ac42ee6f..27a5526e80 100644 --- a/tests/python_tests/test_vlm_pipeline.py +++ b/tests/python_tests/test_vlm_pipeline.py @@ -1541,7 +1541,6 @@ def test_vlm_pipeline_add_extension(): properties = {} properties["EXTENSIONS"] = ["fake_path"] - try: + with pytest.raises(RuntimeError) as exc_info: pipe = VLMPipeline(models_path, "CPU", properties) - except RuntimeError as e: - assert("Cannot find entry point to the extension library" in str(e)) \ No newline at end of file + assert "Cannot find entry point to the extension library" in str(exc_info.value) \ No newline at end of file From 02d531083166cd4b8031de4383d2f4e4eebbc2b6 Mon Sep 17 00:00:00 2001 From: Xiping Yan Date: Tue, 11 Nov 2025 13:48:32 +0800 Subject: [PATCH 24/38] Update tests/python_tests/test_vlm_pipeline.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- tests/python_tests/test_vlm_pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/python_tests/test_vlm_pipeline.py b/tests/python_tests/test_vlm_pipeline.py index 27a5526e80..7656145537 100644 --- a/tests/python_tests/test_vlm_pipeline.py +++ b/tests/python_tests/test_vlm_pipeline.py @@ -1542,5 +1542,5 @@ def test_vlm_pipeline_add_extension(): properties["EXTENSIONS"] = ["fake_path"] with pytest.raises(RuntimeError) as exc_info: - pipe = VLMPipeline(models_path, "CPU", properties) + VLMPipeline(models_path, "CPU", properties) assert "Cannot find entry point to the extension library" in str(exc_info.value) \ No newline at end of file From 97a88fc88047ba7c0f9ba3ec06668a195ab5a7c2 Mon Sep 17 00:00:00 2001 From: Xiping Yan Date: Tue, 11 Nov 2025 13:49:04 +0800 Subject: [PATCH 25/38] Update src/cpp/src/visual_language/pipeline.cpp Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- src/cpp/src/visual_language/pipeline.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cpp/src/visual_language/pipeline.cpp b/src/cpp/src/visual_language/pipeline.cpp index 460f592eb8..452c8bd3e6 100644 --- a/src/cpp/src/visual_language/pipeline.cpp +++ b/src/cpp/src/visual_language/pipeline.cpp @@ -162,7 +162,7 @@ class VLMPipeline::VLMPipelineImpl : public VLMPipelineBase{ auto m_language_pair = utils::get_model_weights_pair(models_map, "language"); auto [properties_copy, extensions] = utils::extract_extensions(properties); - for (auto extension : extensions) { + for (const auto& extension : extensions) { utils::singleton_core().add_extension(extension); } From 200fb05bea6b44a105db91892a9130a4b1e46be8 Mon Sep 17 00:00:00 2001 From: xipingya Date: Tue, 11 Nov 2025 14:02:02 +0800 Subject: [PATCH 26/38] Remove print. 
--- src/cpp/src/utils.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/cpp/src/utils.cpp b/src/cpp/src/utils.cpp index c5f87e0e12..ef81e6cccd 100644 --- a/src/cpp/src/utils.cpp +++ b/src/cpp/src/utils.cpp @@ -725,9 +725,6 @@ std::pair<ov::AnyMap, std::vector<std::string>> extract_extensions(const ov::Any auto it = properties.find("EXTENSIONS"); if (it != properties.end()) { extensions = it->second.as<std::vector<std::string>>(); - for (const auto& ext : extensions) { - std::cout << "ext = " << ext.c_str() << std::endl; - } properties.erase(it); } From f49ebb93f832b7f962b1b589ffc2829af715e572 Mon Sep 17 00:00:00 2001 From: Xiping Yan Date: Tue, 11 Nov 2025 14:02:32 +0800 Subject: [PATCH 27/38] Update src/cpp/include/openvino/genai/generation_config.hpp Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- src/cpp/include/openvino/genai/generation_config.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/cpp/include/openvino/genai/generation_config.hpp b/src/cpp/include/openvino/genai/generation_config.hpp index dbc6060c36..e592cb36ff 100644 --- a/src/cpp/include/openvino/genai/generation_config.hpp +++ b/src/cpp/include/openvino/genai/generation_config.hpp @@ -706,7 +706,6 @@ class OPENVINO_GENAI_EXPORTS GenerationConfig { void validate() const; }; - /* * utils that allow to use generate and operator() in the following way: * pipe.generate(input_ids, ov::genai::max_new_tokens(200), ov::genai::temperature(1.0f),...) From 34499ec3953470d56d1e85155ce91a0bc8cb500b Mon Sep 17 00:00:00 2001 From: xipingya Date: Tue, 11 Nov 2025 16:39:19 +0800 Subject: [PATCH 28/38] enable extensions for the LLM pipeline; there are lots of interleaved calls, and ContinuousBatchingPipeline is very complex, so just register the extensions in the ContinuousBatchingPipeline constructors. Signed-off-by: xipingya --- src/cpp/src/continuous_batching/pipeline.cpp | 20 ++++++++++++++++++++ src/cpp/src/llm/pipeline.cpp | 5 +++++ src/cpp/src/llm/pipeline_stateful.cpp | 6 +++++- src/cpp/src/utils.cpp | 6 ++---- src/cpp/src/utils.hpp | 2 +- src/cpp/src/visual_language/pipeline.cpp | 5 +++-- 6 files changed, 36 insertions(+), 8 deletions(-) diff --git a/src/cpp/src/continuous_batching/pipeline.cpp b/src/cpp/src/continuous_batching/pipeline.cpp index 16eb169de7..37bb2e28c0 100644 --- a/src/cpp/src/continuous_batching/pipeline.cpp +++ b/src/cpp/src/continuous_batching/pipeline.cpp @@ -46,6 +46,11 @@ ContinuousBatchingPipeline::ContinuousBatchingPipeline( const std::filesystem::p auto draft_model_desr = utils::extract_draft_model_from_config(properties_without_draft_model); auto is_prompt_lookup_enabled = extract_prompt_lookup_from_config(properties_without_draft_model); + auto extensions = utils::extract_extensions(properties); + for (const auto& extension : extensions) { + utils::singleton_core().add_extension(extension); + } + auto model = utils::read_model(models_path, properties); auto [properties_without_draft_model_without_gguf, enable_save_ov_model] = utils::extract_gguf_properties(properties_without_draft_model); properties_without_draft_model_without_gguf[ov::cache_model_path.name()] = models_path; @@ -88,6 +93,11 @@ ContinuousBatchingPipeline::ContinuousBatchingPipeline( auto draft_model_desr = utils::extract_draft_model_from_config(properties_without_draft_model); auto is_prompt_lookup_enabled = extract_prompt_lookup_from_config(properties_without_draft_model); + auto extensions = utils::extract_extensions(properties); + for (const auto& extension : extensions) { + utils::singleton_core().add_extension(extension); + } + auto model = utils::read_model(models_path, properties_without_draft_model); auto
auto [properties_without_draft_model_without_gguf, enable_save_ov_model] = utils::extract_gguf_properties(properties_without_draft_model); properties_without_draft_model_without_gguf[ov::cache_model_path.name()] = models_path; @@ -133,6 +143,11 @@ ContinuousBatchingPipeline::ContinuousBatchingPipeline( auto is_prompt_lookup_enabled = extract_prompt_lookup_from_config(properties_without_draft_model); auto model = utils::singleton_core().read_model(model_str, weights_tensor); + auto extensions = utils::extract_extensions(properties); + for (const auto& extension : extensions) { + utils::singleton_core().add_extension(extension); + } + auto rt_info = model->get_rt_info(); std::shared_ptr embedder = nullptr; std::filesystem::path directory; @@ -179,6 +194,11 @@ ContinuousBatchingPipeline::ContinuousBatchingPipeline( auto model_pair = utils::get_model_weights_pair(models_map, "language"); auto model = utils::singleton_core().read_model(model_pair.first, model_pair.second); + auto extensions = utils::extract_extensions(properties); + for (const auto& extension : extensions) { + utils::singleton_core().add_extension(extension); + } + auto rt_info = model->get_rt_info(); std::filesystem::path directory; std::shared_ptr embedder = nullptr; diff --git a/src/cpp/src/llm/pipeline.cpp b/src/cpp/src/llm/pipeline.cpp index e057d5da72..cbdda03700 100644 --- a/src/cpp/src/llm/pipeline.cpp +++ b/src/cpp/src/llm/pipeline.cpp @@ -260,6 +260,11 @@ ov::genai::LLMPipeline::LLMPipeline( bool is_npu_requested = ov::genai::utils::is_npu_requested(device, user_properties); auto [properties, attention_backend] = utils::extract_attention_backend(user_properties, is_npu_requested); + auto extensions = utils::extract_extensions(properties); + for (const auto& extension : extensions) { + utils::singleton_core().add_extension(extension); + } + if (is_npu_requested) { m_pimpl = StatefulPipeline::create( utils::singleton_core().read_model(model_str, weights_tensor), diff --git a/src/cpp/src/llm/pipeline_stateful.cpp b/src/cpp/src/llm/pipeline_stateful.cpp index 81f91d7054..52cd0bf305 100644 --- a/src/cpp/src/llm/pipeline_stateful.cpp +++ b/src/cpp/src/llm/pipeline_stateful.cpp @@ -78,7 +78,11 @@ StatefulLLMPipeline::StatefulLLMPipeline( m_max_prompt_len = kv_desc.max_prompt_len; m_max_kv_cache_size = kv_desc.max_prompt_len + kv_desc.min_response_len; } else { - compiled_model = utils::singleton_core().compile_model(model, device, *filtered_properties); + auto extensions = utils::extract_extensions(properties); + for (const auto& extension : extensions) { + utils::singleton_core().add_extension(extension); + } + compiled_model = utils::singleton_core().compile_model(model, device, *filtered_properties); } m_model_runner = compiled_model.create_infer_request(); ov::genai::utils::print_compiled_model_properties(compiled_model, "Stateful LLM model"); diff --git a/src/cpp/src/utils.cpp b/src/cpp/src/utils.cpp index ef81e6cccd..b2f2e41dd0 100644 --- a/src/cpp/src/utils.cpp +++ b/src/cpp/src/utils.cpp @@ -718,17 +718,15 @@ std::pair extract_attention_backend(const ov::AnyMap& e return {properties, attention_backend}; }; -std::pair> extract_extensions(const ov::AnyMap& external_properties) { +std::vector extract_extensions(const ov::AnyMap& properties) { std::vector extensions; - ov::AnyMap properties = external_properties; auto it = properties.find("EXTENSIONS"); if (it != properties.end()) { extensions = it->second.as>(); - properties.erase(it); } - return {properties, extensions}; + return extensions; } void 
release_core_plugin(const std::string& device) { diff --git a/src/cpp/src/utils.hpp b/src/cpp/src/utils.hpp index edc4adde21..1e614d9836 100644 --- a/src/cpp/src/utils.hpp +++ b/src/cpp/src/utils.hpp @@ -286,7 +286,7 @@ bool explicitly_requires_paged_attention(const ov::AnyMap& properties, bool is_n std::pair extract_attention_backend(const ov::AnyMap& external_properties, bool is_npu_requested = false); -std::pair> extract_extensions(const ov::AnyMap& external_properties); +std::vector extract_extensions(const ov::AnyMap& properties); void save_openvino_model(const std::shared_ptr& model, const std::string& save_path, bool compress_to_fp16); diff --git a/src/cpp/src/visual_language/pipeline.cpp b/src/cpp/src/visual_language/pipeline.cpp index 452c8bd3e6..89c0640e3a 100644 --- a/src/cpp/src/visual_language/pipeline.cpp +++ b/src/cpp/src/visual_language/pipeline.cpp @@ -80,8 +80,9 @@ class VLMPipeline::VLMPipelineImpl : public VLMPipelineBase{ } { m_is_npu = device.find("NPU") != std::string::npos; + auto properties_copy = properties; auto language_model_path = models_dir / "openvino_language_model.xml"; - auto [properties_copy, extensions] = utils::extract_extensions(properties); + auto extensions = utils::extract_extensions(properties_copy); for (const auto& extension : extensions) { utils::singleton_core().add_extension(extension); } @@ -161,7 +162,7 @@ class VLMPipeline::VLMPipelineImpl : public VLMPipelineBase{ auto m_language_pair = utils::get_model_weights_pair(models_map, "language"); - auto [properties_copy, extensions] = utils::extract_extensions(properties); + auto extensions = utils::extract_extensions(properties); for (const auto& extension : extensions) { utils::singleton_core().add_extension(extension); } From a92fe1f03ba92be3b8f669ed44e139a68faf635e Mon Sep 17 00:00:00 2001 From: sunxiaoxia2022 Date: Thu, 13 Nov 2025 16:52:17 +0800 Subject: [PATCH 29/38] add test case of cb and llm --- .../src/visual_language/embedding_model.cpp | 8 ++++++ src/cpp/src/visual_language/pipeline.cpp | 2 +- .../python_tests/test_continuous_batching.py | 14 ++++++++++ tests/python_tests/test_llm_pipeline.py | 11 ++++++++ tests/python_tests/test_vlm_pipeline.py | 27 ++++++++++--------- 5 files changed, 48 insertions(+), 14 deletions(-) diff --git a/src/cpp/src/visual_language/embedding_model.cpp b/src/cpp/src/visual_language/embedding_model.cpp index db436dac07..fcee8a66c2 100644 --- a/src/cpp/src/visual_language/embedding_model.cpp +++ b/src/cpp/src/visual_language/embedding_model.cpp @@ -45,6 +45,10 @@ EmbeddingsModel::EmbeddingsModel(const std::filesystem::path& model_dir, const std::string& device, const ov::AnyMap& properties) { ov::Core core = utils::singleton_core(); + auto extensions = utils::extract_extensions(properties); + for (const auto& extension : extensions) { + core.add_extension(extension); + } std::shared_ptr m_model = core.read_model(model_dir / "openvino_text_embeddings_model.xml", {}, properties); // apply embedding postprocessing step by merging them into the model merge_postprocess(m_model, scale_emb); @@ -60,6 +64,10 @@ EmbeddingsModel::EmbeddingsModel(const std::string& model, const std::string& device, const ov::AnyMap& properties) { ov::Core core = utils::singleton_core(); + auto extensions = utils::extract_extensions(properties); + for (const auto& extension : extensions) { + core.add_extension(extension); + } std::shared_ptr m_model = core.read_model(model, weights); // apply embedding postprocessing step by merging them into the model merge_postprocess(m_model, 
scale_emb); diff --git a/src/cpp/src/visual_language/pipeline.cpp b/src/cpp/src/visual_language/pipeline.cpp index 89c0640e3a..f52d4df161 100644 --- a/src/cpp/src/visual_language/pipeline.cpp +++ b/src/cpp/src/visual_language/pipeline.cpp @@ -168,7 +168,7 @@ class VLMPipeline::VLMPipelineImpl : public VLMPipelineBase{ } m_language = utils::singleton_core().compile_model( - m_language_pair.first, m_language_pair.second, device, properties_copy + m_language_pair.first, m_language_pair.second, device, properties ).create_infer_request(); m_language.get_tensor("attention_mask").set_shape({1, 0}); diff --git a/tests/python_tests/test_continuous_batching.py b/tests/python_tests/test_continuous_batching.py index 90ea7ac316..4b4d22c2fd 100644 --- a/tests/python_tests/test_continuous_batching.py +++ b/tests/python_tests/test_continuous_batching.py @@ -542,3 +542,17 @@ def test_speculative_decoding_extended_perf_metrics(pipeline_type): assert std_gen_duration == 0 else: assert extended_perf_metrics is None + + +@pytest.mark.precommit +def test_continuous_batching_add_extension(): + model_id = 'katuni4ka/tiny-random-phi3' + _, _, models_path = download_and_convert_model(model_id) + + scheduler_config = SchedulerConfig() + + properties = {"EXTENSIONS": ["fake_path"]} + + with pytest.raises(RuntimeError) as exc_info: + ContinuousBatchingPipeline(models_path, scheduler_config, "CPU", properties) + assert "Cannot find entry point to the extension library" in str(exc_info.value) diff --git a/tests/python_tests/test_llm_pipeline.py b/tests/python_tests/test_llm_pipeline.py index 36b4688bcf..698e45bde7 100644 --- a/tests/python_tests/test_llm_pipeline.py +++ b/tests/python_tests/test_llm_pipeline.py @@ -832,3 +832,14 @@ def py_streamer(py_str: str): else: assert it_cnt > 0 + +@pytest.mark.precommit +def test_llm_pipeline_add_extension(): + model_id = "katuni4ka/tiny-random-phi3" + _, _, models_path = download_and_convert_model(model_id) + + properties = {"EXTENSIONS": ["fake_path"]} + + with pytest.raises(RuntimeError) as exc_info: + ov_genai.LLMPipeline(models_path, "CPU", **properties) + assert "Cannot find entry point to the extension library" in str(exc_info.value) diff --git a/tests/python_tests/test_vlm_pipeline.py b/tests/python_tests/test_vlm_pipeline.py index 7656145537..62d72cdec7 100644 --- a/tests/python_tests/test_vlm_pipeline.py +++ b/tests/python_tests/test_vlm_pipeline.py @@ -304,12 +304,14 @@ def ov_pipe_model(request: pytest.FixtureRequest) -> VlmModelInfo: ids=lambda p: f"{p[0]}/{p[1]}", indirect=["ov_pipe_model"], ) - + + @pytest.fixture(scope="module") def ov_continious_batching_pipe() -> ContinuousBatchingPipeline: models_path = _get_ov_model(MODEL_IDS[0]) return ContinuousBatchingPipeline(models_path, SchedulerConfig(), "CPU") - + + @pytest.fixture(scope="module") def ov_continious_batching_pipe_gemma() -> ContinuousBatchingPipeline: models_path = _get_ov_model(MODEL_IDS[8]) @@ -400,7 +402,7 @@ def cat_tensor(cat_image) -> openvino.Tensor: def car_tensor(pytestconfig: pytest.Config) -> openvino.Tensor: return openvino.Tensor(from_cache_or_download(pytestconfig, TEST_IMAGE_URLS['car'], "car.jpg")) - + @pytest.fixture(scope="module") def synthetic_video_32x32_tensor(synthetic_video_32x32): return openvino.Tensor(synthetic_video_32x32) @@ -1370,8 +1372,8 @@ def test_model_tags_older(ov_pipe_model: VlmModelInfo, car_tensor: openvino.Tens with pytest.raises(RuntimeError): ov_pipe.generate("", images=[car_tensor]) ov_pipe.finish_chat() - - + + @pytest.mark.precommit 
@model_and_tag_parametrize() def test_model_tags_missing_universal(ov_pipe_model: VlmModelInfo): @@ -1379,8 +1381,8 @@ def test_model_tags_missing_universal(ov_pipe_model: VlmModelInfo): with pytest.raises(RuntimeError): ov_pipe.generate("") - - + + @pytest.mark.precommit @model_and_tag_parametrize() def test_model_tags_missing_native(ov_pipe_model: VlmModelInfo): @@ -1389,7 +1391,7 @@ def test_model_tags_missing_native(ov_pipe_model: VlmModelInfo): with pytest.raises(RuntimeError): ov_pipe.generate(image_tag(0)) - + @pytest.mark.precommit @pytest.mark.parametrize( @@ -1535,12 +1537,11 @@ def get_nanollava_processor(): @pytest.mark.precommit def test_vlm_pipeline_add_extension(): - model_id = MODEL_IDS[6] + model_id = VIDEO_MODEL_IDS[1] models_path = _get_ov_model(model_id) - properties = {} - properties["EXTENSIONS"] = ["fake_path"] + properties = {"EXTENSIONS": ["fake_path"]} with pytest.raises(RuntimeError) as exc_info: - VLMPipeline(models_path, "CPU", properties) - assert "Cannot find entry point to the extension library" in str(exc_info.value) \ No newline at end of file + VLMPipeline(models_path, "CPU", config=properties) + assert "Cannot find entry point to the extension library" in str(exc_info.value) From 75900d046043ed7d7001c50ff1d749395a2c0bf4 Mon Sep 17 00:00:00 2001 From: sunxiaoxia2022 Date: Fri, 14 Nov 2025 14:23:05 +0800 Subject: [PATCH 30/38] change extract_extensions to add_extensions_to_core --- src/cpp/src/continuous_batching/pipeline.cpp | 26 +++++-------------- src/cpp/src/llm/pipeline.cpp | 6 +---- src/cpp/src/llm/pipeline_stateful.cpp | 8 +++--- src/cpp/src/utils.cpp | 14 +++++----- src/cpp/src/utils.hpp | 3 ++- .../src/visual_language/embedding_model.cpp | 18 +++++-------- src/cpp/src/visual_language/pipeline.cpp | 14 +++------- .../src/visual_language/qwen2vl/classes.cpp | 16 +++++++++--- 8 files changed, 42 insertions(+), 63 deletions(-) diff --git a/src/cpp/src/continuous_batching/pipeline.cpp b/src/cpp/src/continuous_batching/pipeline.cpp index 37bb2e28c0..24f4f4f1ef 100644 --- a/src/cpp/src/continuous_batching/pipeline.cpp +++ b/src/cpp/src/continuous_batching/pipeline.cpp @@ -46,11 +46,7 @@ ContinuousBatchingPipeline::ContinuousBatchingPipeline( const std::filesystem::p auto draft_model_desr = utils::extract_draft_model_from_config(properties_without_draft_model); auto is_prompt_lookup_enabled = extract_prompt_lookup_from_config(properties_without_draft_model); - auto extensions = utils::extract_extensions(properties); - for (const auto& extension : extensions) { - utils::singleton_core().add_extension(extension); - } - + utils::add_extensions_to_core(properties_without_draft_model); auto model = utils::read_model(models_path, properties); auto [properties_without_draft_model_without_gguf, enable_save_ov_model] = utils::extract_gguf_properties(properties_without_draft_model); properties_without_draft_model_without_gguf[ov::cache_model_path.name()] = models_path; @@ -93,11 +89,7 @@ ContinuousBatchingPipeline::ContinuousBatchingPipeline( auto draft_model_desr = utils::extract_draft_model_from_config(properties_without_draft_model); auto is_prompt_lookup_enabled = extract_prompt_lookup_from_config(properties_without_draft_model); - auto extensions = utils::extract_extensions(properties); - for (const auto& extension : extensions) { - utils::singleton_core().add_extension(extension); - } - + utils::add_extensions_to_core(properties_without_draft_model); auto model = utils::read_model(models_path, properties_without_draft_model); auto 
[properties_without_draft_model_without_gguf, enable_save_ov_model] = utils::extract_gguf_properties(properties_without_draft_model); properties_without_draft_model_without_gguf[ov::cache_model_path.name()] = models_path; @@ -141,12 +133,9 @@ ContinuousBatchingPipeline::ContinuousBatchingPipeline( auto properties_without_draft_model = properties; auto draft_model_desr = utils::extract_draft_model_from_config(properties_without_draft_model); auto is_prompt_lookup_enabled = extract_prompt_lookup_from_config(properties_without_draft_model); - auto model = utils::singleton_core().read_model(model_str, weights_tensor); - auto extensions = utils::extract_extensions(properties); - for (const auto& extension : extensions) { - utils::singleton_core().add_extension(extension); - } + utils::add_extensions_to_core(properties_without_draft_model); + auto model = utils::singleton_core().read_model(model_str, weights_tensor); auto rt_info = model->get_rt_info(); std::shared_ptr<InputsEmbedder> embedder = nullptr; @@ -192,12 +181,9 @@ ContinuousBatchingPipeline::ContinuousBatchingPipeline( auto draft_model_desr = utils::extract_draft_model_from_config(properties_without_draft_model); auto is_prompt_lookup_enabled = extract_prompt_lookup_from_config(properties_without_draft_model); auto model_pair = utils::get_model_weights_pair(models_map, "language"); - auto model = utils::singleton_core().read_model(model_pair.first, model_pair.second); - auto extensions = utils::extract_extensions(properties); - for (const auto& extension : extensions) { - utils::singleton_core().add_extension(extension); - } + utils::add_extensions_to_core(properties_without_draft_model); + auto model = utils::singleton_core().read_model(model_pair.first, model_pair.second); auto rt_info = model->get_rt_info(); std::filesystem::path directory; diff --git a/src/cpp/src/llm/pipeline.cpp b/src/cpp/src/llm/pipeline.cpp index cbdda03700..6efede9c1c 100644 --- a/src/cpp/src/llm/pipeline.cpp +++ b/src/cpp/src/llm/pipeline.cpp @@ -259,11 +259,7 @@ ov::genai::LLMPipeline::LLMPipeline( bool is_npu_requested = ov::genai::utils::is_npu_requested(device, user_properties); auto [properties, attention_backend] = utils::extract_attention_backend(user_properties, is_npu_requested); - - auto extensions = utils::extract_extensions(properties); - for (const auto& extension : extensions) { - utils::singleton_core().add_extension(extension); - } + utils::add_extensions_to_core(properties); if (is_npu_requested) { m_pimpl = StatefulPipeline::create( diff --git a/src/cpp/src/llm/pipeline_stateful.cpp b/src/cpp/src/llm/pipeline_stateful.cpp index 52cd0bf305..2bc1e4fb22 100644 --- a/src/cpp/src/llm/pipeline_stateful.cpp +++ b/src/cpp/src/llm/pipeline_stateful.cpp @@ -78,11 +78,9 @@ StatefulLLMPipeline::StatefulLLMPipeline( m_max_prompt_len = kv_desc.max_prompt_len; m_max_kv_cache_size = kv_desc.max_prompt_len + kv_desc.min_response_len; } else { - auto extensions = utils::extract_extensions(properties); - for (const auto& extension : extensions) { - utils::singleton_core().add_extension(extension); - } - compiled_model = utils::singleton_core().compile_model(model, device, *filtered_properties); + auto properties_without_extensions = *filtered_properties; + utils::add_extensions_to_core(properties_without_extensions); + compiled_model = utils::singleton_core().compile_model(model, device, properties_without_extensions); } m_model_runner = compiled_model.create_infer_request(); ov::genai::utils::print_compiled_model_properties(compiled_model, "Stateful LLM model"); diff --git
a/src/cpp/src/utils.cpp b/src/cpp/src/utils.cpp index b2f2e41dd0..72eccd2aea 100644 --- a/src/cpp/src/utils.cpp +++ b/src/cpp/src/utils.cpp @@ -718,15 +718,15 @@ std::pair<ov::AnyMap, std::string> extract_attention_backend(const ov::AnyMap& e return {properties, attention_backend}; }; -std::vector<std::string> extract_extensions(const ov::AnyMap& properties) { - std::vector<std::string> extensions; - - auto it = properties.find("EXTENSIONS"); +void add_extensions_to_core(ov::AnyMap& properties) { + auto it = properties.find(EXTENSIONS_ARG_NAME); if (it != properties.end()) { - extensions = it->second.as<std::vector<std::string>>(); + auto extensions = it->second.as<std::vector<std::string>>(); + for (const auto& extension : extensions) { + singleton_core().add_extension(extension); + } + properties.erase(it); } - - return extensions; } void release_core_plugin(const std::string& device) { diff --git a/src/cpp/src/utils.hpp b/src/cpp/src/utils.hpp index 1e614d9836..64de9d22a8 100644 --- a/src/cpp/src/utils.hpp +++ b/src/cpp/src/utils.hpp @@ -103,6 +103,7 @@ void read_anymap_param(const ov::AnyMap& config_map, const std::string& name, T& const std::string STREAMER_ARG_NAME = "streamer"; const std::string CONFIG_ARG_NAME = "generation_config"; const std::string DRAFT_MODEL_ARG_NAME = "draft_model"; +const std::string EXTENSIONS_ARG_NAME = "EXTENSIONS"; template <typename Config = ov::genai::GenerationConfig> Config from_config_json_if_exists(const std::filesystem::path& models_path, const char config_name[] = "generation_config.json") { @@ -286,7 +287,7 @@ bool explicitly_requires_paged_attention(const ov::AnyMap& properties, bool is_n std::pair<ov::AnyMap, std::string> extract_attention_backend(const ov::AnyMap& external_properties, bool is_npu_requested = false); -std::vector<std::string> extract_extensions(const ov::AnyMap& properties); +void add_extensions_to_core(ov::AnyMap& properties); void save_openvino_model(const std::shared_ptr<ov::Model>& model, const std::string& save_path, bool compress_to_fp16); diff --git a/src/cpp/src/visual_language/embedding_model.cpp b/src/cpp/src/visual_language/embedding_model.cpp index fcee8a66c2..3da633c704 100644 --- a/src/cpp/src/visual_language/embedding_model.cpp +++ b/src/cpp/src/visual_language/embedding_model.cpp @@ -45,15 +45,13 @@ EmbeddingsModel::EmbeddingsModel(const std::filesystem::path& model_dir, const std::string& device, const ov::AnyMap& properties) { ov::Core core = utils::singleton_core(); - auto extensions = utils::extract_extensions(properties); - for (const auto& extension : extensions) { - core.add_extension(extension); - } - std::shared_ptr<ov::Model> m_model = core.read_model(model_dir / "openvino_text_embeddings_model.xml", {}, properties); + auto properties_copy = properties; + utils::add_extensions_to_core(properties_copy); + std::shared_ptr<ov::Model> m_model = core.read_model(model_dir / "openvino_text_embeddings_model.xml", {}, properties_copy); // apply embedding postprocessing step by merging them into the model merge_postprocess(m_model, scale_emb); - ov::CompiledModel compiled_model = core.compile_model(m_model, device, properties); + ov::CompiledModel compiled_model = core.compile_model(m_model, device, properties_copy); ov::genai::utils::print_compiled_model_properties(compiled_model, "text embeddings model"); m_embeddings_requests_queue = init(compiled_model); } @@ -62,13 +60,11 @@ EmbeddingsModel::EmbeddingsModel(const std::string& model, const std::string& device, const ov::AnyMap& properties) { ov::Core core = utils::singleton_core(); - auto extensions = utils::extract_extensions(properties); - for (const auto& extension : extensions) { - core.add_extension(extension); - } + auto properties_copy = properties; +
utils::add_extensions_to_core(properties_copy); std::shared_ptr<ov::Model> m_model = core.read_model(model, weights); // apply embedding postprocessing step by merging them into the model merge_postprocess(m_model, scale_emb); - ov::CompiledModel compiled_model = core.compile_model(m_model, device, properties); + ov::CompiledModel compiled_model = core.compile_model(m_model, device, properties_copy); m_embeddings_requests_queue = init(compiled_model); } diff --git a/src/cpp/src/visual_language/pipeline.cpp b/src/cpp/src/visual_language/pipeline.cpp index f52d4df161..9fa8f94f59 100644 --- a/src/cpp/src/visual_language/pipeline.cpp +++ b/src/cpp/src/visual_language/pipeline.cpp @@ -82,10 +82,7 @@ class VLMPipeline::VLMPipelineImpl : public VLMPipelineBase{ auto properties_copy = properties; auto language_model_path = models_dir / "openvino_language_model.xml"; - auto extensions = utils::extract_extensions(properties_copy); - for (const auto& extension : extensions) { - utils::singleton_core().add_extension(extension); - } + utils::add_extensions_to_core(properties_copy); auto language_model = utils::singleton_core().read_model(language_model_path, {}, properties_copy); auto kv_pos = ov::genai::utils::get_kv_axes_pos(language_model); @@ -161,14 +158,11 @@ class VLMPipeline::VLMPipelineImpl : public VLMPipelineBase{ m_embedding = m_inputs_embedder->get_embedding_model(); auto m_language_pair = utils::get_model_weights_pair(models_map, "language"); - - auto extensions = utils::extract_extensions(properties); - for (const auto& extension : extensions) { - utils::singleton_core().add_extension(extension); - } + auto properties_without_extensions = properties; + utils::add_extensions_to_core(properties_without_extensions); m_language = utils::singleton_core().compile_model( - m_language_pair.first, m_language_pair.second, device, properties + m_language_pair.first, m_language_pair.second, device, properties_without_extensions ).create_infer_request(); m_language.get_tensor("attention_mask").set_shape({1, 0}); diff --git a/src/cpp/src/visual_language/qwen2vl/classes.cpp b/src/cpp/src/visual_language/qwen2vl/classes.cpp index 9b200d2baf..e3010de602 100644 --- a/src/cpp/src/visual_language/qwen2vl/classes.cpp +++ b/src/cpp/src/visual_language/qwen2vl/classes.cpp @@ -660,8 +660,10 @@ VisionEncoderQwen2VL::VisionEncoderQwen2VL(const std::filesystem::path& model_di : VisionEncoder(model_dir, device, properties), use_ov_image_preprocess(check_image_preprocess_env()) { if (use_ov_image_preprocess) { + auto properties_without_extensions = properties; + utils::add_extensions_to_core(properties_without_extensions); auto model_org = utils::singleton_core().read_model(model_dir / "openvino_vision_embeddings_model.xml"); - m_ireq_queue_vision_encoder = create_vision_encoder_ireq(model_org, m_processor_config, device, properties); + m_ireq_queue_vision_encoder = create_vision_encoder_ireq(model_org, m_processor_config, device, properties_without_extensions); } } @@ -674,8 +676,10 @@ VisionEncoderQwen2VL::VisionEncoderQwen2VL(const ModelsMap& models_map, if (use_ov_image_preprocess) { const auto& [vision_encoder_model, vision_encoder_weights] = utils::get_model_weights_pair(models_map, "vision_embeddings"); + auto properties_without_extensions = properties; + utils::add_extensions_to_core(properties_without_extensions); auto model_org = utils::singleton_core().read_model(vision_encoder_model, vision_encoder_weights); - m_ireq_queue_vision_encoder = create_vision_encoder_ireq(model_org, m_processor_config, device,
properties); + m_ireq_queue_vision_encoder = create_vision_encoder_ireq(model_org, m_processor_config, device, properties_without_extensions); } } @@ -923,10 +927,12 @@ InputsEmbedderQwen2VL::InputsEmbedderQwen2VL( const std::string& device, const ov::AnyMap device_config) : IInputsEmbedder(vlm_config, model_dir, device, device_config) { + auto properties_without_extensions = device_config; + utils::add_extensions_to_core(properties_without_extensions); auto model = utils::singleton_core().read_model(model_dir / "openvino_vision_embeddings_merger_model.xml"); utils::request_vl_sdpa_transformations(model); - auto compiled_model = utils::singleton_core().compile_model(model, device, device_config); + auto compiled_model = utils::singleton_core().compile_model(model, device, properties_without_extensions); m_with_cu_seqlens_input = utils::check_vl_sdpa_transformations(compiled_model); ov::genai::utils::print_compiled_model_properties(compiled_model, @@ -952,6 +958,8 @@ InputsEmbedderQwen2VL::InputsEmbedderQwen2VL( const std::string& device, const ov::AnyMap device_config) : IInputsEmbedder(vlm_config, models_map, tokenizer, config_dir_path, device, device_config) { + auto properties_without_extensions = device_config; + utils::add_extensions_to_core(properties_without_extensions); auto model = utils::singleton_core().read_model( utils::get_model_weights_pair(models_map, "vision_embeddings_merger").first, utils::get_model_weights_pair(models_map, "vision_embeddings_merger").second); @@ -959,7 +967,7 @@ InputsEmbedderQwen2VL::InputsEmbedderQwen2VL( auto compiled_model = utils::singleton_core().compile_model(model, device, - device_config + properties_without_extensions ); m_with_cu_seqlens_input = utils::check_vl_sdpa_transformations(compiled_model); From 8a80895240a9197b2dd3d3adabb744e5de35a38b Mon Sep 17 00:00:00 2001 From: sunxiaoxia2022 Date: Fri, 14 Nov 2025 14:41:46 +0800 Subject: [PATCH 31/38] revert some changes --- tests/python_tests/test_vlm_pipeline.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/python_tests/test_vlm_pipeline.py b/tests/python_tests/test_vlm_pipeline.py index 5a9f03961e..6cecac9b06 100644 --- a/tests/python_tests/test_vlm_pipeline.py +++ b/tests/python_tests/test_vlm_pipeline.py @@ -305,13 +305,11 @@ def ov_pipe_model(request: pytest.FixtureRequest) -> VlmModelInfo: indirect=["ov_pipe_model"], ) - @pytest.fixture(scope="module") def ov_continious_batching_pipe() -> ContinuousBatchingPipeline: models_path = _get_ov_model(MODEL_IDS[0]) return ContinuousBatchingPipeline(models_path, SchedulerConfig(), "CPU") - @pytest.fixture(scope="module") def ov_continious_batching_pipe_gemma() -> ContinuousBatchingPipeline: models_path = _get_ov_model(MODEL_IDS[8]) @@ -402,7 +400,7 @@ def cat_tensor(cat_image) -> openvino.Tensor: def car_tensor(pytestconfig: pytest.Config) -> openvino.Tensor: return openvino.Tensor(from_cache_or_download(pytestconfig, TEST_IMAGE_URLS['car'], "car.jpg")) - + @pytest.fixture(scope="module") def synthetic_video_32x32_tensor(synthetic_video_32x32): return openvino.Tensor(synthetic_video_32x32) @@ -1368,7 +1366,7 @@ def test_model_tags_missing_native(ov_pipe_model: VlmModelInfo): with pytest.raises(RuntimeError): ov_pipe.generate(image_tag(0)) - + @pytest.mark.parametrize( "ov_pipe_model,has_image,has_video", From 85608a05f587de04ecf257102c887a1b999f080c Mon Sep 17 00:00:00 2001 From: sunxiaoxia2022 Date: Wed, 19 Nov 2025 09:06:42 +0800 Subject: [PATCH 32/38] rm add_extensions_to_core from pipeline_stateful ---
src/cpp/src/continuous_batching/pipeline.cpp | 2 +- src/cpp/src/llm/pipeline.cpp | 2 ++ src/cpp/src/llm/pipeline_stateful.cpp | 4 +--- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/cpp/src/continuous_batching/pipeline.cpp b/src/cpp/src/continuous_batching/pipeline.cpp index 24f4f4f1ef..1aa47475c7 100644 --- a/src/cpp/src/continuous_batching/pipeline.cpp +++ b/src/cpp/src/continuous_batching/pipeline.cpp @@ -47,7 +47,7 @@ ContinuousBatchingPipeline::ContinuousBatchingPipeline( const std::filesystem::p auto is_prompt_lookup_enabled = extract_prompt_lookup_from_config(properties_without_draft_model); utils::add_extensions_to_core(properties_without_draft_model); - auto model = utils::read_model(models_path, properties); + auto model = utils::read_model(models_path, properties_without_draft_model); auto [properties_without_draft_model_without_gguf, enable_save_ov_model] = utils::extract_gguf_properties(properties_without_draft_model); properties_without_draft_model_without_gguf[ov::cache_model_path.name()] = models_path; auto tokenizer = ov::genai::Tokenizer(models_path, tokenizer_properties); diff --git a/src/cpp/src/llm/pipeline.cpp b/src/cpp/src/llm/pipeline.cpp index 6efede9c1c..c326c2ff93 100644 --- a/src/cpp/src/llm/pipeline.cpp +++ b/src/cpp/src/llm/pipeline.cpp @@ -180,6 +180,7 @@ ov::genai::LLMPipeline::LLMPipeline( bool is_npu_requested = ov::genai::utils::is_npu_requested(device, user_properties); auto [properties, attention_backend] = utils::extract_attention_backend(user_properties, is_npu_requested); + utils::add_extensions_to_core(properties); if (is_npu_requested) { m_pimpl = StatefulPipeline::create(models_path, tokenizer, device, properties); @@ -218,6 +219,7 @@ ov::genai::LLMPipeline::LLMPipeline( bool is_npu_requested = ov::genai::utils::is_npu_requested(device, user_properties); auto [properties, attention_backend] = utils::extract_attention_backend(user_properties, is_npu_requested); + utils::add_extensions_to_core(properties); if (is_npu_requested) { m_pimpl = StatefulPipeline::create(models_path, device, properties); diff --git a/src/cpp/src/llm/pipeline_stateful.cpp b/src/cpp/src/llm/pipeline_stateful.cpp index 2bc1e4fb22..c013fd58e9 100644 --- a/src/cpp/src/llm/pipeline_stateful.cpp +++ b/src/cpp/src/llm/pipeline_stateful.cpp @@ -78,9 +78,7 @@ StatefulLLMPipeline::StatefulLLMPipeline( m_max_prompt_len = kv_desc.max_prompt_len; m_max_kv_cache_size = kv_desc.max_prompt_len + kv_desc.min_response_len; } else { - auto properties_without_extensions = *filtered_properties; - utils::add_extensions_to_core(properties_without_extensions); - compiled_model = utils::singleton_core().compile_model(model, device, properties_without_extensions); + compiled_model = utils::singleton_core().compile_model(model, device, *filtered_properties); } m_model_runner = compiled_model.create_infer_request(); ov::genai::utils::print_compiled_model_properties(compiled_model, "Stateful LLM model"); From 6fd6b2606f42f8145f3884af6d6576a5081d6673 Mon Sep 17 00:00:00 2001 From: sunxiaoxia2022 Date: Wed, 19 Nov 2025 09:09:45 +0800 Subject: [PATCH 33/38] revert a small change --- src/cpp/src/llm/pipeline_stateful.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cpp/src/llm/pipeline_stateful.cpp b/src/cpp/src/llm/pipeline_stateful.cpp index c013fd58e9..81f91d7054 100644 --- a/src/cpp/src/llm/pipeline_stateful.cpp +++ b/src/cpp/src/llm/pipeline_stateful.cpp @@ -78,7 +78,7 @@ StatefulLLMPipeline::StatefulLLMPipeline( m_max_prompt_len = kv_desc.max_prompt_len; 
m_max_kv_cache_size = kv_desc.max_prompt_len + kv_desc.min_response_len; } else { - compiled_model = utils::singleton_core().compile_model(model, device, *filtered_properties); + compiled_model = utils::singleton_core().compile_model(model, device, *filtered_properties); } m_model_runner = compiled_model.create_infer_request(); ov::genai::utils::print_compiled_model_properties(compiled_model, "Stateful LLM model"); From 3208ed15a9cc848c09181f1e03f42adc476286da Mon Sep 17 00:00:00 2001 From: sunxiaoxia2022 Date: Wed, 19 Nov 2025 09:23:25 +0800 Subject: [PATCH 34/38] revert a change --- src/python/openvino_genai/py_openvino_genai.pyi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/python/openvino_genai/py_openvino_genai.pyi b/src/python/openvino_genai/py_openvino_genai.pyi index 354517f823..1af39275ec 100644 --- a/src/python/openvino_genai/py_openvino_genai.pyi +++ b/src/python/openvino_genai/py_openvino_genai.pyi @@ -4465,4 +4465,4 @@ def draft_model(models_path: os.PathLike | str | bytes, device: str = '', **kwar def get_version() -> str: """ OpenVINO GenAI version - """ \ No newline at end of file + """ From 6e10e60a49e34f90c0a498e715b071237fc8addc Mon Sep 17 00:00:00 2001 From: sunxiaoxia2022 Date: Wed, 19 Nov 2025 09:36:50 +0800 Subject: [PATCH 35/38] double quotes --- tests/python_tests/test_continuous_batching.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/python_tests/test_continuous_batching.py b/tests/python_tests/test_continuous_batching.py index 2d1b68013e..6aa6a9c1c0 100644 --- a/tests/python_tests/test_continuous_batching.py +++ b/tests/python_tests/test_continuous_batching.py @@ -534,7 +534,7 @@ def test_speculative_decoding_extended_perf_metrics(pipeline_type): @pytest.mark.precommit def test_continuous_batching_add_extension(): - model_id = 'katuni4ka/tiny-random-phi3' + model_id = "katuni4ka/tiny-random-phi3" _, _, models_path = download_and_convert_model(model_id) scheduler_config = SchedulerConfig() From 6e673c9c60445f5a172cb91dcc0328018382a016 Mon Sep 17 00:00:00 2001 From: sunxiaoxia2022 Date: Wed, 19 Nov 2025 10:19:57 +0800 Subject: [PATCH 36/38] rm add_extensions_to_core from embedding_model and qwen2vl since it is now done externally --- src/cpp/src/visual_language/embedding_model.cpp | 10 +++------- src/cpp/src/visual_language/qwen2vl/classes.cpp | 16 ++++------------ 2 files changed, 7 insertions(+), 19 deletions(-) diff --git a/src/cpp/src/visual_language/embedding_model.cpp b/src/cpp/src/visual_language/embedding_model.cpp index 3da633c704..db436dac07 100644 --- a/src/cpp/src/visual_language/embedding_model.cpp +++ b/src/cpp/src/visual_language/embedding_model.cpp @@ -45,13 +45,11 @@ EmbeddingsModel::EmbeddingsModel(const std::filesystem::path& model_dir, const std::string& device, const ov::AnyMap& properties) { ov::Core core = utils::singleton_core(); - auto properties_copy = properties; - utils::add_extensions_to_core(properties_copy); - std::shared_ptr<ov::Model> m_model = core.read_model(model_dir / "openvino_text_embeddings_model.xml", {}, properties_copy); + std::shared_ptr<ov::Model> m_model = core.read_model(model_dir / "openvino_text_embeddings_model.xml", {}, properties); // apply embedding postprocessing step by merging them into the model merge_postprocess(m_model, scale_emb); - ov::CompiledModel compiled_model = core.compile_model(m_model, device, properties_copy); + ov::CompiledModel compiled_model = core.compile_model(m_model, device, properties); ov::genai::utils::print_compiled_model_properties(compiled_model, "text
embeddings model"); m_embeddings_requests_queue = init(compiled_model); } @@ -62,13 +60,11 @@ EmbeddingsModel::EmbeddingsModel(const std::string& model, const std::string& device, const ov::AnyMap& properties) { ov::Core core = utils::singleton_core(); - auto properties_copy = properties; - utils::add_extensions_to_core(properties_copy); std::shared_ptr m_model = core.read_model(model, weights); // apply embedding postprocessing step by merging them into the model merge_postprocess(m_model, scale_emb); - ov::CompiledModel compiled_model = core.compile_model(m_model, device, properties_copy); + ov::CompiledModel compiled_model = core.compile_model(m_model, device, properties); m_embeddings_requests_queue = init(compiled_model); } diff --git a/src/cpp/src/visual_language/qwen2vl/classes.cpp b/src/cpp/src/visual_language/qwen2vl/classes.cpp index e3010de602..9b200d2baf 100644 --- a/src/cpp/src/visual_language/qwen2vl/classes.cpp +++ b/src/cpp/src/visual_language/qwen2vl/classes.cpp @@ -660,10 +660,8 @@ VisionEncoderQwen2VL::VisionEncoderQwen2VL(const std::filesystem::path& model_di : VisionEncoder(model_dir, device, properties), use_ov_image_preprocess(check_image_preprocess_env()) { if (use_ov_image_preprocess) { - auto properties_without_extensions = properties; - utils::add_extensions_to_core(properties_without_extensions); auto model_org = utils::singleton_core().read_model(model_dir / "openvino_vision_embeddings_model.xml"); - m_ireq_queue_vision_encoder = create_vision_encoder_ireq(model_org, m_processor_config, device, properties_without_extensions); + m_ireq_queue_vision_encoder = create_vision_encoder_ireq(model_org, m_processor_config, device, properties); } } @@ -676,10 +674,8 @@ VisionEncoderQwen2VL::VisionEncoderQwen2VL(const ModelsMap& models_map, if (use_ov_image_preprocess) { const auto& [vision_encoder_model, vision_encoder_weights] = utils::get_model_weights_pair(models_map, "vision_embeddings"); - auto properties_without_extensions = properties; - utils::add_extensions_to_core(properties_without_extensions); auto model_org = utils::singleton_core().read_model(vision_encoder_model, vision_encoder_weights); - m_ireq_queue_vision_encoder = create_vision_encoder_ireq(model_org, m_processor_config, device, properties_without_extensions); + m_ireq_queue_vision_encoder = create_vision_encoder_ireq(model_org, m_processor_config, device, properties); } } @@ -927,12 +923,10 @@ InputsEmbedderQwen2VL::InputsEmbedderQwen2VL( const std::string& device, const ov::AnyMap device_config) : IInputsEmbedder(vlm_config, model_dir, device, device_config) { - auto properties_without_extensions = device_config; - utils::add_extensions_to_core(properties_without_extensions); auto model = utils::singleton_core().read_model(model_dir / "openvino_vision_embeddings_merger_model.xml"); utils::request_vl_sdpa_transformations(model); - auto compiled_model = utils::singleton_core().compile_model(model, device, properties_without_extensions); + auto compiled_model = utils::singleton_core().compile_model(model, device, device_config); m_with_cu_seqlens_input = utils::check_vl_sdpa_transformations(compiled_model); ov::genai::utils::print_compiled_model_properties(compiled_model, @@ -958,8 +952,6 @@ InputsEmbedderQwen2VL::InputsEmbedderQwen2VL( const std::string& device, const ov::AnyMap device_config) : IInputsEmbedder(vlm_config, models_map, tokenizer, config_dir_path, device, device_config) { - auto properties_without_extensions = device_config; - 
utils::add_extensions_to_core(properties_without_extensions); auto model = utils::singleton_core().read_model( utils::get_model_weights_pair(models_map, "vision_embeddings_merger").first, utils::get_model_weights_pair(models_map, "vision_embeddings_merger").second); @@ -967,7 +959,7 @@ InputsEmbedderQwen2VL::InputsEmbedderQwen2VL( auto compiled_model = utils::singleton_core().compile_model(model, device, - properties_without_extensions + device_config ); m_with_cu_seqlens_input = utils::check_vl_sdpa_transformations(compiled_model); From e59ab7c4e3c3f60015eb41f40c77560dca8d94f3 Mon Sep 17 00:00:00 2001 From: sunxiaoxia2022 Date: Wed, 19 Nov 2025 10:32:49 +0800 Subject: [PATCH 37/38] move add_extensions_to_core before InputsEmbedder --- src/cpp/src/visual_language/pipeline.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/cpp/src/visual_language/pipeline.cpp b/src/cpp/src/visual_language/pipeline.cpp index 9fa8f94f59..017d29c8c3 100644 --- a/src/cpp/src/visual_language/pipeline.cpp +++ b/src/cpp/src/visual_language/pipeline.cpp @@ -152,15 +152,14 @@ class VLMPipeline::VLMPipelineImpl : public VLMPipelineBase{ OPENVINO_ASSERT(!m_is_npu, "VLMPipeline initialization from string isn't supported for NPU device"); - m_inputs_embedder = std::make_shared<InputsEmbedder>(models_map, tokenizer, config_dir_path, device, properties); + auto properties_without_extensions = properties; + utils::add_extensions_to_core(properties_without_extensions); + m_inputs_embedder = std::make_shared<InputsEmbedder>(models_map, tokenizer, config_dir_path, device, properties_without_extensions); m_tokenizer = m_inputs_embedder->get_tokenizer(); m_embedding = m_inputs_embedder->get_embedding_model(); auto m_language_pair = utils::get_model_weights_pair(models_map, "language"); - auto properties_without_extensions = properties; - utils::add_extensions_to_core(properties_without_extensions); - m_language = utils::singleton_core().compile_model( m_language_pair.first, m_language_pair.second, device, properties_without_extensions ).create_infer_request(); From 54880e3d321888cc51679c8229e1ab9d08226fb0 Mon Sep 17 00:00:00 2001 From: sunxiaoxia2022 Date: Wed, 19 Nov 2025 10:50:48 +0800 Subject: [PATCH 38/38] add a documentation comment for add_extensions_to_core --- src/cpp/src/utils.hpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/cpp/src/utils.hpp b/src/cpp/src/utils.hpp index 64de9d22a8..6f3562a2bf 100644 --- a/src/cpp/src/utils.hpp +++ b/src/cpp/src/utils.hpp @@ -287,6 +287,11 @@ bool explicitly_requires_paged_attention(const ov::AnyMap& properties, bool is_n std::pair<ov::AnyMap, std::string> extract_attention_backend(const ov::AnyMap& external_properties, bool is_npu_requested = false); +/** + * @brief Extracts the "EXTENSIONS" key from the provided properties map, adds each extension path to the singleton + * core, and removes the key from the properties map. This function is used to dynamically add custom extensions to the + * OpenVINO core at runtime. + */ void add_extensions_to_core(ov::AnyMap& properties); void save_openvino_model(const std::shared_ptr<ov::Model>& model, const std::string& save_path, bool compress_to_fp16);
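
Taken together, the series leaves two ways to load a custom-op extension: process-wide via ov::genai::add_extension (openvino_genai.add_extension in Python), or per pipeline through the "EXTENSIONS" property, which utils::add_extensions_to_core consumes and strips before the remaining properties reach read_model/compile_model. A minimal Python sketch of both paths; the model directory and extension library paths below are hypothetical placeholders, not artifacts of this patch series:

import openvino_genai as ov_genai

models_path = "./tiny-random-phi3"    # hypothetical: an exported OpenVINO model directory
extension_lib = "./libcustom_ops.so"  # hypothetical: a library exposing ov::Extension

# Option 1: register the extension once with the singleton ov::Core shared by GenAI pipelines.
ov_genai.add_extension(extension_lib)

# Option 2: pass it per pipeline; the "EXTENSIONS" key is consumed by
# add_extensions_to_core and removed before compile_model sees the properties.
pipe = ov_genai.LLMPipeline(models_path, "CPU", EXTENSIONS=[extension_lib])
print(pipe.generate("Hello", max_new_tokens=8))

As the tests above check, a bogus library path fails fast: pipeline construction raises a RuntimeError carrying "Cannot find entry point to the extension library".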