From 7b3ad318100f8223cce755b63bd97ef80e5d2e8e Mon Sep 17 00:00:00 2001 From: "xiping.yan" Date: Mon, 3 Nov 2025 14:15:18 +0800 Subject: [PATCH 01/38] Introduce add_extension to genai. Signed-off-by: xiping.yan --- src/cpp/include/openvino/genai/generation_config.hpp | 7 +++++++ src/cpp/src/generation_config.cpp | 4 ++++ src/python/openvino_genai/__init__.py | 3 ++- src/python/openvino_genai/__init__.pyi | 1 + src/python/openvino_genai/py_openvino_genai.pyi | 4 ++++ src/python/py_openvino_genai.cpp | 6 ++++++ 6 files changed, 24 insertions(+), 1 deletion(-) diff --git a/src/cpp/include/openvino/genai/generation_config.hpp b/src/cpp/include/openvino/genai/generation_config.hpp index e592cb36ff..da2fccbfa0 100644 --- a/src/cpp/include/openvino/genai/generation_config.hpp +++ b/src/cpp/include/openvino/genai/generation_config.hpp @@ -706,6 +706,13 @@ class OPENVINO_GENAI_EXPORTS GenerationConfig { void validate() const; }; +/** + * @brief Registers an extension to ov::Core object for model with custom op. + * @param library_path Path to the library with ov::Extension. + * @{ + */ +void OPENVINO_GENAI_EXPORTS add_extension(const std::string& library_path); + /* * utils that allow to use generate and operator() in the following way: * pipe.generate(input_ids, ov::genai::max_new_tokens(200), ov::genai::temperature(1.0f),...) diff --git a/src/cpp/src/generation_config.cpp b/src/cpp/src/generation_config.cpp index 44fc616c1e..739e7cf4c2 100644 --- a/src/cpp/src/generation_config.cpp +++ b/src/cpp/src/generation_config.cpp @@ -490,5 +490,9 @@ GenerationConfig multinomial() { return multinomial_config; } +void add_extension(const std::string& library_path) { + utils::singleton_core().add_extension(library_path); +} + } // namespace genai } // namespace ov diff --git a/src/python/openvino_genai/__init__.py b/src/python/openvino_genai/__init__.py index b10aadd062..997cb5cf19 100644 --- a/src/python/openvino_genai/__init__.py +++ b/src/python/openvino_genai/__init__.py @@ -19,7 +19,8 @@ get_version, StreamingStatus, TextStreamer, - TextParserStreamer + TextParserStreamer, + add_extension ) from .py_openvino_genai import ( diff --git a/src/python/openvino_genai/__init__.pyi b/src/python/openvino_genai/__init__.pyi index c1d1f1dc30..fe8fea17dd 100644 --- a/src/python/openvino_genai/__init__.pyi +++ b/src/python/openvino_genai/__init__.pyi @@ -74,6 +74,7 @@ from openvino_genai.py_openvino_genai import WhisperPipeline from openvino_genai.py_openvino_genai import WhisperRawPerfMetrics from openvino_genai.py_openvino_genai import draft_model from openvino_genai.py_openvino_genai import get_version +from openvino_genai.py_openvino_genai import add_extension import os as os from . 
import py_openvino_genai __all__: list[str] = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChatHistory', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'DeepSeekR1ReasoningIncrementalParser', 'DeepSeekR1ReasoningParser', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'ImageGenerationPerfMetrics', 'IncrementalParser', 'InpaintingPipeline', 'KVCrushAnchorPointMode', 'KVCrushConfig', 'LLMPipeline', 'Llama3JsonToolParser', 'Llama3PythonicToolParser', 'Parser', 'PerfMetrics', 'Phi4ReasoningIncrementalParser', 'Phi4ReasoningParser', 'RawImageGenerationPerfMetrics', 'RawPerfMetrics', 'ReasoningIncrementalParser', 'ReasoningParser', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'SparseAttentionConfig', 'SparseAttentionMode', 'SpeechGenerationConfig', 'SpeechGenerationPerfMetrics', 'StopCriteria', 'StreamerBase', 'StreamingStatus', 'StructuralTagItem', 'StructuralTagsConfig', 'StructuredOutputConfig', 'T5EncoderModel', 'Text2ImagePipeline', 'Text2SpeechDecodedResults', 'Text2SpeechPipeline', 'TextEmbeddingPipeline', 'TextParserStreamer', 'TextRerankPipeline', 'TextStreamer', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMPipeline', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version', 'openvino', 'os', 'py_openvino_genai'] diff --git a/src/python/openvino_genai/py_openvino_genai.pyi b/src/python/openvino_genai/py_openvino_genai.pyi index 0444146334..b0b13e0c26 100644 --- a/src/python/openvino_genai/py_openvino_genai.pyi +++ b/src/python/openvino_genai/py_openvino_genai.pyi @@ -4406,3 +4406,7 @@ def get_version() -> str: """ OpenVINO GenAI version """ +def add_extension(str) -> None: + """ + OpenVINO GenAI version + """ \ No newline at end of file diff --git a/src/python/py_openvino_genai.cpp b/src/python/py_openvino_genai.cpp index ed010a7581..ae7e85ceb6 100644 --- a/src/python/py_openvino_genai.cpp +++ b/src/python/py_openvino_genai.cpp @@ -13,6 +13,7 @@ #include "openvino/genai/llm_pipeline.hpp" #include "openvino/genai/text_streamer.hpp" #include "openvino/genai/version.hpp" +#include "openvino/genai/generation_config.hpp" #include "py_utils.hpp" @@ -28,6 +29,7 @@ using ov::genai::StreamingStatus; using ov::genai::TextStreamer; using ov::genai::Tokenizer; using ov::genai::get_version; +using ov::genai::add_extension; void init_lora_adapter(py::module_& m); void init_perf_metrics(py::module_& m); @@ -88,6 +90,10 @@ PYBIND11_MODULE(py_openvino_genai, m) { return get_version().buildNumber; }, get_version().description); + m.def("add_extension", [] (py::str library_path) { + return add_extension(library_path); + }); + init_perf_metrics(m); py::class_(m, "DecodedResults", decoded_results_docstring) From 2f603013849b337c07f7cb3b4c6c2b5e999c9a92 Mon Sep 17 00:00:00 2001 From: "xiping.yan" Date: Wed, 5 Nov 2025 12:03:45 +0800 Subject: [PATCH 02/38] add draft test. 
Signed-off-by: xiping.yan --- tests/python_tests/test_add_extension.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 tests/python_tests/test_add_extension.py diff --git a/tests/python_tests/test_add_extension.py b/tests/python_tests/test_add_extension.py new file mode 100644 index 0000000000..6625f5d3ce --- /dev/null +++ b/tests/python_tests/test_add_extension.py @@ -0,0 +1,16 @@ +# Copyright (C) 2023-2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + + +import pytest +import openvino_genai as ov_genai + +@pytest.mark.precommit +def test_add_extension(): + print(ov_genai.get_version()) + # I don't know how to get tokenizer path. + tokenizer_path = "" + try: + ov_genai.add_extension(tokenizer_path) + except: + assert(False) \ No newline at end of file From 80efe54e0a29669c003ccbf3edb26b745d2faf81 Mon Sep 17 00:00:00 2001 From: Xiping Yan Date: Wed, 5 Nov 2025 12:56:12 +0800 Subject: [PATCH 03/38] Update src/python/openvino_genai/py_openvino_genai.pyi Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- src/python/openvino_genai/py_openvino_genai.pyi | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/python/openvino_genai/py_openvino_genai.pyi b/src/python/openvino_genai/py_openvino_genai.pyi index b0b13e0c26..d6e11da73d 100644 --- a/src/python/openvino_genai/py_openvino_genai.pyi +++ b/src/python/openvino_genai/py_openvino_genai.pyi @@ -4408,5 +4408,7 @@ def get_version() -> str: """ def add_extension(str) -> None: """ - OpenVINO GenAI version + Registers an OpenVINO extension from a library path to enable support for models with custom operations. + + :param str: Path to the extension library to register. """ \ No newline at end of file From 4a02c8ef4917d3da2a8b60550c4c6be123dc9aed Mon Sep 17 00:00:00 2001 From: Xiping Yan Date: Wed, 5 Nov 2025 12:56:51 +0800 Subject: [PATCH 04/38] Update src/python/openvino_genai/__init__.pyi Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- src/python/openvino_genai/__init__.pyi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/python/openvino_genai/__init__.pyi b/src/python/openvino_genai/__init__.pyi index fe8fea17dd..6a2ffd2d2f 100644 --- a/src/python/openvino_genai/__init__.pyi +++ b/src/python/openvino_genai/__init__.pyi @@ -77,5 +77,5 @@ from openvino_genai.py_openvino_genai import get_version from openvino_genai.py_openvino_genai import add_extension import os as os from . 
import py_openvino_genai -__all__: list[str] = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChatHistory', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'DeepSeekR1ReasoningIncrementalParser', 'DeepSeekR1ReasoningParser', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'ImageGenerationPerfMetrics', 'IncrementalParser', 'InpaintingPipeline', 'KVCrushAnchorPointMode', 'KVCrushConfig', 'LLMPipeline', 'Llama3JsonToolParser', 'Llama3PythonicToolParser', 'Parser', 'PerfMetrics', 'Phi4ReasoningIncrementalParser', 'Phi4ReasoningParser', 'RawImageGenerationPerfMetrics', 'RawPerfMetrics', 'ReasoningIncrementalParser', 'ReasoningParser', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'SparseAttentionConfig', 'SparseAttentionMode', 'SpeechGenerationConfig', 'SpeechGenerationPerfMetrics', 'StopCriteria', 'StreamerBase', 'StreamingStatus', 'StructuralTagItem', 'StructuralTagsConfig', 'StructuredOutputConfig', 'T5EncoderModel', 'Text2ImagePipeline', 'Text2SpeechDecodedResults', 'Text2SpeechPipeline', 'TextEmbeddingPipeline', 'TextParserStreamer', 'TextRerankPipeline', 'TextStreamer', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMPipeline', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version', 'openvino', 'os', 'py_openvino_genai'] +__all__: list[str] = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChatHistory', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'DeepSeekR1ReasoningIncrementalParser', 'DeepSeekR1ReasoningParser', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'ImageGenerationPerfMetrics', 'IncrementalParser', 'InpaintingPipeline', 'KVCrushAnchorPointMode', 'KVCrushConfig', 'LLMPipeline', 'Llama3JsonToolParser', 'Llama3PythonicToolParser', 'Parser', 'PerfMetrics', 'Phi4ReasoningIncrementalParser', 'Phi4ReasoningParser', 'RawImageGenerationPerfMetrics', 'RawPerfMetrics', 'ReasoningIncrementalParser', 'ReasoningParser', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'SparseAttentionConfig', 'SparseAttentionMode', 'SpeechGenerationConfig', 'SpeechGenerationPerfMetrics', 'StopCriteria', 'StreamerBase', 'StreamingStatus', 'StructuralTagItem', 'StructuralTagsConfig', 'StructuredOutputConfig', 'T5EncoderModel', 'Text2ImagePipeline', 'Text2SpeechDecodedResults', 'Text2SpeechPipeline', 'TextEmbeddingPipeline', 'TextParserStreamer', 'TextRerankPipeline', 'TextStreamer', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMPipeline', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'add_extension', 'draft_model', 'get_version', 'openvino', 'os', 'py_openvino_genai'] __version__: str From 31078ab20e888a9f0e179a761c75c8b905411480 Mon Sep 17 00:00:00 2001 From: Xiping Yan Date: Wed, 5 Nov 2025 12:57:16 +0800 Subject: [PATCH 05/38] Update tests/python_tests/test_add_extension.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- 
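Note (reviewer comment, not part of the commit): a bare "except:" also swallows SystemExit and KeyboardInterrupt, so narrowing it to "except Exception:" is the safer pattern. For reference, a minimal usage sketch of the API this series introduces; the library and model paths below are placeholders, not paths from this repository:

    import openvino_genai as ov_genai

    # add_extension() registers a shared library exposing ov::Extension entry
    # points (e.g. custom ops) on the ov::Core shared by all GenAI pipelines,
    # so it must be called before a pipeline reads the model.
    ov_genai.add_extension("/path/to/libcustom_ops.so")  # placeholder path
    pipe = ov_genai.LLMPipeline("/path/to/model_dir", "CPU")  # placeholder path
    print(pipe.generate("Hello", max_new_tokens=16))
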
tests/python_tests/test_add_extension.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/python_tests/test_add_extension.py b/tests/python_tests/test_add_extension.py index 6625f5d3ce..addc3f13f5 100644 --- a/tests/python_tests/test_add_extension.py +++ b/tests/python_tests/test_add_extension.py @@ -12,5 +12,5 @@ def test_add_extension(): tokenizer_path = "" try: ov_genai.add_extension(tokenizer_path) - except: + except Exception: assert(False) \ No newline at end of file From 281fe8ceebad212c5c48493b20b70bbb5b101f74 Mon Sep 17 00:00:00 2001 From: Xiping Yan Date: Wed, 5 Nov 2025 12:57:25 +0800 Subject: [PATCH 06/38] Update tests/python_tests/test_add_extension.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- tests/python_tests/test_add_extension.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/python_tests/test_add_extension.py b/tests/python_tests/test_add_extension.py index addc3f13f5..8b699650bc 100644 --- a/tests/python_tests/test_add_extension.py +++ b/tests/python_tests/test_add_extension.py @@ -8,9 +8,9 @@ @pytest.mark.precommit def test_add_extension(): print(ov_genai.get_version()) - # I don't know how to get tokenizer path. - tokenizer_path = "" + # Path to the OpenVINO extension shared library (update as needed). + extension_path = "" try: - ov_genai.add_extension(tokenizer_path) + ov_genai.add_extension(extension_path) except Exception: assert(False) \ No newline at end of file From e449070a0a81b94ad845ff3f4d01cbbc668f4a86 Mon Sep 17 00:00:00 2001 From: Xiping Yan Date: Wed, 5 Nov 2025 13:06:49 +0800 Subject: [PATCH 07/38] Update src/python/openvino_genai/py_openvino_genai.pyi Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- src/python/openvino_genai/py_openvino_genai.pyi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/python/openvino_genai/py_openvino_genai.pyi b/src/python/openvino_genai/py_openvino_genai.pyi index 279979bed2..ef054740ca 100644 --- a/src/python/openvino_genai/py_openvino_genai.pyi +++ b/src/python/openvino_genai/py_openvino_genai.pyi @@ -4406,9 +4406,9 @@ def get_version() -> str: """ OpenVINO GenAI version """ -def add_extension(str) -> None: +def add_extension(library_path: str) -> None: """ Registers an OpenVINO extension from a library path to enable support for models with custom operations. - :param str: Path to the extension library to register. + :param library_path: Path to the extension library to register. 
""" \ No newline at end of file From 4f60a60c7bbb8e6e93e40d89e0ca0c3b82c057ac Mon Sep 17 00:00:00 2001 From: Xiping Yan Date: Wed, 5 Nov 2025 13:07:16 +0800 Subject: [PATCH 08/38] Update src/cpp/include/openvino/genai/generation_config.hpp Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- src/cpp/include/openvino/genai/generation_config.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/cpp/include/openvino/genai/generation_config.hpp b/src/cpp/include/openvino/genai/generation_config.hpp index da2fccbfa0..5bb7ce587a 100644 --- a/src/cpp/include/openvino/genai/generation_config.hpp +++ b/src/cpp/include/openvino/genai/generation_config.hpp @@ -712,6 +712,7 @@ class OPENVINO_GENAI_EXPORTS GenerationConfig { * @{ */ void OPENVINO_GENAI_EXPORTS add_extension(const std::string& library_path); +/** @} */ /* * utils that allow to use generate and operator() in the following way: From 698e5ec74058038cdfa4087b538e159dc9f4f385 Mon Sep 17 00:00:00 2001 From: Xiping Yan Date: Wed, 5 Nov 2025 13:09:22 +0800 Subject: [PATCH 09/38] Update generation_config.hpp --- src/cpp/include/openvino/genai/generation_config.hpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/cpp/include/openvino/genai/generation_config.hpp b/src/cpp/include/openvino/genai/generation_config.hpp index 5bb7ce587a..20ae3937dd 100644 --- a/src/cpp/include/openvino/genai/generation_config.hpp +++ b/src/cpp/include/openvino/genai/generation_config.hpp @@ -709,10 +709,9 @@ class OPENVINO_GENAI_EXPORTS GenerationConfig { /** * @brief Registers an extension to ov::Core object for model with custom op. * @param library_path Path to the library with ov::Extension. - * @{ */ void OPENVINO_GENAI_EXPORTS add_extension(const std::string& library_path); -/** @} */ + /* * utils that allow to use generate and operator() in the following way: From 8c56ca83ca07a01540ba037acf4a79dbd2af4e57 Mon Sep 17 00:00:00 2001 From: Xiping Yan Date: Wed, 5 Nov 2025 13:11:17 +0800 Subject: [PATCH 10/38] Update tests/python_tests/test_add_extension.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- tests/python_tests/test_add_extension.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/python_tests/test_add_extension.py b/tests/python_tests/test_add_extension.py index 8b699650bc..76193627b0 100644 --- a/tests/python_tests/test_add_extension.py +++ b/tests/python_tests/test_add_extension.py @@ -10,7 +10,5 @@ def test_add_extension(): print(ov_genai.get_version()) # Path to the OpenVINO extension shared library (update as needed). 
extension_path = "" - try: - ov_genai.add_extension(extension_path) - except Exception: - assert(False) \ No newline at end of file + with pytest.raises(Exception): + ov_genai.add_extension(extension_path) \ No newline at end of file From c1e0a7c5a6d0a75b2adb7db494f58e4e823eea4a Mon Sep 17 00:00:00 2001 From: Xiping Yan Date: Wed, 5 Nov 2025 13:19:40 +0800 Subject: [PATCH 11/38] Update src/python/py_openvino_genai.cpp Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- src/python/py_openvino_genai.cpp | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/src/python/py_openvino_genai.cpp b/src/python/py_openvino_genai.cpp index ae7e85ceb6..ebc4ee2cc1 100644 --- a/src/python/py_openvino_genai.cpp +++ b/src/python/py_openvino_genai.cpp @@ -92,8 +92,21 @@ PYBIND11_MODULE(py_openvino_genai, m) { m.def("add_extension", [] (py::str library_path) { return add_extension(library_path); - }); - + }, + R"doc( + Adds a custom extension library to the OpenVINO GenAI backend. + + Parameters + ---------- + library_path : str + Path to the shared library containing the extension. + + Raises + ------ + RuntimeError + If the extension cannot be loaded. + )doc" + ); init_perf_metrics(m); py::class_(m, "DecodedResults", decoded_results_docstring) From 57bd6a155bc5edb9b8a35b82a80fa1ffe2890712 Mon Sep 17 00:00:00 2001 From: Xiping Yan Date: Wed, 5 Nov 2025 13:30:47 +0800 Subject: [PATCH 12/38] Update tests/python_tests/test_add_extension.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- tests/python_tests/test_add_extension.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/python_tests/test_add_extension.py b/tests/python_tests/test_add_extension.py index 76193627b0..9cad8af3d2 100644 --- a/tests/python_tests/test_add_extension.py +++ b/tests/python_tests/test_add_extension.py @@ -10,5 +10,5 @@ def test_add_extension(): print(ov_genai.get_version()) # Path to the OpenVINO extension shared library (update as needed). extension_path = "" - with pytest.raises(Exception): + with pytest.raises(RuntimeError): ov_genai.add_extension(extension_path) \ No newline at end of file From a4ef9c804832978379d685446b2c74a46744e6b3 Mon Sep 17 00:00:00 2001 From: Xiping Yan Date: Wed, 5 Nov 2025 13:36:24 +0800 Subject: [PATCH 13/38] Update tests/python_tests/test_add_extension.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- tests/python_tests/test_add_extension.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/python_tests/test_add_extension.py b/tests/python_tests/test_add_extension.py index 9cad8af3d2..352f62e29b 100644 --- a/tests/python_tests/test_add_extension.py +++ b/tests/python_tests/test_add_extension.py @@ -7,7 +7,6 @@ @pytest.mark.precommit def test_add_extension(): - print(ov_genai.get_version()) # Path to the OpenVINO extension shared library (update as needed). 
extension_path = "" with pytest.raises(RuntimeError): From 7260493950961430e4b81fc68da986d615a2c6e9 Mon Sep 17 00:00:00 2001 From: Xiping Yan Date: Mon, 10 Nov 2025 13:15:32 +0800 Subject: [PATCH 14/38] Update src/cpp/include/openvino/genai/generation_config.hpp Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- src/cpp/include/openvino/genai/generation_config.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cpp/include/openvino/genai/generation_config.hpp b/src/cpp/include/openvino/genai/generation_config.hpp index 20ae3937dd..c9853afe86 100644 --- a/src/cpp/include/openvino/genai/generation_config.hpp +++ b/src/cpp/include/openvino/genai/generation_config.hpp @@ -707,7 +707,7 @@ class OPENVINO_GENAI_EXPORTS GenerationConfig { }; /** - * @brief Registers an extension to ov::Core object for model with custom op. + * @brief Registers an extension to ov::Core object for models with custom ops. * @param library_path Path to the library with ov::Extension. */ void OPENVINO_GENAI_EXPORTS add_extension(const std::string& library_path); From 689b08c9b9599ef9ce22e8ba648088ec103719ab Mon Sep 17 00:00:00 2001 From: xipingya Date: Mon, 10 Nov 2025 15:57:25 +0800 Subject: [PATCH 15/38] Get tokenizer so path. Signed-off-by: xipingya --- tests/python_tests/test_add_extension.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/tests/python_tests/test_add_extension.py b/tests/python_tests/test_add_extension.py index 352f62e29b..8478cf651c 100644 --- a/tests/python_tests/test_add_extension.py +++ b/tests/python_tests/test_add_extension.py @@ -4,10 +4,23 @@ import pytest import openvino_genai as ov_genai +import platform +import os +import openvino_tokenizers; @pytest.mark.precommit def test_add_extension(): # Path to the OpenVINO extension shared library (update as needed). - extension_path = "" - with pytest.raises(RuntimeError): - ov_genai.add_extension(extension_path) \ No newline at end of file + os_name = platform.system() + if os_name == "Windows": + ov_tokenizer_path = os.path.dirname(openvino_tokenizers.__file__) + "\\lib\\openvino_tokenizers.dll" + elif os_name == "Linux": + ov_tokenizer_path = os.path.dirname(openvino_tokenizers.__file__) + "/lib/libopenvino_tokenizers.so" + else: + print(f"Skipped. Current test only support Windows and Linux") + return + + try: + ov_genai.add_extension(ov_tokenizer_path) + except RuntimeError as e: + raise RuntimeError(f"Add extension fail, maybe tokenizers' version mismatch. Original error: {e}") \ No newline at end of file From 072b0b0ca822e7e97a9adfd82224e9a7c102774f Mon Sep 17 00:00:00 2001 From: Xiping Yan Date: Mon, 10 Nov 2025 16:16:44 +0800 Subject: [PATCH 16/38] Update tests/python_tests/test_add_extension.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- tests/python_tests/test_add_extension.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/python_tests/test_add_extension.py b/tests/python_tests/test_add_extension.py index 8478cf651c..636ed09943 100644 --- a/tests/python_tests/test_add_extension.py +++ b/tests/python_tests/test_add_extension.py @@ -13,9 +13,9 @@ def test_add_extension(): # Path to the OpenVINO extension shared library (update as needed). 
os_name = platform.system() if os_name == "Windows": - ov_tokenizer_path = os.path.dirname(openvino_tokenizers.__file__) + "\\lib\\openvino_tokenizers.dll" + ov_tokenizer_path = os.path.join(os.path.dirname(openvino_tokenizers.__file__), "lib", "openvino_tokenizers.dll") elif os_name == "Linux": - ov_tokenizer_path = os.path.dirname(openvino_tokenizers.__file__) + "/lib/libopenvino_tokenizers.so" + ov_tokenizer_path = os.path.join(os.path.dirname(openvino_tokenizers.__file__), "lib", "libopenvino_tokenizers.so") else: print(f"Skipped. Current test only support Windows and Linux") return From c05b09203f67f2132a33030842f4128a653e4c28 Mon Sep 17 00:00:00 2001 From: Xiping Yan Date: Mon, 10 Nov 2025 16:17:00 +0800 Subject: [PATCH 17/38] Update tests/python_tests/test_add_extension.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- tests/python_tests/test_add_extension.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/python_tests/test_add_extension.py b/tests/python_tests/test_add_extension.py index 636ed09943..955dd1dd6a 100644 --- a/tests/python_tests/test_add_extension.py +++ b/tests/python_tests/test_add_extension.py @@ -17,8 +17,7 @@ def test_add_extension(): elif os_name == "Linux": ov_tokenizer_path = os.path.join(os.path.dirname(openvino_tokenizers.__file__), "lib", "libopenvino_tokenizers.so") else: - print(f"Skipped. Current test only support Windows and Linux") - return + pytest.skip("Skipped. Current test only supports Windows and Linux") try: ov_genai.add_extension(ov_tokenizer_path) except RuntimeError as e: raise RuntimeError(f"Add extension fail, maybe tokenizers' version mismatch. Original error: {e}") \ No newline at end of file From 7e1ac0f58ac781da18aa3bb128b254053bd6197a Mon Sep 17 00:00:00 2001 From: Xiping Yan Date: Mon, 10 Nov 2025 16:43:23 +0800 Subject: [PATCH 18/38] Update tests/python_tests/test_add_extension.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- tests/python_tests/test_add_extension.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/python_tests/test_add_extension.py b/tests/python_tests/test_add_extension.py index 955dd1dd6a..d27e04b224 100644 --- a/tests/python_tests/test_add_extension.py +++ b/tests/python_tests/test_add_extension.py @@ -6,7 +6,7 @@ import openvino_genai as ov_genai import platform import os -import openvino_tokenizers; +import openvino_tokenizers @pytest.mark.precommit def test_add_extension(): From ba86cc40c01f12ef480ec498987d9c6b69120d16 Mon Sep 17 00:00:00 2001 From: xipingya Date: Tue, 11 Nov 2025 13:18:16 +0800 Subject: [PATCH 19/38] enable add_extension to VLM pipeline with properties.
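With this change, extension libraries are passed through pipeline properties under the "EXTENSIONS" key and registered on the shared ov::Core before the model is read, instead of through a global free function. A sketch of the intended usage, mirroring the test added below (the paths are placeholders):

    import openvino_genai as ov_genai

    # Each entry is a path to a shared library with ov::Extension entry points.
    properties = {"EXTENSIONS": ["/path/to/libcustom_ops.so"]}  # placeholder path
    pipe = ov_genai.VLMPipeline("/path/to/model_dir", "CPU", properties)
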
Signed-off-by: xipingya --- .../openvino/genai/generation_config.hpp | 6 ----- src/cpp/src/generation_config.cpp | 4 --- src/cpp/src/utils.cpp | 16 ++++++++++++ src/cpp/src/utils.hpp | 2 ++ src/cpp/src/visual_language/pipeline.cpp | 15 ++++++++--- src/python/openvino_genai/__init__.py | 3 +-- src/python/openvino_genai/__init__.pyi | 3 +-- .../openvino_genai/py_openvino_genai.pyi | 6 ----- src/python/py_openvino_genai.cpp | 18 ------------- tests/python_tests/test_add_extension.py | 26 ------------------- tests/python_tests/test_vlm_pipeline.py | 13 ++++++++++ 11 files changed, 45 insertions(+), 67 deletions(-) delete mode 100644 tests/python_tests/test_add_extension.py diff --git a/src/cpp/include/openvino/genai/generation_config.hpp b/src/cpp/include/openvino/genai/generation_config.hpp index c9853afe86..dbc6060c36 100644 --- a/src/cpp/include/openvino/genai/generation_config.hpp +++ b/src/cpp/include/openvino/genai/generation_config.hpp @@ -706,12 +706,6 @@ class OPENVINO_GENAI_EXPORTS GenerationConfig { void validate() const; }; -/** - * @brief Registers an extension to ov::Core object for models with custom ops. - * @param library_path Path to the library with ov::Extension. - */ -void OPENVINO_GENAI_EXPORTS add_extension(const std::string& library_path); - /* * utils that allow to use generate and operator() in the following way: diff --git a/src/cpp/src/generation_config.cpp b/src/cpp/src/generation_config.cpp index 739e7cf4c2..44fc616c1e 100644 --- a/src/cpp/src/generation_config.cpp +++ b/src/cpp/src/generation_config.cpp @@ -490,9 +490,5 @@ GenerationConfig multinomial() { return multinomial_config; } -void add_extension(const std::string& library_path) { - utils::singleton_core().add_extension(library_path); -} - } // namespace genai } // namespace ov diff --git a/src/cpp/src/utils.cpp b/src/cpp/src/utils.cpp index 8948832f26..8b6fce7640 100644 --- a/src/cpp/src/utils.cpp +++ b/src/cpp/src/utils.cpp @@ -718,6 +718,22 @@ std::pair extract_attention_backend(const ov::AnyMap& e return {properties, attention_backend}; }; +std::pair> extract_extensions(const ov::AnyMap& external_properties) { + std::vector extensions; + ov::AnyMap properties = external_properties; + + auto it = properties.find("EXTENSIONS"); + if (it != properties.end()) { + extensions = it->second.as>(); + for (auto ext : extensions) { + std::cout << "ext = " << ext.c_str() << std::endl; + } + properties.erase(it); + } + + return {properties, extensions}; +} + void release_core_plugin(const std::string& device) { try { singleton_core().unload_plugin(device); diff --git a/src/cpp/src/utils.hpp b/src/cpp/src/utils.hpp index 14106ef8f7..edc4adde21 100644 --- a/src/cpp/src/utils.hpp +++ b/src/cpp/src/utils.hpp @@ -286,6 +286,8 @@ bool explicitly_requires_paged_attention(const ov::AnyMap& properties, bool is_n std::pair extract_attention_backend(const ov::AnyMap& external_properties, bool is_npu_requested = false); +std::pair> extract_extensions(const ov::AnyMap& external_properties); + void save_openvino_model(const std::shared_ptr& model, const std::string& save_path, bool compress_to_fp16); ov::Tensor merge_text_and_image_embeddings_llava(const ov::Tensor& input_ids, ov::Tensor& text_embeds, const std::vector& image_embeds, int64_t image_token_id); diff --git a/src/cpp/src/visual_language/pipeline.cpp b/src/cpp/src/visual_language/pipeline.cpp index f6f8b07f7b..df4ec16eb7 100644 --- a/src/cpp/src/visual_language/pipeline.cpp +++ b/src/cpp/src/visual_language/pipeline.cpp @@ -80,9 +80,12 @@ class 
VLMPipeline::VLMPipelineImpl : public VLMPipelineBase{ } { m_is_npu = device.find("NPU") != std::string::npos; - auto properties_copy = properties; auto language_model_path = models_dir / "openvino_language_model.xml"; - auto language_model = utils::singleton_core().read_model(language_model_path, {}, properties_copy); + auto [properties_copy, extensions] = utils::extract_extensions(properties); + for (auto extension : extensions) { + utils::singleton_core().add_extension(extension); + } + auto language_model = utils::singleton_core().read_model(language_model_path, {}, properties_copy); auto kv_pos = ov::genai::utils::get_kv_axes_pos(language_model); // In case user provided properties per-device @@ -157,8 +160,14 @@ class VLMPipeline::VLMPipelineImpl : public VLMPipelineBase{ m_embedding = m_inputs_embedder->get_embedding_model(); auto m_language_pair = utils::get_model_weights_pair(models_map, "language"); + + auto [properties_copy, extensions] = utils::extract_extensions(properties); + for (auto extension : extensions) { + utils::singleton_core().add_extension(extension); + } + m_language = utils::singleton_core().compile_model( - m_language_pair.first, m_language_pair.second, device, properties + m_language_pair.first, m_language_pair.second, device, properties_copy ).create_infer_request(); m_language.get_tensor("attention_mask").set_shape({1, 0}); diff --git a/src/python/openvino_genai/__init__.py b/src/python/openvino_genai/__init__.py index 997cb5cf19..b10aadd062 100644 --- a/src/python/openvino_genai/__init__.py +++ b/src/python/openvino_genai/__init__.py @@ -19,8 +19,7 @@ get_version, StreamingStatus, TextStreamer, - TextParserStreamer, - add_extension + TextParserStreamer ) from .py_openvino_genai import ( diff --git a/src/python/openvino_genai/__init__.pyi b/src/python/openvino_genai/__init__.pyi index 6a2ffd2d2f..c1d1f1dc30 100644 --- a/src/python/openvino_genai/__init__.pyi +++ b/src/python/openvino_genai/__init__.pyi @@ -74,8 +74,7 @@ from openvino_genai.py_openvino_genai import WhisperPipeline from openvino_genai.py_openvino_genai import WhisperRawPerfMetrics from openvino_genai.py_openvino_genai import draft_model from openvino_genai.py_openvino_genai import get_version -from openvino_genai.py_openvino_genai import add_extension import os as os from . 
import py_openvino_genai -__all__: list[str] = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChatHistory', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'DeepSeekR1ReasoningIncrementalParser', 'DeepSeekR1ReasoningParser', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'ImageGenerationPerfMetrics', 'IncrementalParser', 'InpaintingPipeline', 'KVCrushAnchorPointMode', 'KVCrushConfig', 'LLMPipeline', 'Llama3JsonToolParser', 'Llama3PythonicToolParser', 'Parser', 'PerfMetrics', 'Phi4ReasoningIncrementalParser', 'Phi4ReasoningParser', 'RawImageGenerationPerfMetrics', 'RawPerfMetrics', 'ReasoningIncrementalParser', 'ReasoningParser', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'SparseAttentionConfig', 'SparseAttentionMode', 'SpeechGenerationConfig', 'SpeechGenerationPerfMetrics', 'StopCriteria', 'StreamerBase', 'StreamingStatus', 'StructuralTagItem', 'StructuralTagsConfig', 'StructuredOutputConfig', 'T5EncoderModel', 'Text2ImagePipeline', 'Text2SpeechDecodedResults', 'Text2SpeechPipeline', 'TextEmbeddingPipeline', 'TextParserStreamer', 'TextRerankPipeline', 'TextStreamer', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMPipeline', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'add_extension', 'draft_model', 'get_version', 'openvino', 'os', 'py_openvino_genai'] +__all__: list[str] = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChatHistory', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'DeepSeekR1ReasoningIncrementalParser', 'DeepSeekR1ReasoningParser', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'ImageGenerationPerfMetrics', 'IncrementalParser', 'InpaintingPipeline', 'KVCrushAnchorPointMode', 'KVCrushConfig', 'LLMPipeline', 'Llama3JsonToolParser', 'Llama3PythonicToolParser', 'Parser', 'PerfMetrics', 'Phi4ReasoningIncrementalParser', 'Phi4ReasoningParser', 'RawImageGenerationPerfMetrics', 'RawPerfMetrics', 'ReasoningIncrementalParser', 'ReasoningParser', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'SparseAttentionConfig', 'SparseAttentionMode', 'SpeechGenerationConfig', 'SpeechGenerationPerfMetrics', 'StopCriteria', 'StreamerBase', 'StreamingStatus', 'StructuralTagItem', 'StructuralTagsConfig', 'StructuredOutputConfig', 'T5EncoderModel', 'Text2ImagePipeline', 'Text2SpeechDecodedResults', 'Text2SpeechPipeline', 'TextEmbeddingPipeline', 'TextParserStreamer', 'TextRerankPipeline', 'TextStreamer', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMPipeline', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version', 'openvino', 'os', 'py_openvino_genai'] __version__: str diff --git a/src/python/openvino_genai/py_openvino_genai.pyi b/src/python/openvino_genai/py_openvino_genai.pyi index ef054740ca..bec0eacdbf 100644 --- a/src/python/openvino_genai/py_openvino_genai.pyi +++ b/src/python/openvino_genai/py_openvino_genai.pyi @@ -4405,10 +4405,4 @@ def 
draft_model(models_path: os.PathLike | str | bytes, device: str = '', **kwar def get_version() -> str: """ OpenVINO GenAI version - """ -def add_extension(library_path: str) -> None: - """ - Registers an OpenVINO extension from a library path to enable support for models with custom operations. - - :param library_path: Path to the extension library to register. """ \ No newline at end of file diff --git a/src/python/py_openvino_genai.cpp b/src/python/py_openvino_genai.cpp index ebc4ee2cc1..92ab5d15e1 100644 --- a/src/python/py_openvino_genai.cpp +++ b/src/python/py_openvino_genai.cpp @@ -29,7 +29,6 @@ using ov::genai::StreamingStatus; using ov::genai::TextStreamer; using ov::genai::Tokenizer; using ov::genai::get_version; -using ov::genai::add_extension; void init_lora_adapter(py::module_& m); void init_perf_metrics(py::module_& m); @@ -90,23 +89,6 @@ PYBIND11_MODULE(py_openvino_genai, m) { return get_version().buildNumber; }, get_version().description); - m.def("add_extension", [] (py::str library_path) { - return add_extension(library_path); - }, - R"doc( - Adds a custom extension library to the OpenVINO GenAI backend. - - Parameters - ---------- - library_path : str - Path to the shared library containing the extension. - - Raises - ------ - RuntimeError - If the extension cannot be loaded. - )doc" - ); init_perf_metrics(m); py::class_(m, "DecodedResults", decoded_results_docstring) diff --git a/tests/python_tests/test_add_extension.py b/tests/python_tests/test_add_extension.py deleted file mode 100644 index 8478cf651c..0000000000 --- a/tests/python_tests/test_add_extension.py +++ /dev/null @@ -1,26 +0,0 @@ -# Copyright (C) 2023-2025 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - - -import pytest -import openvino_genai as ov_genai -import platform -import os -import openvino_tokenizers; - -@pytest.mark.precommit -def test_add_extension(): - # Path to the OpenVINO extension shared library (update as needed). - os_name = platform.system() - if os_name == "Windows": - ov_tokenizer_path = os.path.dirname(openvino_tokenizers.__file__) + "\\lib\\openvino_tokenizers.dll" - elif os_name == "Linux": - ov_tokenizer_path = os.path.dirname(openvino_tokenizers.__file__) + "/lib/libopenvino_tokenizers.so" - else: - print(f"Skipped. Current test only support Windows and Linux") - return - - try: - ov_genai.add_extension(ov_tokenizer_path) - except RuntimeError as e: - raise RuntimeError(f"Add extension fail, maybe tokenizers' version mismatch. 
Original error: {e}") \ No newline at end of file diff --git a/tests/python_tests/test_vlm_pipeline.py b/tests/python_tests/test_vlm_pipeline.py index 973574ba64..211d076eab 100644 --- a/tests/python_tests/test_vlm_pipeline.py +++ b/tests/python_tests/test_vlm_pipeline.py @@ -1516,3 +1516,16 @@ def get_nanollava_processor(): genai_text = genai_output.texts[0] assert optimum_text == genai_text + +@pytest.mark.precommit +def test_vlm_pipeline_add_extension(): + model_id = MODEL_IDS[6] + models_path = _get_ov_model(model_id) + + properties = {} + properties["EXTENSIONS"] = ["fake_path"] + + try: + pipe = VLMPipeline(models_path, "CPU", properties) + except RuntimeError as e: + assert("Cannot find entry point to the extension library" in str(e)) \ No newline at end of file From 323e4c2a028728e8e1058b76f299bf462c027af3 Mon Sep 17 00:00:00 2001 From: xipingya Date: Tue, 11 Nov 2025 13:27:51 +0800 Subject: [PATCH 20/38] remove unecessary file --- src/python/py_openvino_genai.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/python/py_openvino_genai.cpp b/src/python/py_openvino_genai.cpp index 92ab5d15e1..ed010a7581 100644 --- a/src/python/py_openvino_genai.cpp +++ b/src/python/py_openvino_genai.cpp @@ -13,7 +13,6 @@ #include "openvino/genai/llm_pipeline.hpp" #include "openvino/genai/text_streamer.hpp" #include "openvino/genai/version.hpp" -#include "openvino/genai/generation_config.hpp" #include "py_utils.hpp" From 82b65bc33d473c716ea2c9e8b1a6a8e57ecb1af6 Mon Sep 17 00:00:00 2001 From: Xiping Yan Date: Tue, 11 Nov 2025 13:46:43 +0800 Subject: [PATCH 21/38] Update src/cpp/src/utils.cpp Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- src/cpp/src/utils.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cpp/src/utils.cpp b/src/cpp/src/utils.cpp index 8b6fce7640..c5f87e0e12 100644 --- a/src/cpp/src/utils.cpp +++ b/src/cpp/src/utils.cpp @@ -725,7 +725,7 @@ std::pair> extract_extensions(const ov::Any auto it = properties.find("EXTENSIONS"); if (it != properties.end()) { extensions = it->second.as>(); - for (auto ext : extensions) { + for (const auto& ext : extensions) { std::cout << "ext = " << ext.c_str() << std::endl; } properties.erase(it); From 3b6391d4c56eb79eba29b0da85fb6fdcd9a2a929 Mon Sep 17 00:00:00 2001 From: Xiping Yan Date: Tue, 11 Nov 2025 13:47:12 +0800 Subject: [PATCH 22/38] Update src/cpp/src/visual_language/pipeline.cpp Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- src/cpp/src/visual_language/pipeline.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cpp/src/visual_language/pipeline.cpp b/src/cpp/src/visual_language/pipeline.cpp index 7bbcdf0823..460f592eb8 100644 --- a/src/cpp/src/visual_language/pipeline.cpp +++ b/src/cpp/src/visual_language/pipeline.cpp @@ -82,7 +82,7 @@ class VLMPipeline::VLMPipelineImpl : public VLMPipelineBase{ auto language_model_path = models_dir / "openvino_language_model.xml"; auto [properties_copy, extensions] = utils::extract_extensions(properties); - for (auto extension : extensions) { + for (const auto& extension : extensions) { utils::singleton_core().add_extension(extension); } auto language_model = utils::singleton_core().read_model(language_model_path, {}, properties_copy); From 0542ae33fa819eb04105beb80655be7398b58836 Mon Sep 17 00:00:00 2001 From: Xiping Yan Date: Tue, 11 Nov 2025 13:48:06 +0800 Subject: [PATCH 23/38] Update tests/python_tests/test_vlm_pipeline.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- 
tests/python_tests/test_vlm_pipeline.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/python_tests/test_vlm_pipeline.py b/tests/python_tests/test_vlm_pipeline.py index a9ac42ee6f..27a5526e80 100644 --- a/tests/python_tests/test_vlm_pipeline.py +++ b/tests/python_tests/test_vlm_pipeline.py @@ -1541,7 +1541,6 @@ def test_vlm_pipeline_add_extension(): properties = {} properties["EXTENSIONS"] = ["fake_path"] - try: + with pytest.raises(RuntimeError) as exc_info: pipe = VLMPipeline(models_path, "CPU", properties) - except RuntimeError as e: - assert("Cannot find entry point to the extension library" in str(e)) \ No newline at end of file + assert "Cannot find entry point to the extension library" in str(exc_info.value) \ No newline at end of file From 02d531083166cd4b8031de4383d2f4e4eebbc2b6 Mon Sep 17 00:00:00 2001 From: Xiping Yan Date: Tue, 11 Nov 2025 13:48:32 +0800 Subject: [PATCH 24/38] Update tests/python_tests/test_vlm_pipeline.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- tests/python_tests/test_vlm_pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/python_tests/test_vlm_pipeline.py b/tests/python_tests/test_vlm_pipeline.py index 27a5526e80..7656145537 100644 --- a/tests/python_tests/test_vlm_pipeline.py +++ b/tests/python_tests/test_vlm_pipeline.py @@ -1542,5 +1542,5 @@ def test_vlm_pipeline_add_extension(): properties["EXTENSIONS"] = ["fake_path"] with pytest.raises(RuntimeError) as exc_info: - pipe = VLMPipeline(models_path, "CPU", properties) + VLMPipeline(models_path, "CPU", properties) assert "Cannot find entry point to the extension library" in str(exc_info.value) \ No newline at end of file From 97a88fc88047ba7c0f9ba3ec06668a195ab5a7c2 Mon Sep 17 00:00:00 2001 From: Xiping Yan Date: Tue, 11 Nov 2025 13:49:04 +0800 Subject: [PATCH 25/38] Update src/cpp/src/visual_language/pipeline.cpp Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- src/cpp/src/visual_language/pipeline.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cpp/src/visual_language/pipeline.cpp b/src/cpp/src/visual_language/pipeline.cpp index 460f592eb8..452c8bd3e6 100644 --- a/src/cpp/src/visual_language/pipeline.cpp +++ b/src/cpp/src/visual_language/pipeline.cpp @@ -162,7 +162,7 @@ class VLMPipeline::VLMPipelineImpl : public VLMPipelineBase{ auto m_language_pair = utils::get_model_weights_pair(models_map, "language"); auto [properties_copy, extensions] = utils::extract_extensions(properties); - for (auto extension : extensions) { + for (const auto& extension : extensions) { utils::singleton_core().add_extension(extension); } From 200fb05bea6b44a105db91892a9130a4b1e46be8 Mon Sep 17 00:00:00 2001 From: xipingya Date: Tue, 11 Nov 2025 14:02:02 +0800 Subject: [PATCH 26/38] Remove print. 
--- src/cpp/src/utils.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/cpp/src/utils.cpp b/src/cpp/src/utils.cpp index c5f87e0e12..ef81e6cccd 100644 --- a/src/cpp/src/utils.cpp +++ b/src/cpp/src/utils.cpp @@ -725,9 +725,6 @@ std::pair<ov::AnyMap, std::vector<std::string>> extract_extensions(const ov::Any auto it = properties.find("EXTENSIONS"); if (it != properties.end()) { extensions = it->second.as<std::vector<std::string>>(); - for (const auto& ext : extensions) { - std::cout << "ext = " << ext.c_str() << std::endl; - } properties.erase(it); } From f49ebb93f832b7f962b1b589ffc2829af715e572 Mon Sep 17 00:00:00 2001 From: Xiping Yan Date: Tue, 11 Nov 2025 14:02:32 +0800 Subject: [PATCH 27/38] Update src/cpp/include/openvino/genai/generation_config.hpp Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- src/cpp/include/openvino/genai/generation_config.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/cpp/include/openvino/genai/generation_config.hpp b/src/cpp/include/openvino/genai/generation_config.hpp index dbc6060c36..e592cb36ff 100644 --- a/src/cpp/include/openvino/genai/generation_config.hpp +++ b/src/cpp/include/openvino/genai/generation_config.hpp @@ -706,7 +706,6 @@ class OPENVINO_GENAI_EXPORTS GenerationConfig { void validate() const; }; - /* * utils that allow to use generate and operator() in the following way: * pipe.generate(input_ids, ov::genai::max_new_tokens(200), ov::genai::temperature(1.0f),...) From 34499ec3953470d56d1e85155ce91a0bc8cb500b Mon Sep 17 00:00:00 2001 From: xipingya Date: Tue, 11 Nov 2025 16:39:19 +0800 Subject: [PATCH 28/38] enable extensions for the LLM pipeline; there are lots of interleaved calls, and ContinuousBatchingPipeline is very complex, so just register the extensions in the ContinuousBatchingPipeline constructors. Signed-off-by: xipingya --- src/cpp/src/continuous_batching/pipeline.cpp | 20 ++++++++++++++++++++ src/cpp/src/llm/pipeline.cpp | 5 +++++ src/cpp/src/llm/pipeline_stateful.cpp | 6 +++++- src/cpp/src/utils.cpp | 6 ++---- src/cpp/src/utils.hpp | 2 +- src/cpp/src/visual_language/pipeline.cpp | 5 +++-- 6 files changed, 36 insertions(+), 8 deletions(-) diff --git a/src/cpp/src/continuous_batching/pipeline.cpp b/src/cpp/src/continuous_batching/pipeline.cpp index 16eb169de7..37bb2e28c0 100644 --- a/src/cpp/src/continuous_batching/pipeline.cpp +++ b/src/cpp/src/continuous_batching/pipeline.cpp @@ -46,6 +46,11 @@ ContinuousBatchingPipeline::ContinuousBatchingPipeline( const std::filesystem::p auto draft_model_desr = utils::extract_draft_model_from_config(properties_without_draft_model); auto is_prompt_lookup_enabled = extract_prompt_lookup_from_config(properties_without_draft_model); + auto extensions = utils::extract_extensions(properties); + for (const auto& extension : extensions) { + utils::singleton_core().add_extension(extension); + } + auto model = utils::read_model(models_path, properties); auto [properties_without_draft_model_without_gguf, enable_save_ov_model] = utils::extract_gguf_properties(properties_without_draft_model); properties_without_draft_model_without_gguf[ov::cache_model_path.name()] = models_path; @@ -88,6 +93,11 @@ ContinuousBatchingPipeline::ContinuousBatchingPipeline( auto draft_model_desr = utils::extract_draft_model_from_config(properties_without_draft_model); auto is_prompt_lookup_enabled = extract_prompt_lookup_from_config(properties_without_draft_model); + auto extensions = utils::extract_extensions(properties); + for (const auto& extension : extensions) { + utils::singleton_core().add_extension(extension); + } + auto model = utils::read_model(models_path, properties_without_draft_model); auto
auto [properties_without_draft_model_without_gguf, enable_save_ov_model] = utils::extract_gguf_properties(properties_without_draft_model); properties_without_draft_model_without_gguf[ov::cache_model_path.name()] = models_path; @@ -133,6 +143,11 @@ ContinuousBatchingPipeline::ContinuousBatchingPipeline( auto is_prompt_lookup_enabled = extract_prompt_lookup_from_config(properties_without_draft_model); auto model = utils::singleton_core().read_model(model_str, weights_tensor); + auto extensions = utils::extract_extensions(properties); + for (const auto& extension : extensions) { + utils::singleton_core().add_extension(extension); + } + auto rt_info = model->get_rt_info(); std::shared_ptr embedder = nullptr; std::filesystem::path directory; @@ -179,6 +194,11 @@ ContinuousBatchingPipeline::ContinuousBatchingPipeline( auto model_pair = utils::get_model_weights_pair(models_map, "language"); auto model = utils::singleton_core().read_model(model_pair.first, model_pair.second); + auto extensions = utils::extract_extensions(properties); + for (const auto& extension : extensions) { + utils::singleton_core().add_extension(extension); + } + auto rt_info = model->get_rt_info(); std::filesystem::path directory; std::shared_ptr embedder = nullptr; diff --git a/src/cpp/src/llm/pipeline.cpp b/src/cpp/src/llm/pipeline.cpp index e057d5da72..cbdda03700 100644 --- a/src/cpp/src/llm/pipeline.cpp +++ b/src/cpp/src/llm/pipeline.cpp @@ -260,6 +260,11 @@ ov::genai::LLMPipeline::LLMPipeline( bool is_npu_requested = ov::genai::utils::is_npu_requested(device, user_properties); auto [properties, attention_backend] = utils::extract_attention_backend(user_properties, is_npu_requested); + auto extensions = utils::extract_extensions(properties); + for (const auto& extension : extensions) { + utils::singleton_core().add_extension(extension); + } + if (is_npu_requested) { m_pimpl = StatefulPipeline::create( utils::singleton_core().read_model(model_str, weights_tensor), diff --git a/src/cpp/src/llm/pipeline_stateful.cpp b/src/cpp/src/llm/pipeline_stateful.cpp index 81f91d7054..52cd0bf305 100644 --- a/src/cpp/src/llm/pipeline_stateful.cpp +++ b/src/cpp/src/llm/pipeline_stateful.cpp @@ -78,7 +78,11 @@ StatefulLLMPipeline::StatefulLLMPipeline( m_max_prompt_len = kv_desc.max_prompt_len; m_max_kv_cache_size = kv_desc.max_prompt_len + kv_desc.min_response_len; } else { - compiled_model = utils::singleton_core().compile_model(model, device, *filtered_properties); + auto extensions = utils::extract_extensions(properties); + for (const auto& extension : extensions) { + utils::singleton_core().add_extension(extension); + } + compiled_model = utils::singleton_core().compile_model(model, device, *filtered_properties); } m_model_runner = compiled_model.create_infer_request(); ov::genai::utils::print_compiled_model_properties(compiled_model, "Stateful LLM model"); diff --git a/src/cpp/src/utils.cpp b/src/cpp/src/utils.cpp index ef81e6cccd..b2f2e41dd0 100644 --- a/src/cpp/src/utils.cpp +++ b/src/cpp/src/utils.cpp @@ -718,17 +718,15 @@ std::pair extract_attention_backend(const ov::AnyMap& e return {properties, attention_backend}; }; -std::pair> extract_extensions(const ov::AnyMap& external_properties) { +std::vector extract_extensions(const ov::AnyMap& properties) { std::vector extensions; - ov::AnyMap properties = external_properties; auto it = properties.find("EXTENSIONS"); if (it != properties.end()) { extensions = it->second.as>(); - properties.erase(it); } - return {properties, extensions}; + return extensions; } void 
release_core_plugin(const std::string& device) { diff --git a/src/cpp/src/utils.hpp b/src/cpp/src/utils.hpp index edc4adde21..1e614d9836 100644 --- a/src/cpp/src/utils.hpp +++ b/src/cpp/src/utils.hpp @@ -286,7 +286,7 @@ bool explicitly_requires_paged_attention(const ov::AnyMap& properties, bool is_n std::pair extract_attention_backend(const ov::AnyMap& external_properties, bool is_npu_requested = false); -std::pair> extract_extensions(const ov::AnyMap& external_properties); +std::vector extract_extensions(const ov::AnyMap& properties); void save_openvino_model(const std::shared_ptr& model, const std::string& save_path, bool compress_to_fp16); diff --git a/src/cpp/src/visual_language/pipeline.cpp b/src/cpp/src/visual_language/pipeline.cpp index 452c8bd3e6..89c0640e3a 100644 --- a/src/cpp/src/visual_language/pipeline.cpp +++ b/src/cpp/src/visual_language/pipeline.cpp @@ -80,8 +80,9 @@ class VLMPipeline::VLMPipelineImpl : public VLMPipelineBase{ } { m_is_npu = device.find("NPU") != std::string::npos; + auto properties_copy = properties; auto language_model_path = models_dir / "openvino_language_model.xml"; - auto [properties_copy, extensions] = utils::extract_extensions(properties); + auto extensions = utils::extract_extensions(properties_copy); for (const auto& extension : extensions) { utils::singleton_core().add_extension(extension); } @@ -161,7 +162,7 @@ class VLMPipeline::VLMPipelineImpl : public VLMPipelineBase{ auto m_language_pair = utils::get_model_weights_pair(models_map, "language"); - auto [properties_copy, extensions] = utils::extract_extensions(properties); + auto extensions = utils::extract_extensions(properties); for (const auto& extension : extensions) { utils::singleton_core().add_extension(extension); } From a92fe1f03ba92be3b8f669ed44e139a68faf635e Mon Sep 17 00:00:00 2001 From: sunxiaoxia2022 Date: Thu, 13 Nov 2025 16:52:17 +0800 Subject: [PATCH 29/38] add test case of cb and llm --- .../src/visual_language/embedding_model.cpp | 8 ++++++ src/cpp/src/visual_language/pipeline.cpp | 2 +- .../python_tests/test_continuous_batching.py | 14 ++++++++++ tests/python_tests/test_llm_pipeline.py | 11 ++++++++ tests/python_tests/test_vlm_pipeline.py | 27 ++++++++++--------- 5 files changed, 48 insertions(+), 14 deletions(-) diff --git a/src/cpp/src/visual_language/embedding_model.cpp b/src/cpp/src/visual_language/embedding_model.cpp index db436dac07..fcee8a66c2 100644 --- a/src/cpp/src/visual_language/embedding_model.cpp +++ b/src/cpp/src/visual_language/embedding_model.cpp @@ -45,6 +45,10 @@ EmbeddingsModel::EmbeddingsModel(const std::filesystem::path& model_dir, const std::string& device, const ov::AnyMap& properties) { ov::Core core = utils::singleton_core(); + auto extensions = utils::extract_extensions(properties); + for (const auto& extension : extensions) { + core.add_extension(extension); + } std::shared_ptr m_model = core.read_model(model_dir / "openvino_text_embeddings_model.xml", {}, properties); // apply embedding postprocessing step by merging them into the model merge_postprocess(m_model, scale_emb); @@ -60,6 +64,10 @@ EmbeddingsModel::EmbeddingsModel(const std::string& model, const std::string& device, const ov::AnyMap& properties) { ov::Core core = utils::singleton_core(); + auto extensions = utils::extract_extensions(properties); + for (const auto& extension : extensions) { + core.add_extension(extension); + } std::shared_ptr m_model = core.read_model(model, weights); // apply embedding postprocessing step by merging them into the model merge_postprocess(m_model, 
scale_emb); diff --git a/src/cpp/src/visual_language/pipeline.cpp b/src/cpp/src/visual_language/pipeline.cpp index 89c0640e3a..f52d4df161 100644 --- a/src/cpp/src/visual_language/pipeline.cpp +++ b/src/cpp/src/visual_language/pipeline.cpp @@ -168,7 +168,7 @@ class VLMPipeline::VLMPipelineImpl : public VLMPipelineBase{ } m_language = utils::singleton_core().compile_model( - m_language_pair.first, m_language_pair.second, device, properties_copy + m_language_pair.first, m_language_pair.second, device, properties ).create_infer_request(); m_language.get_tensor("attention_mask").set_shape({1, 0}); diff --git a/tests/python_tests/test_continuous_batching.py b/tests/python_tests/test_continuous_batching.py index 90ea7ac316..4b4d22c2fd 100644 --- a/tests/python_tests/test_continuous_batching.py +++ b/tests/python_tests/test_continuous_batching.py @@ -542,3 +542,17 @@ def test_speculative_decoding_extended_perf_metrics(pipeline_type): assert std_gen_duration == 0 else: assert extended_perf_metrics is None + + +@pytest.mark.precommit +def test_continuous_batching_add_extension(): + model_id = 'katuni4ka/tiny-random-phi3' + _, _, models_path = download_and_convert_model(model_id) + + scheduler_config = SchedulerConfig() + + properties = {"EXTENSIONS": ["fake_path"]} + + with pytest.raises(RuntimeError) as exc_info: + ContinuousBatchingPipeline(models_path, scheduler_config, "CPU", properties) + assert "Cannot find entry point to the extension library" in str(exc_info.value) diff --git a/tests/python_tests/test_llm_pipeline.py b/tests/python_tests/test_llm_pipeline.py index 36b4688bcf..698e45bde7 100644 --- a/tests/python_tests/test_llm_pipeline.py +++ b/tests/python_tests/test_llm_pipeline.py @@ -832,3 +832,14 @@ def py_streamer(py_str: str): else: assert it_cnt > 0 + +@pytest.mark.precommit +def test_llm_pipeline_add_extension(): + model_id = "katuni4ka/tiny-random-phi3" + _, _, models_path = download_and_convert_model(model_id) + + properties = {"EXTENSIONS": ["fake_path"]} + + with pytest.raises(RuntimeError) as exc_info: + ov_genai.LLMPipeline(models_path, "CPU", **properties) + assert "Cannot find entry point to the extension library" in str(exc_info.value) diff --git a/tests/python_tests/test_vlm_pipeline.py b/tests/python_tests/test_vlm_pipeline.py index 7656145537..62d72cdec7 100644 --- a/tests/python_tests/test_vlm_pipeline.py +++ b/tests/python_tests/test_vlm_pipeline.py @@ -304,12 +304,14 @@ def ov_pipe_model(request: pytest.FixtureRequest) -> VlmModelInfo: ids=lambda p: f"{p[0]}/{p[1]}", indirect=["ov_pipe_model"], ) - + + @pytest.fixture(scope="module") def ov_continious_batching_pipe() -> ContinuousBatchingPipeline: models_path = _get_ov_model(MODEL_IDS[0]) return ContinuousBatchingPipeline(models_path, SchedulerConfig(), "CPU") - + + @pytest.fixture(scope="module") def ov_continious_batching_pipe_gemma() -> ContinuousBatchingPipeline: models_path = _get_ov_model(MODEL_IDS[8]) @@ -400,7 +402,7 @@ def cat_tensor(cat_image) -> openvino.Tensor: def car_tensor(pytestconfig: pytest.Config) -> openvino.Tensor: return openvino.Tensor(from_cache_or_download(pytestconfig, TEST_IMAGE_URLS['car'], "car.jpg")) - + @pytest.fixture(scope="module") def synthetic_video_32x32_tensor(synthetic_video_32x32): return openvino.Tensor(synthetic_video_32x32) @@ -1370,8 +1372,8 @@ def test_model_tags_older(ov_pipe_model: VlmModelInfo, car_tensor: openvino.Tens with pytest.raises(RuntimeError): ov_pipe.generate("", images=[car_tensor]) ov_pipe.finish_chat() - - + + @pytest.mark.precommit 
@model_and_tag_parametrize() def test_model_tags_missing_universal(ov_pipe_model: VlmModelInfo): @@ -1379,8 +1381,8 @@ def test_model_tags_missing_universal(ov_pipe_model: VlmModelInfo): with pytest.raises(RuntimeError): ov_pipe.generate("") - - + + @pytest.mark.precommit @model_and_tag_parametrize() def test_model_tags_missing_native(ov_pipe_model: VlmModelInfo): @@ -1389,7 +1391,7 @@ def test_model_tags_missing_native(ov_pipe_model: VlmModelInfo): with pytest.raises(RuntimeError): ov_pipe.generate(image_tag(0)) - + @pytest.mark.precommit @pytest.mark.parametrize( @@ -1535,12 +1537,11 @@ def get_nanollava_processor(): @pytest.mark.precommit def test_vlm_pipeline_add_extension(): - model_id = MODEL_IDS[6] + model_id = VIDEO_MODEL_IDS[1] models_path = _get_ov_model(model_id) - properties = {} - properties["EXTENSIONS"] = ["fake_path"] + properties = {"EXTENSIONS": ["fake_path"]} with pytest.raises(RuntimeError) as exc_info: - VLMPipeline(models_path, "CPU", properties) - assert "Cannot find entry point to the extension library" in str(exc_info.value) \ No newline at end of file + VLMPipeline(models_path, "CPU", config=properties) + assert "Cannot find entry point to the extension library" in str(exc_info.value) From 75900d046043ed7d7001c50ff1d749395a2c0bf4 Mon Sep 17 00:00:00 2001 From: sunxiaoxia2022 Date: Fri, 14 Nov 2025 14:23:05 +0800 Subject: [PATCH 30/38] change extract_extensions to add_extensions_to_core --- src/cpp/src/continuous_batching/pipeline.cpp | 26 +++++-------------- src/cpp/src/llm/pipeline.cpp | 6 +---- src/cpp/src/llm/pipeline_stateful.cpp | 8 +++--- src/cpp/src/utils.cpp | 14 +++++----- src/cpp/src/utils.hpp | 3 ++- .../src/visual_language/embedding_model.cpp | 18 +++++-------- src/cpp/src/visual_language/pipeline.cpp | 14 +++------- .../src/visual_language/qwen2vl/classes.cpp | 16 +++++++++--- 8 files changed, 42 insertions(+), 63 deletions(-) diff --git a/src/cpp/src/continuous_batching/pipeline.cpp b/src/cpp/src/continuous_batching/pipeline.cpp index 37bb2e28c0..24f4f4f1ef 100644 --- a/src/cpp/src/continuous_batching/pipeline.cpp +++ b/src/cpp/src/continuous_batching/pipeline.cpp @@ -46,11 +46,7 @@ ContinuousBatchingPipeline::ContinuousBatchingPipeline( const std::filesystem::p auto draft_model_desr = utils::extract_draft_model_from_config(properties_without_draft_model); auto is_prompt_lookup_enabled = extract_prompt_lookup_from_config(properties_without_draft_model); - auto extensions = utils::extract_extensions(properties); - for (const auto& extension : extensions) { - utils::singleton_core().add_extension(extension); - } - + utils::add_extensions_to_core(properties_without_draft_model); auto model = utils::read_model(models_path, properties); auto [properties_without_draft_model_without_gguf, enable_save_ov_model] = utils::extract_gguf_properties(properties_without_draft_model); properties_without_draft_model_without_gguf[ov::cache_model_path.name()] = models_path; @@ -93,11 +89,7 @@ ContinuousBatchingPipeline::ContinuousBatchingPipeline( auto draft_model_desr = utils::extract_draft_model_from_config(properties_without_draft_model); auto is_prompt_lookup_enabled = extract_prompt_lookup_from_config(properties_without_draft_model); - auto extensions = utils::extract_extensions(properties); - for (const auto& extension : extensions) { - utils::singleton_core().add_extension(extension); - } - + utils::add_extensions_to_core(properties_without_draft_model); auto model = utils::read_model(models_path, properties_without_draft_model); auto 
[properties_without_draft_model_without_gguf, enable_save_ov_model] = utils::extract_gguf_properties(properties_without_draft_model); properties_without_draft_model_without_gguf[ov::cache_model_path.name()] = models_path; @@ -141,12 +133,9 @@ ContinuousBatchingPipeline::ContinuousBatchingPipeline( auto properties_without_draft_model = properties; auto draft_model_desr = utils::extract_draft_model_from_config(properties_without_draft_model); auto is_prompt_lookup_enabled = extract_prompt_lookup_from_config(properties_without_draft_model); - auto model = utils::singleton_core().read_model(model_str, weights_tensor); - auto extensions = utils::extract_extensions(properties); - for (const auto& extension : extensions) { - utils::singleton_core().add_extension(extension); - } + utils::add_extensions_to_core(properties_without_draft_model); + auto model = utils::singleton_core().read_model(model_str, weights_tensor); auto rt_info = model->get_rt_info(); std::shared_ptr<InputsEmbedder> embedder = nullptr; @@ -192,12 +181,9 @@ ContinuousBatchingPipeline::ContinuousBatchingPipeline( auto draft_model_desr = utils::extract_draft_model_from_config(properties_without_draft_model); auto is_prompt_lookup_enabled = extract_prompt_lookup_from_config(properties_without_draft_model); auto model_pair = utils::get_model_weights_pair(models_map, "language"); - auto model = utils::singleton_core().read_model(model_pair.first, model_pair.second); - auto extensions = utils::extract_extensions(properties); - for (const auto& extension : extensions) { - utils::singleton_core().add_extension(extension); - } + utils::add_extensions_to_core(properties_without_draft_model); + auto model = utils::singleton_core().read_model(model_pair.first, model_pair.second); auto rt_info = model->get_rt_info(); std::filesystem::path directory; diff --git a/src/cpp/src/llm/pipeline.cpp b/src/cpp/src/llm/pipeline.cpp index cbdda03700..6efede9c1c 100644 --- a/src/cpp/src/llm/pipeline.cpp +++ b/src/cpp/src/llm/pipeline.cpp @@ -259,11 +259,7 @@ ov::genai::LLMPipeline::LLMPipeline( bool is_npu_requested = ov::genai::utils::is_npu_requested(device, user_properties); auto [properties, attention_backend] = utils::extract_attention_backend(user_properties, is_npu_requested); - - auto extensions = utils::extract_extensions(properties); - for (const auto& extension : extensions) { - utils::singleton_core().add_extension(extension); - } + utils::add_extensions_to_core(properties); if (is_npu_requested) { m_pimpl = StatefulPipeline::create( diff --git a/src/cpp/src/llm/pipeline_stateful.cpp b/src/cpp/src/llm/pipeline_stateful.cpp index 52cd0bf305..2bc1e4fb22 100644 --- a/src/cpp/src/llm/pipeline_stateful.cpp +++ b/src/cpp/src/llm/pipeline_stateful.cpp @@ -78,11 +78,9 @@ StatefulLLMPipeline::StatefulLLMPipeline( m_max_prompt_len = kv_desc.max_prompt_len; m_max_kv_cache_size = kv_desc.max_prompt_len + kv_desc.min_response_len; } else { - auto extensions = utils::extract_extensions(properties); - for (const auto& extension : extensions) { - utils::singleton_core().add_extension(extension); - } - compiled_model = utils::singleton_core().compile_model(model, device, *filtered_properties); + auto properties_without_extensions = *filtered_properties; + utils::add_extensions_to_core(properties_without_extensions); + compiled_model = utils::singleton_core().compile_model(model, device, properties_without_extensions); } m_model_runner = compiled_model.create_infer_request(); ov::genai::utils::print_compiled_model_properties(compiled_model, "Stateful LLM model"); diff --git
a/src/cpp/src/utils.cpp b/src/cpp/src/utils.cpp index b2f2e41dd0..72eccd2aea 100644 --- a/src/cpp/src/utils.cpp +++ b/src/cpp/src/utils.cpp @@ -718,15 +718,15 @@ std::pair<ov::AnyMap, std::string> extract_attention_backend(const ov::AnyMap& e return {properties, attention_backend}; }; -std::vector<std::string> extract_extensions(const ov::AnyMap& properties) { - std::vector<std::string> extensions; - - auto it = properties.find("EXTENSIONS"); +void add_extensions_to_core(ov::AnyMap& properties) { + auto it = properties.find(EXTENSIONS_ARG_NAME); if (it != properties.end()) { - extensions = it->second.as<std::vector<std::string>>(); + auto extensions = it->second.as<std::vector<std::string>>(); + for (const auto& extension : extensions) { + singleton_core().add_extension(extension); + } + properties.erase(it); } - - return extensions; } void release_core_plugin(const std::string& device) { diff --git a/src/cpp/src/utils.hpp b/src/cpp/src/utils.hpp index 1e614d9836..64de9d22a8 100644 --- a/src/cpp/src/utils.hpp +++ b/src/cpp/src/utils.hpp @@ -103,6 +103,7 @@ void read_anymap_param(const ov::AnyMap& config_map, const std::string& name, T& const std::string STREAMER_ARG_NAME = "streamer"; const std::string CONFIG_ARG_NAME = "generation_config"; const std::string DRAFT_MODEL_ARG_NAME = "draft_model"; +const std::string EXTENSIONS_ARG_NAME = "EXTENSIONS"; template <typename Config = ov::genai::GenerationConfig> Config from_config_json_if_exists(const std::filesystem::path& models_path, const char config_name[] = "generation_config.json") { @@ -286,7 +287,7 @@ bool explicitly_requires_paged_attention(const ov::AnyMap& properties, bool is_n std::pair<ov::AnyMap, std::string> extract_attention_backend(const ov::AnyMap& external_properties, bool is_npu_requested = false); -std::vector<std::string> extract_extensions(const ov::AnyMap& properties); +void add_extensions_to_core(ov::AnyMap& properties); void save_openvino_model(const std::shared_ptr<ov::Model>& model, const std::string& save_path, bool compress_to_fp16); diff --git a/src/cpp/src/visual_language/embedding_model.cpp b/src/cpp/src/visual_language/embedding_model.cpp index fcee8a66c2..3da633c704 100644 --- a/src/cpp/src/visual_language/embedding_model.cpp +++ b/src/cpp/src/visual_language/embedding_model.cpp @@ -45,15 +45,13 @@ EmbeddingsModel::EmbeddingsModel(const std::filesystem::path& model_dir, const std::string& device, const ov::AnyMap& properties) { ov::Core core = utils::singleton_core(); - auto extensions = utils::extract_extensions(properties); - for (const auto& extension : extensions) { - core.add_extension(extension); - } - std::shared_ptr<ov::Model> m_model = core.read_model(model_dir / "openvino_text_embeddings_model.xml", {}, properties); + auto properties_copy = properties; + utils::add_extensions_to_core(properties_copy); + std::shared_ptr<ov::Model> m_model = core.read_model(model_dir / "openvino_text_embeddings_model.xml", {}, properties_copy); // apply embedding postprocessing step by merging them into the model merge_postprocess(m_model, scale_emb); - ov::CompiledModel compiled_model = core.compile_model(m_model, device, properties); + ov::CompiledModel compiled_model = core.compile_model(m_model, device, properties_copy); ov::genai::utils::print_compiled_model_properties(compiled_model, "text embeddings model"); m_embeddings_requests_queue = init(compiled_model); } @@ -62,13 +60,11 @@ EmbeddingsModel::EmbeddingsModel(const std::string& model, const std::string& device, const ov::AnyMap& properties) { ov::Core core = utils::singleton_core(); - auto extensions = utils::extract_extensions(properties); - for (const auto& extension : extensions) { - core.add_extension(extension); - } + auto properties_copy = properties; +
utils::add_extensions_to_core(properties_copy); std::shared_ptr<ov::Model> m_model = core.read_model(model, weights); // apply embedding postprocessing step by merging them into the model merge_postprocess(m_model, scale_emb); - ov::CompiledModel compiled_model = core.compile_model(m_model, device, properties); + ov::CompiledModel compiled_model = core.compile_model(m_model, device, properties_copy); m_embeddings_requests_queue = init(compiled_model); } diff --git a/src/cpp/src/visual_language/pipeline.cpp b/src/cpp/src/visual_language/pipeline.cpp index f52d4df161..9fa8f94f59 100644 --- a/src/cpp/src/visual_language/pipeline.cpp +++ b/src/cpp/src/visual_language/pipeline.cpp @@ -82,10 +82,7 @@ class VLMPipeline::VLMPipelineImpl : public VLMPipelineBase{ auto properties_copy = properties; auto language_model_path = models_dir / "openvino_language_model.xml"; - auto extensions = utils::extract_extensions(properties_copy); - for (const auto& extension : extensions) { - utils::singleton_core().add_extension(extension); - } + utils::add_extensions_to_core(properties_copy); auto language_model = utils::singleton_core().read_model(language_model_path, {}, properties_copy); auto kv_pos = ov::genai::utils::get_kv_axes_pos(language_model); @@ -161,14 +158,11 @@ class VLMPipeline::VLMPipelineImpl : public VLMPipelineBase{ m_embedding = m_inputs_embedder->get_embedding_model(); auto m_language_pair = utils::get_model_weights_pair(models_map, "language"); - - auto extensions = utils::extract_extensions(properties); - for (const auto& extension : extensions) { - utils::singleton_core().add_extension(extension); - } + auto properties_without_extensions = properties; + utils::add_extensions_to_core(properties_without_extensions); m_language = utils::singleton_core().compile_model( - m_language_pair.first, m_language_pair.second, device, properties + m_language_pair.first, m_language_pair.second, device, properties_without_extensions ).create_infer_request(); m_language.get_tensor("attention_mask").set_shape({1, 0}); diff --git a/src/cpp/src/visual_language/qwen2vl/classes.cpp b/src/cpp/src/visual_language/qwen2vl/classes.cpp index 9b200d2baf..e3010de602 100644 --- a/src/cpp/src/visual_language/qwen2vl/classes.cpp +++ b/src/cpp/src/visual_language/qwen2vl/classes.cpp @@ -660,8 +660,10 @@ VisionEncoderQwen2VL::VisionEncoderQwen2VL(const std::filesystem::path& model_di : VisionEncoder(model_dir, device, properties), use_ov_image_preprocess(check_image_preprocess_env()) { if (use_ov_image_preprocess) { + auto properties_without_extensions = properties; + utils::add_extensions_to_core(properties_without_extensions); auto model_org = utils::singleton_core().read_model(model_dir / "openvino_vision_embeddings_model.xml"); - m_ireq_queue_vision_encoder = create_vision_encoder_ireq(model_org, m_processor_config, device, properties); + m_ireq_queue_vision_encoder = create_vision_encoder_ireq(model_org, m_processor_config, device, properties_without_extensions); } } @@ -674,8 +676,10 @@ VisionEncoderQwen2VL::VisionEncoderQwen2VL(const ModelsMap& models_map, if (use_ov_image_preprocess) { const auto& [vision_encoder_model, vision_encoder_weights] = utils::get_model_weights_pair(models_map, "vision_embeddings"); + auto properties_without_extensions = properties; + utils::add_extensions_to_core(properties_without_extensions); auto model_org = utils::singleton_core().read_model(vision_encoder_model, vision_encoder_weights); - m_ireq_queue_vision_encoder = create_vision_encoder_ireq(model_org, m_processor_config, device,
properties); + m_ireq_queue_vision_encoder = create_vision_encoder_ireq(model_org, m_processor_config, device, properties_without_extensions); } } @@ -923,10 +927,12 @@ InputsEmbedderQwen2VL::InputsEmbedderQwen2VL( const std::string& device, const ov::AnyMap device_config) : IInputsEmbedder(vlm_config, model_dir, device, device_config) { + auto properties_without_extensions = device_config; + utils::add_extensions_to_core(properties_without_extensions); auto model = utils::singleton_core().read_model(model_dir / "openvino_vision_embeddings_merger_model.xml"); utils::request_vl_sdpa_transformations(model); - auto compiled_model = utils::singleton_core().compile_model(model, device, device_config); + auto compiled_model = utils::singleton_core().compile_model(model, device, properties_without_extensions); m_with_cu_seqlens_input = utils::check_vl_sdpa_transformations(compiled_model); ov::genai::utils::print_compiled_model_properties(compiled_model, @@ -952,6 +958,8 @@ InputsEmbedderQwen2VL::InputsEmbedderQwen2VL( const std::string& device, const ov::AnyMap device_config) : IInputsEmbedder(vlm_config, models_map, tokenizer, config_dir_path, device, device_config) { + auto properties_without_extensions = device_config; + utils::add_extensions_to_core(properties_without_extensions); auto model = utils::singleton_core().read_model( utils::get_model_weights_pair(models_map, "vision_embeddings_merger").first, utils::get_model_weights_pair(models_map, "vision_embeddings_merger").second); @@ -959,7 +967,7 @@ InputsEmbedderQwen2VL::InputsEmbedderQwen2VL( auto compiled_model = utils::singleton_core().compile_model(model, device, - device_config + properties_without_extensions ); m_with_cu_seqlens_input = utils::check_vl_sdpa_transformations(compiled_model); From 8a80895240a9197b2dd3d3adabb744e5de35a38b Mon Sep 17 00:00:00 2001 From: sunxiaoxia2022 Date: Fri, 14 Nov 2025 14:41:46 +0800 Subject: [PATCH 31/38] revert some changes --- tests/python_tests/test_vlm_pipeline.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/python_tests/test_vlm_pipeline.py b/tests/python_tests/test_vlm_pipeline.py index 5a9f03961e..6cecac9b06 100644 --- a/tests/python_tests/test_vlm_pipeline.py +++ b/tests/python_tests/test_vlm_pipeline.py @@ -305,13 +305,11 @@ def ov_pipe_model(request: pytest.FixtureRequest) -> VlmModelInfo: indirect=["ov_pipe_model"], ) - @pytest.fixture(scope="module") def ov_continious_batching_pipe() -> ContinuousBatchingPipeline: models_path = _get_ov_model(MODEL_IDS[0]) return ContinuousBatchingPipeline(models_path, SchedulerConfig(), "CPU") - @pytest.fixture(scope="module") def ov_continious_batching_pipe_gemma() -> ContinuousBatchingPipeline: models_path = _get_ov_model(MODEL_IDS[8]) @@ -402,7 +400,7 @@ def cat_tensor(cat_image) -> openvino.Tensor: def car_tensor(pytestconfig: pytest.Config) -> openvino.Tensor: return openvino.Tensor(from_cache_or_download(pytestconfig, TEST_IMAGE_URLS['car'], "car.jpg")) - + @pytest.fixture(scope="module") def synthetic_video_32x32_tensor(synthetic_video_32x32): return openvino.Tensor(synthetic_video_32x32) @@ -1368,7 +1366,7 @@ def test_model_tags_missing_native(ov_pipe_model: VlmModelInfo): with pytest.raises(RuntimeError): ov_pipe.generate(image_tag(0)) - + @pytest.mark.parametrize( "ov_pipe_model,has_image,has_video", From 85608a05f587de04ecf257102c887a1b999f080c Mon Sep 17 00:00:00 2001 From: sunxiaoxia2022 Date: Wed, 19 Nov 2025 09:06:42 +0800 Subject: [PATCH 32/38] rm add_extensions_to_core from pipeline_stateful ---
src/cpp/src/continuous_batching/pipeline.cpp | 2 +- src/cpp/src/llm/pipeline.cpp | 2 ++ src/cpp/src/llm/pipeline_stateful.cpp | 4 +--- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/cpp/src/continuous_batching/pipeline.cpp b/src/cpp/src/continuous_batching/pipeline.cpp index 24f4f4f1ef..1aa47475c7 100644 --- a/src/cpp/src/continuous_batching/pipeline.cpp +++ b/src/cpp/src/continuous_batching/pipeline.cpp @@ -47,7 +47,7 @@ ContinuousBatchingPipeline::ContinuousBatchingPipeline( const std::filesystem::p auto is_prompt_lookup_enabled = extract_prompt_lookup_from_config(properties_without_draft_model); utils::add_extensions_to_core(properties_without_draft_model); - auto model = utils::read_model(models_path, properties); + auto model = utils::read_model(models_path, properties_without_draft_model); auto [properties_without_draft_model_without_gguf, enable_save_ov_model] = utils::extract_gguf_properties(properties_without_draft_model); properties_without_draft_model_without_gguf[ov::cache_model_path.name()] = models_path; auto tokenizer = ov::genai::Tokenizer(models_path, tokenizer_properties); diff --git a/src/cpp/src/llm/pipeline.cpp b/src/cpp/src/llm/pipeline.cpp index 6efede9c1c..c326c2ff93 100644 --- a/src/cpp/src/llm/pipeline.cpp +++ b/src/cpp/src/llm/pipeline.cpp @@ -180,6 +180,7 @@ ov::genai::LLMPipeline::LLMPipeline( bool is_npu_requested = ov::genai::utils::is_npu_requested(device, user_properties); auto [properties, attention_backend] = utils::extract_attention_backend(user_properties, is_npu_requested); + utils::add_extensions_to_core(properties); if (is_npu_requested) { m_pimpl = StatefulPipeline::create(models_path, tokenizer, device, properties); @@ -218,6 +219,7 @@ ov::genai::LLMPipeline::LLMPipeline( bool is_npu_requested = ov::genai::utils::is_npu_requested(device, user_properties); auto [properties, attention_backend] = utils::extract_attention_backend(user_properties, is_npu_requested); + utils::add_extensions_to_core(properties); if (is_npu_requested) { m_pimpl = StatefulPipeline::create(models_path, device, properties); diff --git a/src/cpp/src/llm/pipeline_stateful.cpp b/src/cpp/src/llm/pipeline_stateful.cpp index 2bc1e4fb22..c013fd58e9 100644 --- a/src/cpp/src/llm/pipeline_stateful.cpp +++ b/src/cpp/src/llm/pipeline_stateful.cpp @@ -78,9 +78,7 @@ StatefulLLMPipeline::StatefulLLMPipeline( m_max_prompt_len = kv_desc.max_prompt_len; m_max_kv_cache_size = kv_desc.max_prompt_len + kv_desc.min_response_len; } else { - auto properties_without_extensions = *filtered_properties; - utils::add_extensions_to_core(properties_without_extensions); - compiled_model = utils::singleton_core().compile_model(model, device, properties_without_extensions); + compiled_model = utils::singleton_core().compile_model(model, device, *filtered_properties); } m_model_runner = compiled_model.create_infer_request(); ov::genai::utils::print_compiled_model_properties(compiled_model, "Stateful LLM model"); From 6fd6b2606f42f8145f3884af6d6576a5081d6673 Mon Sep 17 00:00:00 2001 From: sunxiaoxia2022 Date: Wed, 19 Nov 2025 09:09:45 +0800 Subject: [PATCH 33/38] revert a small change --- src/cpp/src/llm/pipeline_stateful.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cpp/src/llm/pipeline_stateful.cpp b/src/cpp/src/llm/pipeline_stateful.cpp index c013fd58e9..81f91d7054 100644 --- a/src/cpp/src/llm/pipeline_stateful.cpp +++ b/src/cpp/src/llm/pipeline_stateful.cpp @@ -78,7 +78,7 @@ StatefulLLMPipeline::StatefulLLMPipeline( m_max_prompt_len = kv_desc.max_prompt_len; 
m_max_kv_cache_size = kv_desc.max_prompt_len + kv_desc.min_response_len; } else { - compiled_model = utils::singleton_core().compile_model(model, device, *filtered_properties); + compiled_model = utils::singleton_core().compile_model(model, device, *filtered_properties); } m_model_runner = compiled_model.create_infer_request(); ov::genai::utils::print_compiled_model_properties(compiled_model, "Stateful LLM model"); From 3208ed15a9cc848c09181f1e03f42adc476286da Mon Sep 17 00:00:00 2001 From: sunxiaoxia2022 Date: Wed, 19 Nov 2025 09:23:25 +0800 Subject: [PATCH 34/38] revert a change --- src/python/openvino_genai/py_openvino_genai.pyi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/python/openvino_genai/py_openvino_genai.pyi b/src/python/openvino_genai/py_openvino_genai.pyi index 354517f823..1af39275ec 100644 --- a/src/python/openvino_genai/py_openvino_genai.pyi +++ b/src/python/openvino_genai/py_openvino_genai.pyi @@ -4465,4 +4465,4 @@ def draft_model(models_path: os.PathLike | str | bytes, device: str = '', **kwar def get_version() -> str: """ OpenVINO GenAI version - """ \ No newline at end of file + """ From 6e10e60a49e34f90c0a498e715b071237fc8addc Mon Sep 17 00:00:00 2001 From: sunxiaoxia2022 Date: Wed, 19 Nov 2025 09:36:50 +0800 Subject: [PATCH 35/38] double quotes --- tests/python_tests/test_continuous_batching.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/python_tests/test_continuous_batching.py b/tests/python_tests/test_continuous_batching.py index 2d1b68013e..6aa6a9c1c0 100644 --- a/tests/python_tests/test_continuous_batching.py +++ b/tests/python_tests/test_continuous_batching.py @@ -534,7 +534,7 @@ def test_speculative_decoding_extended_perf_metrics(pipeline_type): @pytest.mark.precommit def test_continuous_batching_add_extension(): - model_id = 'katuni4ka/tiny-random-phi3' + model_id = "katuni4ka/tiny-random-phi3" _, _, models_path = download_and_convert_model(model_id) scheduler_config = SchedulerConfig() From 6e673c9c60445f5a172cb91dcc0328018382a016 Mon Sep 17 00:00:00 2001 From: sunxiaoxia2022 Date: Wed, 19 Nov 2025 10:19:57 +0800 Subject: [PATCH 36/38] rm add_extensions_to_core from embedding_model and qwen2vl since it is now done externally --- src/cpp/src/visual_language/embedding_model.cpp | 10 +++------- src/cpp/src/visual_language/qwen2vl/classes.cpp | 16 ++++------------ 2 files changed, 7 insertions(+), 19 deletions(-) diff --git a/src/cpp/src/visual_language/embedding_model.cpp b/src/cpp/src/visual_language/embedding_model.cpp index 3da633c704..db436dac07 100644 --- a/src/cpp/src/visual_language/embedding_model.cpp +++ b/src/cpp/src/visual_language/embedding_model.cpp @@ -45,13 +45,11 @@ EmbeddingsModel::EmbeddingsModel(const std::filesystem::path& model_dir, const std::string& device, const ov::AnyMap& properties) { ov::Core core = utils::singleton_core(); - auto properties_copy = properties; - utils::add_extensions_to_core(properties_copy); - std::shared_ptr<ov::Model> m_model = core.read_model(model_dir / "openvino_text_embeddings_model.xml", {}, properties_copy); + std::shared_ptr<ov::Model> m_model = core.read_model(model_dir / "openvino_text_embeddings_model.xml", {}, properties); // apply embedding postprocessing step by merging them into the model merge_postprocess(m_model, scale_emb); - ov::CompiledModel compiled_model = core.compile_model(m_model, device, properties_copy); + ov::CompiledModel compiled_model = core.compile_model(m_model, device, properties); ov::genai::utils::print_compiled_model_properties(compiled_model, "text
embeddings model"); m_embeddings_requests_queue = init(compiled_model); } @@ -62,13 +60,11 @@ EmbeddingsModel::EmbeddingsModel(const std::string& model, const std::string& device, const ov::AnyMap& properties) { ov::Core core = utils::singleton_core(); - auto properties_copy = properties; - utils::add_extensions_to_core(properties_copy); std::shared_ptr m_model = core.read_model(model, weights); // apply embedding postprocessing step by merging them into the model merge_postprocess(m_model, scale_emb); - ov::CompiledModel compiled_model = core.compile_model(m_model, device, properties_copy); + ov::CompiledModel compiled_model = core.compile_model(m_model, device, properties); m_embeddings_requests_queue = init(compiled_model); } diff --git a/src/cpp/src/visual_language/qwen2vl/classes.cpp b/src/cpp/src/visual_language/qwen2vl/classes.cpp index e3010de602..9b200d2baf 100644 --- a/src/cpp/src/visual_language/qwen2vl/classes.cpp +++ b/src/cpp/src/visual_language/qwen2vl/classes.cpp @@ -660,10 +660,8 @@ VisionEncoderQwen2VL::VisionEncoderQwen2VL(const std::filesystem::path& model_di : VisionEncoder(model_dir, device, properties), use_ov_image_preprocess(check_image_preprocess_env()) { if (use_ov_image_preprocess) { - auto properties_without_extensions = properties; - utils::add_extensions_to_core(properties_without_extensions); auto model_org = utils::singleton_core().read_model(model_dir / "openvino_vision_embeddings_model.xml"); - m_ireq_queue_vision_encoder = create_vision_encoder_ireq(model_org, m_processor_config, device, properties_without_extensions); + m_ireq_queue_vision_encoder = create_vision_encoder_ireq(model_org, m_processor_config, device, properties); } } @@ -676,10 +674,8 @@ VisionEncoderQwen2VL::VisionEncoderQwen2VL(const ModelsMap& models_map, if (use_ov_image_preprocess) { const auto& [vision_encoder_model, vision_encoder_weights] = utils::get_model_weights_pair(models_map, "vision_embeddings"); - auto properties_without_extensions = properties; - utils::add_extensions_to_core(properties_without_extensions); auto model_org = utils::singleton_core().read_model(vision_encoder_model, vision_encoder_weights); - m_ireq_queue_vision_encoder = create_vision_encoder_ireq(model_org, m_processor_config, device, properties_without_extensions); + m_ireq_queue_vision_encoder = create_vision_encoder_ireq(model_org, m_processor_config, device, properties); } } @@ -927,12 +923,10 @@ InputsEmbedderQwen2VL::InputsEmbedderQwen2VL( const std::string& device, const ov::AnyMap device_config) : IInputsEmbedder(vlm_config, model_dir, device, device_config) { - auto properties_without_extensions = device_config; - utils::add_extensions_to_core(properties_without_extensions); auto model = utils::singleton_core().read_model(model_dir / "openvino_vision_embeddings_merger_model.xml"); utils::request_vl_sdpa_transformations(model); - auto compiled_model = utils::singleton_core().compile_model(model, device, properties_without_extensions); + auto compiled_model = utils::singleton_core().compile_model(model, device, device_config); m_with_cu_seqlens_input = utils::check_vl_sdpa_transformations(compiled_model); ov::genai::utils::print_compiled_model_properties(compiled_model, @@ -958,8 +952,6 @@ InputsEmbedderQwen2VL::InputsEmbedderQwen2VL( const std::string& device, const ov::AnyMap device_config) : IInputsEmbedder(vlm_config, models_map, tokenizer, config_dir_path, device, device_config) { - auto properties_without_extensions = device_config; - 
utils::add_extensions_to_core(properties_without_extensions); auto model = utils::singleton_core().read_model( utils::get_model_weights_pair(models_map, "vision_embeddings_merger").first, utils::get_model_weights_pair(models_map, "vision_embeddings_merger").second); @@ -967,7 +959,7 @@ InputsEmbedderQwen2VL::InputsEmbedderQwen2VL( auto compiled_model = utils::singleton_core().compile_model(model, device, - properties_without_extensions + device_config ); m_with_cu_seqlens_input = utils::check_vl_sdpa_transformations(compiled_model); From e59ab7c4e3c3f60015eb41f40c77560dca8d94f3 Mon Sep 17 00:00:00 2001 From: sunxiaoxia2022 Date: Wed, 19 Nov 2025 10:32:49 +0800 Subject: [PATCH 37/38] move add_extensions_to_core before InputsEmbedder --- src/cpp/src/visual_language/pipeline.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/cpp/src/visual_language/pipeline.cpp b/src/cpp/src/visual_language/pipeline.cpp index 9fa8f94f59..017d29c8c3 100644 --- a/src/cpp/src/visual_language/pipeline.cpp +++ b/src/cpp/src/visual_language/pipeline.cpp @@ -152,15 +152,14 @@ class VLMPipeline::VLMPipelineImpl : public VLMPipelineBase{ OPENVINO_ASSERT(!m_is_npu, "VLMPipeline initialization from string isn't supported for NPU device"); - m_inputs_embedder = std::make_shared<InputsEmbedder>(models_map, tokenizer, config_dir_path, device, properties); + auto properties_without_extensions = properties; + utils::add_extensions_to_core(properties_without_extensions); + m_inputs_embedder = std::make_shared<InputsEmbedder>(models_map, tokenizer, config_dir_path, device, properties_without_extensions); m_tokenizer = m_inputs_embedder->get_tokenizer(); m_embedding = m_inputs_embedder->get_embedding_model(); auto m_language_pair = utils::get_model_weights_pair(models_map, "language"); - auto properties_without_extensions = properties; - utils::add_extensions_to_core(properties_without_extensions); - m_language = utils::singleton_core().compile_model( m_language_pair.first, m_language_pair.second, device, properties_without_extensions ).create_infer_request(); From 54880e3d321888cc51679c8229e1ab9d08226fb0 Mon Sep 17 00:00:00 2001 From: sunxiaoxia2022 Date: Wed, 19 Nov 2025 10:50:48 +0800 Subject: [PATCH 38/38] add a documentation comment for add_extensions_to_core --- src/cpp/src/utils.hpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/cpp/src/utils.hpp b/src/cpp/src/utils.hpp index 64de9d22a8..6f3562a2bf 100644 --- a/src/cpp/src/utils.hpp +++ b/src/cpp/src/utils.hpp @@ -287,6 +287,11 @@ bool explicitly_requires_paged_attention(const ov::AnyMap& properties, bool is_n std::pair<ov::AnyMap, std::string> extract_attention_backend(const ov::AnyMap& external_properties, bool is_npu_requested = false); +/** + * @brief Extracts the "EXTENSIONS" key from the provided properties map, adds each extension path to the singleton + * core, and removes the key from the properties map. This function is used to dynamically add custom extensions to the + * OpenVINO core at runtime. + */ void add_extensions_to_core(ov::AnyMap& properties); void save_openvino_model(const std::shared_ptr<ov::Model>& model, const std::string& save_path, bool compress_to_fp16);
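
Taken together, the series leaves two ways to load a custom-op extension: process-wide via ov::genai::add_extension (openvino_genai.add_extension in Python), or per pipeline through the "EXTENSIONS" property, which utils::add_extensions_to_core consumes and strips before the remaining properties reach read_model/compile_model. A minimal Python sketch of both paths; the model directory and extension library paths below are hypothetical placeholders, not artifacts of this patch series:

import openvino_genai as ov_genai

models_path = "./tiny-random-phi3"    # hypothetical: an exported OpenVINO model directory
extension_lib = "./libcustom_ops.so"  # hypothetical: a library exposing ov::Extension

# Option 1: register the extension once with the singleton ov::Core shared by GenAI pipelines.
ov_genai.add_extension(extension_lib)

# Option 2: pass it per pipeline; the "EXTENSIONS" key is consumed by
# add_extensions_to_core and removed before compile_model sees the properties.
pipe = ov_genai.LLMPipeline(models_path, "CPU", EXTENSIONS=[extension_lib])
print(pipe.generate("Hello", max_new_tokens=8))

As the tests above check, a bogus library path fails fast: pipeline construction raises a RuntimeError carrying "Cannot find entry point to the extension library".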