From e736005427d3f2516a3be34473dda9fbd647b804 Mon Sep 17 00:00:00 2001 From: Andrii Staikov Date: Tue, 28 Oct 2025 17:03:24 +0100 Subject: [PATCH 01/17] wip --- .../paged_attention_transformations.cpp | 68 ------------------- .../paged_attention_transformations.hpp | 13 ---- .../src/continuous_batching/pipeline_impl.cpp | 3 +- .../speculative_decoding_impl.cpp | 9 ++- 4 files changed, 9 insertions(+), 84 deletions(-) delete mode 100644 src/cpp/src/continuous_batching/paged_attention_transformations.cpp diff --git a/src/cpp/src/continuous_batching/paged_attention_transformations.cpp b/src/cpp/src/continuous_batching/paged_attention_transformations.cpp deleted file mode 100644 index f175ab2cde..0000000000 --- a/src/cpp/src/continuous_batching/paged_attention_transformations.cpp +++ /dev/null @@ -1,68 +0,0 @@ -// Copyright (C) 2023-2025 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#include "continuous_batching/paged_attention_transformations.hpp" - -#include "openvino/pass/manager.hpp" -#include "openvino/pass/sdpa_to_paged_attention.hpp" - -namespace ov { -namespace genai { -namespace utils { - -void apply_paged_attention_transformations(std::shared_ptr model, bool per_layer_cache_control, bool allow_cache_rotation, bool allow_xattention) { - const ov::op::util::VariableVector& variables = model->get_variables(); - OPENVINO_ASSERT(!variables.empty(), "Model is supposed to be stateful"); - - bool use_block_indices_inputs = per_layer_cache_control; - bool use_score_outputs = per_layer_cache_control; - ov::pass::SDPAToPagedAttention(use_block_indices_inputs, use_score_outputs, /* allow_score_aggregation = */ true, allow_cache_rotation, allow_xattention).run_on_model(model); - - std::map> key_cache_params, value_cache_params; - for (const auto& param_ptr : model->get_parameters()) { - const auto& name = param_ptr->get_friendly_name(); - if (name.find("key_cache.") == 0) { - key_cache_params[name] = param_ptr; - } else if (name.find("value_cache.") == 0) { - value_cache_params[name] = param_ptr; - } - } - - OPENVINO_ASSERT(key_cache_params.size() == value_cache_params.size() && key_cache_params.size() > 0); - - size_t num_decoder_layers = key_cache_params.size(); - for (size_t idx = 0; idx < num_decoder_layers; idx++) { - auto k = key_cache_params[std::string("key_cache.") + std::to_string(idx)]; - auto key_shape = k->get_partial_shape(); - size_t num_k_heads = key_shape[1].get_length(); - size_t k_head_size = key_shape[2].get_length(); - - auto v = value_cache_params[std::string("value_cache.") + std::to_string(idx)]; - auto value_shape = v->get_partial_shape(); - size_t num_v_heads = value_shape[1].get_length(); - size_t v_head_size = value_shape[2].get_length(); - - // reset information in KV cache parameters and set PagedAttention's rt_info - // allow a plugin to automatically set KV cache precisions - k->set_element_type(ov::element::dynamic); - v->set_element_type(ov::element::dynamic); - - // order of dimensions within shapes are not required for plugin during compilation - k->set_partial_shape(ov::PartialShape::dynamic(4)); - v->set_partial_shape(ov::PartialShape::dynamic(4)); - - // set KV cache parameters as rt_info for PagedAttention op, so plugins can apply - // model compile-time optimizations based on them - auto pa_op = k->get_output_target_inputs(0).begin()->get_node(); - pa_op->get_rt_info()["num_k_heads"] = num_k_heads; - pa_op->get_rt_info()["k_head_size"] = k_head_size; - pa_op->get_rt_info()["num_v_heads"] = num_v_heads; - pa_op->get_rt_info()["v_head_size"] = v_head_size; - } - - model->validate_nodes_and_infer_types(); -} - -} // namespace utils -} // namespace genai -} // namespace ov diff --git a/src/cpp/src/continuous_batching/paged_attention_transformations.hpp b/src/cpp/src/continuous_batching/paged_attention_transformations.hpp index d2b6445997..baf8b6a0ee 100644 --- a/src/cpp/src/continuous_batching/paged_attention_transformations.hpp +++ b/src/cpp/src/continuous_batching/paged_attention_transformations.hpp @@ -13,19 +13,6 @@ namespace genai { namespace utils { -/** Applies transformations to the ov::Model to enable paged attention inference. - * @param model Pointer to the ov::Model representing one of the supported LLM architectures. - * @param device_config Configuration struct for inferencing device specifics. - * @param per_layer_cache_control If true, then the transformations will enable per-layer control of KV cache blocks, allowing to specify - * different sets of KV cache blocks for different attention layers. If false, then the KV cache block structure will be identical across all - * decoder layers. - * @param allow_cache_rotation If true, then the transformations will enable additional per-layer inputs to perform re-rotation of specific - * blocks (in a RoPE fashion) before the inference step. - * @param allow_xattention If true, then the transformations will enable additional per-layer inputs to control the XAttention block-sparse - * attention optimization. - */ -void apply_paged_attention_transformations(std::shared_ptr model, bool per_layer_cache_control = false, bool allow_cache_rotation = false, bool allow_xattention = false); - void apply_gather_before_matmul_transformation(std::shared_ptr model); } // namespace utils diff --git a/src/cpp/src/continuous_batching/pipeline_impl.cpp b/src/cpp/src/continuous_batching/pipeline_impl.cpp index 1e33c80979..267faf5480 100644 --- a/src/cpp/src/continuous_batching/pipeline_impl.cpp +++ b/src/cpp/src/continuous_batching/pipeline_impl.cpp @@ -13,6 +13,7 @@ #endif #include "openvino/genai/text_streamer.hpp" +#include "openvino/pass/sdpa_to_paged_attention.hpp" #include "continuous_batching/pipeline_impl.hpp" #include "utils.hpp" #include "continuous_batching/paged_attention_transformations.hpp" @@ -75,7 +76,7 @@ ContinuousBatchingPipeline::ContinuousBatchingImpl::ContinuousBatchingImpl( bool is_need_per_layer_cache_control = scheduler_config.use_cache_eviction; bool allow_cache_rotation = scheduler_config.cache_eviction_config.apply_rotation; bool allow_xattention = scheduler_config.use_sparse_attention && scheduler_config.sparse_attention_config.mode == SparseAttentionMode::XATTENTION; - utils::apply_paged_attention_transformations(model, is_need_per_layer_cache_control, allow_cache_rotation, allow_xattention); + ov::pass::SDPAToPagedAttention(is_need_per_layer_cache_control, is_need_per_layer_cache_control, /* allow_score_aggregation = */ true, allow_cache_rotation, allow_xattention).run_on_model(model); utils::apply_gather_before_matmul_transformation(model); initialize_pipeline(model, scheduler_config, device, properties); diff --git a/src/cpp/src/speculative_decoding/speculative_decoding_impl.cpp b/src/cpp/src/speculative_decoding/speculative_decoding_impl.cpp index 7f6134c10a..104f065d50 100644 --- a/src/cpp/src/speculative_decoding/speculative_decoding_impl.cpp +++ b/src/cpp/src/speculative_decoding/speculative_decoding_impl.cpp @@ -4,6 +4,7 @@ #include #include "openvino/genai/text_streamer.hpp" +#include "openvino/pass/sdpa_to_paged_attention.hpp" #include "speculative_decoding_impl.hpp" #include "continuous_batching/paged_attention_transformations.hpp" #include "utils.hpp" @@ -36,8 +37,12 @@ ContinuousBatchingPipeline::SpeculativeDecodingImpl::SpeculativeDecodingImpl(con auto main_scheduler_config = main_model_desc.scheduler_config; auto main_device = main_model_desc.device; - utils::apply_paged_attention_transformations(main_model, main_model_desc.scheduler_config.use_cache_eviction); - utils::apply_paged_attention_transformations(draft_model, main_model_desc.scheduler_config.use_cache_eviction); + ov::pass::SDPAToPagedAttention(main_model_desc.scheduler_config.use_cache_eviction, + main_model_desc.scheduler_config.use_cache_eviction, + /* allow_score_aggregation = */ true).run_on_model(main_model); + ov::pass::SDPAToPagedAttention(main_model_desc.scheduler_config.use_cache_eviction, + main_model_desc.scheduler_config.use_cache_eviction, + /* allow_score_aggregation = */ true).run_on_model(draft_model); utils::apply_gather_before_matmul_transformation(main_model); utils::apply_gather_before_matmul_transformation(draft_model); From 9bc56ac121320dc4df9866543dc7963775bd37a2 Mon Sep 17 00:00:00 2001 From: Andrii Staikov Date: Fri, 31 Oct 2025 13:51:19 +0100 Subject: [PATCH 02/17] review concerns --- src/cpp/src/continuous_batching/pipeline_impl.cpp | 3 ++- .../speculative_decoding/speculative_decoding_impl.cpp | 8 ++++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/cpp/src/continuous_batching/pipeline_impl.cpp b/src/cpp/src/continuous_batching/pipeline_impl.cpp index 267faf5480..5f03a66227 100644 --- a/src/cpp/src/continuous_batching/pipeline_impl.cpp +++ b/src/cpp/src/continuous_batching/pipeline_impl.cpp @@ -76,7 +76,8 @@ ContinuousBatchingPipeline::ContinuousBatchingImpl::ContinuousBatchingImpl( bool is_need_per_layer_cache_control = scheduler_config.use_cache_eviction; bool allow_cache_rotation = scheduler_config.cache_eviction_config.apply_rotation; bool allow_xattention = scheduler_config.use_sparse_attention && scheduler_config.sparse_attention_config.mode == SparseAttentionMode::XATTENTION; - ov::pass::SDPAToPagedAttention(is_need_per_layer_cache_control, is_need_per_layer_cache_control, /* allow_score_aggregation = */ true, allow_cache_rotation, allow_xattention).run_on_model(model); + bool allow_score_aggregation = true; + ov::pass::SDPAToPagedAttention(is_need_per_layer_cache_control, is_need_per_layer_cache_control, allow_score_aggregation, allow_cache_rotation, allow_xattention).run_on_model(model); utils::apply_gather_before_matmul_transformation(model); initialize_pipeline(model, scheduler_config, device, properties); diff --git a/src/cpp/src/speculative_decoding/speculative_decoding_impl.cpp b/src/cpp/src/speculative_decoding/speculative_decoding_impl.cpp index 104f065d50..eeb36336fd 100644 --- a/src/cpp/src/speculative_decoding/speculative_decoding_impl.cpp +++ b/src/cpp/src/speculative_decoding/speculative_decoding_impl.cpp @@ -36,13 +36,17 @@ ContinuousBatchingPipeline::SpeculativeDecodingImpl::SpeculativeDecodingImpl(con auto main_scheduler_config = main_model_desc.scheduler_config; auto main_device = main_model_desc.device; + bool allow_score_aggregation = true; + bool allow_xattention = false; ov::pass::SDPAToPagedAttention(main_model_desc.scheduler_config.use_cache_eviction, main_model_desc.scheduler_config.use_cache_eviction, - /* allow_score_aggregation = */ true).run_on_model(main_model); + allow_score_aggregation, + allow_xattention).run_on_model(main_model); ov::pass::SDPAToPagedAttention(main_model_desc.scheduler_config.use_cache_eviction, main_model_desc.scheduler_config.use_cache_eviction, - /* allow_score_aggregation = */ true).run_on_model(draft_model); + allow_score_aggregation, + allow_xattention).run_on_model(draft_model); utils::apply_gather_before_matmul_transformation(main_model); utils::apply_gather_before_matmul_transformation(draft_model); From 238fc3519ffa6428cc4f876dce2b18b01d944911 Mon Sep 17 00:00:00 2001 From: Andrii Staikov Date: Wed, 12 Nov 2025 10:59:04 +0100 Subject: [PATCH 03/17] check CI --- .github/workflows/linux.yml | 5 ++++- .github/workflows/mac.yml | 5 ++++- .github/workflows/manylinux_2_28.yml | 5 ++++- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 79e566b0fd..7ab70bbf53 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -93,7 +93,10 @@ jobs: with: platform: ubuntu22 commit_packages_to_provide: wheels,developer_package.tar.gz,openvino_node_npm_package.tar.gz - revision: latest_available_commit + revision: eacbd941f39094e0114a64db882f65f7b4949e81 + # Set specific revision and uncomment to use OV from its PR build: + # branch_name: master + event_name: pull_request # Set specific revision and uncomment to use OV from its PR build: # branch_name: master # event_name: pull_request diff --git a/.github/workflows/mac.yml b/.github/workflows/mac.yml index 9e8aa3eae7..00bab38ef3 100644 --- a/.github/workflows/mac.yml +++ b/.github/workflows/mac.yml @@ -85,7 +85,10 @@ jobs: platform: macos_14_7 arch: 'arm64' commit_packages_to_provide: wheels,developer_package.tar.gz,openvino_node_npm_package.tar.gz - revision: latest_available_commit + revision: eacbd941f39094e0114a64db882f65f7b4949e81 + # Set specific revision and uncomment to use OV from its PR build: + # branch_name: master + event_name: pull_request # Set specific revision and uncomment to use OV from its PR build: # branch_name: master # event_name: pull_request diff --git a/.github/workflows/manylinux_2_28.yml b/.github/workflows/manylinux_2_28.yml index e76028050d..5a841497cd 100644 --- a/.github/workflows/manylinux_2_28.yml +++ b/.github/workflows/manylinux_2_28.yml @@ -93,7 +93,10 @@ jobs: with: platform: almalinux8 commit_packages_to_provide: wheels,developer_package.tar.gz,openvino_node_npm_package.tar.gz - revision: latest_available_commit + revision: eacbd941f39094e0114a64db882f65f7b4949e81 + # Set specific revision and uncomment to use OV from its PR build: + # branch_name: master + event_name: pull_request - name: Clone docker tag from OpenVINO repo uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 From 56c542608b606090ec893b75735635d426ee7acb Mon Sep 17 00:00:00 2001 From: Andrii Staikov Date: Wed, 12 Nov 2025 14:26:22 +0100 Subject: [PATCH 04/17] add new OV runtime commit --- .github/workflows/coverity.yml | 2 +- .github/workflows/linux.yml | 2 +- .github/workflows/mac.yml | 2 +- .github/workflows/manylinux_2_28.yml | 2 +- .github/workflows/windows.yml | 4 ++-- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/coverity.yml b/.github/workflows/coverity.yml index 417da9cd49..7e4c9065ca 100644 --- a/.github/workflows/coverity.yml +++ b/.github/workflows/coverity.yml @@ -43,7 +43,7 @@ jobs: with: platform: ubuntu22 commit_packages_to_provide: wheels - revision: latest_available_commit + revision: d0a810e97cd98e6e008e89ba213a1728e3c1eddb # Set specific revision and uncomment to use OV from its PR build: # branch_name: master # event_name: pull_request diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 7ab70bbf53..59f4d7a92d 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -93,7 +93,7 @@ jobs: with: platform: ubuntu22 commit_packages_to_provide: wheels,developer_package.tar.gz,openvino_node_npm_package.tar.gz - revision: eacbd941f39094e0114a64db882f65f7b4949e81 + revision: d0a810e97cd98e6e008e89ba213a1728e3c1eddb # Set specific revision and uncomment to use OV from its PR build: # branch_name: master event_name: pull_request diff --git a/.github/workflows/mac.yml b/.github/workflows/mac.yml index 00bab38ef3..6a5984d5ce 100644 --- a/.github/workflows/mac.yml +++ b/.github/workflows/mac.yml @@ -85,7 +85,7 @@ jobs: platform: macos_14_7 arch: 'arm64' commit_packages_to_provide: wheels,developer_package.tar.gz,openvino_node_npm_package.tar.gz - revision: eacbd941f39094e0114a64db882f65f7b4949e81 + revision: d0a810e97cd98e6e008e89ba213a1728e3c1eddb # Set specific revision and uncomment to use OV from its PR build: # branch_name: master event_name: pull_request diff --git a/.github/workflows/manylinux_2_28.yml b/.github/workflows/manylinux_2_28.yml index 5a841497cd..25b3e90718 100644 --- a/.github/workflows/manylinux_2_28.yml +++ b/.github/workflows/manylinux_2_28.yml @@ -93,7 +93,7 @@ jobs: with: platform: almalinux8 commit_packages_to_provide: wheels,developer_package.tar.gz,openvino_node_npm_package.tar.gz - revision: eacbd941f39094e0114a64db882f65f7b4949e81 + revision: d0a810e97cd98e6e008e89ba213a1728e3c1eddb # Set specific revision and uncomment to use OV from its PR build: # branch_name: master event_name: pull_request diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index c374450916..6e4107bafd 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -89,10 +89,10 @@ jobs: with: platform: windows commit_packages_to_provide: wheels,openvino_node_npm_package.zip - revision: latest_available_commit + revision: d0a810e97cd98e6e008e89ba213a1728e3c1eddb # Set specific revision and uncomment to use OV from its PR build: # branch_name: master - # event_name: pull_request + event_name: pull_request genai_build_cpack: name: genai cpack (${{ matrix.build-type }}) From 429c6c0c3fc68d5ea3551657a516ffaf5dfb9760 Mon Sep 17 00:00:00 2001 From: Andrii Staikov Date: Wed, 12 Nov 2025 17:10:06 +0100 Subject: [PATCH 05/17] edit coverity build --- .github/workflows/coverity.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/coverity.yml b/.github/workflows/coverity.yml index 7e4c9065ca..07f3a8f752 100644 --- a/.github/workflows/coverity.yml +++ b/.github/workflows/coverity.yml @@ -46,7 +46,7 @@ jobs: revision: d0a810e97cd98e6e008e89ba213a1728e3c1eddb # Set specific revision and uncomment to use OV from its PR build: # branch_name: master - # event_name: pull_request + event_name: pull_request - name: Clone docker tag from OpenVINO repo uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 From abbffc70cd5107059d16429582de7d03550c9dc3 Mon Sep 17 00:00:00 2001 From: Andrii Staikov Date: Fri, 14 Nov 2025 15:44:56 +0100 Subject: [PATCH 06/17] use latest available commit --- .github/workflows/coverity.yml | 4 ++-- .github/workflows/linux.yml | 5 +---- .github/workflows/mac.yml | 5 +---- .github/workflows/manylinux_2_28.yml | 5 +---- .github/workflows/windows.yml | 5 +---- 5 files changed, 6 insertions(+), 18 deletions(-) diff --git a/.github/workflows/coverity.yml b/.github/workflows/coverity.yml index 07f3a8f752..417da9cd49 100644 --- a/.github/workflows/coverity.yml +++ b/.github/workflows/coverity.yml @@ -43,10 +43,10 @@ jobs: with: platform: ubuntu22 commit_packages_to_provide: wheels - revision: d0a810e97cd98e6e008e89ba213a1728e3c1eddb + revision: latest_available_commit # Set specific revision and uncomment to use OV from its PR build: # branch_name: master - event_name: pull_request + # event_name: pull_request - name: Clone docker tag from OpenVINO repo uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 59f4d7a92d..79e566b0fd 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -93,10 +93,7 @@ jobs: with: platform: ubuntu22 commit_packages_to_provide: wheels,developer_package.tar.gz,openvino_node_npm_package.tar.gz - revision: d0a810e97cd98e6e008e89ba213a1728e3c1eddb - # Set specific revision and uncomment to use OV from its PR build: - # branch_name: master - event_name: pull_request + revision: latest_available_commit # Set specific revision and uncomment to use OV from its PR build: # branch_name: master # event_name: pull_request diff --git a/.github/workflows/mac.yml b/.github/workflows/mac.yml index 6a5984d5ce..9e8aa3eae7 100644 --- a/.github/workflows/mac.yml +++ b/.github/workflows/mac.yml @@ -85,10 +85,7 @@ jobs: platform: macos_14_7 arch: 'arm64' commit_packages_to_provide: wheels,developer_package.tar.gz,openvino_node_npm_package.tar.gz - revision: d0a810e97cd98e6e008e89ba213a1728e3c1eddb - # Set specific revision and uncomment to use OV from its PR build: - # branch_name: master - event_name: pull_request + revision: latest_available_commit # Set specific revision and uncomment to use OV from its PR build: # branch_name: master # event_name: pull_request diff --git a/.github/workflows/manylinux_2_28.yml b/.github/workflows/manylinux_2_28.yml index 25b3e90718..e76028050d 100644 --- a/.github/workflows/manylinux_2_28.yml +++ b/.github/workflows/manylinux_2_28.yml @@ -93,10 +93,7 @@ jobs: with: platform: almalinux8 commit_packages_to_provide: wheels,developer_package.tar.gz,openvino_node_npm_package.tar.gz - revision: d0a810e97cd98e6e008e89ba213a1728e3c1eddb - # Set specific revision and uncomment to use OV from its PR build: - # branch_name: master - event_name: pull_request + revision: latest_available_commit - name: Clone docker tag from OpenVINO repo uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 6e4107bafd..7286d2b2ab 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -89,10 +89,7 @@ jobs: with: platform: windows commit_packages_to_provide: wheels,openvino_node_npm_package.zip - revision: d0a810e97cd98e6e008e89ba213a1728e3c1eddb - # Set specific revision and uncomment to use OV from its PR build: - # branch_name: master - event_name: pull_request + revision: latest_available_commit genai_build_cpack: name: genai cpack (${{ matrix.build-type }}) From 5113a63c41ffdc55d90087fbe1f9152ca0d865c4 Mon Sep 17 00:00:00 2001 From: Andrii Staikov Date: Fri, 14 Nov 2025 16:02:53 +0100 Subject: [PATCH 07/17] use lac --- .github/workflows/coverity.yml | 5 +---- .github/workflows/linux.yml | 5 +---- .github/workflows/mac.yml | 5 +---- .github/workflows/windows.yml | 3 --- 4 files changed, 3 insertions(+), 15 deletions(-) diff --git a/.github/workflows/coverity.yml b/.github/workflows/coverity.yml index 5811790ecd..a8ea1ae131 100644 --- a/.github/workflows/coverity.yml +++ b/.github/workflows/coverity.yml @@ -43,10 +43,7 @@ jobs: with: platform: ubuntu22 commit_packages_to_provide: wheels - revision: fcf7c2964cf460ecfcb039f748d1f4028626d58c - # Set specific revision and uncomment to use OV from its PR build: - # branch_name: master - # event_name: pull_request + revision: latest_available_commit - name: Clone docker tag from OpenVINO repo uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 7ec5890f3c..6c6d893fe2 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -93,10 +93,7 @@ jobs: with: platform: ubuntu22 commit_packages_to_provide: wheels,developer_package.tar.gz,openvino_node_npm_package.tar.gz - revision: fcf7c2964cf460ecfcb039f748d1f4028626d58c - # Set specific revision and uncomment to use OV from its PR build: - # branch_name: master - # event_name: pull_request + revision: latest_available_commit - name: Clone docker tag from OpenVINO repo uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 diff --git a/.github/workflows/mac.yml b/.github/workflows/mac.yml index 0eeced44bb..b863dc8911 100644 --- a/.github/workflows/mac.yml +++ b/.github/workflows/mac.yml @@ -85,10 +85,7 @@ jobs: platform: macos_14_7 arch: 'arm64' commit_packages_to_provide: wheels,developer_package.tar.gz,openvino_node_npm_package.tar.gz - revision: fcf7c2964cf460ecfcb039f748d1f4028626d58c - # Set specific revision and uncomment to use OV from its PR build: - # branch_name: master - # event_name: pull_request + revision: latest_available_commit genai_build_cmake: name: Build cpack - ${{ matrix.build-type }} diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index b0587ec758..7eedd75291 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -90,9 +90,6 @@ jobs: platform: windows commit_packages_to_provide: wheels,openvino_node_npm_package.zip revision: latest_available_commit - # Set specific revision and uncomment to use OV from its PR build: - # branch_name: master - # event_name: pull_request genai_build_cpack: name: genai cpack (${{ matrix.build-type }}) From cbe430f20c1e90c717dcb8b6c6215cd5c20f0480 Mon Sep 17 00:00:00 2001 From: Andrii Staikov Date: Fri, 14 Nov 2025 16:13:05 +0100 Subject: [PATCH 08/17] Apply suggestions from code review --- .github/workflows/coverity.yml | 3 +++ .github/workflows/linux.yml | 3 +++ .github/workflows/mac.yml | 3 +++ .github/workflows/windows.yml | 3 +++ 4 files changed, 12 insertions(+) diff --git a/.github/workflows/coverity.yml b/.github/workflows/coverity.yml index a8ea1ae131..417da9cd49 100644 --- a/.github/workflows/coverity.yml +++ b/.github/workflows/coverity.yml @@ -44,6 +44,9 @@ jobs: platform: ubuntu22 commit_packages_to_provide: wheels revision: latest_available_commit + # Set specific revision and uncomment to use OV from its PR build: + # branch_name: master + # event_name: pull_request - name: Clone docker tag from OpenVINO repo uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 6c6d893fe2..c031b22b87 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -94,6 +94,9 @@ jobs: platform: ubuntu22 commit_packages_to_provide: wheels,developer_package.tar.gz,openvino_node_npm_package.tar.gz revision: latest_available_commit + # Set specific revision and uncomment to use OV from its PR build: + # branch_name: master + # event_name: pull_request - name: Clone docker tag from OpenVINO repo uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 diff --git a/.github/workflows/mac.yml b/.github/workflows/mac.yml index b863dc8911..020a7b133e 100644 --- a/.github/workflows/mac.yml +++ b/.github/workflows/mac.yml @@ -86,6 +86,9 @@ jobs: arch: 'arm64' commit_packages_to_provide: wheels,developer_package.tar.gz,openvino_node_npm_package.tar.gz revision: latest_available_commit + # Set specific revision and uncomment to use OV from its PR build: + # branch_name: master + # event_name: pull_request genai_build_cmake: name: Build cpack - ${{ matrix.build-type }} diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 7eedd75291..b0587ec758 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -90,6 +90,9 @@ jobs: platform: windows commit_packages_to_provide: wheels,openvino_node_npm_package.zip revision: latest_available_commit + # Set specific revision and uncomment to use OV from its PR build: + # branch_name: master + # event_name: pull_request genai_build_cpack: name: genai cpack (${{ matrix.build-type }}) From 6d53231ce85755a6865c994bffee44dbc907a087 Mon Sep 17 00:00:00 2001 From: Andrii Staikov Date: Fri, 14 Nov 2025 16:16:13 +0100 Subject: [PATCH 09/17] use more latest av. commit --- .github/workflows/manylinux_2_28.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/manylinux_2_28.yml b/.github/workflows/manylinux_2_28.yml index 97b9e6291d..2f8defa74b 100644 --- a/.github/workflows/manylinux_2_28.yml +++ b/.github/workflows/manylinux_2_28.yml @@ -93,7 +93,10 @@ jobs: with: platform: almalinux8 commit_packages_to_provide: wheels,developer_package.tar.gz,openvino_node_npm_package.tar.gz - revision: fcf7c2964cf460ecfcb039f748d1f4028626d58c + revision: latest_available_commit + # Set specific revision and uncomment to use OV from its PR build: + # branch_name: master + # event_name: pull_request - name: Clone docker tag from OpenVINO repo uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 From f11a031b51163d08d1e87642ce8537a549bc7b9f Mon Sep 17 00:00:00 2001 From: Andrii Staikov Date: Tue, 28 Oct 2025 17:03:24 +0100 Subject: [PATCH 10/17] wip --- .../paged_attention_transformations.cpp | 68 ------------------- .../paged_attention_transformations.hpp | 13 ---- .../src/continuous_batching/pipeline_impl.cpp | 3 +- .../speculative_decoding_impl.cpp | 9 ++- 4 files changed, 9 insertions(+), 84 deletions(-) delete mode 100644 src/cpp/src/continuous_batching/paged_attention_transformations.cpp diff --git a/src/cpp/src/continuous_batching/paged_attention_transformations.cpp b/src/cpp/src/continuous_batching/paged_attention_transformations.cpp deleted file mode 100644 index f175ab2cde..0000000000 --- a/src/cpp/src/continuous_batching/paged_attention_transformations.cpp +++ /dev/null @@ -1,68 +0,0 @@ -// Copyright (C) 2023-2025 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#include "continuous_batching/paged_attention_transformations.hpp" - -#include "openvino/pass/manager.hpp" -#include "openvino/pass/sdpa_to_paged_attention.hpp" - -namespace ov { -namespace genai { -namespace utils { - -void apply_paged_attention_transformations(std::shared_ptr model, bool per_layer_cache_control, bool allow_cache_rotation, bool allow_xattention) { - const ov::op::util::VariableVector& variables = model->get_variables(); - OPENVINO_ASSERT(!variables.empty(), "Model is supposed to be stateful"); - - bool use_block_indices_inputs = per_layer_cache_control; - bool use_score_outputs = per_layer_cache_control; - ov::pass::SDPAToPagedAttention(use_block_indices_inputs, use_score_outputs, /* allow_score_aggregation = */ true, allow_cache_rotation, allow_xattention).run_on_model(model); - - std::map> key_cache_params, value_cache_params; - for (const auto& param_ptr : model->get_parameters()) { - const auto& name = param_ptr->get_friendly_name(); - if (name.find("key_cache.") == 0) { - key_cache_params[name] = param_ptr; - } else if (name.find("value_cache.") == 0) { - value_cache_params[name] = param_ptr; - } - } - - OPENVINO_ASSERT(key_cache_params.size() == value_cache_params.size() && key_cache_params.size() > 0); - - size_t num_decoder_layers = key_cache_params.size(); - for (size_t idx = 0; idx < num_decoder_layers; idx++) { - auto k = key_cache_params[std::string("key_cache.") + std::to_string(idx)]; - auto key_shape = k->get_partial_shape(); - size_t num_k_heads = key_shape[1].get_length(); - size_t k_head_size = key_shape[2].get_length(); - - auto v = value_cache_params[std::string("value_cache.") + std::to_string(idx)]; - auto value_shape = v->get_partial_shape(); - size_t num_v_heads = value_shape[1].get_length(); - size_t v_head_size = value_shape[2].get_length(); - - // reset information in KV cache parameters and set PagedAttention's rt_info - // allow a plugin to automatically set KV cache precisions - k->set_element_type(ov::element::dynamic); - v->set_element_type(ov::element::dynamic); - - // order of dimensions within shapes are not required for plugin during compilation - k->set_partial_shape(ov::PartialShape::dynamic(4)); - v->set_partial_shape(ov::PartialShape::dynamic(4)); - - // set KV cache parameters as rt_info for PagedAttention op, so plugins can apply - // model compile-time optimizations based on them - auto pa_op = k->get_output_target_inputs(0).begin()->get_node(); - pa_op->get_rt_info()["num_k_heads"] = num_k_heads; - pa_op->get_rt_info()["k_head_size"] = k_head_size; - pa_op->get_rt_info()["num_v_heads"] = num_v_heads; - pa_op->get_rt_info()["v_head_size"] = v_head_size; - } - - model->validate_nodes_and_infer_types(); -} - -} // namespace utils -} // namespace genai -} // namespace ov diff --git a/src/cpp/src/continuous_batching/paged_attention_transformations.hpp b/src/cpp/src/continuous_batching/paged_attention_transformations.hpp index d2b6445997..baf8b6a0ee 100644 --- a/src/cpp/src/continuous_batching/paged_attention_transformations.hpp +++ b/src/cpp/src/continuous_batching/paged_attention_transformations.hpp @@ -13,19 +13,6 @@ namespace genai { namespace utils { -/** Applies transformations to the ov::Model to enable paged attention inference. - * @param model Pointer to the ov::Model representing one of the supported LLM architectures. - * @param device_config Configuration struct for inferencing device specifics. - * @param per_layer_cache_control If true, then the transformations will enable per-layer control of KV cache blocks, allowing to specify - * different sets of KV cache blocks for different attention layers. If false, then the KV cache block structure will be identical across all - * decoder layers. - * @param allow_cache_rotation If true, then the transformations will enable additional per-layer inputs to perform re-rotation of specific - * blocks (in a RoPE fashion) before the inference step. - * @param allow_xattention If true, then the transformations will enable additional per-layer inputs to control the XAttention block-sparse - * attention optimization. - */ -void apply_paged_attention_transformations(std::shared_ptr model, bool per_layer_cache_control = false, bool allow_cache_rotation = false, bool allow_xattention = false); - void apply_gather_before_matmul_transformation(std::shared_ptr model); } // namespace utils diff --git a/src/cpp/src/continuous_batching/pipeline_impl.cpp b/src/cpp/src/continuous_batching/pipeline_impl.cpp index 98cf140ed3..68b6422648 100644 --- a/src/cpp/src/continuous_batching/pipeline_impl.cpp +++ b/src/cpp/src/continuous_batching/pipeline_impl.cpp @@ -13,6 +13,7 @@ #endif #include "openvino/genai/text_streamer.hpp" +#include "openvino/pass/sdpa_to_paged_attention.hpp" #include "continuous_batching/pipeline_impl.hpp" #include "utils.hpp" #include "continuous_batching/paged_attention_transformations.hpp" @@ -76,7 +77,7 @@ ContinuousBatchingPipeline::ContinuousBatchingImpl::ContinuousBatchingImpl( bool is_need_per_layer_cache_control = scheduler_config.use_cache_eviction; bool allow_cache_rotation = scheduler_config.cache_eviction_config.apply_rotation; bool allow_xattention = scheduler_config.use_sparse_attention && scheduler_config.sparse_attention_config.mode == SparseAttentionMode::XATTENTION; - utils::apply_paged_attention_transformations(model, is_need_per_layer_cache_control, allow_cache_rotation, allow_xattention); + ov::pass::SDPAToPagedAttention(is_need_per_layer_cache_control, is_need_per_layer_cache_control, /* allow_score_aggregation = */ true, allow_cache_rotation, allow_xattention).run_on_model(model); utils::apply_gather_before_matmul_transformation(model); initialize_pipeline(model, scheduler_config, device, properties); diff --git a/src/cpp/src/speculative_decoding/speculative_decoding_impl.cpp b/src/cpp/src/speculative_decoding/speculative_decoding_impl.cpp index 1ca63e9de7..0c5ecfa47d 100644 --- a/src/cpp/src/speculative_decoding/speculative_decoding_impl.cpp +++ b/src/cpp/src/speculative_decoding/speculative_decoding_impl.cpp @@ -4,6 +4,7 @@ #include #include "openvino/genai/text_streamer.hpp" +#include "openvino/pass/sdpa_to_paged_attention.hpp" #include "speculative_decoding_impl.hpp" #include "continuous_batching/paged_attention_transformations.hpp" #include "utils.hpp" @@ -36,8 +37,12 @@ ContinuousBatchingPipeline::SpeculativeDecodingImpl::SpeculativeDecodingImpl(con auto main_scheduler_config = main_model_desc.scheduler_config; auto main_device = main_model_desc.device; - utils::apply_paged_attention_transformations(main_model, main_model_desc.scheduler_config.use_cache_eviction); - utils::apply_paged_attention_transformations(draft_model, main_model_desc.scheduler_config.use_cache_eviction); + ov::pass::SDPAToPagedAttention(main_model_desc.scheduler_config.use_cache_eviction, + main_model_desc.scheduler_config.use_cache_eviction, + /* allow_score_aggregation = */ true).run_on_model(main_model); + ov::pass::SDPAToPagedAttention(main_model_desc.scheduler_config.use_cache_eviction, + main_model_desc.scheduler_config.use_cache_eviction, + /* allow_score_aggregation = */ true).run_on_model(draft_model); utils::apply_gather_before_matmul_transformation(main_model); utils::apply_gather_before_matmul_transformation(draft_model); From 4166628f76bcc19ab6eca5d7b5e0b327725c0f9d Mon Sep 17 00:00:00 2001 From: Andrii Staikov Date: Fri, 31 Oct 2025 13:51:19 +0100 Subject: [PATCH 11/17] review concerns --- src/cpp/src/continuous_batching/pipeline_impl.cpp | 3 ++- .../speculative_decoding/speculative_decoding_impl.cpp | 8 ++++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/cpp/src/continuous_batching/pipeline_impl.cpp b/src/cpp/src/continuous_batching/pipeline_impl.cpp index 68b6422648..441939ecef 100644 --- a/src/cpp/src/continuous_batching/pipeline_impl.cpp +++ b/src/cpp/src/continuous_batching/pipeline_impl.cpp @@ -77,7 +77,8 @@ ContinuousBatchingPipeline::ContinuousBatchingImpl::ContinuousBatchingImpl( bool is_need_per_layer_cache_control = scheduler_config.use_cache_eviction; bool allow_cache_rotation = scheduler_config.cache_eviction_config.apply_rotation; bool allow_xattention = scheduler_config.use_sparse_attention && scheduler_config.sparse_attention_config.mode == SparseAttentionMode::XATTENTION; - ov::pass::SDPAToPagedAttention(is_need_per_layer_cache_control, is_need_per_layer_cache_control, /* allow_score_aggregation = */ true, allow_cache_rotation, allow_xattention).run_on_model(model); + bool allow_score_aggregation = true; + ov::pass::SDPAToPagedAttention(is_need_per_layer_cache_control, is_need_per_layer_cache_control, allow_score_aggregation, allow_cache_rotation, allow_xattention).run_on_model(model); utils::apply_gather_before_matmul_transformation(model); initialize_pipeline(model, scheduler_config, device, properties); diff --git a/src/cpp/src/speculative_decoding/speculative_decoding_impl.cpp b/src/cpp/src/speculative_decoding/speculative_decoding_impl.cpp index 0c5ecfa47d..b667053d84 100644 --- a/src/cpp/src/speculative_decoding/speculative_decoding_impl.cpp +++ b/src/cpp/src/speculative_decoding/speculative_decoding_impl.cpp @@ -36,13 +36,17 @@ ContinuousBatchingPipeline::SpeculativeDecodingImpl::SpeculativeDecodingImpl(con auto main_scheduler_config = main_model_desc.scheduler_config; auto main_device = main_model_desc.device; + bool allow_score_aggregation = true; + bool allow_xattention = false; ov::pass::SDPAToPagedAttention(main_model_desc.scheduler_config.use_cache_eviction, main_model_desc.scheduler_config.use_cache_eviction, - /* allow_score_aggregation = */ true).run_on_model(main_model); + allow_score_aggregation, + allow_xattention).run_on_model(main_model); ov::pass::SDPAToPagedAttention(main_model_desc.scheduler_config.use_cache_eviction, main_model_desc.scheduler_config.use_cache_eviction, - /* allow_score_aggregation = */ true).run_on_model(draft_model); + allow_score_aggregation, + allow_xattention).run_on_model(draft_model); utils::apply_gather_before_matmul_transformation(main_model); utils::apply_gather_before_matmul_transformation(draft_model); From 8c27d3286b8040a18f22b7a80c5858209722a2c5 Mon Sep 17 00:00:00 2001 From: Andrii Staikov Date: Wed, 12 Nov 2025 14:26:22 +0100 Subject: [PATCH 12/17] add new OV runtime commit --- .github/workflows/coverity.yml | 4 ++-- .github/workflows/linux.yml | 4 ++-- .github/workflows/mac.yml | 4 ++-- .github/workflows/manylinux_2_28.yml | 5 ++++- .github/workflows/windows.yml | 4 ++-- 5 files changed, 12 insertions(+), 9 deletions(-) diff --git a/.github/workflows/coverity.yml b/.github/workflows/coverity.yml index 5811790ecd..07f3a8f752 100644 --- a/.github/workflows/coverity.yml +++ b/.github/workflows/coverity.yml @@ -43,10 +43,10 @@ jobs: with: platform: ubuntu22 commit_packages_to_provide: wheels - revision: fcf7c2964cf460ecfcb039f748d1f4028626d58c + revision: d0a810e97cd98e6e008e89ba213a1728e3c1eddb # Set specific revision and uncomment to use OV from its PR build: # branch_name: master - # event_name: pull_request + event_name: pull_request - name: Clone docker tag from OpenVINO repo uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 7ec5890f3c..82cfd817bc 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -93,10 +93,10 @@ jobs: with: platform: ubuntu22 commit_packages_to_provide: wheels,developer_package.tar.gz,openvino_node_npm_package.tar.gz - revision: fcf7c2964cf460ecfcb039f748d1f4028626d58c + revision: d0a810e97cd98e6e008e89ba213a1728e3c1eddb # Set specific revision and uncomment to use OV from its PR build: # branch_name: master - # event_name: pull_request + event_name: pull_request - name: Clone docker tag from OpenVINO repo uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 diff --git a/.github/workflows/mac.yml b/.github/workflows/mac.yml index 0eeced44bb..ed7a211713 100644 --- a/.github/workflows/mac.yml +++ b/.github/workflows/mac.yml @@ -85,10 +85,10 @@ jobs: platform: macos_14_7 arch: 'arm64' commit_packages_to_provide: wheels,developer_package.tar.gz,openvino_node_npm_package.tar.gz - revision: fcf7c2964cf460ecfcb039f748d1f4028626d58c + revision: d0a810e97cd98e6e008e89ba213a1728e3c1eddb # Set specific revision and uncomment to use OV from its PR build: # branch_name: master - # event_name: pull_request + event_name: pull_request genai_build_cmake: name: Build cpack - ${{ matrix.build-type }} diff --git a/.github/workflows/manylinux_2_28.yml b/.github/workflows/manylinux_2_28.yml index 97b9e6291d..356dcaa780 100644 --- a/.github/workflows/manylinux_2_28.yml +++ b/.github/workflows/manylinux_2_28.yml @@ -93,7 +93,10 @@ jobs: with: platform: almalinux8 commit_packages_to_provide: wheels,developer_package.tar.gz,openvino_node_npm_package.tar.gz - revision: fcf7c2964cf460ecfcb039f748d1f4028626d58c + revision: d0a810e97cd98e6e008e89ba213a1728e3c1eddb + # Set specific revision and uncomment to use OV from its PR build: + # branch_name: master + event_name: pull_request - name: Clone docker tag from OpenVINO repo uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index a85ac74351..31dc121f2f 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -89,10 +89,10 @@ jobs: with: platform: windows commit_packages_to_provide: wheels,openvino_node_npm_package.zip - revision: fcf7c2964cf460ecfcb039f748d1f4028626d58c + revision: d0a810e97cd98e6e008e89ba213a1728e3c1eddb # Set specific revision and uncomment to use OV from its PR build: # branch_name: master - # event_name: pull_request + event_name: pull_request genai_build_cpack: name: genai cpack (${{ matrix.build-type }}) From 5b9070c676cfc04024ed92481699de9f4d42011d Mon Sep 17 00:00:00 2001 From: Andrii Staikov Date: Fri, 14 Nov 2025 15:44:56 +0100 Subject: [PATCH 13/17] use latest available commit --- .github/workflows/coverity.yml | 4 ++-- .github/workflows/linux.yml | 4 ++-- .github/workflows/mac.yml | 4 ++-- .github/workflows/manylinux_2_28.yml | 5 +---- .github/workflows/windows.yml | 5 +---- 5 files changed, 8 insertions(+), 14 deletions(-) diff --git a/.github/workflows/coverity.yml b/.github/workflows/coverity.yml index 07f3a8f752..417da9cd49 100644 --- a/.github/workflows/coverity.yml +++ b/.github/workflows/coverity.yml @@ -43,10 +43,10 @@ jobs: with: platform: ubuntu22 commit_packages_to_provide: wheels - revision: d0a810e97cd98e6e008e89ba213a1728e3c1eddb + revision: latest_available_commit # Set specific revision and uncomment to use OV from its PR build: # branch_name: master - event_name: pull_request + # event_name: pull_request - name: Clone docker tag from OpenVINO repo uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 82cfd817bc..c031b22b87 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -93,10 +93,10 @@ jobs: with: platform: ubuntu22 commit_packages_to_provide: wheels,developer_package.tar.gz,openvino_node_npm_package.tar.gz - revision: d0a810e97cd98e6e008e89ba213a1728e3c1eddb + revision: latest_available_commit # Set specific revision and uncomment to use OV from its PR build: # branch_name: master - event_name: pull_request + # event_name: pull_request - name: Clone docker tag from OpenVINO repo uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 diff --git a/.github/workflows/mac.yml b/.github/workflows/mac.yml index ed7a211713..020a7b133e 100644 --- a/.github/workflows/mac.yml +++ b/.github/workflows/mac.yml @@ -85,10 +85,10 @@ jobs: platform: macos_14_7 arch: 'arm64' commit_packages_to_provide: wheels,developer_package.tar.gz,openvino_node_npm_package.tar.gz - revision: d0a810e97cd98e6e008e89ba213a1728e3c1eddb + revision: latest_available_commit # Set specific revision and uncomment to use OV from its PR build: # branch_name: master - event_name: pull_request + # event_name: pull_request genai_build_cmake: name: Build cpack - ${{ matrix.build-type }} diff --git a/.github/workflows/manylinux_2_28.yml b/.github/workflows/manylinux_2_28.yml index 356dcaa780..38fcd6d4e8 100644 --- a/.github/workflows/manylinux_2_28.yml +++ b/.github/workflows/manylinux_2_28.yml @@ -93,10 +93,7 @@ jobs: with: platform: almalinux8 commit_packages_to_provide: wheels,developer_package.tar.gz,openvino_node_npm_package.tar.gz - revision: d0a810e97cd98e6e008e89ba213a1728e3c1eddb - # Set specific revision and uncomment to use OV from its PR build: - # branch_name: master - event_name: pull_request + revision: latest_available_commit - name: Clone docker tag from OpenVINO repo uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 31dc121f2f..7eedd75291 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -89,10 +89,7 @@ jobs: with: platform: windows commit_packages_to_provide: wheels,openvino_node_npm_package.zip - revision: d0a810e97cd98e6e008e89ba213a1728e3c1eddb - # Set specific revision and uncomment to use OV from its PR build: - # branch_name: master - event_name: pull_request + revision: latest_available_commit genai_build_cpack: name: genai cpack (${{ matrix.build-type }}) From 1d484c26f6a8537450acc09da9788201bbf7cff3 Mon Sep 17 00:00:00 2001 From: Andrii Staikov Date: Fri, 14 Nov 2025 16:02:53 +0100 Subject: [PATCH 14/17] use lac --- .github/workflows/coverity.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/coverity.yml b/.github/workflows/coverity.yml index 417da9cd49..84897e10d1 100644 --- a/.github/workflows/coverity.yml +++ b/.github/workflows/coverity.yml @@ -44,9 +44,12 @@ jobs: platform: ubuntu22 commit_packages_to_provide: wheels revision: latest_available_commit +<<<<<<< HEAD # Set specific revision and uncomment to use OV from its PR build: # branch_name: master # event_name: pull_request +======= +>>>>>>> 5113a63c (use lac) - name: Clone docker tag from OpenVINO repo uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 From 95b127d55579a83d738f78c0ce3dadcbec90156f Mon Sep 17 00:00:00 2001 From: Andrii Staikov Date: Fri, 14 Nov 2025 16:13:05 +0100 Subject: [PATCH 15/17] Apply suggestions from code review --- .github/workflows/coverity.yml | 3 --- .github/workflows/windows.yml | 3 +++ 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/coverity.yml b/.github/workflows/coverity.yml index 84897e10d1..417da9cd49 100644 --- a/.github/workflows/coverity.yml +++ b/.github/workflows/coverity.yml @@ -44,12 +44,9 @@ jobs: platform: ubuntu22 commit_packages_to_provide: wheels revision: latest_available_commit -<<<<<<< HEAD # Set specific revision and uncomment to use OV from its PR build: # branch_name: master # event_name: pull_request -======= ->>>>>>> 5113a63c (use lac) - name: Clone docker tag from OpenVINO repo uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 7eedd75291..b0587ec758 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -90,6 +90,9 @@ jobs: platform: windows commit_packages_to_provide: wheels,openvino_node_npm_package.zip revision: latest_available_commit + # Set specific revision and uncomment to use OV from its PR build: + # branch_name: master + # event_name: pull_request genai_build_cpack: name: genai cpack (${{ matrix.build-type }}) From 5ee239f3734615abd824a7250145c3cdd5c1844e Mon Sep 17 00:00:00 2001 From: Andrii Staikov Date: Fri, 14 Nov 2025 16:16:13 +0100 Subject: [PATCH 16/17] use more latest av. commit --- .github/workflows/manylinux_2_28.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/manylinux_2_28.yml b/.github/workflows/manylinux_2_28.yml index 38fcd6d4e8..2f8defa74b 100644 --- a/.github/workflows/manylinux_2_28.yml +++ b/.github/workflows/manylinux_2_28.yml @@ -94,6 +94,9 @@ jobs: platform: almalinux8 commit_packages_to_provide: wheels,developer_package.tar.gz,openvino_node_npm_package.tar.gz revision: latest_available_commit + # Set specific revision and uncomment to use OV from its PR build: + # branch_name: master + # event_name: pull_request - name: Clone docker tag from OpenVINO repo uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 From 3fe8f11abcb410fa0ce6adffc17cf4a836057f06 Mon Sep 17 00:00:00 2001 From: Andrii Staikov Date: Thu, 27 Nov 2025 13:53:53 +0100 Subject: [PATCH 17/17] Trigger Build