4 changes: 2 additions & 2 deletions .github/CONTRIBUTING.md
@@ -1,5 +1,5 @@
1. See [pull_request_template.md](./pull_request_template.md) for pull request (PR) requirements.
2. See [BUILD.md](../src/docs/BUILD.md) for instructions on how to build `OpenVINO™ GenAI`.
1. See [pull_request_template.md](/.github/pull_request_template.md) for pull request (PR) requirements.
2. See [BUILD.md](/src/docs/BUILD.md) for instructions on how to build `OpenVINO™ GenAI`.
3. Code style is determined by the file the change is made in. If ambiguous, look into the neighboring files of the same type. In case of contradiction, pick any of the options but stay consistent in your choice.
4. Don't push branches directly to the upstream repository. Once a branch is pushed to upstream, non-admins lose push access to it, preventing you from updating your changes. Instead, push to your fork and open PRs from there.
5. Your PR will be tested after one of the developers approves the tests run.
2 changes: 1 addition & 1 deletion README.md
@@ -12,7 +12,7 @@
![Python](https://img.shields.io/badge/python-3.10+-green)
![OS](https://img.shields.io/badge/OS-Linux_|_Windows_|_MacOS-blue)

![](src/docs/openvino_genai.svg)
![](site/static/img/openvino-genai-workflow.svg)

</div>

10 changes: 10 additions & 0 deletions site/docs/guides/debug-logging.mdx
@@ -76,3 +76,13 @@ Accepted token rate, %: 51
===============================
Request_id: 0 ||| 40 0 40 20 0 0 40 40 0 20 20 20 0 40 0 0 20 80 0 80 20 0 0 0 40 80 0 40 60 40 80 0 0 0 0 40 20 20 0 40 20 40 0 20 0 0 0
```

When a GGUF model is passed to the pipeline, detailed debug info is also printed.

```sh title="Output:"
[GGUF Reader]: Loading and unpacking model from: gguf_models/qwen2.5-0.5b-instruct-q4_0.gguf
[GGUF Reader]: Loading and unpacking model done. Time: 196ms
[GGUF Reader]: Start generating OpenVINO model...
[GGUF Reader]: Save generated OpenVINO model to: gguf_models/openvino_model.xml done. Time: 466 ms
[GGUF Reader]: Model generation done. Time: 757ms
```
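The log above comes from loading a `.gguf` file directly. A minimal sketch of triggering it (the pipeline call is commented out since it assumes `openvino_genai` is installed and the GGUF file exists locally):

```python
from pathlib import Path

# Path used in the log above; adjust to your local GGUF file.
gguf_path = Path("gguf_models/qwen2.5-0.5b-instruct-q4_0.gguf")

# Passing a .gguf file to the pipeline invokes the GGUF reader, which unpacks
# the weights and generates an OpenVINO model (assumes openvino_genai):
# import openvino_genai as ov_genai
# pipe = ov_genai.LLMPipeline(gguf_path, "CPU")

# As the log shows, the generated IR is saved next to the GGUF file:
generated_ir = gguf_path.parent / "openvino_model.xml"
```

Subsequent runs can load the generated `openvino_model.xml` directly and skip the unpacking step.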
@@ -1,9 +1,9 @@
import React from 'react';
import { BaseModelsTable, LinksCell, StatusCell } from '../base-models-table';
import { BaseModelsTable, LinksCell } from '../base-models-table';
import { SPEECH_GENERATION_MODELS } from './models';

export default function SpeechGenerationModelsTable(): React.JSX.Element {
const headers = ['Architecture', 'Models', 'LoRA Support', 'Example HuggingFace Models'];
const headers = ['Architecture', 'Models', 'Example HuggingFace Models'];

const rows = SPEECH_GENERATION_MODELS.map(({ architecture, models }) => (
<>
@@ -12,13 +12,11 @@ export default function SpeechGenerationModelsTable(): React.JSX.Element {
<code>{architecture}</code>
</td>
<td>{models[0].name}</td>
<StatusCell value={models[0].loraSupport} />
<LinksCell links={models[0].links} />
</tr>
{models.slice(1).map(({ name, loraSupport, links }) => (
{models.slice(1).map(({ name, links }) => (
<tr key={name}>
<td>{name}</td>
<StatusCell value={loraSupport} />
<LinksCell links={links} />
</tr>
))}
@@ -2,7 +2,6 @@ type SpeechGenerationModelType = {
architecture: string;
models: Array<{
name: string;
loraSupport: boolean;
links: string[];
}>;
};
@@ -13,7 +12,6 @@ export const SPEECH_GENERATION_MODELS: SpeechGenerationModelType[] = [
models: [
{
name: 'SpeechT5 TTS',
loraSupport: false,
**Review comment (Collaborator):** There are notes for some of the pipelines: "LoRA adapters are not supported." Add them to the other pipelines. LLMs should extend their note ("Other models with similar architectures may also work") to mention LoRA.

**Reply (Contributor, author):** Added LoRA support notes for each use case; preview updated.

links: ['https://huggingface.co/microsoft/speecht5_tts'],
},
],
@@ -1,10 +1,10 @@
import Link from '@docusaurus/Link';
import React from 'react';
import { BaseModelsTable, LinksCell, StatusCell } from '../base-models-table';
import { BaseModelsTable, LinksCell } from '../base-models-table';
import { VLM_MODELS } from './models';

export default function VLMModelsTable(): React.JSX.Element {
const headers = ['Architecture', 'Models', 'LoRA Support', 'Example HuggingFace Models'];
const headers = ['Architecture', 'Models', 'Example HuggingFace Models'];

const rows = VLM_MODELS.map(({ architecture, models }) => (
<>
@@ -20,13 +20,11 @@
</>
)}
</td>
<StatusCell value={models[0].loraSupport} />
<LinksCell links={models[0].links} />
</tr>
{models.slice(1).map(({ name, loraSupport, links }) => (
{models.slice(1).map(({ name, links }) => (
<tr key={name}>
<td>{name}</td>
<StatusCell value={loraSupport} />
<LinksCell links={links} />
</tr>
))}
14 changes: 0 additions & 14 deletions site/docs/supported-models/_components/vlm-models-table/models.ts
@@ -2,7 +2,6 @@ type VLMModelType = {
architecture: string;
models: Array<{
name: string;
loraSupport: boolean;
links: string[];
notesLink?: string;
}>;
@@ -14,7 +13,6 @@ export const VLM_MODELS: VLMModelType[] = [
models: [
{
name: 'InternVLChatModel',
loraSupport: false,
links: [
'https://huggingface.co/OpenGVLab/InternVL2-1B',
'https://huggingface.co/OpenGVLab/InternVL2-2B',
@@ -39,7 +37,6 @@ export const VLM_MODELS: VLMModelType[] = [
models: [
{
name: 'LLaVA-v1.5',
loraSupport: false,
links: ['https://huggingface.co/llava-hf/llava-1.5-7b-hf'],
},
],
@@ -49,12 +46,10 @@ export const VLM_MODELS: VLMModelType[] = [
models: [
{
name: 'nanoLLaVA',
loraSupport: false,
links: ['https://huggingface.co/qnguyen3/nanoLLaVA'],
},
{
name: 'nanoLLaVA-1.5',
loraSupport: false,
links: ['https://huggingface.co/qnguyen3/nanoLLaVA-1.5'],
},
],
@@ -64,7 +59,6 @@ export const VLM_MODELS: VLMModelType[] = [
models: [
{
name: 'LLaVA-v1.6',
loraSupport: false,
links: [
'https://huggingface.co/llava-hf/llava-v1.6-mistral-7b-hf',
'https://huggingface.co/llava-hf/llava-v1.6-vicuna-7b-hf',
@@ -78,7 +72,6 @@ export const VLM_MODELS: VLMModelType[] = [
models: [
{
name: 'LLaVA-Next-Video',
loraSupport: false,
links: [
'https://huggingface.co/llava-hf/LLaVA-NeXT-Video-7B-hf',
],
@@ -90,7 +83,6 @@ export const VLM_MODELS: VLMModelType[] = [
models: [
{
name: 'MiniCPM-o-2_6',
loraSupport: false,
links: ['https://huggingface.co/openbmb/MiniCPM-o-2_6'],
notesLink: '#minicpm-o-notes',
},
@@ -101,7 +93,6 @@ export const VLM_MODELS: VLMModelType[] = [
models: [
{
name: 'MiniCPM-V-2_6',
loraSupport: false,
links: ['https://huggingface.co/openbmb/MiniCPM-V-2_6'],
},
],
@@ -111,7 +102,6 @@ export const VLM_MODELS: VLMModelType[] = [
models: [
{
name: 'phi3_v',
loraSupport: false,
links: [
'https://huggingface.co/microsoft/Phi-3-vision-128k-instruct',
'https://huggingface.co/microsoft/Phi-3.5-vision-instruct',
@@ -125,7 +115,6 @@ export const VLM_MODELS: VLMModelType[] = [
models: [
{
name: 'phi4mm',
loraSupport: false,
links: [
'https://huggingface.co/microsoft/Phi-4-multimodal-instruct',
],
@@ -138,7 +127,6 @@ export const VLM_MODELS: VLMModelType[] = [
models: [
{
name: 'Qwen2-VL',
loraSupport: false,
links: [
'https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct',
'https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct',
@@ -153,7 +141,6 @@ export const VLM_MODELS: VLMModelType[] = [
models: [
{
name: 'Qwen2.5-VL',
loraSupport: false,
links: [
'https://huggingface.co/Qwen/Qwen2.5-VL-3B-Instruct',
'https://huggingface.co/Qwen/Qwen2.5-VL-7B-Instruct',
@@ -166,7 +153,6 @@ export const VLM_MODELS: VLMModelType[] = [
models: [
{
name: 'gemma3',
loraSupport: false,
links: [
'https://huggingface.co/google/gemma-3-4b-it',
'https://huggingface.co/google/gemma-3-12b-it',
@@ -1,9 +1,9 @@
import React from 'react';
import { BaseModelsTable, LinksCell, StatusCell } from '../base-models-table';
import { BaseModelsTable, LinksCell } from '../base-models-table';
import { WHISPER_MODELS } from './models';

export default function WhisperModelsTable(): React.JSX.Element {
const headers = ['Architecture', 'Models', 'LoRA Support', 'Example HuggingFace Models'];
const headers = ['Architecture', 'Models', 'Example HuggingFace Models'];

const rows = WHISPER_MODELS.map(({ architecture, models }) => (
<>
@@ -12,13 +12,11 @@ export default function WhisperModelsTable(): React.JSX.Element {
<code>{architecture}</code>
</td>
<td>{models[0].name}</td>
<StatusCell value={models[0].loraSupport} />
<LinksCell links={models[0].links} />
</tr>
{models.slice(1).map(({ name, loraSupport, links }) => (
{models.slice(1).map(({ name, links }) => (
<tr key={name}>
<td>{name}</td>
<StatusCell value={loraSupport} />
<LinksCell links={links} />
</tr>
))}
@@ -2,7 +2,6 @@ type WhisperModelType = {
architecture: string;
models: Array<{
name: string;
loraSupport: boolean;
links: string[];
}>;
};
@@ -13,7 +12,6 @@ export const WHISPER_MODELS: WhisperModelType[] = [
models: [
{
name: 'Whisper',
loraSupport: false,
links: [
'https://huggingface.co/openai/whisper-tiny',
'https://huggingface.co/openai/whisper-tiny.en',
@@ -28,7 +26,6 @@ export const WHISPER_MODELS: WhisperModelType[] = [
},
{
name: 'Distil-Whisper',
loraSupport: false,
links: [
'https://huggingface.co/distil-whisper/distil-small.en',
'https://huggingface.co/distil-whisper/distil-medium.en',
56 changes: 33 additions & 23 deletions site/docs/supported-models/index.mdx
@@ -9,26 +9,22 @@ import TextRerankModelsTable from './_components/text-rerank-models-table';

# Supported Models

:::info

:::info Models Compatibility
Other models with similar architectures may also work successfully even if not explicitly validated.
Consider testing any unlisted models to verify compatibility with your specific use case.

:::

## Large Language Models (LLMs)

<LLMModelsTable />

:::info

LoRA adapters are supported.

:::tip LoRA Support
LLM pipeline supports LoRA adapters.
:::

<LLMModelsTable />

::::info

The pipeline can work with other similar topologies produced by `optimum-intel` with the same model signature.
The LLM pipeline can work with other similar topologies produced by `optimum-intel` with the same model signature.
The model is required to have the following inputs after the conversion:

1. `input_ids` contains the tokens.
@@ -50,6 +46,10 @@ Models should belong to the same family and have the same tokenizers.
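The signature check described here can be sketched programmatically. Only `input_ids` is taken from the visible text; extend the set with the remaining inputs this section lists, and note the `openvino` calls in the comments are assumptions about a local export:

```python
# Sketch: verify that a converted model exposes the inputs the LLM pipeline
# expects. REQUIRED_INPUTS shows only `input_ids`; add the other inputs
# listed in this section.
REQUIRED_INPUTS = {"input_ids"}

def missing_inputs(model_input_names):
    """Return which required pipeline inputs the converted model lacks."""
    return REQUIRED_INPUTS - set(model_input_names)

# Against an actual optimum-intel export (assumes `openvino` is installed):
# import openvino as ov
# model = ov.Core().read_model("model_dir/openvino_model.xml")
# names = {n for port in model.inputs for n in port.get_names()}
# assert not missing_inputs(names)  # empty set means the signature matches
```

An empty result means the export is signature-compatible with the pipeline.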

## Visual Language Models (VLMs)

:::info LoRA Support
VLM pipeline does **not** support LoRA adapters.
:::

<VLMModelsTable />

:::warning VLM Models Notes
@@ -62,7 +62,7 @@ pip install timm einops
```
#### MiniCPMO {#minicpm-o-notes}

1. `openbmb/MiniCPM-o-2_6` doesn't support transformers>=4.52 which is required for `optimum-cli` export.
1. `openbmb/MiniCPM-o-2_6` doesn't support `transformers>=4.52` which is required for `optimum-cli` export.
2. `--task image-text-to-text` is required for `optimum-cli export openvino --trust-remote-code` because `image-text-to-text` isn't `MiniCPM-o-2_6`'s native task.

#### phi3_v {#phi3_v-notes}
@@ -73,42 +73,52 @@ generation_config.set_eos_token_id(pipe.get_tokenizer().get_eos_token_id())
```
#### phi4mm {#phi4mm-notes}

Apply https://huggingface.co/microsoft/Phi-4-multimodal-instruct/discussions/78/files to fix the model export for transformers>=4.50
Apply https://huggingface.co/microsoft/Phi-4-multimodal-instruct/discussions/78/files to fix the model export for `transformers>=4.50`
:::

## Speech Recognition Models (Whisper-based)

:::info LoRA Support
Speech recognition pipeline does **not** support LoRA adapters.
:::

<WhisperModelsTable />

## Speech Generation Models

:::info LoRA Support
Speech generation pipeline does **not** support LoRA adapters.
:::

<SpeechGenerationModelsTable />

## Text Embeddings Models

<TextEmbeddingsModelsTable />

:::info
LoRA adapters are not supported.
:::info LoRA Support
Text embeddings pipeline does **not** support LoRA adapters.
:::

:::info
<TextEmbeddingsModelsTable />

:::warning Text Embeddings Models Notes
Qwen3 Embedding models require `--task feature-extraction` during the conversion with `optimum-cli`.
:::
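As a sketch, the conversion command with an explicit task can be assembled like this (the model id is an illustrative example, not taken from the table above):

```python
import shlex

def export_command(model_id: str, task: str, output_dir: str) -> str:
    """Build an optimum-cli export command with an explicit --task."""
    return shlex.join([
        "optimum-cli", "export", "openvino",
        "--model", model_id,
        "--task", task,
        output_dir,
    ])

# Qwen3 embedding models need the feature-extraction task:
cmd = export_command("Qwen/Qwen3-Embedding-0.6B", "feature-extraction", "qwen3_embedding_ov")
```

The same helper applies to any model whose default task differs from what the pipeline expects; only the `--task` value changes.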

## Text Rerank Models

<TextRerankModelsTable />

:::info
LoRA adapters are not supported.
:::info LoRA Support
Text rerank pipeline does **not** support LoRA adapters.
:::

:::info
<TextRerankModelsTable />

:::warning Text Rerank Models Notes
Text Rerank models require the appropriate `--task` provided during the conversion with `optimum-cli`. The task can be found in the table above.
:::

:::info
___

:::info Hugging Face Notes
Some models may require submitting an access request on their Hugging Face page before they can be downloaded.

If https://huggingface.co/ is down, the conversion step won't be able to download the models.