From b3fd08564ada32eb9316b280cd1e2ecd66642303 Mon Sep 17 00:00:00 2001 From: mzegla Date: Wed, 22 Oct 2025 16:28:01 +0200 Subject: [PATCH 1/7] init --- prepare_llm_models.sh | 23 ++++-- src/llm/servable.cpp | 7 +- src/llm/servable_initializer.cpp | 113 +++++++++++------------------- src/llm/servable_initializer.hpp | 4 -- src/test/llm/llmtemplate_test.cpp | 33 +++++++++ windows_prepare_llm_models.bat | 11 +++ 6 files changed, 108 insertions(+), 83 deletions(-) diff --git a/prepare_llm_models.sh b/prepare_llm_models.sh index 017cfa48be..32d94a0453 100755 --- a/prepare_llm_models.sh +++ b/prepare_llm_models.sh @@ -20,7 +20,7 @@ if [ -z "$1" ]; then exit 1 fi -CB_MODEL="facebook/opt-125m" +TEXT_GENERATION_MODEL="facebook/opt-125m" TOKENIZER_FILE="openvino_tokenizer.bin" LEGACY_MODEL_FILE="1/model.bin" EMBEDDING_MODEL="thenlper/gte-small" @@ -35,7 +35,7 @@ PHI4_MODEL="microsoft/Phi-4-mini-instruct" MISTRAL_MODEL="mistralai/Mistral-7B-Instruct-v0.3" GPT_OSS="openai/gpt-oss-20b" -MODELS=("$CB_MODEL/$TOKENIZER_FILE" "$RERANK_MODEL/rerank/$LEGACY_MODEL_FILE" "$VLM_MODEL/$TOKENIZER_FILE" "$QWEN3_MODEL/$TOKENIZER_FILE" "$LLAMA3_MODEL/$TOKENIZER_FILE" "$HERMES3_MODEL/$TOKENIZER_FILE" "$PHI4_MODEL/$TOKENIZER_FILE" "$MISTRAL_MODEL/$TOKENIZER_FILE" "$GPT_OSS/$TOKENIZER_FILE" "$EMBEDDING_MODEL/ov/$TOKENIZER_FILE" "$RERANK_MODEL/ov/$TOKENIZER_FILE") +MODELS=("$TEXT_GENERATION_MODEL/$TOKENIZER_FILE" "$EMBEDDING_MODEL/embeddings/$LEGACY_MODEL_FILE" "$RERANK_MODEL/rerank/$LEGACY_MODEL_FILE" "$VLM_MODEL/$TOKENIZER_FILE" "$QWEN3_MODEL/$TOKENIZER_FILE" "$LLAMA3_MODEL/$TOKENIZER_FILE" "$HERMES3_MODEL/$TOKENIZER_FILE" "$PHI4_MODEL/$TOKENIZER_FILE" "$MISTRAL_MODEL/$TOKENIZER_FILE" "$GPT_OSS/$TOKENIZER_FILE" "$EMBEDDING_MODEL/ov/$TOKENIZER_FILE" "$RERANK_MODEL/ov/$TOKENIZER_FILE") all_exist=true for model in "${MODELS[@]}"; do @@ -69,13 +69,22 @@ else fi mkdir -p $1 -if [ -f "$1/$CB_MODEL/$TOKENIZER_FILE" ]; then - echo "Models file $1/$CB_MODEL/$TOKENIZER_FILE exists. Skipping downloading models." +if [ -f "$1/$TEXT_GENERATION_MODEL/$TOKENIZER_FILE" ]; then + echo "Models file $1/$TEXT_GENERATION_MODEL/$TOKENIZER_FILE exists. Skipping downloading models." else - python3 demos/common/export_models/export_model.py text_generation --source_model "$CB_MODEL" --weight-format int8 --model_repository_path $1 + python3 demos/common/export_models/export_model.py text_generation --source_model "$TEXT_GENERATION_MODEL" --weight-format int8 --model_repository_path $1 + if [ ! -f "$1/$TEXT_GENERATION_MODEL/chat_template.jinja" ]; then + dummy_chat_template="{% for message in messages %}\ +{% if message['role'] == 'user' %}{{ 'User: ' + message['content'] }}\ +{% elif message['role'] == 'system' %}{{ '<|system|>\n' + message['content'] + eos_token }}\ +{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token }}\ +{% endif %}\ +{% endfor %}" + echo "$dummy_chat_template" > "$1/$TEXT_GENERATION_MODEL/chat_template.jinja" + fi fi -if [ ! -f "$1/$CB_MODEL/$TOKENIZER_FILE" ]; then - echo "[ERROR] Models file $1/$CB_MODEL/$TOKENIZER_FILE does not exist." +if [ ! -f "$1/$TEXT_GENERATION_MODEL/$TOKENIZER_FILE" ]; then + echo "[ERROR] Models file $1/$TEXT_GENERATION_MODEL/$TOKENIZER_FILE does not exist." 
    exit 1
 fi
 
diff --git a/src/llm/servable.cpp b/src/llm/servable.cpp
index 49ce2c59db..ccf644cddd 100644
--- a/src/llm/servable.cpp
+++ b/src/llm/servable.cpp
@@ -124,7 +124,12 @@ absl::Status GenAiServable::prepareInputs(std::shared_ptrapiHandler->getChatHistory();
         constexpr bool add_generation_prompt = true;  // confirm it should be hardcoded
-        inputText = getProperties()->tokenizer.apply_chat_template(chatHistory, add_generation_prompt);
+        try {
+            inputText = getProperties()->tokenizer.apply_chat_template(chatHistory, add_generation_prompt);
+        } catch (const std::exception& e) {
+            SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Failed to apply chat template: {}", e.what());
+            return absl::Status(absl::StatusCode::kInvalidArgument, "Failed to apply chat template. The model either does not have a chat template or has an invalid one.");
+        }
 #endif
         if (inputText.size() == 0) {
             return absl::Status(absl::StatusCode::kInvalidArgument, "Final prompt after applying chat template is empty");
diff --git a/src/llm/servable_initializer.cpp b/src/llm/servable_initializer.cpp
index 2af9f690e0..2d43e6418b 100644
--- a/src/llm/servable_initializer.cpp
+++ b/src/llm/servable_initializer.cpp
@@ -51,15 +51,17 @@ namespace ovms {
 
 static const std::string CHAT_TEMPLATE_WARNING_MESSAGE = "Warning: Chat template has not been loaded properly. Servable will not respond to /chat/completions endpoint.";
 
-static const std::string DEFAULT_CHAT_TEMPLATE = R"({% for message in messages %}{% if message['role'] == 'user' %}{{ 'User: ' + message['content'] }}{% elif message['role'] == 'system' %}{{ '<|system|>\n' + message['content'] + eos_token }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token }}{% endif %}{% endfor %})";
 
 void GenAiServableInitializer::loadChatTemplate(std::shared_ptr properties, const std::string& chatTemplateDirectory) {
 #if (PYTHON_DISABLE == 0)
     ExtraGenerationInfo extraGenInfo = readExtraGenerationInfo(properties, chatTemplateDirectory);
     loadPyTemplateProcessor(properties, extraGenInfo);
 #else
-    loadDefaultTemplateProcessorIfNeeded(properties);
+    if (properties->tokenizer.get_chat_template().empty()) {
+        SPDLOG_LOGGER_DEBUG(modelmanager_logger, CHAT_TEMPLATE_WARNING_MESSAGE);
+    }
 #endif
+    // In non-python build, GenAI handles chat template loading
 }
 
 #if (PYTHON_DISABLE == 0)
@@ -123,29 +125,34 @@ ExtraGenerationInfo GenAiServableInitializer::readExtraGenerationInfo(std::share
 }
 
 void GenAiServableInitializer::loadPyTemplateProcessor(std::shared_ptr properties, const ExtraGenerationInfo& extraGenInfo) {
-    // GGUF models specific validation
-    if (extraGenInfo.isGgufModel) {
-        bool errorFound = false;
-        if (extraGenInfo.eosTokenFromTokenizer.empty()) {
-            SPDLOG_ERROR("Tokenizer eos token not found in tokenizer nor in vocabulary but required for GGUF models.");
-            errorFound = true;
-        }
-        if (extraGenInfo.bosTokenFromTokenizer.empty()) {
-            SPDLOG_ERROR("Tokenizer bos token not found in tokenizer nor in vocabulary but required for GGUF models.");
-            errorFound = true;
-        }
-        if (extraGenInfo.chatTemplateFromTokenizer.empty()) {
-            SPDLOG_ERROR("Tokenizer chat template not found in tokenizer but required for GGUF models.");
-            errorFound = true;
-        }
-        if (errorFound)
-            return;
+    // At this point the tokenizer cannot be uninitialized, as we need to access its methods to prepare for chat template processing
+    if (properties->tokenizer == ov::genai::Tokenizer()) {
+        SPDLOG_LOGGER_ERROR(modelmanager_logger, "Tokenizer is not initialized. 
Cannot load chat template processor."); + return; + } + std::string chatTemplate = properties->tokenizer.get_original_chat_template(); + std::string bosToken = properties->tokenizer.get_bos_token(); + std::string eosToken = properties->tokenizer.get_eos_token(); + if (bosToken.empty()) { + SPDLOG_ERROR("BOS token was not found in model files."); + return; } + if (eosToken.empty()) { + SPDLOG_ERROR("EOS token was not found in model files."); + return; + } + if (chatTemplate.empty()) { + SPDLOG_ERROR("Chat template was not found in model files."); + return; + } + + properties->templateProcessor.bosToken = bosToken; + properties->templateProcessor.eosToken = eosToken; + py::gil_scoped_acquire acquire; try { - auto locals = py::dict("tokenizer_template"_a = extraGenInfo.chatTemplateFromTokenizer, - "templates_directory"_a = extraGenInfo.chatTemplateDirectory, - "is_gguf_model"_a = extraGenInfo.isGgufModel); + auto locals = py::dict("chat_template"_a = chatTemplate, + "templates_directory"_a = extraGenInfo.chatTemplateDirectory); py::exec(R"( # Following the logic from: # https://github.com/huggingface/transformers/blob/25245ec26dc29bcf6102e1b4ddd0dfd02e720cf5/src/transformers/tokenization_utils_base.py#L1837 @@ -214,57 +221,37 @@ void GenAiServableInitializer::loadPyTemplateProcessor(std::shared_ptrtemplateProcessor.bosToken = extraGenInfo.bosTokenFromTokenizer; - properties->templateProcessor.eosToken = extraGenInfo.eosTokenFromTokenizer; - } else { - properties->templateProcessor.bosToken = locals["bos_token"].cast(); - properties->templateProcessor.eosToken = locals["eos_token"].cast(); - } properties->templateProcessor.chatTemplate = std::make_unique>(locals["template"]); properties->templateProcessor.toolTemplate = std::make_unique>(locals["tool_template"]); } catch (const pybind11::error_already_set& e) { @@ -298,15 +278,6 @@ void GenAiServableInitializer::loadPyTemplateProcessor(std::shared_ptr properties) { - const std::string modelChatTemplate = properties->tokenizer.get_chat_template(); - if (modelChatTemplate.empty()) { - SPDLOG_LOGGER_DEBUG(modelmanager_logger, "Could not load model chat template. Using default template."); - properties->tokenizer.set_chat_template(DEFAULT_CHAT_TEMPLATE); - } -} #endif Status parseModelsPath(std::string& outPath, std::string modelsPath, std::string graphPath) { diff --git a/src/llm/servable_initializer.hpp b/src/llm/servable_initializer.hpp index 75d77ae098..d742db9c3e 100644 --- a/src/llm/servable_initializer.hpp +++ b/src/llm/servable_initializer.hpp @@ -52,10 +52,6 @@ class GenAiServableInitializer { // Use Python Jinja module for template processing static void loadPyTemplateProcessor(std::shared_ptr properties, const ExtraGenerationInfo& extraGenInfo); static ExtraGenerationInfo readExtraGenerationInfo(std::shared_ptr properties, const std::string& chatTemplateDirectory); -#else - // In C++ only version we use GenAI for template processing, but to have the same behavior as in Python-enabled version - // we use default template if model does not have its own, so that servable can also work on chat/completion endpoint. - static void loadDefaultTemplateProcessorIfNeeded(std::shared_ptr properties); #endif /* initialize method implementation MUST fill servable with all required properties i.e. pipeline, tokenizer, configs etc. based on mediapipe node options. 
diff --git a/src/test/llm/llmtemplate_test.cpp b/src/test/llm/llmtemplate_test.cpp index 3c49439ac7..b9ca15dddc 100644 --- a/src/test/llm/llmtemplate_test.cpp +++ b/src/test/llm/llmtemplate_test.cpp @@ -67,8 +67,30 @@ class LLMChatTemplateTest : public TestWithTempDir { } void LoadTemplateProcessor() { + // We need real model tokenizer and detokenizer as we rely on them to load chat template properly + std::string realModelPath = getGenericFullPathForSrcTest("/ovms/src/test/llm_testing/facebook/opt-125m"); + + std::string srcTokenizerPath = ovms::FileSystem::joinPath({realModelPath, "openvino_tokenizer.xml"}); + std::string dstTokenizerPath = ovms::FileSystem::joinPath({directoryPath, "openvino_tokenizer.xml"}); + std::filesystem::copy_file(srcTokenizerPath, dstTokenizerPath, std::filesystem::copy_options::overwrite_existing); + + std::string srcTokenizerBinPath = ovms::FileSystem::joinPath({realModelPath, "openvino_tokenizer.bin"}); + std::string dstTokenizerBinPath = ovms::FileSystem::joinPath({directoryPath, "openvino_tokenizer.bin"}); + std::filesystem::copy_file(srcTokenizerBinPath, dstTokenizerBinPath, std::filesystem::copy_options::overwrite_existing); + + std::string srcDetokenizerPath = ovms::FileSystem::joinPath({realModelPath, "openvino_detokenizer.xml"}); + std::string dstDetokenizerPath = ovms::FileSystem::joinPath({directoryPath, "openvino_detokenizer.xml"}); + std::filesystem::copy_file(srcDetokenizerPath, dstDetokenizerPath, std::filesystem::copy_options::overwrite_existing); + + std::string srcDetokenizerBinPath = ovms::FileSystem::joinPath({realModelPath, "openvino_detokenizer.bin"}); + std::string dstDetokenizerBinPath = ovms::FileSystem::joinPath({directoryPath, "openvino_detokenizer.bin"}); + std::filesystem::copy_file(srcDetokenizerBinPath, dstDetokenizerBinPath, std::filesystem::copy_options::overwrite_existing); + servable = std::make_shared(); servable->getProperties()->modelsPath = directoryPath; + servable->getProperties()->tokenizer = ov::genai::Tokenizer(directoryPath); + std::cout << "Chat template to be used: \n" + << servable->getProperties()->tokenizer.get_original_chat_template() << std::endl; ExtraGenerationInfo extraGenInfo = GenAiServableInitializer::readExtraGenerationInfo(servable->getProperties(), directoryPath); GenAiServableInitializer::loadPyTemplateProcessor(servable->getProperties(), extraGenInfo); } @@ -90,9 +112,15 @@ class LLMChatTemplateTest : public TestWithTempDir { bool CreateJinjaConfig(std::string& fileContents) { return createConfigFileWithContent(fileContents, jinjaConfigFilePath); } + void CopyDefaultChatTemplate() { + std::string srcFilePath = getGenericFullPathForSrcTest("/ovms/src/test/llm_testing/facebook/opt-125m/chat_template.jinja"); + std::string dstFilePath = ovms::FileSystem::joinPath({directoryPath, "chat_template.jinja"}); + std::filesystem::copy_file(srcFilePath, dstFilePath, std::filesystem::copy_options::overwrite_existing); + } }; TEST_F(LLMChatTemplateTest, ChatTemplateEmptyBody) { + CopyDefaultChatTemplate(); LoadTemplateProcessor(); std::string finalPrompt = ""; std::string payloadBody = ""; @@ -102,6 +130,7 @@ TEST_F(LLMChatTemplateTest, ChatTemplateEmptyBody) { } TEST_F(LLMChatTemplateTest, ChatTemplateEmptyMessage) { + CopyDefaultChatTemplate(); LoadTemplateProcessor(); std::string finalPrompt = ""; std::string payloadBody = R"( @@ -116,6 +145,7 @@ TEST_F(LLMChatTemplateTest, ChatTemplateEmptyMessage) { } TEST_F(LLMChatTemplateTest, ChatTemplateMessageWithEmptyObject) { + CopyDefaultChatTemplate(); 
LoadTemplateProcessor(); std::string finalPrompt = ""; std::string payloadBody = R"( @@ -130,6 +160,7 @@ TEST_F(LLMChatTemplateTest, ChatTemplateMessageWithEmptyObject) { } TEST_F(LLMChatTemplateTest, ChatTemplateDefault) { + CopyDefaultChatTemplate(); LoadTemplateProcessor(); std::string finalPrompt = ""; std::string payloadBody = R"( @@ -143,6 +174,7 @@ TEST_F(LLMChatTemplateTest, ChatTemplateDefault) { } TEST_F(LLMChatTemplateTest, ChatTemplateMultiMessage) { + CopyDefaultChatTemplate(); LoadTemplateProcessor(); std::string finalPrompt = ""; std::string payloadBody = R"( @@ -156,6 +188,7 @@ TEST_F(LLMChatTemplateTest, ChatTemplateMultiMessage) { } TEST_F(LLMChatTemplateTest, ChatTemplateComplexMessage) { + CopyDefaultChatTemplate(); LoadTemplateProcessor(); std::string finalPrompt = ""; std::string payloadBody = R"( diff --git a/windows_prepare_llm_models.bat b/windows_prepare_llm_models.bat index 7448cf74ab..b8b133c09b 100644 --- a/windows_prepare_llm_models.bat +++ b/windows_prepare_llm_models.bat @@ -79,6 +79,17 @@ if exist "%~1\%TEXT_GENERATION_MODEL%\%TOKENIZER_FILE%" ( ) else ( echo Downloading text generation model to %~1\%TEXT_GENERATION_MODEL% directory. python demos\common\export_models\export_model.py text_generation --source_model "%TEXT_GENERATION_MODEL%" --weight-format int8 --model_repository_path %~1 + + if not exist "%~1\%TEXT_GENERATION_MODEL%\chat_template.jinja" ( + set "dummy_chat_template={%% for message in messages %%}^ +{%% if message['role'] == 'user' %%}{{ 'User: ' + message['content'] }}^ +{%% elif message['role'] == 'system' %%}{{ '<|system|>\n' + message['content'] + eos_token }}^ +{%% elif message['role'] == 'assistant' %%}{{ message['content'] + eos_token }}^ +{%% endif %%}^ +{%% endfor %%}" + echo !dummy_chat_template! > "%~1\%TEXT_GENERATION_MODEL%\chat_template.jinja" + ) + if !errorlevel! neq 0 exit /b !errorlevel! ) if not exist "%~1\%TEXT_GENERATION_MODEL%\%TOKENIZER_FILE%" ( From b0f7b86d016a4ae75a40f5adadea1fe72be4f66d Mon Sep 17 00:00:00 2001 From: mzegla Date: Thu, 23 Oct 2025 11:15:38 +0200 Subject: [PATCH 2/7] minor fixes --- prepare_llm_models.sh | 14 ++++++++------ src/llm/servable_initializer.cpp | 10 ++++++++++ windows_prepare_llm_models.bat | 16 ++++++++-------- 3 files changed, 26 insertions(+), 14 deletions(-) diff --git a/prepare_llm_models.sh b/prepare_llm_models.sh index 32d94a0453..9214db9a8c 100755 --- a/prepare_llm_models.sh +++ b/prepare_llm_models.sh @@ -73,7 +73,14 @@ if [ -f "$1/$TEXT_GENERATION_MODEL/$TOKENIZER_FILE" ]; then echo "Models file $1/$TEXT_GENERATION_MODEL/$TOKENIZER_FILE exists. Skipping downloading models." else python3 demos/common/export_models/export_model.py text_generation --source_model "$TEXT_GENERATION_MODEL" --weight-format int8 --model_repository_path $1 - if [ ! -f "$1/$TEXT_GENERATION_MODEL/chat_template.jinja" ]; then +fi + +if [ ! -f "$1/$TEXT_GENERATION_MODEL/$TOKENIZER_FILE" ]; then + echo "[ERROR] Models file $1/$TEXT_GENERATION_MODEL/$TOKENIZER_FILE does not exist." + exit 1 +fi + +if [ ! -f "$1/$TEXT_GENERATION_MODEL/chat_template.jinja" ]; then dummy_chat_template="{% for message in messages %}\ {% if message['role'] == 'user' %}{{ 'User: ' + message['content'] }}\ {% elif message['role'] == 'system' %}{{ '<|system|>\n' + message['content'] + eos_token }}\ @@ -81,11 +88,6 @@ else {% endif %}\ {% endfor %}" echo "$dummy_chat_template" > "$1/$TEXT_GENERATION_MODEL/chat_template.jinja" - fi -fi -if [ ! 
-f "$1/$TEXT_GENERATION_MODEL/$TOKENIZER_FILE" ]; then - echo "[ERROR] Models file $1/$TEXT_GENERATION_MODEL/$TOKENIZER_FILE does not exist." - exit 1 fi if [ -f "$1/$VLM_MODEL/$TOKENIZER_FILE" ]; then diff --git a/src/llm/servable_initializer.cpp b/src/llm/servable_initializer.cpp index 2d43e6418b..6adc126d5d 100644 --- a/src/llm/servable_initializer.cpp +++ b/src/llm/servable_initializer.cpp @@ -149,6 +149,9 @@ void GenAiServableInitializer::loadPyTemplateProcessor(std::shared_ptrtemplateProcessor.bosToken = bosToken; properties->templateProcessor.eosToken = eosToken; + SPDLOG_LOGGER_DEBUG(modelmanager_logger, "Loading Python Jinja template processor with chat template from tokenizer. Bos token: {}, Eos token: {}, chat template: \n{}", + bosToken, eosToken, chatTemplate); + py::gil_scoped_acquire acquire; try { auto locals = py::dict("chat_template"_a = chatTemplate, @@ -250,6 +253,13 @@ void GenAiServableInitializer::loadPyTemplateProcessor(std::shared_ptr\n' + message['content'] + eos_token }}^ @@ -88,14 +94,8 @@ if exist "%~1\%TEXT_GENERATION_MODEL%\%TOKENIZER_FILE%" ( {%% endif %%}^ {%% endfor %%}" echo !dummy_chat_template! > "%~1\%TEXT_GENERATION_MODEL%\chat_template.jinja" - ) - - if !errorlevel! neq 0 exit /b !errorlevel! + if !errorlevel! neq 0 exit /b !errorlevel! ) -if not exist "%~1\%TEXT_GENERATION_MODEL%\%TOKENIZER_FILE%" ( - echo Models file %~1\%TEXT_GENERATION_MODEL%\%TOKENIZER_FILE% does not exists. - exit /b 1 -) if exist "%~1\%EMBEDDING_MODEL%\ov\%TOKENIZER_FILE%" ( echo Models file %~1\%EMBEDDING_MODEL%\ov\%TOKENIZER_FILE% exists. Skipping downloading models. From d1ca964f3fb16e5c3df8bf83a3f3a3c00e647af1 Mon Sep 17 00:00:00 2001 From: mzegla Date: Mon, 27 Oct 2025 13:30:21 +0100 Subject: [PATCH 3/7] remove pre-checks in prepare_models scripts --- prepare_llm_models.sh | 17 ----------------- windows_prepare_llm_models.bat | 16 ---------------- 2 files changed, 33 deletions(-) diff --git a/prepare_llm_models.sh b/prepare_llm_models.sh index 9214db9a8c..b65bd5715d 100755 --- a/prepare_llm_models.sh +++ b/prepare_llm_models.sh @@ -35,23 +35,6 @@ PHI4_MODEL="microsoft/Phi-4-mini-instruct" MISTRAL_MODEL="mistralai/Mistral-7B-Instruct-v0.3" GPT_OSS="openai/gpt-oss-20b" -MODELS=("$TEXT_GENERATION_MODEL/$TOKENIZER_FILE" "$EMBEDDING_MODEL/embeddings/$LEGACY_MODEL_FILE" "$RERANK_MODEL/rerank/$LEGACY_MODEL_FILE" "$VLM_MODEL/$TOKENIZER_FILE" "$QWEN3_MODEL/$TOKENIZER_FILE" "$LLAMA3_MODEL/$TOKENIZER_FILE" "$HERMES3_MODEL/$TOKENIZER_FILE" "$PHI4_MODEL/$TOKENIZER_FILE" "$MISTRAL_MODEL/$TOKENIZER_FILE" "$GPT_OSS/$TOKENIZER_FILE" "$EMBEDDING_MODEL/ov/$TOKENIZER_FILE" "$RERANK_MODEL/ov/$TOKENIZER_FILE") - -all_exist=true -for model in "${MODELS[@]}"; do - if [ ! -f "$1/$model" ]; then - echo "Model file does not exist $1/$model" - all_exist=false - break - fi - echo "Model file exist $1/$model" -done - -if $all_exist; then - echo "All model directories exist in $1. Skipping downloading models." 
- exit 0 -fi - if [ "$(python3 -c 'import sys; print(sys.version_info[1])')" -le "8" ]; then echo "Prepare models with python > 3.8."; exit 1 ; fi echo "Downloading LLM testing models to directory $1" diff --git a/windows_prepare_llm_models.bat b/windows_prepare_llm_models.bat index 6de1e82f22..16e8989b9a 100644 --- a/windows_prepare_llm_models.bat +++ b/windows_prepare_llm_models.bat @@ -43,22 +43,6 @@ set "PHI4_MODEL=microsoft/Phi-4-mini-instruct" set "MISTRAL_MODEL=mistralai/Mistral-7B-Instruct-v0.3" set "GPTOSS_MODEL=openai/gpt-oss-20b" -set MODELS_LIST=%TEXT_GENERATION_MODEL%\%TOKENIZER_FILE% %EMBEDDING_MODEL%\ov\%TOKENIZER_FILE% %RERANK_MODEL%\rerank\%LEGACY_MODEL_FILE% %VLM_MODEL%\%TOKENIZER_FILE% %QWEN3_MODEL%\%TOKENIZER_FILE% %LLAMA3_MODEL%\%TOKENIZER_FILE% %HERMES3_MODEL%\%TOKENIZER_FILE% %PHI4_MODEL%\%TOKENIZER_FILE% %MISTRAL_MODEL%\%TOKENIZER_FILE% %GPTOSS_MODEL%\%TOKENIZER_FILE% - -set "ALL_EXIST=1" -for %%M in (%MODELS_LIST%) do ( - if not exist "%~1\%%~M" ( - echo "%~1\%%~M" does not exist - set "ALL_EXIST=0" - ) - echo "%~1\%%~M" exists -) - -if "!ALL_EXIST!"=="1" ( - echo All required models exist in %~1. Skipping downloading models. - exit /b 0 -) - echo Downloading LLM testing models to directory %~1 set "PIP_EXTRA_INDEX_URL=https://download.pytorch.org/whl/cpu https://storage.openvinotoolkit.org/simple/wheels/nightly" set "PYTHONPATH=" From eed6684073dff49a29f5dc25a1e3e086ef047053 Mon Sep 17 00:00:00 2001 From: mzegla Date: Mon, 27 Oct 2025 17:02:00 +0100 Subject: [PATCH 4/7] fix win prepare models script --- prepare_llm_models.sh | 1 + windows_prepare_llm_models.bat | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/prepare_llm_models.sh b/prepare_llm_models.sh index b65bd5715d..018a6567af 100755 --- a/prepare_llm_models.sh +++ b/prepare_llm_models.sh @@ -64,6 +64,7 @@ if [ ! -f "$1/$TEXT_GENERATION_MODEL/$TOKENIZER_FILE" ]; then fi if [ ! -f "$1/$TEXT_GENERATION_MODEL/chat_template.jinja" ]; then + echo "Creating dummy chat template for $TEXT_GENERATION_MODEL model." dummy_chat_template="{% for message in messages %}\ {% if message['role'] == 'user' %}{{ 'User: ' + message['content'] }}\ {% elif message['role'] == 'system' %}{{ '<|system|>\n' + message['content'] + eos_token }}\ diff --git a/windows_prepare_llm_models.bat b/windows_prepare_llm_models.bat index 16e8989b9a..eb3e51c02d 100644 --- a/windows_prepare_llm_models.bat +++ b/windows_prepare_llm_models.bat @@ -71,9 +71,10 @@ if not exist "%~1\%TEXT_GENERATION_MODEL%\%TOKENIZER_FILE%" ( ) if not exist "%~1\%TEXT_GENERATION_MODEL%\chat_template.jinja" ( + echo Creating dummy chat template for %TEXT_GENERATION_MODEL% model. 
set "dummy_chat_template={%% for message in messages %%}^ {%% if message['role'] == 'user' %%}{{ 'User: ' + message['content'] }}^ -{%% elif message['role'] == 'system' %%}{{ '<|system|>\n' + message['content'] + eos_token }}^ +{%% elif message['role'] == 'system' %%}{{ '<^|system^|>\n' + message['content'] + eos_token }}^ {%% elif message['role'] == 'assistant' %%}{{ message['content'] + eos_token }}^ {%% endif %%}^ {%% endfor %%}" From c516f8f617cef9a71279f59212309bc3306e5848 Mon Sep 17 00:00:00 2001 From: mzegla Date: Wed, 29 Oct 2025 11:08:41 +0100 Subject: [PATCH 5/7] keep dummy template as a file --- prepare_llm_models.sh | 10 ++-------- src/test/llm/dummy_facebook_template.jinja | 1 + windows_prepare_llm_models.bat | 10 ++-------- 3 files changed, 5 insertions(+), 16 deletions(-) create mode 100644 src/test/llm/dummy_facebook_template.jinja diff --git a/prepare_llm_models.sh b/prepare_llm_models.sh index 018a6567af..43a9cc1df2 100755 --- a/prepare_llm_models.sh +++ b/prepare_llm_models.sh @@ -64,14 +64,8 @@ if [ ! -f "$1/$TEXT_GENERATION_MODEL/$TOKENIZER_FILE" ]; then fi if [ ! -f "$1/$TEXT_GENERATION_MODEL/chat_template.jinja" ]; then - echo "Creating dummy chat template for $TEXT_GENERATION_MODEL model." - dummy_chat_template="{% for message in messages %}\ -{% if message['role'] == 'user' %}{{ 'User: ' + message['content'] }}\ -{% elif message['role'] == 'system' %}{{ '<|system|>\n' + message['content'] + eos_token }}\ -{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token }}\ -{% endif %}\ -{% endfor %}" - echo "$dummy_chat_template" > "$1/$TEXT_GENERATION_MODEL/chat_template.jinja" + echo "Copying dummy chat template to $TEXT_GENERATION_MODEL model directory." + cp src/test/llm/dummy_facebook_template.jinja "$1/$TEXT_GENERATION_MODEL/chat_template.jinja" fi if [ -f "$1/$VLM_MODEL/$TOKENIZER_FILE" ]; then diff --git a/src/test/llm/dummy_facebook_template.jinja b/src/test/llm/dummy_facebook_template.jinja new file mode 100644 index 0000000000..49ef320258 --- /dev/null +++ b/src/test/llm/dummy_facebook_template.jinja @@ -0,0 +1 @@ +{% for message in messages %}{% if message['role'] == 'user' %}{{ 'User: ' + message['content'] }}{% elif message['role'] == 'system' %}{{ '<|system|>\n' + message['content'] + eos_token }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token }}{% endif %}{% endfor %} \ No newline at end of file diff --git a/windows_prepare_llm_models.bat b/windows_prepare_llm_models.bat index eb3e51c02d..db535e24c4 100644 --- a/windows_prepare_llm_models.bat +++ b/windows_prepare_llm_models.bat @@ -71,14 +71,8 @@ if not exist "%~1\%TEXT_GENERATION_MODEL%\%TOKENIZER_FILE%" ( ) if not exist "%~1\%TEXT_GENERATION_MODEL%\chat_template.jinja" ( - echo Creating dummy chat template for %TEXT_GENERATION_MODEL% model. - set "dummy_chat_template={%% for message in messages %%}^ -{%% if message['role'] == 'user' %%}{{ 'User: ' + message['content'] }}^ -{%% elif message['role'] == 'system' %%}{{ '<^|system^|>\n' + message['content'] + eos_token }}^ -{%% elif message['role'] == 'assistant' %%}{{ message['content'] + eos_token }}^ -{%% endif %%}^ -{%% endfor %%}" - echo !dummy_chat_template! > "%~1\%TEXT_GENERATION_MODEL%\chat_template.jinja" + echo Copying dummy chat template to %TEXT_GENERATION_MODEL% model directory. + copy /Y "src\test\llm\dummy_facebook_template.jinja" "%~1\%TEXT_GENERATION_MODEL%\chat_template.jinja" if !errorlevel! neq 0 exit /b !errorlevel! 
) From dcc47b7239d8562329f266f5cdc9fc9f50a2182a Mon Sep 17 00:00:00 2001 From: mzegla Date: Wed, 29 Oct 2025 11:42:43 +0100 Subject: [PATCH 6/7] do not check dummy template --- ci/lib_search.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/lib_search.py b/ci/lib_search.py index d8490f6dde..da0bbba9ad 100644 --- a/ci/lib_search.py +++ b/ci/lib_search.py @@ -151,6 +151,7 @@ def check_dir(start_dir): "results.txt", "windows_bdba.bat", "windows_sign.bat", + "dummy_facebook_template.jinja", ] exclude_directories = ['/dist/', 'release_files/thirdparty-licenses', 'extras/chat_template_examples'] From ff2925fd6bd06b7d869cb1a3c5e5dd99bff15d4a Mon Sep 17 00:00:00 2001 From: mzegla Date: Wed, 29 Oct 2025 13:14:26 +0100 Subject: [PATCH 7/7] additional removals in llm template tests --- src/test/llm/llmtemplate_test.cpp | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/src/test/llm/llmtemplate_test.cpp b/src/test/llm/llmtemplate_test.cpp index b9ca15dddc..e77e72432d 100644 --- a/src/test/llm/llmtemplate_test.cpp +++ b/src/test/llm/llmtemplate_test.cpp @@ -67,6 +67,20 @@ class LLMChatTemplateTest : public TestWithTempDir { } void LoadTemplateProcessor() { + servable = std::make_shared(); + servable->getProperties()->modelsPath = directoryPath; + servable->getProperties()->tokenizer = ov::genai::Tokenizer(directoryPath); + std::cout << "Chat template to be used: \n" + << servable->getProperties()->tokenizer.get_original_chat_template() << std::endl; + ExtraGenerationInfo extraGenInfo = GenAiServableInitializer::readExtraGenerationInfo(servable->getProperties(), directoryPath); + GenAiServableInitializer::loadPyTemplateProcessor(servable->getProperties(), extraGenInfo); + } + + void SetUp() { + TestWithTempDir::SetUp(); + tokenizerConfigFilePath = directoryPath + "/tokenizer_config.json"; + jinjaConfigFilePath = directoryPath + "/chat_template.jinja"; + // We need real model tokenizer and detokenizer as we rely on them to load chat template properly std::string realModelPath = getGenericFullPathForSrcTest("/ovms/src/test/llm_testing/facebook/opt-125m"); @@ -85,23 +99,11 @@ class LLMChatTemplateTest : public TestWithTempDir { std::string srcDetokenizerBinPath = ovms::FileSystem::joinPath({realModelPath, "openvino_detokenizer.bin"}); std::string dstDetokenizerBinPath = ovms::FileSystem::joinPath({directoryPath, "openvino_detokenizer.bin"}); std::filesystem::copy_file(srcDetokenizerBinPath, dstDetokenizerBinPath, std::filesystem::copy_options::overwrite_existing); - - servable = std::make_shared(); - servable->getProperties()->modelsPath = directoryPath; - servable->getProperties()->tokenizer = ov::genai::Tokenizer(directoryPath); - std::cout << "Chat template to be used: \n" - << servable->getProperties()->tokenizer.get_original_chat_template() << std::endl; - ExtraGenerationInfo extraGenInfo = GenAiServableInitializer::readExtraGenerationInfo(servable->getProperties(), directoryPath); - GenAiServableInitializer::loadPyTemplateProcessor(servable->getProperties(), extraGenInfo); - } - - void SetUp() { - TestWithTempDir::SetUp(); - tokenizerConfigFilePath = directoryPath + "/tokenizer_config.json"; - jinjaConfigFilePath = directoryPath + "/chat_template.jinja"; } void TearDown() { + servable.reset(); + std::filesystem::remove_all(directoryPath); TestWithTempDir::TearDown(); }