5151namespace ovms {
5252
5353static const std::string CHAT_TEMPLATE_WARNING_MESSAGE = " Warning: Chat template has not been loaded properly. Servable will not respond to /chat/completions endpoint." ;
54- static const std::string DEFAULT_CHAT_TEMPLATE = R"( {% if messages|length != 1 %} {{ raise_exception('This servable accepts only single message requests') }}{% endif %}{{ messages[0]['content'] }})" ;
5554
5655void GenAiServableInitializer::loadChatTemplate (std::shared_ptr<GenAiServableProperties> properties, const std::string& chatTemplateDirectory) {
5756#if (PYTHON_DISABLE == 0)
5857 ExtraGenerationInfo extraGenInfo = readExtraGenerationInfo (properties, chatTemplateDirectory);
5958 loadPyTemplateProcessor (properties, extraGenInfo);
6059#else
61- loadDefaultTemplateProcessorIfNeeded (properties);
60+ if (properties->tokenizer .get_chat_template ().empty ()) {
61+ SPDLOG_LOGGER_DEBUG (modelmanager_logger, CHAT_TEMPLATE_WARNING_MESSAGE);
62+ }
6263#endif
64+ // In non-python build, GenAI handles chat template loading
6365}
6466
6567#if (PYTHON_DISABLE == 0)
@@ -123,29 +125,34 @@ ExtraGenerationInfo GenAiServableInitializer::readExtraGenerationInfo(std::share
123125}
124126
125127void GenAiServableInitializer::loadPyTemplateProcessor (std::shared_ptr<GenAiServableProperties> properties, const ExtraGenerationInfo& extraGenInfo) {
126- // GGUF models specific validation
127- if (extraGenInfo.isGgufModel ) {
128- bool errorFound = false ;
129- if (extraGenInfo.eosTokenFromTokenizer .empty ()) {
130- SPDLOG_ERROR (" Tokenizer eos token not found in tokenizer nor in vocabulary but required for GGUF models." );
131- errorFound = true ;
132- }
133- if (extraGenInfo.bosTokenFromTokenizer .empty ()) {
134- SPDLOG_ERROR (" Tokenizer bos token not found in tokenizer nor in vocabulary but required for GGUF models." );
135- errorFound = true ;
136- }
137- if (extraGenInfo.chatTemplateFromTokenizer .empty ()) {
138- SPDLOG_ERROR (" Tokenizer chat template not found in tokenizer but required for GGUF models." );
139- errorFound = true ;
140- }
141- if (errorFound)
142- return ;
128+ // At this point the tokenizer must be initialized, as we need to access its methods to prepare for chat template processing
129+ if (properties->tokenizer == ov::genai::Tokenizer ()) {
130+ SPDLOG_LOGGER_ERROR (modelmanager_logger, " Tokenizer is not initialized. Cannot load chat template processor." );
131+ return ;
132+ }
133+ std::string chatTemplate = properties->tokenizer .get_original_chat_template ();
134+ std::string bosToken = properties->tokenizer .get_bos_token ();
135+ std::string eosToken = properties->tokenizer .get_eos_token ();
136+ if (bosToken.empty ()) {
137+ SPDLOG_ERROR (" BOS token was not found in model files." );
138+ return ;
143139 }
140+ if (eosToken.empty ()) {
141+ SPDLOG_ERROR (" EOS token was not found in model files." );
142+ return ;
143+ }
144+ if (chatTemplate.empty ()) {
145+ SPDLOG_ERROR (" Chat template was not found in model files." );
146+ return ;
147+ }
148+
149+ properties->templateProcessor .bosToken = bosToken;
150+ properties->templateProcessor .eosToken = eosToken;
151+
144152 py::gil_scoped_acquire acquire;
145153 try {
146- auto locals = py::dict (" tokenizer_template" _a = extraGenInfo.chatTemplateFromTokenizer ,
147- " templates_directory" _a = extraGenInfo.chatTemplateDirectory ,
148- " is_gguf_model" _a = extraGenInfo.isGgufModel );
154+ auto locals = py::dict (" chat_template" _a = chatTemplate,
155+ " templates_directory" _a = extraGenInfo.chatTemplateDirectory );
149156 py::exec (R"(
150157 # Following the logic from:
151158 # https://github.com/huggingface/transformers/blob/25245ec26dc29bcf6102e1b4ddd0dfd02e720cf5/src/transformers/tokenization_utils_base.py#L1837
@@ -214,71 +221,44 @@ void GenAiServableInitializer::loadPyTemplateProcessor(std::shared_ptr<GenAiServ
214221 self._rendered_blocks = None
215222 self._generation_indices = None
216223
217-
218- # Default chat template accepts only single message and outputs only it's 'content'
219- # effectively turning it into a regular prompt.
220- default_chat_template = "{% if messages|length != 1 %} {{ raise_exception('This servable accepts only single message requests') }}{% endif %}{{ messages[0]['content'] }}"
221-
222- bos_token = ""
223- eos_token = ""
224- chat_template = default_chat_template
224+
225+ # Optional dedicated tool chat template (might not be present)
225226 tool_chat_template = None
226227
228+ # Variables needed to be set at the end of this script execution
227229 template = None
228230 tool_template = None
229231
230- # Try to read template from template.jinja file
231- jinja_file = Path(templates_directory + "/chat_template.jinja")
232- jinja_file_legacy = Path(templates_directory + "/template.jinja")
232+ # Load Jinja2 environment
233233 template_loader = jinja2.FileSystemLoader(searchpath=templates_directory)
234234 jinja_env = ImmutableSandboxedEnvironment(trim_blocks=True, lstrip_blocks=True, extensions=[AssistantTracker, jinja2.ext.loopcontrols], loader=template_loader)
235235 jinja_env.policies["json.dumps_kwargs"]["ensure_ascii"] = False
236236 jinja_env.globals["raise_exception"] = raise_exception
237237 jinja_env.globals["strftime_now"] = strftime_now
238238 jinja_env.filters["from_json"] = json.loads
239- if jinja_file.is_file():
240- template = jinja_env.get_template("chat_template.jinja")
241- elif jinja_file_legacy.is_file():
242- template = jinja_env.get_template("template.jinja")
243239
244- # Try to read data from tokenizer_config.json
240+ # Try to read data from tokenizer_config.json to get additional tool chat template if present
245241 tokenizer_config_file = Path(templates_directory + "/tokenizer_config.json")
246242 if tokenizer_config_file.is_file():
247243 f = open(templates_directory + "/tokenizer_config.json", "r", encoding="utf-8")
248244 data = json.load(f)
249- bos_token = data.get("bos_token", "")
250- bos_token = "" if bos_token is None else bos_token # Null token conversion to empty string.
251- eos_token = data.get("eos_token", "")
252- eos_token = "" if eos_token is None else eos_token # Null token conversion to empty string.
253-
254- chat_template = data.get("chat_template", default_chat_template)
255- if isinstance(chat_template, list):
256- for template_entry in chat_template:
245+
246+ chat_template_from_tokenizer_config = data.get("chat_template", None)
247+ if isinstance(chat_template_from_tokenizer_config, list):
248+ for template_entry in chat_template_from_tokenizer_config:
257249 if isinstance(template_entry, dict):
258- if template_entry.get("name") == "default":
259- chat_template = template_entry.get("template")
260- elif template_entry.get("name") == "tool_use":
250+ if template_entry.get("name") == "tool_use":
261251 tool_chat_template = template_entry.get("template")
262- if template is None:
263- if is_gguf_model and (chat_template == default_chat_template):
264- # GGUF model directory might not contain files with chat template and in that case we use template read from the tokenizer
265- template = jinja_env.from_string(tokenizer_template)
266- else:
267- template = jinja_env.from_string(chat_template)
252+
253+ # Load templates from strings
254+ template = jinja_env.from_string(chat_template)
268255 if tool_chat_template is not None:
269256 tool_template = jinja_env.from_string(tool_chat_template)
270257 else:
271258 tool_template = template
272259 )" ,
273260 py::globals (), locals);
274261
275- if (extraGenInfo.isGgufModel ) {
276- properties->templateProcessor .bosToken = extraGenInfo.bosTokenFromTokenizer ;
277- properties->templateProcessor .eosToken = extraGenInfo.eosTokenFromTokenizer ;
278- } else {
279- properties->templateProcessor .bosToken = locals[" bos_token" ].cast <std::string>();
280- properties->templateProcessor .eosToken = locals[" eos_token" ].cast <std::string>();
281- }
282262 properties->templateProcessor .chatTemplate = std::make_unique<PyObjectWrapper<py::object>>(locals[" template" ]);
283263 properties->templateProcessor .toolTemplate = std::make_unique<PyObjectWrapper<py::object>>(locals[" tool_template" ]);
284264 } catch (const pybind11::error_already_set& e) {
@@ -298,15 +278,6 @@ void GenAiServableInitializer::loadPyTemplateProcessor(std::shared_ptr<GenAiServ
298278 SPDLOG_DEBUG (" Chat template loading failed with an unexpected error" );
299279 }
300280}
301-
302- #else
303- void GenAiServableInitializer::loadDefaultTemplateProcessorIfNeeded (std::shared_ptr<GenAiServableProperties> properties) {
304- const std::string modelChatTemplate = properties->tokenizer .get_chat_template ();
305- if (modelChatTemplate.empty ()) {
306- SPDLOG_LOGGER_DEBUG (modelmanager_logger, " Could not load model chat template. Using default template." );
307- properties->tokenizer .set_chat_template (DEFAULT_CHAT_TEMPLATE);
308- }
309- }
310281#endif
311282
312283Status parseModelsPath (std::string& outPath, std::string modelsPath, std::string graphPath) {
0 commit comments