This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit 9311ae8

fix: better error handling (#65)
Co-authored-by: sangjanai <sang@jan.ai>
1 parent 9d83224 commit 9311ae8

File tree

1 file changed: +28 -1 lines changed


cpp/tensorrt_llm/cortex.tensorrt-llm/src/tensorrt-llm_engine.cc

Lines changed: 28 additions & 1 deletion
@@ -471,7 +471,34 @@ void TensorrtllmEngine::LoadModel(std::shared_ptr<Json::Value> json_body, std::f
 
   // Init gpt_session
   auto model_path = model_dir / json.engineFilename(world_config, model_id_);
-  gpt_session = std::make_unique<GptSession>(session_config_, *model_config_, world_config, model_path.string(), logger_);
+  try {
+    gpt_session = std::make_unique<GptSession>(session_config_, *model_config_, world_config, model_path.string(), logger_);
+  } catch(const std::exception& e) {
+    LOG_ERROR << "Failed to load model: " << e.what();
+    LOG_INFO << "Retry once with smaller maxSequenceLength";
+    gpt_session.reset();
+    // Retry again with smaller maxSequenceLength once
+    session_config_.maxSequenceLength /= 2;
+    try {
+      gpt_session = std::make_unique<GptSession>(session_config_, *model_config_, world_config, model_path.string(), logger_);
+    } catch(const std::exception& e) {
+      LOG_ERROR << "Failed to load model: " << e.what();
+      gpt_session.reset();
+      cortex_tokenizer.reset();
+      q_.reset();
+      model_config_.reset();
+      logger_.reset();
+      Json::Value json_resp;
+      json_resp["message"] = "Failed to load model";
+      Json::Value status;
+      status["is_done"] = false;
+      status["has_error"] = true;
+      status["is_stream"] = false;
+      status["status_code"] = k500InternalServerError;
+      callback(std::move(status), std::move(json_resp));
+      return;
+    }
+  }
 
   model_loaded_ = true;
   if (q_ == nullptr) {
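
The commit implements a bounded load retry: attempt to construct the session, and on failure halve maxSequenceLength and try exactly once more before cleaning up and reporting a 500 error through the callback. A plausible motivation is that a smaller maxSequenceLength shrinks the session's memory footprint (e.g. its KV-cache allocation), so an out-of-memory failure may succeed on retry. Below is a minimal, self-contained sketch of the same control flow; SessionConfig, Session, and LoadWithRetry are hypothetical stand-ins for illustration, not the engine's real types.

#include <iostream>
#include <memory>
#include <stdexcept>

// Hypothetical stand-ins for GptSession and its session config; the real
// engine wires these through TensorRT-LLM types instead.
struct SessionConfig { int maxSequenceLength = 8192; };

struct Session {
    explicit Session(const SessionConfig& cfg) {
        // Simulate a constructor that throws (e.g. out of GPU memory)
        // when maxSequenceLength is too large.
        if (cfg.maxSequenceLength > 4096) {
            throw std::runtime_error("insufficient memory for maxSequenceLength");
        }
    }
};

// Mirror of the diff's control flow: try to build the session, and on
// failure halve maxSequenceLength and retry exactly once. A null return
// corresponds to the diff's cleanup-and-report-error branch.
std::unique_ptr<Session> LoadWithRetry(SessionConfig& cfg) {
    try {
        return std::make_unique<Session>(cfg);
    } catch (const std::exception& e) {
        std::cerr << "Failed to load model: " << e.what() << '\n';
        std::cerr << "Retry once with smaller maxSequenceLength\n";
        cfg.maxSequenceLength /= 2;
        try {
            return std::make_unique<Session>(cfg);
        } catch (const std::exception& e2) {
            std::cerr << "Failed to load model: " << e2.what() << '\n';
            return nullptr;
        }
    }
}

int main() {
    SessionConfig cfg;
    auto session = LoadWithRetry(cfg);
    std::cout << (session ? "loaded" : "failed")
              << " with maxSequenceLength=" << cfg.maxSequenceLength << '\n';
}

In the actual diff, the second failure additionally resets the tokenizer, queue, model config, and logger before invoking the callback with has_error = true and k500InternalServerError, so the engine is left in a clean unloaded state rather than half-initialized.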
