This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit 3ac131a

refactor: rename class variables
1 parent 9998c48 commit 3ac131a
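
The renames apply the trailing-underscore convention for private data members across TensorrtllmEngine: batchSize becomes batch_size_, and model_config, logger, session_config, and the prompt strings gain the same suffix. A minimal sketch of the convention, using a hypothetical class that is not code from this repository:

    #include <string>

    // Private data members carry a trailing underscore; parameters and
    // locals do not, so member accesses stand out at call sites.
    class Engine {
     public:
      explicit Engine(int batch_size) : batch_size_(batch_size) {}
      int batch_size() const { return batch_size_; }

     private:
      int batch_size_ = 1;     // member (was spelled batchSize before this commit)
      std::string model_id_;   // identifiers ending in '_' are members
    };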

2 files changed (32 additions, 33 deletions)

cpp/tensorrt_llm/cortex.tensorrt-llm/src/tensorrt-llm_engine.cc

Lines changed: 24 additions & 24 deletions
@@ -123,12 +123,12 @@ GenerationInput::TensorPtr TensorrtllmEngine::GetTensorChatMLStopWordList() {
 
 GenerationInput TensorrtllmEngine::CreateGenerationInput(std::vector<int32_t> input_ids_host) {
   int input_len = input_ids_host.size();
-  std::vector<int32_t> input_lengths_host(batchSize, input_len);
+  std::vector<int32_t> input_lengths_host(batch_size_, input_len);
   GenerationInput::TensorPtr input_lengths
-      = gpt_session->getBufferManager().copyFrom(input_lengths_host, ITensor::makeShape({batchSize}), MemoryType::kGPU);
+      = gpt_session->getBufferManager().copyFrom(input_lengths_host, ITensor::makeShape({batch_size_}), MemoryType::kGPU);
   GenerationInput::TensorPtr input_ids = gpt_session->getBufferManager().copyFrom(
-      input_ids_host, ITensor::makeShape({batchSize, input_len}), MemoryType::kGPU);
-  GenerationInput generation_input{0, 0, input_ids, input_lengths, model_config->usePackedInput()};
+      input_ids_host, ITensor::makeShape({batch_size_, input_len}), MemoryType::kGPU);
+  GenerationInput generation_input{0, 0, input_ids, input_lengths, model_config_->usePackedInput()};
   generation_input.stopWordsList = GetTensorChatMLStopWordList();
 
   LOG_INFO << "Create generation input successfully";
@@ -249,7 +249,7 @@ bool TensorrtllmEngine::CheckModelLoaded(std::function<void(Json::Value&&, Json:
 
 void TensorrtllmEngine::HandleChatCompletion(std::shared_ptr<Json::Value> json_body, std::function<void(Json::Value&&, Json::Value&&)>&& callback) {
   inferences::ChatCompletionRequest request = inferences::fromJson(json_body);
-  std::string formatted_input = pre_prompt;
+  std::string formatted_input = pre_prompt_;
   nlohmann::json data;
   // data["stream"] = completion.stream;
   // data["n_predict"] = completion.max_tokens;
@@ -261,17 +261,17 @@ void TensorrtllmEngine::HandleChatCompletion(std::shared_ptr<Json::Value> json_b
     std::string input_role = message["role"].asString();
     std::string role;
     if (input_role == "user") {
-      role = user_prompt;
+      role = user_prompt_;
      std::string content = message["content"].asString();
      formatted_input += role + content;
    }
    else if (input_role == "assistant") {
-      role = ai_prompt;
+      role = ai_prompt_;
      std::string content = message["content"].asString();
      formatted_input += role + content;
    }
    else if (input_role == "system") {
-      role = system_prompt;
+      role = system_prompt_;
      std::string content = message["content"].asString();
      formatted_input = role + content + formatted_input;
    }
@@ -281,7 +281,7 @@ void TensorrtllmEngine::HandleChatCompletion(std::shared_ptr<Json::Value> json_b
       formatted_input += role + content;
     }
   }
-  formatted_input += ai_prompt;
+  formatted_input += ai_prompt_;
   // LOG_INFO << formatted_input;
   // Format the input from user
 
@@ -366,14 +366,14 @@ void TensorrtllmEngine::LoadModel(std::shared_ptr<Json::Value> json_body, std::f
   is_openhermes_ = IsOpenhermes(request.model_path);
 
   int ctx_len = request.ctx_len;
-  this->user_prompt = request.user_prompt.empty() ? GetUserPrompt(is_openhermes_) : request.user_prompt;
-  this->ai_prompt = request.ai_prompt.empty() ? GetAiPrompt(is_openhermes_) : request.ai_prompt;
-  this->system_prompt = request.system_prompt.empty() ? GetSystemPrompt(is_openhermes_) : request.system_prompt;
-  this->model_id_ = GetModelId(*json_body);
+  user_prompt_ = request.user_prompt.empty() ? GetUserPrompt(is_openhermes_) : request.user_prompt;
+  ai_prompt_ = request.ai_prompt.empty() ? GetAiPrompt(is_openhermes_) : request.ai_prompt;
+  system_prompt_ = request.system_prompt.empty() ? GetSystemPrompt(is_openhermes_) : request.system_prompt;
+  model_id_ = GetModelId(*json_body);
 
-  logger = std::make_shared<TllmLogger>();
-  logger->setLevel(nvinfer1::ILogger::Severity::kINFO);
-  initTrtLlmPlugins(logger.get());
+  logger_ = std::make_shared<TllmLogger>();
+  logger_->setLevel(nvinfer1::ILogger::Severity::kINFO);
+  initTrtLlmPlugins(logger_.get());
 
   std::filesystem::path tokenizer_model_name = model_dir / "tokenizer.model";
   cortex_tokenizer = std::make_unique<Tokenizer>(tokenizer_model_name.string());
@@ -382,20 +382,20 @@ void TensorrtllmEngine::LoadModel(std::shared_ptr<Json::Value> json_body, std::f
   std::filesystem::path json_file_name = model_dir / "config.json";
   auto json = GptJsonConfig::parse(json_file_name);
   auto config = json.getModelConfig();
-  model_config = std::make_unique<GptModelConfig>(config);
+  model_config_ = std::make_unique<GptModelConfig>(config);
   auto world_config = WorldConfig::mpi(1, json.getTensorParallelism(), json.getPipelineParallelism());
   LOG_INFO << "Loaded config from " << json_file_name.string();
   // auto dtype = model_config->getDataType();
 
   // Currently doing fixed session config
-  session_config.maxBatchSize = batchSize;
-  session_config.maxBeamWidth = 1; // Fixed for simplicity
-  session_config.maxSequenceLength = ctx_len;
-  session_config.cudaGraphMode = true; // Fixed for simplicity
+  session_config_.maxBatchSize = batch_size_;
+  session_config_.maxBeamWidth = 1; // Fixed for simplicity
+  session_config_.maxSequenceLength = ctx_len;
+  session_config_.cudaGraphMode = true; // Fixed for simplicity
 
   // Init gpt_session
   auto model_path = model_dir / json.engineFilename(world_config, model_id_);
-  gpt_session = std::make_unique<GptSession>(session_config, *model_config, world_config, model_path.string(), logger);
+  gpt_session = std::make_unique<GptSession>(session_config_, *model_config_, world_config, model_path.string(), logger_);
 
   model_loaded_ = true;
   if (q_ == nullptr) {
@@ -427,8 +427,8 @@ void TensorrtllmEngine::UnloadModel(std::shared_ptr<Json::Value> json_body, std:
   gpt_session.reset();
   cortex_tokenizer.reset();
   q_.reset();
-  model_config.reset();
-  logger.reset();
+  model_config_.reset();
+  logger_.reset();
   model_loaded_ = false;
 
   Json::Value json_resp;

cpp/tensorrt_llm/cortex.tensorrt-llm/src/tensorrt-llm_engine.h

Lines changed: 8 additions & 9 deletions
@@ -139,15 +139,14 @@ class TensorrtllmEngine : public EngineI {
   bool CheckModelLoaded(
       std::function<void(Json::Value&&, Json::Value&&)>& callback);
 
-  GptSession::Config session_config{1, 1, 1};
-  SamplingConfig sampling_config{1};
-  std::unique_ptr<GptModelConfig> model_config;
-  std::shared_ptr<TllmLogger> logger;
-  std::string user_prompt;
-  std::string ai_prompt;
-  std::string system_prompt;
-  std::string pre_prompt;
-  int batchSize = 1;
+  GptSession::Config session_config_{1, 1, 1};
+  std::unique_ptr<GptModelConfig> model_config_;
+  std::shared_ptr<TllmLogger> logger_;
+  std::string user_prompt_;
+  std::string ai_prompt_;
+  std::string system_prompt_;
+  std::string pre_prompt_;
+  int batch_size_ = 1;
   std::string model_id_;
   uint64_t start_time_;
   std::atomic<bool> model_loaded_;
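
For reference, the affected member block of TensorrtllmEngine as it reads after this commit, assembled from the + lines of the hunk above plus the unchanged trailing members (note that SamplingConfig sampling_config{1}; is removed rather than renamed, which accounts for the 9th deletion); surrounding class body and includes omitted:

    GptSession::Config session_config_{1, 1, 1};
    std::unique_ptr<GptModelConfig> model_config_;
    std::shared_ptr<TllmLogger> logger_;
    std::string user_prompt_;
    std::string ai_prompt_;
    std::string system_prompt_;
    std::string pre_prompt_;
    int batch_size_ = 1;
    std::string model_id_;
    uint64_t start_time_;
    std::atomic<bool> model_loaded_;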

0 commit comments