Skip to content
This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit 8e33246

Browse files
automaticcat authored and jan-service-account committed
latest
1 parent 764b41b commit 8e33246

File tree

1 file changed

+4
-1
lines changed

1 file changed

+4
-1
lines changed

cpp/tensorrt_llm/nitro/controllers/tensorrtllm.cc

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ bool handleMatch(const std::string& rawText, std::shared_ptr<inferenceState> inf
7878
inferState->reset();
7979
return false; // Reset to start if sequence breaks
8080
}
81+
return false;
8182
}
8283

8384
// Only support single token stopping point now
@@ -202,6 +203,7 @@ void inferenceThread(std::shared_ptr<inferenceState> inferState, std::vector<int
202203
inferState->textsToStream.push("[DONE]");
203204
return;
204205
}
206+
return;
205207
};
206208
// The rest of the logic inside the `chat_completion` remains unchanged...
207209
// After finishing the setup, call the inference logic
@@ -279,11 +281,12 @@ void tensorrtllm::chat_completion(
279281
std::thread infThread(inferenceThread, inferState, inputIdsHost, callback, this);
280282
infThread.detach(); // Detach the thread to allow it to run independently
281283

282-
auto chunked_content_provider = [inferState](char* pBuffer, std::size_t nBuffSize) -> std::size_t
284+
auto chunked_content_provider = [this,inferState](char* pBuffer, std::size_t nBuffSize) -> std::size_t
283285
{
284286
if (!pBuffer)
285287
{
286288
LOG_INFO << "Connection closed or buffer is null. Reset context";
289+
inferState->isFinished = true;
287290
return 0; // Indicate no more data to send
288291
}
289292

0 commit comments

Comments (0)