Skip to content
This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit 8e33246

Browse files
automaticcat authored and jan-service-account committed
latest
1 parent 764b41b commit 8e33246

File tree

1 file changed

+4
-1
lines changed

1 file changed

+4
-1
lines changed

cpp/tensorrt_llm/nitro/controllers/tensorrtllm.cc

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ bool handleMatch(const std::string& rawText, std::shared_ptr<inferenceState> inf
7878
inferState->reset();
7979
return false; // Reset to start if sequence breaks
8080
}
81+
return false;
8182
}
8283

8384
// Only support single token stopping point now
@@ -202,6 +203,7 @@ void inferenceThread(std::shared_ptr<inferenceState> inferState, std::vector<int
202203
inferState->textsToStream.push("[DONE]");
203204
return;
204205
}
206+
return;
205207
};
206208
// The rest of the logic inside the `chat_completion` remains unchanged...
207209
// After finishing the setup, call the inference logic
@@ -279,11 +281,12 @@ void tensorrtllm::chat_completion(
279281
std::thread infThread(inferenceThread, inferState, inputIdsHost, callback, this);
280282
infThread.detach(); // Detach the thread to allow it to run independently
281283

282-
auto chunked_content_provider = [inferState](char* pBuffer, std::size_t nBuffSize) -> std::size_t
284+
auto chunked_content_provider = [this,inferState](char* pBuffer, std::size_t nBuffSize) -> std::size_t
283285
{
284286
if (!pBuffer)
285287
{
286288
LOG_INFO << "Connection closed or buffer is null. Reset context";
289+
inferState->isFinished = true;
287290
return 0; // Indicate no more data to send
288291
}
289292

0 commit comments

Comments (0)