@@ -50,10 +50,18 @@ bool handleMatch(const std::string& rawText, std::shared_ptr<inferenceState> inf
 {
     if (inferState->isComplete())
     {
-        return true;
+        return false;
     }
-
-    if (rawText == inferState->sequence[inferState->stopWordMatchLen])
+    if (inferState->stopWordMatchLen == 0)
+    {
+        if (rawText.find('<') != std::string::npos) // Found "<" anywhere in the text
+        {
+            inferState->stopWordMatchLen++; // Move to next state
+            inferState->prevText = rawText;
+            return true;
+        }
+    }
+    else if (rawText == inferState->sequence[inferState->stopWordMatchLen])
     {
         inferState->stopWordMatchLen++; // Move to next state
         inferState->prevText = rawText;
@@ -110,9 +118,9 @@ GenerationInput::TensorPtr tensorrtllm::getTensorSingleStopWordList(int stopToke
 
 GenerationInput::TensorPtr tensorrtllm::getTensorChatMLStopWordList()
 {
-    std::vector<int32_t> stopWordsTokens = {28789, 28766, 321, 28730, 416, 28766, 28767, 32000, 6, 8, -1, -1, -1, -1,
-        -1, -1}; // Extend with -1 for increased length
-    return gptSession->getBufferManager().copyFrom(stopWordsTokens, ITensor::makeShape({1, 2, 8}), MemoryType::kGPU);
+    std::vector<int32_t> stopWordsTokens = {28789, 28766, 321, 28730, 416, 28766, 28767, 2, 32000, 7, 8, 9, -1, -1, -1,
+        -1, -1, -1}; // Extend with -1 for increased length
+    return gptSession->getBufferManager().copyFrom(stopWordsTokens, ITensor::makeShape({1, 2, 9}), MemoryType::kGPU);
 }
 
 GenerationInput tensorrtllm::createGenerationInput(std::vector<int32_t> inputIdsHost)
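
Note on the stop-word layout used by getTensorChatMLStopWordList above: TensorRT-LLM's stop-words tensor has shape [batchSize, 2, maxStopWordsLen], where row 0 holds the concatenated token IDs of all stop sequences and row 1 holds the exclusive end offset of each sequence, padded with -1. The helper below is a minimal standalone sketch of that flattening (buildStopWordsFlat is a hypothetical name, not part of this commit), shown only to explain where the 7, 8, 9 offsets and the {1, 2, 9} shape come from.

    #include <cstdint>
    #include <vector>

    // Hypothetical helper (not part of this commit) showing the assumed
    // [1, 2, N] stop-word layout: row 0 = concatenated token IDs of every
    // stop sequence, row 1 = exclusive end offset of each sequence, -1 padded.
    std::vector<int32_t> buildStopWordsFlat(const std::vector<std::vector<int32_t>>& stopSequences)
    {
        std::vector<int32_t> tokens;  // row 0
        std::vector<int32_t> offsets; // row 1
        for (const auto& seq : stopSequences)
        {
            tokens.insert(tokens.end(), seq.begin(), seq.end());
            offsets.push_back(static_cast<int32_t>(tokens.size())); // running end offset
        }
        offsets.resize(tokens.size(), -1); // pad row 1 so both rows have length N

        std::vector<int32_t> flat = tokens; // flattened shape: {1, 2, N}
        flat.insert(flat.end(), offsets.begin(), offsets.end());
        return flat;
    }

With the three stop sequences implied by the new list, {28789, 28766, 321, 28730, 416, 28766, 28767} (presumably the tokenized pieces of "<|im_end|>"), {2}, and {32000}, this produces the 18 values passed to copyFrom with ITensor::makeShape({1, 2, 9}).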