vllm-project · bbrowning · Nov 7, 2025
diff --git a/requirements/common.txt b/requirements/common.txt
@@ -47,5 +47,5 @@ ninja # Required for xgrammar, rocm, tpu, xpu
 pybase64 # fast base64 implementation
 cbor2 # Required for cross-language serialization of hashable objects
 setproctitle # Used to set process names for better debugging and monitoring
-openai-harmony >= 0.0.3  # Required for gpt-oss
+openai-harmony >= 0.0.8  # Required for gpt-oss
 anthropic == 0.71.0
diff --git a/requirements/test.txt b/requirements/test.txt
@@ -620,7 +620,7 @@ omegaconf==2.3.0
     #   lightning
 open-clip-torch==2.32.0
     # via -r requirements/test.in
-openai-harmony==0.0.4
+openai-harmony==0.0.8
     # via gpt-oss
 opencensus==0.11.4
     # via ray

@@ -4,6 +4,8 @@
 from openai_harmony import Role
 
 from vllm.entrypoints.harmony_utils import (
+    get_encoding,
+    get_streamable_parser_for_assistant,
     has_custom_tools,
     parse_input_to_harmony_message,
 )
@@ -264,3 +266,26 @@ def test_has_custom_tools() -> None:
     assert has_custom_tools(
         {"web_search_preview", "code_interpreter", "container", "others"}
     )
+
+
+def test_malformed_refusal_message() -> None:
+    """Parse a malformed gpt-oss refusal whose first message has no channel header."""
+    output_text = (
+        "...\n\nAccording to policy, we must refuse.<|end|>"
+        "<|start|>assistant<|channel|>analysis<|message|>We must refuse.<|end|>"
+        "<|start|>assistant<|channel|>final<|message|>I can't help with that.<|end|>"
+    )
+    output_tokens = get_encoding().encode(output_text, allowed_special="all")
+    parser = get_streamable_parser_for_assistant()
+    for token in output_tokens:
+        parser.process(token)  # feed the tokens to the parser one at a time
+    assert len(parser.messages) == 3  # leading text + analysis + final
+    assert parser.messages[0].author.role == Role.ASSISTANT
+    # substring match rather than equality: surrounding whitespace may vary
+    assert "According to policy, we must refuse." in parser.messages[0].content[0].text
+    assert parser.messages[1].author.role == Role.ASSISTANT
+    assert parser.messages[1].channel == "analysis"
+    assert parser.messages[1].content[0].text == "We must refuse."
+    assert parser.messages[2].author.role == Role.ASSISTANT
+    assert parser.messages[2].channel == "final"
+    assert parser.messages[2].content[0].text == "I can't help with that."
@@ -503,7 +503,7 @@ def get_stop_tokens_for_assistant_actions() -> list[int]:
 
 
 def get_streamable_parser_for_assistant() -> StreamableParser:
-    return StreamableParser(get_encoding(), role=Role.ASSISTANT)
+    return StreamableParser(get_encoding(), role=Role.ASSISTANT, strict=False)
 
 
 def parse_output_into_messages(token_ids: Iterable[int]) -> StreamableParser: