diff --git a/requirements/common.txt b/requirements/common.txt index ce5607b7fbf2..a03d4a6411d9 100644 --- a/requirements/common.txt +++ b/requirements/common.txt @@ -47,5 +47,5 @@ ninja # Required for xgrammar, rocm, tpu, xpu pybase64 # fast base64 implementation cbor2 # Required for cross-language serialization of hashable objects setproctitle # Used to set process names for better debugging and monitoring -openai-harmony >= 0.0.3 # Required for gpt-oss +openai-harmony >= 0.0.8 # Required for gpt-oss anthropic == 0.71.0 diff --git a/requirements/test.txt b/requirements/test.txt index 9d13fa424115..85ae104c89ba 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -620,7 +620,7 @@ omegaconf==2.3.0 # lightning open-clip-torch==2.32.0 # via -r requirements/test.in -openai-harmony==0.0.4 +openai-harmony==0.0.8 # via gpt-oss opencensus==0.11.4 # via ray diff --git a/tests/entrypoints/test_harmony_utils.py b/tests/entrypoints/test_harmony_utils.py index 6fa051a678d6..60f9ae1d0627 100644 --- a/tests/entrypoints/test_harmony_utils.py +++ b/tests/entrypoints/test_harmony_utils.py @@ -4,6 +4,8 @@ from openai_harmony import Role from vllm.entrypoints.harmony_utils import ( + get_encoding, + get_streamable_parser_for_assistant, has_custom_tools, parse_input_to_harmony_message, ) @@ -264,3 +266,26 @@ def test_has_custom_tools() -> None: assert has_custom_tools( {"web_search_preview", "code_interpreter", "container", "others"} ) + + +def test_malformed_refusal_message() -> None: + """Test parsing a malformed refusal message sometimes generated by gpt-oss""" + output_text = ( + "...\n\nAccording to policy, we must refuse.<|end|>" + "<|start|>assistant<|channel|>analysis<|message|>We must refuse.<|end|>" + "<|start|>assistant<|channel|>final<|message|>I can't help with that.<|end|>" + ) + output_tokens = get_encoding().encode(output_text, allowed_special="all") + parser = get_streamable_parser_for_assistant() + for token in output_tokens: + parser.process(token) + assert len(parser.messages) == 3 + assert parser.messages[0].author.role == Role.ASSISTANT + # using "in" here instead of "==" to allow for whitespace variances + assert "According to policy, we must refuse." in parser.messages[0].content[0].text + assert parser.messages[1].author.role == Role.ASSISTANT + assert parser.messages[1].channel == "analysis" + assert parser.messages[1].content[0].text == "We must refuse." + assert parser.messages[2].author.role == Role.ASSISTANT + assert parser.messages[2].channel == "final" + assert parser.messages[2].content[0].text == "I can't help with that." diff --git a/vllm/entrypoints/harmony_utils.py b/vllm/entrypoints/harmony_utils.py index 7958d0317739..950e97012998 100644 --- a/vllm/entrypoints/harmony_utils.py +++ b/vllm/entrypoints/harmony_utils.py @@ -503,7 +503,7 @@ def get_stop_tokens_for_assistant_actions() -> list[int]: def get_streamable_parser_for_assistant() -> StreamableParser: - return StreamableParser(get_encoding(), role=Role.ASSISTANT) + return StreamableParser(get_encoding(), role=Role.ASSISTANT, strict=False) def parse_output_into_messages(token_ids: Iterable[int]) -> StreamableParser: