Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion requirements/common.txt
Original file line number Diff line number Diff line change
Expand Up @@ -47,5 +47,5 @@ ninja # Required for xgrammar, rocm, tpu, xpu
pybase64 # fast base64 implementation
cbor2 # Required for cross-language serialization of hashable objects
setproctitle # Used to set process names for better debugging and monitoring
openai-harmony >= 0.0.3 # Required for gpt-oss
openai-harmony >= 0.0.8 # Required for gpt-oss
anthropic == 0.71.0
2 changes: 1 addition & 1 deletion requirements/test.txt
Original file line number Diff line number Diff line change
Expand Up @@ -620,7 +620,7 @@ omegaconf==2.3.0
# lightning
open-clip-torch==2.32.0
# via -r requirements/test.in
openai-harmony==0.0.4
openai-harmony==0.0.8
# via gpt-oss
opencensus==0.11.4
# via ray
Expand Down
25 changes: 25 additions & 0 deletions tests/entrypoints/test_harmony_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
from openai_harmony import Role

from vllm.entrypoints.harmony_utils import (
get_encoding,
get_streamable_parser_for_assistant,
has_custom_tools,
parse_input_to_harmony_message,
)
Expand Down Expand Up @@ -264,3 +266,26 @@ def test_has_custom_tools() -> None:
assert has_custom_tools(
{"web_search_preview", "code_interpreter", "container", "others"}
)


def test_malformed_refusal_message() -> None:
    """Verify a malformed gpt-oss refusal is parsed into three messages.

    The sample output starts with free text terminated by ``<|end|>`` without
    any preceding ``<|start|>``/channel header, followed by a well-formed
    ``analysis`` message and a well-formed ``final`` message.
    """
    malformed_output = (
        "...\n\nAccording to policy, we must refuse.<|end|>"
        "<|start|>assistant<|channel|>analysis<|message|>We must refuse.<|end|>"
        "<|start|>assistant<|channel|>final<|message|>I can't help with that.<|end|>"
    )
    token_ids = get_encoding().encode(malformed_output, allowed_special="all")

    # Feed the tokens one at a time, as a streaming consumer would.
    stream_parser = get_streamable_parser_for_assistant()
    for token_id in token_ids:
        stream_parser.process(token_id)

    parsed = stream_parser.messages
    assert len(parsed) == 3

    # The header-less leading text must still be attributed to the assistant.
    leading = parsed[0]
    assert leading.author.role == Role.ASSISTANT
    # Substring match (not equality) so whitespace differences are tolerated.
    assert "According to policy, we must refuse." in leading.content[0].text

    # The two well-formed messages keep their channel and exact text.
    expected_tail = (
        ("analysis", "We must refuse."),
        ("final", "I can't help with that."),
    )
    for index, (channel, text) in enumerate(expected_tail, start=1):
        message = parsed[index]
        assert message.author.role == Role.ASSISTANT
        assert message.channel == channel
        assert message.content[0].text == text
2 changes: 1 addition & 1 deletion vllm/entrypoints/harmony_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -503,7 +503,7 @@ def get_stop_tokens_for_assistant_actions() -> list[int]:


# Build a StreamableParser for the assistant role, using the encoding
# returned by get_encoding().
# NOTE(review): this span comes from a rendered diff whose +/- markers and
# indentation were stripped; the two return lines below are presumably the
# removed and the added version of the same statement, not sequential code —
# confirm against the repository before relying on either.
def get_streamable_parser_for_assistant() -> StreamableParser:
# presumably the pre-change line: no `strict` argument is passed
return StreamableParser(get_encoding(), role=Role.ASSISTANT)
# presumably the post-change line: additionally passes strict=False
return StreamableParser(get_encoding(), role=Role.ASSISTANT, strict=False)


def parse_output_into_messages(token_ids: Iterable[int]) -> StreamableParser:
Expand Down
Loading