
Commit acd8a7d

feat(openai): instrument openai responses prompts (#15159)
# PR Description

## Description

Adds prompt tracking for OpenAI reusable prompts.

**The problem:** OpenAI returns rendered prompts (with variables filled in), but prompt tracking needs templates with placeholders like `{{variable_name}}`.

**The solution:** Reverse templating - reconstruct the template by replacing variable values with placeholders.

**How it works:**

```python
# Input from OpenAI:
variables: {"question": "What is ML?"}
instructions: [{role: "user", content: "Answer: What is ML?"}]

# We do:
# 1. Build map: {"What is ML?": "{{question}}"}
# 2. Extract: "Answer: What is ML?"
# 3. Replace: "Answer: What is ML?" -> "Answer: {{question}}"

# Output:
chat_template: [{role: "user", content: "Answer: {{question}}"}]
```

**Why longest values first?** Overlapping values need careful handling:

```python
# Problem: overlapping values
variables = {"short": "AI", "long": "AI is cool"}
text = "AI is cool"

# Wrong order breaks it:
text.replace("AI", "{{short}}")  # -> "{{short}} is cool"
# Now "AI is cool" can no longer be found!

# Solution: sort by length (longest first), then replace
sorted_values = ["AI is cool", "AI"]  # Longest first
for value in sorted_values:
    text = text.replace(value, placeholder)
# Result: "{{long}}"
```

The implementation uses a simple `.replace()` loop with longest-first sorting. Benchmarks show this is faster than regex for typical prompts with fewer than 50 variables.

## Testing

- Added `test_response_with_prompt_tracking()` verifying prompt metadata, chat_template extraction, and placeholder replacement.
- Added comprehensive unit tests for `_extract_chat_template_from_instructions()` covering edge cases (overlaps, special characters, large patterns, etc.).
- Tested in a personal sandbox with real templates. They can be found on staging here: [link](https://dd.datad0g.com/llm/applications?query=%40ml_app%3Allmobs-sandbox&compareLens=inputs&fromUser=false&start=1762765198999&end=1762766040247&paused=true#promptTemplates)

## Risks

Making this perfect is likely impossible since we're reverse-engineering the template from rendered output. The approach works well for typical real-world usage where:

- Variable values are reasonably unique
- Users follow sensible naming patterns
- Variables don't create ambiguous overlaps

For instance, when two variables have the same value, only one placeholder will be used:

```
variables = {"var1": "hello", "var2": "hello"}
text = "Say hello"
# Result: "Say {{var2}}" or "Say {{var1}}"
```

## Additional Notes

OpenAI doesn't expose templates via the API, so we reconstruct them. If they add template retrieval later, or the backend supports template-less prompts, we can remove this logic.
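For illustration, the core idea can be sketched in isolation. The `reverse_template` function below is hypothetical (the actual helper added by this PR is `_extract_chat_template_from_instructions`, shown in the diff further down); this is just a self-contained, runnable version of the longest-first replacement:

```python
def reverse_template(text: str, variables: dict) -> str:
    """Replace rendered variable values in text with {{name}} placeholders."""
    # Map each rendered value to its placeholder, skipping empty values.
    value_to_placeholder = {str(v): "{{" + name + "}}" for name, v in variables.items() if str(v)}
    # Longest values first, so a value that is a substring of another
    # (e.g. "AI" inside "AI is cool") is not replaced prematurely.
    for value in sorted(value_to_placeholder, key=len, reverse=True):
        text = text.replace(value, value_to_placeholder[value])
    return text


assert reverse_template("Answer: What is ML?", {"question": "What is ML?"}) == "Answer: {{question}}"
assert reverse_template("AI is cool", {"short": "AI", "long": "AI is cool"}) == "{{long}}"
```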
1 parent 08d3021 commit acd8a7d

5 files changed (+394, -0)

ddtrace/llmobs/_integrations/utils.py

Lines changed: 90 additions & 0 deletions
```diff
@@ -16,6 +16,7 @@
 from ddtrace.llmobs._constants import DISPATCH_ON_LLM_TOOL_CHOICE
 from ddtrace.llmobs._constants import DISPATCH_ON_TOOL_CALL_OUTPUT_USED
 from ddtrace.llmobs._constants import INPUT_MESSAGES
+from ddtrace.llmobs._constants import INPUT_PROMPT
 from ddtrace.llmobs._constants import INPUT_TOKENS_METRIC_KEY
 from ddtrace.llmobs._constants import INPUT_VALUE
 from ddtrace.llmobs._constants import METADATA
@@ -26,6 +27,7 @@
 from ddtrace.llmobs._constants import TOOL_DEFINITIONS
 from ddtrace.llmobs._constants import TOTAL_TOKENS_METRIC_KEY
 from ddtrace.llmobs._utils import _get_attr
+from ddtrace.llmobs._utils import _validate_prompt
 from ddtrace.llmobs._utils import load_data_value
 from ddtrace.llmobs._utils import safe_json
 from ddtrace.llmobs._utils import safe_load_json
@@ -738,9 +740,78 @@ def openai_get_metadata_from_response(
     return metadata


+def _extract_chat_template_from_instructions(
+    instructions: List[Any], variables: Dict[str, Any]
+) -> List[Dict[str, str]]:
+    """
+    Extract a chat template from OpenAI response instructions by replacing variable values with placeholders.
+
+    Args:
+        instructions: List of instruction messages from the OpenAI response
+        variables: Dictionary of variables used in the prompt
+
+    Returns:
+        List of chat template messages with placeholders (e.g., {{variable_name}})
+    """
+    chat_template = []
+
+    # Create a mapping of variable values to placeholder names
+    value_to_placeholder = {}
+    for var_name, var_value in variables.items():
+        if hasattr(var_value, "text"):  # ResponseInputText
+            value_str = str(var_value.text)
+        else:
+            value_str = str(var_value)
+
+        # Skip empty values
+        if not value_str:
+            continue
+
+        value_to_placeholder[value_str] = f"{{{{{var_name}}}}}"
+
+    # Sort by length (longest first) to handle overlapping values correctly
+    sorted_values = sorted(value_to_placeholder.keys(), key=len, reverse=True)
+
+    for instruction in instructions:
+        role = _get_attr(instruction, "role", "")
+        if not role:
+            continue
+
+        content_items = _get_attr(instruction, "content", [])
+        if not content_items:
+            continue
+
+        text_parts = []
+        for content_item in content_items:
+            text = _get_attr(content_item, "text", "")
+            if text:
+                text_parts.append(str(text))
+
+        if not text_parts:
+            continue
+
+        full_text = "".join(text_parts)
+
+        # Replace variable values with placeholders (longest first)
+        for value_str in sorted_values:
+            placeholder = value_to_placeholder[value_str]
+            full_text = full_text.replace(value_str, placeholder)
+
+        chat_template.append({"role": role, "content": full_text})
+
+    return chat_template
+
+
 def openai_set_meta_tags_from_response(span: Span, kwargs: Dict[str, Any], response: Optional[Any]) -> None:
     """Extract input/output tags from response and set them as temporary "_ml_obs.meta.*" tags."""
     input_data = kwargs.get("input", [])
+
+    # For reusable prompts, input may not be in kwargs, extract from response.instructions
+    if not input_data and response and "prompt" in kwargs:
+        instructions = _get_attr(response, "instructions", [])
+        if instructions:
+            input_data = load_data_value(instructions)
+
     input_messages = openai_get_input_messages_from_response_input(input_data)

     if "instructions" in kwargs:
@@ -753,6 +824,25 @@ def openai_set_meta_tags_from_response(span: Span, kwargs: Dict[str, Any], respo
         }
     )

+    if "prompt" in kwargs:
+        prompt_data = kwargs.get("prompt")
+        if prompt_data:
+            try:
+                # Extract chat_template from response instructions if available
+                if response and not prompt_data.get("chat_template") and not prompt_data.get("template"):
+                    instructions = _get_attr(response, "instructions", None)
+                    variables = prompt_data.get("variables", {})
+                    if instructions and variables:
+                        chat_template = _extract_chat_template_from_instructions(instructions, variables)
+                        if chat_template:
+                            prompt_data = dict(prompt_data)  # Make a copy to avoid modifying the original
+                            prompt_data["chat_template"] = chat_template
+
+                validated_prompt = _validate_prompt(prompt_data, strict_validation=False)
+                span._set_ctx_item(INPUT_PROMPT, validated_prompt)
+            except (TypeError, ValueError, AttributeError) as e:
+                logger.debug("Failed to validate prompt for OpenAI response: %s", e)
+
     if span.error or not response:
         span._set_ctx_item(OUTPUT_MESSAGES, [Message(content="")])
         return
```
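For reference, a minimal usage sketch of the new helper. The dict-shaped instructions below are a stand-in for the SDK's `response.instructions` objects, assuming `_get_attr` resolves keys on plain dicts as well as attributes on model objects:

```python
from ddtrace.llmobs._integrations.utils import _extract_chat_template_from_instructions

instructions = [
    {"role": "developer", "content": [{"text": "Direct & Conversational tone"}]},
    {"role": "user", "content": [{"text": "Answer this question: What is ML?"}]},
]
variables = {"question": "What is ML?"}

chat_template = _extract_chat_template_from_instructions(instructions, variables)
# Expected:
# [{"role": "developer", "content": "Direct & Conversational tone"},
#  {"role": "user", "content": "Answer this question: {{question}}"}]
```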
Lines changed: 5 additions & 0 deletions
```diff
@@ -0,0 +1,5 @@
+---
+features:
+  - |
+    LLM Observability: The OpenAI integration now captures prompt metadata (id, version, variables, and chat template)
+    for reusable prompts when using the ``responses`` endpoint (available in OpenAI SDK >= 1.87.0).
```
Lines changed: 134 additions & 0 deletions
```diff
@@ -0,0 +1,134 @@
+interactions:
+- request:
+    body: '{"prompt":{"id":"pmpt_690b24669d8c81948acc0e98da10e6490190feb3a62eee0b","version":"4","variables":{"question":"What
+      is machine learning?"}}}'
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate
+      connection:
+      - keep-alive
+      content-length:
+      - '140'
+      content-type:
+      - application/json
+      host:
+      - api.openai.com
+      user-agent:
+      - OpenAI/Python 2.3.0
+      x-stainless-arch:
+      - arm64
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - Linux
+      x-stainless-package-version:
+      - 2.3.0
+      x-stainless-read-timeout:
+      - '600'
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.10.18
+    method: POST
+    uri: https://api.openai.com/v1/responses
+  response:
+    body:
+      string: !!binary |
+        H4sIAAAAAAAAA7xYWW8byRF+968o8CGQAYoYHqJFvQTGbhZYJM4GmyyMIDaImu6amVr1MemDFL3w
+        fw+qhxySkh1vXvROdU/X8dVXF397BTBhPXmASaDYb6u7iqo7Pd9sar1EWlbz9aZSuFrS6k7f3883
+        JlEiPTyqUT22wWcndjVoIg3HbAy7dvIAv70CAJj0eKAg7zXtyPiewuQVwOfyMYXg5c5lY8oBu5OW
+        raaEbOL1bUwhq8Teyfm/ioJBDcAkHXoSPZZixHZwrVwo7xK5NL64fHX1kl2f0zbRUxofD/dy8gCT
+        7zmQSvAH+M67HYWIYgkaSN7RZHzw+fjXx9GA4A29QOCIwf/VgX/6DBgIEDoyfZMNYIwcE7o0g78Z
+        wkiALu4pQOo4wr8zRfHxAd53mIAjWFQdOwJDGBy79o+/x+8cR5dfHe8nFp+2PqdirR/JXQVaLpP3
+        ZqvQXFPAek1GZPrVrWXHt4tqcXdbrW7n6yPHi8yX7DimT/x68jRVtVGSPPdqcaeXpKr1XCl1vz4H
+        4oRzIIxe3D9fxWwthoMo/vjF6A4G2Nh+1YLVfLXcVGJBrec11c0btb5brjar6qUFLzjytTz93wh0
+        isgLBqFzPuGYeR+vLo1v++DrL9ycivfuGW2ESgh1QKc68A1gSNywYjTALpEx3JJTBElo1/KOIohb
+        OVGIkDoCrNlwOkDyg0zoMSUKLgI6DRYfCTQpjmIwNMFb0JhwCgFTV9iNDmoSS+ipN6w4mQP0wbcB
+        rSUNjQ9AOwoHiIocBvYz+NGBL6/3Pug4BSlHhFrsP/gM+8BJBH7Ii2quuLmVFLpNHblbcaMcawjZ
+        UJyWBw2RLs7EQ0xkgZ5QIje4wAkabnOgCD6n8ll2moI5iI4iBbwDThH83s0+uA/uz3QA1oTxQf4T
+        fYsFfI8Ji8ARIPjgYDByCVIQlDdGypoABDcu25pCnIKEbgpssRV7I7noAwRCza6NU6CkZq+LYIM1
+        GbF335EDR6RJz660vD3HHU3rA6fOAjo0h08UhxgX5cmDpiS2jMYWnWYgXsd9nF249k5qAdSZjdh0
+        pfEfQpGzKv1rjikWagUp2D0GtFTIdCMIDiErtWUI02uxpaaYQGGfchAm+kijXdf+/VSoGpAd6WkJ
+        VREFCt1AxT6Q5qFviUdnZnoBbD+F7CKRKygUD98hO5CkjMKu50W3xHc+g7/nnsKOI+kzwMWuk2G/
+        RIpDfEif6XVDs3Y2hb7zyUdI2LakjxCokac+HI+0b4+YzK6F/0VUShpb7HtRXdKstCC4GXhTYBxq
+        CtwUO+JzKd95a72DhPExPoAy0pAaViXicBN7tLCLM3A+3co/r6cQqA0UY7k/Aeta6HyWAAVWFF8X
+        EBcz+MXFb2H03ofHCHtOHWR3wmrIhQEnhJ4NSRxUjslbCtDnoDpplh3H5APTC7d+YKcjDOOK0McH
+        kGFJcLrK2/8ORo6Jglh8c3oMkS0bDKMt8fUUNFtyscwiUhMD6WFIgpvItjfclJohPg3ALGfwM7Fr
+        fFBkyaWvIHMMcH2AFKQyS7KXiU1ioIh3pRLRHoNutO7JoUksDEsdu0eBrGRFSXxo0dJtb7AYgy25
+        9By0krVyAT2rxwh4TBl2gA7I7Th4VwxOHiw+seVPBCrbLBViR0djwO+kxLOl4u3PhOZ274M5p8Bl
+        gfyTRTZQiNawOTYYPGIiimLvE2S3R5dIw7HxDhVgLEQ7LiHugxfoBR5vLTk90Jhcy05g+SulxvDT
+        FN5a/OTd60shP0rGlJet4/KMnbSJIInqqEC0iUPWAvY93GBO3mJiBQ2qon8Y8EtKs2uvxL/NyTtv
+        fY6wo46Voed+OtxxiylUsqZhdfn6h4BZH4uzmDZ0rBKZGt0ju7ZA/b47SCOwpUoOKL/o/L3fC8YW
+        3QESqc5541shzZ5AMnjouhoPU6mdhfK+SXsZW6PFkChMwXoZYjX2EvVpIWZNohMwQYdOywoCwyD0
+        JGwdGVAqLLyNQ4bLw2GuGC0DmZXYZRJI2uD30xfl9xQKGW8LAjlKeKVLKi5ji8jVyOYAhhuCR6I+
+        yqCBTjrV7PfMzONg/nxwlt5lDJnrATmFPOxffaAd+xy3pxVvWwbPcYDug7d9Oq9nw1ja2z5t15uq
+        XqzW642+lzl4dY9KVbS51zivaL3aVPNN1VC9xPWCiKr6OCFOdhgYa0NxlAowOa0OF2ffWlTGafGb
+        y8bnywl7IqvYoGmyOu+Wg6Nbhaqj7SMdXoJwvAsks/EgYPziPOGPSFHT+JCG4VtztifB55O/vB71
+        R2woHbasRXjDdLXnRulJiraJT7txg9kMYEykpdDVUp3I9hRQOolsPLPqePp0EcjGB4vpMgQnsMt3
+        zxGrfeR0uPBmNHzgVedZDUTMyU/Gi/OMP0m+315M/tV42F/aGLIbmnlxk6MQ5fgLQi4bzJmJ7moZ
+        XM6nL88vfhUY3SxR1OeH1ZWrz3fM9WrxpZsvCR45cH4934vGFsuFN6PhA686z2ogYk5+Ml6cZ/xJ
+        8v32YvKvxsP+0saQ3dDMi5schSjHXxBy2WDOTHRXy+ByPn15fvGrwOhmiaI+P6yuXH2+Y65Xiy/d
+        fEnwyIHz6/lmcSU9+YTmQvib5QhjWXovN1dKKEVHNHx+9fk/AAAA//8DAHhWLkvUEQAA
+    headers:
+      CF-RAY:
+      - 99a4fa22dbb601cc-CDG
+      Connection:
+      - keep-alive
+      Content-Encoding:
+      - gzip
+      Content-Type:
+      - application/json
+      Date:
+      - Thu, 06 Nov 2025 13:36:05 GMT
+      Server:
+      - cloudflare
+      Set-Cookie:
+      - __cf_bm=oAc59HaJwUjrUv2uHgTgDkTP1sVynTMJVzliRX11b7o-1762436165-1.0.1.1-STkKgI9BlQHAvGzS.Rqi6UQVssVb5_M5J9QpUZICssvaO35gDy6yDFJo.tYdjVGKAGufaBJ9rwowcVi0u.xMc6oV0zOSTM2nqB6IjkP9W.4;
+        path=/; expires=Thu, 06-Nov-25 14:06:05 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=bDZxnxovYk7l9OeXSX6u2DbwKyUR5GDTvi_l5SLAkiY-1762436165819-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - nosniff
+      alt-svc:
+      - h3=":443"; ma=86400
+      cf-cache-status:
+      - DYNAMIC
+      openai-organization:
+      - datadog-staging
+      openai-processing-ms:
+      - '7512'
+      openai-project:
+      - proj_gt6TQZPRbZfoY2J9AQlEJMpd
+      openai-version:
+      - '2020-10-01'
+      x-envoy-upstream-service-time:
+      - '7514'
+      x-ratelimit-limit-requests:
+      - '30000'
+      x-ratelimit-limit-tokens:
+      - '150000000'
+      x-ratelimit-remaining-requests:
+      - '29999'
+      x-ratelimit-remaining-tokens:
+      - '149999762'
+      x-ratelimit-reset-requests:
+      - 2ms
+      x-ratelimit-reset-tokens:
+      - 0s
+      x-request-id:
+      - req_2409b397395c43bcaa8b763bb736ebf5
+    status:
+      code: 200
+      message: OK
+version: 1
```

tests/contrib/openai/test_openai_llmobs.py

Lines changed: 51 additions & 0 deletions
```diff
@@ -2150,6 +2150,57 @@ class MathResponse(BaseModel):
             )
         )

+    @pytest.mark.skipif(
+        parse_version(openai_module.version.VERSION) < (1, 87),
+        reason="Reusable prompts only available in openai >= 1.87",
+    )
+    def test_response_with_prompt_tracking(self, openai, mock_llmobs_writer, mock_tracer):
+        """Test that prompt metadata (id, version, variables) is captured for reusable prompts."""
+        with get_openai_vcr(subdirectory_name="v1").use_cassette("response_with_prompt.yaml"):
+            client = openai.OpenAI()
+            client.responses.create(
+                prompt={
+                    "id": "pmpt_690b24669d8c81948acc0e98da10e6490190feb3a62eee0b",
+                    "version": "4",
+                    "variables": {"question": "What is machine learning?"},
+                }
+            )
+        mock_tracer.pop_traces()
+        assert mock_llmobs_writer.enqueue.call_count == 1
+
+        call_args = mock_llmobs_writer.enqueue.call_args[0][0]
+
+        # Verify prompt metadata is captured
+        assert "prompt" in call_args["meta"]["input"]
+        actual_prompt = call_args["meta"]["input"]["prompt"]
+        assert actual_prompt["id"] == "pmpt_690b24669d8c81948acc0e98da10e6490190feb3a62eee0b"
+        assert actual_prompt["version"] == "4"
+        assert actual_prompt["variables"] == {"question": "What is machine learning?"}
+
+        # Verify chat_template is extracted with variable placeholders
+        assert "chat_template" in actual_prompt
+        chat_template = actual_prompt["chat_template"]
+        assert len(chat_template) == 2
+        # First message: developer role
+        assert chat_template[0]["role"] == "developer"
+        assert chat_template[0]["content"] == "Direct & Conversational tone"
+        # Second message: user role with variable placeholder
+        assert chat_template[1]["role"] == "user"
+        assert chat_template[1]["content"] == "You are a helpful assistant. Please answer this question: {{question}}"
+
+        # Verify the actual prompt content is captured in input messages
+        input_messages = call_args["meta"]["input"]["messages"]
+        assert len(input_messages) == 2
+        # Developer message
+        assert input_messages[0]["role"] == "developer"
+        assert input_messages[0]["content"] == "Direct & Conversational tone"
+        # User message with rendered variables
+        assert input_messages[1]["role"] == "user"
+        assert (
+            input_messages[1]["content"]
+            == "You are a helpful assistant. Please answer this question: What is machine learning?"
+        )
+

 @pytest.mark.parametrize(
     "ddtrace_global_config",
```

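The edge-case unit tests for `_extract_chat_template_from_instructions()` mentioned in the PR description are not shown in this view; a hypothetical sketch of the overlap case they are described as covering (again using dict-shaped instructions as a stand-in for response objects) might look like:

```python
def test_extract_chat_template_handles_overlapping_values():
    # "AI" is a substring of "AI is cool", so the longer value must be
    # replaced first for the right placeholder to survive.
    instructions = [{"role": "user", "content": [{"text": "AI is cool"}]}]
    variables = {"short": "AI", "long": "AI is cool"}
    result = _extract_chat_template_from_instructions(instructions, variables)
    assert result == [{"role": "user", "content": "{{long}}"}]
```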