@@ -185,6 +185,20 @@ async def _aset_token_usage(
token_histogram: Histogram = None,
choice_counter: Counter = None,
):
"""
Record token usage and choice counts from an Anthropic response into the provided span and optional metric instruments.

This function will await coroutine responses and call parse() on wrapped responses when present. It extracts input (prompt + cache read + cache creation) and output token counts from the response. If usage data is absent, it falls back to counting prompt or completion tokens via the provided Anthropic client. When metric instruments are provided, it records input/output token values to token_histogram and increments choice_counter with the number of choices. It then sets span attributes for input tokens, output tokens, total tokens, and cache-related input tokens.

Parameters:
span: The tracing span to annotate with token and choice attributes.
anthropic: Anthropic client or compatible object used to count tokens when usage metadata is not available.
request: The original request object used to compute prompt token counts when needed.
response: The response object, which may be a coroutine, a with_raw_response wrapper exposing parse(), or a concrete response containing usage/content/completion attributes.
metric_attributes (dict): Additional attributes to attach to metric recordings.
token_histogram (Histogram): Optional histogram instrument to record token counts.
choice_counter (Counter): Optional counter instrument to record the number of generation choices.
"""
import inspect

# If we get a coroutine, await it
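
For readers skimming the hunk: the resolution order the new docstring promises (await the coroutine, then unwrap via parse(), then read usage) can be summarized in a few lines. A minimal sketch under those assumptions, with a hypothetical helper name; the actual code in this file may interleave these steps differently:

```python
import inspect


async def _resolve_response(response):
    # Hypothetical helper mirroring the docstring above: await coroutine
    # responses first, then unwrap with_raw_response objects via parse().
    if inspect.iscoroutine(response):
        response = await response
    if hasattr(response, "parse"):
        parsed = response.parse()
        # parse() itself may return an awaitable on async clients.
        if inspect.iscoroutine(parsed):
            parsed = await parsed
        response = parsed
    return response
```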
@@ -283,11 +297,11 @@ async def _aset_token_usage(
set_span_attribute(span, SpanAttributes.LLM_USAGE_TOTAL_TOKENS, total_tokens)

set_span_attribute(
-span, GenAIAttributes.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS, cache_read_tokens
+span, SpanAttributes.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS, cache_read_tokens
)
set_span_attribute(
span,
-GenAIAttributes.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS,
+SpanAttributes.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS,
cache_creation_tokens,
)
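
The three input-token buckets the docstring mentions come together as a single sum before the attributes above are set. A hedged sketch of that accounting, assuming the field names of the Anthropic Messages API usage object; the extraction in this module may differ:

```python
def _token_counts_from_usage(response):
    # Assumed shape: an Anthropic Messages API usage object exposing
    # input_tokens, output_tokens, cache_read_input_tokens and
    # cache_creation_input_tokens.
    usage = getattr(response, "usage", None)
    cache_read_tokens = getattr(usage, "cache_read_input_tokens", 0) or 0
    cache_creation_tokens = getattr(usage, "cache_creation_input_tokens", 0) or 0
    # "input (prompt + cache read + cache creation)" per the docstring above.
    input_tokens = (
        (getattr(usage, "input_tokens", 0) or 0)
        + cache_read_tokens
        + cache_creation_tokens
    )
    output_tokens = getattr(usage, "output_tokens", 0) or 0
    return input_tokens, output_tokens, cache_read_tokens, cache_creation_tokens
```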

Expand All @@ -302,6 +316,20 @@ def _set_token_usage(
token_histogram: Histogram = None,
choice_counter: Counter = None,
):
"""
Record token usage metrics and set related span attributes for an Anthropic response.

Extracts input and output token counts from the response when available (or computes them from the request/response using the provided Anthropic client), records input/output token values to token_histogram with metric_attributes, increments choice_counter with the number of generated choices, and sets span attributes for input, output, total, and cache-related token counts. If the response is a coroutine or cannot be parsed, token processing is skipped.

Parameters:
span: The tracing span on which to set token-related attributes.
anthropic: Anthropic client/SDK instance used to compute token counts when response usage data is not present.
request: The original request object used to compute prompt/input tokens if needed.
response: The response object (or a wrapper with a parse() method); coroutine responses are ignored.
metric_attributes (dict): Additional attributes to attach to recorded metrics.
token_histogram (Histogram | None): Histogram instrument for recording token counts; if None, metrics are not recorded.
choice_counter (Counter | None): Counter instrument for recording number of generated choices; if None, choice counts are not recorded.
"""
import inspect

# If we get a coroutine, we cannot process it in sync context
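
Unlike the async variant, this function cannot await, so the coroutine check becomes an early exit. A small illustrative sketch of that guard as a hypothetical standalone helper:

```python
import inspect


def _usage_or_none(response):
    # In a sync context a coroutine cannot be awaited: skip token
    # processing entirely, as the docstring above specifies.
    if inspect.iscoroutine(response):
        return None
    # with_raw_response wrappers expose parse() to obtain the typed model.
    if hasattr(response, "parse"):
        response = response.parse()
    return getattr(response, "usage", None)
```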
@@ -397,11 +425,11 @@ def _set_token_usage(
set_span_attribute(span, SpanAttributes.LLM_USAGE_TOTAL_TOKENS, total_tokens)

set_span_attribute(
-span, GenAIAttributes.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS, cache_read_tokens
+span, SpanAttributes.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS, cache_read_tokens
)
set_span_attribute(
span,
-GenAIAttributes.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS,
+SpanAttributes.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS,
cache_creation_tokens,
)

@@ -874,4 +902,4 @@ def _uninstrument(self, **kwargs):
unwrap(
f"{wrap_package}.{wrap_object}",
wrapped_method.get("method"),
-)
\ No newline at end of file
+)
@@ -284,6 +284,18 @@ def set_chat_response_usage(
record_token_usage: bool,
model_name: str
) -> None:
"""
Aggregate token usage from an LLM response, record it on the provided span, and optionally record token counts to a histogram.

This function sums input, output, total, and cache-read token counts from response.generations' usage metadata (looking for keys such as "input_tokens"/"prompt_tokens", "output_tokens"/"completion_tokens", and "input_token_details.cache_read"). If any of these counts are greater than zero, the corresponding span attributes are set. If record_token_usage is True, input and output token counts are also recorded in token_histogram with attributes identifying the vendor and response model. Any errors encountered while parsing usage metadata are ignored and do not stop span attribute recording.

Parameters:
span (Span): The span on which to set usage attributes.
response (LLMResult): The LLM response whose generations contain usage metadata.
token_histogram (Histogram): Histogram to record token counts when record_token_usage is True.
record_token_usage (bool): If True, record input/output token counts to token_histogram.
model_name (str): The model name to attach to histogram records as the response model.
"""
input_tokens = 0
output_tokens = 0
total_tokens = 0
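
The aggregation described above walks every generation and accepts either naming scheme for the same counts. A hedged sketch of that loop; the `generation.message.usage_metadata` path is an assumption about where LangChain surfaces the metadata:

```python
def _sum_usage(response):
    # Sum token counts across all generations, accepting either the
    # "input_tokens"/"output_tokens" or "prompt_tokens"/"completion_tokens"
    # key pairs, plus the nested cache-read detail.
    input_tokens = output_tokens = total_tokens = cache_read_tokens = 0
    for generations in response.generations:
        for generation in generations:
            message = getattr(generation, "message", None)
            info = getattr(message, "usage_metadata", None) or {}
            input_tokens += info.get("input_tokens") or info.get("prompt_tokens") or 0
            output_tokens += info.get("output_tokens") or info.get("completion_tokens") or 0
            total_tokens += info.get("total_tokens") or 0
            details = info.get("input_token_details") or {}
            cache_read_tokens += details.get("cache_read") or 0
    return input_tokens, output_tokens, total_tokens, cache_read_tokens
```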
@@ -346,7 +358,7 @@ def set_chat_response_usage(
)
_set_span_attribute(
span,
-GenAIAttributes.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
+SpanAttributes.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
cache_read_tokens,
)
if record_token_usage:
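
The body of this `if` is elided by the diff; per the docstring it records input and output counts with vendor and model attributes. A speculative sketch of such a recording, using OpenTelemetry GenAI semantic-convention keys and the enclosing function's locals; the exact attribute keys and vendor value in this package are assumptions:

```python
# Hypothetical recording body; attribute keys per the OTel GenAI
# semantic conventions, vendor label assumed.
if input_tokens > 0:
    token_histogram.record(
        input_tokens,
        attributes={
            "gen_ai.system": "langchain",  # assumed vendor label
            "gen_ai.response.model": model_name,
            "gen_ai.token.type": "input",
        },
    )
if output_tokens > 0:
    token_histogram.record(
        output_tokens,
        attributes={
            "gen_ai.system": "langchain",
            "gen_ai.response.model": model_name,
            "gen_ai.token.type": "output",
        },
    )
```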
@@ -414,4 +426,4 @@ def _set_chat_tool_calls(
span,
f"{tool_call_prefix}.arguments",
json.dumps(tool_args, cls=CallbackFilteredJSONEncoder),
-)
\ No newline at end of file
+)