Skip to content

Commit 6372eed

Browse files
authored
chore(telemetry): truncate error stack traces and add error_type tag (#15149)
## Description
Reduce the amount of data sent to telemetry logs, making them easier to query. We generally only need the last few stack frames anyway. Adding an "error_type" tag means we no longer have to query the error stack trace to group logs by exception class name.

## Testing
<!-- Describe your testing strategy or note what tests are included -->

## Risks
<!-- Note any risks associated with this change, or "None" if no risks -->

## Additional Notes
<!-- Any other information that would be helpful for reviewers -->
1 parent d86c66f commit 6372eed

File tree

2 files changed

+87
-1
lines changed

2 files changed

+87
-1
lines changed

ddtrace/internal/telemetry/writer.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -433,10 +433,23 @@ def add_error_log(self, msg: str, exc: Union[BaseException, tuple, None]) -> Non
433433
if config.LOG_COLLECTION_ENABLED:
434434
stack_trace = None if exc is None else self._format_stack_trace(exc)
435435

436+
error_type = "unknown"
437+
try:
438+
if exc is not None:
439+
if isinstance(exc, tuple) and len(exc) == 3:
440+
error_type = exc[0].__name__
441+
else:
442+
error_type = type(exc).__name__
443+
except Exception:
444+
log.debug("Failed to extract error type from exception: %r", (exc,), exc_info=True)
445+
436446
self.add_log(
437447
TELEMETRY_LOG_LEVEL.ERROR,
438448
msg,
439449
stack_trace=stack_trace if stack_trace is not None else "",
450+
tags={
451+
"error_type": error_type,
452+
},
440453
)
441454

442455
def _format_stack_trace(self, exc: Union[BaseException, tuple]) -> Optional[str]:
@@ -450,7 +463,8 @@ def _format_stack_trace(self, exc: Union[BaseException, tuple]) -> Optional[str]
450463

451464
tb = traceback.extract_tb(exc_traceback)
452465
formatted_tb = ["Traceback (most recent call last):"]
453-
for filename, lineno, funcname, srcline in tb:
466+
# Only include the last 20 frames
467+
for filename, lineno, funcname, srcline in tb[-20:]:
454468
if is_user_code(filename):
455469
formatted_tb.append(" <REDACTED>")
456470
formatted_tb.append(" <REDACTED>")

tests/telemetry/test_writer.py

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -995,6 +995,7 @@ def test_add_error_log(mock_time, telemetry_writer, test_agent_session):
995995
log_entry = logs[0]
996996
assert log_entry["level"] == TELEMETRY_LOG_LEVEL.ERROR.value
997997
assert log_entry["message"] == "Test error message"
998+
assert log_entry["tags"] == "error_type:jsondecodeerror"
998999

9991000
stack_trace = log_entry["stack_trace"]
10001001
expected_lines = [
@@ -1012,6 +1013,77 @@ def test_add_error_log(mock_time, telemetry_writer, test_agent_session):
10121013
assert expected_line in stack_trace
10131014

10141015

1016+
def test_add_error_log_large_stack(mock_time, telemetry_writer, test_agent_session):
1017+
"""Test add_integration_error_log functionality with real stack trace"""
1018+
try:
1019+
1020+
def _(n):
1021+
if n == 200:
1022+
raise ValueError("Test exception for large stack trace")
1023+
return _(n + 1)
1024+
1025+
_(0)
1026+
except Exception as e:
1027+
telemetry_writer.add_error_log("Test error message", e)
1028+
telemetry_writer.periodic(force_flush=True)
1029+
1030+
log_events = test_agent_session.get_events("logs")
1031+
assert len(log_events) == 1
1032+
1033+
logs = log_events[0]["payload"]["logs"]
1034+
assert len(logs) == 1
1035+
1036+
log_entry = logs[0]
1037+
assert log_entry["level"] == TELEMETRY_LOG_LEVEL.ERROR.value
1038+
assert log_entry["message"] == "Test error message"
1039+
assert log_entry["tags"] == "error_type:valueerror"
1040+
1041+
stack_trace = log_entry["stack_trace"]
1042+
expected_lines = """Traceback (most recent call last):
1043+
<REDACTED>
1044+
<REDACTED>
1045+
<REDACTED>
1046+
<REDACTED>
1047+
<REDACTED>
1048+
<REDACTED>
1049+
<REDACTED>
1050+
<REDACTED>
1051+
<REDACTED>
1052+
<REDACTED>
1053+
<REDACTED>
1054+
<REDACTED>
1055+
<REDACTED>
1056+
<REDACTED>
1057+
<REDACTED>
1058+
<REDACTED>
1059+
<REDACTED>
1060+
<REDACTED>
1061+
<REDACTED>
1062+
<REDACTED>
1063+
<REDACTED>
1064+
<REDACTED>
1065+
<REDACTED>
1066+
<REDACTED>
1067+
<REDACTED>
1068+
<REDACTED>
1069+
<REDACTED>
1070+
<REDACTED>
1071+
<REDACTED>
1072+
<REDACTED>
1073+
<REDACTED>
1074+
<REDACTED>
1075+
<REDACTED>
1076+
<REDACTED>
1077+
<REDACTED>
1078+
<REDACTED>
1079+
<REDACTED>
1080+
<REDACTED>
1081+
<REDACTED>
1082+
<REDACTED>
1083+
builtins.ValueError: <REDACTED>"""
1084+
assert stack_trace == expected_lines
1085+
1086+
10151087
def test_add_integration_error_log_with_log_collection_disabled(mock_time, telemetry_writer, test_agent_session):
10161088
"""Test that add_integration_error_log respects LOG_COLLECTION_ENABLED setting"""
10171089
original_value = telemetry_config.LOG_COLLECTION_ENABLED

0 commit comments

Comments
 (0)