Skip to content

Commit 3b37dd2

Browse files
Bring Python telemetry event model consistent with JDBC (#701)
* Added driver connection params Signed-off-by: Nikhil Suri <nikhil.suri@databricks.com> * Added model fields for chunk/result latency Signed-off-by: Nikhil Suri <nikhil.suri@databricks.com> * fixed linting issues Signed-off-by: Nikhil Suri <nikhil.suri@databricks.com> * lint issue fixing Signed-off-by: Nikhil Suri <nikhil.suri@databricks.com> --------- Signed-off-by: Nikhil Suri <nikhil.suri@databricks.com>
1 parent fd65fd2 commit 3b37dd2

File tree

5 files changed

+512
-4
lines changed

5 files changed

+512
-4
lines changed

src/databricks/sql/client.py

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import json
1010
import os
1111
import decimal
12+
from urllib.parse import urlparse
1213
from uuid import UUID
1314

1415
from databricks.sql import __version__
@@ -322,6 +323,20 @@ def read(self) -> Optional[OAuthToken]:
322323
session_id_hex=self.get_session_id_hex()
323324
)
324325

326+
# Determine proxy usage
327+
use_proxy = self.http_client.using_proxy()
328+
proxy_host_info = None
329+
if (
330+
use_proxy
331+
and self.http_client.proxy_uri
332+
and isinstance(self.http_client.proxy_uri, str)
333+
):
334+
parsed = urlparse(self.http_client.proxy_uri)
335+
proxy_host_info = HostDetails(
336+
host_url=parsed.hostname or self.http_client.proxy_uri,
337+
port=parsed.port or 8080,
338+
)
339+
325340
driver_connection_params = DriverConnectionParameters(
326341
http_path=http_path,
327342
mode=DatabricksClientType.SEA
@@ -331,13 +346,31 @@ def read(self) -> Optional[OAuthToken]:
331346
auth_mech=TelemetryHelper.get_auth_mechanism(self.session.auth_provider),
332347
auth_flow=TelemetryHelper.get_auth_flow(self.session.auth_provider),
333348
socket_timeout=kwargs.get("_socket_timeout", None),
349+
azure_workspace_resource_id=kwargs.get("azure_workspace_resource_id", None),
350+
azure_tenant_id=kwargs.get("azure_tenant_id", None),
351+
use_proxy=use_proxy,
352+
use_system_proxy=use_proxy,
353+
proxy_host_info=proxy_host_info,
354+
use_cf_proxy=False, # CloudFlare proxy not yet supported in Python
355+
cf_proxy_host_info=None, # CloudFlare proxy not yet supported in Python
356+
non_proxy_hosts=None,
357+
allow_self_signed_support=kwargs.get("_tls_no_verify", False),
358+
use_system_trust_store=True, # Python uses system SSL by default
359+
enable_arrow=pyarrow is not None,
360+
enable_direct_results=True, # Always enabled in Python
361+
enable_sea_hybrid_results=kwargs.get("use_hybrid_disposition", False),
362+
http_connection_pool_size=kwargs.get("pool_maxsize", None),
363+
rows_fetched_per_block=DEFAULT_ARRAY_SIZE,
364+
async_poll_interval_millis=2000, # Default polling interval
365+
support_many_parameters=True, # Native parameters supported
366+
enable_complex_datatype_support=_use_arrow_native_complex_types,
367+
allowed_volume_ingestion_paths=self.staging_allowed_local_path,
334368
)
335369

336370
self._telemetry_client.export_initial_telemetry_log(
337371
driver_connection_params=driver_connection_params,
338372
user_agent=self.session.useragent_header,
339373
)
340-
self.staging_allowed_local_path = kwargs.get("staging_allowed_local_path", None)
341374

342375
def _set_use_inline_params_with_warning(self, value: Union[bool, str]):
343376
"""Valid values are True, False, and "silent"

src/databricks/sql/common/unified_http_client.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -301,6 +301,11 @@ def using_proxy(self) -> bool:
301301
"""Check if proxy support is available (not whether it's being used for a specific request)."""
302302
return self._proxy_pool_manager is not None
303303

304+
@property
305+
def proxy_uri(self) -> Optional[str]:
306+
"""Get the configured proxy URI, if any."""
307+
return self._proxy_uri
308+
304309
def close(self):
305310
"""Close the underlying connection pools."""
306311
if self._direct_pool_manager:

src/databricks/sql/telemetry/models/event.py

Lines changed: 108 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,25 @@ class DriverConnectionParameters(JsonSerializableMixin):
3838
auth_mech (AuthMech): The authentication mechanism used
3939
auth_flow (AuthFlow): The authentication flow type
4040
socket_timeout (int): Connection timeout in milliseconds
41+
azure_workspace_resource_id (str): Azure workspace resource ID
42+
azure_tenant_id (str): Azure tenant ID
43+
use_proxy (bool): Whether proxy is being used
44+
use_system_proxy (bool): Whether system proxy is being used
45+
proxy_host_info (HostDetails): Proxy host details if configured
46+
use_cf_proxy (bool): Whether CloudFlare proxy is being used
47+
cf_proxy_host_info (HostDetails): CloudFlare proxy host details if configured
48+
non_proxy_hosts (list): List of hosts that bypass proxy
49+
allow_self_signed_support (bool): Whether self-signed certificates are allowed
50+
use_system_trust_store (bool): Whether system trust store is used
51+
enable_arrow (bool): Whether Arrow format is enabled
52+
enable_direct_results (bool): Whether direct results are enabled
53+
enable_sea_hybrid_results (bool): Whether SEA hybrid results are enabled
54+
http_connection_pool_size (int): HTTP connection pool size
55+
rows_fetched_per_block (int): Number of rows fetched per block
56+
async_poll_interval_millis (int): Async polling interval in milliseconds
57+
support_many_parameters (bool): Whether many parameters are supported
58+
enable_complex_datatype_support (bool): Whether complex datatypes are supported
59+
allowed_volume_ingestion_paths (str): Allowed paths for volume ingestion
4160
"""
4261

4362
http_path: str
@@ -46,6 +65,25 @@ class DriverConnectionParameters(JsonSerializableMixin):
4665
auth_mech: Optional[AuthMech] = None
4766
auth_flow: Optional[AuthFlow] = None
4867
socket_timeout: Optional[int] = None
68+
azure_workspace_resource_id: Optional[str] = None
69+
azure_tenant_id: Optional[str] = None
70+
use_proxy: Optional[bool] = None
71+
use_system_proxy: Optional[bool] = None
72+
proxy_host_info: Optional[HostDetails] = None
73+
use_cf_proxy: Optional[bool] = None
74+
cf_proxy_host_info: Optional[HostDetails] = None
75+
non_proxy_hosts: Optional[list] = None
76+
allow_self_signed_support: Optional[bool] = None
77+
use_system_trust_store: Optional[bool] = None
78+
enable_arrow: Optional[bool] = None
79+
enable_direct_results: Optional[bool] = None
80+
enable_sea_hybrid_results: Optional[bool] = None
81+
http_connection_pool_size: Optional[int] = None
82+
rows_fetched_per_block: Optional[int] = None
83+
async_poll_interval_millis: Optional[int] = None
84+
support_many_parameters: Optional[bool] = None
85+
enable_complex_datatype_support: Optional[bool] = None
86+
allowed_volume_ingestion_paths: Optional[str] = None
4987

5088

5189
@dataclass
@@ -111,6 +149,69 @@ class DriverErrorInfo(JsonSerializableMixin):
111149
stack_trace: str
112150

113151

152+
@dataclass
153+
class ChunkDetails(JsonSerializableMixin):
154+
"""
155+
Contains detailed metrics about chunk downloads during result fetching.
156+
157+
These metrics are accumulated across all chunk downloads for a single statement.
158+
159+
Attributes:
160+
initial_chunk_latency_millis (int): Latency of the first chunk download
161+
slowest_chunk_latency_millis (int): Latency of the slowest chunk download
162+
total_chunks_present (int): Total number of chunks available
163+
total_chunks_iterated (int): Number of chunks actually downloaded
164+
sum_chunks_download_time_millis (int): Total time spent downloading all chunks
165+
"""
166+
167+
initial_chunk_latency_millis: Optional[int] = None
168+
slowest_chunk_latency_millis: Optional[int] = None
169+
total_chunks_present: Optional[int] = None
170+
total_chunks_iterated: Optional[int] = None
171+
sum_chunks_download_time_millis: Optional[int] = None
172+
173+
174+
@dataclass
175+
class ResultLatency(JsonSerializableMixin):
176+
"""
177+
Contains latency metrics for different phases of query execution.
178+
179+
This tracks two distinct phases:
180+
1. result_set_ready_latency_millis: Time from query submission until results are available (execute phase)
181+
- Set when execute() completes
182+
2. result_set_consumption_latency_millis: Time spent iterating/fetching results (fetch phase)
183+
- Measured from first fetch call until no more rows available
184+
- In Java: tracked via markResultSetConsumption(hasNext) method
185+
- Records start time on first fetch, calculates total on last fetch
186+
187+
Attributes:
188+
result_set_ready_latency_millis (int): Time until query results are ready (execution phase)
189+
result_set_consumption_latency_millis (int): Time spent fetching/consuming results (fetch phase)
190+
191+
"""
192+
193+
result_set_ready_latency_millis: Optional[int] = None
194+
result_set_consumption_latency_millis: Optional[int] = None
195+
196+
197+
@dataclass
198+
class OperationDetail(JsonSerializableMixin):
199+
"""
200+
Contains detailed information about the operation being performed.
201+
202+
Attributes:
203+
n_operation_status_calls (int): Number of status polling calls made
204+
operation_status_latency_millis (int): Total latency of all status calls
205+
operation_type (str): Specific operation type (e.g., EXECUTE_STATEMENT, LIST_TABLES, CANCEL_STATEMENT)
206+
is_internal_call (bool): Whether this is an internal driver operation
207+
"""
208+
209+
n_operation_status_calls: Optional[int] = None
210+
operation_status_latency_millis: Optional[int] = None
211+
operation_type: Optional[str] = None
212+
is_internal_call: Optional[bool] = None
213+
214+
114215
@dataclass
115216
class SqlExecutionEvent(JsonSerializableMixin):
116217
"""
@@ -122,14 +223,20 @@ class SqlExecutionEvent(JsonSerializableMixin):
122223
is_compressed (bool): Whether the result is compressed
123224
execution_result (ExecutionResultFormat): Format of the execution result
124225
retry_count (int): Number of retry attempts made
125-
chunk_id (int): ID of the chunk if applicable
226+
chunk_id (int): ID of the chunk if applicable (used for error tracking)
227+
chunk_details (ChunkDetails): Aggregated chunk download metrics
228+
result_latency (ResultLatency): Latency breakdown by execution phase
229+
operation_detail (OperationDetail): Detailed operation information
126230
"""
127231

128232
statement_type: StatementType
129233
is_compressed: bool
130234
execution_result: ExecutionResultFormat
131235
retry_count: Optional[int]
132236
chunk_id: Optional[int]
237+
chunk_details: Optional[ChunkDetails] = None
238+
result_latency: Optional[ResultLatency] = None
239+
operation_detail: Optional[OperationDetail] = None
133240

134241

135242
@dataclass

src/databricks/sql/telemetry/telemetry_client.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -380,7 +380,7 @@ class TelemetryClientFactory:
380380
# Shared flush thread for all clients
381381
_flush_thread = None
382382
_flush_event = threading.Event()
383-
_flush_interval_seconds = 90
383+
_flush_interval_seconds = 300 # 5 minutes
384384

385385
DEFAULT_BATCH_SIZE = 100
386386

0 commit comments

Comments
 (0)