Restructure to fix lint errors and simplify: move v2 endpoints into a separate pipedrive_v2_source

dat-a-man · dat-a-man · commit 43c08a11632d · 2025-11-11T08:46:09.000Z
diff --git a/ai/cd b/ai/cd
diff --git a/ai/git b/ai/git
diff --git a/sources/pipedrive/__init__.py b/sources/pipedrive/__init__.py
@@ -22,30 +22,21 @@
 from dlt.common.time import ensure_pendulum_datetime
 from dlt.sources import DltResource, TDataItems
 
-from .rest_v2 import rest_v2_resources
-from .settings import ENTITIES_V2, NESTED_ENTITIES_V2
+# Export v2 source for easy access
+from .rest_v2 import pipedrive_v2_source
 
 
 @dlt.source(name="pipedrive")
 def pipedrive_source(
     pipedrive_api_key: str = dlt.secrets.value,
-    company_domain: str = dlt.secrets.value,
     since_timestamp: Optional[Union[pendulum.DateTime, str]] = "1970-01-01 00:00:00",
-    use_v2_endpoints: Optional[List[str]] = None,
-    v2_prefix: str = "v2_",
-    v2_only: bool = False,
 ) -> Iterator[DltResource]:
     """
     Get data from the Pipedrive API. Supports incremental loading and custom fields mapping.
-    Can switch between v1 and v2 endpoints.
 
     Args:
         pipedrive_api_key: https://pipedrive.readme.io/docs/how-to-find-the-api-token
-        company_domain: The domain of your company in Pipedrive
         since_timestamp: Starting timestamp for incremental loading. By default complete history is loaded on first run.
-        use_v2_endpoints: A list of resource names to load using the v2 API. If None, all resources are loaded from v1.
-        v2_prefix: A prefix to add to the table names of resources loaded from v2 endpoints.
-        v2_only: When True, only v2 resources are yielded (v1 resources are skipped).
 
     Returns resources:
         custom_fields_mapping
@@ -72,51 +63,22 @@ def pipedrive_source(
     Resources that depend on another resource are implemented as transformers
     so they can re-use the original resource data without re-downloading.
     Examples:  deals_participants, deals_flow
-    """
-    # create v2 resources
-    use_v2_endpoints = use_v2_endpoints or []
-    v2_resources_config = {
-        resource: ENTITIES_V2[resource]
-        for resource in use_v2_endpoints
-        if resource in ENTITIES_V2  # this ensures that resource is supported by v2 api
-    }
-    if v2_resources_config:
-        # Only include nested endpoints if their parent is in the v2 endpoints list
-        nested_configs_to_create = {
-            nested_name: nested_config
-            for nested_name, nested_config in NESTED_ENTITIES_V2.items()
-            if nested_config["parent"] in v2_resources_config
-        }
-
-        v2_resources = rest_v2_resources(
-            pipedrive_api_key,
-            company_domain,
-            v2_resources_config,
-            nested_configs_to_create,
-            v2_prefix,
-        )
-        for resource in v2_resources:
-            yield resource
 
-        if v2_only:
-            return
-    elif v2_only:
-        raise ValueError(
-            "v2_only was set but no valid v2 endpoints were supplied via use_v2_endpoints."
-        )
+    Note: For v2 API endpoints, use pipedrive_v2_source from pipedrive.rest_v2
+    """
 
-    # yield nice rename mapping - always from v1
+    # yield nice rename mapping
     yield create_state(pipedrive_api_key) | parsed_mapping
 
-    # parse timestamp and build kwargs for v1
+    # parse timestamp and build kwargs
     since_timestamp = ensure_pendulum_datetime(since_timestamp).strftime(
         "%Y-%m-%d %H:%M:%S"
     )
     resource_kwargs: Any = (
         {"since_timestamp": since_timestamp} if since_timestamp else {}
     )
 
-    # create resources for all v1 endpoints
+    # create resources for all endpoints
     endpoints_resources = {}
     for entity, resource_name in RECENTS_ENTITIES.items():
         endpoints_resources[resource_name] = dlt.resource(
@@ -128,7 +90,7 @@ def pipedrive_source(
 
     yield from endpoints_resources.values()
 
-    # create transformers for deals to participants and flows, attached to v1 deals resource
+    # create transformers for deals to participants and flows
     yield endpoints_resources["deals"] | dlt.transformer(
         name="deals_participants", write_disposition="merge", primary_key="id"
     )(_get_deals_participants)(pipedrive_api_key)
diff --git a/sources/pipedrive/rest_v2/__init__.py b/sources/pipedrive/rest_v2/__init__.py
@@ -1,8 +1,66 @@
-from typing import Iterable, Dict, Any, List, cast
+from typing import Iterable, Dict, Any, List, Optional, Iterator, Union, cast
 
 import dlt
 from dlt.sources import DltResource
-from dlt.sources.rest_api import rest_api_source
+from dlt.sources.rest_api import rest_api_source, RESTAPIConfig
+from dlt.sources.rest_api.typing import EndpointResource
+
+from ..settings import ENTITIES_V2, NESTED_ENTITIES_V2
+
+
+@dlt.source(name="pipedrive_v2")
+def pipedrive_v2_source(
+    pipedrive_api_key: str = dlt.secrets.value,
+    company_domain: str = dlt.secrets.value,
+    resources: Optional[List[str]] = None,
+    prefix: str = "v2_",
+) -> Iterator[DltResource]:
+    """
+    Get data from the Pipedrive API v2.
+
+    Args:
+        pipedrive_api_key: API token for authentication
+        company_domain: Your Pipedrive company domain
+        resources: List of resource names to load (e.g., ["deals", "persons"]). If None or empty, loads all available v2 resources; names not in ENTITIES_V2 are skipped.
+        prefix: Prefix for table names (default: "v2_")
+
+    Returns:
+        Resources for v2 endpoints. Nested endpoints (e.g., deal_products, deal_followers) are automatically included when their parent resource is selected.
+
+    See also: https://pipedrive.readme.io/docs/pipedrive-api-v2#api-v2-availability
+    """
+    resources = resources or list(ENTITIES_V2.keys())
+
+    # Keep only the requested names that are valid v2 endpoints
+    v2_resources_config = {
+        resource: ENTITIES_V2[resource]
+        for resource in resources
+        if resource in ENTITIES_V2  # this ensures that resource is supported by v2 api
+    }
+
+    if not v2_resources_config:
+        raise ValueError(
+            f"No valid v2 endpoints found in: {resources}. "
+            f"Available endpoints: {list(ENTITIES_V2.keys())}"
+        )
+
+    # Only include nested endpoints if their parent is in the v2 endpoints list
+    nested_configs_to_create = {
+        nested_name: nested_config
+        for nested_name, nested_config in NESTED_ENTITIES_V2.items()
+        if nested_config["parent"] in v2_resources_config
+    }
+
+    # Create and yield v2 resources
+    v2_resources = rest_v2_resources(
+        pipedrive_api_key,
+        company_domain,
+        v2_resources_config,
+        nested_configs_to_create,
+        prefix,
+    )
+    for resource in v2_resources:
+        yield resource
 
 
 def rest_v2_resources(
@@ -16,40 +74,9 @@ def rest_v2_resources(
     Build and yield REST v2 resources for the given resource configurations.
     Includes nested endpoints that depend on parent resources.
     """
-
+    # Build resources list
     resources: List[Dict[str, Any]] = []
 
-    config: Dict[str, Any] = {
-        "client": {
-            "base_url": f"https://{company_domain}.pipedrive.com/api/v2/",
-            "auth": {
-                "type": "api_key",
-                "name": "api_token",
-                "api_key": pipedrive_api_key,
-                "location": "query",
-            },
-        },
-        "resource_defaults": {
-            "primary_key": "id",
-            "write_disposition": "merge",
-            "endpoint": {
-                "params": {
-                    "limit": 500,
-                    "sort_by": "update_time",
-                    "sort_direction": "desc",
-                },
-                "data_selector": "data",
-                "paginator": {
-                    "type": "cursor",
-                    "cursor_path": "additional_data.next_cursor",
-                    "cursor_param": "cursor",
-                },
-            },
-        },
-        # IMPORTANT: bind the typed list here
-        "resources": resources,
-    }
-
     # Build the resources list for the config from the provided resource configs
     for resource_name, endpoint_config in resource_configs.items():
         resource_def: Dict[str, Any] = {
@@ -63,7 +90,7 @@ def rest_v2_resources(
         parent_name = nested_config["parent"]
         endpoint_path = nested_config["endpoint_path"]
         params = nested_config.get("params", {})
-        primary_key = nested_config.get("primary_key", "id")
+        primary_key: Union[str, List[str]] = nested_config.get("primary_key", "id")
         include_from_parent = nested_config.get("include_from_parent")
 
         # Use native rest_api_source nested endpoint syntax: {resources.parent_name.id}
@@ -80,10 +107,40 @@ def rest_v2_resources(
             nested_resource_def["include_from_parent"] = include_from_parent
         if primary_key != "id":
             nested_resource_def["primary_key"] = primary_key
-
         resources.append(nested_resource_def)
 
-    api_source = rest_api_source(cast(Any, config))
+    # Create config with proper typing
+    # Cast resources to the expected type since our Dict[str, Any] matches EndpointResource structure
+    config: RESTAPIConfig = {
+        "client": {
+            "base_url": f"https://{company_domain}.pipedrive.com/api/v2/",
+            "auth": {
+                "type": "api_key",
+                "name": "api_token",
+                "api_key": pipedrive_api_key,
+                "location": "query",
+            },
+        },
+        "resource_defaults": {
+            "primary_key": "id",
+            "write_disposition": "merge",
+            "endpoint": {
+                "params": {
+                    "limit": 500,
+                    "sort_by": "update_time",
+                    "sort_direction": "desc",
+                },
+                "data_selector": "data",
+                "paginator": {
+                    "type": "cursor",
+                    "cursor_path": "additional_data.next_cursor",
+                    "cursor_param": "cursor",
+                },
+            },
+        },
+        "resources": cast(List[Union[str, EndpointResource, DltResource]], resources),
+    }
 
+    api_source = rest_api_source(config)
     for resource in api_source.resources.values():
         yield resource.with_name(f"{prefix}{resource.name}")
diff --git a/sources/pipedrive/settings.py b/sources/pipedrive/settings.py
@@ -28,6 +28,7 @@
     "user": "users",
 }
 
+
 """
 Available Pipedrive API v2 endpoints for configuration.
 
@@ -36,7 +37,6 @@
 
 # For more details, see: https://developers.pipedrive.com/docs/api/v2
 """
-
 ENTITIES_V2 = {
     "activities": {},
     "deals": {
diff --git a/sources/pipedrive_pipeline.py b/sources/pipedrive_pipeline.py
@@ -1,15 +1,19 @@
 from typing import Optional, Sequence
 
 import dlt
-from pipedrive import pipedrive_source
+from pipedrive import pipedrive_source, pipedrive_v2_source
 from pipedrive.settings import DEFAULT_V2_RESOURCES
 
 
 def load_pipedrive() -> None:
     """Constructs a pipeline that will load all pipedrive data"""
     # configure the pipeline with your destination details
     pipeline = dlt.pipeline(
-        pipeline_name="pipedrive", destination="duckdb", dataset_name="pipedrive_data"
+        pipeline_name="pipedrive",
+        destination="duckdb",
+        dataset_name="pipedrive_data",
+        progress="log",
+        dev_mode=True,
     )
     load_info = pipeline.run(pipedrive_source())
     print(load_info)
@@ -19,13 +23,18 @@ def load_pipedrive() -> None:
 def load_selected_data() -> None:
     """Shows how to load just selected tables using `with_resources`"""
     pipeline = dlt.pipeline(
-        pipeline_name="pipedrive", destination="duckdb", dataset_name="pipedrive_data"
+        pipeline_name="pipedrive",
+        destination="duckdb",
+        dataset_name="pipedrive_data",
+        progress="log",
+        dev_mode=True,
     )
     # Use with_resources to select which entities to load
     # Note: `custom_fields_mapping` must be included to translate custom field hashes to corresponding names
     load_info = pipeline.run(
         pipedrive_source().with_resources(
-            "products", "deals", "deals_participants", "custom_fields_mapping"
+            # "products", "deals", "deals_participants", "custom_fields_mapping"
+            "deals"
         )
     )
     print(load_info)
@@ -47,7 +56,11 @@ def load_selected_data() -> None:
 def load_from_start_date() -> None:
     """Example to incrementally load activities limited to items updated after a given date"""
     pipeline = dlt.pipeline(
-        pipeline_name="pipedrive", destination="duckdb", dataset_name="pipedrive_data"
+        pipeline_name="pipedrive",
+        destination="duckdb",
+        dataset_name="pipedrive_data",
+        progress="log",
+        dev_mode=True,
     )
 
     # First source configure to load everything except activities from the beginning
@@ -65,16 +78,19 @@ def load_from_start_date() -> None:
 
 
 def load_v2_resources(resources: Optional[Sequence[str]] = None) -> None:
-    """Load v2 entities using the default configuration."""
+    """Load v2 entities using the separate v2 source.
+
+    Note: pipedrive_api_key and company_domain are not passed here, so they are read from dlt secrets.
+    """
     resources = list(resources or DEFAULT_V2_RESOURCES)
     pipeline = dlt.pipeline(
-        pipeline_name="pipedrive_v2232",
+        pipeline_name="pipedrive",
         destination="duckdb",
-        dataset_name="pipedrive_v2_data",
+        dataset_name="pipedrive_data",
         progress="log",
         dev_mode=True,
     )
-    source = pipedrive_source(use_v2_endpoints=resources, v2_only=True)
+    source = pipedrive_v2_source(resources=resources)
     load_info = pipeline.run(source)
     print(load_info)
     print(pipeline.last_trace.last_normalize_info)
@@ -83,13 +99,13 @@ def load_v2_resources(resources: Optional[Sequence[str]] = None) -> None:
 def load_selected_v2_data(resources: Sequence[str]) -> None:
     """Load only the specified v2 entities (and their nested resources)."""
     pipeline = dlt.pipeline(
-        pipeline_name="pipedrive_v2_selected",
+        pipeline_name="pipedrive",
         destination="duckdb",
-        dataset_name="pipedrive_v2_data",
+        dataset_name="pipedrive_data",
         progress="log",
         dev_mode=True,
     )
-    source = pipedrive_source(use_v2_endpoints=list(resources), v2_only=True)
+    source = pipedrive_v2_source(resources=list(resources))
     load_info = pipeline.run(source)
     print(load_info)
     print(pipeline.last_trace.last_normalize_info)