This commit introduces the ability to select specific v2 API endpoints

dat-a-man · dat-a-man · commit 4a7807fe8227 · 2025-11-11T09:07:57.000Z
to be used in the Pipedrive source.

Users can now configure this by passing a list of endpoint names to
the `resources` parameter of `pipedrive_v2_source`. This allows for a
gradual migration to v2 without enabling all endpoints at once.

fix: clean whitespace and update pipedrive v2 source

Revised structure to fix lint errors and make it simpler

minor fixes
diff --git a/sources/pipedrive/__init__.py b/sources/pipedrive/__init__.py
@@ -9,25 +9,29 @@
 To get an api key: https://pipedrive.readme.io/docs/how-to-find-the-api-token
 """
 
-from typing import Any, Dict, Iterator, List, Optional, Union, Iterator
+from typing import Any, Dict, Iterator, List, Optional, Union, Iterable
 
 import dlt
+from dlt.common import pendulum
+from dlt.common.time import ensure_pendulum_datetime
+from dlt.common.typing import TDataItems
+from dlt.sources import DltResource
 
 from .helpers.custom_fields_munger import update_fields_mapping, rename_fields
 from .helpers.pages import get_recent_items_incremental, get_pages
 from .helpers import group_deal_flows
 from .typing import TDataPage
 from .settings import ENTITY_MAPPINGS, RECENTS_ENTITIES
-from dlt.common import pendulum
-from dlt.common.time import ensure_pendulum_datetime
-from dlt.sources import DltResource, TDataItems
+
+# Export v2 source for easy access
+from .rest_v2 import pipedrive_v2_source
 
 
 @dlt.source(name="pipedrive")
 def pipedrive_source(
     pipedrive_api_key: str = dlt.secrets.value,
     since_timestamp: Optional[Union[pendulum.DateTime, str]] = "1970-01-01 00:00:00",
-) -> Iterator[DltResource]:
+) -> Iterable[DltResource]:
     """
     Get data from the Pipedrive API. Supports incremental loading and custom fields mapping.
 
@@ -60,6 +64,8 @@ def pipedrive_source(
     Resources that depend on another resource are implemented as transformers
     so they can re-use the original resource data without re-downloading.
     Examples:  deals_participants, deals_flow
+
+    Note: For v2 API endpoints, use pipedrive_v2_source from pipedrive.rest_v2
     """
 
     # yield nice rename mapping
diff --git a/sources/pipedrive/rest_v2/__init__.py b/sources/pipedrive/rest_v2/__init__.py
@@ -0,0 +1,144 @@
+from typing import Iterable, Dict, Any, List, Optional, Union, cast
+
+import dlt
+from dlt.sources import DltResource
+from dlt.sources.rest_api import rest_api_source, RESTAPIConfig
+from dlt.sources.rest_api.typing import EndpointResource
+
+from ..settings import ENTITIES_V2, NESTED_ENTITIES_V2
+
+
+@dlt.source(name="pipedrive_v2")
+def pipedrive_v2_source(
+    pipedrive_api_key: str = dlt.secrets.value,
+    company_domain: str = dlt.secrets.value,
+    resources: Optional[List[str]] = None,
+    prefix: str = "v2_",
+) -> Iterable[DltResource]:
+    """
+    Get data from the Pipedrive API v2.
+
+    Args:
+        pipedrive_api_key: API token for authentication
+        company_domain: Your Pipedrive company domain
+        resources: List of resource names to load (e.g., ["deals", "persons"]). If None or empty,
+            loads all resources defined in ENTITIES_V2. Unsupported names are skipped with a warning.
+        prefix: Prefix for table names (default: "v2_")
+
+    Returns:
+        Resources for v2 endpoints. Nested endpoints (e.g., deal_products, deal_followers) are automatically included when their parent resource is selected.
+
+    Raises:
+        ValueError: If none of the requested names is a supported v2 endpoint.
+
+    See also: https://pipedrive.readme.io/docs/pipedrive-api-v2#api-v2-availability
+    """
+    from dlt.common import logger  # local import: file-level import block stays untouched
+
+    requested = resources or list(ENTITIES_V2)
+
+    # Keep only the names that ENTITIES_V2 declares as v2 endpoints
+    selected = {name: ENTITIES_V2[name] for name in requested if name in ENTITIES_V2}
+    if not selected:
+        raise ValueError(
+            f"No valid v2 endpoints found in: {requested}. "
+            f"Available endpoints: {list(ENTITIES_V2.keys())}"
+        )
+
+    # Report typos instead of silently dropping them from a partly valid selection
+    unknown = [name for name in requested if name not in ENTITIES_V2]
+    if unknown:
+        logger.warning(f"Ignoring unsupported Pipedrive v2 endpoints: {unknown}")
+
+    # Only include nested endpoints if their parent is in the v2 endpoints list
+    nested = {
+        nested_name: nested_config
+        for nested_name, nested_config in NESTED_ENTITIES_V2.items()
+        if nested_config["parent"] in selected
+    }
+
+    # Build the rest_api resources and re-yield them from this source
+    yield from rest_v2_resources(
+        pipedrive_api_key, company_domain, selected, nested, prefix
+    )
+
+
+def rest_v2_resources(
+    pipedrive_api_key: str,
+    company_domain: str,
+    resource_configs: Dict[str, Any],
+    nested_configs: Dict[str, Dict[str, Any]],
+    prefix: str,
+) -> Iterable[DltResource]:
+    """
+    Assemble a rest_api source for the Pipedrive API v2 and yield its resources.
+
+    Args:
+        pipedrive_api_key: API token, sent as the `api_token` query parameter
+        company_domain: Subdomain placed in front of `.pipedrive.com/api/v2/`
+        resource_configs: Maps each top-level resource name to its endpoint definition
+        nested_configs: Maps each child resource name to its config ("parent", "endpoint_path", ...)
+        prefix: Prepended to the name of every yielded resource
+
+    Yields:
+        Each resource of the generated rest_api source, renamed with `prefix`.
+    """
+    # Top-level resources: the given config is passed through as the endpoint definition
+    specs: List[Dict[str, Any]] = [
+        {"name": name, "endpoint": endpoint}
+        for name, endpoint in resource_configs.items()
+    ]
+
+    for child_name, child in nested_configs.items():
+        # Turn "{id}" into the rest_api placeholder that refers to the parent's id
+        parent_ref = f"{{resources.{child['parent']}.id}}"
+        child_def: Dict[str, Any] = {
+            "name": child_name,
+            "endpoint": {
+                "path": child["endpoint_path"].replace("{id}", parent_ref),
+                "params": child.get("params", {}),
+            },
+        }
+        parent_fields = child.get("include_from_parent")
+        if parent_fields:
+            child_def["include_from_parent"] = parent_fields
+        # "id" is the default primary key (resource_defaults), so only overrides are set
+        key: Union[str, List[str]] = child.get("primary_key", "id")
+        if key != "id":
+            child_def["primary_key"] = key
+        specs.append(child_def)
+
+    config: RESTAPIConfig = {
+        "client": {
+            "base_url": f"https://{company_domain}.pipedrive.com/api/v2/",
+            # API token travels as the `api_token` query parameter
+            "auth": {
+                "type": "api_key",
+                "name": "api_token",
+                "api_key": pipedrive_api_key,
+                "location": "query",
+            },
+        },
+        "resource_defaults": {
+            "primary_key": "id",
+            "write_disposition": "merge",
+            "endpoint": {
+                "params": {
+                    "limit": 500,
+                    "sort_by": "update_time",
+                    "sort_direction": "desc",
+                },
+                "data_selector": "data",
+                # Cursor pagination: the next cursor is read from additional_data.next_cursor
+                "paginator": {
+                    "type": "cursor",
+                    "cursor_path": "additional_data.next_cursor",
+                    "cursor_param": "cursor",
+                },
+            },
+        },
+        "resources": cast(List[Union[str, EndpointResource, DltResource]], specs),
+    }
+
+    for res in rest_api_source(config).resources.values():
+        yield res.with_name(f"{prefix}{res.name}")
diff --git a/sources/pipedrive/settings.py b/sources/pipedrive/settings.py
@@ -27,3 +27,82 @@
     "task": "tasks",
     "user": "users",
 }
+
+
+"""
+Available Pipedrive API v2 endpoints for configuration.
+
+Note: Some endpoints (e.g., deal_followers, deal_products) require nested configuration.
+See NESTED_ENTITIES_V2 for examples.
+
+For more details, see: https://developers.pipedrive.com/docs/api/v2
+"""
+ENTITIES_V2 = {
+    "activities": {},
+    "deals": {
+        "params": {
+            "include_fields": (
+                "next_activity_id,last_activity_id,first_won_time,products_count,"
+                "files_count,notes_count,followers_count,email_messages_count,"
+                "activities_count,done_activities_count,undone_activities_count,"
+                "participants_count,last_incoming_mail_time,last_outgoing_mail_time,"
+                "smart_bcc_email"
+            )
+        }
+    },
+    "persons": {
+        "params": {
+            "include_fields": (
+                "next_activity_id,last_activity_id,open_deals_count,"
+                "related_open_deals_count,closed_deals_count,related_closed_deals_count,"
+                "participant_open_deals_count,participant_closed_deals_count,"
+                "email_messages_count,activities_count,done_activities_count,"
+                "undone_activities_count,files_count,notes_count,followers_count,"
+                "won_deals_count,related_won_deals_count,lost_deals_count,"
+                "related_lost_deals_count,last_incoming_mail_time,last_outgoing_mail_time"
+            )
+        }
+    },
+    "organizations": {
+        "params": {
+            "include_fields": (
+                "next_activity_id,last_activity_id,open_deals_count,"
+                "related_open_deals_count,closed_deals_count,related_closed_deals_count,"
+                "email_messages_count,activities_count,done_activities_count,"
+                "undone_activities_count,files_count,notes_count,followers_count,"
+                "won_deals_count,related_won_deals_count,lost_deals_count,"
+                "related_lost_deals_count"
+            )
+        }
+    },
+    "products": {},
+    "pipelines": {},
+    "stages": {},
+}
+
+# Nested V2 API Endpoints Configuration
+# Automatically loaded when their parent resource is passed in `resources` to pipedrive_v2_source.
+NESTED_ENTITIES_V2 = {
+    "deal_products": {
+        "parent": "deals",
+        "endpoint_path": "deals/{id}/products",
+        "params": {
+            "limit": 500,
+        },
+    },
+    "deal_followers": {
+        "parent": "deals",
+        "endpoint_path": "deals/{id}/followers",
+        "primary_key": ["user_id", "_deals_id"],  # Followers don't have 'id', use composite key
+        "include_from_parent": ["id"],  # Include deal id from parent
+        "params": {
+            "limit": 500,
+        },
+    },
+}
+
+# Default v2 resources for the example pipeline (pipedrive_pipeline.py) when none are
+# specified; pipedrive_v2_source itself falls back to every ENTITIES_V2 entry.
+# This curated set includes the most commonly used endpoints; customize it as needed.
+# See ENTITIES_V2 above for all available v2 endpoints.
+DEFAULT_V2_RESOURCES = ["deals", "persons", "organizations", "products", "pipelines", "stages"]
diff --git a/sources/pipedrive_pipeline.py b/sources/pipedrive_pipeline.py
@@ -1,12 +1,19 @@
+from typing import Optional, Sequence
+
 import dlt
-from pipedrive import pipedrive_source
+from pipedrive import pipedrive_source, pipedrive_v2_source
+from pipedrive.settings import DEFAULT_V2_RESOURCES
 
 
 def load_pipedrive() -> None:
     """Constructs a pipeline that will load all pipedrive data"""
     # configure the pipeline with your destination details
     pipeline = dlt.pipeline(
-        pipeline_name="pipedrive", destination="duckdb", dataset_name="pipedrive_data"
+        pipeline_name="pipedrive",
+        destination="duckdb",
+        dataset_name="pipedrive_data",
+        progress="log",
+        dev_mode=True,
     )
     load_info = pipeline.run(pipedrive_source())
     print(load_info)
@@ -16,13 +23,18 @@ def load_pipedrive() -> None:
 def load_selected_data() -> None:
     """Shows how to load just selected tables using `with_resources`"""
     pipeline = dlt.pipeline(
-        pipeline_name="pipedrive", destination="duckdb", dataset_name="pipedrive_data"
+        pipeline_name="pipedrive",
+        destination="duckdb",
+        dataset_name="pipedrive_data",
+        progress="log",
+        dev_mode=True,
     )
     # Use with_resources to select which entities to load
     # Note: `custom_fields_mapping` must be included to translate custom field hashes to corresponding names
     load_info = pipeline.run(
         pipedrive_source().with_resources(
-            "products", "deals", "deals_participants", "custom_fields_mapping"
+            # "products", "deals", "deals_participants", "custom_fields_mapping"
+            "deals"
         )
     )
     print(load_info)
@@ -44,7 +56,11 @@ def load_selected_data() -> None:
 def load_from_start_date() -> None:
     """Example to incrementally load activities limited to items updated after a given date"""
     pipeline = dlt.pipeline(
-        pipeline_name="pipedrive", destination="duckdb", dataset_name="pipedrive_data"
+        pipeline_name="pipedrive",
+        destination="duckdb",
+        dataset_name="pipedrive_data",
+        progress="log",
+        dev_mode=True,
     )
 
     # First source configure to load everything except activities from the beginning
@@ -61,10 +77,48 @@ def load_from_start_date() -> None:
     print(load_info)
 
 
+def load_v2_resources(resources: Optional[Sequence[str]] = None) -> None:
+    """Run the standalone v2 source and print the load and normalize info.
+
+    Falls back to DEFAULT_V2_RESOURCES when `resources` is None or empty.
+    Credentials are not passed here; they keep their `dlt.secrets.value` defaults.
+    """
+    selected = list(resources) if resources else list(DEFAULT_V2_RESOURCES)
+    v2_pipeline = dlt.pipeline(
+        pipeline_name="pipedrive",
+        destination="duckdb",
+        dataset_name="pipedrive_data",
+        progress="log",
+        dev_mode=True,
+    )
+    print(v2_pipeline.run(pipedrive_v2_source(resources=selected)))
+    # Normalize-step info of the run above
+    print(v2_pipeline.last_trace.last_normalize_info)
+
+
+def load_selected_v2_data(resources: Sequence[str]) -> None:
+    """Load only the named v2 entities; the source adds their nested resources."""
+    # Same example pipeline settings as the other loaders in this script
+    v2_pipeline = dlt.pipeline(
+        pipeline_name="pipedrive",
+        destination="duckdb",
+        dataset_name="pipedrive_data",
+        progress="log",
+        dev_mode=True,
+    )
+    print(v2_pipeline.run(pipedrive_v2_source(resources=list(resources))))
+    # Normalize-step info of the run above
+    print(v2_pipeline.last_trace.last_normalize_info)
+
+
 if __name__ == "__main__":
     # run our main example
-    load_pipedrive()
+    # load_pipedrive()
     # load selected tables and display resource info
     # load_selected_data()
     # load activities updated since given date
     # load_from_start_date()
+    # load v2 resources (optional addon)
+    load_v2_resources()
+    # load only selected v2 resources
+    # load_selected_v2_data(["deals", "stages"])