dlt-hub
diff --git a/‎sources/facebook_ads/__init__.py‎
Lines changed: 20 additions & 14 deletions b/‎sources/facebook_ads/__init__.py‎
Lines changed: 20 additions & 14 deletions
diff --git a/‎sources/facebook_ads_pipeline.py‎
Lines changed: 20 additions & 0 deletions b/‎sources/facebook_ads_pipeline.py‎
Lines changed: 20 additions & 0 deletions
diff --git a/‎sources/pg_replication/README.md‎
Lines changed: 8 additions & 0 deletions b/‎sources/pg_replication/README.md‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎sources/pg_replication/helpers.py‎
Lines changed: 4 additions & 3 deletions b/‎sources/pg_replication/helpers.py‎
Lines changed: 4 additions & 3 deletions
@@ -39,7 +39,6 @@
 )
 from .settings import (
     FACEBOOK_INSIGHTS_RETENTION_PERIOD,
-    ALL_ACTION_BREAKDOWNS,
     ALL_ACTION_ATTRIBUTION_WINDOWS,
     DEFAULT_INSIGHT_FIELDS,
     INSIGHT_FIELDS_TYPES,
@@ -126,8 +125,8 @@ def facebook_insights_source(
     fields: Sequence[str] = DEFAULT_INSIGHT_FIELDS,
     attribution_window_days_lag: int = 7,
     time_increment_days: int = 1,
-    breakdowns: TInsightsBreakdownOptions = "ads_insights",
-    action_breakdowns: Sequence[str] = ALL_ACTION_BREAKDOWNS,
+    breakdowns: TInsightsBreakdownOptions = None,
+    action_breakdowns: Sequence[str] = None,
     level: TInsightsLevels = "ad",
     action_attribution_windows: Sequence[str] = ALL_ACTION_ATTRIBUTION_WINDOWS,
     batch_size: int = 50,
@@ -149,8 +148,8 @@ def facebook_insights_source(
         fields (Sequence[str], optional): A list of fields to include in each reports. Note that `breakdowns` option adds fields automatically. Defaults to DEFAULT_INSIGHT_FIELDS.
         attribution_window_days_lag (int, optional): Attribution window in days. The reports in attribution window are refreshed on each run.. Defaults to 7.
         time_increment_days (int, optional): The report aggregation window in days. use 7 for weekly aggregation. Defaults to 1.
-        breakdowns (TInsightsBreakdownOptions, optional): A presents with common aggregations. See settings.py for details. Defaults to "ads_insights_age_and_gender".
-        action_breakdowns (Sequence[str], optional): Action aggregation types. See settings.py for details. Defaults to ALL_ACTION_BREAKDOWNS.
+        breakdowns (TInsightsBreakdownOptions, optional): A preset of common aggregations. See settings.py for details. Defaults to None (no breakdowns).
+        action_breakdowns (Sequence[str], optional): Action aggregation types. See settings.py for details. Defaults to None (no action breakdowns).
         level (TInsightsLevels, optional): The granularity level. Defaults to "ad".
         action_attribution_windows (Sequence[str], optional): Attribution windows for actions. Defaults to ALL_ACTION_ATTRIBUTION_WINDOWS.
         batch_size (int, optional): Page size when reading data from particular report. Defaults to 50.
@@ -186,16 +185,7 @@ def facebook_insights(
         while start_date <= end_date:
             query = {
                 "level": level,
-                "action_breakdowns": list(action_breakdowns),
-                "breakdowns": list(
-                    INSIGHTS_BREAKDOWNS_OPTIONS[breakdowns]["breakdowns"]
-                ),
                 "limit": batch_size,
-                "fields": list(
-                    set(fields)
-                    .union(INSIGHTS_BREAKDOWNS_OPTIONS[breakdowns]["fields"])
-                    .difference(INVALID_INSIGHTS_FIELDS)
-                ),
                 "time_increment": time_increment_days,
                 "action_attribution_windows": list(action_attribution_windows),
                 "time_ranges": [
@@ -207,6 +197,22 @@ def facebook_insights(
                     }
                 ],
             }
+
+            fields_to_use = set(fields)
+            # Only add breakdowns if explicitly provided
+            if breakdowns is not None:
+                query["breakdowns"] = list(
+                    INSIGHTS_BREAKDOWNS_OPTIONS[breakdowns]["breakdowns"]
+                )
+                fields_to_use = fields_to_use.union(
+                    INSIGHTS_BREAKDOWNS_OPTIONS[breakdowns]["fields"]
+                )
+            query["fields"] = list(fields_to_use.difference(INVALID_INSIGHTS_FIELDS))
+
+            # Only add action_breakdowns if explicitly provided
+            if action_breakdowns is not None:
+                query["action_breakdowns"] = list(action_breakdowns)
+
             job = execute_job(account.get_insights(params=query, is_async=True))
             yield list(map(process_report_item, job.get_result()))
             start_date = start_date.add(days=time_increment_days)
 
@@ -117,10 +117,30 @@ def load_insights() -> None:
     print(info)
 
 
+def load_insights_with_breakdowns() -> None:
+    """Example: load Facebook insights using the age-and-gender breakdown preset."""
+    # A preset name is passed as `breakdowns`; to also split actions, add e.g.
+    # action_breakdowns=["action_type", "action_target_id"] to the source call.
+    breakdowns_pipeline = dlt.pipeline(
+        pipeline_name="facebook_insights_breakdowns",
+        destination="duckdb",
+        dataset_name="facebook_insights_data",
+        dev_mode=True,
+    )
+    insights_source = facebook_insights_source(
+        initial_load_past_days=7,
+        breakdowns="ads_insights_age_and_gender",
+    )
+    # Run the load into the local duckdb dataset and report the outcome
+    load_info = breakdowns_pipeline.run(insights_source)
+    print(load_info)
+
+
 if __name__ == "__main__":
     # load_all_ads_objects()
     merge_ads_objects()
     # load_ads_with_custom_fields()
     # load_only_disapproved_ads()
     # load_and_enrich_objects()
     # load_insights()
+    # load_insights_with_breakdowns()
@@ -6,6 +6,7 @@ Resources that can be loaded using this verified source are:
 | Name                 | Description                                     |
 |----------------------|-------------------------------------------------|
 | replication_resource | Load published messages from a replication slot |
+| init_replication     | Initialize replication and optionally return snapshot resources for the initial data load |
 
 ## Initialize the pipeline
 
@@ -29,6 +30,13 @@ It also needs `CREATE` privilege on the database:
 GRANT CREATE ON DATABASE dlt_data TO replication_user;
 ```
 
+If not a superuser, the user must have ownership of the tables that need to be replicated:
+
+```sql
+ALTER TABLE your_table OWNER TO replication_user;  
+```
+
+
 ### Set up RDS
 1. You must enable replication for RDS Postgres instance via **Parameter Group**: https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/USER_PostgreSQL.Replication.ReadReplicas.html
 2. `WITH LOGIN REPLICATION;` does not work on RDS, instead do:
 
@@ -42,7 +42,7 @@
 from dlt.sources.credentials import ConnectionStringCredentials
 from dlt.sources.sql_database import (
     sql_table as core_sql_table,
-    sql_database as core_sql_datbase,
+    sql_database as core_sql_database,
 )
 
 from .schema_types import _to_dlt_column_schema, _to_dlt_val
@@ -114,7 +114,8 @@ def init_replication(
         table_names (Optional[Union[str, Sequence[str]]]):  Name(s) of the table(s)
           to include in the publication. If not provided, the whole schema with `schema_name` will be replicated
           (also tables added to the schema after the publication was created). You need superuser privileges
-          for the schema replication.
+          for whole-schema replication. When individual table names are specified, the database role
+          must own those tables unless it is a superuser.
         credentials (ConnectionStringCredentials): Postgres database credentials.
         publish (str): Comma-separated string of DML operations. Can be used to
           control which changes are included in the publication. Allowed operations
@@ -184,7 +185,7 @@ def init_replication(
                     # do not include dlt tables
                     table_names = [
                         table_name
-                        for table_name in core_sql_datbase(
+                        for table_name in core_sql_database(
                             credentials, schema=schema_name, reflection_level="minimal"
                         ).resources.keys()
                         if not table_name.lower().startswith(DLT_NAME_PREFIX)