pymc-labs
diff --git a/causalpy/data/simulate_data.py b/causalpy/data/simulate_data.py
Lines changed: 54 additions & 24 deletions
diff --git a/causalpy/experiments/base.py b/causalpy/experiments/base.py
Lines changed: 15 additions & 9 deletions
diff --git a/causalpy/experiments/diff_in_diff.py b/causalpy/experiments/diff_in_diff.py
Lines changed: 20 additions & 11 deletions
@@ -26,8 +26,11 @@
 
 
 def _smoothed_gaussian_random_walk(
-    gaussian_random_walk_mu, gaussian_random_walk_sigma, N, lowess_kwargs
-):
+    gaussian_random_walk_mu: float,
+    gaussian_random_walk_sigma: float,
+    N: int,
+    lowess_kwargs: dict,
+) -> tuple[np.ndarray, np.ndarray]:
     """
     Generates Gaussian random walk data and applies LOWESS
 
@@ -48,12 +51,12 @@ def _smoothed_gaussian_random_walk(
 
 
 def generate_synthetic_control_data(
-    N=100,
-    treatment_time=70,
-    grw_mu=0.25,
-    grw_sigma=1,
-    lowess_kwargs=default_lowess_kwargs,
-):
+    N: int = 100,
+    treatment_time: int = 70,
+    grw_mu: float = 0.25,
+    grw_sigma: float = 1,
+    lowess_kwargs: dict = default_lowess_kwargs,
+) -> tuple[pd.DataFrame, np.ndarray]:
     """
     Generates data for synthetic control example.
 
@@ -108,8 +111,12 @@ def generate_synthetic_control_data(
 
 
 def generate_time_series_data(
-    N=100, treatment_time=70, beta_temp=-1, beta_linear=0.5, beta_intercept=3
-):
+    N: int = 100,
+    treatment_time: int = 70,
+    beta_temp: float = -1,
+    beta_linear: float = 0.5,
+    beta_intercept: float = 3,
+) -> pd.DataFrame:
     """
     Generates interrupted time series example data
 
@@ -155,7 +162,9 @@ def generate_time_series_data(
     return df
 
 
-def generate_time_series_data_seasonal(treatment_time):
+def generate_time_series_data_seasonal(
+    treatment_time: pd.Timestamp,
+) -> pd.DataFrame:
     """
     Generates 10 years of monthly data with seasonality
     """
@@ -169,11 +178,13 @@ def generate_time_series_data_seasonal(treatment_time):
         t=df.index,
     ).set_index("date", drop=True)
     month_effect = np.array([11, 13, 12, 15, 19, 23, 21, 28, 20, 17, 15, 12])
-    df["y"] = 0.2 * df["t"] + 2 * month_effect[df.month.values - 1]
+    df["y"] = 0.2 * df["t"] + 2 * month_effect[np.asarray(df.month.values) - 1]
 
     N = df.shape[0]
     idx = np.arange(N)[df.index > treatment_time]
-    df["causal effect"] = 100 * gamma(10).pdf(np.arange(0, N, 1) - np.min(idx))
+    df["causal effect"] = 100 * gamma(10).pdf(
+        np.array(np.arange(0, N, 1)) - int(np.min(idx))
+    )
 
     df["y"] += df["causal effect"]
     df["y"] += norm(0, 2).rvs(N)
@@ -183,7 +194,9 @@ def generate_time_series_data_seasonal(treatment_time):
     return df
 
 
-def generate_time_series_data_simple(treatment_time, slope=0.0):
+def generate_time_series_data_simple(
+    treatment_time: pd.Timestamp, slope: float = 0.0
+) -> pd.DataFrame:
     """Generate simple interrupted time series data, with no seasonality or temporal
     structure.
     """
@@ -205,7 +218,7 @@ def generate_time_series_data_simple(treatment_time, slope=0.0):
     return df
 
 
-def generate_did():
+def generate_did() -> pd.DataFrame:
     """
     Generate Difference in Differences data
 
@@ -257,8 +270,8 @@ def outcome(
 
 
 def generate_regression_discontinuity_data(
-    N=100, true_causal_impact=0.5, true_treatment_threshold=0.0
-):
+    N: int = 100, true_causal_impact: float = 0.5, true_treatment_threshold: float = 0.0
+) -> pd.DataFrame:
     """
     Generate regression discontinuity example data
 
@@ -289,8 +302,11 @@ def impact(x):
 
 
 def generate_ancova_data(
-    N=200, pre_treatment_means=np.array([10, 12]), treatment_effect=2, sigma=1
-):
+    N: int = 200,
+    pre_treatment_means: np.ndarray = np.array([10, 12]),
+    treatment_effect: int = 2,
+    sigma: int = 1,
+) -> pd.DataFrame:
     """
     Generate ANCOVA example data
 
@@ -310,7 +326,7 @@ def generate_ancova_data(
     return df
 
 
-def generate_geolift_data():
+def generate_geolift_data() -> pd.DataFrame:
     """Generate synthetic data for a geolift example. This will consists of 6 untreated
     countries. The treated unit `Denmark` is a weighted combination of the untreated
     units. We additionally specify a treatment effect which takes effect after the
@@ -360,7 +376,7 @@ def generate_geolift_data():
     return df
 
 
-def generate_multicell_geolift_data():
+def generate_multicell_geolift_data() -> pd.DataFrame:
     """Generate synthetic data for a geolift example. This will consists of 6 untreated
     countries. The treated unit `Denmark` is a weighted combination of the untreated
     units. We additionally specify a treatment effect which takes effect after the
@@ -422,7 +438,9 @@ def generate_multicell_geolift_data():
 # -----------------
 
 
-def generate_seasonality(n=12, amplitude=1, length_scale=0.5):
+def generate_seasonality(
+    n: int = 12, amplitude: int = 1, length_scale: float = 0.5
+) -> np.ndarray:
     """Generate monthly seasonality by sampling from a Gaussian process with a
     Gaussian kernel, using numpy code"""
     # Generate the covariance matrix
@@ -436,14 +454,26 @@ def generate_seasonality(n=12, amplitude=1, length_scale=0.5):
     return seasonality
 
 
-def periodic_kernel(x1, x2, period=1, length_scale=1, amplitude=1):
+def periodic_kernel(
+    x1: np.ndarray,
+    x2: np.ndarray,
+    period: int = 1,
+    length_scale: float = 1.0,
+    amplitude: int = 1,
+) -> np.ndarray:
     """Generate a periodic kernel for gaussian process"""
     return amplitude**2 * np.exp(
         -2 * np.sin(np.pi * np.abs(x1 - x2) / period) ** 2 / length_scale**2
     )
 
 
-def create_series(n=52, amplitude=1, length_scale=2, n_years=4, intercept=3):
+def create_series(
+    n: int = 52,
+    amplitude: int = 1,
+    length_scale: int = 2,
+    n_years: int = 4,
+    intercept: int = 3,
+) -> np.ndarray:
     """
     Returns numpy tile with generated seasonality data repeated over
     multiple years
 
@@ -16,6 +16,7 @@
 """
 
 from abc import abstractmethod
+from typing import Any, Union
 
 import arviz as az
 import matplotlib.pyplot as plt
@@ -29,10 +30,12 @@
 class BaseExperiment:
     """Base class for quasi experimental designs."""
 
+    labels: list[str]
+
     supports_bayes: bool
     supports_ols: bool
 
-    def __init__(self, model=None):
+    def __init__(self, model: Union[PyMCModel, RegressorMixin] | None = None) -> None:
         # Ensure we've made any provided Scikit Learn model (as identified as being type
         # RegressorMixin) compatible with CausalPy by appending our custom methods.
         if isinstance(model, RegressorMixin):
@@ -50,16 +53,19 @@ def __init__(self, model=None):
         if self.model is None:
             raise ValueError("model not set or passed.")
 
+    def fit(self, *args: Any, **kwargs: Any) -> None:
+        raise NotImplementedError("fit method not implemented")
+
     @property
-    def idata(self):
+    def idata(self) -> az.InferenceData:
         """Return the InferenceData object of the model. Only relevant for PyMC models."""
         return self.model.idata
 
-    def print_coefficients(self, round_to=None):
+    def print_coefficients(self, round_to: int | None = None) -> None:
         """Ask the model to print its coefficients."""
         self.model.print_coefficients(self.labels, round_to)
 
-    def plot(self, *args, **kwargs) -> tuple:
+    def plot(self, *args: Any, **kwargs: Any) -> tuple:
         """Plot the model.
 
         Internally, this function dispatches to either `_bayesian_plot` or `_ols_plot`
@@ -75,16 +81,16 @@ def plot(self, *args, **kwargs) -> tuple:
                 raise ValueError("Unsupported model type")
 
     @abstractmethod
-    def _bayesian_plot(self, *args, **kwargs):
+    def _bayesian_plot(self, *args: Any, **kwargs: Any) -> tuple:
         """Abstract method for plotting the model."""
         raise NotImplementedError("_bayesian_plot method not yet implemented")
 
     @abstractmethod
-    def _ols_plot(self, *args, **kwargs):
+    def _ols_plot(self, *args: Any, **kwargs: Any) -> tuple:
         """Abstract method for plotting the model."""
         raise NotImplementedError("_ols_plot method not yet implemented")
 
-    def get_plot_data(self, *args, **kwargs) -> pd.DataFrame:
+    def get_plot_data(self, *args: Any, **kwargs: Any) -> pd.DataFrame:
         """Recover the data of an experiment along with the prediction and causal impact information.
 
         Internally, this function dispatches to either :func:`get_plot_data_bayesian` or :func:`get_plot_data_ols`
@@ -98,11 +104,11 @@ def get_plot_data(self, *args, **kwargs) -> pd.DataFrame:
             raise ValueError("Unsupported model type")
 
     @abstractmethod
-    def get_plot_data_bayesian(self, *args, **kwargs):
+    def get_plot_data_bayesian(self, *args: Any, **kwargs: Any) -> pd.DataFrame:
         """Abstract method for recovering plot data."""
         raise NotImplementedError("get_plot_data_bayesian method not yet implemented")
 
     @abstractmethod
-    def get_plot_data_ols(self, *args, **kwargs):
+    def get_plot_data_ols(self, *args: Any, **kwargs: Any) -> pd.DataFrame:
         """Abstract method for recovering plot data."""
         raise NotImplementedError("get_plot_data_ols method not yet implemented")
@@ -15,6 +15,8 @@
 Difference in differences
 """
 
+from typing import Union
+
 import arviz as az
 import numpy as np
 import pandas as pd
@@ -92,8 +94,8 @@ def __init__(
         time_variable_name: str,
         group_variable_name: str,
         post_treatment_variable_name: str = "post_treatment",
-        model=None,
-        **kwargs,
+        model: Union[PyMCModel, RegressorMixin] | None = None,
+        **kwargs: dict,
     ) -> None:
         super().__init__(model=model)
         self.causal_impact: xr.DataArray | float | None
@@ -234,14 +236,14 @@ def __init__(
                 f"{self.group_variable_name}:{self.post_treatment_variable_name}"
             )
             matched_key = next((k for k in coef_map if interaction_term in k), None)
-            att = coef_map.get(matched_key)
+            att = coef_map.get(matched_key) if matched_key is not None else None
             self.causal_impact = att
         else:
             raise ValueError("Model type not recognized")
 
         return
 
-    def input_validation(self):
+    def input_validation(self) -> None:
         # Validate formula structure and interaction interaction terms
         self._validate_formula_interaction_terms()
 
@@ -269,7 +271,7 @@ def input_validation(self):
                 coded. Consisting of 0's and 1's only."""
             )
 
-    def _validate_formula_interaction_terms(self):
+    def _validate_formula_interaction_terms(self) -> None:
         """
         Validate that the formula contains at most one interaction term and no three-way or higher-order interactions.
         Raises FormulaException if more than one interaction term is found or if any interaction term has more than 2 variables.
@@ -299,7 +301,7 @@ def _validate_formula_interaction_terms(self):
                 "Multiple interaction terms are not currently supported as they complicate interpretation of the causal effect."
             )
 
-    def summary(self, round_to=None) -> None:
+    def summary(self, round_to: int | None = 2) -> None:
         """Print summary of main results and model coefficients.
 
         :param round_to:
@@ -311,11 +313,13 @@ def summary(self, round_to=None) -> None:
         print(self._causal_impact_summary_stat(round_to))
         self.print_coefficients(round_to)
 
-    def _causal_impact_summary_stat(self, round_to=None) -> str:
+    def _causal_impact_summary_stat(self, round_to: int | None = None) -> str:
         """Computes the mean and 94% credible interval bounds for the causal impact."""
         return f"Causal impact = {convert_to_string(self.causal_impact, round_to=round_to)}"
 
-    def _bayesian_plot(self, round_to=None, **kwargs) -> tuple[plt.Figure, plt.Axes]:
+    def _bayesian_plot(
+        self, round_to: int | None = None, **kwargs: dict
+    ) -> tuple[plt.Figure, plt.Axes]:
         """
         Plot the results
 
@@ -463,9 +467,10 @@ def _plot_causal_impact_arrow(results, ax):
         )
         return fig, ax
 
-    def _ols_plot(self, round_to=None, **kwargs) -> tuple[plt.Figure, plt.Axes]:
+    def _ols_plot(
+        self, round_to: int | None = 2, **kwargs: dict
+    ) -> tuple[plt.Figure, plt.Axes]:
         """Generate plot for difference-in-differences"""
-        round_to = kwargs.get("round_to")
         fig, ax = plt.subplots()
 
         # Plot raw data
@@ -528,11 +533,15 @@ def _ols_plot(self, round_to=None, **kwargs) -> tuple[plt.Figure, plt.Axes]:
             va="center",
         )
         # formatting
+        # OLS: causal_impact should be a float (mypy can't tell); None becomes 0.0
+        causal_impact_value = (
+            float(self.causal_impact) if self.causal_impact is not None else 0.0
+        )
         ax.set(
             xlim=[-0.05, 1.1],
             xticks=[0, 1],
             xticklabels=["pre", "post"],
-            title=f"Causal impact = {round_num(self.causal_impact, round_to)}",
+            title=f"Causal impact = {round_num(causal_impact_value, round_to)}",
         )
         ax.legend(fontsize=LEGEND_FONT_SIZE)
         return fig, ax