From df03887c56b81d34ad7c499976ceca24e806e71f Mon Sep 17 00:00:00 2001 From: Julius Herzig Date: Mon, 13 Jan 2025 10:32:40 +0100 Subject: [PATCH 01/48] Logistic regression implementation WIP --- doubleml/__init__.py | 2 + doubleml/double_ml.py | 6 + doubleml/double_ml_data.py | 1049 +++++++++++++++++++++++++++++++++ doubleml/logistic/logistic.py | 463 +++++++++++++++ doubleml/utils/resampling.py | 45 ++ 5 files changed, 1565 insertions(+) create mode 100644 doubleml/double_ml_data.py create mode 100644 doubleml/logistic/logistic.py diff --git a/doubleml/__init__.py b/doubleml/__init__.py index 6cf7de962..935491167 100644 --- a/doubleml/__init__.py +++ b/doubleml/__init__.py @@ -15,6 +15,7 @@ from .irm.ssm import DoubleMLSSM from .plm.pliv import DoubleMLPLIV from .plm.plr import DoubleMLPLR +from .logistic.logistic import DoubleMLLogit from .utils.blp import DoubleMLBLP from .utils.policytree import DoubleMLPolicyTree @@ -42,6 +43,7 @@ "DoubleMLBLP", "DoubleMLPolicyTree", "DoubleMLSSM", + "DoubleMLLogit", ] __version__ = importlib.metadata.version("doubleml") diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index 05481bf16..1cc6bcf9b 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -585,6 +585,12 @@ def fit(self, n_jobs_cv=None, store_predictions=True, external_predictions=None, # construct framework for inference self._framework = self.construct_framework() + + + + + + return self def construct_framework(self): diff --git a/doubleml/double_ml_data.py b/doubleml/double_ml_data.py new file mode 100644 index 000000000..4f8d7cbc7 --- /dev/null +++ b/doubleml/double_ml_data.py @@ -0,0 +1,1049 @@ +import numpy as np +import pandas as pd +import io + +from abc import ABC, abstractmethod + +from sklearn.utils.validation import check_array, column_or_1d, check_consistent_length +from sklearn.utils import assert_all_finite +from sklearn.utils.multiclass import type_of_target +from .utils._estimation import _assure_2d_array +from .utils._checks 
import _check_set + + +class DoubleMLBaseData(ABC): + """Base Class Double machine learning data-backends + """ + def __init__(self, + data): + if not isinstance(data, pd.DataFrame): + raise TypeError('data must be of pd.DataFrame type. ' + f'{str(data)} of type {str(type(data))} was passed.') + if not data.columns.is_unique: + raise ValueError('Invalid pd.DataFrame: ' + 'Contains duplicate column names.') + self._data = data + + def __str__(self): + data_summary = self._data_summary_str() + buf = io.StringIO() + self.data.info(verbose=False, buf=buf) + df_info = buf.getvalue() + res = '================== DoubleMLBaseData Object ==================\n' + \ + '\n------------------ Data summary ------------------\n' + data_summary + \ + '\n------------------ DataFrame info ------------------\n' + df_info + return res + + def _data_summary_str(self): + data_summary = f'No. Observations: {self.n_obs}\n' + return data_summary + + @property + def data(self): + """ + The data. + """ + return self._data + + @property + def all_variables(self): + """ + All variables available in the dataset. + """ + return self.data.columns + + @property + def n_obs(self): + """ + The number of observations. + """ + return self.data.shape[0] + + # TODO: This and the following property does not make sense but the base class DoubleML needs it (especially for the + # multiple treatment variables case) and other things are also build around it, see for example DoubleML._params + @property + def d_cols(self): + return ['theta'] + + @property + def n_treat(self): + """ + The number of treatment variables. + """ + return 1 + + @property + @abstractmethod + def n_coefs(self): + pass + + +class DoubleMLData(DoubleMLBaseData): + """Double machine learning data-backend. + + :class:`DoubleMLData` objects can be initialized from + :class:`pandas.DataFrame`'s as well as :class:`numpy.ndarray`'s. + + Parameters + ---------- + data : :class:`pandas.DataFrame` + The data. 
+ + y_col : str + The outcome variable. + + d_cols : str or list + The treatment variable(s). + + x_cols : None, str or list + The covariates. + If ``None``, all variables (columns of ``data``) which are neither specified as outcome variable ``y_col``, nor + treatment variables ``d_cols``, nor instrumental variables ``z_cols`` are used as covariates. + Default is ``None``. + + z_cols : None, str or list + The instrumental variable(s). + Default is ``None``. + + t_col : None or str + The time variable (only relevant/used for DiD Estimators). + Default is ``None``. + + s_col : None or str + The score or selection variable (only relevant/used for RDD or SSM Estimatiors). + Default is ``None``. + + use_other_treat_as_covariate : bool + Indicates whether in the multiple-treatment case the other treatment variables should be added as covariates. + Default is ``True``. + + force_all_x_finite : bool or str + Indicates whether to raise an error on infinite values and / or missings in the covariates ``x``. + Possible values are: ``True`` (neither missings ``np.nan``, ``pd.NA`` nor infinite values ``np.inf`` are + allowed), ``False`` (missings and infinite values are allowed), ``'allow-nan'`` (only missings are allowed). + Note that the choice ``False`` and ``'allow-nan'`` are only reasonable if the machine learning methods used + for the nuisance functions are capable to provide valid predictions with missings and / or infinite values + in the covariates ``x``. + Default is ``True``. 
+ + Examples + -------- + >>> from doubleml import DoubleMLData + >>> from doubleml.datasets import make_plr_CCDDHNR2018 + >>> # initialization from pandas.DataFrame + >>> df = make_plr_CCDDHNR2018(return_type='DataFrame') + >>> obj_dml_data_from_df = DoubleMLData(df, 'y', 'd') + >>> # initialization from np.ndarray + >>> (x, y, d) = make_plr_CCDDHNR2018(return_type='array') + >>> obj_dml_data_from_array = DoubleMLData.from_arrays(x, y, d) + """ + def __init__(self, + data, + y_col, + d_cols, + x_cols=None, + z_cols=None, + t_col=None, + s_col=None, + use_other_treat_as_covariate=True, + force_all_x_finite=True): + DoubleMLBaseData.__init__(self, data) + + self.y_col = y_col + self.d_cols = d_cols + self.z_cols = z_cols + self.t_col = t_col + self.s_col = s_col + self.x_cols = x_cols + self._check_disjoint_sets_y_d_x_z_t_s() + self.use_other_treat_as_covariate = use_other_treat_as_covariate + self.force_all_x_finite = force_all_x_finite + self._binary_treats = self._check_binary_treats() + self._binary_outcome = self._check_binary_outcome() + self._set_y_z_t_s() + # by default, we initialize to the first treatment variable + self.set_x_d(self.d_cols[0]) + + def __str__(self): + data_summary = self._data_summary_str() + buf = io.StringIO() + self.data.info(verbose=False, buf=buf) + df_info = buf.getvalue() + res = '================== DoubleMLData Object ==================\n' + \ + '\n------------------ Data summary ------------------\n' + data_summary + \ + '\n------------------ DataFrame info ------------------\n' + df_info + return res + + def _data_summary_str(self): + data_summary = f'Outcome variable: {self.y_col}\n' \ + f'Treatment variable(s): {self.d_cols}\n' \ + f'Covariates: {self.x_cols}\n' \ + f'Instrument variable(s): {self.z_cols}\n' + if self.t_col is not None: + data_summary += f'Time variable: {self.t_col}\n' + if self.s_col is not None: + data_summary += f'Score/Selection variable: {self.s_col}\n' + data_summary += f'No. 
Observations: {self.n_obs}\n' + return data_summary + + @classmethod + def from_arrays(cls, x, y, d, z=None, t=None, s=None, use_other_treat_as_covariate=True, + force_all_x_finite=True): + """ + Initialize :class:`DoubleMLData` from :class:`numpy.ndarray`'s. + + Parameters + ---------- + x : :class:`numpy.ndarray` + Array of covariates. + + y : :class:`numpy.ndarray` + Array of the outcome variable. + + d : :class:`numpy.ndarray` + Array of treatment variables. + + z : None or :class:`numpy.ndarray` + Array of instrumental variables. + Default is ``None``. + + t : :class:`numpy.ndarray` + Array of the time variable (only relevant/used for DiD models). + Default is ``None``. + + s : :class:`numpy.ndarray` + Array of the score or selection variable (only relevant/used for RDD and SSM models). + Default is ``None``. + + use_other_treat_as_covariate : bool + Indicates whether in the multiple-treatment case the other treatment variables should be added as covariates. + Default is ``True``. + + force_all_x_finite : bool or str + Indicates whether to raise an error on infinite values and / or missings in the covariates ``x``. + Possible values are: ``True`` (neither missings ``np.nan``, ``pd.NA`` nor infinite values ``np.inf`` are + allowed), ``False`` (missings and infinite values are allowed), ``'allow-nan'`` (only missings are allowed). + Note that the choice ``False`` and ``'allow-nan'`` are only reasonable if the machine learning methods used + for the nuisance functions are capable to provide valid predictions with missings and / or infinite values + in the covariates ``x``. + Default is ``True``. 
+ + Examples + -------- + >>> from doubleml import DoubleMLData + >>> from doubleml.datasets import make_plr_CCDDHNR2018 + >>> (x, y, d) = make_plr_CCDDHNR2018(return_type='array') + >>> obj_dml_data_from_array = DoubleMLData.from_arrays(x, y, d) + """ + if isinstance(force_all_x_finite, str): + if force_all_x_finite != 'allow-nan': + raise ValueError("Invalid force_all_x_finite " + force_all_x_finite + ". " + + "force_all_x_finite must be True, False or 'allow-nan'.") + elif not isinstance(force_all_x_finite, bool): + raise TypeError("Invalid force_all_x_finite. " + + "force_all_x_finite must be True, False or 'allow-nan'.") + + x = check_array(x, ensure_2d=False, allow_nd=False, + force_all_finite=force_all_x_finite) + d = check_array(d, ensure_2d=False, allow_nd=False) + y = column_or_1d(y, warn=True) + + x = _assure_2d_array(x) + d = _assure_2d_array(d) + + y_col = 'y' + if z is None: + check_consistent_length(x, y, d) + z_cols = None + else: + z = check_array(z, ensure_2d=False, allow_nd=False) + z = _assure_2d_array(z) + check_consistent_length(x, y, d, z) + if z.shape[1] == 1: + z_cols = ['z'] + else: + z_cols = [f'z{i + 1}' for i in np.arange(z.shape[1])] + + if t is None: + t_col = None + else: + t = column_or_1d(t, warn=True) + check_consistent_length(x, y, d, t) + t_col = 't' + + if s is None: + s_col = None + else: + s = column_or_1d(s, warn=True) + check_consistent_length(x, y, d, s) + s_col = 's' + + if d.shape[1] == 1: + d_cols = ['d'] + else: + d_cols = [f'd{i+1}' for i in np.arange(d.shape[1])] + + x_cols = [f'X{i+1}' for i in np.arange(x.shape[1])] + + # basline version with features, outcome and treatments + data = pd.DataFrame(np.column_stack((x, y, d)), + columns=x_cols + [y_col] + d_cols) + + if z is not None: + df_z = pd.DataFrame(z, columns=z_cols) + data = pd.concat([data, df_z], axis=1) + + if t is not None: + data[t_col] = t + + if s is not None: + data[s_col] = s + + return cls(data, y_col, d_cols, x_cols, z_cols, t_col, s_col, 
use_other_treat_as_covariate, force_all_x_finite) + + @property + def x(self): + """ + Array of covariates; + Dynamic! May depend on the currently set treatment variable; + To get an array of all covariates (independent of the currently set treatment variable) + call ``obj.data[obj.x_cols].values``. + """ + return self._X.values + + @property + def y(self): + """ + Array of outcome variable. + """ + return self._y.values + + @property + def d(self): + """ + Array of treatment variable; + Dynamic! Depends on the currently set treatment variable; + To get an array of all treatment variables (independent of the currently set treatment variable) + call ``obj.data[obj.d_cols].values``. + """ + return self._d.values + + @property + def z(self): + """ + Array of instrumental variables. + """ + if self.z_cols is not None: + return self._z.values + else: + return None + + @property + def t(self): + """ + Array of time variable. + """ + if self.t_col is not None: + return self._t.values + else: + return None + + @property + def s(self): + """ + Array of score or selection variable. + """ + if self.s_col is not None: + return self._s.values + else: + return None + + @property + def n_treat(self): + """ + The number of treatment variables. + """ + return len(self.d_cols) + + @property + def n_coefs(self): + """ + The number of coefficients to be estimated. + """ + return self.n_treat + + @property + def n_instr(self): + """ + The number of instruments. + """ + if self.z_cols is not None: + n_instr = len(self.z_cols) + else: + n_instr = 0 + return n_instr + + @property + def binary_treats(self): + """ + Series with logical(s) indicating whether the treatment variable(s) are binary with values 0 and 1. + """ + return self._binary_treats + + @property + def binary_outcome(self): + """ + Logical indicating whether the outcome variable is binary with values 0 and 1. + """ + return self._binary_outcome + + @property + def x_cols(self): + """ + The covariates. 
+ """ + return self._x_cols + + @x_cols.setter + def x_cols(self, value): + reset_value = hasattr(self, '_x_cols') + if value is not None: + if isinstance(value, str): + value = [value] + if not isinstance(value, list): + raise TypeError('The covariates x_cols must be of str or list type (or None). ' + f'{str(value)} of type {str(type(value))} was passed.') + if not len(se + + t(value)) == len(value): + raise ValueError('Invalid covariates x_cols: ' + 'Contains duplicate values.') + if not set(value).issubset(set(self.all_variables)): + raise ValueError('Invalid covariates x_cols. ' + 'At least one covariate is no data column.') + assert set(value).issubset(set(self.all_variables)) + self._x_cols = value + else: + excluded_cols = set.union({self.y_col}, set(self.d_cols)) + if (self.z_cols is not None): + excluded_cols = set.union(excluded_cols, set(self.z_cols)) + for col in [self.t_col, self.s_col]: + col = _check_set(col) + excluded_cols = set.union(excluded_cols, col) + self._x_cols = [col for col in self.data.columns if col not in excluded_cols] + if reset_value: + self._check_disjoint_sets() + # by default, we initialize to the first treatment variable + self.set_x_d(self.d_cols[0]) + + @property + def d_cols(self): + """ + The treatment variable(s). + """ + return self._d_cols + + @d_cols.setter + def d_cols(self, value): + reset_value = hasattr(self, '_d_cols') + if isinstance(value, str): + value = [value] + if not isinstance(value, list): + raise TypeError('The treatment variable(s) d_cols must be of str or list type. ' + f'{str(value)} of type {str(type(value))} was passed.') + if not len(set(value)) == len(value): + raise ValueError('Invalid treatment variable(s) d_cols: ' + 'Contains duplicate values.') + if not set(value).issubset(set(self.all_variables)): + raise ValueError('Invalid treatment variable(s) d_cols. 
' + 'At least one treatment variable is no data column.') + self._d_cols = value + if reset_value: + self._check_disjoint_sets() + # by default, we initialize to the first treatment variable + self.set_x_d(self.d_cols[0]) + + @property + def y_col(self): + """ + The outcome variable. + """ + return self._y_col + + @y_col.setter + def y_col(self, value): + reset_value = hasattr(self, '_y_col') + if not isinstance(value, str): + raise TypeError('The outcome variable y_col must be of str type. ' + f'{str(value)} of type {str(type(value))} was passed.') + if value not in self.all_variables: + raise ValueError('Invalid outcome variable y_col. ' + f'{value} is no data column.') + self._y_col = value + if reset_value: + self._check_disjoint_sets() + self._set_y_z_t_s() + + @property + def z_cols(self): + """ + The instrumental variable(s). + """ + return self._z_cols + + @z_cols.setter + def z_cols(self, value): + reset_value = hasattr(self, '_z_cols') + if value is not None: + if isinstance(value, str): + value = [value] + if not isinstance(value, list): + raise TypeError('The instrumental variable(s) z_cols must be of str or list type (or None). ' + f'{str(value)} of type {str(type(value))} was passed.') + if not len(set(value)) == len(value): + raise ValueError('Invalid instrumental variable(s) z_cols: ' + 'Contains duplicate values.') + if not set(value).issubset(set(self.all_variables)): + raise ValueError('Invalid instrumental variable(s) z_cols. ' + 'At least one instrumental variable is no data column.') + self._z_cols = value + else: + self._z_cols = None + if reset_value: + self._check_disjoint_sets() + self._set_y_z_t_s() + + @property + def t_col(self): + """ + The time variable. + """ + return self._t_col + + @t_col.setter + def t_col(self, value): + reset_value = hasattr(self, '_t_col') + if value is not None: + if not isinstance(value, str): + raise TypeError('The time variable t_col must be of str type (or None). 
' + f'{str(value)} of type {str(type(value))} was passed.') + if value not in self.all_variables: + raise ValueError('Invalid time variable t_col. ' + f'{value} is no data column.') + self._t_col = value + else: + self._t_col = None + if reset_value: + self._check_disjoint_sets() + self._set_y_z_t_s() + + @property + def s_col(self): + """ + The score or selection variable. + """ + return self._s_col + + @s_col.setter + def s_col(self, value): + reset_value = hasattr(self, '_s_col') + if value is not None: + if not isinstance(value, str): + raise TypeError('The score or selection variable s_col must be of str type (or None). ' + f'{str(value)} of type {str(type(value))} was passed.') + if value not in self.all_variables: + raise ValueError('Invalid score or selection variable s_col. ' + f'{value} is no data column.') + self._s_col = value + else: + self._s_col = None + if reset_value: + self._check_disjoint_sets() + self._set_y_z_t_s() + + @property + def use_other_treat_as_covariate(self): + """ + Indicates whether in the multiple-treatment case the other treatment variables should be added as covariates. + """ + return self._use_other_treat_as_covariate + + @use_other_treat_as_covariate.setter + def use_other_treat_as_covariate(self, value): + reset_value = hasattr(self, '_use_other_treat_as_covariate') + if not isinstance(value, bool): + raise TypeError('use_other_treat_as_covariate must be True or False. ' + f'Got {str(value)}.') + self._use_other_treat_as_covariate = value + if reset_value: + # by default, we initialize to the first treatment variable + self.set_x_d(self.d_cols[0]) + + @property + def force_all_x_finite(self): + """ + Indicates whether to raise an error on infinite values and / or missings in the covariates ``x``. 
+ """ + return self._force_all_x_finite + + @force_all_x_finite.setter + def force_all_x_finite(self, value): + reset_value = hasattr(self, '_force_all_x_finite') + if isinstance(value, str): + if value != 'allow-nan': + raise ValueError("Invalid force_all_x_finite " + value + ". " + + "force_all_x_finite must be True, False or 'allow-nan'.") + elif not isinstance(value, bool): + raise TypeError("Invalid force_all_x_finite. " + + "force_all_x_finite must be True, False or 'allow-nan'.") + self._force_all_x_finite = value + if reset_value: + # by default, we initialize to the first treatment variable + self.set_x_d(self.d_cols[0]) + + def _set_y_z_t_s(self): + assert_all_finite(self.data.loc[:, self.y_col]) + self._y = self.data.loc[:, self.y_col] + if self.z_cols is None: + self._z = None + else: + assert_all_finite(self.data.loc[:, self.z_cols]) + self._z = self.data.loc[:, self.z_cols] + + if self.t_col is None: + self._t = None + else: + assert_all_finite(self.data.loc[:, self.t_col]) + self._t = self.data.loc[:, self.t_col] + + if self.s_col is None: + self._s = None + else: + assert_all_finite(self.data.loc[:, self.s_col]) + self._s = self.data.loc[:, self.s_col] + + def set_x_d(self, treatment_var): + """ + Function that assigns the role for the treatment variables in the multiple-treatment case. + + Parameters + ---------- + treatment_var : str + Active treatment variable that will be set to d. + """ + if not isinstance(treatment_var, str): + raise TypeError('treatment_var must be of str type. ' + f'{str(treatment_var)} of type {str(type(treatment_var))} was passed.') + if treatment_var not in self.d_cols: + raise ValueError('Invalid treatment_var. 
' + f'{treatment_var} is not in d_cols.') + if self.use_other_treat_as_covariate: + # note that the following line needs to be adapted in case an intersection of x_cols and d_cols as allowed + # (see https://github.com/DoubleML/doubleml-for-py/issues/83) + xd_list = self.x_cols + self.d_cols + xd_list.remove(treatment_var) + else: + xd_list = self.x_cols + assert_all_finite(self.data.loc[:, treatment_var]) + if self.force_all_x_finite: + assert_all_finite(self.data.loc[:, xd_list], + allow_nan=self.force_all_x_finite == 'allow-nan') + self._d = self.data.loc[:, treatment_var] + self._X = self.data.loc[:, xd_list] + + def _check_binary_treats(self): + is_binary = pd.Series(dtype=bool, index=self.d_cols) + for treatment_var in self.d_cols: + this_d = self.data.loc[:, treatment_var] + binary_treat = (type_of_target(this_d) == 'binary') + zero_one_treat = np.all((np.power(this_d, 2) - this_d) == 0) + is_binary[treatment_var] = (binary_treat & zero_one_treat) + return is_binary + + def _check_binary_outcome(self): + y = self.data.loc[:, self.y_col] + binary_outcome = (type_of_target(y) == 'binary') + zero_one_outcome = np.all((np.power(y, 2) - y) == 0) + is_binary = (binary_outcome & zero_one_outcome) + return is_binary + + def _check_disjoint_sets(self): + # this function can be extended in inherited subclasses + self._check_disjoint_sets_y_d_x_z_t_s() + + def _check_disjoint_sets_y_d_x_z_t_s(self): + y_col_set = {self.y_col} + x_cols_set = set(self.x_cols) + d_cols_set = set(self.d_cols) + + if not y_col_set.isdisjoint(x_cols_set): + raise ValueError(f'{str(self.y_col)} cannot be set as outcome variable ``y_col`` and covariate in ' + '``x_cols``.') + if not y_col_set.isdisjoint(d_cols_set): + raise ValueError(f'{str(self.y_col)} cannot be set as outcome variable ``y_col`` and treatment variable in ' + '``d_cols``.') + # note that the line xd_list = self.x_cols + self.d_cols in method set_x_d needs adaption if an intersection of + # x_cols and d_cols as allowed (see 
https://github.com/DoubleML/doubleml-for-py/issues/83) + if not d_cols_set.isdisjoint(x_cols_set): + raise ValueError('At least one variable/column is set as treatment variable (``d_cols``) and as covariate' + '(``x_cols``). Consider using parameter ``use_other_treat_as_covariate``.') + + if self.z_cols is not None: + z_cols_set = set(self.z_cols) + if not y_col_set.isdisjoint(z_cols_set): + raise ValueError(f'{str(self.y_col)} cannot be set as outcome variable ``y_col`` and instrumental ' + 'variable in ``z_cols``.') + if not d_cols_set.isdisjoint(z_cols_set): + raise ValueError('At least one variable/column is set as treatment variable (``d_cols``) and ' + 'instrumental variable in ``z_cols``.') + if not x_cols_set.isdisjoint(z_cols_set): + raise ValueError('At least one variable/column is set as covariate (``x_cols``) and instrumental ' + 'variable in ``z_cols``.') + + self._check_disjoint_sets_t_s() + + def _check_disjoint_sets_t_s(self): + y_col_set = {self.y_col} + x_cols_set = set(self.x_cols) + d_cols_set = set(self.d_cols) + + if self.t_col is not None: + t_col_set = {self.t_col} + if not t_col_set.isdisjoint(x_cols_set): + raise ValueError(f'{str(self.t_col)} cannot be set as time variable ``t_col`` and covariate in ' + '``x_cols``.') + if not t_col_set.isdisjoint(d_cols_set): + raise ValueError(f'{str(self.t_col)} cannot be set as time variable ``t_col`` and treatment variable in ' + '``d_cols``.') + if not t_col_set.isdisjoint(y_col_set): + raise ValueError(f'{str(self.t_col)} cannot be set as time variable ``t_col`` and outcome variable ' + '``y_col``.') + if self.z_cols is not None: + z_cols_set = set(self.z_cols) + if not t_col_set.isdisjoint(z_cols_set): + raise ValueError(f'{str(self.t_col)} cannot be set as time variable ``t_col`` and instrumental ' + 'variable in ``z_cols``.') + + if self.s_col is not None: + s_col_set = {self.s_col} + if not s_col_set.isdisjoint(x_cols_set): + raise ValueError(f'{str(self.s_col)} cannot be set as score or 
selection variable ``s_col`` and covariate in ' + '``x_cols``.') + if not s_col_set.isdisjoint(d_cols_set): + raise ValueError(f'{str(self.s_col)} cannot be set as score or selection variable ``s_col`` and treatment ' + 'variable in ``d_cols``.') + if not s_col_set.isdisjoint(y_col_set): + raise ValueError(f'{str(self.s_col)} cannot be set as score or selection variable ``s_col`` and outcome ' + 'variable ``y_col``.') + if self.z_cols is not None: + z_cols_set = set(self.z_cols) + if not s_col_set.isdisjoint(z_cols_set): + raise ValueError(f'{str(self.s_col)} cannot be set as score or selection variable ``s_col`` and ' + 'instrumental variable in ``z_cols``.') + if self.t_col is not None: + t_col_set = {self.t_col} + if not s_col_set.isdisjoint(t_col_set): + raise ValueError(f'{str(self.s_col)} cannot be set as score or selection variable ``s_col`` and time ' + 'variable ``t_col``.') + + +class DoubleMLClusterData(DoubleMLData): + """Double machine learning data-backend for data with cluster variables. + + :class:`DoubleMLClusterData` objects can be initialized from + :class:`pandas.DataFrame`'s as well as :class:`numpy.ndarray`'s. + + Parameters + ---------- + data : :class:`pandas.DataFrame` + The data. + + y_col : str + The outcome variable. + + d_cols : str or list + The treatment variable(s). + + cluster_cols : str or list + The cluster variable(s). + + x_cols : None, str or list + The covariates. + If ``None``, all variables (columns of ``data``) which are neither specified as outcome variable ``y_col``, nor + treatment variables ``d_cols``, nor instrumental variables ``z_cols`` are used as covariates. + Default is ``None``. + + z_cols : None, str or list + The instrumental variable(s). + Default is ``None``. + + t_col : None or str + The time variable (only relevant/used for DiD Estimators). + Default is ``None``. + + s_col : None or str + The score or selection variable (only relevant/used for RDD and SSM Estimatiors). + Default is ``None``. 
+ + use_other_treat_as_covariate : bool + Indicates whether in the multiple-treatment case the other treatment variables should be added as covariates. + Default is ``True``. + + force_all_x_finite : bool or str + Indicates whether to raise an error on infinite values and / or missings in the covariates ``x``. + Possible values are: ``True`` (neither missings ``np.nan``, ``pd.NA`` nor infinite values ``np.inf`` are + allowed), ``False`` (missings and infinite values are allowed), ``'allow-nan'`` (only missings are allowed). + Note that the choice ``False`` and ``'allow-nan'`` are only reasonable if the machine learning methods used + for the nuisance functions are capable to provide valid predictions with missings and / or infinite values + in the covariates ``x``. + Default is ``True``. + + Examples + -------- + >>> from doubleml import DoubleMLClusterData + >>> from doubleml.datasets import make_pliv_multiway_cluster_CKMS2021 + >>> # initialization from pandas.DataFrame + >>> df = make_pliv_multiway_cluster_CKMS2021(return_type='DataFrame') + >>> obj_dml_data_from_df = DoubleMLClusterData(df, 'Y', 'D', ['cluster_var_i', 'cluster_var_j'], z_cols='Z') + >>> # initialization from np.ndarray + >>> (x, y, d, cluster_vars, z) = make_pliv_multiway_cluster_CKMS2021(return_type='array') + >>> obj_dml_data_from_array = DoubleMLClusterData.from_arrays(x, y, d, cluster_vars, z) + """ + def __init__(self, + data, + y_col, + d_cols, + cluster_cols, + x_cols=None, + z_cols=None, + t_col=None, + s_col=None, + use_other_treat_as_covariate=True, + force_all_x_finite=True): + DoubleMLBaseData.__init__(self, data) + + # we need to set cluster_cols (needs _data) before call to the super __init__ because of the x_cols setter + self.cluster_cols = cluster_cols + self._set_cluster_vars() + DoubleMLData.__init__(self, + data, + y_col, + d_cols, + x_cols, + z_cols, + t_col, + s_col, + use_other_treat_as_covariate, + force_all_x_finite) + self._check_disjoint_sets_cluster_cols() + + def 
__str__(self): + data_summary = self._data_summary_str() + buf = io.StringIO() + self.data.info(verbose=False, buf=buf) + df_info = buf.getvalue() + res = '================== DoubleMLClusterData Object ==================\n' + \ + '\n------------------ Data summary ------------------\n' + data_summary + \ + '\n------------------ DataFrame info ------------------\n' + df_info + return res + + def _data_summary_str(self): + data_summary = f'Outcome variable: {self.y_col}\n' \ + f'Treatment variable(s): {self.d_cols}\n' \ + f'Cluster variable(s): {self.cluster_cols}\n' \ + f'Covariates: {self.x_cols}\n' \ + f'Instrument variable(s): {self.z_cols}\n' + if self.t_col is not None: + data_summary += f'Time variable: {self.t_col}\n' + if self.s_col is not None: + data_summary += f'Score/Selection variable: {self.s_col}\n' + + data_summary += f'No. Observations: {self.n_obs}\n' + return data_summary + + @classmethod + def from_arrays(cls, x, y, d, cluster_vars, z=None, t=None, s=None, use_other_treat_as_covariate=True, + force_all_x_finite=True): + """ + Initialize :class:`DoubleMLClusterData` from :class:`numpy.ndarray`'s. + + Parameters + ---------- + x : :class:`numpy.ndarray` + Array of covariates. + + y : :class:`numpy.ndarray` + Array of the outcome variable. + + d : :class:`numpy.ndarray` + Array of treatment variables. + + cluster_vars : :class:`numpy.ndarray` + Array of cluster variables. + + z : None or :class:`numpy.ndarray` + Array of instrumental variables. + Default is ``None``. + + t : :class:`numpy.ndarray` + Array of the time variable (only relevant/used for DiD models). + Default is ``None``. + + s : :class:`numpy.ndarray` + Array of the score or selection variable (only relevant/used for RDD or SSM models). + Default is ``None``. + + use_other_treat_as_covariate : bool + Indicates whether in the multiple-treatment case the other treatment variables should be added as covariates. + Default is ``True``. 
+ + force_all_x_finite : bool or str + Indicates whether to raise an error on infinite values and / or missings in the covariates ``x``. + Possible values are: ``True`` (neither missings ``np.nan``, ``pd.NA`` nor infinite values ``np.inf`` are + allowed), ``False`` (missings and infinite values are allowed), ``'allow-nan'`` (only missings are allowed). + Note that the choice ``False`` and ``'allow-nan'`` are only reasonable if the machine learning methods used + for the nuisance functions are capable to provide valid predictions with missings and / or infinite values + in the covariates ``x``. + Default is ``True``. + + Examples + -------- + >>> from doubleml import DoubleMLClusterData + >>> from doubleml.datasets import make_pliv_multiway_cluster_CKMS2021 + >>> (x, y, d, cluster_vars, z) = make_pliv_multiway_cluster_CKMS2021(return_type='array') + >>> obj_dml_data_from_array = DoubleMLClusterData.from_arrays(x, y, d, cluster_vars, z) + """ + dml_data = DoubleMLData.from_arrays(x, y, d, z, t, s, use_other_treat_as_covariate, force_all_x_finite) + cluster_vars = check_array(cluster_vars, ensure_2d=False, allow_nd=False) + cluster_vars = _assure_2d_array(cluster_vars) + if cluster_vars.shape[1] == 1: + cluster_cols = ['cluster_var'] + else: + cluster_cols = [f'cluster_var{i + 1}' for i in np.arange(cluster_vars.shape[1])] + + data = pd.concat((pd.DataFrame(cluster_vars, columns=cluster_cols), dml_data.data), axis=1) + + return (cls(data, dml_data.y_col, dml_data.d_cols, cluster_cols, + dml_data.x_cols, dml_data.z_cols, dml_data.t_col, dml_data.s_col, + dml_data.use_other_treat_as_covariate, dml_data.force_all_x_finite)) + + @property + def cluster_cols(self): + """ + The cluster variable(s). 
+ """ + return self._cluster_cols + + @cluster_cols.setter + def cluster_cols(self, value): + reset_value = hasattr(self, '_cluster_cols') + if isinstance(value, str): + value = [value] + if not isinstance(value, list): + raise TypeError('The cluster variable(s) cluster_cols must be of str or list type. ' + f'{str(value)} of type {str(type(value))} was passed.') + if not len(set(value)) == len(value): + raise ValueError('Invalid cluster variable(s) cluster_cols: ' + 'Contains duplicate values.') + if not set(value).issubset(set(self.all_variables)): + raise ValueError('Invalid cluster variable(s) cluster_cols. ' + 'At least one cluster variable is no data column.') + self._cluster_cols = value + if reset_value: + self._check_disjoint_sets() + self._set_cluster_vars() + + @property + def n_cluster_vars(self): + """ + The number of cluster variables. + """ + return len(self.cluster_cols) + + @property + def cluster_vars(self): + """ + Array of cluster variable(s). + """ + return self._cluster_vars.values + + @DoubleMLData.x_cols.setter + def x_cols(self, value): + if value is not None: + # this call might become much easier with https://github.com/python/cpython/pull/26194 + super(self.__class__, self.__class__).x_cols.__set__(self, value) + else: + if self.s_col is None: + if (self.z_cols is not None) & (self.t_col is not None): + y_d_z_t = set.union({self.y_col}, set(self.d_cols), set(self.z_cols), {self.t_col}, set(self.cluster_cols)) + x_cols = [col for col in self.data.columns if col not in y_d_z_t] + elif self.z_cols is not None: + y_d_z = set.union({self.y_col}, set(self.d_cols), set(self.z_cols), set(self.cluster_cols)) + x_cols = [col for col in self.data.columns if col not in y_d_z] + elif self.t_col is not None: + y_d_t = set.union({self.y_col}, set(self.d_cols), {self.t_col}, set(self.cluster_cols)) + x_cols = [col for col in self.data.columns if col not in y_d_t] + else: + y_d = set.union({self.y_col}, set(self.d_cols), set(self.cluster_cols)) + x_cols = 
[col for col in self.data.columns if col not in y_d] + else: + if (self.z_cols is not None) & (self.t_col is not None): + y_d_z_t_s = set.union({self.y_col}, set(self.d_cols), set(self.z_cols), {self.t_col}, {self.s_col}, + set(self.cluster_cols)) + x_cols = [col for col in self.data.columns if col not in y_d_z_t_s] + elif self.z_cols is not None: + y_d_z_s = set.union({self.y_col}, set(self.d_cols), set(self.z_cols), {self.s_col}, set(self.cluster_cols)) + x_cols = [col for col in self.data.columns if col not in y_d_z_s] + elif self.t_col is not None: + y_d_t_s = set.union({self.y_col}, set(self.d_cols), {self.t_col}, {self.s_col}, set(self.cluster_cols)) + x_cols = [col for col in self.data.columns if col not in y_d_t_s] + else: + y_d_s = set.union({self.y_col}, set(self.d_cols), {self.s_col}, set(self.cluster_cols)) + x_cols = [col for col in self.data.columns if col not in y_d_s] + # this call might become much easier with https://github.com/python/cpython/pull/26194 + super(self.__class__, self.__class__).x_cols.__set__(self, x_cols) + + def _check_disjoint_sets(self): + # apply the standard checks from the DoubleMLData class + super(DoubleMLClusterData, self)._check_disjoint_sets() + self._check_disjoint_sets_cluster_cols() + + def _check_disjoint_sets_cluster_cols(self): + # apply the standard checks from the DoubleMLData class + super(DoubleMLClusterData, self)._check_disjoint_sets() + + # special checks for the additional cluster variables + cluster_cols_set = set(self.cluster_cols) + y_col_set = {self.y_col} + x_cols_set = set(self.x_cols) + d_cols_set = set(self.d_cols) + t_col_set = {self.t_col} + s_col_set = {self.s_col} + + if not y_col_set.isdisjoint(cluster_cols_set): + raise ValueError(f'{str(self.y_col)} cannot be set as outcome variable ``y_col`` and cluster ' + 'variable in ``cluster_cols``.') + if not d_cols_set.isdisjoint(cluster_cols_set): + raise ValueError('At least one variable/column is set as treatment variable (``d_cols``) and ' + 
'cluster variable in ``cluster_cols``.') + # TODO: Is the following combination allowed, or not? + if not x_cols_set.isdisjoint(cluster_cols_set): + raise ValueError('At least one variable/column is set as covariate (``x_cols``) and cluster ' + 'variable in ``cluster_cols``.') + if self.z_cols is not None: + z_cols_set = set(self.z_cols) + if not z_cols_set.isdisjoint(cluster_cols_set): + raise ValueError('At least one variable/column is set as instrumental variable (``z_cols``) and ' + 'cluster variable in ``cluster_cols``.') + if self.t_col is not None: + if not t_col_set.isdisjoint(cluster_cols_set): + raise ValueError(f'{str(self.t_col)} cannot be set as time variable ``t_col`` and ' + 'cluster variable in ``cluster_cols``.') + if self.s_col is not None: + if not s_col_set.isdisjoint(cluster_cols_set): + raise ValueError(f'{str(self.s_col)} cannot be set as score or selection variable ``s_col`` and ' + 'cluster variable in ``cluster_cols``.') + + def _set_cluster_vars(self): + assert_all_finite(self.data.loc[:, self.cluster_cols]) + self._cluster_vars = self.data.loc[:, self.cluster_cols] diff --git a/doubleml/logistic/logistic.py b/doubleml/logistic/logistic.py new file mode 100644 index 000000000..8915215bf --- /dev/null +++ b/doubleml/logistic/logistic.py @@ -0,0 +1,463 @@ +import numpy as np +from doubleml.utils._estimation import ( + _dml_cv_predict, + _trimm, + _predict_zero_one_propensity, + _cond_targets, + _get_bracket_guess, + _default_kde, + _normalize_ipw, + _dml_tune, + _solve_ipw_score, +) +from sklearn.base import clone +from sklearn.utils import check_X_y +import scipy +from sklearn.utils.multiclass import type_of_target + +from doubleml import DoubleMLData, DoubleMLBLP +from doubleml.double_ml import DoubleML +from doubleml.double_ml_score_mixins import NonLinearScoreMixin +from doubleml.utils import DoubleMLClusterResampling +from doubleml.utils._checks import _check_score, _check_finite_predictions, _check_is_propensity +from 
doubleml.utils.resampling import DoubleMLDoubleResampling + + +class DoubleMLLogit(NonLinearScoreMixin, DoubleML): + """Double machine learning for partially linear regression models + + Parameters + ---------- + obj_dml_data : :class:`DoubleMLData` object + The :class:`DoubleMLData` object providing the data and specifying the variables for the causal model. + + ml_r : estimator implementing ``fit()`` and ``predict()`` + A machine learner implementing ``fit()`` and ``predict()`` methods (e.g. + :py:class:`sklearn.ensemble.RandomForestRegressor`) for the nuisance function :math:`\\ell_0(X) = E[Y|X]`. + + ml_m : estimator implementing ``fit()`` and ``predict()`` + A machine learner implementing ``fit()`` and ``predict()`` methods (e.g. + :py:class:`sklearn.ensemble.RandomForestRegressor`) for the nuisance function :math:`m_0(X) = E[D|X]`. + For binary treatment variables :math:`D` (with values 0 and 1), a classifier implementing ``fit()`` and + ``predict_proba()`` can also be specified. If :py:func:`sklearn.base.is_classifier` returns ``True``, + ``predict_proba()`` is used otherwise ``predict()``. + + ml_g : estimator implementing ``fit()`` and ``predict()`` + A machine learner implementing ``fit()`` and ``predict()`` methods (e.g. + :py:class:`sklearn.ensemble.RandomForestRegressor`) for the nuisance function + :math:`g_0(X) = E[Y - D \\theta_0|X]`. + Note: The learner `ml_g` is only required for the score ``'IV-type'``. Optionally, it can be specified and + estimated for callable scores. + + n_folds : int + Number of folds. + Default is ``5``. + + n_rep : int + Number of repetitons for the sample splitting. + Default is ``1``. + + score : str or callable + A str (``'partialling out'`` or ``'IV-type'``) specifying the score function + or a callable object / function with signature ``psi_a, psi_b = score(y, d, l_hat, m_hat, g_hat, smpls)``. + Default is ``'partialling out'``. 
+ + draw_sample_splitting : bool + Indicates whether the sample splitting should be drawn during initialization of the object. + Default is ``True``. + + Examples + -------- + >>> import numpy as np + >>> import doubleml as dml + >>> from doubleml.datasets import make_plr_CCDDHNR2018 + >>> from sklearn.ensemble import RandomForestRegressor + >>> from sklearn.base import clone + >>> np.random.seed(3141) + >>> learner = RandomForestRegressor(n_estimators=100, max_features=20, max_depth=5, min_samples_leaf=2) + >>> ml_g = learner + >>> ml_m = learner + >>> obj_dml_data = make_plr_CCDDHNR2018(alpha=0.5, n_obs=500, dim_x=20) + >>> dml_plr_obj = dml.DoubleMLPLR(obj_dml_data, ml_g, ml_m) + >>> dml_plr_obj.fit().summary + coef std err t P>|t| 2.5 % 97.5 % + d 0.462321 0.04107 11.256983 2.139582e-29 0.381826 0.542816 + + Notes + ----- + **Partially linear regression (PLR)** models take the form + + .. math:: + + Y = D \\theta_0 + g_0(X) + \\zeta, & &\\mathbb{E}(\\zeta | D,X) = 0, + + D = m_0(X) + V, & &\\mathbb{E}(V | X) = 0, + + where :math:`Y` is the outcome variable and :math:`D` is the policy variable of interest. + The high-dimensional vector :math:`X = (X_1, \\ldots, X_p)` consists of other confounding covariates, + and :math:`\\zeta` and :math:`V` are stochastic errors. 
+ """ + + def __init__(self, + obj_dml_data, + ml_r, + ml_m, + ml_M, + ml_t, + ml_a=None, + n_folds=5, + n_folds_inner=5, + n_rep=1, + score='logistic', + draw_sample_splitting=True): + super().__init__(obj_dml_data, + n_folds, + n_rep, + score, + draw_sample_splitting) + + self._check_data(self._dml_data) + valid_scores = ['logistic'] + _check_score(self.score, valid_scores, allow_callable=True) + + _ = self._check_learner(ml_r, 'ml_r', regressor=True, classifier=False) + _ = self._check_learner(ml_t, 'ml_t', regressor=True, classifier=False) + _ = self._check_learner(ml_M, 'ml_M', regressor=False, classifier=True) + ml_m_is_classifier = self._check_learner(ml_m, 'ml_m', regressor=True, classifier=True) + self._learner = {'ml_l': ml_r, 'ml_m': ml_m, 'ml_t': ml_t, 'ml_M': ml_M} + + if ml_a is not None: + ml_a_is_classifier = self._check_learner(ml_a, 'ml_a', regressor=True, classifier=True) + self._learner['ml_a'] = ml_a + else: + self._learner['ml_a'] = clone(ml_m) + ml_a_is_classifier = ml_m_is_classifier + + self._predict_method = {'ml_r': 'predict', 'ml_t': 'predict', 'ml_M': 'predict_proba'} + + if ml_m_is_classifier: + if self._dml_data.binary_treats.all(): + self._predict_method['ml_m'] = 'predict_proba' + else: + raise ValueError(f'The ml_m learner {str(ml_m)} was identified as classifier ' + 'but at least one treatment variable is not binary with values 0 and 1.') + else: + self._predict_method['ml_m'] = 'predict' + + if ml_a_is_classifier: + if self._dml_data.binary_treats.all(): + self._predict_method['ml_a'] = 'predict_proba' + else: + raise ValueError(f'The ml_a learner {str(ml_a)} was identified as classifier ' + 'but at least one treatment variable is not binary with values 0 and 1.') + else: + self._predict_method['ml_a'] = 'predict' + + self._initialize_ml_nuisance_params() + self._sensitivity_implemented = True + self._external_predictions_implemented = True + + def _initialize_ml_nuisance_params(self): + self._params = {learner: {key: [None] * 
self.n_rep for key in self._dml_data.d_cols} + for learner in self._learner} + + def _check_data(self, obj_dml_data): + if not isinstance(obj_dml_data, DoubleMLData): + raise TypeError('The data must be of DoubleMLData type. ' + f'{str(obj_dml_data)} of type {str(type(obj_dml_data))} was passed.') + return + + def _double_dml_cv_predict(self, estimator, estimator_name, x, y, smpls=None, smpls_inner=None, + n_jobs=None, est_params=None, method='predict'): + res = {} + res['preds'] = np.zeros_like(y) + res['preds_inner'] = np.zeros_like(y) + for smpls_single_split, smpls_double_split in zip(smpls, smpls_inner): + res_inner = _dml_cv_predict(estimator, x, y, smpls=smpls_double_split, n_jobs=n_jobs, + est_params=est_params, method=method, + return_models=True) + _check_finite_predictions(res_inner['preds'], estimator, estimator_name, smpls_double_split) + + res['preds_inner'] += res_inner['preds'] + for model in res_inner['models']: + res['models'].append(model) + res['preds'][smpls_single_split[1]] += model.predict(x[smpls_single_split[1]]) + + res["preds"] /= len(smpls) + res['targets'] = np.copy(y) + + + + def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False): + x, y = check_X_y(self._dml_data.x, self._dml_data.y, + force_all_finite=False) + x, d = check_X_y(x, self._dml_data.d, + force_all_finite=False) + x_d_concat = np.hstack([[d, np.newaxis], x]) + r_external = external_predictions['ml_r'] is not None + m_external = external_predictions['ml_m'] is not None + M_external = external_predictions['ml_M'] is not None + t_external = external_predictions['ml_t'] is not None + if 'ml_a' in self._learner: + a_external = external_predictions['ml_a'] is not None + else: + a_external = False + + # nuisance m + if m_external: + m_hat = {'preds': external_predictions['ml_m'], + 'targets': None, + 'models': None} + else: + m_hat = _dml_cv_predict(self._learner['ml_m'], x, d, smpls=smpls, n_jobs=n_jobs_cv, + est_params=self._get_params('ml_m'), 
method=self._predict_method['ml_m'], + return_models=return_models) + _check_finite_predictions(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls) + + if self._check_learner(self._learner['ml_m'], 'ml_m', regressor=True, classifier=True): + _check_is_propensity(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls, eps=1e-12) + + if self._dml_data.binary_treats[self._dml_data.d_cols[self._i_treat]]: + binary_preds = (type_of_target(m_hat['preds']) == 'binary') + zero_one_preds = np.all((np.power(m_hat['preds'], 2) - m_hat['preds']) == 0) + if binary_preds & zero_one_preds: + raise ValueError(f'For the binary treatment variable {self._dml_data.d_cols[self._i_treat]}, ' + f'predictions obtained with the ml_m learner {str(self._learner["ml_m"])} are also ' + 'observed to be binary with values 0 and 1. Make sure that for classifiers ' + 'probabilities and not labels are predicted.') + + + if M_external: + M_hat = {'preds': external_predictions['ml_M'], + 'targets': None, + 'models': None} + else: + M_hat = (self.double_dml_cv_predict(self._learner['ml_M'], 'ml_M', x_d_concat, y, smpls=smpls, smpls_inner=smpls_inner, + n_jobs=n_jobs_cv, + est_params=self._get_params('ml_M'), method=self._predict_method['ml_M'])) + + if a_external: + a_hat = {'preds': external_predictions['ml_a'], + 'targets': None, + 'models': None} + else: + a_hat = (self.double_dml_cv_predict(self._learner['ml_a'], 'ml_a', x_d_concat, y, smpls=smpls, smpls_inner=smpls_inner, + n_jobs=n_jobs_cv, + est_params=self._get_params('ml_a'), method=self._predict_method['ml_a'])) + + + W = scipy.special.logit(M_hat['preds']) + d_tilde_full = d - a_hat['preds'] + + beta_notFold = np.zeros_like(d) + + for _, test in smpls: + beta_notFold[test] = np.sum(d_tilde_full[test] * W[test]) / np.sum(d_tilde_full[test] ** 2) + + # nuisance t + if t_external: + t_hat = {'preds': external_predictions['ml_t'], + 'targets': None, + 'models': None} + else: + t_hat = _dml_cv_predict(self._learner['ml_t'], x, W, smpls=smpls, 
n_jobs=n_jobs_cv, + est_params=self._get_params('ml_t'), method=self._predict_method['ml_t'], + return_models=return_models) + _check_finite_predictions(t_hat['preds'], self._learner['ml_l'], 'ml_l', smpls) + + W = scipy.special.expit(M_hat['preds']) + + # nuisance W + if t_external: + t_hat = {'preds': external_predictions['ml_t'], + 'targets': None, + 'models': None} + else: + t_hat = _dml_cv_predict(self._learner['ml_t'], x, W, smpls=smpls, n_jobs=n_jobs_cv, + est_params=self._get_params('ml_t'), method=self._predict_method['ml_t'], + return_models=return_models) + _check_finite_predictions(t_hat['preds'], self._learner['ml_t'], 'ml_t', smpls) + + r_hat = {} + r_hat['preds'] = t_hat['preds'] - beta_notFold * a_hat['preds'] + + + psi_elements = self._score_elements(y, d, r_hat['preds'], m_hat['preds']) + + preds = {'predictions': {'ml_r': r_hat['preds'], + 'ml_m': m_hat['preds'], + 'ml_a': a_hat['preds'], + 'ml_t': t_hat['preds'], + 'ml_M': M_hat['preds']}, + 'targets': {'ml_r': r_hat['targets'], + 'ml_m': m_hat['targets'], + 'ml_a': a_hat['targets'], + 'ml_t': t_hat['targets'], + 'ml_M': M_hat['targets']}, + 'models': {'ml_r': None, + 'ml_m': m_hat['models'], + 'ml_a': a_hat['models'], + 'ml_t': t_hat['models'], + 'ml_M': M_hat['models']}} + + return psi_elements, preds + + def _score_elements(self, y, d, r_hat, m_hat): + # compute residual + d_tilde = d - m_hat + psi_hat = scipy.special.expit(-r) + score_const = d_tilde * (1 - y) * np.exp(r) + psi_elements = {"y": y, "d": d, "r_hat": r_hat, "m_hat": m_hat, "psi_hat": psi_hat, "score_const": score_const} + + return psi_elements + + def _sensitivity_element_est(self, preds): + pass + + def _nuisance_tuning(self): + pass + + @property + def __smpls__inner(self): + return self._smpls[self._i_rep] + + def draw_sample_splitting(self): + """ + Draw sample splitting for DoubleML models. + + The samples are drawn according to the attributes + ``n_folds`` and ``n_rep``. 
+ + Returns + ------- + self : object + """ + + obj_dml_resampling = DoubleMLDoubleResampling(n_folds=self.n_folds, + n_folds_inner=self.n_folds_inner, + n_rep=self.n_rep, + n_obs=self._dml_data.n_obs, + stratify=self._strata) + self._smpls, self._smpls_inner = obj_dml_resampling.split_samples() + + return self + + def set_sample_splitting(self): + raise NotImplementedError('set_sample_splitting is not implemented for DoubleMLLogit.') + + def _compute_score(self, psi_elements, coef): + + score_1 = psi_elements["y"] * np.exp(-coef * psi_elements["r_hat"]) * psi_elements["d_tilde"] + + + return psi_elements["psi_hat"] * (score_1 - psi_elements["score_const"]) + + def _compute_score_deriv(self, psi_elements, coef, inds=None): + deriv_1 = - psi_elements["y"] * np.exp(-coef * psi_elements["r_hat"]) * psi_elements["d"] + + return psi_elements["psi_hat"] * psi_elements["d_tilde"] * deriv_1 + + + def cate(self, basis, is_gate=False): + """ + Calculate conditional average treatment effects (CATE) for a given basis. + + Parameters + ---------- + basis : :class:`pandas.DataFrame` + The basis for estimating the best linear predictor. Has to have the shape ``(n_obs, d)``, + where ``n_obs`` is the number of observations and ``d`` is the number of predictors. + is_gate : bool + Indicates whether the basis is constructed for GATEs (dummy-basis). + Default is ``False``. + + Returns + ------- + model : :class:`doubleML.DoubleMLBLP` + Best linear Predictor model. + """ + if self._dml_data.n_treat > 1: + raise NotImplementedError('Only implemented for single treatment. ' + + f'Number of treatments is {str(self._dml_data.n_treat)}.') + if self.n_rep != 1: + raise NotImplementedError('Only implemented for one repetition. 
' +
+                                      f'Number of repetitions is {str(self.n_rep)}.')
+
+        Y_tilde, D_tilde = self._partial_out()
+
+        D_basis = basis * D_tilde
+        model = DoubleMLBLP(
+            orth_signal=Y_tilde.reshape(-1),
+            basis=D_basis,
+            is_gate=is_gate,
+        )
+        model.fit()
+
+        ## TODO: Solve score
+
+
+        return model
+
+    def gate(self, groups):
+        """
+        Calculate group average treatment effects (GATE) for groups.
+
+        Parameters
+        ----------
+        groups : :class:`pandas.DataFrame`
+            The group indicator for estimating the best linear predictor. Groups should be mutually exclusive.
+            Has to be dummy coded with shape ``(n_obs, d)``, where ``n_obs`` is the number of observations
+            and ``d`` is the number of groups or ``(n_obs, 1)`` and contain the corresponding groups (as str).
+
+        Returns
+        -------
+        model : :class:`doubleML.DoubleMLBLP`
+            Best linear Predictor model for Group Effects.
+        """
+
+        if not isinstance(groups, pd.DataFrame):
+            raise TypeError('Groups must be of DataFrame type. '
+                            f'Groups of type {str(type(groups))} was passed.')
+        if not all(groups.dtypes == bool) or all(groups.dtypes == int):
+            if groups.shape[1] == 1:
+                groups = pd.get_dummies(groups, prefix='Group', prefix_sep='_')
+            else:
+                raise TypeError('Columns of groups must be of bool type or int type (dummy coded). '
+                                'Alternatively, groups should only contain one column.')
+
+        if any(groups.sum(0) <= 5):
+            warnings.warn('At least one group effect is estimated with less than 6 observations.')
+
+        model = self.cate(groups, is_gate=True)
+        return model
+
+    def _partial_out(self):
+        """
+        Helper function. Returns the partialled out quantities of Y and D.
+        Works with multiple repetitions.
+
+        Returns
+        -------
+        Y_tilde : :class:`numpy.ndarray`
+            The residual of the regression of Y on X.
+        D_tilde : :class:`numpy.ndarray`
+            The residual of the regression of D on X.
+        """
+        if self.predictions is None:
+            raise ValueError('predictions are None. 
Call .fit(store_predictions=True) to store the predictions.') + + y = self._dml_data.y.reshape(-1, 1) + d = self._dml_data.d.reshape(-1, 1) + ml_m = self.predictions["ml_m"].squeeze(axis=2) + + if self.score == "partialling out": + ml_l = self.predictions["ml_l"].squeeze(axis=2) + Y_tilde = y - ml_l + D_tilde = d - ml_m + else: + assert self.score == "IV-type" + ml_g = self.predictions["ml_g"].squeeze(axis=2) + Y_tilde = y - (self.coef * ml_m) - ml_g + D_tilde = d - ml_m + + return Y_tilde, D_tilde \ No newline at end of file diff --git a/doubleml/utils/resampling.py b/doubleml/utils/resampling.py index 188d2f248..18153944c 100644 --- a/doubleml/utils/resampling.py +++ b/doubleml/utils/resampling.py @@ -25,6 +25,51 @@ def split_samples(self): return smpls +class DoubleMLDoubleResampling: + def __init__(self, + n_folds, + n_folds_inner, + n_rep, + n_obs, + stratify=None): + self.n_folds = n_folds + self.n_rep = n_rep + self.n_obs = n_obs + self.stratify = stratify + + if n_folds < 2: + raise ValueError('n_folds must be greater than 1. ' + 'You can use set_sample_splitting with a tuple to only use one fold.') + if n_folds_inner < 2: + raise ValueError('n_folds_inner must be greater than 1. 
' + 'You can use set_sample_splitting with a tuple to only use one fold.') + + + if self.stratify is None: + self.resampling = RepeatedKFold(n_splits=n_folds, n_repeats=n_rep) + self.resampling_inner = RepeatedKFold(n_splits=n_folds_inner) + else: + self.resampling = RepeatedStratifiedKFold(n_splits=n_folds, n_repeats=n_rep) + self.resampling_inner = RepeatedStratifiedKFold(n_splits=n_folds_inner) + + def split_samples(self): + all_smpls = [(train, test) for train, test in self.resampling.split(X=np.zeros(self.n_obs), y=self.stratify)] + smpls = [all_smpls[(i_repeat * self.n_folds):((i_repeat + 1) * self.n_folds)] + for i_repeat in range(self.n_rep)] + smpls_inner = [] + for _ in range(self.n_rep): + smpls_inner_rep = [] + for _, test in all_smpls: + if self.stratify is None: + smpls_inner_rep.append([(train_inner, test_inner) for train_inner, test_inner in self.resampling_inner.split(X=test)]) + else: + smpls_inner_rep.append([(train_inner, test_inner) for train_inner, test_inner in + self.resampling_inner.split(X=np.zeros(len(test)), y=self.stratify[test])]) + smpls_inner.append(smpls_inner_rep) + + return smpls, smpls_inner + + class DoubleMLClusterResampling: def __init__(self, n_folds, n_rep, n_obs, n_cluster_vars, cluster_vars): self.n_folds = n_folds From f5521f142de7ad22e754c7d1d8d7c5f4c18ffa3b Mon Sep 17 00:00:00 2001 From: Julius Herzig Date: Mon, 27 Jan 2025 10:11:36 +0100 Subject: [PATCH 02/48] First WIP of implementation --- doubleml/double_ml_data.py | 4 +--- doubleml/logistic/logistic.py | 24 +++++++++++++++++------- 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/doubleml/double_ml_data.py b/doubleml/double_ml_data.py index 4f8d7cbc7..fdee739dd 100644 --- a/doubleml/double_ml_data.py +++ b/doubleml/double_ml_data.py @@ -413,9 +413,7 @@ def x_cols(self, value): if not isinstance(value, list): raise TypeError('The covariates x_cols must be of str or list type (or None). 
' f'{str(value)} of type {str(type(value))} was passed.') - if not len(se - - t(value)) == len(value): + if not len(set(value)) == len(value): raise ValueError('Invalid covariates x_cols: ' 'Contains duplicate values.') if not set(value).issubset(set(self.all_variables)): diff --git a/doubleml/logistic/logistic.py b/doubleml/logistic/logistic.py index 8915215bf..26c14a80d 100644 --- a/doubleml/logistic/logistic.py +++ b/doubleml/logistic/logistic.py @@ -1,5 +1,5 @@ import numpy as np -from doubleml.utils._estimation import ( +from ..utils._estimation import ( _dml_cv_predict, _trimm, _predict_zero_one_propensity, @@ -15,12 +15,15 @@ import scipy from sklearn.utils.multiclass import type_of_target -from doubleml import DoubleMLData, DoubleMLBLP -from doubleml.double_ml import DoubleML -from doubleml.double_ml_score_mixins import NonLinearScoreMixin -from doubleml.utils import DoubleMLClusterResampling -from doubleml.utils._checks import _check_score, _check_finite_predictions, _check_is_propensity -from doubleml.utils.resampling import DoubleMLDoubleResampling +from .. import DoubleMLData +from ..double_ml import DoubleML +from ..double_ml_score_mixins import NonLinearScoreMixin +from ..utils import DoubleMLClusterResampling +from ..utils._checks import _check_score, _check_finite_predictions, _check_is_propensity +from ..utils.resampling import DoubleMLDoubleResampling + + + class DoubleMLLogit(NonLinearScoreMixin, DoubleML): @@ -110,6 +113,7 @@ def __init__(self, n_rep=1, score='logistic', draw_sample_splitting=True): + self.n_folds_inner = n_folds_inner super().__init__(obj_dml_data, n_folds, n_rep, @@ -165,6 +169,8 @@ def _check_data(self, obj_dml_data): if not isinstance(obj_dml_data, DoubleMLData): raise TypeError('The data must be of DoubleMLData type. 
' f'{str(obj_dml_data)} of type {str(type(obj_dml_data))} was passed.') + if not np.array_equal(np.unique(obj_dml_data.y), [0, 1]): + raise TypeError('The outcome variable y must be binary with values 0 and 1.') return def _double_dml_cv_predict(self, estimator, estimator_name, x, y, smpls=None, smpls_inner=None, @@ -311,6 +317,10 @@ def _score_elements(self, y, d, r_hat, m_hat): return psi_elements + @property + def _score_element_names(self): + return ['y', 'd', 'r_hat', 'm_hat', 'psi_hat', 'score_const'] + def _sensitivity_element_est(self, preds): pass From bfa756c58797a36943741c2a5d03a9ae57e4e82a Mon Sep 17 00:00:00 2001 From: Julius Herzig Date: Fri, 21 Feb 2025 15:07:11 +0100 Subject: [PATCH 03/48] Working implementation. Started on test set-up. --- doubleml/logistic/logistic.py | 189 +++++++--- .../logistic/tests/_utils_logistic_manual.py | 346 +++++++++++++++++ doubleml/logistic/tests/tests_logistic.py | 352 ++++++++++++++++++ doubleml/utils/_estimation.py | 6 +- doubleml/utils/resampling.py | 13 +- 5 files changed, 853 insertions(+), 53 deletions(-) create mode 100644 doubleml/logistic/tests/_utils_logistic_manual.py create mode 100644 doubleml/logistic/tests/tests_logistic.py diff --git a/doubleml/logistic/logistic.py b/doubleml/logistic/logistic.py index 26c14a80d..25ba37634 100644 --- a/doubleml/logistic/logistic.py +++ b/doubleml/logistic/logistic.py @@ -103,7 +103,6 @@ class DoubleMLLogit(NonLinearScoreMixin, DoubleML): def __init__(self, obj_dml_data, - ml_r, ml_m, ml_M, ml_t, @@ -119,16 +118,17 @@ def __init__(self, n_rep, score, draw_sample_splitting) + self._coef_bounds = (-1e-2, 1e2) + self._coef_start_val = 1.0 self._check_data(self._dml_data) valid_scores = ['logistic'] _check_score(self.score, valid_scores, allow_callable=True) - _ = self._check_learner(ml_r, 'ml_r', regressor=True, classifier=False) _ = self._check_learner(ml_t, 'ml_t', regressor=True, classifier=False) _ = self._check_learner(ml_M, 'ml_M', regressor=False, classifier=True) 
ml_m_is_classifier = self._check_learner(ml_m, 'ml_m', regressor=True, classifier=True) - self._learner = {'ml_l': ml_r, 'ml_m': ml_m, 'ml_t': ml_t, 'ml_M': ml_M} + self._learner = {'ml_m': ml_m, 'ml_t': ml_t, 'ml_M': ml_M} if ml_a is not None: ml_a_is_classifier = self._check_learner(ml_a, 'ml_a', regressor=True, classifier=True) @@ -137,7 +137,7 @@ def __init__(self, self._learner['ml_a'] = clone(ml_m) ml_a_is_classifier = ml_m_is_classifier - self._predict_method = {'ml_r': 'predict', 'ml_t': 'predict', 'ml_M': 'predict_proba'} + self._predict_method = {'ml_t': 'predict', 'ml_M': 'predict_proba'} if ml_m_is_classifier: if self._dml_data.binary_treats.all(): @@ -158,7 +158,6 @@ def __init__(self, self._predict_method['ml_a'] = 'predict' self._initialize_ml_nuisance_params() - self._sensitivity_implemented = True self._external_predictions_implemented = True def _initialize_ml_nuisance_params(self): @@ -173,34 +172,40 @@ def _check_data(self, obj_dml_data): raise TypeError('The outcome variable y must be binary with values 0 and 1.') return + def _double_dml_cv_predict(self, estimator, estimator_name, x, y, smpls=None, smpls_inner=None, n_jobs=None, est_params=None, method='predict'): res = {} res['preds'] = np.zeros_like(y) - res['preds_inner'] = np.zeros_like(y) + res['preds_inner'] = [] + res['models'] = [] for smpls_single_split, smpls_double_split in zip(smpls, smpls_inner): res_inner = _dml_cv_predict(estimator, x, y, smpls=smpls_double_split, n_jobs=n_jobs, est_params=est_params, method=method, - return_models=True) + return_models=True, smpls_is_partition=True) _check_finite_predictions(res_inner['preds'], estimator, estimator_name, smpls_double_split) - res['preds_inner'] += res_inner['preds'] + res['preds_inner'].append(res_inner['preds']) for model in res_inner['models']: res['models'].append(model) - res['preds'][smpls_single_split[1]] += model.predict(x[smpls_single_split[1]]) - + if method == 'predict_proba': + res['preds'][smpls_single_split[1]] += 
model.predict_proba(x[smpls_single_split[1]])[:, 1] + else: + res['preds'][smpls_single_split[1]] += model.predict(x[smpls_single_split[1]]) + res["preds_inner"] res["preds"] /= len(smpls) res['targets'] = np.copy(y) + return res def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False): + # TODO: How to deal with smpls_inner? x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False) x, d = check_X_y(x, self._dml_data.d, force_all_finite=False) - x_d_concat = np.hstack([[d, np.newaxis], x]) - r_external = external_predictions['ml_r'] is not None + x_d_concat = np.hstack((d.reshape(-1,1), x)) m_external = external_predictions['ml_m'] is not None M_external = external_predictions['ml_M'] is not None t_external = external_predictions['ml_t'] is not None @@ -215,7 +220,11 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa 'targets': None, 'models': None} else: - m_hat = _dml_cv_predict(self._learner['ml_m'], x, d, smpls=smpls, n_jobs=n_jobs_cv, + filtered_smpls = [] + for train, test in smpls: + train_filtered = train[y[train] == 0] + filtered_smpls.append((train_filtered, test)) + m_hat = _dml_cv_predict(self._learner['ml_m'], x, d, smpls=filtered_smpls, n_jobs=n_jobs_cv, est_params=self._get_params('ml_m'), method=self._predict_method['ml_m'], return_models=return_models) _check_finite_predictions(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls) @@ -238,7 +247,7 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa 'targets': None, 'models': None} else: - M_hat = (self.double_dml_cv_predict(self._learner['ml_M'], 'ml_M', x_d_concat, y, smpls=smpls, smpls_inner=smpls_inner, + M_hat = (self._double_dml_cv_predict(self._learner['ml_M'], 'ml_M', x_d_concat, y, smpls=smpls, smpls_inner=self.__smpls__inner, n_jobs=n_jobs_cv, est_params=self._get_params('ml_M'), method=self._predict_method['ml_M'])) @@ -247,18 +256,49 @@ def _nuisance_est(self, smpls, n_jobs_cv, 
external_predictions, return_models=Fa 'targets': None, 'models': None} else: - a_hat = (self.double_dml_cv_predict(self._learner['ml_a'], 'ml_a', x_d_concat, y, smpls=smpls, smpls_inner=smpls_inner, + a_hat = (self._double_dml_cv_predict(self._learner['ml_a'], 'ml_a', x, d, smpls=smpls, smpls_inner=self.__smpls__inner, n_jobs=n_jobs_cv, est_params=self._get_params('ml_a'), method=self._predict_method['ml_a'])) + # r_legacy = np.zeros_like(y) + # smpls_inner = self.__smpls__inner + # M_hat = {} + # a_hat = {} + # M_hat['preds_inner'] = [] + # M_hat['preds'] = np.full_like(y, np.nan) + # a_hat['preds_inner'] = [] + # a_hat['preds'] = np.full_like(y, np.nan) + # for smpls_single_split, smpls_double_split in zip(smpls, smpls_inner): + # test = smpls_single_split[1] + # train = smpls_single_split[0] + # # r_legacy[test] = + # Mleg, aleg, a_nf_leg = self.legacy_implementation(y[train], x[train], d[train], x[test], d[test], + # self._learner['ml_m'], self._learner['ml_M'], + # smpls_single_split, smpls_double_split, y, x, d, + # x_d_concat, n_jobs_cv) + # Mtemp = np.full_like(y, np.nan) + # Mtemp[train] = Mleg + # Atemp = np.full_like(y, np.nan) + # Atemp[train] = aleg + # M_hat['preds_inner'].append(Mtemp) + # a_hat['preds_inner'].append(Atemp) + # a_hat['preds'][test] = a_nf_leg + # + # #r_hat['preds'] = r_legacy + + + + W_inner = [] + beta = np.zeros_like(d) + + for i, (train, test) in enumerate(smpls): + M_iteration = M_hat['preds_inner'][i][train] + M_iteration = np.clip(M_iteration, 1e-8, 1 - 1e-8) + w = scipy.special.logit(M_iteration) + W_inner.append(w) + d_tilde = (d - a_hat['preds_inner'][i])[train] + beta[test] = np.sum(d_tilde * w) / np.sum(d_tilde ** 2) - W = scipy.special.logit(M_hat['preds']) - d_tilde_full = d - a_hat['preds'] - - beta_notFold = np.zeros_like(d) - - for _, test in smpls: - beta_notFold[test] = np.sum(d_tilde_full[test] * W[test]) / np.sum(d_tilde_full[test] ** 2) # nuisance t if t_external: @@ -266,26 +306,17 @@ def _nuisance_est(self, 
smpls, n_jobs_cv, external_predictions, return_models=Fa 'targets': None, 'models': None} else: - t_hat = _dml_cv_predict(self._learner['ml_t'], x, W, smpls=smpls, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_t'), method=self._predict_method['ml_t'], - return_models=return_models) - _check_finite_predictions(t_hat['preds'], self._learner['ml_l'], 'ml_l', smpls) - - W = scipy.special.expit(M_hat['preds']) - - # nuisance W - if t_external: - t_hat = {'preds': external_predictions['ml_t'], - 'targets': None, - 'models': None} - else: - t_hat = _dml_cv_predict(self._learner['ml_t'], x, W, smpls=smpls, n_jobs=n_jobs_cv, + t_hat = _dml_cv_predict(self._learner['ml_t'], x, W_inner, smpls=smpls, n_jobs=n_jobs_cv, est_params=self._get_params('ml_t'), method=self._predict_method['ml_t'], return_models=return_models) _check_finite_predictions(t_hat['preds'], self._learner['ml_t'], 'ml_t', smpls) + r_hat = {} - r_hat['preds'] = t_hat['preds'] - beta_notFold * a_hat['preds'] + r_hat['preds'] = t_hat['preds'] - beta * a_hat['preds'] + + + psi_elements = self._score_elements(y, d, r_hat['preds'], m_hat['preds']) @@ -295,7 +326,7 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa 'ml_a': a_hat['preds'], 'ml_t': t_hat['preds'], 'ml_M': M_hat['preds']}, - 'targets': {'ml_r': r_hat['targets'], + 'targets': {'ml_r': None, 'ml_m': m_hat['targets'], 'ml_a': a_hat['targets'], 'ml_t': t_hat['targets'], @@ -308,18 +339,86 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa return psi_elements, preds + + def legacy_implementation(self, Yfold: np.ndarray, Xfold: np.ndarray, Afold: np.ndarray, XnotFold: np.ndarray, AnotFold: np.ndarray, + learner, learnerClassifier, smpls_single_split, smpls_double_split, yfull, xfull, afull, x_d_concat, n_jobs_cv, noFolds: int = 5, seed=None, )-> (np.ndarray, np.ndarray, np.ndarray): + + def learn_predict(X, Y, Xpredict, learner, learnerClassifier, fit_args={}): + results = [] + if 
len(np.unique(Y)) == 2: + learnerClassifier.fit(X, Y, **fit_args) + for x in Xpredict: + results.append(learnerClassifier.predict_proba(x)[:, 1]) + else: + learner.fit(X, Y, **fit_args) + for x in Xpredict: + results.append(learner.predict(x)) + return (*results,) + + nFold = len(Yfold) + i = np.remainder(np.arange(nFold), noFolds) + np.random.default_rng(seed).shuffle(i) + + M = np.zeros((nFold)) + a_hat = np.zeros((nFold)) + a_hat_notFold = np.zeros((len(XnotFold))) + M_notFold = np.zeros((len(XnotFold))) + loss = {} + + a_hat_inner = _dml_cv_predict(self._learner['ml_a'], xfull, afull, smpls=smpls_double_split, n_jobs=n_jobs_cv, + est_params=self._get_params('ml_a'), method=self._predict_method['ml_a'], + return_models=True, smpls_is_partition=True) + _check_finite_predictions(a_hat_inner['preds'], self._learner['ml_a'], 'ml_a', smpls_double_split) + a_hat_notFold = np.full_like(yfull, 0.) + for model in a_hat_inner['models']: + if self._predict_method['ml_a'] == 'predict_proba': + a_hat_notFold[smpls_single_split[1]] += model.predict_proba(xfull[smpls_single_split[1]])[:, 1] + else: + a_hat_notFold[smpls_single_split[1]] += model.predict(xfull[smpls_single_split[1]]) + + M_hat = _dml_cv_predict(self._learner['ml_M'], x_d_concat, yfull, smpls=smpls_double_split, n_jobs=n_jobs_cv, + est_params=self._get_params('ml_M'), method=self._predict_method['ml_M'], + return_models=True, smpls_is_partition=True) + _check_finite_predictions(M_hat['preds'], self._learner['ml_M'], 'ml_M', smpls_double_split) + + M = M_hat['preds'][~np.isnan(M_hat['preds'])] + a_hat = a_hat_inner['preds'][~np.isnan(a_hat_inner['preds'])] + a_hat_notFold = a_hat_notFold[smpls_single_split[1]] + + np.clip(M, 1e-8, 1 - 1e-8, out=M) +# loss["M"] = compute_loss(Yfold, M) +# loss["a_hat"] = compute_loss(Afold, a_hat) + a_hat_notFold /= noFolds + # M_notFold /= noFolds + np.clip(M_notFold, 1e-8, 1 - 1e-8, out=M_notFold) + + # Obtain preliminary estimate of beta based on M and residual of a + W = 
scipy.special.logit(M) + A_resid = Afold - a_hat + beta_notFold = sum(A_resid * W) / sum(A_resid ** 2) + # print(beta_notFold) + t_notFold, = learn_predict(Xfold, W, [XnotFold], learner, learnerClassifier) + W_notFold = scipy.special.expit(M_notFold) +# loss["t"] = compute_loss(W_notFold, t_notFold) + + + # Compute r based on estimates for W=logit(M), beta and residual of A + r_notFold = t_notFold - beta_notFold * a_hat_notFold + + return M, a_hat, a_hat_notFold #r_notFold #, a_hat_notFold, M_notFold, t_notFold + def _score_elements(self, y, d, r_hat, m_hat): # compute residual d_tilde = d - m_hat - psi_hat = scipy.special.expit(-r) - score_const = d_tilde * (1 - y) * np.exp(r) - psi_elements = {"y": y, "d": d, "r_hat": r_hat, "m_hat": m_hat, "psi_hat": psi_hat, "score_const": score_const} + psi_hat = scipy.special.expit(-r_hat) + score_const = d_tilde * (1 - y) * np.exp(r_hat) + psi_elements = {"y": y, "d": d, "d_tilde": d_tilde, "r_hat": r_hat, "m_hat": m_hat, "psi_hat": psi_hat, "score_const": score_const} return psi_elements @property def _score_element_names(self): - return ['y', 'd', 'r_hat', 'm_hat', 'psi_hat', 'score_const'] + return ['y', 'd', 'd_tilde', 'r_hat', 'm_hat', 'psi_hat', 'score_const'] def _sensitivity_element_est(self, preds): pass @@ -329,7 +428,7 @@ def _nuisance_tuning(self): @property def __smpls__inner(self): - return self._smpls[self._i_rep] + return self._smpls_inner[self._i_rep] def draw_sample_splitting(self): """ @@ -357,13 +456,13 @@ def set_sample_splitting(self): def _compute_score(self, psi_elements, coef): - score_1 = psi_elements["y"] * np.exp(-coef * psi_elements["r_hat"]) * psi_elements["d_tilde"] + score_1 = psi_elements["y"] * np.exp(-coef * psi_elements["d"]) * psi_elements["d_tilde"] return psi_elements["psi_hat"] * (score_1 - psi_elements["score_const"]) def _compute_score_deriv(self, psi_elements, coef, inds=None): - deriv_1 = - psi_elements["y"] * np.exp(-coef * psi_elements["r_hat"]) * psi_elements["d"] + deriv_1 = - 
psi_elements["y"] * np.exp(-coef * psi_elements["d"]) * psi_elements["d"] return psi_elements["psi_hat"] * psi_elements["d_tilde"] * deriv_1 diff --git a/doubleml/logistic/tests/_utils_logistic_manual.py b/doubleml/logistic/tests/_utils_logistic_manual.py new file mode 100644 index 000000000..ae53992a6 --- /dev/null +++ b/doubleml/logistic/tests/_utils_logistic_manual.py @@ -0,0 +1,346 @@ +import numpy as np +import scipy +from sklearn.base import clone, is_classifier + +from ...tests._utils_boot import boot_manual, draw_weights +from ...tests._utils import fit_predict, fit_predict_proba, tune_grid_search + + +def fit_logistic_multitreat(y, x, d, learner_l, learner_m, learner_g, all_smpls, score, + n_rep=1, l_params=None, m_params=None, g_params=None, + use_other_treat_as_covariate=True): + n_obs = len(y) + n_d = d.shape[1] + + thetas = list() + ses = list() + all_l_hat = list() + all_m_hat = list() + all_g_hat = list() + for i_rep in range(n_rep): + smpls = all_smpls[i_rep] + thetas_this_rep = np.full(n_d, np.nan) + ses_this_rep = np.full(n_d, np.nan) + all_l_hat_this_rep = list() + all_m_hat_this_rep = list() + all_g_hat_this_rep = list() + + for i_d in range(n_d): + if use_other_treat_as_covariate: + xd = np.hstack((x, np.delete(d, i_d, axis=1))) + else: + xd = x + + l_hat, m_hat, g_hat, thetas_this_rep[i_d], ses_this_rep[i_d] = fit_plr_single_split( + y, xd, d[:, i_d], + learner_l, learner_m, learner_g, + smpls, score, + l_params, m_params, g_params) + all_l_hat_this_rep.append(l_hat) + all_m_hat_this_rep.append(m_hat) + all_g_hat_this_rep.append(g_hat) + + thetas.append(thetas_this_rep) + ses.append(ses_this_rep) + all_l_hat.append(all_l_hat_this_rep) + all_m_hat.append(all_m_hat_this_rep) + all_g_hat.append(all_g_hat_this_rep) + + theta = np.full(n_d, np.nan) + se = np.full(n_d, np.nan) + for i_d in range(n_d): + theta_vec = np.array([xx[i_d] for xx in thetas]) + se_vec = np.array([xx[i_d] for xx in ses]) + theta[i_d] = np.median(theta_vec) + se[i_d] = 
np.sqrt(np.median(np.power(se_vec, 2) * n_obs + np.power(theta_vec - theta[i_d], 2)) / n_obs) + + res = {'theta': theta, 'se': se, + 'thetas': thetas, 'ses': ses, + 'all_l_hat': all_l_hat, 'all_m_hat': all_m_hat, 'all_g_hat': all_g_hat} + + return res + + +def fit_logistic(y, x, d, learner_l, learner_m, learner_g, all_smpls, score, + n_rep=1, l_params=None, m_params=None, g_params=None): + n_obs = len(y) + + thetas = np.zeros(n_rep) + ses = np.zeros(n_rep) + all_l_hat = list() + all_m_hat = list() + all_g_hat = list() + for i_rep in range(n_rep): + smpls = all_smpls[i_rep] + l_hat, m_hat, g_hat, thetas[i_rep], ses[i_rep] = fit_plr_single_split( + y, x, d, + learner_l, learner_m, learner_g, + smpls, score, + l_params, m_params, g_params) + all_l_hat.append(l_hat) + all_m_hat.append(m_hat) + all_g_hat.append(g_hat) + + theta = np.median(thetas) + se = np.sqrt(np.median(np.power(ses, 2) * n_obs + np.power(thetas - theta, 2)) / n_obs) + + res = {'theta': theta, 'se': se, + 'thetas': thetas, 'ses': ses, + 'all_l_hat': all_l_hat, 'all_m_hat': all_m_hat, 'all_g_hat': all_g_hat} + + return res + + +def fit_plr_logistic_split(y, x, d, learner_l, learner_m, learner_g, smpls, score, + l_params=None, m_params=None, g_params=None): + fit_g = (score == 'IV-type') | callable(score) + if is_classifier(learner_m): + l_hat, m_hat, g_hat = fit_nuisance_plr_classifier(y, x, d, + learner_l, learner_m, learner_g, + smpls, fit_g, + l_params, m_params, g_params) + else: + l_hat, m_hat, g_hat = fit_nuisance_plr(y, x, d, + learner_l, learner_m, learner_g, + smpls, fit_g, + l_params, m_params, g_params) + + theta, se = plr_dml2(y, x, d, l_hat, m_hat, g_hat, + smpls, score) + + return l_hat, m_hat, g_hat, theta, se + + +def fit_nuisance_logistic(y, x, d, learner_l, learner_m, learner_g, smpls, fit_g=True, + l_params=None, m_params=None, g_params=None): + ml_l = clone(learner_l) + l_hat = fit_predict(y, x, ml_l, l_params, smpls) + + ml_m = clone(learner_m) + m_hat = fit_predict(d, x, ml_m, 
m_params, smpls) + + if fit_g: + y_minus_l_hat, d_minus_m_hat, _ = compute_plr_residuals(y, d, l_hat, m_hat, [], smpls) + psi_a = -np.multiply(d_minus_m_hat, d_minus_m_hat) + psi_b = np.multiply(d_minus_m_hat, y_minus_l_hat) + theta_initial = -np.nanmean(psi_b) / np.nanmean(psi_a) + + ml_g = clone(learner_g) + g_hat = fit_predict(y - theta_initial*d, x, ml_g, g_params, smpls) + else: + g_hat = [] + + return l_hat, m_hat, g_hat + + +def fit_nuisance_logistic_classifier(y, x, d, learner_l, learner_m, learner_g, smpls, fit_g=True, + l_params=None, m_params=None, g_params=None): + ml_l = clone(learner_l) + l_hat = fit_predict(y, x, ml_l, l_params, smpls) + + ml_m = clone(learner_m) + m_hat = fit_predict_proba(d, x, ml_m, m_params, smpls) + + if fit_g: + y_minus_l_hat, d_minus_m_hat, _ = compute_plr_residuals(y, d, l_hat, m_hat, [], smpls) + psi_a = -np.multiply(d_minus_m_hat, d_minus_m_hat) + psi_b = np.multiply(d_minus_m_hat, y_minus_l_hat) + theta_initial = -np.mean(psi_b) / np.mean(psi_a) + + ml_g = clone(learner_g) + g_hat = fit_predict(y - theta_initial*d, x, ml_g, g_params, smpls) + else: + g_hat = [] + + return l_hat, m_hat, g_hat + + +def tune_nuisance_plr(y, x, d, ml_l, ml_m, ml_g, smpls, n_folds_tune, param_grid_l, param_grid_m, param_grid_g, tune_g=True): + l_tune_res = tune_grid_search(y, x, ml_l, smpls, param_grid_l, n_folds_tune) + + m_tune_res = tune_grid_search(d, x, ml_m, smpls, param_grid_m, n_folds_tune) + + if tune_g: + l_hat = np.full_like(y, np.nan) + m_hat = np.full_like(d, np.nan) + for idx, (train_index, _) in enumerate(smpls): + l_hat[train_index] = l_tune_res[idx].predict(x[train_index, :]) + m_hat[train_index] = m_tune_res[idx].predict(x[train_index, :]) + psi_a = -np.multiply(d - m_hat, d - m_hat) + psi_b = np.multiply(d - m_hat, y - l_hat) + theta_initial = -np.nanmean(psi_b) / np.nanmean(psi_a) + + g_tune_res = tune_grid_search(y - theta_initial*d, x, ml_g, smpls, param_grid_g, n_folds_tune) + g_best_params = [xx.best_params_ for xx in 
g_tune_res] + else: + g_best_params = [] + + l_best_params = [xx.best_params_ for xx in l_tune_res] + m_best_params = [xx.best_params_ for xx in m_tune_res] + + return l_best_params, m_best_params, g_best_params + + +def compute_plr_residuals(y, d, l_hat, m_hat, g_hat, smpls): + y_minus_l_hat = np.full_like(y, np.nan, dtype='float64') + d_minus_m_hat = np.full_like(d, np.nan, dtype='float64') + y_minus_g_hat = np.full_like(y, np.nan, dtype='float64') + for idx, (_, test_index) in enumerate(smpls): + y_minus_l_hat[test_index] = y[test_index] - l_hat[idx] + if len(g_hat) > 0: + y_minus_g_hat[test_index] = y[test_index] - g_hat[idx] + d_minus_m_hat[test_index] = d[test_index] - m_hat[idx] + return y_minus_l_hat, d_minus_m_hat, y_minus_g_hat + + +def plr_dml2(y, x, d, l_hat, m_hat, g_hat, smpls, score): + n_obs = len(y) + y_minus_l_hat, d_minus_m_hat, y_minus_g_hat = compute_plr_residuals(y, d, l_hat, m_hat, g_hat, smpls) + theta_hat = plr_orth(y_minus_l_hat, d_minus_m_hat, y_minus_g_hat, d, score) + se = np.sqrt(var_plr(theta_hat, d, y_minus_l_hat, d_minus_m_hat, y_minus_g_hat, score, n_obs)) + + return theta_hat, se + + +def var_plr(theta, d, y_minus_l_hat, d_minus_m_hat, y_minus_g_hat, score, n_obs): + if score == 'partialling out': + var = 1/n_obs * 1/np.power(np.mean(np.multiply(d_minus_m_hat, d_minus_m_hat)), 2) * \ + np.mean(np.power(np.multiply(y_minus_l_hat - d_minus_m_hat*theta, d_minus_m_hat), 2)) + else: + assert score == 'IV-type' + var = 1/n_obs * 1/np.power(np.mean(np.multiply(d_minus_m_hat, d)), 2) * \ + np.mean(np.power(np.multiply(y_minus_g_hat - d*theta, d_minus_m_hat), 2)) + + return var + + +def plr_orth(y_minus_l_hat, d_minus_m_hat, y_minus_g_hat, d, score): + if score == 'IV-type': + res = np.mean(np.multiply(d_minus_m_hat, y_minus_g_hat))/np.mean(np.multiply(d_minus_m_hat, d)) + else: + assert score == 'partialling out' + res = scipy.linalg.lstsq(d_minus_m_hat.reshape(-1, 1), y_minus_l_hat)[0] + + return res + + +def boot_plr(y, d, thetas, ses, 
all_l_hat, all_m_hat, all_g_hat, + all_smpls, score, bootstrap, n_rep_boot, + n_rep=1, apply_cross_fitting=True): + all_boot_t_stat = list() + for i_rep in range(n_rep): + smpls = all_smpls[i_rep] + if apply_cross_fitting: + n_obs = len(y) + else: + test_index = smpls[0][1] + n_obs = len(test_index) + weights = draw_weights(bootstrap, n_rep_boot, n_obs) + + boot_t_stat = boot_plr_single_split( + thetas[i_rep], y, d, all_l_hat[i_rep], all_m_hat[i_rep], all_g_hat[i_rep], smpls, + score, ses[i_rep], + weights, n_rep_boot, apply_cross_fitting) + all_boot_t_stat.append(boot_t_stat) + + # differently for plr because of n_rep_boot and multiple treatmentsa + boot_t_stat = np.transpose(np.vstack(all_boot_t_stat)) + + return boot_t_stat + + +def boot_plr_multitreat(y, d, thetas, ses, all_l_hat, all_m_hat, all_g_hat, + all_smpls, score, bootstrap, n_rep_boot, + n_rep=1, apply_cross_fitting=True): + n_d = d.shape[1] + all_boot_t_stat = list() + for i_rep in range(n_rep): + smpls = all_smpls[i_rep] + if apply_cross_fitting: + n_obs = len(y) + else: + test_index = smpls[0][1] + n_obs = len(test_index) + weights = draw_weights(bootstrap, n_rep_boot, n_obs) + + boot_t_stat = np.full((n_d, n_rep_boot), np.nan) + for i_d in range(n_d): + boot_t_stat[i_d, :] = boot_plr_single_split( + thetas[i_rep][i_d], y, d[:, i_d], + all_l_hat[i_rep][i_d], all_m_hat[i_rep][i_d], all_g_hat[i_rep][i_d], + smpls, score, ses[i_rep][i_d], + weights, n_rep_boot, apply_cross_fitting) + + # transpose for shape (n_rep_boot, n_d) + boot_t_stat = np.transpose(boot_t_stat) + all_boot_t_stat.append(boot_t_stat) + + # stack repetitions along the last axis + boot_t_stat = np.stack(all_boot_t_stat, axis=2) + + return boot_t_stat + + +def boot_plr_single_split(theta, y, d, l_hat, m_hat, g_hat, + smpls, score, se, weights, n_rep, apply_cross_fitting): + y_minus_l_hat, d_minus_m_hat, y_minus_g_hat = compute_plr_residuals(y, d, l_hat, m_hat, g_hat, smpls) + + if apply_cross_fitting: + if score == 'partialling out': + 
J = np.mean(-np.multiply(d_minus_m_hat, d_minus_m_hat)) + else: + assert score == 'IV-type' + J = np.mean(-np.multiply(d_minus_m_hat, d)) + else: + test_index = smpls[0][1] + if score == 'partialling out': + J = np.mean(-np.multiply(d_minus_m_hat[test_index], d_minus_m_hat[test_index])) + else: + assert score == 'IV-type' + J = np.mean(-np.multiply(d_minus_m_hat[test_index], d[test_index])) + + if score == 'partialling out': + psi = np.multiply(y_minus_l_hat - d_minus_m_hat * theta, d_minus_m_hat) + else: + assert score == 'IV-type' + psi = np.multiply(y_minus_g_hat - d * theta, d_minus_m_hat) + + boot_t_stat = boot_manual(psi, J, smpls, se, weights, n_rep, apply_cross_fitting) + + return boot_t_stat + + +def fit_sensitivity_elements_plr(y, d, all_coef, predictions, score, n_rep): + n_treat = d.shape[1] + n_obs = len(y) + + sigma2 = np.full(shape=(1, n_rep, n_treat), fill_value=np.nan) + nu2 = np.full(shape=(1, n_rep, n_treat), fill_value=np.nan) + psi_sigma2 = np.full(shape=(n_obs, n_rep, n_treat), fill_value=np.nan) + psi_nu2 = np.full(shape=(n_obs, n_rep, n_treat), fill_value=np.nan) + + for i_rep in range(n_rep): + for i_treat in range(n_treat): + d_tilde = d[:, i_treat] + m_hat = predictions['ml_m'][:, i_rep, i_treat] + theta = all_coef[i_treat, i_rep] + if score == 'partialling out': + l_hat = predictions['ml_l'][:, i_rep, i_treat] + sigma2_score_element = np.square(y - l_hat - np.multiply(theta, d_tilde-m_hat)) + else: + assert score == 'IV-type' + g_hat = predictions['ml_g'][:, i_rep, i_treat] + sigma2_score_element = np.square(y - g_hat - np.multiply(theta, d_tilde)) + + sigma2[0, i_rep, i_treat] = np.mean(sigma2_score_element) + psi_sigma2[:, i_rep, i_treat] = sigma2_score_element - sigma2[0, i_rep, i_treat] + + nu2[0, i_rep, i_treat] = np.divide(1.0, np.mean(np.square(d_tilde-m_hat))) + psi_nu2[:, i_rep, i_treat] = nu2[0, i_rep, i_treat] - \ + np.multiply(np.square(d_tilde-m_hat), np.square(nu2[0, i_rep, i_treat])) + + element_dict = {'sigma2': sigma2, + 
'nu2': nu2, + 'psi_sigma2': psi_sigma2, + 'psi_nu2': psi_nu2} + return element_dict diff --git a/doubleml/logistic/tests/tests_logistic.py b/doubleml/logistic/tests/tests_logistic.py new file mode 100644 index 000000000..2b97bf76b --- /dev/null +++ b/doubleml/logistic/tests/tests_logistic.py @@ -0,0 +1,352 @@ +import pytest +import math +import scipy +import numpy as np +import pandas as pd + +from sklearn.base import clone + +from sklearn.linear_model import LinearRegression, Lasso +from sklearn.ensemble import RandomForestRegressor + +import doubleml as dml + +from ...tests._utils import draw_smpls +from ._utils_logistic_manual import fit_logistic, , boot_plr + + +@pytest.fixture(scope='module', + params=[RandomForestRegressor(max_depth=2, n_estimators=10), + LinearRegression(), + Lasso(alpha=0.1)]) +def learner(request): + return request.param + + +@pytest.fixture(scope='module', + params=['IV-type', 'partialling out']) +def score(request): + return request.param + + +@pytest.fixture(scope="module") +def dml_plr_fixture(generate_data1, learner, score): + boot_methods = ['normal'] + n_folds = 2 + n_rep_boot = 502 + + # collect data + data = generate_data1 + x_cols = data.columns[data.columns.str.startswith('X')].tolist() + + # Set machine learning methods for m & g + ml_l = clone(learner) + ml_m = clone(learner) + ml_g = clone(learner) + + np.random.seed(3141) + obj_dml_data = dml.DoubleMLData(data, 'y', ['d'], x_cols) + if score == 'partialling out': + dml_plr_obj = dml.DoubleMLPLR(obj_dml_data, + ml_l, ml_m, + n_folds=n_folds, + score=score) + else: + assert score == 'IV-type' + dml_plr_obj = dml.DoubleMLPLR(obj_dml_data, + ml_l, ml_m, ml_g, + n_folds, + score=score) + + dml_plr_obj.fit() + + np.random.seed(3141) + y = data['y'].values + x = data.loc[:, x_cols].values + d = data['d'].values + n_obs = len(y) + all_smpls = draw_smpls(n_obs, n_folds) + + res_manual = fit_plr(y, x, d, clone(learner), clone(learner), clone(learner), + all_smpls, score) + + 
np.random.seed(3141) + # test with external nuisance predictions + if score == 'partialling out': + dml_plr_obj_ext = dml.DoubleMLPLR(obj_dml_data, + ml_l, ml_m, + n_folds, + score=score) + else: + assert score == 'IV-type' + dml_plr_obj_ext = dml.DoubleMLPLR(obj_dml_data, + ml_l, ml_m, ml_g, + n_folds, + score=score) + + # synchronize the sample splitting + dml_plr_obj_ext.set_sample_splitting(all_smpls=all_smpls) + + if score == 'partialling out': + prediction_dict = {'d': {'ml_l': dml_plr_obj.predictions['ml_l'].reshape(-1, 1), + 'ml_m': dml_plr_obj.predictions['ml_m'].reshape(-1, 1)}} + else: + assert score == 'IV-type' + prediction_dict = {'d': {'ml_l': dml_plr_obj.predictions['ml_l'].reshape(-1, 1), + 'ml_m': dml_plr_obj.predictions['ml_m'].reshape(-1, 1), + 'ml_g': dml_plr_obj.predictions['ml_g'].reshape(-1, 1)}} + + dml_plr_obj_ext.fit(external_predictions=prediction_dict) + + res_dict = {'coef': dml_plr_obj.coef, + 'coef_manual': res_manual['theta'], + 'coef_ext': dml_plr_obj_ext.coef, + 'se': dml_plr_obj.se, + 'se_manual': res_manual['se'], + 'se_ext': dml_plr_obj_ext.se, + 'boot_methods': boot_methods} + + for bootstrap in boot_methods: + np.random.seed(3141) + boot_t_stat = boot_plr(y, d, res_manual['thetas'], res_manual['ses'], + res_manual['all_l_hat'], res_manual['all_m_hat'], res_manual['all_g_hat'], + all_smpls, score, bootstrap, n_rep_boot) + + np.random.seed(3141) + dml_plr_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot) + np.random.seed(3141) + dml_plr_obj_ext.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot) + res_dict['boot_t_stat' + bootstrap] = dml_plr_obj.boot_t_stat + res_dict['boot_t_stat' + bootstrap + '_manual'] = boot_t_stat.reshape(-1, 1, 1) + res_dict['boot_t_stat' + bootstrap + '_ext'] = dml_plr_obj_ext.boot_t_stat + + # sensitivity tests + res_dict['sensitivity_elements'] = dml_plr_obj.sensitivity_elements + res_dict['sensitivity_elements_manual'] = fit_sensitivity_elements_plr(y, d.reshape(-1, 1), + 
all_coef=dml_plr_obj.all_coef, + predictions=dml_plr_obj.predictions, + score=score, + n_rep=1) + # check if sensitivity score with rho=0 gives equal asymptotic standard deviation + dml_plr_obj.sensitivity_analysis(rho=0.0) + res_dict['sensitivity_ses'] = dml_plr_obj.sensitivity_params['se'] + return res_dict + + +@pytest.mark.ci +def test_dml_plr_coef(dml_plr_fixture): + assert math.isclose(dml_plr_fixture['coef'], + dml_plr_fixture['coef_manual'], + rel_tol=1e-9, abs_tol=1e-4) + assert math.isclose(dml_plr_fixture['coef'], + dml_plr_fixture['coef_ext'], + rel_tol=1e-9, abs_tol=1e-4) + + +@pytest.mark.ci +def test_dml_plr_se(dml_plr_fixture): + assert math.isclose(dml_plr_fixture['se'], + dml_plr_fixture['se_manual'], + rel_tol=1e-9, abs_tol=1e-4) + assert math.isclose(dml_plr_fixture['se'], + dml_plr_fixture['se_ext'], + rel_tol=1e-9, abs_tol=1e-4) + + +@pytest.mark.ci +def test_dml_plr_boot(dml_plr_fixture): + for bootstrap in dml_plr_fixture['boot_methods']: + assert np.allclose(dml_plr_fixture['boot_t_stat' + bootstrap], + dml_plr_fixture['boot_t_stat' + bootstrap + '_manual'], + rtol=1e-9, atol=1e-4) + assert np.allclose(dml_plr_fixture['boot_t_stat' + bootstrap], + dml_plr_fixture['boot_t_stat' + bootstrap + '_ext'], + rtol=1e-9, atol=1e-4) + + +@pytest.mark.ci +def test_dml_plr_sensitivity(dml_plr_fixture): + sensitivity_element_names = ['sigma2', 'nu2', 'psi_sigma2', 'psi_nu2'] + for sensitivity_element in sensitivity_element_names: + assert np.allclose(dml_plr_fixture['sensitivity_elements'][sensitivity_element], + dml_plr_fixture['sensitivity_elements_manual'][sensitivity_element]) + + +@pytest.mark.ci +def test_dml_plr_sensitivity_rho0(dml_plr_fixture): + assert np.allclose(dml_plr_fixture['se'], + dml_plr_fixture['sensitivity_ses']['lower'], + rtol=1e-9, atol=1e-4) + assert np.allclose(dml_plr_fixture['se'], + dml_plr_fixture['sensitivity_ses']['upper'], + rtol=1e-9, atol=1e-4) + + +@pytest.fixture(scope="module") +def 
dml_plr_ols_manual_fixture(generate_data1, score): + learner = LinearRegression() + boot_methods = ['Bayes', 'normal', 'wild'] + n_folds = 2 + n_rep_boot = 501 + + # collect data + data = generate_data1 + x_cols = data.columns[data.columns.str.startswith('X')].tolist() + + # Set machine learning methods for m & g + ml_l = clone(learner) + ml_g = clone(learner) + ml_m = clone(learner) + + obj_dml_data = dml.DoubleMLData(data, 'y', ['d'], x_cols) + if score == 'partialling out': + dml_plr_obj = dml.DoubleMLPLR(obj_dml_data, + ml_l, ml_m, + n_folds=n_folds, + score=score) + else: + assert score == 'IV-type' + dml_plr_obj = dml.DoubleMLPLR(obj_dml_data, + ml_l, ml_m, ml_g, + n_folds, + score=score) + + n = data.shape[0] + this_smpl = list() + xx = int(n/2) + this_smpl.append((np.arange(xx, n), np.arange(0, xx))) + this_smpl.append((np.arange(0, xx), np.arange(xx, n))) + smpls = [this_smpl] + dml_plr_obj.set_sample_splitting(smpls) + + dml_plr_obj.fit() + + y = data['y'].values + x = data.loc[:, x_cols].values + d = data['d'].values + + # add column of ones for intercept + o = np.ones((n, 1)) + x = np.append(x, o, axis=1) + + smpls = dml_plr_obj.smpls[0] + + l_hat = [] + l_hat_vec = np.full_like(y, np.nan) + for (train_index, test_index) in smpls: + ols_est = scipy.linalg.lstsq(x[train_index], y[train_index])[0] + preds = np.dot(x[test_index], ols_est) + l_hat.append(preds) + l_hat_vec[test_index] = preds + + m_hat = [] + m_hat_vec = np.full_like(d, np.nan) + for (train_index, test_index) in smpls: + ols_est = scipy.linalg.lstsq(x[train_index], d[train_index])[0] + preds = np.dot(x[test_index], ols_est) + m_hat.append(preds) + m_hat_vec[test_index] = preds + + g_hat = [] + if score == 'IV-type': + theta_initial = scipy.linalg.lstsq((d - m_hat_vec).reshape(-1, 1), y - l_hat_vec)[0] + for (train_index, test_index) in smpls: + ols_est = scipy.linalg.lstsq(x[train_index], + y[train_index] - d[train_index] * theta_initial)[0] + g_hat.append(np.dot(x[test_index], ols_est)) + 
+ res_manual, se_manual = plr_dml2(y, x, d, + l_hat, m_hat, g_hat, + smpls, score) + + res_dict = {'coef': dml_plr_obj.coef, + 'coef_manual': res_manual, + 'se': dml_plr_obj.se, + 'se_manual': se_manual, + 'boot_methods': boot_methods} + + for bootstrap in boot_methods: + np.random.seed(3141) + boot_t_stat = boot_plr(y, d, [res_manual], [se_manual], + [l_hat], [m_hat], [g_hat], + [smpls], score, bootstrap, n_rep_boot) + + np.random.seed(3141) + dml_plr_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot) + res_dict['boot_t_stat' + bootstrap] = dml_plr_obj.boot_t_stat + res_dict['boot_t_stat' + bootstrap + '_manual'] = boot_t_stat.reshape(-1, 1, 1) + + return res_dict + + +@pytest.mark.ci +def test_dml_plr_ols_manual_coef(dml_plr_ols_manual_fixture): + assert math.isclose(dml_plr_ols_manual_fixture['coef'], + dml_plr_ols_manual_fixture['coef_manual'], + rel_tol=1e-9, abs_tol=1e-4) + + +@pytest.mark.ci +def test_dml_plr_ols_manual_se(dml_plr_ols_manual_fixture): + assert math.isclose(dml_plr_ols_manual_fixture['se'], + dml_plr_ols_manual_fixture['se_manual'], + rel_tol=1e-9, abs_tol=1e-4) + + +@pytest.mark.ci +def test_dml_plr_ols_manual_boot(dml_plr_ols_manual_fixture): + for bootstrap in dml_plr_ols_manual_fixture['boot_methods']: + assert np.allclose(dml_plr_ols_manual_fixture['boot_t_stat' + bootstrap], + dml_plr_ols_manual_fixture['boot_t_stat' + bootstrap + '_manual'], + rtol=1e-9, atol=1e-4) + + +@pytest.fixture(scope='module', + params=["nonrobust", "HC0", "HC1", "HC2", "HC3"]) +def cov_type(request): + return request.param + + +@pytest.mark.ci +def test_dml_plr_cate_gate(score, cov_type): + n = 9 + + # collect data + np.random.seed(42) + obj_dml_data = dml.datasets.make_plr_CCDDHNR2018(n_obs=n) + ml_l = LinearRegression() + ml_g = LinearRegression() + ml_m = LinearRegression() + + dml_plr_obj = dml.DoubleMLPLR(obj_dml_data, + ml_g, ml_m, ml_l, + n_folds=2, + score=score) + dml_plr_obj.fit() + random_basis = pd.DataFrame(np.random.normal(0, 1, size=(n, 5))) 
+ cate = dml_plr_obj.cate(random_basis, cov_type=cov_type) + assert isinstance(cate, dml.DoubleMLBLP) + assert isinstance(cate.confint(), pd.DataFrame) + assert cate.blp_model.cov_type == cov_type + + groups_1 = pd.DataFrame( + np.column_stack([obj_dml_data.data['X1'] <= 0, + obj_dml_data.data['X1'] > 0.2]), + columns=['Group 1', 'Group 2']) + msg = ('At least one group effect is estimated with less than 6 observations.') + with pytest.warns(UserWarning, match=msg): + gate_1 = dml_plr_obj.gate(groups_1, cov_type=cov_type) + assert isinstance(gate_1, dml.utils.blp.DoubleMLBLP) + assert isinstance(gate_1.confint(), pd.DataFrame) + assert all(gate_1.confint().index == groups_1.columns.tolist()) + assert gate_1.blp_model.cov_type == cov_type + + np.random.seed(42) + groups_2 = pd.DataFrame(np.random.choice(["1", "2"], n)) + msg = ('At least one group effect is estimated with less than 6 observations.') + with pytest.warns(UserWarning, match=msg): + gate_2 = dml_plr_obj.gate(groups_2, cov_type=cov_type) + assert isinstance(gate_2, dml.utils.blp.DoubleMLBLP) + assert isinstance(gate_2.confint(), pd.DataFrame) + assert all(gate_2.confint().index == ["Group_1", "Group_2"]) + assert gate_2.blp_model.cov_type == cov_type diff --git a/doubleml/utils/_estimation.py b/doubleml/utils/_estimation.py index 3d99d93a5..3ed110f3c 100644 --- a/doubleml/utils/_estimation.py +++ b/doubleml/utils/_estimation.py @@ -45,10 +45,12 @@ def _fit(estimator, x, y, train_index, idx=None): def _dml_cv_predict( estimator, x, y, smpls=None, n_jobs=None, est_params=None, method="predict", return_train_preds=False, return_models=False -): +, smpls_is_partition=None): n_obs = x.shape[0] - smpls_is_partition = _check_is_partition(smpls, n_obs) + # TODO: Better name for smples_is_partition + if smpls_is_partition is None: + smpls_is_partition = _check_is_partition(smpls, n_obs) fold_specific_params = (est_params is not None) & (not isinstance(est_params, dict)) fold_specific_target = isinstance(y, list) 
manual_cv_predict = ( diff --git a/doubleml/utils/resampling.py b/doubleml/utils/resampling.py index 18153944c..d10145176 100644 --- a/doubleml/utils/resampling.py +++ b/doubleml/utils/resampling.py @@ -33,6 +33,7 @@ def __init__(self, n_obs, stratify=None): self.n_folds = n_folds + self.n_folds_inner = n_folds_inner self.n_rep = n_rep self.n_obs = n_obs self.stratify = stratify @@ -47,10 +48,10 @@ def __init__(self, if self.stratify is None: self.resampling = RepeatedKFold(n_splits=n_folds, n_repeats=n_rep) - self.resampling_inner = RepeatedKFold(n_splits=n_folds_inner) + self.resampling_inner = RepeatedKFold(n_splits=n_folds_inner, n_repeats=1) else: self.resampling = RepeatedStratifiedKFold(n_splits=n_folds, n_repeats=n_rep) - self.resampling_inner = RepeatedStratifiedKFold(n_splits=n_folds_inner) + self.resampling_inner = RepeatedStratifiedKFold(n_splits=n_folds_inner, n_repeats=1) def split_samples(self): all_smpls = [(train, test) for train, test in self.resampling.split(X=np.zeros(self.n_obs), y=self.stratify)] @@ -59,12 +60,12 @@ def split_samples(self): smpls_inner = [] for _ in range(self.n_rep): smpls_inner_rep = [] - for _, test in all_smpls: + for train, test in all_smpls: if self.stratify is None: - smpls_inner_rep.append([(train_inner, test_inner) for train_inner, test_inner in self.resampling_inner.split(X=test)]) + smpls_inner_rep.append([(train[train_inner], train[test_inner]) for train_inner, test_inner in self.resampling_inner.split(X=train)]) else: - smpls_inner_rep.append([(train_inner, test_inner) for train_inner, test_inner in - self.resampling_inner.split(X=np.zeros(len(test)), y=self.stratify[test])]) + smpls_inner_rep.append([(train[train_inner], train[test_inner]) for train_inner, test_inner in + self.resampling_inner.split(X=np.zeros(len(train)), y=self.stratify[train])]) smpls_inner.append(smpls_inner_rep) return smpls, smpls_inner From d729d0a2f1ea0752ddeb9c762452e46d3ad43f14 Mon Sep 17 00:00:00 2001 From: bbd5721 Date: Thu, 27 Feb 
2025 15:24:40 +0100 Subject: [PATCH 04/48] Changed data type of arrays --- doubleml/logistic/logistic.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doubleml/logistic/logistic.py b/doubleml/logistic/logistic.py index 25ba37634..cfb9926ee 100644 --- a/doubleml/logistic/logistic.py +++ b/doubleml/logistic/logistic.py @@ -176,7 +176,7 @@ def _check_data(self, obj_dml_data): def _double_dml_cv_predict(self, estimator, estimator_name, x, y, smpls=None, smpls_inner=None, n_jobs=None, est_params=None, method='predict'): res = {} - res['preds'] = np.zeros_like(y) + res['preds'] = np.zeros(d.shape, dtype=float) res['preds_inner'] = [] res['models'] = [] for smpls_single_split, smpls_double_split in zip(smpls, smpls_inner): @@ -289,7 +289,7 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa W_inner = [] - beta = np.zeros_like(d) + beta = np.zeros(d.shape, dtype=float) for i, (train, test) in enumerate(smpls): M_iteration = M_hat['preds_inner'][i][train] From 8fe7ca667519d79507fb0a8621bb51e54e2983a5 Mon Sep 17 00:00:00 2001 From: bbd5721 Date: Thu, 27 Feb 2025 15:30:40 +0100 Subject: [PATCH 05/48] Fix variable name --- doubleml/logistic/logistic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doubleml/logistic/logistic.py b/doubleml/logistic/logistic.py index cfb9926ee..ab10ceb87 100644 --- a/doubleml/logistic/logistic.py +++ b/doubleml/logistic/logistic.py @@ -176,7 +176,7 @@ def _check_data(self, obj_dml_data): def _double_dml_cv_predict(self, estimator, estimator_name, x, y, smpls=None, smpls_inner=None, n_jobs=None, est_params=None, method='predict'): res = {} - res['preds'] = np.zeros(d.shape, dtype=float) + res['preds'] = np.zeros(y.shape, dtype=float) res['preds_inner'] = [] res['models'] = [] for smpls_single_split, smpls_double_split in zip(smpls, smpls_inner): From 18bac23cbc95b0e6d25af918f1924202cea231b5 Mon Sep 17 00:00:00 2001 From: bbd5721 Date: Wed, 27 Aug 2025 10:22:19 +0200 
Subject: [PATCH 06/48] Moved into plm folder, started testing setup --- doubleml/__init__.py | 2 + doubleml/datasets.py | 1753 +++++++++++++++++ doubleml/double_ml_data.py | 55 +- doubleml/plm/__init__.py | 1 + doubleml/{logistic => plm}/logistic.py | 103 +- .../tests/_utils_logistic_manual.py | 37 +- .../{logistic => plm}/tests/tests_logistic.py | 51 +- doubleml/utils/_estimation.py | 30 +- 8 files changed, 1906 insertions(+), 126 deletions(-) create mode 100644 doubleml/datasets.py rename doubleml/{logistic => plm}/logistic.py (87%) rename doubleml/{logistic => plm}/tests/_utils_logistic_manual.py (87%) rename doubleml/{logistic => plm}/tests/tests_logistic.py (85%) diff --git a/doubleml/__init__.py b/doubleml/__init__.py index 935491167..ba59a07e0 100644 --- a/doubleml/__init__.py +++ b/doubleml/__init__.py @@ -13,6 +13,8 @@ from .irm.pq import DoubleMLPQ from .irm.qte import DoubleMLQTE from .irm.ssm import DoubleMLSSM +from doubleml.plm.logistic import DoubleMLLogit + from .plm.pliv import DoubleMLPLIV from .plm.plr import DoubleMLPLR from .logistic.logistic import DoubleMLLogit diff --git a/doubleml/datasets.py b/doubleml/datasets.py new file mode 100644 index 000000000..629a033aa --- /dev/null +++ b/doubleml/datasets.py @@ -0,0 +1,1753 @@ +import pandas as pd +import numpy as np +import warnings + +from scipy.linalg import toeplitz +from scipy.optimize import minimize_scalar +from scipy.special import expit + +from sklearn.preprocessing import PolynomialFeatures, OneHotEncoder +from sklearn.datasets import make_spd_matrix + +from .double_ml_data import DoubleMLData, DoubleMLClusterData + +_array_alias = ['array', 'np.ndarray', 'np.array', np.ndarray] +_data_frame_alias = ['DataFrame', 'pd.DataFrame', pd.DataFrame] +_dml_data_alias = ['DoubleMLData', DoubleMLData] +_dml_cluster_data_alias = ['DoubleMLClusterData', DoubleMLClusterData] + + +def fetch_401K(return_type='DoubleMLData', polynomial_features=False): + """ + Data set on financial wealth and 401(k) 
plan participation. + + Parameters + ---------- + return_type : + If ``'DoubleMLData'`` or ``DoubleMLData``, returns a ``DoubleMLData`` object. + + If ``'DataFrame'``, ``'pd.DataFrame'`` or ``pd.DataFrame``, returns a ``pd.DataFrame``. + polynomial_features : + If ``True`` polynomial features are added (see replication files of Chernozhukov et al. (2018)). + + References + ---------- + Abadie, A. (2003), Semiparametric instrumental variable estimation of treatment response models. Journal of + Econometrics, 113(2): 231-263. + + Chernozhukov, V., Chetverikov, D., Demirer, M., Duflo, E., Hansen, C., Newey, W. and Robins, J. (2018), + Double/debiased machine learning for treatment and structural parameters. The Econometrics Journal, 21: C1-C68. + doi:`10.1111/ectj.12097 `_. + """ + url = 'https://github.com/VC2015/DMLonGitHub/raw/master/sipp1991.dta' + raw_data = pd.read_stata(url) + + y_col = 'net_tfa' + d_cols = ['e401'] + x_cols = ['age', 'inc', 'educ', 'fsize', 'marr', 'twoearn', 'db', 'pira', 'hown'] + + data = raw_data.copy() + + if polynomial_features: + raise NotImplementedError('polynomial_features os not implemented yet for fetch_401K.') + + if return_type in _data_frame_alias + _dml_data_alias: + if return_type in _data_frame_alias: + return data + else: + return DoubleMLData(data, y_col, d_cols, x_cols) + else: + raise ValueError('Invalid return_type.') + + +def fetch_bonus(return_type='DoubleMLData', polynomial_features=False): + """ + Data set on the Pennsylvania Reemployment Bonus experiment. + + Parameters + ---------- + return_type : + If ``'DoubleMLData'`` or ``DoubleMLData``, returns a ``DoubleMLData`` object. + + If ``'DataFrame'``, ``'pd.DataFrame'`` or ``pd.DataFrame``, returns a ``pd.DataFrame``. + polynomial_features : + If ``True`` polynomial features are added (see replication files of Chernozhukov et al. (2018)). + + References + ---------- + Bilias Y. 
(2000), Sequential Testing of Duration Data: The Case of Pennsylvania 'Reemployment Bonus' Experiment. + Journal of Applied Econometrics, 15(6): 575-594. + + Chernozhukov, V., Chetverikov, D., Demirer, M., Duflo, E., Hansen, C., Newey, W. and Robins, J. (2018), + Double/debiased machine learning for treatment and structural parameters. The Econometrics Journal, 21: C1-C68. + doi:`10.1111/ectj.12097 `_. + """ + url = 'https://raw.githubusercontent.com/VC2015/DMLonGitHub/master/penn_jae.dat' + raw_data = pd.read_csv(url, sep='\s+') + + ind = (raw_data['tg'] == 0) | (raw_data['tg'] == 4) + data = raw_data.copy()[ind] + data.reset_index(inplace=True) + data['tg'] = data['tg'].replace(4, 1) + data['inuidur1'] = np.log(data['inuidur1']) + + # variable dep as factor (dummy encoding) + dummy_enc = OneHotEncoder(drop='first', categories='auto').fit(data.loc[:, ['dep']]) + xx = dummy_enc.transform(data.loc[:, ['dep']]).toarray() + data['dep1'] = xx[:, 0] + data['dep2'] = xx[:, 1] + + y_col = 'inuidur1' + d_cols = ['tg'] + x_cols = ['female', 'black', 'othrace', + 'dep1', 'dep2', + 'q2', 'q3', 'q4', 'q5', 'q6', + 'agelt35', 'agegt54', 'durable', 'lusd', 'husd'] + + if polynomial_features: + poly = PolynomialFeatures(2, include_bias=False) + data_transf = poly.fit_transform(data[x_cols]) + x_cols = list(poly.get_feature_names_out(x_cols)) + + data_transf = pd.DataFrame(data_transf, columns=x_cols) + data = pd.concat((data[[y_col] + d_cols], data_transf), + axis=1, sort=False) + + if return_type in _data_frame_alias + _dml_data_alias: + if return_type in _data_frame_alias: + return data + else: + return DoubleMLData(data, y_col, d_cols, x_cols) + else: + raise ValueError('Invalid return_type.') + + +def _g(x): + return np.power(np.sin(x), 2) + + +def _m(x, nu=0., gamma=1.): + return 0.5 / np.pi * (np.sinh(gamma)) / (np.cosh(gamma) - np.cos(x - nu)) + + +def make_plr_CCDDHNR2018(n_obs=500, dim_x=20, alpha=0.5, return_type='DoubleMLData', **kwargs): + """ + Generates data from a 
partially linear regression model used in Chernozhukov et al. (2018) for Figure 1. + The data generating process is defined as + + .. math:: + + d_i &= m_0(x_i) + s_1 v_i, & &v_i \\sim \\mathcal{N}(0,1), + + y_i &= \\alpha d_i + g_0(x_i) + s_2 \\zeta_i, & &\\zeta_i \\sim \\mathcal{N}(0,1), + + + with covariates :math:`x_i \\sim \\mathcal{N}(0, \\Sigma)`, where :math:`\\Sigma` is a matrix with entries + :math:`\\Sigma_{kj} = 0.7^{|j-k|}`. + The nuisance functions are given by + + .. math:: + + m_0(x_i) &= a_0 x_{i,1} + a_1 \\frac{\\exp(x_{i,3})}{1+\\exp(x_{i,3})}, + + g_0(x_i) &= b_0 \\frac{\\exp(x_{i,1})}{1+\\exp(x_{i,1})} + b_1 x_{i,3}. + + Parameters + ---------- + n_obs : + The number of observations to simulate. + dim_x : + The number of covariates. + alpha : + The value of the causal parameter. + return_type : + If ``'DoubleMLData'`` or ``DoubleMLData``, returns a ``DoubleMLData`` object. + + If ``'DataFrame'``, ``'pd.DataFrame'`` or ``pd.DataFrame``, returns a ``pd.DataFrame``. + + If ``'array'``, ``'np.ndarray'``, ``'np.array'`` or ``np.ndarray``, returns ``np.ndarray``'s ``(x, y, d)``. + **kwargs + Additional keyword arguments to set non-default values for the parameters + :math:`a_0=1`, :math:`a_1=0.25`, :math:`s_1=1`, :math:`b_0=1`, :math:`b_1=0.25` or :math:`s_2=1`. + + References + ---------- + Chernozhukov, V., Chetverikov, D., Demirer, M., Duflo, E., Hansen, C., Newey, W. and Robins, J. (2018), + Double/debiased machine learning for treatment and structural parameters. The Econometrics Journal, 21: C1-C68. + doi:`10.1111/ectj.12097 `_. + """ + a_0 = kwargs.get('a_0', 1.) + a_1 = kwargs.get('a_1', 0.25) + s_1 = kwargs.get('s_1', 1.) + + b_0 = kwargs.get('b_0', 1.) + b_1 = kwargs.get('b_1', 0.25) + s_2 = kwargs.get('s_2', 1.) 
+ + cov_mat = toeplitz([np.power(0.7, k) for k in range(dim_x)]) + x = np.random.multivariate_normal(np.zeros(dim_x), cov_mat, size=[n_obs, ]) + + d = a_0 * x[:, 0] + a_1 * np.divide(np.exp(x[:, 2]), 1 + np.exp(x[:, 2])) \ + + s_1 * np.random.standard_normal(size=[n_obs, ]) + y = alpha * d + b_0 * np.divide(np.exp(x[:, 0]), 1 + np.exp(x[:, 0])) \ + + b_1 * x[:, 2] + s_2 * np.random.standard_normal(size=[n_obs, ]) + + if return_type in _array_alias: + return x, y, d + elif return_type in _data_frame_alias + _dml_data_alias: + x_cols = [f'X{i + 1}' for i in np.arange(dim_x)] + data = pd.DataFrame(np.column_stack((x, y, d)), + columns=x_cols + ['y', 'd']) + if return_type in _data_frame_alias: + return data + else: + return DoubleMLData(data, 'y', 'd', x_cols) + else: + raise ValueError('Invalid return_type.') + + +def make_plr_turrell2018(n_obs=100, dim_x=20, theta=0.5, return_type='DoubleMLData', **kwargs): + """ + Generates data from a partially linear regression model used in a blog article by Turrell (2018). + The data generating process is defined as + + .. math:: + + d_i &= m_0(x_i' b) + v_i, & &v_i \\sim \\mathcal{N}(0,1), + + y_i &= \\theta d_i + g_0(x_i' b) + u_i, & &u_i \\sim \\mathcal{N}(0,1), + + + with covariates :math:`x_i \\sim \\mathcal{N}(0, \\Sigma)`, where :math:`\\Sigma` is a random symmetric, + positive-definite matrix generated with :py:meth:`sklearn.datasets.make_spd_matrix`. + :math:`b` is a vector with entries :math:`b_j=\\frac{1}{j}` and the nuisance functions are given by + + .. math:: + + m_0(x_i) &= \\frac{1}{2 \\pi} \\frac{\\sinh(\\gamma)}{\\cosh(\\gamma) - \\cos(x_i-\\nu)}, + + g_0(x_i) &= \\sin(x_i)^2. + + Parameters + ---------- + n_obs : + The number of observations to simulate. + dim_x : + The number of covariates. + theta : + The value of the causal parameter. + return_type : + If ``'DoubleMLData'`` or ``DoubleMLData``, returns a ``DoubleMLData`` object. 
+ + If ``'DataFrame'``, ``'pd.DataFrame'`` or ``pd.DataFrame``, returns a ``pd.DataFrame``. + + If ``'array'``, ``'np.ndarray'``, ``'np.array'`` or ``np.ndarray``, returns ``np.ndarray``'s ``(x, y, d)``. + **kwargs + Additional keyword arguments to set non-default values for the parameters + :math:`\\nu=0`, or :math:`\\gamma=1`. + + References + ---------- + Turrell, A. (2018), Econometrics in Python part I - Double machine learning, Markov Wanderer: A blog on economics, + science, coding and data. `https://aeturrell.com/blog/posts/econometrics-in-python-parti-ml/ + `_. + """ + nu = kwargs.get('nu', 0.) + gamma = kwargs.get('gamma', 1.) + + b = [1 / k for k in range(1, dim_x + 1)] + sigma = make_spd_matrix(dim_x) + + x = np.random.multivariate_normal(np.zeros(dim_x), sigma, size=[n_obs, ]) + G = _g(np.dot(x, b)) + M = _m(np.dot(x, b), nu=nu, gamma=gamma) + d = M + np.random.standard_normal(size=[n_obs, ]) + y = np.dot(theta, d) + G + np.random.standard_normal(size=[n_obs, ]) + + if return_type in _array_alias: + return x, y, d + elif return_type in _data_frame_alias + _dml_data_alias: + x_cols = [f'X{i + 1}' for i in np.arange(dim_x)] + data = pd.DataFrame(np.column_stack((x, y, d)), + columns=x_cols + ['y', 'd']) + if return_type in _data_frame_alias: + return data + else: + return DoubleMLData(data, 'y', 'd', x_cols) + else: + raise ValueError('Invalid return_type.') + + +def make_irm_data(n_obs=500, dim_x=20, theta=0, R2_d=0.5, R2_y=0.5, return_type='DoubleMLData'): + """ + Generates data from a interactive regression (IRM) model. + The data generating process is defined as + + .. 
math:: + + d_i &= 1\\left\\lbrace \\frac{\\exp(c_d x_i' \\beta)}{1+\\exp(c_d x_i' \\beta)} > v_i \\right\\rbrace, & &v_i + \\sim \\mathcal{U}(0,1), + + y_i &= \\theta d_i + c_y x_i' \\beta d_i + \\zeta_i, & &\\zeta_i \\sim \\mathcal{N}(0,1), + + with covariates :math:`x_i \\sim \\mathcal{N}(0, \\Sigma)`, where :math:`\\Sigma` is a matrix with entries + :math:`\\Sigma_{kj} = 0.5^{|j-k|}`. + :math:`\\beta` is a `dim_x`-vector with entries :math:`\\beta_j=\\frac{1}{j^2}` and the constants :math:`c_y` and + :math:`c_d` are given by + + .. math:: + + c_y = \\sqrt{\\frac{R_y^2}{(1-R_y^2) \\beta' \\Sigma \\beta}}, \\qquad c_d = + \\sqrt{\\frac{(\\pi^2 /3) R_d^2}{(1-R_d^2) \\beta' \\Sigma \\beta}}. + + The data generating process is inspired by a process used in the simulation experiment (see Appendix P) of Belloni + et al. (2017). + + Parameters + ---------- + n_obs : + The number of observations to simulate. + dim_x : + The number of covariates. + theta : + The value of the causal parameter. + R2_d : + The value of the parameter :math:`R_d^2`. + R2_y : + The value of the parameter :math:`R_y^2`. + return_type : + If ``'DoubleMLData'`` or ``DoubleMLData``, returns a ``DoubleMLData`` object. + + If ``'DataFrame'``, ``'pd.DataFrame'`` or ``pd.DataFrame``, returns a ``pd.DataFrame``. + + If ``'array'``, ``'np.ndarray'``, ``'np.array'`` or ``np.ndarray``, returns ``np.ndarray``'s ``(x, y, d)``. + + References + ---------- + Belloni, A., Chernozhukov, V., Fernández‐Val, I. and Hansen, C. (2017). Program Evaluation and Causal Inference With + High‐Dimensional Data. Econometrica, 85: 233-298. 
+ """ + # inspired by https://onlinelibrary.wiley.com/doi/abs/10.3982/ECTA12723, see suplement + v = np.random.uniform(size=[n_obs, ]) + zeta = np.random.standard_normal(size=[n_obs, ]) + + cov_mat = toeplitz([np.power(0.5, k) for k in range(dim_x)]) + x = np.random.multivariate_normal(np.zeros(dim_x), cov_mat, size=[n_obs, ]) + + beta = [1 / (k ** 2) for k in range(1, dim_x + 1)] + b_sigma_b = np.dot(np.dot(cov_mat, beta), beta) + c_y = np.sqrt(R2_y / ((1 - R2_y) * b_sigma_b)) + c_d = np.sqrt(np.pi ** 2 / 3. * R2_d / ((1 - R2_d) * b_sigma_b)) + + xx = np.exp(np.dot(x, np.multiply(beta, c_d))) + d = 1. * ((xx / (1 + xx)) > v) + + y = d * theta + d * np.dot(x, np.multiply(beta, c_y)) + zeta + + if return_type in _array_alias: + return x, y, d + elif return_type in _data_frame_alias + _dml_data_alias: + x_cols = [f'X{i + 1}' for i in np.arange(dim_x)] + data = pd.DataFrame(np.column_stack((x, y, d)), + columns=x_cols + ['y', 'd']) + if return_type in _data_frame_alias: + return data + else: + return DoubleMLData(data, 'y', 'd', x_cols) + else: + raise ValueError('Invalid return_type.') + + +def make_iivm_data(n_obs=500, dim_x=20, theta=1., alpha_x=0.2, return_type='DoubleMLData'): + """ + Generates data from a interactive IV regression (IIVM) model. + The data generating process is defined as + + .. math:: + + d_i &= 1\\left\\lbrace \\alpha_x Z + v_i > 0 \\right\\rbrace, + + y_i &= \\theta d_i + x_i' \\beta + u_i, + + with :math:`Z \\sim \\text{Bernoulli}(0.5)` and + + .. math:: + + \\left(\\begin{matrix} u_i \\\\ v_i \\end{matrix} \\right) \\sim + \\mathcal{N}\\left(0, \\left(\\begin{matrix} 1 & 0.3 \\\\ 0.3 & 1 \\end{matrix} \\right) \\right). + + The covariates :math:`x_i \\sim \\mathcal{N}(0, \\Sigma)`, where :math:`\\Sigma` is a matrix with entries + :math:`\\Sigma_{kj} = 0.5^{|j-k|}` and :math:`\\beta` is a `dim_x`-vector with entries + :math:`\\beta_j=\\frac{1}{j^2}`. 
+ + The data generating process is inspired by a process used in the simulation experiment of Farbmacher, Gruber and + Klaassen (2020). + + Parameters + ---------- + n_obs : + The number of observations to simulate. + dim_x : + The number of covariates. + theta : + The value of the causal parameter. + alpha_x : + The value of the parameter :math:`\\alpha_x`. + return_type : + If ``'DoubleMLData'`` or ``DoubleMLData``, returns a ``DoubleMLData`` object. + + If ``'DataFrame'``, ``'pd.DataFrame'`` or ``pd.DataFrame``, returns a ``pd.DataFrame``. + + If ``'array'``, ``'np.ndarray'``, ``'np.array'`` or ``np.ndarray``, returns ``np.ndarray``'s ``(x, y, d, z)``. + + References + ---------- + Farbmacher, H., Guber, R. and Klaaßen, S. (2020). Instrument Validity Tests with Causal Forests. MEA Discussion + Paper No. 13-2020. Available at SSRN: http://dx.doi.org/10.2139/ssrn.3619201. + """ + # inspired by https://papers.ssrn.com/sol3/papers.cfm?abstract_id=3619201 + xx = np.random.multivariate_normal(np.zeros(2), + np.array([[1., 0.3], [0.3, 1.]]), + size=[n_obs, ]) + u = xx[:, 0] + v = xx[:, 1] + + cov_mat = toeplitz([np.power(0.5, k) for k in range(dim_x)]) + x = np.random.multivariate_normal(np.zeros(dim_x), cov_mat, size=[n_obs, ]) + + beta = [1 / (k ** 2) for k in range(1, dim_x + 1)] + + z = np.random.binomial(p=0.5, n=1, size=[n_obs, ]) + d = 1. 
* (alpha_x * z + v > 0) + + y = d * theta + np.dot(x, beta) + u + + if return_type in _array_alias: + return x, y, d, z + elif return_type in _data_frame_alias + _dml_data_alias: + x_cols = [f'X{i + 1}' for i in np.arange(dim_x)] + data = pd.DataFrame(np.column_stack((x, y, d, z)), + columns=x_cols + ['y', 'd', 'z']) + if return_type in _data_frame_alias: + return data + else: + return DoubleMLData(data, 'y', 'd', x_cols, 'z') + else: + raise ValueError('Invalid return_type.') + + +def _make_pliv_data(n_obs=100, dim_x=20, theta=0.5, gamma_z=0.4, return_type='DoubleMLData'): + b = [1 / k for k in range(1, dim_x + 1)] + sigma = make_spd_matrix(dim_x) + + x = np.random.multivariate_normal(np.zeros(dim_x), sigma, size=[n_obs, ]) + G = _g(np.dot(x, b)) + # instrument + z = _m(np.dot(x, b)) + np.random.standard_normal(size=[n_obs, ]) + # treatment + M = _m(gamma_z * z + np.dot(x, b)) + d = M + np.random.standard_normal(size=[n_obs, ]) + y = np.dot(theta, d) + G + np.random.standard_normal(size=[n_obs, ]) + + if return_type in _array_alias: + return x, y, d, z + elif return_type in _data_frame_alias + _dml_data_alias: + x_cols = [f'X{i + 1}' for i in np.arange(dim_x)] + data = pd.DataFrame(np.column_stack((x, y, d, z)), + columns=x_cols + ['y', 'd', 'z']) + if return_type in _data_frame_alias: + return data + else: + return DoubleMLData(data, 'y', 'd', x_cols, 'z') + else: + raise ValueError('Invalid return_type.') + + +def make_pliv_CHS2015(n_obs, alpha=1., dim_x=200, dim_z=150, return_type='DoubleMLData'): + """ + Generates data from a partially linear IV regression model used in Chernozhukov, Hansen and Spindler (2015). + The data generating process is defined as + + .. math:: + + z_i &= \\Pi x_i + \\zeta_i, + + d_i &= x_i' \\gamma + z_i' \\delta + u_i, + + y_i &= \\alpha d_i + x_i' \\beta + \\varepsilon_i, + + with + + .. 
math:: + + \\left(\\begin{matrix} \\varepsilon_i \\\\ u_i \\\\ \\zeta_i \\\\ x_i \\end{matrix} \\right) \\sim + \\mathcal{N}\\left(0, \\left(\\begin{matrix} 1 & 0.6 & 0 & 0 \\\\ 0.6 & 1 & 0 & 0 \\\\ + 0 & 0 & 0.25 I_{p_n^z} & 0 \\\\ 0 & 0 & 0 & \\Sigma \\end{matrix} \\right) \\right) + + where :math:`\\Sigma` is a :math:`p_n^x \\times p_n^x` matrix with entries + :math:`\\Sigma_{kj} = 0.5^{|j-k|}` and :math:`I_{p_n^z}` is the :math:`p_n^z \\times p_n^z` identity matrix. + :math:`\\beta = \\gamma` is a :math:`p_n^x`-vector with entries :math:`\\beta_j=\\frac{1}{j^2}`, + :math:`\\delta` is a :math:`p_n^z`-vector with entries :math:`\\delta_j=\\frac{1}{j^2}` + and :math:`\\Pi = (I_{p_n^z}, 0_{p_n^z \\times (p_n^x - p_n^z)})`. + + Parameters + ---------- + n_obs : + The number of observations to simulate. + alpha : + The value of the causal parameter. + dim_x : + The number of covariates. + dim_z : + The number of instruments. + return_type : + If ``'DoubleMLData'`` or ``DoubleMLData``, returns a ``DoubleMLData`` object. + + If ``'DataFrame'``, ``'pd.DataFrame'`` or ``pd.DataFrame``, returns a ``pd.DataFrame``. + + If ``'array'``, ``'np.ndarray'``, ``'np.array'`` or ``np.ndarray``, returns ``np.ndarray``'s ``(x, y, d, z)``. + + References + ---------- + Chernozhukov, V., Hansen, C. and Spindler, M. (2015), Post-Selection and Post-Regularization Inference in Linear + Models with Many Controls and Instruments. American Economic Review: Papers and Proceedings, 105 (5): 486-90. 
+ """ + assert dim_x >= dim_z + # see https://assets.aeaweb.org/asset-server/articles-attachments/aer/app/10505/P2015_1022_app.pdf + xx = np.random.multivariate_normal(np.zeros(2), + np.array([[1., 0.6], [0.6, 1.]]), + size=[n_obs, ]) + epsilon = xx[:, 0] + u = xx[:, 1] + + sigma = toeplitz([np.power(0.5, k) for k in range(0, dim_x)]) + x = np.random.multivariate_normal(np.zeros(dim_x), + sigma, + size=[n_obs, ]) + + I_z = np.eye(dim_z) + xi = np.random.multivariate_normal(np.zeros(dim_z), + 0.25 * I_z, + size=[n_obs, ]) + + beta = [1 / (k ** 2) for k in range(1, dim_x + 1)] + gamma = beta + delta = [1 / (k ** 2) for k in range(1, dim_z + 1)] + Pi = np.hstack((I_z, np.zeros((dim_z, dim_x - dim_z)))) + + z = np.dot(x, np.transpose(Pi)) + xi + d = np.dot(x, gamma) + np.dot(z, delta) + u + y = alpha * d + np.dot(x, beta) + epsilon + + if return_type in _array_alias: + return x, y, d, z + elif return_type in _data_frame_alias + _dml_data_alias: + x_cols = [f'X{i + 1}' for i in np.arange(dim_x)] + z_cols = [f'Z{i + 1}' for i in np.arange(dim_z)] + data = pd.DataFrame(np.column_stack((x, y, d, z)), + columns=x_cols + ['y', 'd'] + z_cols) + if return_type in _data_frame_alias: + return data + else: + return DoubleMLData(data, 'y', 'd', x_cols, z_cols) + else: + raise ValueError('Invalid return_type.') + + +def make_pliv_multiway_cluster_CKMS2021(N=25, M=25, dim_X=100, theta=1., return_type='DoubleMLClusterData', **kwargs): + """ + Generates data from a partially linear IV regression model with multiway cluster sample used in Chiang et al. + (2021). The data generating process is defined as + + .. math:: + + Z_{ij} &= X_{ij}' \\xi_0 + V_{ij}, + + D_{ij} &= Z_{ij}' \\pi_{10} + X_{ij}' \\pi_{20} + v_{ij}, + + Y_{ij} &= D_{ij} \\theta + X_{ij}' \\zeta_0 + \\varepsilon_{ij}, + + with + + .. 
math:: + + X_{ij} &= (1 - \\omega_1^X - \\omega_2^X) \\alpha_{ij}^X + + \\omega_1^X \\alpha_{i}^X + \\omega_2^X \\alpha_{j}^X, + + \\varepsilon_{ij} &= (1 - \\omega_1^\\varepsilon - \\omega_2^\\varepsilon) \\alpha_{ij}^\\varepsilon + + \\omega_1^\\varepsilon \\alpha_{i}^\\varepsilon + \\omega_2^\\varepsilon \\alpha_{j}^\\varepsilon, + + v_{ij} &= (1 - \\omega_1^v - \\omega_2^v) \\alpha_{ij}^v + + \\omega_1^v \\alpha_{i}^v + \\omega_2^v \\alpha_{j}^v, + + V_{ij} &= (1 - \\omega_1^V - \\omega_2^V) \\alpha_{ij}^V + + \\omega_1^V \\alpha_{i}^V + \\omega_2^V \\alpha_{j}^V, + + and :math:`\\alpha_{ij}^X, \\alpha_{i}^X, \\alpha_{j}^X \\sim \\mathcal{N}(0, \\Sigma)` + where :math:`\\Sigma` is a :math:`p_x \\times p_x` matrix with entries + :math:`\\Sigma_{kj} = s_X^{|j-k|}`. + Further + + .. math:: + + \\left(\\begin{matrix} \\alpha_{ij}^\\varepsilon \\\\ \\alpha_{ij}^v \\end{matrix}\\right), + \\left(\\begin{matrix} \\alpha_{i}^\\varepsilon \\\\ \\alpha_{i}^v \\end{matrix}\\right), + \\left(\\begin{matrix} \\alpha_{j}^\\varepsilon \\\\ \\alpha_{j}^v \\end{matrix}\\right) + \\sim \\mathcal{N}\\left(0, \\left(\\begin{matrix} 1 & s_{\\varepsilon v} \\\\ + s_{\\varepsilon v} & 1 \\end{matrix} \\right) \\right) + + + and :math:`\\alpha_{ij}^V, \\alpha_{i}^V, \\alpha_{j}^V \\sim \\mathcal{N}(0, 1)`. + + Parameters + ---------- + N : + The number of observations (first dimension). + M : + The number of observations (second dimension). + dim_X : + The number of covariates. + theta : + The value of the causal parameter. + return_type : + If ``'DoubleMLClusterData'`` or ``DoubleMLClusterData``, returns a ``DoubleMLClusterData`` object where + ``DoubleMLClusterData.data`` is a ``pd.DataFrame``. + + If ``'DataFrame'``, ``'pd.DataFrame'`` or ``pd.DataFrame``, returns a ``pd.DataFrame``. + + If ``'array'``, ``'np.ndarray'``, ``'np.array'`` or ``np.ndarray``, returns ``np.ndarray``'s + ``(x, y, d, cluster_vars, z)``. 
+ **kwargs + Additional keyword arguments to set non-default values for the parameters + :math:`\\pi_{10}=1.0`, :math:`\\omega_X = \\omega_{\\varepsilon} = \\omega_V = \\omega_v = (0.25, 0.25)`, + :math:`s_X = s_{\\varepsilon v} = 0.25`, + or the :math:`p_x`-vectors :math:`\\zeta_0 = \\pi_{20} = \\xi_0` with default entries + :math:`(\\zeta_{0})_j = 0.5^j`. + + References + ---------- + Chiang, H. D., Kato K., Ma, Y. and Sasaki, Y. (2021), Multiway Cluster Robust Double/Debiased Machine Learning, + Journal of Business & Economic Statistics, + doi: `10.1080/07350015.2021.1895815 `_, + arXiv:`1909.03489 `_. + """ + # additional parameters specifiable via kwargs + pi_10 = kwargs.get('pi_10', 1.0) + + xx = np.arange(1, dim_X + 1) + zeta_0 = kwargs.get('zeta_0', np.power(0.5, xx)) + pi_20 = kwargs.get('pi_20', np.power(0.5, xx)) + xi_0 = kwargs.get('xi_0', np.power(0.5, xx)) + + omega_X = kwargs.get('omega_X', np.array([0.25, 0.25])) + omega_epsilon = kwargs.get('omega_epsilon', np.array([0.25, 0.25])) + omega_v = kwargs.get('omega_v', np.array([0.25, 0.25])) + omega_V = kwargs.get('omega_V', np.array([0.25, 0.25])) + + s_X = kwargs.get('s_X', 0.25) + s_epsilon_v = kwargs.get('s_epsilon_v', 0.25) + + # use np.tile() and np.repeat() for repeating vectors in different styles, i.e., + # np.tile([v1, v2, v3], 2) [v1, v2, v3, v1, v2, v3] + # np.repeat([v1, v2, v3], 2) [v1, v1, v2, v2, v3, v3] + + alpha_V = np.random.normal(size=(N * M)) + alpha_V_i = np.repeat(np.random.normal(size=N), M) + alpha_V_j = np.tile(np.random.normal(size=M), N) + + cov_mat = np.array([[1, s_epsilon_v], [s_epsilon_v, 1]]) + alpha_eps_v = np.random.multivariate_normal(np.zeros(2), cov_mat, size=[N * M, ]) + alpha_eps = alpha_eps_v[:, 0] + alpha_v = alpha_eps_v[:, 1] + + alpha_eps_v_i = np.random.multivariate_normal(np.zeros(2), cov_mat, size=[N, ]) + alpha_eps_i = np.repeat(alpha_eps_v_i[:, 0], M) + alpha_v_i = np.repeat(alpha_eps_v_i[:, 1], M) + + alpha_eps_v_j = 
np.random.multivariate_normal(np.zeros(2), cov_mat, size=[M, ]) + alpha_eps_j = np.tile(alpha_eps_v_j[:, 0], N) + alpha_v_j = np.tile(alpha_eps_v_j[:, 1], N) + + cov_mat = toeplitz([np.power(s_X, k) for k in range(dim_X)]) + alpha_X = np.random.multivariate_normal(np.zeros(dim_X), cov_mat, size=[N * M, ]) + alpha_X_i = np.repeat(np.random.multivariate_normal(np.zeros(dim_X), cov_mat, size=[N, ]), + M, axis=0) + alpha_X_j = np.tile(np.random.multivariate_normal(np.zeros(dim_X), cov_mat, size=[M, ]), + (N, 1)) + + # generate variables + x = (1 - omega_X[0] - omega_X[1]) * alpha_X \ + + omega_X[0] * alpha_X_i + omega_X[1] * alpha_X_j + + eps = (1 - omega_epsilon[0] - omega_epsilon[1]) * alpha_eps \ + + omega_epsilon[0] * alpha_eps_i + omega_epsilon[1] * alpha_eps_j + + v = (1 - omega_v[0] - omega_v[1]) * alpha_v \ + + omega_v[0] * alpha_v_i + omega_v[1] * alpha_v_j + + V = (1 - omega_V[0] - omega_V[1]) * alpha_V \ + + omega_V[0] * alpha_V_i + omega_V[1] * alpha_V_j + + z = np.matmul(x, xi_0) + V + d = z * pi_10 + np.matmul(x, pi_20) + v + y = d * theta + np.matmul(x, zeta_0) + eps + + cluster_cols = ['cluster_var_i', 'cluster_var_j'] + cluster_vars = pd.MultiIndex.from_product([range(N), range(M)]).to_frame(name=cluster_cols).reset_index(drop=True) + + if return_type in _array_alias: + return x, y, d, cluster_vars.values, z + elif return_type in _data_frame_alias + _dml_cluster_data_alias: + x_cols = [f'X{i + 1}' for i in np.arange(dim_X)] + data = pd.concat((cluster_vars, + pd.DataFrame(np.column_stack((x, y, d, z)), columns=x_cols + ['Y', 'D', 'Z'])), + axis=1) + if return_type in _data_frame_alias: + return data + else: + return DoubleMLClusterData(data, 'Y', 'D', cluster_cols, x_cols, 'Z') + else: + raise ValueError('Invalid return_type.') + + +def make_did_SZ2020(n_obs=500, dgp_type=1, cross_sectional_data=False, return_type='DoubleMLData', **kwargs): + """ + Generates data from a difference-in-differences model used in Sant'Anna and Zhao (2020). 
+ The data generating process is defined as follows. For a generic :math:`W=(W_1, W_2, W_3, W_4)^T`, let + + .. math:: + + f_{reg}(W) &= 210 + 27.4 \\cdot W_1 +13.7 \\cdot (W_2 + W_3 + W_4), + + f_{ps}(W) &= 0.75 \\cdot (-W_1 + 0.5 \\cdot W_2 -0.25 \\cdot W_3 - 0.1 \\cdot W_4). + + + Let :math:`X= (X_1, X_2, X_3, X_4)^T \\sim \\mathcal{N}(0, \\Sigma)`, where :math:`\\Sigma` is a matrix with entries + :math:`\\Sigma_{kj} = c^{|j-k|}`. The default value is :math:`c = 0`, corresponding to the identity matrix. + Further, define :math:`Z_j = (\\tilde{Z_j} - \\mathbb{E}[\\tilde{Z}_j]) / \\sqrt{\\text{Var}(\\tilde{Z}_j)}`, + where :math:`\\tilde{Z}_1 = \\exp(0.5 \\cdot X_1)`, :math:`\\tilde{Z}_2 = 10 + X_2/(1 + \\exp(X_1))`, + :math:`\\tilde{Z}_3 = (0.6 + X_1 \\cdot X_3 / 25)^3` and :math:`\\tilde{Z}_4 = (20 + X_2 + X_4)^2`. + At first define + + .. math:: + + Y_0(0) &= f_{reg}(W_{reg}) + \\nu(W_{reg}, D) + \\varepsilon_0, + + Y_1(d) &= 2 \\cdot f_{reg}(W_{reg}) + \\nu(W_{reg}, D) + \\varepsilon_1(d), + + p(W_{ps}) &= \\frac{\\exp(f_{ps}(W_{ps}))}{1 + \\exp(f_{ps}(W_{ps}))}, + + D &= 1\\{p(W_{ps}) \\ge U\\}, + + where :math:`\\varepsilon_0, \\varepsilon_1(d), d=0, 1` are independent standard normal random variables, + :math:`U \\sim \\mathcal{U}[0, 1]` is a independent standard uniform + and :math:`\\nu(W_{reg}, D)\\sim \\mathcal{N}(D \\cdot f_{reg}(W_{reg}),1)`. + The different data generating processes are defined via + + .. math:: + + DGP1:\\quad W_{reg} &= Z \\quad W_{ps} = Z + + DGP2:\\quad W_{reg} &= Z \\quad W_{ps} = X + + DGP3:\\quad W_{reg} &= X \\quad W_{ps} = Z + + DGP4:\\quad W_{reg} &= X \\quad W_{ps} = X + + DGP5:\\quad W_{reg} &= Z \\quad W_{ps} = 0 + + DGP6:\\quad W_{reg} &= X \\quad W_{ps} = 0, + + such that the last two settings correspond to an experimental setting with treatment probability + of :math:`P(D=1) = \\frac{1}{2}.` + For the panel data the outcome is already defined as the difference :math:`Y = Y_1(D) - Y_0(0)`. 
+ For cross-sectional data the flag ``cross_sectional_data`` has to be set to ``True``. + Then the outcome will be defined to be + + .. math:: + + Y = T \\cdot Y_1(D) + (1-T) \\cdot Y_0(0), + + where :math:`T = 1\\{U_T\\le \\lambda_T \\}` with :math:`U_T\\sim \\mathcal{U}[0, 1]` and :math:`\\lambda_T=0.5`. + The true average treatment effect on the treated is zero for all data generating processes. + + Parameters + ---------- + n_obs : + The number of observations to simulate. + dgp_type : + The DGP to be used. Default value is ``1`` (integer). + cross_sectional_data : + Indicates whether the setting is uses cross-sectional or panel data. Default value is ``False``. + return_type : + If ``'DoubleMLData'`` or ``DoubleMLData``, returns a ``DoubleMLData`` object. + + If ``'DataFrame'``, ``'pd.DataFrame'`` or ``pd.DataFrame``, returns a ``pd.DataFrame``. + + If ``'array'``, ``'np.ndarray'``, ``'np.array'`` or ``np.ndarray``, returns ``np.ndarray``'s ``(x, y, d)`` + or ``(x, y, d, t)``. + **kwargs + Additional keyword arguments to set non-default values for the parameter + :math:`xi=0.75`, :math:`c=0.0` and :math:`\\lambda_T=0.5`. + + References + ---------- + Sant’Anna, P. H. and Zhao, J. (2020), + Doubly robust difference-in-differences estimators. Journal of Econometrics, 219(1), 101-122. + doi:`10.1016/j.jeconom.2020.06.003 `_. 
+ """ + xi = kwargs.get('xi', 0.75) + c = kwargs.get('c', 0.0) + lambda_t = kwargs.get('lambda_t', 0.5) + + def f_reg(w): + res = 210 + 27.4 * w[:, 0] + 13.7 * (w[:, 1] + w[:, 2] + w[:, 3]) + return res + + def f_ps(w, xi): + res = xi * (-w[:, 0] + 0.5 * w[:, 1] - 0.25 * w[:, 2] - 0.1 * w[:, 3]) + return res + + dim_x = 4 + cov_mat = toeplitz([np.power(c, k) for k in range(dim_x)]) + x = np.random.multivariate_normal(np.zeros(dim_x), cov_mat, size=[n_obs, ]) + + z_tilde_1 = np.exp(0.5 * x[:, 0]) + z_tilde_2 = 10 + x[:, 1] / (1 + np.exp(x[:, 0])) + z_tilde_3 = (0.6 + x[:, 0] * x[:, 2] / 25) ** 3 + z_tilde_4 = (20 + x[:, 1] + x[:, 3]) ** 2 + + z_tilde = np.column_stack((z_tilde_1, z_tilde_2, z_tilde_3, z_tilde_4)) + z = (z_tilde - np.mean(z_tilde, axis=0)) / np.std(z_tilde, axis=0) + + # error terms + epsilon_0 = np.random.normal(loc=0, scale=1, size=n_obs) + epsilon_1 = np.random.normal(loc=0, scale=1, size=[n_obs, 2]) + + if dgp_type == 1: + features_ps = z + features_reg = z + elif dgp_type == 2: + features_ps = x + features_reg = z + elif dgp_type == 3: + features_ps = z + features_reg = x + elif dgp_type == 4: + features_ps = x + features_reg = x + elif dgp_type == 5: + features_ps = None + features_reg = z + elif dgp_type == 6: + features_ps = None + features_reg = x + else: + raise ValueError('The dgp_type is not valid.') + + # treatment and propensities + is_experimental = (dgp_type == 5) or (dgp_type == 6) + if is_experimental: + # Set D to be experimental + p = 0.5 * np.ones(n_obs) + else: + p = np.exp(f_ps(features_ps, xi)) / (1 + np.exp(f_ps(features_ps, xi))) + u = np.random.uniform(low=0, high=1, size=n_obs) + d = 1.0 * (p >= u) + + # potential outcomes + nu = np.random.normal(loc=d * f_reg(features_reg), scale=1, size=n_obs) + y0 = f_reg(features_reg) + nu + epsilon_0 + y1_d0 = 2 * f_reg(features_reg) + nu + epsilon_1[:, 0] + y1_d1 = 2 * f_reg(features_reg) + nu + epsilon_1[:, 1] + y1 = d * y1_d1 + (1 - d) * y1_d0 + + if not cross_sectional_data: + y = 
y1 - y0 + + if return_type in _array_alias: + return z, y, d + elif return_type in _data_frame_alias + _dml_data_alias: + z_cols = [f'Z{i + 1}' for i in np.arange(dim_x)] + data = pd.DataFrame(np.column_stack((z, y, d)), + columns=z_cols + ['y', 'd']) + if return_type in _data_frame_alias: + return data + else: + return DoubleMLData(data, 'y', 'd', z_cols) + else: + raise ValueError('Invalid return_type.') + + else: + u_t = np.random.uniform(low=0, high=1, size=n_obs) + t = 1.0 * (u_t <= lambda_t) + y = t * y1 + (1 - t) * y0 + + if return_type in _array_alias: + return z, y, d, t + elif return_type in _data_frame_alias + _dml_data_alias: + z_cols = [f'Z{i + 1}' for i in np.arange(dim_x)] + data = pd.DataFrame(np.column_stack((z, y, d, t)), + columns=z_cols + ['y', 'd', 't']) + if return_type in _data_frame_alias: + return data + else: + return DoubleMLData(data, 'y', 'd', z_cols, t_col='t') + else: + raise ValueError('Invalid return_type.') + + +def make_confounded_irm_data(n_obs=500, theta=0.0, gamma_a=0.127, beta_a=0.58, linear=False, **kwargs): + """ + Generates counfounded data from an interactive regression model. + + The data generating process is defined as follows (inspired by the Monte Carlo simulation used + in Sant'Anna and Zhao (2020)). + + Let :math:`X= (X_1, X_2, X_3, X_4, X_5)^T \\sim \\mathcal{N}(0, \\Sigma)`, where :math:`\\Sigma` corresponds + to the identity matrix. + Further, define :math:`Z_j = (\\tilde{Z_j} - \\mathbb{E}[\\tilde{Z}_j]) / \\sqrt{\\text{Var}(\\tilde{Z}_j)}`, + where + + .. math:: + + \\tilde{Z}_1 &= \\exp(0.5 \\cdot X_1) + + \\tilde{Z}_2 &= 10 + X_2/(1 + \\exp(X_1)) + + \\tilde{Z}_3 &= (0.6 + X_1 \\cdot X_3 / 25)^3 + + \\tilde{Z}_4 &= (20 + X_2 + X_4)^2 + + \\tilde{Z}_5 &= X_5. + + Additionally, generate a confounder :math:`A \\sim \\mathcal{U}[-1, 1]`. + At first, define the propensity score as + + .. math:: + + m(X, A) = P(D=1|X,A) = p(Z) + \\gamma_A \\cdot A + + where + + .. 
math:: + + p(Z) &= \\frac{\\exp(f_{ps}(Z))}{1 + \\exp(f_{ps}(Z))}, + + f_{ps}(Z) &= 0.75 \\cdot (-Z_1 + 0.1 \\cdot Z_2 -0.25 \\cdot Z_3 - 0.1 \\cdot Z_4). + + and generate the treatment :math:`D = 1\\{m(X, A) \\ge U\\}` with :math:`U \\sim \\mathcal{U}[0, 1]`. + Since :math:`A` is independent of :math:`X`, the short form of the propensity score is given as + + .. math:: + + P(D=1|X) = p(Z). + + Further, generate the outcome of interest :math:`Y` as + + .. math:: + + Y &= \\theta \\cdot D (Z_5 + 1) + g(Z) + \\beta_A \\cdot A + \\varepsilon + + g(Z) &= 2.5 + 0.74 \\cdot Z_1 + 0.25 \\cdot Z_2 + 0.137 \\cdot (Z_3 + Z_4) + + where :math:`\\varepsilon \\sim \\mathcal{N}(0,5)`. + This implies an average treatment effect of :math:`\\theta`. Additionally, the long and short forms of + the conditional expectation take the following forms + + .. math:: + + \\mathbb{E}[Y|D, X, A] &= \\theta \\cdot D (Z_5 + 1) + g(Z) + \\beta_A \\cdot A + + \\mathbb{E}[Y|D, X] &= (\\theta + \\beta_A \\frac{\\mathrm{Cov}(A, D(Z_5 + 1))}{\\mathrm{Var}(D(Z_5 + 1))}) + \\cdot D (Z_5 + 1) + g(Z). + + Consequently, the strength of confounding is determined via :math:`\\gamma_A` and :math:`\\beta_A`, which can be + set via the parameters ``gamma_a`` and ``beta_a``. + + The observed data is given as :math:`W = (Y, D, Z)`. + Further, oracle values of the confounder :math:`A`, the transformed covariates :math:`Z`, + the potential outcomes of :math:`Y`, the long and short forms of the main regression and the propensity score and + in sample versions of the confounding parameters :math:`cf_d` and :math:`cf_y` (for ATE and ATTE) + are returned in a dictionary. + + Parameters + ---------- + n_obs : int + The number of observations to simulate. + Default is ``500``. + theta : float or int + Average treatment effect. + Default is ``0.0``. + gamma_a : float + Coefficient of the unobserved confounder in the propensity score. + Default is ``0.127``.
+ beta_a : float + Coefficient of the unobserved confounder in the outcome regression. + Default is ``0.58``. + linear : bool + If ``True``, the Z will be set to X, such that the underlying (short) models are linear/logistic. + Default is ``False``. + + Returns + ------- + res_dict : dictionary + Dictionary with entries ``x``, ``y``, ``d`` and ``oracle_values``. + + References + ---------- + Sant’Anna, P. H. and Zhao, J. (2020), + Doubly robust difference-in-differences estimators. Journal of Econometrics, 219(1), 101-122. + doi:`10.1016/j.jeconom.2020.06.003 `_. + """ + c = 0.0 # the confounding strength is only valid for c=0 + xi = 0.75 + dim_x = kwargs.get('dim_x', 5) + trimming_threshold = kwargs.get('trimming_threshold', 0.01) + var_eps_y = kwargs.get('var_eps_y', 1.0) + + # Specification of main regression function + def f_reg(w): + res = 2.5 + 0.74 * w[:, 0] + 0.25 * w[:, 1] + 0.137 * (w[:, 2] + w[:, 3]) + return res + + # Specification of prop score function + def f_ps(w, xi): + res = xi * (-w[:, 0] + 0.1 * w[:, 1] - 0.25 * w[:, 2] - 0.1 * w[:, 3]) + return res + + # observed covariates + cov_mat = toeplitz([np.power(c, k) for k in range(dim_x)]) + x = np.random.multivariate_normal(np.zeros(dim_x), cov_mat, size=[n_obs, ]) + z_tilde_1 = np.exp(0.5 * x[:, 0]) + z_tilde_2 = 10 + x[:, 1] / (1 + np.exp(x[:, 0])) + z_tilde_3 = (0.6 + x[:, 0] * x[:, 2] / 25) ** 3 + z_tilde_4 = (20 + x[:, 1] + x[:, 3]) ** 2 + z_tilde_5 = x[:, 4] + z_tilde = np.column_stack((z_tilde_1, z_tilde_2, z_tilde_3, z_tilde_4, z_tilde_5)) + z = (z_tilde - np.mean(z_tilde, axis=0)) / np.std(z_tilde, axis=0) + # error terms and unobserved confounder + eps_y = np.random.normal(loc=0, scale=np.sqrt(var_eps_y), size=n_obs) + # unobserved confounder + a_bounds = (-1, 1) + a = np.random.uniform(low=a_bounds[0], high=a_bounds[1], size=n_obs) + var_a = np.square(a_bounds[1] - a_bounds[0]) / 12 + + # Choose the features used in the models + if linear: + features_ps = x + features_reg = x + else: + 
features_ps = z + features_reg = z + + p = np.exp(f_ps(features_ps, xi)) / (1 + np.exp(f_ps(features_ps, xi))) + # compute short and long form of propensity score + m_long = p + gamma_a * a + m_short = p + # check propensity score bounds + if np.any(m_long < trimming_threshold) or np.any(m_long > 1.0 - trimming_threshold): + m_long = np.clip(m_long, trimming_threshold, 1.0 - trimming_threshold) + m_short = np.clip(m_short, trimming_threshold, 1.0 - trimming_threshold) + warnings.warn(f'Propensity score is close to 0 or 1. ' + f'Trimming is at {trimming_threshold} and {1.0 - trimming_threshold} is applied') + # generate treatment based on long form + u = np.random.uniform(low=0, high=1, size=n_obs) + d = 1.0 * (m_long >= u) + # add treatment heterogeneity + d1x = z[:, 4] + 1 + var_dx = np.var(d * (d1x)) + cov_adx = gamma_a * var_a + # Outcome regression + g_partial_reg = f_reg(features_reg) + # short model + g_short_d0 = g_partial_reg + g_short_d1 = (theta + beta_a * cov_adx / var_dx) * d1x + g_partial_reg + g_short = d * g_short_d1 + (1.0 - d) * g_short_d0 + # long model + g_long_d0 = g_partial_reg + beta_a * a + g_long_d1 = theta * d1x + g_partial_reg + beta_a * a + g_long = d * g_long_d1 + (1.0 - d) * g_long_d0 + # Potential outcomes + y_0 = g_long_d0 + eps_y + y_1 = g_long_d1 + eps_y + # Realized outcome + y = d * y_1 + (1.0 - d) * y_0 + # In-sample values for confounding strength + explained_residual_variance = np.square(g_long - g_short) + residual_variance = np.square(y - g_short) + cf_y = np.mean(explained_residual_variance) / np.mean(residual_variance) + # compute the Riesz representation + treated_weight = d / np.mean(d) + untreated_weight = (1.0 - d) / np.mean(d) + # Odds ratios + propensity_ratio_long = m_long / (1.0 - m_long) + rr_long_ate = d / m_long - (1.0 - d) / (1.0 - m_long) + rr_long_atte = treated_weight - np.multiply(untreated_weight, propensity_ratio_long) + propensity_ratio_short = m_short / (1.0 - m_short) + rr_short_ate = d / m_short - (1.0 
- d) / (1.0 - m_short) + rr_short_atte = treated_weight - np.multiply(untreated_weight, propensity_ratio_short) + cf_d_ate = (np.mean(1 / (m_long * (1 - m_long))) - np.mean(1 / (m_short * (1 - m_short)))) / np.mean( + 1 / (m_long * (1 - m_long))) + cf_d_atte = (np.mean(propensity_ratio_long) - np.mean(propensity_ratio_short)) / np.mean(propensity_ratio_long) + if (beta_a == 0) | (gamma_a == 0): + rho_ate = 0.0 + rho_atte = 0.0 + else: + rho_ate = np.corrcoef((g_long - g_short), (rr_long_ate - rr_short_ate))[0, 1] + rho_atte = np.corrcoef((g_long - g_short), (rr_long_atte - rr_short_atte))[0, 1] + oracle_values = { + 'g_long': g_long, + 'g_short': g_short, + 'm_long': m_long, + 'm_short': m_short, + 'gamma_a': gamma_a, + 'beta_a': beta_a, + 'a': a, + 'y_0': y_0, + 'y_1': y_1, + 'z': z, + 'cf_y': cf_y, + 'cf_d_ate': cf_d_ate, + 'cf_d_atte': cf_d_atte, + 'rho_ate': rho_ate, + 'rho_atte': rho_atte, + } + res_dict = { + 'x': x, + 'y': y, + 'd': d, + 'oracle_values': oracle_values + } + return res_dict + + +def make_confounded_plr_data(n_obs=500, theta=5.0, cf_y=0.04, cf_d=0.04, **kwargs): + """ + Generates confounded data from a partially linear regression model. + + The data generating process is defined as follows (similar to the Monte Carlo simulation used + in Sant'Anna and Zhao (2020)). Let :math:`X= (X_1, X_2, X_3, X_4, X_5)^T \\sim \\mathcal{N}(0, \\Sigma)`, + where :math:`\\Sigma` is a matrix with entries + :math:`\\Sigma_{kj} = c^{|j-k|}`. The default value is :math:`c = 0`, corresponding to the identity matrix. + Further, define :math:`Z_j = (\\tilde{Z_j} - \\mathbb{E}[\\tilde{Z}_j]) / \\sqrt{\\text{Var}(\\tilde{Z}_j)}`, + where + + .. math:: + + \\tilde{Z}_1 &= \\exp(0.5 \\cdot X_1) + + \\tilde{Z}_2 &= 10 + X_2/(1 + \\exp(X_1)) + + \\tilde{Z}_3 &= (0.6 + X_1 \\cdot X_3 / 25)^3 + + \\tilde{Z}_4 &= (20 + X_2 + X_4)^2. + + Additionally, generate a confounder :math:`A \\sim \\mathcal{U}[-1, 1]`. + At first, define the treatment as + + ..
math:: + + D = -Z_1 + 0.5 \\cdot Z_2 - 0.25 \\cdot Z_3 - 0.1 \\cdot Z_4 + \\gamma_A \\cdot A + \\varepsilon_D + + and with :math:`\\varepsilon \\sim \\mathcal{N}(0,1)`. + Since :math:`A` is independent of :math:`X`, the long and short forms of the treatment regression are given as + + .. math:: + + E[D|X,A] = -Z_1 + 0.5 \\cdot Z_2 - 0.25 \\cdot Z_3 - 0.1 \\cdot Z_4 + \\gamma_A \\cdot A + + E[D|X] = -Z_1 + 0.5 \\cdot Z_2 - 0.25 \\cdot Z_3 - 0.1 \\cdot Z_4. + + Further, generate the outcome of interest :math:`Y` as + + .. math:: + + Y &= \\theta \\cdot D + g(Z) + \\beta_A \\cdot A + \\varepsilon + + g(Z) &= 210 + 27.4 \\cdot Z_1 +13.7 \\cdot (Z_2 + Z_3 + Z_4) + + where :math:`\\varepsilon \\sim \\mathcal{N}(0,5)`. + This implies an average treatment effect of :math:`\\theta`. Additionally, the long and short forms of + the conditional expectation take the following forms + + .. math:: + + \\mathbb{E}[Y|D, X, A] &= \\theta \\cdot D + g(Z) + \\beta_A \\cdot A + + \\mathbb{E}[Y|D, X] &= (\\theta + \\gamma_A\\beta_A \\frac{\\mathrm{Var}(A)}{\\mathrm{Var}(D)}) \\cdot D + g(Z). + + Consequently, the strength of confounding is determined via :math:`\\gamma_A` and :math:`\\beta_A`. + Both are chosen to obtain the desired confounding of the outcome and Riesz Representer (in sample). + + The observed data is given as :math:`W = (Y, D, X)`. + Further, oracle values of the confounder :math:`A`, the transformed covariates :math:`Z`, the effect :math:`\\theta`, + the coefficients :math:`\\gamma_a`, :math:`\\beta_a`, the long and short forms of the main regression and + the propensity score are returned in a dictionary. + + Parameters + ---------- + n_obs : int + The number of observations to simulate. + Default is ``500``. + theta : float or int + Average treatment effect. + Default is ``5.0``. + cf_y : float + Percentage of the residual variation of the outcome explained by latent/confounding variable. + Default is ``0.04``.
+ cf_d : float + Percentage gains in the variation of the Riesz Representer generated by latent/confounding variable. + Default is ``0.04``. + + Returns + ------- + res_dict : dictionary + Dictionary with entries ``x``, ``y``, ``d`` and ``oracle_values``. + + References + ---------- + Sant’Anna, P. H. and Zhao, J. (2020), + Doubly robust difference-in-differences estimators. Journal of Econometrics, 219(1), 101-122. + doi:`10.1016/j.jeconom.2020.06.003 `_. + """ + c = kwargs.get('c', 0.0) + dim_x = kwargs.get('dim_x', 4) + + # observed covariates + cov_mat = toeplitz([np.power(c, k) for k in range(dim_x)]) + x = np.random.multivariate_normal(np.zeros(dim_x), cov_mat, size=[n_obs, ]) + + z_tilde_1 = np.exp(0.5 * x[:, 0]) + z_tilde_2 = 10 + x[:, 1] / (1 + np.exp(x[:, 0])) + z_tilde_3 = (0.6 + x[:, 0] * x[:, 2] / 25) ** 3 + z_tilde_4 = (20 + x[:, 1] + x[:, 3]) ** 2 + + z_tilde = np.column_stack((z_tilde_1, z_tilde_2, z_tilde_3, z_tilde_4, x[:, 4:])) + z = (z_tilde - np.mean(z_tilde, axis=0)) / np.std(z_tilde, axis=0) + + # error terms + var_eps_y = 5 + eps_y = np.random.normal(loc=0, scale=np.sqrt(var_eps_y), size=n_obs) + var_eps_d = 1 + eps_d = np.random.normal(loc=0, scale=np.sqrt(var_eps_d), size=n_obs) + + # unobserved confounder + a_bounds = (-1, 1) + a = np.random.uniform(low=a_bounds[0], high=a_bounds[1], size=n_obs) + var_a = np.square(a_bounds[1] - a_bounds[0]) / 12 + + # get the required impact of the confounder on the propensity score + m_short = -z[:, 0] + 0.5 * z[:, 1] - 0.25 * z[:, 2] - 0.1 * z[:, 3] + + def f_m(gamma_a): + rr_long = eps_d / var_eps_d + rr_short = (gamma_a * a + eps_d) / (gamma_a ** 2 * var_a + var_eps_d) + C2_D = (np.mean(np.square(rr_long)) - np.mean(np.square(rr_short))) / np.mean(np.square(rr_short)) + return np.square(C2_D / (1 + C2_D) - cf_d) + + gamma_a = minimize_scalar(f_m).x + m_long = m_short + gamma_a * a + d = m_long + eps_d + + # short and long version of g + g_partial_reg = 210 + 27.4 * z[:, 0] + 13.7 * (z[:, 1] + z[:, 2] 
+ z[:, 3]) + + var_d = np.var(d) + + def f_g(beta_a): + g_diff = beta_a * (a - gamma_a * (var_a / var_d) * d) + y_diff = eps_y + g_diff + return np.square(np.mean(np.square(g_diff)) / np.mean(np.square(y_diff)) - cf_y) + + beta_a = minimize_scalar(f_g).x + + g_long = theta * d + g_partial_reg + beta_a * a + g_short = (theta + gamma_a * beta_a * var_a / var_d) * d + g_partial_reg + + y = g_long + eps_y + + oracle_values = {'g_long': g_long, + 'g_short': g_short, + 'm_long': m_long, + 'm_short': m_short, + 'theta': theta, + 'gamma_a': gamma_a, + 'beta_a': beta_a, + 'a': a, + 'z': z} + + res_dict = {'x': x, + 'y': y, + 'd': d, + 'oracle_values': oracle_values} + + return res_dict + + +def make_heterogeneous_data(n_obs=200, p=30, support_size=5, n_x=1, binary_treatment=False): + """ + Creates a simple synthetic example for heterogeneous treatment effects. + The data generating process is based on the Monte Carlo simulation from Oprescu et al. (2019). + + The data is generated as + + .. math:: + + Y_i & = \\theta_0(X_i)D_i + \\langle X_i,\\gamma_0\\rangle + \\epsilon_i + + D_i & = \\langle X_i,\\beta_0\\rangle + \\eta_i, + + where :math:`X_i\\sim\\mathcal{U}[0,1]^{p}` and :math:`\\epsilon_i,\\eta_i + \\sim\\mathcal{U}[-1,1]`. + If the treatment is set to be binary, the treatment is generated as + + .. math:: + D_i = 1\\{\\langle X_i,\\beta_0\\rangle \\ge \\eta_i\\}. + + The coefficient vectors :math:`\\gamma_0` and :math:`\\beta_0` both have small random (identical) support + which values are drawn independently from :math:`\\mathcal{U}[0,1]` and :math:`\\mathcal{U}[0,0.3]`. + Further, :math:`\\theta_0(x)` defines the conditional treatment effect, which is defined differently depending + on the dimension of :math:`x`. + + If the heterogeneity is univariate the conditional treatment effect takes the following form + + .. math:: + \\theta_0(x) = \\exp(2x_0) + 3\\sin(4x_0), + + whereas for the two-dimensional case the conditional treatment effect is defined as + + .. 
math:: + \\theta_0(x) = \\exp(2x_0) + 3\\sin(4x_1). + + Parameters + ---------- + n_obs : int + Number of observations to simulate. + Default is ``200``. + + p : int + Dimension of covariates. + Default is ``30``. + + support_size : int + Number of relevant (confounding) covariates. + Default is ``5``. + + n_x : int + Dimension of the heterogeneity. Can be either ``1`` or ``2``. + Default is ``1``. + + binary_treatment : bool + Indicates whether the treatment is binary. + Default is ``False``. + + Returns + ------- + res_dict : dictionary + Dictionary with entries ``data``, ``effects``, ``treatment_effect``. + + """ + # simple input checks + assert n_x in [1, 2], 'n_x must be either 1 or 2.' + assert support_size <= p, 'support_size must be smaller than p.' + assert isinstance(binary_treatment, bool), 'binary_treatment must be a boolean.' + + # define treatment effects + if n_x == 1: + def treatment_effect(x): + return np.exp(2 * x[:, 0]) + 3 * np.sin(4 * x[:, 0]) + else: + assert n_x == 2 + + # redefine treatment effect + def treatment_effect(x): + return np.exp(2 * x[:, 0]) + 3 * np.sin(4 * x[:, 1]) + + # Outcome support and coefficients + support_y = np.random.choice(np.arange(p), size=support_size, replace=False) + coefs_y = np.random.uniform(0, 1, size=support_size) + # treatment support and coefficients + support_d = support_y + coefs_d = np.random.uniform(0, 0.3, size=support_size) + + # noise + epsilon = np.random.uniform(-1, 1, size=n_obs) + eta = np.random.uniform(-1, 1, size=n_obs) + + # Generate controls, covariates, treatments and outcomes + x = np.random.uniform(0, 1, size=(n_obs, p)) + # Heterogeneous treatment effects + te = treatment_effect(x) + if binary_treatment: + d = 1.0 * (np.dot(x[:, support_d], coefs_d) >= eta) + else: + d = np.dot(x[:, support_d], coefs_d) + eta + y = te * d + np.dot(x[:, support_y], coefs_y) + epsilon + + # Now we build the dataset + y_df = pd.DataFrame({'y': y}) + d_df = pd.DataFrame({'d': d}) + x_df = pd.DataFrame( + 
data=x, + index=np.arange(x.shape[0]), + columns=[f'X_{i}' for i in range(x.shape[1])] + ) + + data = pd.concat([y_df, d_df, x_df], axis=1) + res_dict = { + 'data': data, + 'effects': te, + 'treatment_effect': treatment_effect} + return res_dict + + +def make_ssm_data(n_obs=8000, dim_x=100, theta=1, mar=True, return_type='DoubleMLData'): + """ + Generates data from a sample selection model (SSM). + The data generating process is defined as + + .. math:: + + y_i &= \\theta d_i + x_i' \\beta d_i + u_i, + + s_i &= 1\\left\\lbrace d_i + \\gamma z_i + x_i' \\beta + v_i > 0 \\right\\rbrace, + + d_i &= 1\\left\\lbrace x_i' \\beta + w_i > 0 \\right\\rbrace, + + with Y being observed if :math:`s_i = 1` and covariates :math:`x_i \\sim \\mathcal{N}(0, \\Sigma^2_x)`, where + :math:`\\Sigma^2_x` is a matrix with entries + :math:`\\Sigma_{kj} = 0.5^{|j-k|}`. + :math:`\\beta` is a `dim_x`-vector with entries :math:`\\beta_j=\\frac{0.4}{j^2}` + :math:`z_i \\sim \\mathcal{N}(0, 1)`, + :math:`(u_i,v_i) \\sim \\mathcal{N}(0, \\Sigma^2_{u,v})`, + :math:`w_i \\sim \\mathcal{N}(0, 1)`. + + + The data generating process is inspired by a process used in the simulation study (see Appendix E) of Bia, + Huber and Lafférs (2023). + + Parameters + ---------- + n_obs : + The number of observations to simulate. + dim_x : + The number of covariates. + theta : + The value of the causal parameter. + mar: + Boolean. Indicates whether missingness at random holds. + return_type : + If ``'DoubleMLData'`` or ``DoubleMLData``, returns a ``DoubleMLData`` object. + + If ``'DataFrame'``, ``'pd.DataFrame'`` or ``pd.DataFrame``, returns a ``pd.DataFrame``. + + If ``'array'``, ``'np.ndarray'``, ``'np.array'`` or ``np.ndarray``, returns ``np.ndarray``'s ``(x, y, d, z, s)``. 
+ + References + ---------- + Michela Bia, Martin Huber & Lukáš Lafférs (2023) Double Machine Learning for Sample Selection Models, + Journal of Business & Economic Statistics, DOI: 10.1080/07350015.2023.2271071 + """ + if mar: + sigma = np.array([[1, 0], [0, 1]]) + gamma = 0 + else: + sigma = np.array([[1, 0.8], [0.8, 1]]) + gamma = 1 + + e = np.random.multivariate_normal(mean=[0, 0], cov=sigma, size=n_obs).T + + cov_mat = toeplitz([np.power(0.5, k) for k in range(dim_x)]) + x = np.random.multivariate_normal(np.zeros(dim_x), cov_mat, size=[n_obs, ]) + + beta = [0.4 / (k ** 2) for k in range(1, dim_x + 1)] + + d = np.where(np.dot(x, beta) + np.random.randn(n_obs) > 0, 1, 0) + z = np.random.randn(n_obs) + s = np.where(np.dot(x, beta) + d + gamma * z + e[0] > 0, 1, 0) + + y = np.dot(x, beta) + theta * d + e[1] + y[s == 0] = 0 + + if return_type in _array_alias: + return x, y, d, z, s + elif return_type in _data_frame_alias + _dml_data_alias: + x_cols = [f'X{i + 1}' for i in np.arange(dim_x)] + if mar: + data = pd.DataFrame(np.column_stack((x, y, d, s)), + columns=x_cols + ['y', 'd', 's']) + else: + data = pd.DataFrame(np.column_stack((x, y, d, z, s)), + columns=x_cols + ['y', 'd', 'z', 's']) + if return_type in _data_frame_alias: + return data + else: + if mar: + return DoubleMLData(data, 'y', 'd', x_cols, None, None, 's') + return DoubleMLData(data, 'y', 'd', x_cols, 'z', None, 's') + else: + raise ValueError('Invalid return_type.') + + +def make_irm_data_discrete_treatments(n_obs=200, n_levels=3, linear=False, random_state=None, **kwargs): + """ + Generates data from an interactive regression (IRM) model with multiple treatment levels (based on an + underlying continuous treatment). + + The data generating process is defined as follows (similar to the Monte Carlo simulation used + in Sant'Anna and Zhao (2020)). + + Let :math:`X= (X_1, X_2, X_3, X_4, X_5)^T \\sim \\mathcal{N}(0, \\Sigma)`, where :math:`\\Sigma` corresponds + to the identity matrix.
+ Further, define :math:`Z_j = (\\tilde{Z_j} - \\mathbb{E}[\\tilde{Z}_j]) / \\sqrt{\\text{Var}(\\tilde{Z}_j)}`, + where + + .. math:: + + \\tilde{Z}_1 &= \\exp(0.5 \\cdot X_1) + + \\tilde{Z}_2 &= 10 + X_2/(1 + \\exp(X_1)) + + \\tilde{Z}_3 &= (0.6 + X_1 \\cdot X_3 / 25)^3 + + \\tilde{Z}_4 &= (20 + X_2 + X_4)^2 + + \\tilde{Z}_5 &= X_5. + + A continuous treatment :math:`D_{\\text{cont}}` is generated as + + .. math:: + + D_{\\text{cont}} = \\xi (-Z_1 + 0.5 Z_2 - 0.25 Z_3 - 0.1 Z_4) + \\varepsilon_D, + + where :math:`\\varepsilon_D \\sim \\mathcal{N}(0,1)` and :math:`\\xi=0.3`. The corresponding treatment + effect is defined as + + .. math:: + + \\theta (d) = 0.1 \\exp(d) + 10 \\sin(0.7 d) + 2 d - 0.2 d^2. + + Based on the continous treatment, a discrete treatment :math:`D` is generated as with a baseline level of + :math:`D=0` and additional levels based on the quantiles of :math:`D_{\\text{cont}}`. The number of levels + is defined by :math:`n_{\\text{levels}}`. Each level is chosen to have the same probability of being selected. + + The potential outcomes are defined as + + .. math:: + + Y(0) &= 210 + 27.4 Z_1 + 13.7 (Z_2 + Z_3 + Z_4) + \\varepsilon_Y + + Y(1) &= \\theta (D_{\\text{cont}}) 1\\{D_{\\text{cont}} > 0\\} + Y(0), + + where :math:`\\varepsilon_Y \\sim \\mathcal{N}(0,5)`. Further, the observed outcome is defined as + + .. math:: + + Y = Y(1) 1\\{D > 0\\} + Y(0) 1\\{D = 0\\}. + + The data is returned as a dictionary with the entries ``x``, ``y``, ``d`` and ``oracle_values``. + + Parameters + ---------- + n_obs : int + The number of observations to simulate. + Default is ``200``. + + n_levels : int + The number of treatment levels. + Default is ``3``. + + linear : bool + Indicates whether the true underlying regression is linear. + Default is ``False``. + + random_state : int + Random seed for reproducibility. + Default is ``42``. + + Returns + ------- + res_dict : dictionary + Dictionary with entries ``x``, ``y``, ``d`` and ``oracle_values``. 
+ The oracle values contain the continuous treatment, the level bounds, the potential level, ITE + and the potential outcome without treatment. + + """ + if random_state is not None: + np.random.seed(random_state) + xi = kwargs.get('xi', 0.3) + c = kwargs.get('c', 0.0) + dim_x = kwargs.get('dim_x', 5) + + if not isinstance(n_levels, int): + raise ValueError('n_levels must be an integer.') + if n_levels < 2: + raise ValueError('n_levels must be at least 2.') + + # observed covariates + cov_mat = toeplitz([np.power(c, k) for k in range(dim_x)]) + x = np.random.multivariate_normal(np.zeros(dim_x), cov_mat, size=[n_obs, ]) + + def f_reg(w): + res = 210 + 27.4 * w[:, 0] + 13.7 * (w[:, 1] + w[:, 2] + w[:, 3]) + return res + + def f_treatment(w, xi): + res = xi * (-w[:, 0] + 0.5 * w[:, 1] - 0.25 * w[:, 2] - 0.1 * w[:, 3]) + return res + + def treatment_effect(d, scale=15): + return scale * (1 / (1 + np.exp(-d - 1.2 * np.cos(d)))) - 2 + + z_tilde_1 = np.exp(0.5 * x[:, 0]) + z_tilde_2 = 10 + x[:, 1] / (1 + np.exp(x[:, 0])) + z_tilde_3 = (0.6 + x[:, 0] * x[:, 2] / 25) ** 3 + z_tilde_4 = (20 + x[:, 1] + x[:, 3]) ** 2 + + z_tilde = np.column_stack((z_tilde_1, z_tilde_2, z_tilde_3, z_tilde_4, x[:, 4:])) + z = (z_tilde - np.mean(z_tilde, axis=0)) / np.std(z_tilde, axis=0) + + # error terms + var_eps_y = 5 + eps_y = np.random.normal(loc=0, scale=np.sqrt(var_eps_y), size=n_obs) + var_eps_d = 1 + eps_d = np.random.normal(loc=0, scale=np.sqrt(var_eps_d), size=n_obs) + + if linear: + g = f_reg(x) + m = f_treatment(x, xi) + else: + assert not linear + g = f_reg(z) + m = f_treatment(z, xi) + + cont_d = m + eps_d + level_bounds = np.quantile(cont_d, q=np.linspace(0, 1, n_levels + 1)) + potential_level = sum([1.0 * (cont_d >= bound) for bound in level_bounds[1:-1]]) + 1 + eta = np.random.uniform(0, 1, size=n_obs) + d = 1.0 * (eta >= 1 / n_levels) * potential_level + + ite = treatment_effect(cont_d) + y0 = g + eps_y + # only treated for d > 0 compared to the baseline + y = ite * (d > 0) + 
y0 + + oracle_values = { + 'cont_d': cont_d, + 'level_bounds': level_bounds, + 'potential_level': potential_level, + 'ite': ite, + 'y0': y0, + } + + resul_dict = { + 'x': x, + 'y': y, + 'd': d, + 'oracle_values': oracle_values + } + + return resul_dict + + +def make_logistic_LZZ2020(n_obs=500, dim_x=20, alpha=0.5, return_type='DoubleMLData', **kwargs): + """ + Generates synthetic data for a logistic partially linear regression model, as in Liu et al. (2021), + designed for use in double/debiased machine learning applications. + + The data generating process is defined as follows: + + - Covariates \( x_i \sim \mathcal{N}(0, \Sigma) \), where \( \Sigma_{kj} = 0.7^{|j-k|} \). + - Treatment \( d_i = a_0(x_i) \). + - Propensity score \( p_i = \sigma(\alpha d_i + r_0(x_i)) \), where \( \sigma(\cdot) \) is the logistic function. + - Outcome \( y_i \sim \text{Bernoulli}(p_i) \). + + The nuisance functions are defined as: + + .. math:: + + a_0(x_i) &= \frac{2}{1 + \exp(x_{i,1})} - \frac{2}{1 + \exp(x_{i,2})} + \sin(x_{i,3}) + \cos(x_{i,4}) \\ + &+ 0.5 \cdot \mathbb{1}(x_{i,5} > 0) - 0.5 \cdot \mathbb{1}(x_{i,6} > 0) + 0.2 x_{i,7} x_{i,8} - 0.2 x_{i,9} x_{i,10} \\ + + r_0(x_i) &= 0.1 x_{i,1} x_{i,2} x_{i,3} + 0.1 x_{i,4} x_{i,5} + 0.1 x_{i,6}^3 - 0.5 \sin^2(x_{i,7}) \\ + &+ 0.5 \cos(x_{i,8}) + \frac{1}{1 + x_{i,9}^2} - \frac{1}{1 + \exp(x_{i,10})} \\ + &+ 0.25 \cdot \mathbb{1}(x_{i,11} > 0) - 0.25 \cdot \mathbb{1}(x_{i,13} > 0) + + Parameters + ---------- + n_obs : int + Number of observations to simulate. + dim_x : int + Number of covariates. + alpha : float + Value of the causal parameter. + return_type : str + Determines the return format. One of: + + - 'DoubleMLData' or DoubleMLData: returns a ``DoubleMLData`` object. + - 'DataFrame', 'pd.DataFrame' or pd.DataFrame: returns a ``pandas.DataFrame``. + - 'array', 'np.ndarray', 'np.array' or np.ndarray: returns tuple of numpy arrays (x, y, d, p). 
+ + **kwargs + Optional keyword arguments (currently unused in this implementation). + + Returns + ------- + Union[DoubleMLData, pd.DataFrame, Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]] + The generated data in the specified format. + + References + ---------- + Liu, Molei, Yi Zhang, and Doudou Zhou. 2021. + "Double/Debiased Machine Learning for Logistic Partially Linear Model." + The Econometrics Journal 24 (3): 559–88. https://doi.org/10.1093/ectj/utab019. + + """ + + def r_0(X): + return 0.1 * X[:, 0] * X[:, 1] * X[:, 2] + \ + 0.1 * X[:, 3] * X[:, 4] + \ + 0.1 * X[:, 5] ** 3 + \ + -0.5 * np.sin(X[:, 6]) ** 2 + \ + 0.5 * np.cos(X[:, 7]) + \ + 1 / (1 + X[:, 8] ** 2) + \ + -1 / (1 + np.exp(X[:, 9])) + \ + 0.25 * np.where(X[:, 10] > 0, 1, 0) + \ + -0.25 * np.where(X[:, 12] > 0, 1, 0) + + def a_0(X): + return 2 / (1 + np.exp(X[:, 0])) + \ + -2 / (1 + np.exp(X[:, 1])) + \ + 1 * np.sin(X[:, 2]) + \ + 1 * np.cos(X[:, 3]) + \ + 0.5 * np.where(X[:, 4] > 0, 1, 0) + \ + -0.5 * np.where(X[:, 5] > 0, 1, 0) + \ + 0.2 * X[:, 6] * X[:, 7] + \ + -0.2 * X[:, 8] * X[:, 9] + + + sigma = np.full((dim_x, dim_x), 0.2) + np.fill_diagonal(sigma, 1) + + x = np.random.multivariate_normal(np.zeros(dim_x), sigma, size=n_obs) + np.clip(x, -2, 2, out=x) + + d = a_0(x) + + p = expit(alpha * d[:] + r_0(x)) + + y = np.random.binomial(1, p) + + if return_type in _array_alias: + return x, y, d, p + elif return_type in _data_frame_alias + _dml_data_alias: + x_cols = [f'X{i + 1}' for i in np.arange(dim_x)] + data = pd.DataFrame(np.column_stack((x, y, d, p)), + columns=x_cols + ['y', 'd', 'p']) + if return_type in _data_frame_alias: + return data + else: + return DoubleMLData(data, 'y', 'd', x_cols, p_cols='p') + else: + raise ValueError('Invalid return_type.') diff --git a/doubleml/double_ml_data.py b/doubleml/double_ml_data.py index fdee739dd..35c9af651 100644 --- a/doubleml/double_ml_data.py +++ b/doubleml/double_ml_data.py @@ -113,6 +113,10 @@ class DoubleMLData(DoubleMLBaseData): The 
score or selection variable (only relevant/used for RDD or SSM Estimatiors). Default is ``None``. + p_cols : None, str or list, optional + The column(s) containing the probabilities of the outcome (only for simulated, binary data). + Default is ``None``. + use_other_treat_as_covariate : bool Indicates whether in the multiple-treatment case the other treatment variables should be added as covariates. Default is ``True``. @@ -145,6 +149,7 @@ def __init__(self, z_cols=None, t_col=None, s_col=None, + p_cols=None, use_other_treat_as_covariate=True, force_all_x_finite=True): DoubleMLBaseData.__init__(self, data) @@ -155,6 +160,7 @@ def __init__(self, self.t_col = t_col self.s_col = s_col self.x_cols = x_cols + self.p_cols = p_cols self._check_disjoint_sets_y_d_x_z_t_s() self.use_other_treat_as_covariate = use_other_treat_as_covariate self.force_all_x_finite = force_all_x_finite @@ -187,7 +193,7 @@ def _data_summary_str(self): return data_summary @classmethod - def from_arrays(cls, x, y, d, z=None, t=None, s=None, use_other_treat_as_covariate=True, + def from_arrays(cls, x, y, d, z=None, t=None, s=None, p=None, use_other_treat_as_covariate=True, force_all_x_finite=True): """ Initialize :class:`DoubleMLData` from :class:`numpy.ndarray`'s. @@ -215,6 +221,10 @@ def from_arrays(cls, x, y, d, z=None, t=None, s=None, use_other_treat_as_covaria Array of the score or selection variable (only relevant/used for RDD and SSM models). Default is ``None``. + p : None or :class:`numpy.ndarray` + Array of the probabilities of the outcome (only for simulated, binary data). + Default is ``None``. + use_other_treat_as_covariate : bool Indicates whether in the multiple-treatment case the other treatment variables should be added as covariates. Default is ``True``. 
@@ -299,7 +309,13 @@ def from_arrays(cls, x, y, d, z=None, t=None, s=None, use_other_treat_as_covaria if s is not None: data[s_col] = s - return cls(data, y_col, d_cols, x_cols, z_cols, t_col, s_col, use_other_treat_as_covariate, force_all_x_finite) + if p is not None: + if p.shape[1] == 1: + d_cols = ['p'] + else: + d_cols = [f'p{i + 1}' for i in np.arange(p.shape[1])] + + return cls(data, y_col, d_cols, x_cols, z_cols, t_col, s_col, p_cols, use_other_treat_as_covariate, force_all_x_finite) @property def x(self): @@ -358,6 +374,41 @@ def s(self): else: return None + @property + def p_cols(self): + """ + The column(s) containing the probabilities of the outcome (only for simulated data). + """ + return self._p_cols + + @p_cols.setter + def p_cols(self, value): + if value is not None: + if isinstance(value, str): + value = [value] + if not isinstance(value, list): + raise TypeError('The probability column(s) p_cols must be of str or list type (or None). ' + f'{str(value)} of type {str(type(value))} was passed.') + if not len(set(value)) == len(value): + raise ValueError('Invalid probability column(s) p_cols: ' + 'Contains duplicate values.') + if not set(value).issubset(set(self.all_variables)): + raise ValueError('Invalid probability column(s) p_cols. ' + 'At least one probability column is not a data column.') + self._p_cols = value + else: + self._p_cols = None + + @property + def p(self): + """ + Array of probabilities of the outcome (only for simulated data). 
+ """ + if self.p_cols is not None: + return self._p.values + else: + return None + @property def n_treat(self): """ diff --git a/doubleml/plm/__init__.py b/doubleml/plm/__init__.py index e81f00c52..88ff26a8a 100644 --- a/doubleml/plm/__init__.py +++ b/doubleml/plm/__init__.py @@ -8,4 +8,5 @@ __all__ = [ "DoubleMLPLR", "DoubleMLPLIV", + "DoubleMLLogit" ] diff --git a/doubleml/logistic/logistic.py b/doubleml/plm/logistic.py similarity index 87% rename from doubleml/logistic/logistic.py rename to doubleml/plm/logistic.py index ab10ceb87..d48fb29d3 100644 --- a/doubleml/logistic/logistic.py +++ b/doubleml/plm/logistic.py @@ -1,5 +1,9 @@ +import inspect + import numpy as np -from ..utils._estimation import ( +from torch.sparse import sampled_addmm + +from doubleml.utils._estimation import ( _dml_cv_predict, _trimm, _predict_zero_one_propensity, @@ -15,12 +19,12 @@ import scipy from sklearn.utils.multiclass import type_of_target -from .. import DoubleMLData -from ..double_ml import DoubleML -from ..double_ml_score_mixins import NonLinearScoreMixin -from ..utils import DoubleMLClusterResampling -from ..utils._checks import _check_score, _check_finite_predictions, _check_is_propensity -from ..utils.resampling import DoubleMLDoubleResampling +from doubleml import DoubleMLData +from doubleml.double_ml import DoubleML +from doubleml.double_ml_score_mixins import NonLinearScoreMixin +from doubleml.utils import DoubleMLClusterResampling +from doubleml.utils._checks import _check_score, _check_finite_predictions, _check_is_propensity +from doubleml.utils.resampling import DoubleMLDoubleResampling @@ -61,7 +65,7 @@ class DoubleMLLogit(NonLinearScoreMixin, DoubleML): Default is ``1``. score : str or callable - A str (``'partialling out'`` or ``'IV-type'``) specifying the score function + A str (``'nuisance_space'`` or ``'instrument'``) specifying the score function or a callable object / function with signature ``psi_a, psi_b = score(y, d, l_hat, m_hat, g_hat, smpls)``. 
Default is ``'partialling out'``. @@ -103,14 +107,14 @@ class DoubleMLLogit(NonLinearScoreMixin, DoubleML): def __init__(self, obj_dml_data, - ml_m, ml_M, ml_t, + ml_m, ml_a=None, n_folds=5, n_folds_inner=5, n_rep=1, - score='logistic', + score='nuisance_space', draw_sample_splitting=True): self.n_folds_inner = n_folds_inner super().__init__(obj_dml_data, @@ -122,12 +126,16 @@ def __init__(self, self._coef_start_val = 1.0 self._check_data(self._dml_data) - valid_scores = ['logistic'] + valid_scores = ['nuisance_space', 'instrument'] _check_score(self.score, valid_scores, allow_callable=True) _ = self._check_learner(ml_t, 'ml_t', regressor=True, classifier=False) _ = self._check_learner(ml_M, 'ml_M', regressor=False, classifier=True) - ml_m_is_classifier = self._check_learner(ml_m, 'ml_m', regressor=True, classifier=True) + + if not np.array_equal(np.unique(obj_dml_data.y), [0, 1]): + ml_m_is_classifier = self._check_learner(ml_m, 'ml_m', regressor=False, classifier=True) + else: + ml_m_is_classifier = self._check_learner(ml_m, 'ml_m', regressor=True, classifier=False) self._learner = {'ml_m': ml_m, 'ml_t': ml_t, 'ml_M': ml_M} if ml_a is not None: @@ -157,6 +165,11 @@ def __init__(self, else: self._predict_method['ml_a'] = 'predict' + if score == 'instrument': + sig = inspect.signature(self.learner['ml_a'].fit) + if not 'sample_weight' in sig.parameters: + raise ValueError('Learner \"ml_a\" who supports sample_weight is required for score type \"instrument\"') + self._initialize_ml_nuisance_params() self._external_predictions_implemented = True @@ -174,7 +187,7 @@ def _check_data(self, obj_dml_data): def _double_dml_cv_predict(self, estimator, estimator_name, x, y, smpls=None, smpls_inner=None, - n_jobs=None, est_params=None, method='predict'): + n_jobs=None, est_params=None, method='predict', sample_weights=None): res = {} res['preds'] = np.zeros(y.shape, dtype=float) res['preds_inner'] = [] @@ -182,7 +195,7 @@ def _double_dml_cv_predict(self, estimator, 
estimator_name, x, y, smpls=None, s for smpls_single_split, smpls_double_split in zip(smpls, smpls_inner): res_inner = _dml_cv_predict(estimator, x, y, smpls=smpls_double_split, n_jobs=n_jobs, est_params=est_params, method=method, - return_models=True, smpls_is_partition=True) + return_models=True, smpls_is_partition=True, sample_weights=sample_weights) _check_finite_predictions(res_inner['preds'], estimator, estimator_name, smpls_double_split) res['preds_inner'].append(res_inner['preds']) @@ -214,19 +227,41 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa else: a_external = False + if M_external: + M_hat = {'preds': external_predictions['ml_M'], + 'targets': None, + 'models': None} + else: + M_hat = (self._double_dml_cv_predict(self._learner['ml_M'], 'ml_M', x_d_concat, y, smpls=smpls, smpls_inner=self.__smpls__inner, + n_jobs=n_jobs_cv, + est_params=self._get_params('ml_M'), method=self._predict_method['ml_M'])) + + # TODO + #if self._score_type == "instrument": + + # nuisance m if m_external: m_hat = {'preds': external_predictions['ml_m'], 'targets': None, 'models': None} else: - filtered_smpls = [] - for train, test in smpls: - train_filtered = train[y[train] == 0] - filtered_smpls.append((train_filtered, test)) - m_hat = _dml_cv_predict(self._learner['ml_m'], x, d, smpls=filtered_smpls, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_m'), method=self._predict_method['ml_m'], - return_models=return_models) + if self.score == 'instrument': + weights = [] + for i, (train, test) in enumerate(smpls): + weights.append( M_hat['preds_inner'][i][train] * (1-M_hat['preds_inner'][i][train])) + m_hat = _dml_cv_predict(self._learner['ml_m'], x, d, smpls=smpls, n_jobs=n_jobs_cv, + est_params=self._get_params('ml_m'), method=self._predict_method['ml_m'], + return_models=return_models, weights=weights) + + else: + filtered_smpls = [] + for train, test in smpls: + train_filtered = train[y[train] == 0] + 
filtered_smpls.append((train_filtered, test)) + m_hat = _dml_cv_predict(self._learner['ml_m'], x, d, smpls=filtered_smpls, n_jobs=n_jobs_cv, + est_params=self._get_params('ml_m'), method=self._predict_method['ml_m'], + return_models=return_models) _check_finite_predictions(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls) if self._check_learner(self._learner['ml_m'], 'ml_m', regressor=True, classifier=True): @@ -242,14 +277,7 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa 'probabilities and not labels are predicted.') - if M_external: - M_hat = {'preds': external_predictions['ml_M'], - 'targets': None, - 'models': None} - else: - M_hat = (self._double_dml_cv_predict(self._learner['ml_M'], 'ml_M', x_d_concat, y, smpls=smpls, smpls_inner=self.__smpls__inner, - n_jobs=n_jobs_cv, - est_params=self._get_params('ml_M'), method=self._predict_method['ml_M'])) + if a_external: a_hat = {'preds': external_predictions['ml_a'], @@ -456,15 +484,22 @@ def set_sample_splitting(self): def _compute_score(self, psi_elements, coef): - score_1 = psi_elements["y"] * np.exp(-coef * psi_elements["d"]) * psi_elements["d_tilde"] - + if self._score_type == 'nuisance_space': + score_1 = psi_elements["y"] * np.exp(-coef * psi_elements["d"]) * psi_elements["d_tilde"] + score = psi_elements["psi_hat"] * (score_1 - psi_elements["score_const"]) + else: + score = (psi_elements["y"] - np.exp(coef * psi_elements["d"]+ psi_elements["r_hat"])) * psi_elements["d_tilde"] - return psi_elements["psi_hat"] * (score_1 - psi_elements["score_const"]) + return score def _compute_score_deriv(self, psi_elements, coef, inds=None): - deriv_1 = - psi_elements["y"] * np.exp(-coef * psi_elements["d"]) * psi_elements["d"] + if self._score_type == 'nuisance_space': + deriv_1 = - psi_elements["y"] * np.exp(-coef * psi_elements["d"]) * psi_elements["d"] + deriv = psi_elements["psi_hat"] * psi_elements["d_tilde"] * deriv_1 + else: + deriv = - psi_elements["d"] * np.exp(coef * 
psi_elements["d"]+ psi_elements["r_hat"]) * psi_elements["d_tilde"] - return psi_elements["psi_hat"] * psi_elements["d_tilde"] * deriv_1 + return deriv def cate(self, basis, is_gate=False): diff --git a/doubleml/logistic/tests/_utils_logistic_manual.py b/doubleml/plm/tests/_utils_logistic_manual.py similarity index 87% rename from doubleml/logistic/tests/_utils_logistic_manual.py rename to doubleml/plm/tests/_utils_logistic_manual.py index ae53992a6..af4d034eb 100644 --- a/doubleml/logistic/tests/_utils_logistic_manual.py +++ b/doubleml/plm/tests/_utils_logistic_manual.py @@ -2,8 +2,8 @@ import scipy from sklearn.base import clone, is_classifier -from ...tests._utils_boot import boot_manual, draw_weights -from ...tests._utils import fit_predict, fit_predict_proba, tune_grid_search +from doubleml.tests._utils_boot import boot_manual, draw_weights +from doubleml.tests._utils import fit_predict, fit_predict_proba, tune_grid_search def fit_logistic_multitreat(y, x, d, learner_l, learner_m, learner_g, all_smpls, score, @@ -155,32 +155,6 @@ def fit_nuisance_logistic_classifier(y, x, d, learner_l, learner_m, learner_g, s return l_hat, m_hat, g_hat -def tune_nuisance_plr(y, x, d, ml_l, ml_m, ml_g, smpls, n_folds_tune, param_grid_l, param_grid_m, param_grid_g, tune_g=True): - l_tune_res = tune_grid_search(y, x, ml_l, smpls, param_grid_l, n_folds_tune) - - m_tune_res = tune_grid_search(d, x, ml_m, smpls, param_grid_m, n_folds_tune) - - if tune_g: - l_hat = np.full_like(y, np.nan) - m_hat = np.full_like(d, np.nan) - for idx, (train_index, _) in enumerate(smpls): - l_hat[train_index] = l_tune_res[idx].predict(x[train_index, :]) - m_hat[train_index] = m_tune_res[idx].predict(x[train_index, :]) - psi_a = -np.multiply(d - m_hat, d - m_hat) - psi_b = np.multiply(d - m_hat, y - l_hat) - theta_initial = -np.nanmean(psi_b) / np.nanmean(psi_a) - - g_tune_res = tune_grid_search(y - theta_initial*d, x, ml_g, smpls, param_grid_g, n_folds_tune) - g_best_params = [xx.best_params_ for xx in 
g_tune_res] - else: - g_best_params = [] - - l_best_params = [xx.best_params_ for xx in l_tune_res] - m_best_params = [xx.best_params_ for xx in m_tune_res] - - return l_best_params, m_best_params, g_best_params - - def compute_plr_residuals(y, d, l_hat, m_hat, g_hat, smpls): y_minus_l_hat = np.full_like(y, np.nan, dtype='float64') d_minus_m_hat = np.full_like(d, np.nan, dtype='float64') @@ -193,13 +167,6 @@ def compute_plr_residuals(y, d, l_hat, m_hat, g_hat, smpls): return y_minus_l_hat, d_minus_m_hat, y_minus_g_hat -def plr_dml2(y, x, d, l_hat, m_hat, g_hat, smpls, score): - n_obs = len(y) - y_minus_l_hat, d_minus_m_hat, y_minus_g_hat = compute_plr_residuals(y, d, l_hat, m_hat, g_hat, smpls) - theta_hat = plr_orth(y_minus_l_hat, d_minus_m_hat, y_minus_g_hat, d, score) - se = np.sqrt(var_plr(theta_hat, d, y_minus_l_hat, d_minus_m_hat, y_minus_g_hat, score, n_obs)) - - return theta_hat, se def var_plr(theta, d, y_minus_l_hat, d_minus_m_hat, y_minus_g_hat, score, n_obs): diff --git a/doubleml/logistic/tests/tests_logistic.py b/doubleml/plm/tests/tests_logistic.py similarity index 85% rename from doubleml/logistic/tests/tests_logistic.py rename to doubleml/plm/tests/tests_logistic.py index 2b97bf76b..a77db7a67 100644 --- a/doubleml/logistic/tests/tests_logistic.py +++ b/doubleml/plm/tests/tests_logistic.py @@ -11,8 +11,8 @@ import doubleml as dml -from ...tests._utils import draw_smpls -from ._utils_logistic_manual import fit_logistic, , boot_plr +from doubleml.tests._utils import draw_smpls +from ._utils_logistic_manual import fit_logistic, boot_plr @pytest.fixture(scope='module', @@ -304,49 +304,4 @@ def test_dml_plr_ols_manual_boot(dml_plr_ols_manual_fixture): @pytest.fixture(scope='module', params=["nonrobust", "HC0", "HC1", "HC2", "HC3"]) def cov_type(request): - return request.param - - -@pytest.mark.ci -def test_dml_plr_cate_gate(score, cov_type): - n = 9 - - # collect data - np.random.seed(42) - obj_dml_data = dml.datasets.make_plr_CCDDHNR2018(n_obs=n) - 
ml_l = LinearRegression() - ml_g = LinearRegression() - ml_m = LinearRegression() - - dml_plr_obj = dml.DoubleMLPLR(obj_dml_data, - ml_g, ml_m, ml_l, - n_folds=2, - score=score) - dml_plr_obj.fit() - random_basis = pd.DataFrame(np.random.normal(0, 1, size=(n, 5))) - cate = dml_plr_obj.cate(random_basis, cov_type=cov_type) - assert isinstance(cate, dml.DoubleMLBLP) - assert isinstance(cate.confint(), pd.DataFrame) - assert cate.blp_model.cov_type == cov_type - - groups_1 = pd.DataFrame( - np.column_stack([obj_dml_data.data['X1'] <= 0, - obj_dml_data.data['X1'] > 0.2]), - columns=['Group 1', 'Group 2']) - msg = ('At least one group effect is estimated with less than 6 observations.') - with pytest.warns(UserWarning, match=msg): - gate_1 = dml_plr_obj.gate(groups_1, cov_type=cov_type) - assert isinstance(gate_1, dml.utils.blp.DoubleMLBLP) - assert isinstance(gate_1.confint(), pd.DataFrame) - assert all(gate_1.confint().index == groups_1.columns.tolist()) - assert gate_1.blp_model.cov_type == cov_type - - np.random.seed(42) - groups_2 = pd.DataFrame(np.random.choice(["1", "2"], n)) - msg = ('At least one group effect is estimated with less than 6 observations.') - with pytest.warns(UserWarning, match=msg): - gate_2 = dml_plr_obj.gate(groups_2, cov_type=cov_type) - assert isinstance(gate_2, dml.utils.blp.DoubleMLBLP) - assert isinstance(gate_2.confint(), pd.DataFrame) - assert all(gate_2.confint().index == ["Group_1", "Group_2"]) - assert gate_2.blp_model.cov_type == cov_type + return request.param \ No newline at end of file diff --git a/doubleml/utils/_estimation.py b/doubleml/utils/_estimation.py index 3ed110f3c..6029dfd97 100644 --- a/doubleml/utils/_estimation.py +++ b/doubleml/utils/_estimation.py @@ -43,9 +43,9 @@ def _fit(estimator, x, y, train_index, idx=None): return estimator, idx -def _dml_cv_predict( - estimator, x, y, smpls=None, n_jobs=None, est_params=None, method="predict", return_train_preds=False, return_models=False -, smpls_is_partition=None): +def 
_dml_cv_predict(estimator, x, y, smpls=None, + n_jobs=None, est_params=None, method='predict', return_train_preds=False, return_models=False, + smpls_is_partition=None, sample_weights=None): n_obs = x.shape[0] # TODO: Better name for smples_is_partition @@ -53,9 +53,9 @@ def _dml_cv_predict( smpls_is_partition = _check_is_partition(smpls, n_obs) fold_specific_params = (est_params is not None) & (not isinstance(est_params, dict)) fold_specific_target = isinstance(y, list) - manual_cv_predict = ( - (not smpls_is_partition) | return_train_preds | fold_specific_params | fold_specific_target | return_models - ) + manual_cv_predict = (not smpls_is_partition) | return_train_preds | fold_specific_params | fold_specific_target \ + | return_models | bool(sample_weights) + #TODO: Check if cross_val_predict supports weights res = {"models": None} if not manual_cv_predict: @@ -187,6 +187,22 @@ def _draw_weights(method, n_rep_boot, n_obs): return weights +def _trimm(preds, trimming_rule, trimming_threshold): + if trimming_rule == 'truncate': + preds[preds < trimming_threshold] = trimming_threshold + preds[preds > 1 - trimming_threshold] = 1 - trimming_threshold + return preds + + +def _normalize_ipw(propensity, treatment): + mean_treat1 = np.mean(np.divide(treatment, propensity)) + mean_treat0 = np.mean(np.divide(1.0 - treatment, 1.0 - propensity)) + normalized_weights = np.multiply(treatment, np.multiply(propensity, mean_treat1)) \ + + np.multiply(1.0 - treatment, 1.0 - np.multiply(1.0 - propensity, mean_treat0)) + + return normalized_weights + + def _rmse(y_true, y_pred): subset = np.logical_not(np.isnan(y_true)) rmse = root_mean_squared_error(y_true[subset], y_pred[subset]) @@ -302,7 +318,7 @@ def _var_est(psi, psi_deriv, smpls, is_cluster_data, cluster_vars=None, smpls_cl J_l = test_cluster_inds[1] const = np.divide(min(len(I_k), len(J_l)), (np.square(len(I_k) * len(J_l)))) for cluster_value in I_k: - ind_cluster = (first_cluster_var == cluster_value) & 
np.isin(second_cluster_var, J_l) + ind_cluster = (first_cluster_var == cluster_value) & np.in1d(second_cluster_var, J_l) gamma_hat += const * np.sum(np.outer(psi[ind_cluster], psi[ind_cluster])) for cluster_value in J_l: ind_cluster = (second_cluster_var == cluster_value) & np.isin(first_cluster_var, I_k) From c6e600d2f67abf33aa59d8f074453c49ebd60c77 Mon Sep 17 00:00:00 2001 From: bbd5721 Date: Wed, 27 Aug 2025 19:18:16 +0200 Subject: [PATCH 07/48] Fixed bug in score computation --- doubleml/double_ml_data.py | 14 ++- doubleml/plm/logistic.py | 183 ++++++++----------------------------- 2 files changed, 49 insertions(+), 148 deletions(-) diff --git a/doubleml/double_ml_data.py b/doubleml/double_ml_data.py index 35c9af651..612e6b7f0 100644 --- a/doubleml/double_ml_data.py +++ b/doubleml/double_ml_data.py @@ -288,6 +288,15 @@ def from_arrays(cls, x, y, d, z=None, t=None, s=None, p=None, use_other_treat_as check_consistent_length(x, y, d, s) s_col = 's' + + if p is None: + p_cols = None + else: + if p.shape[1] == 1: + p_cols = ['p'] + else: + p_cols = [f'p{i + 1}' for i in np.arange(p.shape[1])] + if d.shape[1] == 1: d_cols = ['d'] else: @@ -310,10 +319,7 @@ def from_arrays(cls, x, y, d, z=None, t=None, s=None, p=None, use_other_treat_as data[s_col] = s if p is not None: - if p.shape[1] == 1: - d_cols = ['p'] - else: - d_cols = [f'p{i + 1}' for i in np.arange(p.shape[1])] + data[p_cols] = p return cls(data, y_col, d_cols, x_cols, z_cols, t_col, s_col, p_cols, use_other_treat_as_covariate, force_all_x_finite) diff --git a/doubleml/plm/logistic.py b/doubleml/plm/logistic.py index d48fb29d3..3e04d15d5 100644 --- a/doubleml/plm/logistic.py +++ b/doubleml/plm/logistic.py @@ -215,9 +215,9 @@ def _double_dml_cv_predict(self, estimator, estimator_name, x, y, smpls=None, s def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False): # TODO: How to deal with smpls_inner? 
x, y = check_X_y(self._dml_data.x, self._dml_data.y, - force_all_finite=False) + ensure_all_finite=False) x, d = check_X_y(x, self._dml_data.d, - force_all_finite=False) + ensure_all_finite=False) x_d_concat = np.hstack((d.reshape(-1,1), x)) m_external = external_predictions['ml_m'] is not None M_external = external_predictions['ml_M'] is not None @@ -236,9 +236,6 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa n_jobs=n_jobs_cv, est_params=self._get_params('ml_M'), method=self._predict_method['ml_M'])) - # TODO - #if self._score_type == "instrument": - # nuisance m if m_external: @@ -254,7 +251,7 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa est_params=self._get_params('ml_m'), method=self._predict_method['ml_m'], return_models=return_models, weights=weights) - else: + elif self.score == 'nuisance_space': filtered_smpls = [] for train, test in smpls: train_filtered = train[y[train] == 0] @@ -262,6 +259,8 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa m_hat = _dml_cv_predict(self._learner['ml_m'], x, d, smpls=filtered_smpls, n_jobs=n_jobs_cv, est_params=self._get_params('ml_m'), method=self._predict_method['ml_m'], return_models=return_models) + else: + raise NotImplementedError _check_finite_predictions(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls) if self._check_learner(self._learner['ml_m'], 'ml_m', regressor=True, classifier=True): @@ -288,31 +287,32 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa n_jobs=n_jobs_cv, est_params=self._get_params('ml_a'), method=self._predict_method['ml_a'])) - # r_legacy = np.zeros_like(y) - # smpls_inner = self.__smpls__inner - # M_hat = {} - # a_hat = {} - # M_hat['preds_inner'] = [] - # M_hat['preds'] = np.full_like(y, np.nan) - # a_hat['preds_inner'] = [] - # a_hat['preds'] = np.full_like(y, np.nan) - # for smpls_single_split, smpls_double_split in zip(smpls, smpls_inner): - 
# test = smpls_single_split[1] - # train = smpls_single_split[0] - # # r_legacy[test] = - # Mleg, aleg, a_nf_leg = self.legacy_implementation(y[train], x[train], d[train], x[test], d[test], - # self._learner['ml_m'], self._learner['ml_M'], - # smpls_single_split, smpls_double_split, y, x, d, - # x_d_concat, n_jobs_cv) - # Mtemp = np.full_like(y, np.nan) - # Mtemp[train] = Mleg - # Atemp = np.full_like(y, np.nan) - # Atemp[train] = aleg - # M_hat['preds_inner'].append(Mtemp) - # a_hat['preds_inner'].append(Atemp) - # a_hat['preds'][test] = a_nf_leg - # - # #r_hat['preds'] = r_legacy + + r_legacy = np.zeros_like(y) + smpls_inner = self.__smpls__inner + M_hat_l = {} + a_hat_l = {} + M_hat_l['preds_inner'] = [] + M_hat_l['preds'] = np.full_like(y, np.nan) + a_hat_l['preds_inner'] = [] + a_hat_l['preds'] = np.full_like(y, np.nan) + for smpls_single_split, smpls_double_split in zip(smpls, smpls_inner): + test = smpls_single_split[1] + train = smpls_single_split[0] + # r_legacy[test] = + Mleg, aleg, a_nf_leg = self.legacy_implementation(y[train], x[train], d[train], x[test], d[test], + self._learner['ml_m'], self._learner['ml_M'], + smpls_single_split, smpls_double_split, y, x, d, + x_d_concat, n_jobs_cv) + Mtemp = np.full_like(y, np.nan) + Mtemp[train] = Mleg + Atemp = np.full_like(y, np.nan) + Atemp[train] = aleg + M_hat_l['preds_inner'].append(Mtemp) + a_hat_l['preds_inner'].append(Atemp) + a_hat_l['preds'][test] = a_nf_leg + + #r_hat['preds'] = r_legacy @@ -343,10 +343,6 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa r_hat = {} r_hat['preds'] = t_hat['preds'] - beta * a_hat['preds'] - - - - psi_elements = self._score_elements(y, d, r_hat['preds'], m_hat['preds']) preds = {'predictions': {'ml_r': r_hat['preds'], @@ -484,124 +480,23 @@ def set_sample_splitting(self): def _compute_score(self, psi_elements, coef): - if self._score_type == 'nuisance_space': + if self.score == 'nuisance_space': score_1 = psi_elements["y"] * np.exp(-coef 
* psi_elements["d"]) * psi_elements["d_tilde"] score = psi_elements["psi_hat"] * (score_1 - psi_elements["score_const"]) - else: + elif self.score == 'instrument': score = (psi_elements["y"] - np.exp(coef * psi_elements["d"]+ psi_elements["r_hat"])) * psi_elements["d_tilde"] + else: + raise NotImplementedError return score def _compute_score_deriv(self, psi_elements, coef, inds=None): - if self._score_type == 'nuisance_space': + if self.score == 'nuisance_space': deriv_1 = - psi_elements["y"] * np.exp(-coef * psi_elements["d"]) * psi_elements["d"] deriv = psi_elements["psi_hat"] * psi_elements["d_tilde"] * deriv_1 - else: + elif self.score == 'instrument': deriv = - psi_elements["d"] * np.exp(coef * psi_elements["d"]+ psi_elements["r_hat"]) * psi_elements["d_tilde"] - - return deriv - - - def cate(self, basis, is_gate=False): - """ - Calculate conditional average treatment effects (CATE) for a given basis. - - Parameters - ---------- - basis : :class:`pandas.DataFrame` - The basis for estimating the best linear predictor. Has to have the shape ``(n_obs, d)``, - where ``n_obs`` is the number of observations and ``d`` is the number of predictors. - is_gate : bool - Indicates whether the basis is constructed for GATEs (dummy-basis). - Default is ``False``. - - Returns - ------- - model : :class:`doubleML.DoubleMLBLP` - Best linear Predictor model. - """ - if self._dml_data.n_treat > 1: - raise NotImplementedError('Only implemented for single treatment. ' + - f'Number of treatments is {str(self._dml_data.n_treat)}.') - if self.n_rep != 1: - raise NotImplementedError('Only implemented for one repetition. 
' + - f'Number of repetitions is {str(self.n_rep)}.') - - Y_tilde, D_tilde = self._partial_out() - - D_basis = basis * D_tilde - model = DoublelMLBLP( - orth_signal=Y_tilde.reshape(-1), - basis=D_basis, - is_gate=is_gate, - ) - model.fit() - - ## TODO: Solve score - - - return model - - def gate(self, groups): - """ - Calculate group average treatment effects (GATE) for groups. - - Parameters - ---------- - groups : :class:`pandas.DataFrame` - The group indicator for estimating the best linear predictor. Groups should be mutually exclusive. - Has to be dummy coded with shape ``(n_obs, d)``, where ``n_obs`` is the number of observations - and ``d`` is the number of groups or ``(n_obs, 1)`` and contain the corresponding groups (as str). - - Returns - ------- - model : :class:`doubleML.DoubleMLBLP` - Best linear Predictor model for Group Effects. - """ - - if not isinstance(groups, pd.DataFrame): - raise TypeError('Groups must be of DataFrame type. ' - f'Groups of type {str(type(groups))} was passed.') - if not all(groups.dtypes == bool) or all(groups.dtypes == int): - if groups.shape[1] == 1: - groups = pd.get_dummies(groups, prefix='Group', prefix_sep='_') - else: - raise TypeError('Columns of groups must be of bool type or int type (dummy coded). ' - 'Alternatively, groups should only contain one column.') - - if any(groups.sum(0) <= 5): - warnings.warn('At least one group effect is estimated with less than 6 observations.') - - model = self.cate(groups, is_gate=True) - return model - - def _partial_out(self): - """ - Helper function. Returns the partialled out quantities of Y and D. - Works with multiple repetitions. - - Returns - ------- - Y_tilde : :class:`numpy.ndarray` - The residual of the regression of Y on X. - D_tilde : :class:`numpy.ndarray` - The residual of the regression of D on X. - """ - if self.predictions is None: - raise ValueError('predictions are None. 
Call .fit(store_predictions=True) to store the predictions.') - - y = self._dml_data.y.reshape(-1, 1) - d = self._dml_data.d.reshape(-1, 1) - ml_m = self.predictions["ml_m"].squeeze(axis=2) - - if self.score == "partialling out": - ml_l = self.predictions["ml_l"].squeeze(axis=2) - Y_tilde = y - ml_l - D_tilde = d - ml_m else: - assert self.score == "IV-type" - ml_g = self.predictions["ml_g"].squeeze(axis=2) - Y_tilde = y - (self.coef * ml_m) - ml_g - D_tilde = d - ml_m + raise NotImplementedError - return Y_tilde, D_tilde \ No newline at end of file + return deriv \ No newline at end of file From 6f556e02caaf3e39e8b11e2655361178305ca183 Mon Sep 17 00:00:00 2001 From: bbd5721 Date: Wed, 27 Aug 2025 22:02:40 +0200 Subject: [PATCH 08/48] Reverted from ensure_all_finite to force_all_finite --- doubleml/plm/logistic.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doubleml/plm/logistic.py b/doubleml/plm/logistic.py index 3e04d15d5..a716497d2 100644 --- a/doubleml/plm/logistic.py +++ b/doubleml/plm/logistic.py @@ -215,9 +215,9 @@ def _double_dml_cv_predict(self, estimator, estimator_name, x, y, smpls=None, s def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False): # TODO: How to deal with smpls_inner? 
x, y = check_X_y(self._dml_data.x, self._dml_data.y, - ensure_all_finite=False) + force_all_finite=False) x, d = check_X_y(x, self._dml_data.d, - ensure_all_finite=False) + force_all_finite=False) x_d_concat = np.hstack((d.reshape(-1,1), x)) m_external = external_predictions['ml_m'] is not None M_external = external_predictions['ml_M'] is not None From 3a332bf91e97af94780805130f21b7688238d29d Mon Sep 17 00:00:00 2001 From: Julius Herzig Date: Thu, 28 Aug 2025 15:59:29 +0200 Subject: [PATCH 09/48] Fixes to instrument score --- doubleml/plm/logistic.py | 53 ++++++++++++++++++++-------------------- 1 file changed, 27 insertions(+), 26 deletions(-) diff --git a/doubleml/plm/logistic.py b/doubleml/plm/logistic.py index a716497d2..e19fc1e40 100644 --- a/doubleml/plm/logistic.py +++ b/doubleml/plm/logistic.py @@ -249,7 +249,7 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa weights.append( M_hat['preds_inner'][i][train] * (1-M_hat['preds_inner'][i][train])) m_hat = _dml_cv_predict(self._learner['ml_m'], x, d, smpls=smpls, n_jobs=n_jobs_cv, est_params=self._get_params('ml_m'), method=self._predict_method['ml_m'], - return_models=return_models, weights=weights) + return_models=return_models, sample_weights=weights) elif self.score == 'nuisance_space': filtered_smpls = [] @@ -288,29 +288,29 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa est_params=self._get_params('ml_a'), method=self._predict_method['ml_a'])) - r_legacy = np.zeros_like(y) - smpls_inner = self.__smpls__inner - M_hat_l = {} - a_hat_l = {} - M_hat_l['preds_inner'] = [] - M_hat_l['preds'] = np.full_like(y, np.nan) - a_hat_l['preds_inner'] = [] - a_hat_l['preds'] = np.full_like(y, np.nan) - for smpls_single_split, smpls_double_split in zip(smpls, smpls_inner): - test = smpls_single_split[1] - train = smpls_single_split[0] - # r_legacy[test] = - Mleg, aleg, a_nf_leg = self.legacy_implementation(y[train], x[train], d[train], x[test], d[test], 
- self._learner['ml_m'], self._learner['ml_M'], - smpls_single_split, smpls_double_split, y, x, d, - x_d_concat, n_jobs_cv) - Mtemp = np.full_like(y, np.nan) - Mtemp[train] = Mleg - Atemp = np.full_like(y, np.nan) - Atemp[train] = aleg - M_hat_l['preds_inner'].append(Mtemp) - a_hat_l['preds_inner'].append(Atemp) - a_hat_l['preds'][test] = a_nf_leg + # r_legacy = np.zeros_like(y) + # smpls_inner = self.__smpls__inner + # M_hat_l = {} + # a_hat_l = {} + # M_hat_l['preds_inner'] = [] + # M_hat_l['preds'] = np.full_like(y, np.nan) + # a_hat_l['preds_inner'] = [] + # a_hat_l['preds'] = np.full_like(y, np.nan) + # for smpls_single_split, smpls_double_split in zip(smpls, smpls_inner): + # test = smpls_single_split[1] + # train = smpls_single_split[0] + # # r_legacy[test] = + # Mleg, aleg, a_nf_leg = self.legacy_implementation(y[train], x[train], d[train], x[test], d[test], + # self._learner['ml_m'], self._learner['ml_M'], + # smpls_single_split, smpls_double_split, y, x, d, + # x_d_concat, n_jobs_cv) + # Mtemp = np.full_like(y, np.nan) + # Mtemp[train] = Mleg + # Atemp = np.full_like(y, np.nan) + # Atemp[train] = aleg + # M_hat_l['preds_inner'].append(Mtemp) + # a_hat_l['preds_inner'].append(Atemp) + # a_hat_l['preds'][test] = a_nf_leg #r_hat['preds'] = r_legacy @@ -484,7 +484,7 @@ def _compute_score(self, psi_elements, coef): score_1 = psi_elements["y"] * np.exp(-coef * psi_elements["d"]) * psi_elements["d_tilde"] score = psi_elements["psi_hat"] * (score_1 - psi_elements["score_const"]) elif self.score == 'instrument': - score = (psi_elements["y"] - np.exp(coef * psi_elements["d"]+ psi_elements["r_hat"])) * psi_elements["d_tilde"] + score = (psi_elements["y"] - scipy.special.expit(coef * psi_elements["d"]+ psi_elements["r_hat"])) * psi_elements["d_tilde"] else: raise NotImplementedError @@ -495,7 +495,8 @@ def _compute_score_deriv(self, psi_elements, coef, inds=None): deriv_1 = - psi_elements["y"] * np.exp(-coef * psi_elements["d"]) * psi_elements["d"] deriv = 
psi_elements["psi_hat"] * psi_elements["d_tilde"] * deriv_1 elif self.score == 'instrument': - deriv = - psi_elements["d"] * np.exp(coef * psi_elements["d"]+ psi_elements["r_hat"]) * psi_elements["d_tilde"] + expit = scipy.special.expit(coef * psi_elements["d"]+ psi_elements["r_hat"]) + deriv = - psi_elements["d"] * expit * (1-expit) * psi_elements["d_tilde"] else: raise NotImplementedError From b41a773c92a3d0aab04e76bfdb7d1343ff129122 Mon Sep 17 00:00:00 2001 From: bbd5721 Date: Wed, 3 Sep 2025 14:52:48 +0200 Subject: [PATCH 10/48] Added option for exception on convergence failure --- doubleml/double_ml_score_mixins.py | 44 ++++++++++++++++++------------ doubleml/plm/logistic.py | 4 ++- 2 files changed, 29 insertions(+), 19 deletions(-) diff --git a/doubleml/double_ml_score_mixins.py b/doubleml/double_ml_score_mixins.py index 57dd6e623..b0c69c25e 100644 --- a/doubleml/double_ml_score_mixins.py +++ b/doubleml/double_ml_score_mixins.py @@ -86,6 +86,7 @@ class NonLinearScoreMixin: _score_type = "nonlinear" _coef_start_val = np.nan _coef_bounds = None + _error_on_convergence_failure = False @property @abstractmethod @@ -149,12 +150,14 @@ def score_deriv(theta): theta_hat = root_res.root if not root_res.converged: score_val = score(theta_hat) - warnings.warn( - "Could not find a root of the score function.\n " - f"Flag: {root_res.flag}.\n" - f"Score value found is {score_val} " - f"for parameter theta equal to {theta_hat}." 
- ) + msg = ('Could not find a root of the score function.\n ' + f'Flag: {root_res.flag}.\n' + f'Score value found is {score_val} ' + f'for parameter theta equal to {theta_hat}.') + if self._error_on_convergence_failure: + raise ValueError(msg) + else: + warnings.warn(msg) else: signs_different, bracket_guess = _get_bracket_guess(score, self._coef_start_val, self._coef_bounds) @@ -182,16 +185,19 @@ def score_squared(theta): else: score_val_sign = np.sign(score(alt_coef_start)) if score_val_sign > 0: + theta_hat_array, score_val, _ = fmin_l_bfgs_b( score, self._coef_start_val, approx_grad=True, bounds=[self._coef_bounds] ) theta_hat = theta_hat_array.item() - warnings.warn( - "Could not find a root of the score function.\n " - f"Minimum score value found is {score_val} " - f"for parameter theta equal to {theta_hat}.\n " - "No theta found such that the score function evaluates to a negative value." - ) + msg = ('Could not find a root of the score function.\n ' + f'Minimum score value found is {score_val} ' + f'for parameter theta equal to {theta_hat}.\n ' + 'No theta found such that the score function evaluates to a negative value.') + if self._error_on_convergence_failure: + raise ValueError(msg) + else: + warnings.warn(msg) else: def neg_score(theta): @@ -202,11 +208,13 @@ def neg_score(theta): neg_score, self._coef_start_val, approx_grad=True, bounds=[self._coef_bounds] ) theta_hat = theta_hat_array.item() - warnings.warn( - "Could not find a root of the score function. " - f"Maximum score value found is {-1 * neg_score_val} " - f"for parameter theta equal to {theta_hat}. " - "No theta found such that the score function evaluates to a positive value." - ) + msg = ('Could not find a root of the score function. ' + f'Maximum score value found is {-1*neg_score_val} ' + f'for parameter theta equal to {theta_hat}. 
' + 'No theta found such that the score function evaluates to a positive value.') + if self._error_on_convergence_failure: + raise ValueError(msg) + else: + warnings.warn(msg) return theta_hat diff --git a/doubleml/plm/logistic.py b/doubleml/plm/logistic.py index e19fc1e40..9e1bb8750 100644 --- a/doubleml/plm/logistic.py +++ b/doubleml/plm/logistic.py @@ -115,13 +115,15 @@ def __init__(self, n_folds_inner=5, n_rep=1, score='nuisance_space', - draw_sample_splitting=True): + draw_sample_splitting=True, + error_on_convergence_failure=False,): self.n_folds_inner = n_folds_inner super().__init__(obj_dml_data, n_folds, n_rep, score, draw_sample_splitting) + self._error_on_convergence_failure = error_on_convergence_failure self._coef_bounds = (-1e-2, 1e2) self._coef_start_val = 1.0 From c434667ec8a668ca271d6639194807fda1ca26f6 Mon Sep 17 00:00:00 2001 From: bbd5721 Date: Mon, 29 Sep 2025 10:38:13 -0700 Subject: [PATCH 11/48] Added unbalanced dataset option, bug fixes --- doubleml/datasets.py | 34 ++++++++++----- doubleml/plm/logistic.py | 80 ++++++++++++++++++++++++++++++++++- doubleml/utils/_estimation.py | 28 ++++++------ 3 files changed, 115 insertions(+), 27 deletions(-) diff --git a/doubleml/datasets.py b/doubleml/datasets.py index 629a033aa..dad8b9f79 100644 --- a/doubleml/datasets.py +++ b/doubleml/datasets.py @@ -1651,7 +1651,7 @@ def treatment_effect(d, scale=15): return resul_dict -def make_logistic_LZZ2020(n_obs=500, dim_x=20, alpha=0.5, return_type='DoubleMLData', **kwargs): +def make_logistic_LZZ2020(n_obs=500, dim_x=20, alpha=0.5, return_type='DoubleMLData', balanced_r0=True, **kwargs): """ Generates synthetic data for a logistic partially linear regression model, as in Liu et al. (2021), designed for use in double/debiased machine learning applications. 
@@ -1705,16 +1705,28 @@ def make_logistic_LZZ2020(n_obs=500, dim_x=20, alpha=0.5, return_type='DoubleMLD """ - def r_0(X): - return 0.1 * X[:, 0] * X[:, 1] * X[:, 2] + \ - 0.1 * X[:, 3] * X[:, 4] + \ - 0.1 * X[:, 5] ** 3 + \ - -0.5 * np.sin(X[:, 6]) ** 2 + \ - 0.5 * np.cos(X[:, 7]) + \ - 1 / (1 + X[:, 8] ** 2) + \ - -1 / (1 + np.exp(X[:, 9])) + \ - 0.25 * np.where(X[:, 10] > 0, 1, 0) + \ - -0.25 * np.where(X[:, 12] > 0, 1, 0) + if balanced_r0: + def r_0(X): + return 0.1 * X[:, 0] * X[:, 1] * X[:, 2] + \ + 0.1 * X[:, 3] * X[:, 4] + \ + 0.1 * X[:, 5] ** 3 + \ + -0.5 * np.sin(X[:, 6]) ** 2 + \ + 0.5 * np.cos(X[:, 7]) + \ + 1 / (1 + X[:, 8] ** 2) + \ + -1 / (1 + np.exp(X[:, 9])) + \ + 0.25 * np.where(X[:, 10] > 0, 1, 0) + \ + -0.25 * np.where(X[:, 12] > 0, 1, 0) + else: + def r_0(X): + return 0.1 * X[:, 0] * X[:, 1] * X[:, 2] + \ + 0.1 * X[:, 3] * X[:, 4] + \ + 0.1 * X[:, 5] ** 3 + \ + -0.5 * np.sin(X[:, 6]) ** 2 + \ + 0.5 * np.cos(X[:, 7]) + \ + 3 / (1 + X[:, 8] ** 2) + \ + -1 / (1 + np.exp(X[:, 9])) + \ + 0.5 * np.where(X[:, 10] > 0, 1, 0) + \ + -0.25 * np.where(X[:, 12] > 0, 1, 0) def a_0(X): return 2 / (1 + np.exp(X[:, 0])) + \ diff --git a/doubleml/plm/logistic.py b/doubleml/plm/logistic.py index 9e1bb8750..7314debd7 100644 --- a/doubleml/plm/logistic.py +++ b/doubleml/plm/logistic.py @@ -143,9 +143,11 @@ def __init__(self, if ml_a is not None: ml_a_is_classifier = self._check_learner(ml_a, 'ml_a', regressor=True, classifier=True) self._learner['ml_a'] = ml_a + self._ml_a_provided = True else: self._learner['ml_a'] = clone(ml_m) ml_a_is_classifier = ml_m_is_classifier + self._ml_a_provided = False self._predict_method = {'ml_t': 'predict', 'ml_M': 'predict_proba'} @@ -449,8 +451,82 @@ def _score_element_names(self): def _sensitivity_element_est(self, preds): pass - def _nuisance_tuning(self): - pass + def _nuisance_tuning(self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, + search_mode, n_iter_randomized_search): + # TODO: test + x, y = 
check_X_y(self._dml_data.x, self._dml_data.y, + force_all_finite=False) + x, d = check_X_y(x, self._dml_data.d, + force_all_finite=False) + x_d_concat = np.hstack((d.reshape(-1, 1), x)) + + if scoring_methods is None: + scoring_methods = {'ml_m': None, + 'ml_M': None, + 'ml_a': None, + 'ml_t': None} + + train_inds = [train_index for (train_index, _) in smpls] + M_tune_res = _dml_tune(y, x_d_concat, train_inds, + self._learner['ml_M'], param_grids['ml_M'], scoring_methods['ml_M'], + n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search) + + if self.score == 'nuisance_space': + filtered_smpls = [] + for train, test in smpls: + train_filtered = train[y[train] == 0] + filtered_smpls.append(train_filtered) + filtered_train_inds = [train_index for (train_index, _) in smpls] + elif self.score == 'instrument': + filtered_train_inds = train_inds + else: + raise NotImplementedError + m_tune_res = _dml_tune(d, x, filtered_train_inds, + self._learner['ml_m'], param_grids['ml_m'], scoring_methods['ml_m'], + n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search) + + a_tune_res = _dml_tune(d, x, train_inds, + self._learner['ml_a'], param_grids['ml_a'], scoring_methods['ml_a'], + n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search) + + M_best_params = [xx.best_params_ for xx in M_tune_res] + m_best_params = [xx.best_params_ for xx in m_tune_res] + a_best_params = [xx.best_params_ for xx in a_tune_res] + + # Create targets for tuning ml_t + M_hat = (self._double_dml_cv_predict(self._learner['ml_M'], 'ml_M', x_d_concat, y, smpls=smpls, + smpls_inner=self.__smpls__inner, + n_jobs=n_jobs_cv, + est_params=M_best_params, method=self._predict_method['ml_M'])) + + W_inner = [] + for i, (train, test) in enumerate(smpls): + M_iteration = M_hat['preds_inner'][i][train] + M_iteration = np.clip(M_iteration, 1e-8, 1 - 1e-8) + w = scipy.special.logit(M_iteration) + W_inner.append(w) + + t_tune_res = _dml_tune(W_inner, x, train_inds, + self._learner['ml_t'], 
param_grids['ml_t'], scoring_methods['ml_t'], + n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search) + t_best_params = [xx.best_params_ for xx in t_tune_res] + + + + # Update params and tune_res to include ml_a and ml_t + params = {'ml_M': M_best_params, + 'ml_m': m_best_params, + 'ml_a': a_best_params, + 'ml_t': t_best_params} + tune_res = {'M_tune': M_tune_res, + 'm_tune': m_tune_res, + 'a_tune': a_tune_res, + 't_tune': t_tune_res} + + res = {'params': params, + 'tune_res': tune_res} + + return res @property def __smpls__inner(self): diff --git a/doubleml/utils/_estimation.py b/doubleml/utils/_estimation.py index 6029dfd97..8086322a8 100644 --- a/doubleml/utils/_estimation.py +++ b/doubleml/utils/_estimation.py @@ -149,25 +149,25 @@ def _dml_cv_predict(estimator, x, y, smpls=None, return res -def _dml_tune( - y, x, train_inds, learner, param_grid, scoring_method, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search -): +def _dml_tune(y, x, train_inds, + learner, param_grid, scoring_method, + n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search, fold_specific_target=False): tune_res = list() - for train_index in train_inds: + for i, train_index in enumerate(train_inds): tune_resampling = KFold(n_splits=n_folds_tune, shuffle=True) if search_mode == "grid_search": g_grid_search = GridSearchCV(learner, param_grid, scoring=scoring_method, cv=tune_resampling, n_jobs=n_jobs_cv) else: - assert search_mode == "randomized_search" - g_grid_search = RandomizedSearchCV( - learner, - param_grid, - scoring=scoring_method, - cv=tune_resampling, - n_jobs=n_jobs_cv, - n_iter=n_iter_randomized_search, - ) - tune_res.append(g_grid_search.fit(x[train_index, :], y[train_index])) + assert search_mode == 'randomized_search' + g_grid_search = RandomizedSearchCV(learner, param_grid, + scoring=scoring_method, + cv=tune_resampling, + n_jobs=n_jobs_cv, + n_iter=n_iter_randomized_search) + if fold_specific_target: + 
tune_res.append(g_grid_search.fit(x[train_index, :], y[i])) + else: + tune_res.append(g_grid_search.fit(x[train_index, :], y[train_index])) return tune_res From 443d82ddcfa530f8151e47ad467bb17cddb2b0ed Mon Sep 17 00:00:00 2001 From: Julius Herzig Date: Tue, 7 Oct 2025 15:42:38 -0700 Subject: [PATCH 12/48] Added binary treatment dataset, fixed bug for model check --- doubleml/datasets.py | 11 +++++++++-- doubleml/plm/logistic.py | 3 +-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/doubleml/datasets.py b/doubleml/datasets.py index dad8b9f79..b555b3bca 100644 --- a/doubleml/datasets.py +++ b/doubleml/datasets.py @@ -1651,7 +1651,7 @@ def treatment_effect(d, scale=15): return resul_dict -def make_logistic_LZZ2020(n_obs=500, dim_x=20, alpha=0.5, return_type='DoubleMLData', balanced_r0=True, **kwargs): +def make_logistic_LZZ2020(n_obs=500, dim_x=20, alpha=0.5, return_type='DoubleMLData', balanced_r0=True, treatment="continuous", **kwargs): """ Generates synthetic data for a logistic partially linear regression model, as in Liu et al. (2021), designed for use in double/debiased machine learning applications. 
@@ -1745,7 +1745,14 @@ def a_0(X): x = np.random.multivariate_normal(np.zeros(dim_x), sigma, size=n_obs) np.clip(x, -2, 2, out=x) - d = a_0(x) + if treatment == "continuous": + d = a_0(x) + elif treatment == "binary": + d_cont = a_0(x) + d = np.random.binomial(1, expit(d_cont - d_cont.mean())) + elif treatment == "binary_unbalanced": + d_cont = a_0(x) + d = np.random.binomial(1, expit(d_cont)) p = expit(alpha * d[:] + r_0(x)) diff --git a/doubleml/plm/logistic.py b/doubleml/plm/logistic.py index 7314debd7..3e21cbf0c 100644 --- a/doubleml/plm/logistic.py +++ b/doubleml/plm/logistic.py @@ -1,7 +1,6 @@ import inspect import numpy as np -from torch.sparse import sampled_addmm from doubleml.utils._estimation import ( _dml_cv_predict, @@ -134,7 +133,7 @@ def __init__(self, _ = self._check_learner(ml_t, 'ml_t', regressor=True, classifier=False) _ = self._check_learner(ml_M, 'ml_M', regressor=False, classifier=True) - if not np.array_equal(np.unique(obj_dml_data.y), [0, 1]): + if np.array_equal(np.unique(obj_dml_data.d), [0, 1]): ml_m_is_classifier = self._check_learner(ml_m, 'ml_m', regressor=False, classifier=True) else: ml_m_is_classifier = self._check_learner(ml_m, 'ml_m', regressor=True, classifier=False) From 774c74dfb98d7cb3b461bd962a0f37b74fce3257 Mon Sep 17 00:00:00 2001 From: Julius Herzig Date: Tue, 7 Oct 2025 15:45:10 -0700 Subject: [PATCH 13/48] Adjusted dataset balancing --- doubleml/datasets.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doubleml/datasets.py b/doubleml/datasets.py index b555b3bca..6d9acfc88 100644 --- a/doubleml/datasets.py +++ b/doubleml/datasets.py @@ -1723,9 +1723,9 @@ def r_0(X): 0.1 * X[:, 5] ** 3 + \ -0.5 * np.sin(X[:, 6]) ** 2 + \ 0.5 * np.cos(X[:, 7]) + \ - 3 / (1 + X[:, 8] ** 2) + \ + 4 / (1 + X[:, 8] ** 2) + \ -1 / (1 + np.exp(X[:, 9])) + \ - 0.5 * np.where(X[:, 10] > 0, 1, 0) + \ + 1.5 * np.where(X[:, 10] > 0, 1, 0) + \ -0.25 * np.where(X[:, 12] > 0, 1, 0) def a_0(X): From 
9695820f2cefa6bd1b63659fcca96e9f6f6a805a Mon Sep 17 00:00:00 2001 From: Julius Herzig Date: Mon, 27 Oct 2025 13:54:16 -0700 Subject: [PATCH 14/48] Renamed Logistic to LPLR Added test set-up --- doubleml/__init__.py | 4 +- doubleml/plm/__init__.py | 3 +- doubleml/plm/datasets/__init__.py | 2 + doubleml/plm/datasets/dgp_lplr_LZZ2020.py | 139 +++++++++ doubleml/plm/{logistic.py => lplr.py} | 213 +++---------- doubleml/plm/tests/_utils_lplr_manual.py | 335 +++++++++++++++++++++ doubleml/plm/tests/test_lplr.py | 105 +++++++ doubleml/plm/tests/test_lplr_exceptions.py | 293 ++++++++++++++++++ doubleml/plm/tests/test_lplr_tune.py | 227 ++++++++++++++ 9 files changed, 1155 insertions(+), 166 deletions(-) create mode 100644 doubleml/plm/datasets/dgp_lplr_LZZ2020.py rename doubleml/plm/{logistic.py => lplr.py} (69%) create mode 100644 doubleml/plm/tests/_utils_lplr_manual.py create mode 100644 doubleml/plm/tests/test_lplr.py create mode 100644 doubleml/plm/tests/test_lplr_exceptions.py create mode 100644 doubleml/plm/tests/test_lplr_tune.py diff --git a/doubleml/__init__.py b/doubleml/__init__.py index ba59a07e0..7c8ead970 100644 --- a/doubleml/__init__.py +++ b/doubleml/__init__.py @@ -13,7 +13,7 @@ from .irm.pq import DoubleMLPQ from .irm.qte import DoubleMLQTE from .irm.ssm import DoubleMLSSM -from doubleml.plm.logistic import DoubleMLLogit +from doubleml.plm.lplr import DoubleMLLPLR from .plm.pliv import DoubleMLPLIV from .plm.plr import DoubleMLPLR @@ -45,7 +45,7 @@ "DoubleMLBLP", "DoubleMLPolicyTree", "DoubleMLSSM", - "DoubleMLLogit", + "DoubleMLLPLR", ] __version__ = importlib.metadata.version("doubleml") diff --git a/doubleml/plm/__init__.py b/doubleml/plm/__init__.py index 88ff26a8a..37262ed93 100644 --- a/doubleml/plm/__init__.py +++ b/doubleml/plm/__init__.py @@ -4,9 +4,10 @@ from .pliv import DoubleMLPLIV from .plr import DoubleMLPLR +from .lplr import DoubleMLLPLR __all__ = [ "DoubleMLPLR", "DoubleMLPLIV", - "DoubleMLLogit" + "DoubleMLLPLR" ] diff --git 
a/doubleml/plm/datasets/__init__.py b/doubleml/plm/datasets/__init__.py index b2bb7df0e..5f433ae79 100644 --- a/doubleml/plm/datasets/__init__.py +++ b/doubleml/plm/datasets/__init__.py @@ -8,6 +8,7 @@ from .dgp_pliv_multiway_cluster_CKMS2021 import make_pliv_multiway_cluster_CKMS2021 from .dgp_plr_CCDDHNR2018 import make_plr_CCDDHNR2018 from .dgp_plr_turrell2018 import make_plr_turrell2018 +from .dgp_lplr_LZZ2020 import make_lplr_LZZ2020 __all__ = [ "make_plr_CCDDHNR2018", @@ -15,5 +16,6 @@ "make_confounded_plr_data", "make_pliv_CHS2015", "make_pliv_multiway_cluster_CKMS2021", + "make_lplr_LZZ2020", "_make_pliv_data", ] diff --git a/doubleml/plm/datasets/dgp_lplr_LZZ2020.py b/doubleml/plm/datasets/dgp_lplr_LZZ2020.py new file mode 100644 index 000000000..007e2b918 --- /dev/null +++ b/doubleml/plm/datasets/dgp_lplr_LZZ2020.py @@ -0,0 +1,139 @@ +import numpy as np +import pandas as pd +from scipy.special import expit + +from doubleml.data import DoubleMLData +from doubleml.utils._aliases import _get_array_alias, _get_data_frame_alias, _get_dml_data_alias + +_array_alias = _get_array_alias() +_data_frame_alias = _get_data_frame_alias() +_dml_data_alias = _get_dml_data_alias() + +def make_lplr_LZZ2020(n_obs=500, dim_x=20, alpha=0.5, return_type='DoubleMLData', balanced_r0=True, treatment="continuous", **kwargs): + """ + Generates synthetic data for a logistic partially linear regression model, as in Liu et al. (2021), + designed for use in double/debiased machine learning applications. + + The data generating process is defined as follows: + + - Covariates \( x_i \sim \mathcal{N}(0, \Sigma) \), where \( \Sigma_{kj} = 0.7^{|j-k|} \). + - Treatment \( d_i = a_0(x_i) \). + - Propensity score \( p_i = \sigma(\alpha d_i + r_0(x_i)) \), where \( \sigma(\cdot) \) is the logistic function. + - Outcome \( y_i \sim \text{Bernoulli}(p_i) \). + + The nuisance functions are defined as: + + .. 
math:: + + a_0(x_i) &= \frac{2}{1 + \exp(x_{i,1})} - \frac{2}{1 + \exp(x_{i,2})} + \sin(x_{i,3}) + \cos(x_{i,4}) \\ + &+ 0.5 \cdot \mathbb{1}(x_{i,5} > 0) - 0.5 \cdot \mathbb{1}(x_{i,6} > 0) + 0.2 x_{i,7} x_{i,8} - 0.2 x_{i,9} x_{i,10} \\ + + r_0(x_i) &= 0.1 x_{i,1} x_{i,2} x_{i,3} + 0.1 x_{i,4} x_{i,5} + 0.1 x_{i,6}^3 - 0.5 \sin^2(x_{i,7}) \\ + &+ 0.5 \cos(x_{i,8}) + \frac{1}{1 + x_{i,9}^2} - \frac{1}{1 + \exp(x_{i,10})} \\ + &+ 0.25 \cdot \mathbb{1}(x_{i,11} > 0) - 0.25 \cdot \mathbb{1}(x_{i,13} > 0) + + Parameters + ---------- + n_obs : int + Number of observations to simulate. + dim_x : int + Number of covariates. + alpha : float + Value of the causal parameter. + return_type : str + Determines the return format. One of: + + - 'DoubleMLData' or DoubleMLData: returns a ``DoubleMLData`` object. + - 'DataFrame', 'pd.DataFrame' or pd.DataFrame: returns a ``pandas.DataFrame``. + - 'array', 'np.ndarray', 'np.array' or np.ndarray: returns tuple of numpy arrays (x, y, d, p). + balanced_r0 : bool, default True + If True, uses the "balanced" r_0 specification (smaller magnitude / more balanced + heterogeneity). If False, uses an "unbalanced" r_0 specification with larger + share of Y=0. + treatment : {'continuous', 'binary', 'binary_unbalanced'}, default 'continuous' + Determines how the treatment d is generated from a_0(x): + - 'continuous': d = a_0(x) (continuous treatment). + - 'binary': d ~ Bernoulli( sigmoid(a_0(x) - mean(a_0(x))) ) . + - 'binary_unbalanced': d ~ Bernoulli( sigmoid(a_0(x)) ). + + **kwargs + Optional keyword arguments (currently unused in this implementation). + + Returns + ------- + Union[DoubleMLData, pd.DataFrame, Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]] + The generated data in the specified format. + + References + ---------- + Liu, Molei, Yi Zhang, and Doudou Zhou. 2021. + "Double/Debiased Machine Learning for Logistic Partially Linear Model." + The Econometrics Journal 24 (3): 559–88. https://doi.org/10.1093/ectj/utab019. 
+ + """ + + if balanced_r0: + def r_0(X): + return 0.1 * X[:, 0] * X[:, 1] * X[:, 2] + \ + 0.1 * X[:, 3] * X[:, 4] + \ + 0.1 * X[:, 5] ** 3 + \ + -0.5 * np.sin(X[:, 6]) ** 2 + \ + 0.5 * np.cos(X[:, 7]) + \ + 1 / (1 + X[:, 8] ** 2) + \ + -1 / (1 + np.exp(X[:, 9])) + \ + 0.25 * np.where(X[:, 10] > 0, 1, 0) + \ + -0.25 * np.where(X[:, 12] > 0, 1, 0) + else: + def r_0(X): + return 0.1 * X[:, 0] * X[:, 1] * X[:, 2] + \ + 0.1 * X[:, 3] * X[:, 4] + \ + 0.1 * X[:, 5] ** 3 + \ + -0.5 * np.sin(X[:, 6]) ** 2 + \ + 0.5 * np.cos(X[:, 7]) + \ + 4 / (1 + X[:, 8] ** 2) + \ + -1 / (1 + np.exp(X[:, 9])) + \ + 1.5 * np.where(X[:, 10] > 0, 1, 0) + \ + -0.25 * np.where(X[:, 12] > 0, 1, 0) + + def a_0(X): + return 2 / (1 + np.exp(X[:, 0])) + \ + -2 / (1 + np.exp(X[:, 1])) + \ + 1 * np.sin(X[:, 2]) + \ + 1 * np.cos(X[:, 3]) + \ + 0.5 * np.where(X[:, 4] > 0, 1, 0) + \ + -0.5 * np.where(X[:, 5] > 0, 1, 0) + \ + 0.2 * X[:, 6] * X[:, 7] + \ + -0.2 * X[:, 8] * X[:, 9] + + + sigma = np.full((dim_x, dim_x), 0.2) + np.fill_diagonal(sigma, 1) + + x = np.random.multivariate_normal(np.zeros(dim_x), sigma, size=n_obs) + np.clip(x, -2, 2, out=x) + + if treatment == "continuous": + d = a_0(x) + elif treatment == "binary": + d_cont = a_0(x) + d = np.random.binomial(1, expit(d_cont - d_cont.mean())) + elif treatment == "binary_unbalanced": + d_cont = a_0(x) + d = np.random.binomial(1, expit(d_cont)) + + p = expit(alpha * d[:] + r_0(x)) + + y = np.random.binomial(1, p) + + if return_type in _array_alias: + return x, y, d, p + elif return_type in _data_frame_alias + _dml_data_alias: + x_cols = [f'X{i + 1}' for i in np.arange(dim_x)] + data = pd.DataFrame(np.column_stack((x, y, d, p)), + columns=x_cols + ['y', 'd', 'p']) + if return_type in _data_frame_alias: + return data + else: + return DoubleMLData(data, 'y', 'd', x_cols) + else: + raise ValueError('Invalid return_type.') \ No newline at end of file diff --git a/doubleml/plm/logistic.py b/doubleml/plm/lplr.py similarity index 69% rename from 
doubleml/plm/logistic.py rename to doubleml/plm/lplr.py index 3e21cbf0c..1ed00810a 100644 --- a/doubleml/plm/logistic.py +++ b/doubleml/plm/lplr.py @@ -29,79 +29,64 @@ -class DoubleMLLogit(NonLinearScoreMixin, DoubleML): - """Double machine learning for partially linear regression models +class DoubleMLLPLR(NonLinearScoreMixin, DoubleML): + """Double machine learning for partially logistic models (binary outcomes) Parameters ---------- - obj_dml_data : :class:`DoubleMLData` object - The :class:`DoubleMLData` object providing the data and specifying the variables for the causal model. - - ml_r : estimator implementing ``fit()`` and ``predict()`` - A machine learner implementing ``fit()`` and ``predict()`` methods (e.g. - :py:class:`sklearn.ensemble.RandomForestRegressor`) for the nuisance function :math:`\\ell_0(X) = E[Y|X]`. - - ml_m : estimator implementing ``fit()`` and ``predict()`` - A machine learner implementing ``fit()`` and ``predict()`` methods (e.g. - :py:class:`sklearn.ensemble.RandomForestRegressor`) for the nuisance function :math:`m_0(X) = E[D|X]`. - For binary treatment variables :math:`D` (with values 0 and 1), a classifier implementing ``fit()`` and - ``predict_proba()`` can also be specified. If :py:func:`sklearn.base.is_classifier` returns ``True``, - ``predict_proba()`` is used otherwise ``predict()``. - - ml_g : estimator implementing ``fit()`` and ``predict()`` - A machine learner implementing ``fit()`` and ``predict()`` methods (e.g. - :py:class:`sklearn.ensemble.RandomForestRegressor`) for the nuisance function - :math:`g_0(X) = E[Y - D \\theta_0|X]`. - Note: The learner `ml_g` is only required for the score ``'IV-type'``. Optionally, it can be specified and - estimated for callable scores. - - n_folds : int - Number of folds. - Default is ``5``. - - n_rep : int - Number of repetitons for the sample splitting. - Default is ``1``. 
- - score : str or callable - A str (``'nuisance_space'`` or ``'instrument'``) specifying the score function - or a callable object / function with signature ``psi_a, psi_b = score(y, d, l_hat, m_hat, g_hat, smpls)``. - Default is ``'partialling out'``. - - draw_sample_splitting : bool - Indicates whether the sample splitting should be drawn during initialization of the object. - Default is ``True``. + obj_dml_data : DoubleMLData + The DoubleMLData object providing the data and variable specification. + The outcome variable y must be binary with values {0, 1}. + ml_M : estimator + Classifier for M_0(D, X) = P[Y = 1 | D, X]. Must implement fit() and predict_proba(). + ml_t : estimator + Regressor for the auxiliary regression used to predict log-odds. Must implement fit() and predict(). + ml_m : estimator + Learner for m_0(X) = E[D | X]. For binary treatments a classifier with predict_proba() is expected; + for continuous treatments a regressor with predict() is expected. + ml_a : estimator, optional + Optional alternative learner for E[D | X]. If not provided, a clone of ml_m is used. + Must support the same prediction interface as ml_m. + n_folds : int, default=5 + Number of outer cross-fitting folds. + n_folds_inner : int, default=5 + Number of inner folds for nested resampling used internally. + n_rep : int, default=1 + Number of repetitions for sample splitting. + score : {'nuisance_space', 'instrument'} or callable, default='nuisance_space' + Score to use. 'nuisance_space' estimates m on subsamples with y=0; 'instrument' uses an instrument-type score. + draw_sample_splitting : bool, default=True + Whether to draw sample splitting during initialization. + error_on_convergence_failure : bool, default=False + If True, raise an error on convergence failure of score. 
Examples -------- >>> import numpy as np >>> import doubleml as dml - >>> from doubleml.datasets import make_plr_CCDDHNR2018 - >>> from sklearn.ensemble import RandomForestRegressor + >>> from doubleml.plm.datasets import make_lplr_LZZ2020 + >>> from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier >>> from sklearn.base import clone >>> np.random.seed(3141) - >>> learner = RandomForestRegressor(n_estimators=100, max_features=20, max_depth=5, min_samples_leaf=2) - >>> ml_g = learner - >>> ml_m = learner - >>> obj_dml_data = make_plr_CCDDHNR2018(alpha=0.5, n_obs=500, dim_x=20) - >>> dml_plr_obj = dml.DoubleMLPLR(obj_dml_data, ml_g, ml_m) - >>> dml_plr_obj.fit().summary - coef std err t P>|t| 2.5 % 97.5 % - d 0.462321 0.04107 11.256983 2.139582e-29 0.381826 0.542816 + >>> ml_t = RandomForestRegressor(n_estimators=100, max_features=20, max_depth=5, min_samples_leaf=2) + >>> ml_m = RandomForestRegressor(n_estimators=100, max_features=20, max_depth=5, min_samples_leaf=2) + >>> ml_M = RandomForestClassifier(n_estimators=100, max_features=20, max_depth=5, min_samples_leaf=2) + >>> obj_dml_data = make_lplr_LZZ2020(alpha=0.5, n_obs=500, dim_x=20) + >>> dml_lplr_obj = dml.DoubleMLPLR(obj_dml_data, ml_M, ml_t, ml_m) + >>> dml_lplr_obj.fit().summary + coef std err t P>|t| 2.5 % 97.5 % + d 0.480691 0.040533 11.859129 1.929729e-32 0.401247 0.560135 Notes ----- - **Partially linear regression (PLR)** models take the form + **Partially logistic regression (PLR)** models take the form .. math:: - Y = D \\theta_0 + g_0(X) + \\zeta, & &\\mathbb{E}(\\zeta | D,X) = 0, - - D = m_0(X) + V, & &\\mathbb{E}(V | X) = 0, + Y = \\text{expit} ( D \\theta_0 + r_0(X)) where :math:`Y` is the outcome variable and :math:`D` is the policy variable of interest. - The high-dimensional vector :math:`X = (X_1, \\ldots, X_p)` consists of other confounding covariates, - and :math:`\\zeta` and :math:`V` are stochastic errors. 
+ The high-dimensional vector :math:`X = (X_1, \\ldots, X_p)` consists of other confounding covariates. """ def __init__(self, @@ -122,13 +107,18 @@ def __init__(self, n_rep, score, draw_sample_splitting) + + # Ensure outcome only contains 0 and 1 (validate early in constructor) + if not np.array_equal(np.unique(obj_dml_data.y), [0, 1]): + raise TypeError("The outcome variable y must be binary with values 0 and 1.") + self._error_on_convergence_failure = error_on_convergence_failure self._coef_bounds = (-1e-2, 1e2) self._coef_start_val = 1.0 self._check_data(self._dml_data) valid_scores = ['nuisance_space', 'instrument'] - _check_score(self.score, valid_scores, allow_callable=True) + _check_score(self.score, valid_scores, allow_callable=False) _ = self._check_learner(ml_t, 'ml_t', regressor=True, classifier=False) _ = self._check_learner(ml_M, 'ml_M', regressor=False, classifier=True) @@ -208,7 +198,6 @@ def _double_dml_cv_predict(self, estimator, estimator_name, x, y, smpls=None, s res['preds'][smpls_single_split[1]] += model.predict_proba(x[smpls_single_split[1]])[:, 1] else: res['preds'][smpls_single_split[1]] += model.predict(x[smpls_single_split[1]]) - res["preds_inner"] res["preds"] /= len(smpls) res['targets'] = np.copy(y) return res @@ -216,7 +205,6 @@ def _double_dml_cv_predict(self, estimator, estimator_name, x, y, smpls=None, s def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False): - # TODO: How to deal with smpls_inner? x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False) x, d = check_X_y(x, self._dml_data.d, @@ -278,9 +266,6 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa 'observed to be binary with values 0 and 1. 
Make sure that for classifiers ' 'probabilities and not labels are predicted.') - - - if a_external: a_hat = {'preds': external_predictions['ml_a'], 'targets': None, @@ -290,35 +275,6 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa n_jobs=n_jobs_cv, est_params=self._get_params('ml_a'), method=self._predict_method['ml_a'])) - - # r_legacy = np.zeros_like(y) - # smpls_inner = self.__smpls__inner - # M_hat_l = {} - # a_hat_l = {} - # M_hat_l['preds_inner'] = [] - # M_hat_l['preds'] = np.full_like(y, np.nan) - # a_hat_l['preds_inner'] = [] - # a_hat_l['preds'] = np.full_like(y, np.nan) - # for smpls_single_split, smpls_double_split in zip(smpls, smpls_inner): - # test = smpls_single_split[1] - # train = smpls_single_split[0] - # # r_legacy[test] = - # Mleg, aleg, a_nf_leg = self.legacy_implementation(y[train], x[train], d[train], x[test], d[test], - # self._learner['ml_m'], self._learner['ml_M'], - # smpls_single_split, smpls_double_split, y, x, d, - # x_d_concat, n_jobs_cv) - # Mtemp = np.full_like(y, np.nan) - # Mtemp[train] = Mleg - # Atemp = np.full_like(y, np.nan) - # Atemp[train] = aleg - # M_hat_l['preds_inner'].append(Mtemp) - # a_hat_l['preds_inner'].append(Atemp) - # a_hat_l['preds'][test] = a_nf_leg - - #r_hat['preds'] = r_legacy - - - W_inner = [] beta = np.zeros(d.shape, dtype=float) @@ -366,74 +322,6 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa return psi_elements, preds - - def legacy_implementation(self, Yfold: np.ndarray, Xfold: np.ndarray, Afold: np.ndarray, XnotFold: np.ndarray, AnotFold: np.ndarray, - learner, learnerClassifier, smpls_single_split, smpls_double_split, yfull, xfull, afull, x_d_concat, n_jobs_cv, noFolds: int = 5, seed=None, )-> (np.ndarray, np.ndarray, np.ndarray): - - def learn_predict(X, Y, Xpredict, learner, learnerClassifier, fit_args={}): - results = [] - if len(np.unique(Y)) == 2: - learnerClassifier.fit(X, Y, **fit_args) - for x in Xpredict: - 
results.append(learnerClassifier.predict_proba(x)[:, 1]) - else: - learner.fit(X, Y, **fit_args) - for x in Xpredict: - results.append(learner.predict(x)) - return (*results,) - - nFold = len(Yfold) - i = np.remainder(np.arange(nFold), noFolds) - np.random.default_rng(seed).shuffle(i) - - M = np.zeros((nFold)) - a_hat = np.zeros((nFold)) - a_hat_notFold = np.zeros((len(XnotFold))) - M_notFold = np.zeros((len(XnotFold))) - loss = {} - - a_hat_inner = _dml_cv_predict(self._learner['ml_a'], xfull, afull, smpls=smpls_double_split, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_a'), method=self._predict_method['ml_a'], - return_models=True, smpls_is_partition=True) - _check_finite_predictions(a_hat_inner['preds'], self._learner['ml_a'], 'ml_a', smpls_double_split) - a_hat_notFold = np.full_like(yfull, 0.) - for model in a_hat_inner['models']: - if self._predict_method['ml_a'] == 'predict_proba': - a_hat_notFold[smpls_single_split[1]] += model.predict_proba(xfull[smpls_single_split[1]])[:, 1] - else: - a_hat_notFold[smpls_single_split[1]] += model.predict(xfull[smpls_single_split[1]]) - - M_hat = _dml_cv_predict(self._learner['ml_M'], x_d_concat, yfull, smpls=smpls_double_split, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_M'), method=self._predict_method['ml_M'], - return_models=True, smpls_is_partition=True) - _check_finite_predictions(M_hat['preds'], self._learner['ml_M'], 'ml_M', smpls_double_split) - - M = M_hat['preds'][~np.isnan(M_hat['preds'])] - a_hat = a_hat_inner['preds'][~np.isnan(a_hat_inner['preds'])] - a_hat_notFold = a_hat_notFold[smpls_single_split[1]] - - np.clip(M, 1e-8, 1 - 1e-8, out=M) -# loss["M"] = compute_loss(Yfold, M) -# loss["a_hat"] = compute_loss(Afold, a_hat) - a_hat_notFold /= noFolds - # M_notFold /= noFolds - np.clip(M_notFold, 1e-8, 1 - 1e-8, out=M_notFold) - - # Obtain preliminary estimate of beta based on M and residual of a - W = scipy.special.logit(M) - A_resid = Afold - a_hat - beta_notFold = sum(A_resid * W) / 
sum(A_resid ** 2) - # print(beta_notFold) - t_notFold, = learn_predict(Xfold, W, [XnotFold], learner, learnerClassifier) - W_notFold = scipy.special.expit(M_notFold) -# loss["t"] = compute_loss(W_notFold, t_notFold) - - - # Compute r based on estimates for W=logit(M), beta and residual of A - r_notFold = t_notFold - beta_notFold * a_hat_notFold - - return M, a_hat, a_hat_notFold #r_notFold #, a_hat_notFold, M_notFold, t_notFold - def _score_elements(self, y, d, r_hat, m_hat): # compute residual d_tilde = d - m_hat @@ -470,12 +358,11 @@ def _nuisance_tuning(self, smpls, param_grids, scoring_methods, n_folds_tune, n_ self._learner['ml_M'], param_grids['ml_M'], scoring_methods['ml_M'], n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search) + filtered_train_inds = [] if self.score == 'nuisance_space': - filtered_smpls = [] for train, test in smpls: train_filtered = train[y[train] == 0] - filtered_smpls.append(train_filtered) - filtered_train_inds = [train_index for (train_index, _) in smpls] + filtered_train_inds.append(train_filtered) elif self.score == 'instrument': filtered_train_inds = train_inds else: @@ -553,7 +440,7 @@ def draw_sample_splitting(self): return self def set_sample_splitting(self): - raise NotImplementedError('set_sample_splitting is not implemented for DoubleMLLogit.') + raise NotImplementedError('set_sample_splitting is not implemented for DoubleMLLPLR.') def _compute_score(self, psi_elements, coef): @@ -577,4 +464,4 @@ def _compute_score_deriv(self, psi_elements, coef, inds=None): else: raise NotImplementedError - return deriv \ No newline at end of file + return deriv diff --git a/doubleml/plm/tests/_utils_lplr_manual.py b/doubleml/plm/tests/_utils_lplr_manual.py new file mode 100644 index 000000000..f14a1f66c --- /dev/null +++ b/doubleml/plm/tests/_utils_lplr_manual.py @@ -0,0 +1,335 @@ +import numpy as np +from sklearn.base import clone +from sklearn.model_selection import train_test_split + +from ...tests._utils import fit_predict, 
fit_predict_proba, tune_grid_search +from ...utils._estimation import _predict_zero_one_propensity +from ...utils._propensity_score import _trimm + + +def fit_selection( + y, + x, + d, + z, + s, + learner_g, + learner_pi, + learner_m, + all_smpls, + score, + trimming_rule="truncate", + trimming_threshold=1e-2, + normalize_ipw=True, + n_rep=1, + g_d0_params=None, + g_d1_params=None, + pi_params=None, + m_params=None, +): + n_obs = len(y) + + thetas = np.zeros(n_rep) + ses = np.zeros(n_rep) + + all_g_d1_hat = list() + all_g_d0_hat = list() + all_pi_hat = list() + all_m_hat = list() + + all_psi_a = list() + all_psi_b = list() + + for i_rep in range(n_rep): + smpls = all_smpls[i_rep] + + g_hat_d1_list, g_hat_d0_list, pi_hat_list, m_hat_list = fit_nuisance_selection( + y, + x, + d, + z, + s, + learner_g, + learner_pi, + learner_m, + smpls, + score, + trimming_rule=trimming_rule, + trimming_threshold=trimming_threshold, + g_d0_params=g_d0_params, + g_d1_params=g_d1_params, + pi_params=pi_params, + m_params=m_params, + ) + all_g_d1_hat.append(g_hat_d1_list) + all_g_d0_hat.append(g_hat_d0_list) + all_pi_hat.append(pi_hat_list) + all_m_hat.append(m_hat_list) + + g_hat_d1, g_hat_d0, pi_hat, m_hat = compute_selection(y, g_hat_d1_list, g_hat_d0_list, pi_hat_list, m_hat_list, smpls) + + dtreat = d == 1 + dcontrol = d == 0 + psi_a, psi_b = selection_score_elements(dtreat, dcontrol, g_hat_d1, g_hat_d0, pi_hat, m_hat, s, y, normalize_ipw) + + all_psi_a.append(psi_a) + all_psi_b.append(psi_b) + + thetas[i_rep], ses[i_rep] = selection_dml2(psi_a, psi_b) + + theta = np.median(thetas) + se = np.sqrt(np.median(np.power(ses, 2) * n_obs + np.power(thetas - theta, 2)) / n_obs) + + res = { + "theta": theta, + "se": se, + "thetas": thetas, + "ses": ses, + "all_g_d1_hat": all_g_d1_hat, + "all_g_d0_hat": all_g_d0_hat, + "all_pi_hat": all_pi_hat, + "all_m_hat": all_m_hat, + "all_psi_a": all_psi_a, + "all_psi_b": all_psi_b, + } + + return res + + +def fit_nuisance_selection( + y, + x, + d, + z, 
+ s, + learner_g, + learner_pi, + learner_m, + smpls, + score, + trimming_rule="truncate", + trimming_threshold=1e-2, + g_d0_params=None, + g_d1_params=None, + pi_params=None, + m_params=None, +): + ml_g_d1 = clone(learner_g) + ml_g_d0 = clone(learner_g) + ml_pi = clone(learner_pi) + ml_m = clone(learner_m) + + if z is None: + dx = np.column_stack((d, x)) + else: + dx = np.column_stack((d, x, z)) + + if score == "missing-at-random": + pi_hat_list = fit_predict_proba(s, dx, ml_pi, pi_params, smpls, trimming_threshold=trimming_threshold) + + m_hat_list = fit_predict_proba(d, x, ml_m, m_params, smpls) + + train_cond_d1_s1 = np.intersect1d(np.where(d == 1)[0], np.where(s == 1)[0]) + g_hat_d1_list = fit_predict(y, x, ml_g_d1, g_d1_params, smpls, train_cond=train_cond_d1_s1) + + train_cond_d0_s1 = np.intersect1d(np.where(d == 0)[0], np.where(s == 1)[0]) + g_hat_d0_list = fit_predict(y, x, ml_g_d0, g_d0_params, smpls, train_cond=train_cond_d0_s1) + else: + # initialize empty lists + g_hat_d1_list = [] + g_hat_d0_list = [] + pi_hat_list = [] + m_hat_list = [] + + # create strata for splitting + strata = d.reshape(-1, 1) + 2 * s.reshape(-1, 1) + + # POTENTIAL OUTCOME Y(1) + for i_fold, _ in enumerate(smpls): + ml_g_d1 = clone(learner_g) + ml_pi = clone(learner_pi) + ml_m = clone(learner_m) + + # set the params for the nuisance learners + if g_d1_params is not None: + ml_g_d1.set_params(**g_d1_params[i_fold]) + if g_d0_params is not None: + ml_g_d0.set_params(**g_d0_params[i_fold]) + if pi_params is not None: + ml_pi.set_params(**pi_params[i_fold]) + if m_params is not None: + ml_m.set_params(**m_params[i_fold]) + + train_inds = smpls[i_fold][0] + test_inds = smpls[i_fold][1] + + # start nested crossfitting + train_inds_1, train_inds_2 = train_test_split( + train_inds, test_size=0.5, random_state=42, stratify=strata[train_inds] + ) + + s_train_1 = s[train_inds_1] + dx_train_1 = dx[train_inds_1, :] + + # preliminary propensity score for selection + ml_pi_prelim = clone(ml_pi) 
+ # fit on first part of training set + ml_pi_prelim.fit(dx_train_1, s_train_1) + pi_hat_prelim = _predict_zero_one_propensity(ml_pi_prelim, dx) + + # predictions for small pi in denominator + pi_hat = pi_hat_prelim[test_inds] + + # add selection indicator to covariates + xpi = np.column_stack((x, pi_hat_prelim)) + + # estimate propensity score p using the second training sample + xpi_train_2 = xpi[train_inds_2, :] + d_train_2 = d[train_inds_2] + xpi_test = xpi[test_inds, :] + + ml_m.fit(xpi_train_2, d_train_2) + + m_hat = _predict_zero_one_propensity(ml_m, xpi_test) + + # estimate conditional outcome on second training sample -- treatment + s1_d1_train_2_indices = np.intersect1d(np.where(d == 1)[0], np.intersect1d(np.where(s == 1)[0], train_inds_2)) + xpi_s1_d1_train_2 = xpi[s1_d1_train_2_indices, :] + y_s1_d1_train_2 = y[s1_d1_train_2_indices] + + ml_g_d1.fit(xpi_s1_d1_train_2, y_s1_d1_train_2) + + # predict conditional outcome + g_hat_d1 = ml_g_d1.predict(xpi_test) + + # estimate conditional outcome on second training sample -- control + s1_d0_train_2_indices = np.intersect1d(np.where(d == 0)[0], np.intersect1d(np.where(s == 1)[0], train_inds_2)) + xpi_s1_d0_train_2 = xpi[s1_d0_train_2_indices, :] + y_s1_d0_train_2 = y[s1_d0_train_2_indices] + + ml_g_d0.fit(xpi_s1_d0_train_2, y_s1_d0_train_2) + + # predict conditional outcome + g_hat_d0 = ml_g_d0.predict(xpi_test) + + m_hat = _trimm(m_hat, trimming_rule, trimming_threshold) + + # append predictions on test sample to final list of predictions + g_hat_d1_list.append(g_hat_d1) + g_hat_d0_list.append(g_hat_d0) + pi_hat_list.append(pi_hat) + m_hat_list.append(m_hat) + + return g_hat_d1_list, g_hat_d0_list, pi_hat_list, m_hat_list + + +def compute_selection(y, g_hat_d1_list, g_hat_d0_list, pi_hat_list, m_hat_list, smpls): + g_hat_d1 = np.full_like(y, np.nan, dtype="float64") + g_hat_d0 = np.full_like(y, np.nan, dtype="float64") + pi_hat = np.full_like(y, np.nan, dtype="float64") + m_hat = np.full_like(y, np.nan, 
dtype="float64") + + for idx, (_, test_index) in enumerate(smpls): + g_hat_d1[test_index] = g_hat_d1_list[idx] + g_hat_d0[test_index] = g_hat_d0_list[idx] + pi_hat[test_index] = pi_hat_list[idx] + m_hat[test_index] = m_hat_list[idx] + + return g_hat_d1, g_hat_d0, pi_hat, m_hat + + +def selection_score_elements(dtreat, dcontrol, g_d1, g_d0, pi, m, s, y, normalize_ipw): + # psi_a + psi_a = -1 * np.ones_like(y) + + # psi_b + if normalize_ipw: + weight_treat = sum(dtreat) / sum((dtreat * s) / (m * pi)) + weight_control = sum(dcontrol) / sum((dcontrol * s) / ((1 - m) * pi)) + + psi_b1 = weight_treat * ((dtreat * s * (y - g_d1)) / (m * pi)) + g_d1 + psi_b0 = weight_control * ((dcontrol * s * (y - g_d0)) / ((1 - m) * pi)) + g_d0 + + else: + psi_b1 = (dtreat * s * (y - g_d1)) / (m * pi) + g_d1 + psi_b0 = (dcontrol * s * (y - g_d0)) / ((1 - m) * pi) + g_d0 + + psi_b = psi_b1 - psi_b0 + + return psi_a, psi_b + + +def selection_dml2(psi_a, psi_b): + n_obs = len(psi_a) + theta_hat = -np.mean(psi_b) / np.mean(psi_a) + se = np.sqrt(var_selection(theta_hat, psi_a, psi_b, n_obs)) + + return theta_hat, se + + +def var_selection(theta, psi_a, psi_b, n_obs): + J = np.mean(psi_a) + var = 1 / n_obs * np.mean(np.power(np.multiply(psi_a, theta) + psi_b, 2)) / np.power(J, 2) + return var + + +def tune_nuisance_ssm_mar(y, x, d, z, s, ml_g, ml_pi, ml_m, smpls, n_folds_tune, param_grid_g, param_grid_pi, param_grid_m): + d0_s1 = np.intersect1d(np.where(d == 0)[0], np.where(s == 1)[0]) + d1_s1 = np.intersect1d(np.where(d == 1)[0], np.where(s == 1)[0]) + + g0_tune_res = tune_grid_search(y, x, ml_g, smpls, param_grid_g, n_folds_tune, train_cond=d0_s1) + g1_tune_res = tune_grid_search(y, x, ml_g, smpls, param_grid_g, n_folds_tune, train_cond=d1_s1) + + dx = np.column_stack((x, d)) + + pi_tune_res = tune_grid_search(s, dx, ml_pi, smpls, param_grid_pi, n_folds_tune) + + m_tune_res = tune_grid_search(d, x, ml_m, smpls, param_grid_m, n_folds_tune) + + g0_best_params = [xx.best_params_ for xx in 
g0_tune_res] + g1_best_params = [xx.best_params_ for xx in g1_tune_res] + pi_best_params = [xx.best_params_ for xx in pi_tune_res] + m_best_params = [xx.best_params_ for xx in m_tune_res] + + return g0_best_params, g1_best_params, pi_best_params, m_best_params + + +def tune_nuisance_ssm_nonignorable( + y, x, d, z, s, ml_g, ml_pi, ml_m, smpls, n_folds_tune, param_grid_g, param_grid_pi, param_grid_m +): + + train_inds = [tr for (tr, _) in smpls] + + inner0_list, inner1_list = [], [] + for tr in train_inds: + i0, i1 = train_test_split(tr, test_size=0.5, stratify=d[tr] + 2 * s[tr], random_state=42) + inner0_list.append(i0) + inner1_list.append(i1) + + X_dz = np.c_[x, d.reshape(-1, 1), z.reshape(-1, 1)] + pi_tune_res = tune_grid_search(s, X_dz, ml_pi, [(i0, np.array([])) for i0 in inner0_list], param_grid_pi, n_folds_tune) + pi_best_params = [gs.best_params_ for gs in pi_tune_res] + + pi_hat_full = np.full_like(s, np.nan, dtype=float) + for i0, i1, gs in zip(inner0_list, inner1_list, pi_tune_res): + ml_pi_temp = clone(ml_pi) + ml_pi_temp.set_params(**gs.best_params_) + ml_pi_temp.fit(X_dz[i0], s[i0]) + ph = _predict_zero_one_propensity(ml_pi_temp, X_dz) + pi_hat_full[i1] = ph[i1] + + X_pi = np.c_[x, pi_hat_full] + m_tune_res = tune_grid_search(d, X_pi, ml_m, [(i1, np.array([])) for i1 in inner1_list], param_grid_m, n_folds_tune) + m_best_params = [gs.best_params_ for gs in m_tune_res] + + X_pi_d = np.c_[x, d.reshape(-1, 1), pi_hat_full.reshape(-1, 1)] + inner1_d0_s1 = [i1[(d[i1] == 0) & (s[i1] == 1)] for i1 in inner1_list] + inner1_d1_s1 = [i1[(d[i1] == 1) & (s[i1] == 1)] for i1 in inner1_list] + + g0_tune_res = tune_grid_search(y, X_pi_d, ml_g, [(idx, np.array([])) for idx in inner1_d0_s1], param_grid_g, n_folds_tune) + g1_tune_res = tune_grid_search(y, X_pi_d, ml_g, [(idx, np.array([])) for idx in inner1_d1_s1], param_grid_g, n_folds_tune) + + g0_best_params = [gs.best_params_ for gs in g0_tune_res] + g1_best_params = [gs.best_params_ for gs in g1_tune_res] + + return 
g0_best_params, g1_best_params, pi_best_params, m_best_params diff --git a/doubleml/plm/tests/test_lplr.py b/doubleml/plm/tests/test_lplr.py new file mode 100644 index 000000000..c561d9fe8 --- /dev/null +++ b/doubleml/plm/tests/test_lplr.py @@ -0,0 +1,105 @@ +import math + +import numpy as np +import pytest +from sklearn.base import clone +from sklearn.linear_model import LassoCV, LogisticRegressionCV + +import doubleml as dml + +from ...tests._utils import draw_smpls +from ._utils_ssm_manual import fit_selection + + +@pytest.fixture(scope="module", params=[[LassoCV(), LogisticRegressionCV(penalty="l1", solver="liblinear")]]) +def learner(request): + return request.param + + +@pytest.fixture(scope="module", params=["missing-at-random", "nonignorable"]) +def score(request): + return request.param + + +@pytest.fixture(scope="module", params=[True, False]) +def normalize_ipw(request): + return request.param + + +@pytest.fixture(scope="module", params=[0.01]) +def trimming_threshold(request): + return request.param + + +@pytest.fixture(scope="module") +def dml_selection_fixture( + generate_data_selection_mar, generate_data_selection_nonignorable, learner, score, trimming_threshold, normalize_ipw +): + n_folds = 3 + + # collect data + np.random.seed(42) + if score == "missing-at-random": + (x, y, d, z, s) = generate_data_selection_mar + else: + (x, y, d, z, s) = generate_data_selection_nonignorable + + ml_g = clone(learner[0]) + ml_pi = clone(learner[1]) + ml_m = clone(learner[1]) + + np.random.seed(42) + n_obs = len(y) + all_smpls = draw_smpls(n_obs, n_folds) + + np.random.seed(42) + if score == "missing-at-random": + obj_dml_data = dml.DoubleMLSSMData.from_arrays(x, y, d, z=None, s=s) + dml_sel_obj = dml.DoubleMLSSM(obj_dml_data, ml_g, ml_pi, ml_m, n_folds=n_folds, score=score) + else: + assert score == "nonignorable" + obj_dml_data = dml.DoubleMLSSMData.from_arrays(x, y, d, z=z, s=s) + dml_sel_obj = dml.DoubleMLSSM(obj_dml_data, ml_g, ml_pi, ml_m, n_folds=n_folds, 
score=score) + + np.random.seed(42) + dml_sel_obj.set_sample_splitting(all_smpls=all_smpls) + dml_sel_obj.fit() + + np.random.seed(42) + res_manual = fit_selection( + y, + x, + d, + z, + s, + clone(learner[0]), + clone(learner[1]), + clone(learner[1]), + all_smpls, + score, + trimming_rule="truncate", + trimming_threshold=trimming_threshold, + normalize_ipw=normalize_ipw, + ) + + res_dict = { + "coef": dml_sel_obj.coef[0], + "coef_manual": res_manual["theta"], + "se": dml_sel_obj.se[0], + "se_manual": res_manual["se"], + } + + # sensitivity tests + # TODO + + return res_dict + + +@pytest.mark.ci +def test_dml_selection_coef(dml_selection_fixture): + assert math.isclose(dml_selection_fixture["coef"], dml_selection_fixture["coef_manual"], rel_tol=1e-9, abs_tol=1e-2) + + +@pytest.mark.ci +def test_dml_selection_se(dml_selection_fixture): + assert math.isclose(dml_selection_fixture["se"], dml_selection_fixture["se_manual"], rel_tol=1e-9, abs_tol=5e-2) diff --git a/doubleml/plm/tests/test_lplr_exceptions.py b/doubleml/plm/tests/test_lplr_exceptions.py new file mode 100644 index 000000000..4361e7c7b --- /dev/null +++ b/doubleml/plm/tests/test_lplr_exceptions.py @@ -0,0 +1,293 @@ +import numpy as np +import pandas as pd +import pytest +from sklearn.base import BaseEstimator +from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor +from sklearn.linear_model import Lasso, LogisticRegression + +from doubleml import DoubleMLLPLR +from doubleml.data.base_data import DoubleMLBaseData, DoubleMLData +from doubleml.plm.datasets import make_lplr_LZZ2020 + +np.random.seed(3141) +n = 100 +# create test data and basic learners +dml_data = make_lplr_LZZ2020(alpha=0.5, n_obs=n, dim_x=10) +ml_M = RandomForestClassifier() +ml_t = RandomForestRegressor() +ml_m = RandomForestRegressor() +dml_lplr = DoubleMLLPLR(dml_data, ml_M, ml_t, ml_m) +dml_lplr_instrument = DoubleMLLPLR(dml_data, ml_M, ml_t, ml_m, score="instrument") + +@pytest.mark.ci +def test_lplr_exception_data(): 
+ msg = ( + r"The data must be of DoubleMLData type\. .* of type " + r" was passed\." + ) + with pytest.raises(TypeError, match=msg): + _ = DoubleMLLPLR(pd.DataFrame(), ml_M, ml_t, ml_m) + + dml_data_nb = make_lplr_LZZ2020(alpha=0.5, n_obs=50, dim_x=5) + dml_data_nb.data[dml_data_nb.y_col] = dml_data_nb.data[dml_data_nb.y_col] + 1 + dml_data_nb._set_y_z() + with pytest.raises(TypeError, match="The outcome variable y must be binary with values 0 and 1."): + _ = DoubleMLLPLR(dml_data_nb, ml_M, ml_t, ml_m) + + +@pytest.mark.ci +def test_lplr_exception_scores(): + # LPLR valid scores are 'nuisance_space' and 'instrument' + msg = "Invalid score MAR" + with pytest.raises(ValueError, match=msg): + _ = DoubleMLLPLR(dml_data, ml_M, ml_t, ml_m, score="MAR") + msg = "score should be string. 0 was passed." + with pytest.raises(TypeError, match=msg): + _ = DoubleMLLPLR(dml_data, ml_M, ml_t, ml_m, score=0) + +@pytest.mark.ci +def test_ssm_exception_resampling(): + msg = "The number of folds must be of int type. 1.5 of type was passed." + with pytest.raises(TypeError, match=msg): + _ = DoubleMLLPLR(dml_data, ml_M, ml_t, ml_m, n_folds=1.5) + + msg = "The number of repetitions for the sample splitting must be of int type. 1.5 of type was passed." + with pytest.raises(TypeError, match=msg): + _ = DoubleMLLPLR(dml_data, ml_M, ml_t, ml_m, n_rep=1.5) + + msg = "The number of folds must be positive. 0 was passed." + with pytest.raises(ValueError, match=msg): + _ = DoubleMLLPLR(dml_data, ml_M, ml_t, ml_m, n_folds=0) + + msg = "The number of repetitions for the sample splitting must be positive. 0 was passed." + with pytest.raises(ValueError, match=msg): + _ = DoubleMLLPLR(dml_data, ml_M, ml_t, ml_m, n_rep=0) + + msg = "draw_sample_splitting must be True or False. Got true." 
+ with pytest.raises(TypeError, match=msg): + _ = DoubleMLLPLR(dml_data, ml_M, ml_t, ml_m, draw_sample_splitting="true") + + +@pytest.mark.ci +def test_lplr_exception_get_params(): + msg = "Invalid nuisance learner ml_x. Valid nuisance learner ml_M or ml_g_t or ml_m or ml_a." + with pytest.raises(ValueError, match=msg): + dml_lplr.get_params("ml_x") + +@pytest.mark.ci +def test_lplr_exception_smpls(): + msg = ( + "Sample splitting not specified. " + r"Either draw samples via .draw_sample splitting\(\) or set external samples via .set_sample_splitting\(\)." + ) + dml_plr_no_smpls = DoubleMLLPLR(dml_data, ml_M, ml_t, ml_m, draw_sample_splitting=False) + with pytest.raises(ValueError, match=msg): + _ = dml_plr_no_smpls.smpls + +@pytest.mark.ci +def test_lplr_exception_fit(): + msg = "The number of CPUs used to fit the learners must be of int type. 5 of type was passed." + with pytest.raises(TypeError, match=msg): + dml_lplr.fit(n_jobs_cv="5") + msg = "store_predictions must be True or False. Got 1." + with pytest.raises(TypeError, match=msg): + dml_lplr.fit(store_predictions=1) + msg = "store_models must be True or False. Got 1." + with pytest.raises(TypeError, match=msg): + dml_lplr.fit(store_models=1) + +@pytest.mark.ci +def test_lplr_exception_bootstrap(): + dml_lplr_boot = DoubleMLLPLR(dml_data, ml_M, ml_t, ml_m) + msg = r"Apply fit\(\) before bootstrap\(\)." + with pytest.raises(ValueError, match=msg): + dml_lplr_boot.bootstrap() + + dml_lplr_boot.fit() + msg = 'Method must be "Bayes", "normal" or "wild". Got Gaussian.' + with pytest.raises(ValueError, match=msg): + dml_lplr_boot.bootstrap(method="Gaussian") + msg = "The number of bootstrap replications must be of int type. 500 of type was passed." + with pytest.raises(TypeError, match=msg): + dml_lplr_boot.bootstrap(n_rep_boot="500") + msg = "The number of bootstrap replications must be positive. 0 was passed." 
+ with pytest.raises(ValueError, match=msg): + dml_lplr_boot.bootstrap(n_rep_boot=0) + + +@pytest.mark.ci +def test_lplr_exception_confint(): + dml_lplr_conf = DoubleMLLPLR(dml_data, ml_M, ml_t, ml_m) + msg = r"Apply fit\(\) before confint\(\)." + with pytest.raises(ValueError, match=msg): + dml_lplr_conf.confint() + dml_lplr_conf.fit() + + msg = "joint must be True or False. Got 1." + with pytest.raises(TypeError, match=msg): + dml_lplr_conf.confint(joint=1) + msg = "The confidence level must be of float type. 5% of type was passed." + with pytest.raises(TypeError, match=msg): + dml_lplr_conf.confint(level="5%") + msg = r"The confidence level must be in \(0,1\). 0.0 was passed." + with pytest.raises(ValueError, match=msg): + dml_lplr_conf.confint(level=0.0) + + msg = r"Apply bootstrap\(\) before confint\(joint=True\)." + with pytest.raises(ValueError, match=msg): + dml_lplr_conf.confint(joint=True) + dml_lplr_conf.bootstrap() + df_lplr_ci = dml_lplr_conf.confint(joint=True) + assert isinstance(df_lplr_ci, pd.DataFrame) + + +@pytest.mark.ci +def test_lplr_exception_set_ml_nuisance_params(): + # invalid learner name + msg = "Invalid nuisance learner g. Valid nuisance learner ml_M or ml_t or ml_m or ml_a." + with pytest.raises(ValueError, match=msg): + dml_lplr.set_ml_nuisance_params("g", "d", {"alpha": 0.1}) + # invalid treatment variable + msg = "Invalid treatment variable y. Valid treatment variable d." 
+ with pytest.raises(ValueError, match=msg): + dml_lplr.set_ml_nuisance_params("ml_M", "y", {"alpha": 0.1}) + + +class _DummyNoSetParams: + def fit(self): + pass + + +class _DummyNoGetParams(_DummyNoSetParams): + def set_params(self): + pass + + +class _DummyNoClassifier(_DummyNoGetParams): + def get_params(self): + pass + + def predict_proba(self): + pass + + +class LogisticRegressionManipulatedType(LogisticRegression): + def __sklearn_tags__(self): + tags = super().__sklearn_tags__() + tags.estimator_type = None + return tags + + +@pytest.mark.ci +@pytest.mark.filterwarnings( + r"ignore:.*is \(probably\) neither a regressor nor a classifier.*:UserWarning", +) +def test_lplr_exception_learner(): + err_msg_prefix = "Invalid learner provided for ml_t: " + + msg = err_msg_prefix + "provide an instance of a learner instead of a class." + with pytest.raises(TypeError, match=msg): + _ = DoubleMLLPLR(dml_data, ml_M, Lasso, ml_m) + msg = err_msg_prefix + r"BaseEstimator\(\) has no method .fit\(\)." + with pytest.raises(TypeError, match=msg): + _ = DoubleMLLPLR(dml_data, ml_M, BaseEstimator(), ml_m) + msg = r"has no method .set_params\(\)." + with pytest.raises(TypeError, match=msg): + _ = DoubleMLLPLR(dml_data, ml_M, _DummyNoSetParams(), ml_m) + msg = r"has no method .get_params\(\)." + with pytest.raises(TypeError, match=msg): + _ = DoubleMLLPLR(dml_data, ml_M, _DummyNoGetParams(), ml_m) + + # ml_m may not be a classifier when treatment is not binary + msg = ( + r"The ml_m learner LogisticRegression\(\) was identified as classifier " + r"but at least one treatment variable is not binary with values 0 and 1\." 
+ ) + with pytest.raises(ValueError, match=msg): + _ = DoubleMLLPLR(dml_data, ml_M, ml_t, LogisticRegression()) + + # construct a classifier which is not identifiable as classifier via is_classifier by sklearn + log_reg = LogisticRegressionManipulatedType() + # TODO(0.11) can be removed if the sklearn dependency is bumped to 1.6.0 + log_reg._estimator_type = None + msg = ( + r"Learner provided for ml_m is probably invalid: LogisticRegressionManipulatedType\(\) is \(probably\) " + r"no classifier\." + ) + with pytest.warns(UserWarning, match=msg): + _ = DoubleMLLPLR(dml_data, ml_M, ml_t, log_reg) + + +@pytest.mark.ci +@pytest.mark.filterwarnings( + r"ignore:.*is \(probably\) neither a regressor nor a classifier.*:UserWarning", + r"ignore: Learner provided for ml_m is probably invalid.*is \(probably\) no classifier.*:UserWarning", +) +def test_lplr_exception_and_warning_learner(): + # invalid ml_M (must be a classifier with predict_proba) + with pytest.raises(TypeError): + _ = DoubleMLLPLR(dml_data, _DummyNoClassifier(), ml_t, ml_m) + msg = "Invalid learner provided for ml_M: " + r"Lasso\(\) has no method .predict_proba\(\)." + with pytest.raises(TypeError, match=msg): + _ = DoubleMLLPLR(dml_data, Lasso(), ml_t, ml_m) + + +class LassoWithNanPred(Lasso): + def predict(self, X): + preds = super().predict(X) + n_obs = len(preds) + preds[np.random.randint(0, n_obs, 1)] = np.nan + return preds + + +class LassoWithInfPred(Lasso): + def predict(self, X): + preds = super().predict(X) + n_obs = len(preds) + preds[np.random.randint(0, n_obs, 1)] = np.inf + return preds + + +@pytest.mark.ci +def test_lplr_nan_prediction(): + msg = r"Predictions from learner LassoWithNanPred\(\) for ml_t are not finite." + with pytest.raises(ValueError, match=msg): + _ = DoubleMLLPLR(dml_data, ml_M, LassoWithNanPred(), ml_m).fit() + msg = r"Predictions from learner LassoWithInfPred\(\) for ml_t are not finite." 
+ with pytest.raises(ValueError, match=msg): + _ = DoubleMLLPLR(dml_data, ml_M, LassoWithInfPred(), ml_m).fit() + + +@pytest.mark.ci +def test_double_ml_exception_evaluate_learner(): + dml_lplr_obj = DoubleMLLPLR( + dml_data, + ml_M=LogisticRegression(), + ml_t=Lasso(), + ml_m=RandomForestRegressor(), + n_folds=5, + score="nuisance_space", + ) + + msg = r"Apply fit\(\) before evaluate_learners\(\)." + with pytest.raises(ValueError, match=msg): + dml_lplr_obj.evaluate_learners() + + dml_lplr_obj.fit() + + msg = "metric should be a callable. 'mse' was passed." + with pytest.raises(TypeError, match=msg): + dml_lplr_obj.evaluate_learners(metric="mse") + + msg = ( + r"The learners have to be a subset of \['ml_M', 'ml_t', 'ml_m', 'ml_a'\]\. " + r"Learners \['ml_mu', 'ml_p'\] provided." + ) + with pytest.raises(ValueError, match=msg): + dml_lplr_obj.evaluate_learners(learners=["ml_mu", "ml_p"]) + + def eval_fct(y_pred, y_true): + return np.nan + + with pytest.raises(ValueError): + dml_lplr_obj.evaluate_learners(metric=eval_fct) diff --git a/doubleml/plm/tests/test_lplr_tune.py b/doubleml/plm/tests/test_lplr_tune.py new file mode 100644 index 000000000..0e0fa7bfd --- /dev/null +++ b/doubleml/plm/tests/test_lplr_tune.py @@ -0,0 +1,227 @@ +import math + +import numpy as np +import pytest +from sklearn.base import clone +from sklearn.ensemble import RandomForestRegressor +from sklearn.linear_model import LogisticRegression + +import doubleml as dml + +from ...tests._utils import draw_smpls +from ._utils_lplr_manual import fit_selection, tune_nuisance_ssm_mar, tune_nuisance_ssm_nonignorable + + +@pytest.fixture(scope="module", params=[RandomForestRegressor(random_state=42)]) +def learner_g(request): + return request.param + + +@pytest.fixture(scope="module", params=[LogisticRegression(random_state=42)]) +def learner_m(request): + return request.param + + +@pytest.fixture(scope="module", params=["missing-at-random", "nonignorable"]) +def score(request): + return request.param + 
+ +@pytest.fixture(scope="module", params=[True, False]) +def normalize_ipw(request): + return request.param + + +@pytest.fixture(scope="module", params=[True, False]) +def tune_on_folds(request): + return request.param + + +def get_par_grid(learner): + if learner.__class__ in [RandomForestRegressor]: + par_grid = {"n_estimators": [5, 10, 20]} + else: + assert learner.__class__ in [LogisticRegression] + par_grid = {"C": np.logspace(-2, 2, 10)} + return par_grid + + +@pytest.fixture(scope="module") +def dml_ssm_fixture( + generate_data_selection_mar, + generate_data_selection_nonignorable, + learner_g, + learner_m, + score, + normalize_ipw, + tune_on_folds, +): + par_grid = {"ml_g": get_par_grid(learner_g), "ml_pi": get_par_grid(learner_m), "ml_m": get_par_grid(learner_m)} + n_folds_tune = 4 + n_folds = 2 + + # collect data + np.random.seed(42) + if score == "missing-at-random": + (x, y, d, z, s) = generate_data_selection_mar + else: + (x, y, d, z, s) = generate_data_selection_nonignorable + + n_obs = len(y) + all_smpls = draw_smpls(n_obs, n_folds) + + ml_g = clone(learner_g) + ml_pi = clone(learner_m) + ml_m = clone(learner_m) + + np.random.seed(42) + if score == "missing-at-random": + obj_dml_data = dml.DoubleMLSSMData.from_arrays(x, y, d, z=None, s=s) + dml_sel_obj = dml.DoubleMLSSM( + obj_dml_data, + ml_g, + ml_pi, + ml_m, + n_folds=n_folds, + score=score, + normalize_ipw=normalize_ipw, + draw_sample_splitting=False, + ) + else: + assert score == "nonignorable" + obj_dml_data = dml.DoubleMLSSMData.from_arrays(x, y, d, z=z, s=s) + dml_sel_obj = dml.DoubleMLSSM( + obj_dml_data, + ml_g, + ml_pi, + ml_m, + n_folds=n_folds, + score=score, + normalize_ipw=normalize_ipw, + draw_sample_splitting=False, + ) + + # synchronize the sample splitting + np.random.seed(42) + dml_sel_obj.set_sample_splitting(all_smpls=all_smpls) + + np.random.seed(42) + # tune hyperparameters + tune_res = dml_sel_obj.tune(par_grid, tune_on_folds=tune_on_folds, n_folds_tune=n_folds_tune, 
return_tune_res=False) + assert isinstance(tune_res, dml.DoubleMLSSM) + + dml_sel_obj.fit() + + np.random.seed(42) + smpls = all_smpls[0] + if tune_on_folds: + if score == "missing-at-random": + g0_best_params, g1_best_params, pi_best_params, m_best_params = tune_nuisance_ssm_mar( + y, + x, + d, + z, + s, + clone(learner_g), + clone(learner_m), + clone(learner_m), + smpls, + n_folds_tune, + par_grid["ml_g"], + par_grid["ml_pi"], + par_grid["ml_m"], + ) + elif score == "nonignorable": + g0_best_params, g1_best_params, pi_best_params, m_best_params = tune_nuisance_ssm_nonignorable( + y, + x, + d, + z, + s, + clone(learner_g), + clone(learner_m), + clone(learner_m), + smpls, + n_folds_tune, + par_grid["ml_g"], + par_grid["ml_pi"], + par_grid["ml_m"], + ) + + else: + xx = [(np.arange(len(y)), np.array([]))] + if score == "missing-at-random": + g0_best_params, g1_best_params, pi_best_params, m_best_params = tune_nuisance_ssm_mar( + y, + x, + d, + z, + s, + clone(learner_g), + clone(learner_m), + clone(learner_m), + xx, + n_folds_tune, + par_grid["ml_g"], + par_grid["ml_pi"], + par_grid["ml_m"], + ) + elif score == "nonignorable": + g0_best_params, g1_best_params, pi_best_params, m_best_params = tune_nuisance_ssm_nonignorable( + y, + x, + d, + z, + s, + clone(learner_g), + clone(learner_m), + clone(learner_m), + xx, + n_folds_tune, + par_grid["ml_g"], + par_grid["ml_pi"], + par_grid["ml_m"], + ) + + g0_best_params = g0_best_params * n_folds + g1_best_params = g1_best_params * n_folds + pi_best_params = pi_best_params * n_folds + m_best_params = m_best_params * n_folds + + np.random.seed(42) + res_manual = fit_selection( + y, + x, + d, + z, + s, + clone(learner_g), + clone(learner_m), + clone(learner_m), + all_smpls, + score, + normalize_ipw=normalize_ipw, + g_d0_params=g0_best_params, + g_d1_params=g1_best_params, + pi_params=pi_best_params, + m_params=m_best_params, + ) + + res_dict = { + "coef": dml_sel_obj.coef[0], + "coef_manual": res_manual["theta"], + "se": 
dml_sel_obj.se[0], + "se_manual": res_manual["se"], + } + + return res_dict + + +@pytest.mark.ci +def test_dml_ssm_coef(dml_ssm_fixture): + assert math.isclose(dml_ssm_fixture["coef"], dml_ssm_fixture["coef_manual"], rel_tol=1e-9, abs_tol=1e-4) + + +@pytest.mark.ci +def test_dml_ssm_se(dml_ssm_fixture): + assert math.isclose(dml_ssm_fixture["se"], dml_ssm_fixture["se_manual"], rel_tol=1e-9, abs_tol=1e-4) From dbfea737dc092c7f3c32531fdaf670b47892a5f6 Mon Sep 17 00:00:00 2001 From: Julius Herzig Date: Mon, 27 Oct 2025 14:19:32 -0700 Subject: [PATCH 15/48] Clean-up of branch --- doubleml/datasets.py | 1772 --------------------------------- doubleml/double_ml.py | 6 - doubleml/double_ml_data.py | 1104 -------------------- doubleml/utils/_estimation.py | 16 - 4 files changed, 2898 deletions(-) delete mode 100644 doubleml/datasets.py delete mode 100644 doubleml/double_ml_data.py diff --git a/doubleml/datasets.py b/doubleml/datasets.py deleted file mode 100644 index 6d9acfc88..000000000 --- a/doubleml/datasets.py +++ /dev/null @@ -1,1772 +0,0 @@ -import pandas as pd -import numpy as np -import warnings - -from scipy.linalg import toeplitz -from scipy.optimize import minimize_scalar -from scipy.special import expit - -from sklearn.preprocessing import PolynomialFeatures, OneHotEncoder -from sklearn.datasets import make_spd_matrix - -from .double_ml_data import DoubleMLData, DoubleMLClusterData - -_array_alias = ['array', 'np.ndarray', 'np.array', np.ndarray] -_data_frame_alias = ['DataFrame', 'pd.DataFrame', pd.DataFrame] -_dml_data_alias = ['DoubleMLData', DoubleMLData] -_dml_cluster_data_alias = ['DoubleMLClusterData', DoubleMLClusterData] - - -def fetch_401K(return_type='DoubleMLData', polynomial_features=False): - """ - Data set on financial wealth and 401(k) plan participation. - - Parameters - ---------- - return_type : - If ``'DoubleMLData'`` or ``DoubleMLData``, returns a ``DoubleMLData`` object. 
- - If ``'DataFrame'``, ``'pd.DataFrame'`` or ``pd.DataFrame``, returns a ``pd.DataFrame``. - polynomial_features : - If ``True`` polynomial features are added (see replication files of Chernozhukov et al. (2018)). - - References - ---------- - Abadie, A. (2003), Semiparametric instrumental variable estimation of treatment response models. Journal of - Econometrics, 113(2): 231-263. - - Chernozhukov, V., Chetverikov, D., Demirer, M., Duflo, E., Hansen, C., Newey, W. and Robins, J. (2018), - Double/debiased machine learning for treatment and structural parameters. The Econometrics Journal, 21: C1-C68. - doi:`10.1111/ectj.12097 `_. - """ - url = 'https://github.com/VC2015/DMLonGitHub/raw/master/sipp1991.dta' - raw_data = pd.read_stata(url) - - y_col = 'net_tfa' - d_cols = ['e401'] - x_cols = ['age', 'inc', 'educ', 'fsize', 'marr', 'twoearn', 'db', 'pira', 'hown'] - - data = raw_data.copy() - - if polynomial_features: - raise NotImplementedError('polynomial_features os not implemented yet for fetch_401K.') - - if return_type in _data_frame_alias + _dml_data_alias: - if return_type in _data_frame_alias: - return data - else: - return DoubleMLData(data, y_col, d_cols, x_cols) - else: - raise ValueError('Invalid return_type.') - - -def fetch_bonus(return_type='DoubleMLData', polynomial_features=False): - """ - Data set on the Pennsylvania Reemployment Bonus experiment. - - Parameters - ---------- - return_type : - If ``'DoubleMLData'`` or ``DoubleMLData``, returns a ``DoubleMLData`` object. - - If ``'DataFrame'``, ``'pd.DataFrame'`` or ``pd.DataFrame``, returns a ``pd.DataFrame``. - polynomial_features : - If ``True`` polynomial features are added (see replication files of Chernozhukov et al. (2018)). - - References - ---------- - Bilias Y. (2000), Sequential Testing of Duration Data: The Case of Pennsylvania 'Reemployment Bonus' Experiment. - Journal of Applied Econometrics, 15(6): 575-594. 
- - Chernozhukov, V., Chetverikov, D., Demirer, M., Duflo, E., Hansen, C., Newey, W. and Robins, J. (2018), - Double/debiased machine learning for treatment and structural parameters. The Econometrics Journal, 21: C1-C68. - doi:`10.1111/ectj.12097 `_. - """ - url = 'https://raw.githubusercontent.com/VC2015/DMLonGitHub/master/penn_jae.dat' - raw_data = pd.read_csv(url, sep='\s+') - - ind = (raw_data['tg'] == 0) | (raw_data['tg'] == 4) - data = raw_data.copy()[ind] - data.reset_index(inplace=True) - data['tg'] = data['tg'].replace(4, 1) - data['inuidur1'] = np.log(data['inuidur1']) - - # variable dep as factor (dummy encoding) - dummy_enc = OneHotEncoder(drop='first', categories='auto').fit(data.loc[:, ['dep']]) - xx = dummy_enc.transform(data.loc[:, ['dep']]).toarray() - data['dep1'] = xx[:, 0] - data['dep2'] = xx[:, 1] - - y_col = 'inuidur1' - d_cols = ['tg'] - x_cols = ['female', 'black', 'othrace', - 'dep1', 'dep2', - 'q2', 'q3', 'q4', 'q5', 'q6', - 'agelt35', 'agegt54', 'durable', 'lusd', 'husd'] - - if polynomial_features: - poly = PolynomialFeatures(2, include_bias=False) - data_transf = poly.fit_transform(data[x_cols]) - x_cols = list(poly.get_feature_names_out(x_cols)) - - data_transf = pd.DataFrame(data_transf, columns=x_cols) - data = pd.concat((data[[y_col] + d_cols], data_transf), - axis=1, sort=False) - - if return_type in _data_frame_alias + _dml_data_alias: - if return_type in _data_frame_alias: - return data - else: - return DoubleMLData(data, y_col, d_cols, x_cols) - else: - raise ValueError('Invalid return_type.') - - -def _g(x): - return np.power(np.sin(x), 2) - - -def _m(x, nu=0., gamma=1.): - return 0.5 / np.pi * (np.sinh(gamma)) / (np.cosh(gamma) - np.cos(x - nu)) - - -def make_plr_CCDDHNR2018(n_obs=500, dim_x=20, alpha=0.5, return_type='DoubleMLData', **kwargs): - """ - Generates data from a partially linear regression model used in Chernozhukov et al. (2018) for Figure 1. - The data generating process is defined as - - .. 
math:: - - d_i &= m_0(x_i) + s_1 v_i, & &v_i \\sim \\mathcal{N}(0,1), - - y_i &= \\alpha d_i + g_0(x_i) + s_2 \\zeta_i, & &\\zeta_i \\sim \\mathcal{N}(0,1), - - - with covariates :math:`x_i \\sim \\mathcal{N}(0, \\Sigma)`, where :math:`\\Sigma` is a matrix with entries - :math:`\\Sigma_{kj} = 0.7^{|j-k|}`. - The nuisance functions are given by - - .. math:: - - m_0(x_i) &= a_0 x_{i,1} + a_1 \\frac{\\exp(x_{i,3})}{1+\\exp(x_{i,3})}, - - g_0(x_i) &= b_0 \\frac{\\exp(x_{i,1})}{1+\\exp(x_{i,1})} + b_1 x_{i,3}. - - Parameters - ---------- - n_obs : - The number of observations to simulate. - dim_x : - The number of covariates. - alpha : - The value of the causal parameter. - return_type : - If ``'DoubleMLData'`` or ``DoubleMLData``, returns a ``DoubleMLData`` object. - - If ``'DataFrame'``, ``'pd.DataFrame'`` or ``pd.DataFrame``, returns a ``pd.DataFrame``. - - If ``'array'``, ``'np.ndarray'``, ``'np.array'`` or ``np.ndarray``, returns ``np.ndarray``'s ``(x, y, d)``. - **kwargs - Additional keyword arguments to set non-default values for the parameters - :math:`a_0=1`, :math:`a_1=0.25`, :math:`s_1=1`, :math:`b_0=1`, :math:`b_1=0.25` or :math:`s_2=1`. - - References - ---------- - Chernozhukov, V., Chetverikov, D., Demirer, M., Duflo, E., Hansen, C., Newey, W. and Robins, J. (2018), - Double/debiased machine learning for treatment and structural parameters. The Econometrics Journal, 21: C1-C68. - doi:`10.1111/ectj.12097 `_. - """ - a_0 = kwargs.get('a_0', 1.) - a_1 = kwargs.get('a_1', 0.25) - s_1 = kwargs.get('s_1', 1.) - - b_0 = kwargs.get('b_0', 1.) - b_1 = kwargs.get('b_1', 0.25) - s_2 = kwargs.get('s_2', 1.) 
- - cov_mat = toeplitz([np.power(0.7, k) for k in range(dim_x)]) - x = np.random.multivariate_normal(np.zeros(dim_x), cov_mat, size=[n_obs, ]) - - d = a_0 * x[:, 0] + a_1 * np.divide(np.exp(x[:, 2]), 1 + np.exp(x[:, 2])) \ - + s_1 * np.random.standard_normal(size=[n_obs, ]) - y = alpha * d + b_0 * np.divide(np.exp(x[:, 0]), 1 + np.exp(x[:, 0])) \ - + b_1 * x[:, 2] + s_2 * np.random.standard_normal(size=[n_obs, ]) - - if return_type in _array_alias: - return x, y, d - elif return_type in _data_frame_alias + _dml_data_alias: - x_cols = [f'X{i + 1}' for i in np.arange(dim_x)] - data = pd.DataFrame(np.column_stack((x, y, d)), - columns=x_cols + ['y', 'd']) - if return_type in _data_frame_alias: - return data - else: - return DoubleMLData(data, 'y', 'd', x_cols) - else: - raise ValueError('Invalid return_type.') - - -def make_plr_turrell2018(n_obs=100, dim_x=20, theta=0.5, return_type='DoubleMLData', **kwargs): - """ - Generates data from a partially linear regression model used in a blog article by Turrell (2018). - The data generating process is defined as - - .. math:: - - d_i &= m_0(x_i' b) + v_i, & &v_i \\sim \\mathcal{N}(0,1), - - y_i &= \\theta d_i + g_0(x_i' b) + u_i, & &u_i \\sim \\mathcal{N}(0,1), - - - with covariates :math:`x_i \\sim \\mathcal{N}(0, \\Sigma)`, where :math:`\\Sigma` is a random symmetric, - positive-definite matrix generated with :py:meth:`sklearn.datasets.make_spd_matrix`. - :math:`b` is a vector with entries :math:`b_j=\\frac{1}{j}` and the nuisance functions are given by - - .. math:: - - m_0(x_i) &= \\frac{1}{2 \\pi} \\frac{\\sinh(\\gamma)}{\\cosh(\\gamma) - \\cos(x_i-\\nu)}, - - g_0(x_i) &= \\sin(x_i)^2. - - Parameters - ---------- - n_obs : - The number of observations to simulate. - dim_x : - The number of covariates. - theta : - The value of the causal parameter. - return_type : - If ``'DoubleMLData'`` or ``DoubleMLData``, returns a ``DoubleMLData`` object. 
- - If ``'DataFrame'``, ``'pd.DataFrame'`` or ``pd.DataFrame``, returns a ``pd.DataFrame``. - - If ``'array'``, ``'np.ndarray'``, ``'np.array'`` or ``np.ndarray``, returns ``np.ndarray``'s ``(x, y, d)``. - **kwargs - Additional keyword arguments to set non-default values for the parameters - :math:`\\nu=0`, or :math:`\\gamma=1`. - - References - ---------- - Turrell, A. (2018), Econometrics in Python part I - Double machine learning, Markov Wanderer: A blog on economics, - science, coding and data. `https://aeturrell.com/blog/posts/econometrics-in-python-parti-ml/ - `_. - """ - nu = kwargs.get('nu', 0.) - gamma = kwargs.get('gamma', 1.) - - b = [1 / k for k in range(1, dim_x + 1)] - sigma = make_spd_matrix(dim_x) - - x = np.random.multivariate_normal(np.zeros(dim_x), sigma, size=[n_obs, ]) - G = _g(np.dot(x, b)) - M = _m(np.dot(x, b), nu=nu, gamma=gamma) - d = M + np.random.standard_normal(size=[n_obs, ]) - y = np.dot(theta, d) + G + np.random.standard_normal(size=[n_obs, ]) - - if return_type in _array_alias: - return x, y, d - elif return_type in _data_frame_alias + _dml_data_alias: - x_cols = [f'X{i + 1}' for i in np.arange(dim_x)] - data = pd.DataFrame(np.column_stack((x, y, d)), - columns=x_cols + ['y', 'd']) - if return_type in _data_frame_alias: - return data - else: - return DoubleMLData(data, 'y', 'd', x_cols) - else: - raise ValueError('Invalid return_type.') - - -def make_irm_data(n_obs=500, dim_x=20, theta=0, R2_d=0.5, R2_y=0.5, return_type='DoubleMLData'): - """ - Generates data from a interactive regression (IRM) model. - The data generating process is defined as - - .. 
math:: - - d_i &= 1\\left\\lbrace \\frac{\\exp(c_d x_i' \\beta)}{1+\\exp(c_d x_i' \\beta)} > v_i \\right\\rbrace, & &v_i - \\sim \\mathcal{U}(0,1), - - y_i &= \\theta d_i + c_y x_i' \\beta d_i + \\zeta_i, & &\\zeta_i \\sim \\mathcal{N}(0,1), - - with covariates :math:`x_i \\sim \\mathcal{N}(0, \\Sigma)`, where :math:`\\Sigma` is a matrix with entries - :math:`\\Sigma_{kj} = 0.5^{|j-k|}`. - :math:`\\beta` is a `dim_x`-vector with entries :math:`\\beta_j=\\frac{1}{j^2}` and the constants :math:`c_y` and - :math:`c_d` are given by - - .. math:: - - c_y = \\sqrt{\\frac{R_y^2}{(1-R_y^2) \\beta' \\Sigma \\beta}}, \\qquad c_d = - \\sqrt{\\frac{(\\pi^2 /3) R_d^2}{(1-R_d^2) \\beta' \\Sigma \\beta}}. - - The data generating process is inspired by a process used in the simulation experiment (see Appendix P) of Belloni - et al. (2017). - - Parameters - ---------- - n_obs : - The number of observations to simulate. - dim_x : - The number of covariates. - theta : - The value of the causal parameter. - R2_d : - The value of the parameter :math:`R_d^2`. - R2_y : - The value of the parameter :math:`R_y^2`. - return_type : - If ``'DoubleMLData'`` or ``DoubleMLData``, returns a ``DoubleMLData`` object. - - If ``'DataFrame'``, ``'pd.DataFrame'`` or ``pd.DataFrame``, returns a ``pd.DataFrame``. - - If ``'array'``, ``'np.ndarray'``, ``'np.array'`` or ``np.ndarray``, returns ``np.ndarray``'s ``(x, y, d)``. - - References - ---------- - Belloni, A., Chernozhukov, V., Fernández‐Val, I. and Hansen, C. (2017). Program Evaluation and Causal Inference With - High‐Dimensional Data. Econometrica, 85: 233-298. 
- """ - # inspired by https://onlinelibrary.wiley.com/doi/abs/10.3982/ECTA12723, see suplement - v = np.random.uniform(size=[n_obs, ]) - zeta = np.random.standard_normal(size=[n_obs, ]) - - cov_mat = toeplitz([np.power(0.5, k) for k in range(dim_x)]) - x = np.random.multivariate_normal(np.zeros(dim_x), cov_mat, size=[n_obs, ]) - - beta = [1 / (k ** 2) for k in range(1, dim_x + 1)] - b_sigma_b = np.dot(np.dot(cov_mat, beta), beta) - c_y = np.sqrt(R2_y / ((1 - R2_y) * b_sigma_b)) - c_d = np.sqrt(np.pi ** 2 / 3. * R2_d / ((1 - R2_d) * b_sigma_b)) - - xx = np.exp(np.dot(x, np.multiply(beta, c_d))) - d = 1. * ((xx / (1 + xx)) > v) - - y = d * theta + d * np.dot(x, np.multiply(beta, c_y)) + zeta - - if return_type in _array_alias: - return x, y, d - elif return_type in _data_frame_alias + _dml_data_alias: - x_cols = [f'X{i + 1}' for i in np.arange(dim_x)] - data = pd.DataFrame(np.column_stack((x, y, d)), - columns=x_cols + ['y', 'd']) - if return_type in _data_frame_alias: - return data - else: - return DoubleMLData(data, 'y', 'd', x_cols) - else: - raise ValueError('Invalid return_type.') - - -def make_iivm_data(n_obs=500, dim_x=20, theta=1., alpha_x=0.2, return_type='DoubleMLData'): - """ - Generates data from a interactive IV regression (IIVM) model. - The data generating process is defined as - - .. math:: - - d_i &= 1\\left\\lbrace \\alpha_x Z + v_i > 0 \\right\\rbrace, - - y_i &= \\theta d_i + x_i' \\beta + u_i, - - with :math:`Z \\sim \\text{Bernoulli}(0.5)` and - - .. math:: - - \\left(\\begin{matrix} u_i \\\\ v_i \\end{matrix} \\right) \\sim - \\mathcal{N}\\left(0, \\left(\\begin{matrix} 1 & 0.3 \\\\ 0.3 & 1 \\end{matrix} \\right) \\right). - - The covariates :math:`x_i \\sim \\mathcal{N}(0, \\Sigma)`, where :math:`\\Sigma` is a matrix with entries - :math:`\\Sigma_{kj} = 0.5^{|j-k|}` and :math:`\\beta` is a `dim_x`-vector with entries - :math:`\\beta_j=\\frac{1}{j^2}`. 
- - The data generating process is inspired by a process used in the simulation experiment of Farbmacher, Gruber and - Klaassen (2020). - - Parameters - ---------- - n_obs : - The number of observations to simulate. - dim_x : - The number of covariates. - theta : - The value of the causal parameter. - alpha_x : - The value of the parameter :math:`\\alpha_x`. - return_type : - If ``'DoubleMLData'`` or ``DoubleMLData``, returns a ``DoubleMLData`` object. - - If ``'DataFrame'``, ``'pd.DataFrame'`` or ``pd.DataFrame``, returns a ``pd.DataFrame``. - - If ``'array'``, ``'np.ndarray'``, ``'np.array'`` or ``np.ndarray``, returns ``np.ndarray``'s ``(x, y, d, z)``. - - References - ---------- - Farbmacher, H., Guber, R. and Klaaßen, S. (2020). Instrument Validity Tests with Causal Forests. MEA Discussion - Paper No. 13-2020. Available at SSRN: http://dx.doi.org/10.2139/ssrn.3619201. - """ - # inspired by https://papers.ssrn.com/sol3/papers.cfm?abstract_id=3619201 - xx = np.random.multivariate_normal(np.zeros(2), - np.array([[1., 0.3], [0.3, 1.]]), - size=[n_obs, ]) - u = xx[:, 0] - v = xx[:, 1] - - cov_mat = toeplitz([np.power(0.5, k) for k in range(dim_x)]) - x = np.random.multivariate_normal(np.zeros(dim_x), cov_mat, size=[n_obs, ]) - - beta = [1 / (k ** 2) for k in range(1, dim_x + 1)] - - z = np.random.binomial(p=0.5, n=1, size=[n_obs, ]) - d = 1. 
* (alpha_x * z + v > 0) - - y = d * theta + np.dot(x, beta) + u - - if return_type in _array_alias: - return x, y, d, z - elif return_type in _data_frame_alias + _dml_data_alias: - x_cols = [f'X{i + 1}' for i in np.arange(dim_x)] - data = pd.DataFrame(np.column_stack((x, y, d, z)), - columns=x_cols + ['y', 'd', 'z']) - if return_type in _data_frame_alias: - return data - else: - return DoubleMLData(data, 'y', 'd', x_cols, 'z') - else: - raise ValueError('Invalid return_type.') - - -def _make_pliv_data(n_obs=100, dim_x=20, theta=0.5, gamma_z=0.4, return_type='DoubleMLData'): - b = [1 / k for k in range(1, dim_x + 1)] - sigma = make_spd_matrix(dim_x) - - x = np.random.multivariate_normal(np.zeros(dim_x), sigma, size=[n_obs, ]) - G = _g(np.dot(x, b)) - # instrument - z = _m(np.dot(x, b)) + np.random.standard_normal(size=[n_obs, ]) - # treatment - M = _m(gamma_z * z + np.dot(x, b)) - d = M + np.random.standard_normal(size=[n_obs, ]) - y = np.dot(theta, d) + G + np.random.standard_normal(size=[n_obs, ]) - - if return_type in _array_alias: - return x, y, d, z - elif return_type in _data_frame_alias + _dml_data_alias: - x_cols = [f'X{i + 1}' for i in np.arange(dim_x)] - data = pd.DataFrame(np.column_stack((x, y, d, z)), - columns=x_cols + ['y', 'd', 'z']) - if return_type in _data_frame_alias: - return data - else: - return DoubleMLData(data, 'y', 'd', x_cols, 'z') - else: - raise ValueError('Invalid return_type.') - - -def make_pliv_CHS2015(n_obs, alpha=1., dim_x=200, dim_z=150, return_type='DoubleMLData'): - """ - Generates data from a partially linear IV regression model used in Chernozhukov, Hansen and Spindler (2015). - The data generating process is defined as - - .. math:: - - z_i &= \\Pi x_i + \\zeta_i, - - d_i &= x_i' \\gamma + z_i' \\delta + u_i, - - y_i &= \\alpha d_i + x_i' \\beta + \\varepsilon_i, - - with - - .. 
math:: - - \\left(\\begin{matrix} \\varepsilon_i \\\\ u_i \\\\ \\zeta_i \\\\ x_i \\end{matrix} \\right) \\sim - \\mathcal{N}\\left(0, \\left(\\begin{matrix} 1 & 0.6 & 0 & 0 \\\\ 0.6 & 1 & 0 & 0 \\\\ - 0 & 0 & 0.25 I_{p_n^z} & 0 \\\\ 0 & 0 & 0 & \\Sigma \\end{matrix} \\right) \\right) - - where :math:`\\Sigma` is a :math:`p_n^x \\times p_n^x` matrix with entries - :math:`\\Sigma_{kj} = 0.5^{|j-k|}` and :math:`I_{p_n^z}` is the :math:`p_n^z \\times p_n^z` identity matrix. - :math:`\\beta = \\gamma` is a :math:`p_n^x`-vector with entries :math:`\\beta_j=\\frac{1}{j^2}`, - :math:`\\delta` is a :math:`p_n^z`-vector with entries :math:`\\delta_j=\\frac{1}{j^2}` - and :math:`\\Pi = (I_{p_n^z}, 0_{p_n^z \\times (p_n^x - p_n^z)})`. - - Parameters - ---------- - n_obs : - The number of observations to simulate. - alpha : - The value of the causal parameter. - dim_x : - The number of covariates. - dim_z : - The number of instruments. - return_type : - If ``'DoubleMLData'`` or ``DoubleMLData``, returns a ``DoubleMLData`` object. - - If ``'DataFrame'``, ``'pd.DataFrame'`` or ``pd.DataFrame``, returns a ``pd.DataFrame``. - - If ``'array'``, ``'np.ndarray'``, ``'np.array'`` or ``np.ndarray``, returns ``np.ndarray``'s ``(x, y, d, z)``. - - References - ---------- - Chernozhukov, V., Hansen, C. and Spindler, M. (2015), Post-Selection and Post-Regularization Inference in Linear - Models with Many Controls and Instruments. American Economic Review: Papers and Proceedings, 105 (5): 486-90. 
- """ - assert dim_x >= dim_z - # see https://assets.aeaweb.org/asset-server/articles-attachments/aer/app/10505/P2015_1022_app.pdf - xx = np.random.multivariate_normal(np.zeros(2), - np.array([[1., 0.6], [0.6, 1.]]), - size=[n_obs, ]) - epsilon = xx[:, 0] - u = xx[:, 1] - - sigma = toeplitz([np.power(0.5, k) for k in range(0, dim_x)]) - x = np.random.multivariate_normal(np.zeros(dim_x), - sigma, - size=[n_obs, ]) - - I_z = np.eye(dim_z) - xi = np.random.multivariate_normal(np.zeros(dim_z), - 0.25 * I_z, - size=[n_obs, ]) - - beta = [1 / (k ** 2) for k in range(1, dim_x + 1)] - gamma = beta - delta = [1 / (k ** 2) for k in range(1, dim_z + 1)] - Pi = np.hstack((I_z, np.zeros((dim_z, dim_x - dim_z)))) - - z = np.dot(x, np.transpose(Pi)) + xi - d = np.dot(x, gamma) + np.dot(z, delta) + u - y = alpha * d + np.dot(x, beta) + epsilon - - if return_type in _array_alias: - return x, y, d, z - elif return_type in _data_frame_alias + _dml_data_alias: - x_cols = [f'X{i + 1}' for i in np.arange(dim_x)] - z_cols = [f'Z{i + 1}' for i in np.arange(dim_z)] - data = pd.DataFrame(np.column_stack((x, y, d, z)), - columns=x_cols + ['y', 'd'] + z_cols) - if return_type in _data_frame_alias: - return data - else: - return DoubleMLData(data, 'y', 'd', x_cols, z_cols) - else: - raise ValueError('Invalid return_type.') - - -def make_pliv_multiway_cluster_CKMS2021(N=25, M=25, dim_X=100, theta=1., return_type='DoubleMLClusterData', **kwargs): - """ - Generates data from a partially linear IV regression model with multiway cluster sample used in Chiang et al. - (2021). The data generating process is defined as - - .. math:: - - Z_{ij} &= X_{ij}' \\xi_0 + V_{ij}, - - D_{ij} &= Z_{ij}' \\pi_{10} + X_{ij}' \\pi_{20} + v_{ij}, - - Y_{ij} &= D_{ij} \\theta + X_{ij}' \\zeta_0 + \\varepsilon_{ij}, - - with - - .. 
math:: - - X_{ij} &= (1 - \\omega_1^X - \\omega_2^X) \\alpha_{ij}^X - + \\omega_1^X \\alpha_{i}^X + \\omega_2^X \\alpha_{j}^X, - - \\varepsilon_{ij} &= (1 - \\omega_1^\\varepsilon - \\omega_2^\\varepsilon) \\alpha_{ij}^\\varepsilon - + \\omega_1^\\varepsilon \\alpha_{i}^\\varepsilon + \\omega_2^\\varepsilon \\alpha_{j}^\\varepsilon, - - v_{ij} &= (1 - \\omega_1^v - \\omega_2^v) \\alpha_{ij}^v - + \\omega_1^v \\alpha_{i}^v + \\omega_2^v \\alpha_{j}^v, - - V_{ij} &= (1 - \\omega_1^V - \\omega_2^V) \\alpha_{ij}^V - + \\omega_1^V \\alpha_{i}^V + \\omega_2^V \\alpha_{j}^V, - - and :math:`\\alpha_{ij}^X, \\alpha_{i}^X, \\alpha_{j}^X \\sim \\mathcal{N}(0, \\Sigma)` - where :math:`\\Sigma` is a :math:`p_x \\times p_x` matrix with entries - :math:`\\Sigma_{kj} = s_X^{|j-k|}`. - Further - - .. math:: - - \\left(\\begin{matrix} \\alpha_{ij}^\\varepsilon \\\\ \\alpha_{ij}^v \\end{matrix}\\right), - \\left(\\begin{matrix} \\alpha_{i}^\\varepsilon \\\\ \\alpha_{i}^v \\end{matrix}\\right), - \\left(\\begin{matrix} \\alpha_{j}^\\varepsilon \\\\ \\alpha_{j}^v \\end{matrix}\\right) - \\sim \\mathcal{N}\\left(0, \\left(\\begin{matrix} 1 & s_{\\varepsilon v} \\\\ - s_{\\varepsilon v} & 1 \\end{matrix} \\right) \\right) - - - and :math:`\\alpha_{ij}^V, \\alpha_{i}^V, \\alpha_{j}^V \\sim \\mathcal{N}(0, 1)`. - - Parameters - ---------- - N : - The number of observations (first dimension). - M : - The number of observations (second dimension). - dim_X : - The number of covariates. - theta : - The value of the causal parameter. - return_type : - If ``'DoubleMLClusterData'`` or ``DoubleMLClusterData``, returns a ``DoubleMLClusterData`` object where - ``DoubleMLClusterData.data`` is a ``pd.DataFrame``. - - If ``'DataFrame'``, ``'pd.DataFrame'`` or ``pd.DataFrame``, returns a ``pd.DataFrame``. - - If ``'array'``, ``'np.ndarray'``, ``'np.array'`` or ``np.ndarray``, returns ``np.ndarray``'s - ``(x, y, d, cluster_vars, z)``. 
- **kwargs - Additional keyword arguments to set non-default values for the parameters - :math:`\\pi_{10}=1.0`, :math:`\\omega_X = \\omega_{\\varepsilon} = \\omega_V = \\omega_v = (0.25, 0.25)`, - :math:`s_X = s_{\\varepsilon v} = 0.25`, - or the :math:`p_x`-vectors :math:`\\zeta_0 = \\pi_{20} = \\xi_0` with default entries - :math:`(\\zeta_{0})_j = 0.5^j`. - - References - ---------- - Chiang, H. D., Kato K., Ma, Y. and Sasaki, Y. (2021), Multiway Cluster Robust Double/Debiased Machine Learning, - Journal of Business & Economic Statistics, - doi: `10.1080/07350015.2021.1895815 `_, - arXiv:`1909.03489 `_. - """ - # additional parameters specifiable via kwargs - pi_10 = kwargs.get('pi_10', 1.0) - - xx = np.arange(1, dim_X + 1) - zeta_0 = kwargs.get('zeta_0', np.power(0.5, xx)) - pi_20 = kwargs.get('pi_20', np.power(0.5, xx)) - xi_0 = kwargs.get('xi_0', np.power(0.5, xx)) - - omega_X = kwargs.get('omega_X', np.array([0.25, 0.25])) - omega_epsilon = kwargs.get('omega_epsilon', np.array([0.25, 0.25])) - omega_v = kwargs.get('omega_v', np.array([0.25, 0.25])) - omega_V = kwargs.get('omega_V', np.array([0.25, 0.25])) - - s_X = kwargs.get('s_X', 0.25) - s_epsilon_v = kwargs.get('s_epsilon_v', 0.25) - - # use np.tile() and np.repeat() for repeating vectors in different styles, i.e., - # np.tile([v1, v2, v3], 2) [v1, v2, v3, v1, v2, v3] - # np.repeat([v1, v2, v3], 2) [v1, v1, v2, v2, v3, v3] - - alpha_V = np.random.normal(size=(N * M)) - alpha_V_i = np.repeat(np.random.normal(size=N), M) - alpha_V_j = np.tile(np.random.normal(size=M), N) - - cov_mat = np.array([[1, s_epsilon_v], [s_epsilon_v, 1]]) - alpha_eps_v = np.random.multivariate_normal(np.zeros(2), cov_mat, size=[N * M, ]) - alpha_eps = alpha_eps_v[:, 0] - alpha_v = alpha_eps_v[:, 1] - - alpha_eps_v_i = np.random.multivariate_normal(np.zeros(2), cov_mat, size=[N, ]) - alpha_eps_i = np.repeat(alpha_eps_v_i[:, 0], M) - alpha_v_i = np.repeat(alpha_eps_v_i[:, 1], M) - - alpha_eps_v_j = 
np.random.multivariate_normal(np.zeros(2), cov_mat, size=[M, ]) - alpha_eps_j = np.tile(alpha_eps_v_j[:, 0], N) - alpha_v_j = np.tile(alpha_eps_v_j[:, 1], N) - - cov_mat = toeplitz([np.power(s_X, k) for k in range(dim_X)]) - alpha_X = np.random.multivariate_normal(np.zeros(dim_X), cov_mat, size=[N * M, ]) - alpha_X_i = np.repeat(np.random.multivariate_normal(np.zeros(dim_X), cov_mat, size=[N, ]), - M, axis=0) - alpha_X_j = np.tile(np.random.multivariate_normal(np.zeros(dim_X), cov_mat, size=[M, ]), - (N, 1)) - - # generate variables - x = (1 - omega_X[0] - omega_X[1]) * alpha_X \ - + omega_X[0] * alpha_X_i + omega_X[1] * alpha_X_j - - eps = (1 - omega_epsilon[0] - omega_epsilon[1]) * alpha_eps \ - + omega_epsilon[0] * alpha_eps_i + omega_epsilon[1] * alpha_eps_j - - v = (1 - omega_v[0] - omega_v[1]) * alpha_v \ - + omega_v[0] * alpha_v_i + omega_v[1] * alpha_v_j - - V = (1 - omega_V[0] - omega_V[1]) * alpha_V \ - + omega_V[0] * alpha_V_i + omega_V[1] * alpha_V_j - - z = np.matmul(x, xi_0) + V - d = z * pi_10 + np.matmul(x, pi_20) + v - y = d * theta + np.matmul(x, zeta_0) + eps - - cluster_cols = ['cluster_var_i', 'cluster_var_j'] - cluster_vars = pd.MultiIndex.from_product([range(N), range(M)]).to_frame(name=cluster_cols).reset_index(drop=True) - - if return_type in _array_alias: - return x, y, d, cluster_vars.values, z - elif return_type in _data_frame_alias + _dml_cluster_data_alias: - x_cols = [f'X{i + 1}' for i in np.arange(dim_X)] - data = pd.concat((cluster_vars, - pd.DataFrame(np.column_stack((x, y, d, z)), columns=x_cols + ['Y', 'D', 'Z'])), - axis=1) - if return_type in _data_frame_alias: - return data - else: - return DoubleMLClusterData(data, 'Y', 'D', cluster_cols, x_cols, 'Z') - else: - raise ValueError('Invalid return_type.') - - -def make_did_SZ2020(n_obs=500, dgp_type=1, cross_sectional_data=False, return_type='DoubleMLData', **kwargs): - """ - Generates data from a difference-in-differences model used in Sant'Anna and Zhao (2020). 
- The data generating process is defined as follows. For a generic :math:`W=(W_1, W_2, W_3, W_4)^T`, let - - .. math:: - - f_{reg}(W) &= 210 + 27.4 \\cdot W_1 +13.7 \\cdot (W_2 + W_3 + W_4), - - f_{ps}(W) &= 0.75 \\cdot (-W_1 + 0.5 \\cdot W_2 -0.25 \\cdot W_3 - 0.1 \\cdot W_4). - - - Let :math:`X= (X_1, X_2, X_3, X_4)^T \\sim \\mathcal{N}(0, \\Sigma)`, where :math:`\\Sigma` is a matrix with entries - :math:`\\Sigma_{kj} = c^{|j-k|}`. The default value is :math:`c = 0`, corresponding to the identity matrix. - Further, define :math:`Z_j = (\\tilde{Z_j} - \\mathbb{E}[\\tilde{Z}_j]) / \\sqrt{\\text{Var}(\\tilde{Z}_j)}`, - where :math:`\\tilde{Z}_1 = \\exp(0.5 \\cdot X_1)`, :math:`\\tilde{Z}_2 = 10 + X_2/(1 + \\exp(X_1))`, - :math:`\\tilde{Z}_3 = (0.6 + X_1 \\cdot X_3 / 25)^3` and :math:`\\tilde{Z}_4 = (20 + X_2 + X_4)^2`. - At first define - - .. math:: - - Y_0(0) &= f_{reg}(W_{reg}) + \\nu(W_{reg}, D) + \\varepsilon_0, - - Y_1(d) &= 2 \\cdot f_{reg}(W_{reg}) + \\nu(W_{reg}, D) + \\varepsilon_1(d), - - p(W_{ps}) &= \\frac{\\exp(f_{ps}(W_{ps}))}{1 + \\exp(f_{ps}(W_{ps}))}, - - D &= 1\\{p(W_{ps}) \\ge U\\}, - - where :math:`\\varepsilon_0, \\varepsilon_1(d), d=0, 1` are independent standard normal random variables, - :math:`U \\sim \\mathcal{U}[0, 1]` is a independent standard uniform - and :math:`\\nu(W_{reg}, D)\\sim \\mathcal{N}(D \\cdot f_{reg}(W_{reg}),1)`. - The different data generating processes are defined via - - .. math:: - - DGP1:\\quad W_{reg} &= Z \\quad W_{ps} = Z - - DGP2:\\quad W_{reg} &= Z \\quad W_{ps} = X - - DGP3:\\quad W_{reg} &= X \\quad W_{ps} = Z - - DGP4:\\quad W_{reg} &= X \\quad W_{ps} = X - - DGP5:\\quad W_{reg} &= Z \\quad W_{ps} = 0 - - DGP6:\\quad W_{reg} &= X \\quad W_{ps} = 0, - - such that the last two settings correspond to an experimental setting with treatment probability - of :math:`P(D=1) = \\frac{1}{2}.` - For the panel data the outcome is already defined as the difference :math:`Y = Y_1(D) - Y_0(0)`. 
- For cross-sectional data the flag ``cross_sectional_data`` has to be set to ``True``. - Then the outcome will be defined to be - - .. math:: - - Y = T \\cdot Y_1(D) + (1-T) \\cdot Y_0(0), - - where :math:`T = 1\\{U_T\\le \\lambda_T \\}` with :math:`U_T\\sim \\mathcal{U}[0, 1]` and :math:`\\lambda_T=0.5`. - The true average treatment effect on the treated is zero for all data generating processes. - - Parameters - ---------- - n_obs : - The number of observations to simulate. - dgp_type : - The DGP to be used. Default value is ``1`` (integer). - cross_sectional_data : - Indicates whether the setting is uses cross-sectional or panel data. Default value is ``False``. - return_type : - If ``'DoubleMLData'`` or ``DoubleMLData``, returns a ``DoubleMLData`` object. - - If ``'DataFrame'``, ``'pd.DataFrame'`` or ``pd.DataFrame``, returns a ``pd.DataFrame``. - - If ``'array'``, ``'np.ndarray'``, ``'np.array'`` or ``np.ndarray``, returns ``np.ndarray``'s ``(x, y, d)`` - or ``(x, y, d, t)``. - **kwargs - Additional keyword arguments to set non-default values for the parameter - :math:`xi=0.75`, :math:`c=0.0` and :math:`\\lambda_T=0.5`. - - References - ---------- - Sant’Anna, P. H. and Zhao, J. (2020), - Doubly robust difference-in-differences estimators. Journal of Econometrics, 219(1), 101-122. - doi:`10.1016/j.jeconom.2020.06.003 `_. 
- """ - xi = kwargs.get('xi', 0.75) - c = kwargs.get('c', 0.0) - lambda_t = kwargs.get('lambda_t', 0.5) - - def f_reg(w): - res = 210 + 27.4 * w[:, 0] + 13.7 * (w[:, 1] + w[:, 2] + w[:, 3]) - return res - - def f_ps(w, xi): - res = xi * (-w[:, 0] + 0.5 * w[:, 1] - 0.25 * w[:, 2] - 0.1 * w[:, 3]) - return res - - dim_x = 4 - cov_mat = toeplitz([np.power(c, k) for k in range(dim_x)]) - x = np.random.multivariate_normal(np.zeros(dim_x), cov_mat, size=[n_obs, ]) - - z_tilde_1 = np.exp(0.5 * x[:, 0]) - z_tilde_2 = 10 + x[:, 1] / (1 + np.exp(x[:, 0])) - z_tilde_3 = (0.6 + x[:, 0] * x[:, 2] / 25) ** 3 - z_tilde_4 = (20 + x[:, 1] + x[:, 3]) ** 2 - - z_tilde = np.column_stack((z_tilde_1, z_tilde_2, z_tilde_3, z_tilde_4)) - z = (z_tilde - np.mean(z_tilde, axis=0)) / np.std(z_tilde, axis=0) - - # error terms - epsilon_0 = np.random.normal(loc=0, scale=1, size=n_obs) - epsilon_1 = np.random.normal(loc=0, scale=1, size=[n_obs, 2]) - - if dgp_type == 1: - features_ps = z - features_reg = z - elif dgp_type == 2: - features_ps = x - features_reg = z - elif dgp_type == 3: - features_ps = z - features_reg = x - elif dgp_type == 4: - features_ps = x - features_reg = x - elif dgp_type == 5: - features_ps = None - features_reg = z - elif dgp_type == 6: - features_ps = None - features_reg = x - else: - raise ValueError('The dgp_type is not valid.') - - # treatment and propensities - is_experimental = (dgp_type == 5) or (dgp_type == 6) - if is_experimental: - # Set D to be experimental - p = 0.5 * np.ones(n_obs) - else: - p = np.exp(f_ps(features_ps, xi)) / (1 + np.exp(f_ps(features_ps, xi))) - u = np.random.uniform(low=0, high=1, size=n_obs) - d = 1.0 * (p >= u) - - # potential outcomes - nu = np.random.normal(loc=d * f_reg(features_reg), scale=1, size=n_obs) - y0 = f_reg(features_reg) + nu + epsilon_0 - y1_d0 = 2 * f_reg(features_reg) + nu + epsilon_1[:, 0] - y1_d1 = 2 * f_reg(features_reg) + nu + epsilon_1[:, 1] - y1 = d * y1_d1 + (1 - d) * y1_d0 - - if not cross_sectional_data: - y = 
y1 - y0 - - if return_type in _array_alias: - return z, y, d - elif return_type in _data_frame_alias + _dml_data_alias: - z_cols = [f'Z{i + 1}' for i in np.arange(dim_x)] - data = pd.DataFrame(np.column_stack((z, y, d)), - columns=z_cols + ['y', 'd']) - if return_type in _data_frame_alias: - return data - else: - return DoubleMLData(data, 'y', 'd', z_cols) - else: - raise ValueError('Invalid return_type.') - - else: - u_t = np.random.uniform(low=0, high=1, size=n_obs) - t = 1.0 * (u_t <= lambda_t) - y = t * y1 + (1 - t) * y0 - - if return_type in _array_alias: - return z, y, d, t - elif return_type in _data_frame_alias + _dml_data_alias: - z_cols = [f'Z{i + 1}' for i in np.arange(dim_x)] - data = pd.DataFrame(np.column_stack((z, y, d, t)), - columns=z_cols + ['y', 'd', 't']) - if return_type in _data_frame_alias: - return data - else: - return DoubleMLData(data, 'y', 'd', z_cols, t_col='t') - else: - raise ValueError('Invalid return_type.') - - -def make_confounded_irm_data(n_obs=500, theta=0.0, gamma_a=0.127, beta_a=0.58, linear=False, **kwargs): - """ - Generates counfounded data from an interactive regression model. - - The data generating process is defined as follows (inspired by the Monte Carlo simulation used - in Sant'Anna and Zhao (2020)). - - Let :math:`X= (X_1, X_2, X_3, X_4, X_5)^T \\sim \\mathcal{N}(0, \\Sigma)`, where :math:`\\Sigma` corresponds - to the identity matrix. - Further, define :math:`Z_j = (\\tilde{Z_j} - \\mathbb{E}[\\tilde{Z}_j]) / \\sqrt{\\text{Var}(\\tilde{Z}_j)}`, - where - - .. math:: - - \\tilde{Z}_1 &= \\exp(0.5 \\cdot X_1) - - \\tilde{Z}_2 &= 10 + X_2/(1 + \\exp(X_1)) - - \\tilde{Z}_3 &= (0.6 + X_1 \\cdot X_3 / 25)^3 - - \\tilde{Z}_4 &= (20 + X_2 + X_4)^2 - - \\tilde{Z}_5 &= X_5. - - Additionally, generate a confounder :math:`A \\sim \\mathcal{U}[-1, 1]`. - At first, define the propensity score as - - .. math:: - - m(X, A) = P(D=1|X,A) = p(Z) + \\gamma_A \\cdot A - - where - - .. 
math:: - - p(Z) &= \\frac{\\exp(f_{ps}(Z))}{1 + \\exp(f_{ps}(Z))}, - - f_{ps}(Z) &= 0.75 \\cdot (-Z_1 + 0.1 \\cdot Z_2 -0.25 \\cdot Z_3 - 0.1 \\cdot Z_4). - - and generate the treatment :math:`D = 1\\{m(X, A) \\ge U\\}` with :math:`U \\sim \\mathcal{U}[0, 1]`. - Since :math:`A` is independent of :math:`X`, the short form of the propensity score is given as - - .. math:: - - P(D=1|X) = p(Z). - - Further, generate the outcome of interest :math:`Y` as - - .. math:: - - Y &= \\theta \\cdot D (Z_5 + 1) + g(Z) + \\beta_A \\cdot A + \\varepsilon - - g(Z) &= 2.5 + 0.74 \\cdot Z_1 + 0.25 \\cdot Z_2 + 0.137 \\cdot (Z_3 + Z_4) - - where :math:`\\varepsilon \\sim \\mathcal{N}(0,5)`. - This implies an average treatment effect of :math:`\\theta`. Additionally, the long and short forms of - the conditional expectation take the following forms - - .. math:: - - \\mathbb{E}[Y|D, X, A] &= \\theta \\cdot D (Z_5 + 1) + g(Z) + \\beta_A \\cdot A - - \\mathbb{E}[Y|D, X] &= (\\theta + \\beta_A \\frac{\\mathrm{Cov}(A, D(Z_5 + 1))}{\\mathrm{Var}(D(Z_5 + 1))}) - \\cdot D (Z_5 + 1) + g(Z). - - Consequently, the strength of confounding is determined via :math:`\\gamma_A` and :math:`\\beta_A`, which can be - set via the parameters ``gamma_a`` and ``beta_a``. - - The observed data is given as :math:`W = (Y, D, Z)`. - Further, orcale values of the confounder :math:`A`, the transformed covariated :math:`Z`, - the potential outcomes of :math:`Y`, the long and short forms of the main regression and the propensity score and - in sample versions of the confounding parameters :math:`cf_d` and :math:`cf_y` (for ATE and ATTE) - are returned in a dictionary. - - Parameters - ---------- - n_obs : int - The number of observations to simulate. - Default is ``500``. - theta : float or int - Average treatment effect. - Default is ``0.0``. - gamma_a : float - Coefficient of the unobserved confounder in the propensity score. - Default is ``0.127``. 
- beta_a : float - Coefficient of the unobserved confounder in the outcome regression. - Default is ``0.58``. - linear : bool - If ``True``, the Z will be set to X, such that the underlying (short) models are linear/logistic. - Default is ``False``. - - Returns - ------- - res_dict : dictionary - Dictionary with entries ``x``, ``y``, ``d`` and ``oracle_values``. - - References - ---------- - Sant’Anna, P. H. and Zhao, J. (2020), - Doubly robust difference-in-differences estimators. Journal of Econometrics, 219(1), 101-122. - doi:`10.1016/j.jeconom.2020.06.003 `_. - """ - c = 0.0 # the confounding strength is only valid for c=0 - xi = 0.75 - dim_x = kwargs.get('dim_x', 5) - trimming_threshold = kwargs.get('trimming_threshold', 0.01) - var_eps_y = kwargs.get('var_eps_y', 1.0) - - # Specification of main regression function - def f_reg(w): - res = 2.5 + 0.74 * w[:, 0] + 0.25 * w[:, 1] + 0.137 * (w[:, 2] + w[:, 3]) - return res - - # Specification of prop score function - def f_ps(w, xi): - res = xi * (-w[:, 0] + 0.1 * w[:, 1] - 0.25 * w[:, 2] - 0.1 * w[:, 3]) - return res - - # observed covariates - cov_mat = toeplitz([np.power(c, k) for k in range(dim_x)]) - x = np.random.multivariate_normal(np.zeros(dim_x), cov_mat, size=[n_obs, ]) - z_tilde_1 = np.exp(0.5 * x[:, 0]) - z_tilde_2 = 10 + x[:, 1] / (1 + np.exp(x[:, 0])) - z_tilde_3 = (0.6 + x[:, 0] * x[:, 2] / 25) ** 3 - z_tilde_4 = (20 + x[:, 1] + x[:, 3]) ** 2 - z_tilde_5 = x[:, 4] - z_tilde = np.column_stack((z_tilde_1, z_tilde_2, z_tilde_3, z_tilde_4, z_tilde_5)) - z = (z_tilde - np.mean(z_tilde, axis=0)) / np.std(z_tilde, axis=0) - # error terms and unobserved confounder - eps_y = np.random.normal(loc=0, scale=np.sqrt(var_eps_y), size=n_obs) - # unobserved confounder - a_bounds = (-1, 1) - a = np.random.uniform(low=a_bounds[0], high=a_bounds[1], size=n_obs) - var_a = np.square(a_bounds[1] - a_bounds[0]) / 12 - - # Choose the features used in the models - if linear: - features_ps = x - features_reg = x - else: - 
features_ps = z - features_reg = z - - p = np.exp(f_ps(features_ps, xi)) / (1 + np.exp(f_ps(features_ps, xi))) - # compute short and long form of propensity score - m_long = p + gamma_a * a - m_short = p - # check propensity score bounds - if np.any(m_long < trimming_threshold) or np.any(m_long > 1.0 - trimming_threshold): - m_long = np.clip(m_long, trimming_threshold, 1.0 - trimming_threshold) - m_short = np.clip(m_short, trimming_threshold, 1.0 - trimming_threshold) - warnings.warn(f'Propensity score is close to 0 or 1. ' - f'Trimming is at {trimming_threshold} and {1.0 - trimming_threshold} is applied') - # generate treatment based on long form - u = np.random.uniform(low=0, high=1, size=n_obs) - d = 1.0 * (m_long >= u) - # add treatment heterogeneity - d1x = z[:, 4] + 1 - var_dx = np.var(d * (d1x)) - cov_adx = gamma_a * var_a - # Outcome regression - g_partial_reg = f_reg(features_reg) - # short model - g_short_d0 = g_partial_reg - g_short_d1 = (theta + beta_a * cov_adx / var_dx) * d1x + g_partial_reg - g_short = d * g_short_d1 + (1.0 - d) * g_short_d0 - # long model - g_long_d0 = g_partial_reg + beta_a * a - g_long_d1 = theta * d1x + g_partial_reg + beta_a * a - g_long = d * g_long_d1 + (1.0 - d) * g_long_d0 - # Potential outcomes - y_0 = g_long_d0 + eps_y - y_1 = g_long_d1 + eps_y - # Realized outcome - y = d * y_1 + (1.0 - d) * y_0 - # In-sample values for confounding strength - explained_residual_variance = np.square(g_long - g_short) - residual_variance = np.square(y - g_short) - cf_y = np.mean(explained_residual_variance) / np.mean(residual_variance) - # compute the Riesz representation - treated_weight = d / np.mean(d) - untreated_weight = (1.0 - d) / np.mean(d) - # Odds ratios - propensity_ratio_long = m_long / (1.0 - m_long) - rr_long_ate = d / m_long - (1.0 - d) / (1.0 - m_long) - rr_long_atte = treated_weight - np.multiply(untreated_weight, propensity_ratio_long) - propensity_ratio_short = m_short / (1.0 - m_short) - rr_short_ate = d / m_short - (1.0 
- d) / (1.0 - m_short) - rr_short_atte = treated_weight - np.multiply(untreated_weight, propensity_ratio_short) - cf_d_ate = (np.mean(1 / (m_long * (1 - m_long))) - np.mean(1 / (m_short * (1 - m_short)))) / np.mean( - 1 / (m_long * (1 - m_long))) - cf_d_atte = (np.mean(propensity_ratio_long) - np.mean(propensity_ratio_short)) / np.mean(propensity_ratio_long) - if (beta_a == 0) | (gamma_a == 0): - rho_ate = 0.0 - rho_atte = 0.0 - else: - rho_ate = np.corrcoef((g_long - g_short), (rr_long_ate - rr_short_ate))[0, 1] - rho_atte = np.corrcoef((g_long - g_short), (rr_long_atte - rr_short_atte))[0, 1] - oracle_values = { - 'g_long': g_long, - 'g_short': g_short, - 'm_long': m_long, - 'm_short': m_short, - 'gamma_a': gamma_a, - 'beta_a': beta_a, - 'a': a, - 'y_0': y_0, - 'y_1': y_1, - 'z': z, - 'cf_y': cf_y, - 'cf_d_ate': cf_d_ate, - 'cf_d_atte': cf_d_atte, - 'rho_ate': rho_ate, - 'rho_atte': rho_atte, - } - res_dict = { - 'x': x, - 'y': y, - 'd': d, - 'oracle_values': oracle_values - } - return res_dict - - -def make_confounded_plr_data(n_obs=500, theta=5.0, cf_y=0.04, cf_d=0.04, **kwargs): - """ - Generates counfounded data from an partially linear regression model. - - The data generating process is defined as follows (similar to the Monte Carlo simulation used - in Sant'Anna and Zhao (2020)). Let :math:`X= (X_1, X_2, X_3, X_4, X_5)^T \\sim \\mathcal{N}(0, \\Sigma)`, - where :math:`\\Sigma` is a matrix with entries - :math:`\\Sigma_{kj} = c^{|j-k|}`. The default value is :math:`c = 0`, corresponding to the identity matrix. - Further, define :math:`Z_j = (\\tilde{Z_j} - \\mathbb{E}[\\tilde{Z}_j]) / \\sqrt{\\text{Var}(\\tilde{Z}_j)}`, - where - - .. math:: - - \\tilde{Z}_1 &= \\exp(0.5 \\cdot X_1) - - \\tilde{Z}_2 &= 10 + X_2/(1 + \\exp(X_1)) - - \\tilde{Z}_3 &= (0.6 + X_1 \\cdot X_3 / 25)^3 - - \\tilde{Z}_4 &= (20 + X_2 + X_4)^2. - - Additionally, generate a confounder :math:`A \\sim \\mathcal{U}[-1, 1]`. - At first, define the treatment as - - .. 
math:: - - D = -Z_1 + 0.5 \\cdot Z_2 - 0.25 \\cdot Z_3 - 0.1 \\cdot Z_4 + \\gamma_A \\cdot A + \\varepsilon_D - - and with :math:`\\varepsilon \\sim \\mathcal{N}(0,1)`. - Since :math:`A` is independent of :math:`X`, the long and short form of the treatment regression are given as - - .. math:: - - E[D|X,A] = -Z_1 + 0.5 \\cdot Z_2 - 0.25 \\cdot Z_3 - 0.1 \\cdot Z_4 + \\gamma_A \\cdot A - - E[D|X] = -Z_1 + 0.5 \\cdot Z_2 - 0.25 \\cdot Z_3 - 0.1 \\cdot Z_4. - - Further, generate the outcome of interest :math:`Y` as - - .. math:: - - Y &= \\theta \\cdot D + g(Z) + \\beta_A \\cdot A + \\varepsilon - - g(Z) &= 210 + 27.4 \\cdot Z_1 +13.7 \\cdot (Z_2 + Z_3 + Z_4) - - where :math:`\\varepsilon \\sim \\mathcal{N}(0,5)`. - This implies an average treatment effect of :math:`\\theta`. Additionally, the long and short forms of - the conditional expectation take the following forms - - .. math:: - - \\mathbb{E}[Y|D, X, A] &= \\theta \\cdot D + g(Z) + \\beta_A \\cdot A - - \\mathbb{E}[Y|D, X] &= (\\theta + \\gamma_A\\beta_A \\frac{\\mathrm{Var}(A)}{\\mathrm{Var}(D)}) \\cdot D + g(Z). - - Consequently, the strength of confounding is determined via :math:`\\gamma_A` and :math:`\\beta_A`. - Both are chosen to obtain the desired confounding of the outcome and Riesz Representer (in sample). - - The observed data is given as :math:`W = (Y, D, X)`. - Further, orcale values of the confounder :math:`A`, the transformed covariated :math:`Z`, the effect :math:`\\theta`, - the coefficients :math:`\\gamma_a`, :math:`\\beta_a`, the long and short forms of the main regression and - the propensity score are returned in a dictionary. - - Parameters - ---------- - n_obs : int - The number of observations to simulate. - Default is ``500``. - theta : float or int - Average treatment effect. - Default is ``5.0``. - cf_y : float - Percentage of the residual variation of the outcome explained by latent/confounding variable. - Default is ``0.04``. 
- cf_d : float - Percentage gains in the variation of the Riesz Representer generated by latent/confounding variable. - Default is ``0.04``. - - Returns - ------- - res_dict : dictionary - Dictionary with entries ``x``, ``y``, ``d`` and ``oracle_values``. - - References - ---------- - Sant’Anna, P. H. and Zhao, J. (2020), - Doubly robust difference-in-differences estimators. Journal of Econometrics, 219(1), 101-122. - doi:`10.1016/j.jeconom.2020.06.003 `_. - """ - c = kwargs.get('c', 0.0) - dim_x = kwargs.get('dim_x', 4) - - # observed covariates - cov_mat = toeplitz([np.power(c, k) for k in range(dim_x)]) - x = np.random.multivariate_normal(np.zeros(dim_x), cov_mat, size=[n_obs, ]) - - z_tilde_1 = np.exp(0.5 * x[:, 0]) - z_tilde_2 = 10 + x[:, 1] / (1 + np.exp(x[:, 0])) - z_tilde_3 = (0.6 + x[:, 0] * x[:, 2] / 25) ** 3 - z_tilde_4 = (20 + x[:, 1] + x[:, 3]) ** 2 - - z_tilde = np.column_stack((z_tilde_1, z_tilde_2, z_tilde_3, z_tilde_4, x[:, 4:])) - z = (z_tilde - np.mean(z_tilde, axis=0)) / np.std(z_tilde, axis=0) - - # error terms - var_eps_y = 5 - eps_y = np.random.normal(loc=0, scale=np.sqrt(var_eps_y), size=n_obs) - var_eps_d = 1 - eps_d = np.random.normal(loc=0, scale=np.sqrt(var_eps_d), size=n_obs) - - # unobserved confounder - a_bounds = (-1, 1) - a = np.random.uniform(low=a_bounds[0], high=a_bounds[1], size=n_obs) - var_a = np.square(a_bounds[1] - a_bounds[0]) / 12 - - # get the required impact of the confounder on the propensity score - m_short = -z[:, 0] + 0.5 * z[:, 1] - 0.25 * z[:, 2] - 0.1 * z[:, 3] - - def f_m(gamma_a): - rr_long = eps_d / var_eps_d - rr_short = (gamma_a * a + eps_d) / (gamma_a ** 2 * var_a + var_eps_d) - C2_D = (np.mean(np.square(rr_long)) - np.mean(np.square(rr_short))) / np.mean(np.square(rr_short)) - return np.square(C2_D / (1 + C2_D) - cf_d) - - gamma_a = minimize_scalar(f_m).x - m_long = m_short + gamma_a * a - d = m_long + eps_d - - # short and long version of g - g_partial_reg = 210 + 27.4 * z[:, 0] + 13.7 * (z[:, 1] + z[:, 2] 
+ z[:, 3]) - - var_d = np.var(d) - - def f_g(beta_a): - g_diff = beta_a * (a - gamma_a * (var_a / var_d) * d) - y_diff = eps_y + g_diff - return np.square(np.mean(np.square(g_diff)) / np.mean(np.square(y_diff)) - cf_y) - - beta_a = minimize_scalar(f_g).x - - g_long = theta * d + g_partial_reg + beta_a * a - g_short = (theta + gamma_a * beta_a * var_a / var_d) * d + g_partial_reg - - y = g_long + eps_y - - oracle_values = {'g_long': g_long, - 'g_short': g_short, - 'm_long': m_long, - 'm_short': m_short, - 'theta': theta, - 'gamma_a': gamma_a, - 'beta_a': beta_a, - 'a': a, - 'z': z} - - res_dict = {'x': x, - 'y': y, - 'd': d, - 'oracle_values': oracle_values} - - return res_dict - - -def make_heterogeneous_data(n_obs=200, p=30, support_size=5, n_x=1, binary_treatment=False): - """ - Creates a simple synthetic example for heterogeneous treatment effects. - The data generating process is based on the Monte Carlo simulation from Oprescu et al. (2019). - - The data is generated as - - .. math:: - - Y_i & = \\theta_0(X_i)D_i + \\langle X_i,\\gamma_0\\rangle + \\epsilon_i - - D_i & = \\langle X_i,\\beta_0\\rangle + \\eta_i, - - where :math:`X_i\\sim\\mathcal{U}[0,1]^{p}` and :math:`\\epsilon_i,\\eta_i - \\sim\\mathcal{U}[-1,1]`. - If the treatment is set to be binary, the treatment is generated as - - .. math:: - D_i = 1\\{\\langle X_i,\\beta_0\\rangle \\ge \\eta_i\\}. - - The coefficient vectors :math:`\\gamma_0` and :math:`\\beta_0` both have small random (identical) support - which values are drawn independently from :math:`\\mathcal{U}[0,1]` and :math:`\\mathcal{U}[0,0.3]`. - Further, :math:`\\theta_0(x)` defines the conditional treatment effect, which is defined differently depending - on the dimension of :math:`x`. - - If the heterogeneity is univariate the conditional treatment effect takes the following form - - .. math:: - \\theta_0(x) = \\exp(2x_0) + 3\\sin(4x_0), - - whereas for the two-dimensional case the conditional treatment effect is defined as - - .. 
math:: - \\theta_0(x) = \\exp(2x_0) + 3\\sin(4x_1). - - Parameters - ---------- - n_obs : int - Number of observations to simulate. - Default is ``200``. - - p : int - Dimension of covariates. - Default is ``30``. - - support_size : int - Number of relevant (confounding) covariates. - Default is ``5``. - - n_x : int - Dimension of the heterogeneity. Can be either ``1`` or ``2``. - Default is ``1``. - - binary_treatment : bool - Indicates whether the treatment is binary. - Default is ``False``. - - Returns - ------- - res_dict : dictionary - Dictionary with entries ``data``, ``effects``, ``treatment_effect``. - - """ - # simple input checks - assert n_x in [1, 2], 'n_x must be either 1 or 2.' - assert support_size <= p, 'support_size must be smaller than p.' - assert isinstance(binary_treatment, bool), 'binary_treatment must be a boolean.' - - # define treatment effects - if n_x == 1: - def treatment_effect(x): - return np.exp(2 * x[:, 0]) + 3 * np.sin(4 * x[:, 0]) - else: - assert n_x == 2 - - # redefine treatment effect - def treatment_effect(x): - return np.exp(2 * x[:, 0]) + 3 * np.sin(4 * x[:, 1]) - - # Outcome support and coefficients - support_y = np.random.choice(np.arange(p), size=support_size, replace=False) - coefs_y = np.random.uniform(0, 1, size=support_size) - # treatment support and coefficients - support_d = support_y - coefs_d = np.random.uniform(0, 0.3, size=support_size) - - # noise - epsilon = np.random.uniform(-1, 1, size=n_obs) - eta = np.random.uniform(-1, 1, size=n_obs) - - # Generate controls, covariates, treatments and outcomes - x = np.random.uniform(0, 1, size=(n_obs, p)) - # Heterogeneous treatment effects - te = treatment_effect(x) - if binary_treatment: - d = 1.0 * (np.dot(x[:, support_d], coefs_d) >= eta) - else: - d = np.dot(x[:, support_d], coefs_d) + eta - y = te * d + np.dot(x[:, support_y], coefs_y) + epsilon - - # Now we build the dataset - y_df = pd.DataFrame({'y': y}) - d_df = pd.DataFrame({'d': d}) - x_df = pd.DataFrame( - 
data=x, - index=np.arange(x.shape[0]), - columns=[f'X_{i}' for i in range(x.shape[1])] - ) - - data = pd.concat([y_df, d_df, x_df], axis=1) - res_dict = { - 'data': data, - 'effects': te, - 'treatment_effect': treatment_effect} - return res_dict - - -def make_ssm_data(n_obs=8000, dim_x=100, theta=1, mar=True, return_type='DoubleMLData'): - """ - Generates data from a sample selection model (SSM). - The data generating process is defined as - - .. math:: - - y_i &= \\theta d_i + x_i' \\beta d_i + u_i, - - s_i &= 1\\left\\lbrace d_i + \\gamma z_i + x_i' \\beta + v_i > 0 \\right\\rbrace, - - d_i &= 1\\left\\lbrace x_i' \\beta + w_i > 0 \\right\\rbrace, - - with Y being observed if :math:`s_i = 1` and covariates :math:`x_i \\sim \\mathcal{N}(0, \\Sigma^2_x)`, where - :math:`\\Sigma^2_x` is a matrix with entries - :math:`\\Sigma_{kj} = 0.5^{|j-k|}`. - :math:`\\beta` is a `dim_x`-vector with entries :math:`\\beta_j=\\frac{0.4}{j^2}` - :math:`z_i \\sim \\mathcal{N}(0, 1)`, - :math:`(u_i,v_i) \\sim \\mathcal{N}(0, \\Sigma^2_{u,v})`, - :math:`w_i \\sim \\mathcal{N}(0, 1)`. - - - The data generating process is inspired by a process used in the simulation study (see Appendix E) of Bia, - Huber and Lafférs (2023). - - Parameters - ---------- - n_obs : - The number of observations to simulate. - dim_x : - The number of covariates. - theta : - The value of the causal parameter. - mar: - Boolean. Indicates whether missingness at random holds. - return_type : - If ``'DoubleMLData'`` or ``DoubleMLData``, returns a ``DoubleMLData`` object. - - If ``'DataFrame'``, ``'pd.DataFrame'`` or ``pd.DataFrame``, returns a ``pd.DataFrame``. - - If ``'array'``, ``'np.ndarray'``, ``'np.array'`` or ``np.ndarray``, returns ``np.ndarray``'s ``(x, y, d, z, s)``. 
- - References - ---------- - Michela Bia, Martin Huber & Lukáš Lafférs (2023) Double Machine Learning for Sample Selection Models, - Journal of Business & Economic Statistics, DOI: 10.1080/07350015.2023.2271071 - """ - if mar: - sigma = np.array([[1, 0], [0, 1]]) - gamma = 0 - else: - sigma = np.array([[1, 0.8], [0.8, 1]]) - gamma = 1 - - e = np.random.multivariate_normal(mean=[0, 0], cov=sigma, size=n_obs).T - - cov_mat = toeplitz([np.power(0.5, k) for k in range(dim_x)]) - x = np.random.multivariate_normal(np.zeros(dim_x), cov_mat, size=[n_obs, ]) - - beta = [0.4 / (k ** 2) for k in range(1, dim_x + 1)] - - d = np.where(np.dot(x, beta) + np.random.randn(n_obs) > 0, 1, 0) - z = np.random.randn(n_obs) - s = np.where(np.dot(x, beta) + d + gamma * z + e[0] > 0, 1, 0) - - y = np.dot(x, beta) + theta * d + e[1] - y[s == 0] = 0 - - if return_type in _array_alias: - return x, y, d, z, s - elif return_type in _data_frame_alias + _dml_data_alias: - x_cols = [f'X{i + 1}' for i in np.arange(dim_x)] - if mar: - data = pd.DataFrame(np.column_stack((x, y, d, s)), - columns=x_cols + ['y', 'd', 's']) - else: - data = pd.DataFrame(np.column_stack((x, y, d, z, s)), - columns=x_cols + ['y', 'd', 'z', 's']) - if return_type in _data_frame_alias: - return data - else: - if mar: - return DoubleMLData(data, 'y', 'd', x_cols, None, None, 's') - return DoubleMLData(data, 'y', 'd', x_cols, 'z', None, 's') - else: - raise ValueError('Invalid return_type.') - - -def make_irm_data_discrete_treatments(n_obs=200, n_levels=3, linear=False, random_state=None, **kwargs): - """ - Generates data from a interactive regression (IRM) model with multiple treatment levels (based on an - underlying continous treatment). - - The data generating process is defined as follows (similar to the Monte Carlo simulation used - in Sant'Anna and Zhao (2020)). - - Let :math:`X= (X_1, X_2, X_3, X_4, X_5)^T \\sim \\mathcal{N}(0, \\Sigma)`, where :math:`\\Sigma` corresponds - to the identity matrix. 
- Further, define :math:`Z_j = (\\tilde{Z_j} - \\mathbb{E}[\\tilde{Z}_j]) / \\sqrt{\\text{Var}(\\tilde{Z}_j)}`, - where - - .. math:: - - \\tilde{Z}_1 &= \\exp(0.5 \\cdot X_1) - - \\tilde{Z}_2 &= 10 + X_2/(1 + \\exp(X_1)) - - \\tilde{Z}_3 &= (0.6 + X_1 \\cdot X_3 / 25)^3 - - \\tilde{Z}_4 &= (20 + X_2 + X_4)^2 - - \\tilde{Z}_5 &= X_5. - - A continuous treatment :math:`D_{\\text{cont}}` is generated as - - .. math:: - - D_{\\text{cont}} = \\xi (-Z_1 + 0.5 Z_2 - 0.25 Z_3 - 0.1 Z_4) + \\varepsilon_D, - - where :math:`\\varepsilon_D \\sim \\mathcal{N}(0,1)` and :math:`\\xi=0.3`. The corresponding treatment - effect is defined as - - .. math:: - - \\theta (d) = 0.1 \\exp(d) + 10 \\sin(0.7 d) + 2 d - 0.2 d^2. - - Based on the continous treatment, a discrete treatment :math:`D` is generated as with a baseline level of - :math:`D=0` and additional levels based on the quantiles of :math:`D_{\\text{cont}}`. The number of levels - is defined by :math:`n_{\\text{levels}}`. Each level is chosen to have the same probability of being selected. - - The potential outcomes are defined as - - .. math:: - - Y(0) &= 210 + 27.4 Z_1 + 13.7 (Z_2 + Z_3 + Z_4) + \\varepsilon_Y - - Y(1) &= \\theta (D_{\\text{cont}}) 1\\{D_{\\text{cont}} > 0\\} + Y(0), - - where :math:`\\varepsilon_Y \\sim \\mathcal{N}(0,5)`. Further, the observed outcome is defined as - - .. math:: - - Y = Y(1) 1\\{D > 0\\} + Y(0) 1\\{D = 0\\}. - - The data is returned as a dictionary with the entries ``x``, ``y``, ``d`` and ``oracle_values``. - - Parameters - ---------- - n_obs : int - The number of observations to simulate. - Default is ``200``. - - n_levels : int - The number of treatment levels. - Default is ``3``. - - linear : bool - Indicates whether the true underlying regression is linear. - Default is ``False``. - - random_state : int - Random seed for reproducibility. - Default is ``42``. - - Returns - ------- - res_dict : dictionary - Dictionary with entries ``x``, ``y``, ``d`` and ``oracle_values``. 
- The oracle values contain the continuous treatment, the level bounds, the potential level, ITE - and the potential outcome without treatment. - - """ - if random_state is not None: - np.random.seed(random_state) - xi = kwargs.get('xi', 0.3) - c = kwargs.get('c', 0.0) - dim_x = kwargs.get('dim_x', 5) - - if not isinstance(n_levels, int): - raise ValueError('n_levels must be an integer.') - if n_levels < 2: - raise ValueError('n_levels must be at least 2.') - - # observed covariates - cov_mat = toeplitz([np.power(c, k) for k in range(dim_x)]) - x = np.random.multivariate_normal(np.zeros(dim_x), cov_mat, size=[n_obs, ]) - - def f_reg(w): - res = 210 + 27.4 * w[:, 0] + 13.7 * (w[:, 1] + w[:, 2] + w[:, 3]) - return res - - def f_treatment(w, xi): - res = xi * (-w[:, 0] + 0.5 * w[:, 1] - 0.25 * w[:, 2] - 0.1 * w[:, 3]) - return res - - def treatment_effect(d, scale=15): - return scale * (1 / (1 + np.exp(-d - 1.2 * np.cos(d)))) - 2 - - z_tilde_1 = np.exp(0.5 * x[:, 0]) - z_tilde_2 = 10 + x[:, 1] / (1 + np.exp(x[:, 0])) - z_tilde_3 = (0.6 + x[:, 0] * x[:, 2] / 25) ** 3 - z_tilde_4 = (20 + x[:, 1] + x[:, 3]) ** 2 - - z_tilde = np.column_stack((z_tilde_1, z_tilde_2, z_tilde_3, z_tilde_4, x[:, 4:])) - z = (z_tilde - np.mean(z_tilde, axis=0)) / np.std(z_tilde, axis=0) - - # error terms - var_eps_y = 5 - eps_y = np.random.normal(loc=0, scale=np.sqrt(var_eps_y), size=n_obs) - var_eps_d = 1 - eps_d = np.random.normal(loc=0, scale=np.sqrt(var_eps_d), size=n_obs) - - if linear: - g = f_reg(x) - m = f_treatment(x, xi) - else: - assert not linear - g = f_reg(z) - m = f_treatment(z, xi) - - cont_d = m + eps_d - level_bounds = np.quantile(cont_d, q=np.linspace(0, 1, n_levels + 1)) - potential_level = sum([1.0 * (cont_d >= bound) for bound in level_bounds[1:-1]]) + 1 - eta = np.random.uniform(0, 1, size=n_obs) - d = 1.0 * (eta >= 1 / n_levels) * potential_level - - ite = treatment_effect(cont_d) - y0 = g + eps_y - # only treated for d > 0 compared to the baseline - y = ite * (d > 0) + 
y0 - - oracle_values = { - 'cont_d': cont_d, - 'level_bounds': level_bounds, - 'potential_level': potential_level, - 'ite': ite, - 'y0': y0, - } - - resul_dict = { - 'x': x, - 'y': y, - 'd': d, - 'oracle_values': oracle_values - } - - return resul_dict - - -def make_logistic_LZZ2020(n_obs=500, dim_x=20, alpha=0.5, return_type='DoubleMLData', balanced_r0=True, treatment="continuous", **kwargs): - """ - Generates synthetic data for a logistic partially linear regression model, as in Liu et al. (2021), - designed for use in double/debiased machine learning applications. - - The data generating process is defined as follows: - - - Covariates \( x_i \sim \mathcal{N}(0, \Sigma) \), where \( \Sigma_{kj} = 0.7^{|j-k|} \). - - Treatment \( d_i = a_0(x_i) \). - - Propensity score \( p_i = \sigma(\alpha d_i + r_0(x_i)) \), where \( \sigma(\cdot) \) is the logistic function. - - Outcome \( y_i \sim \text{Bernoulli}(p_i) \). - - The nuisance functions are defined as: - - .. math:: - - a_0(x_i) &= \frac{2}{1 + \exp(x_{i,1})} - \frac{2}{1 + \exp(x_{i,2})} + \sin(x_{i,3}) + \cos(x_{i,4}) \\ - &+ 0.5 \cdot \mathbb{1}(x_{i,5} > 0) - 0.5 \cdot \mathbb{1}(x_{i,6} > 0) + 0.2 x_{i,7} x_{i,8} - 0.2 x_{i,9} x_{i,10} \\ - - r_0(x_i) &= 0.1 x_{i,1} x_{i,2} x_{i,3} + 0.1 x_{i,4} x_{i,5} + 0.1 x_{i,6}^3 - 0.5 \sin^2(x_{i,7}) \\ - &+ 0.5 \cos(x_{i,8}) + \frac{1}{1 + x_{i,9}^2} - \frac{1}{1 + \exp(x_{i,10})} \\ - &+ 0.25 \cdot \mathbb{1}(x_{i,11} > 0) - 0.25 \cdot \mathbb{1}(x_{i,13} > 0) - - Parameters - ---------- - n_obs : int - Number of observations to simulate. - dim_x : int - Number of covariates. - alpha : float - Value of the causal parameter. - return_type : str - Determines the return format. One of: - - - 'DoubleMLData' or DoubleMLData: returns a ``DoubleMLData`` object. - - 'DataFrame', 'pd.DataFrame' or pd.DataFrame: returns a ``pandas.DataFrame``. - - 'array', 'np.ndarray', 'np.array' or np.ndarray: returns tuple of numpy arrays (x, y, d, p). 
- - **kwargs - Optional keyword arguments (currently unused in this implementation). - - Returns - ------- - Union[DoubleMLData, pd.DataFrame, Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]] - The generated data in the specified format. - - References - ---------- - Liu, Molei, Yi Zhang, and Doudou Zhou. 2021. - "Double/Debiased Machine Learning for Logistic Partially Linear Model." - The Econometrics Journal 24 (3): 559–88. https://doi.org/10.1093/ectj/utab019. - - """ - - if balanced_r0: - def r_0(X): - return 0.1 * X[:, 0] * X[:, 1] * X[:, 2] + \ - 0.1 * X[:, 3] * X[:, 4] + \ - 0.1 * X[:, 5] ** 3 + \ - -0.5 * np.sin(X[:, 6]) ** 2 + \ - 0.5 * np.cos(X[:, 7]) + \ - 1 / (1 + X[:, 8] ** 2) + \ - -1 / (1 + np.exp(X[:, 9])) + \ - 0.25 * np.where(X[:, 10] > 0, 1, 0) + \ - -0.25 * np.where(X[:, 12] > 0, 1, 0) - else: - def r_0(X): - return 0.1 * X[:, 0] * X[:, 1] * X[:, 2] + \ - 0.1 * X[:, 3] * X[:, 4] + \ - 0.1 * X[:, 5] ** 3 + \ - -0.5 * np.sin(X[:, 6]) ** 2 + \ - 0.5 * np.cos(X[:, 7]) + \ - 4 / (1 + X[:, 8] ** 2) + \ - -1 / (1 + np.exp(X[:, 9])) + \ - 1.5 * np.where(X[:, 10] > 0, 1, 0) + \ - -0.25 * np.where(X[:, 12] > 0, 1, 0) - - def a_0(X): - return 2 / (1 + np.exp(X[:, 0])) + \ - -2 / (1 + np.exp(X[:, 1])) + \ - 1 * np.sin(X[:, 2]) + \ - 1 * np.cos(X[:, 3]) + \ - 0.5 * np.where(X[:, 4] > 0, 1, 0) + \ - -0.5 * np.where(X[:, 5] > 0, 1, 0) + \ - 0.2 * X[:, 6] * X[:, 7] + \ - -0.2 * X[:, 8] * X[:, 9] - - - sigma = np.full((dim_x, dim_x), 0.2) - np.fill_diagonal(sigma, 1) - - x = np.random.multivariate_normal(np.zeros(dim_x), sigma, size=n_obs) - np.clip(x, -2, 2, out=x) - - if treatment == "continuous": - d = a_0(x) - elif treatment == "binary": - d_cont = a_0(x) - d = np.random.binomial(1, expit(d_cont - d_cont.mean())) - elif treatment == "binary_unbalanced": - d_cont = a_0(x) - d = np.random.binomial(1, expit(d_cont)) - - p = expit(alpha * d[:] + r_0(x)) - - y = np.random.binomial(1, p) - - if return_type in _array_alias: - return x, y, d, p - elif 
return_type in _data_frame_alias + _dml_data_alias: - x_cols = [f'X{i + 1}' for i in np.arange(dim_x)] - data = pd.DataFrame(np.column_stack((x, y, d, p)), - columns=x_cols + ['y', 'd', 'p']) - if return_type in _data_frame_alias: - return data - else: - return DoubleMLData(data, 'y', 'd', x_cols, p_cols='p') - else: - raise ValueError('Invalid return_type.') diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index 1cc6bcf9b..05481bf16 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -585,12 +585,6 @@ def fit(self, n_jobs_cv=None, store_predictions=True, external_predictions=None, # construct framework for inference self._framework = self.construct_framework() - - - - - - return self def construct_framework(self): diff --git a/doubleml/double_ml_data.py b/doubleml/double_ml_data.py deleted file mode 100644 index 612e6b7f0..000000000 --- a/doubleml/double_ml_data.py +++ /dev/null @@ -1,1104 +0,0 @@ -import numpy as np -import pandas as pd -import io - -from abc import ABC, abstractmethod - -from sklearn.utils.validation import check_array, column_or_1d, check_consistent_length -from sklearn.utils import assert_all_finite -from sklearn.utils.multiclass import type_of_target -from .utils._estimation import _assure_2d_array -from .utils._checks import _check_set - - -class DoubleMLBaseData(ABC): - """Base Class Double machine learning data-backends - """ - def __init__(self, - data): - if not isinstance(data, pd.DataFrame): - raise TypeError('data must be of pd.DataFrame type. 
' - f'{str(data)} of type {str(type(data))} was passed.') - if not data.columns.is_unique: - raise ValueError('Invalid pd.DataFrame: ' - 'Contains duplicate column names.') - self._data = data - - def __str__(self): - data_summary = self._data_summary_str() - buf = io.StringIO() - self.data.info(verbose=False, buf=buf) - df_info = buf.getvalue() - res = '================== DoubleMLBaseData Object ==================\n' + \ - '\n------------------ Data summary ------------------\n' + data_summary + \ - '\n------------------ DataFrame info ------------------\n' + df_info - return res - - def _data_summary_str(self): - data_summary = f'No. Observations: {self.n_obs}\n' - return data_summary - - @property - def data(self): - """ - The data. - """ - return self._data - - @property - def all_variables(self): - """ - All variables available in the dataset. - """ - return self.data.columns - - @property - def n_obs(self): - """ - The number of observations. - """ - return self.data.shape[0] - - # TODO: This and the following property does not make sense but the base class DoubleML needs it (especially for the - # multiple treatment variables case) and other things are also build around it, see for example DoubleML._params - @property - def d_cols(self): - return ['theta'] - - @property - def n_treat(self): - """ - The number of treatment variables. - """ - return 1 - - @property - @abstractmethod - def n_coefs(self): - pass - - -class DoubleMLData(DoubleMLBaseData): - """Double machine learning data-backend. - - :class:`DoubleMLData` objects can be initialized from - :class:`pandas.DataFrame`'s as well as :class:`numpy.ndarray`'s. - - Parameters - ---------- - data : :class:`pandas.DataFrame` - The data. - - y_col : str - The outcome variable. - - d_cols : str or list - The treatment variable(s). - - x_cols : None, str or list - The covariates. 
- If ``None``, all variables (columns of ``data``) which are neither specified as outcome variable ``y_col``, nor - treatment variables ``d_cols``, nor instrumental variables ``z_cols`` are used as covariates. - Default is ``None``. - - z_cols : None, str or list - The instrumental variable(s). - Default is ``None``. - - t_col : None or str - The time variable (only relevant/used for DiD Estimators). - Default is ``None``. - - s_col : None or str - The score or selection variable (only relevant/used for RDD or SSM Estimatiors). - Default is ``None``. - - p_cols : None, str or list, optional - The column(s) containing the probabilities of the outcome (only for simulated, binary data). - Default is ``None``. - - use_other_treat_as_covariate : bool - Indicates whether in the multiple-treatment case the other treatment variables should be added as covariates. - Default is ``True``. - - force_all_x_finite : bool or str - Indicates whether to raise an error on infinite values and / or missings in the covariates ``x``. - Possible values are: ``True`` (neither missings ``np.nan``, ``pd.NA`` nor infinite values ``np.inf`` are - allowed), ``False`` (missings and infinite values are allowed), ``'allow-nan'`` (only missings are allowed). - Note that the choice ``False`` and ``'allow-nan'`` are only reasonable if the machine learning methods used - for the nuisance functions are capable to provide valid predictions with missings and / or infinite values - in the covariates ``x``. - Default is ``True``. 
- - Examples - -------- - >>> from doubleml import DoubleMLData - >>> from doubleml.datasets import make_plr_CCDDHNR2018 - >>> # initialization from pandas.DataFrame - >>> df = make_plr_CCDDHNR2018(return_type='DataFrame') - >>> obj_dml_data_from_df = DoubleMLData(df, 'y', 'd') - >>> # initialization from np.ndarray - >>> (x, y, d) = make_plr_CCDDHNR2018(return_type='array') - >>> obj_dml_data_from_array = DoubleMLData.from_arrays(x, y, d) - """ - def __init__(self, - data, - y_col, - d_cols, - x_cols=None, - z_cols=None, - t_col=None, - s_col=None, - p_cols=None, - use_other_treat_as_covariate=True, - force_all_x_finite=True): - DoubleMLBaseData.__init__(self, data) - - self.y_col = y_col - self.d_cols = d_cols - self.z_cols = z_cols - self.t_col = t_col - self.s_col = s_col - self.x_cols = x_cols - self.p_cols = p_cols - self._check_disjoint_sets_y_d_x_z_t_s() - self.use_other_treat_as_covariate = use_other_treat_as_covariate - self.force_all_x_finite = force_all_x_finite - self._binary_treats = self._check_binary_treats() - self._binary_outcome = self._check_binary_outcome() - self._set_y_z_t_s() - # by default, we initialize to the first treatment variable - self.set_x_d(self.d_cols[0]) - - def __str__(self): - data_summary = self._data_summary_str() - buf = io.StringIO() - self.data.info(verbose=False, buf=buf) - df_info = buf.getvalue() - res = '================== DoubleMLData Object ==================\n' + \ - '\n------------------ Data summary ------------------\n' + data_summary + \ - '\n------------------ DataFrame info ------------------\n' + df_info - return res - - def _data_summary_str(self): - data_summary = f'Outcome variable: {self.y_col}\n' \ - f'Treatment variable(s): {self.d_cols}\n' \ - f'Covariates: {self.x_cols}\n' \ - f'Instrument variable(s): {self.z_cols}\n' - if self.t_col is not None: - data_summary += f'Time variable: {self.t_col}\n' - if self.s_col is not None: - data_summary += f'Score/Selection variable: {self.s_col}\n' - 
data_summary += f'No. Observations: {self.n_obs}\n' - return data_summary - - @classmethod - def from_arrays(cls, x, y, d, z=None, t=None, s=None, p=None, use_other_treat_as_covariate=True, - force_all_x_finite=True): - """ - Initialize :class:`DoubleMLData` from :class:`numpy.ndarray`'s. - - Parameters - ---------- - x : :class:`numpy.ndarray` - Array of covariates. - - y : :class:`numpy.ndarray` - Array of the outcome variable. - - d : :class:`numpy.ndarray` - Array of treatment variables. - - z : None or :class:`numpy.ndarray` - Array of instrumental variables. - Default is ``None``. - - t : :class:`numpy.ndarray` - Array of the time variable (only relevant/used for DiD models). - Default is ``None``. - - s : :class:`numpy.ndarray` - Array of the score or selection variable (only relevant/used for RDD and SSM models). - Default is ``None``. - - p : None or :class:`numpy.ndarray` - Array of the probabilities of the outcome (only for simulated, binary data). - Default is ``None``. - - use_other_treat_as_covariate : bool - Indicates whether in the multiple-treatment case the other treatment variables should be added as covariates. - Default is ``True``. - - force_all_x_finite : bool or str - Indicates whether to raise an error on infinite values and / or missings in the covariates ``x``. - Possible values are: ``True`` (neither missings ``np.nan``, ``pd.NA`` nor infinite values ``np.inf`` are - allowed), ``False`` (missings and infinite values are allowed), ``'allow-nan'`` (only missings are allowed). - Note that the choice ``False`` and ``'allow-nan'`` are only reasonable if the machine learning methods used - for the nuisance functions are capable to provide valid predictions with missings and / or infinite values - in the covariates ``x``. - Default is ``True``. 
- - Examples - -------- - >>> from doubleml import DoubleMLData - >>> from doubleml.datasets import make_plr_CCDDHNR2018 - >>> (x, y, d) = make_plr_CCDDHNR2018(return_type='array') - >>> obj_dml_data_from_array = DoubleMLData.from_arrays(x, y, d) - """ - if isinstance(force_all_x_finite, str): - if force_all_x_finite != 'allow-nan': - raise ValueError("Invalid force_all_x_finite " + force_all_x_finite + ". " + - "force_all_x_finite must be True, False or 'allow-nan'.") - elif not isinstance(force_all_x_finite, bool): - raise TypeError("Invalid force_all_x_finite. " + - "force_all_x_finite must be True, False or 'allow-nan'.") - - x = check_array(x, ensure_2d=False, allow_nd=False, - force_all_finite=force_all_x_finite) - d = check_array(d, ensure_2d=False, allow_nd=False) - y = column_or_1d(y, warn=True) - - x = _assure_2d_array(x) - d = _assure_2d_array(d) - - y_col = 'y' - if z is None: - check_consistent_length(x, y, d) - z_cols = None - else: - z = check_array(z, ensure_2d=False, allow_nd=False) - z = _assure_2d_array(z) - check_consistent_length(x, y, d, z) - if z.shape[1] == 1: - z_cols = ['z'] - else: - z_cols = [f'z{i + 1}' for i in np.arange(z.shape[1])] - - if t is None: - t_col = None - else: - t = column_or_1d(t, warn=True) - check_consistent_length(x, y, d, t) - t_col = 't' - - if s is None: - s_col = None - else: - s = column_or_1d(s, warn=True) - check_consistent_length(x, y, d, s) - s_col = 's' - - - if p is None: - p_cols = None - else: - if p.shape[1] == 1: - p_cols = ['p'] - else: - p_cols = [f'p{i + 1}' for i in np.arange(p.shape[1])] - - if d.shape[1] == 1: - d_cols = ['d'] - else: - d_cols = [f'd{i+1}' for i in np.arange(d.shape[1])] - - x_cols = [f'X{i+1}' for i in np.arange(x.shape[1])] - - # basline version with features, outcome and treatments - data = pd.DataFrame(np.column_stack((x, y, d)), - columns=x_cols + [y_col] + d_cols) - - if z is not None: - df_z = pd.DataFrame(z, columns=z_cols) - data = pd.concat([data, df_z], axis=1) - - if t 
is not None: - data[t_col] = t - - if s is not None: - data[s_col] = s - - if p is not None: - data[p_cols] = p - - return cls(data, y_col, d_cols, x_cols, z_cols, t_col, s_col, p_cols, use_other_treat_as_covariate, force_all_x_finite) - - @property - def x(self): - """ - Array of covariates; - Dynamic! May depend on the currently set treatment variable; - To get an array of all covariates (independent of the currently set treatment variable) - call ``obj.data[obj.x_cols].values``. - """ - return self._X.values - - @property - def y(self): - """ - Array of outcome variable. - """ - return self._y.values - - @property - def d(self): - """ - Array of treatment variable; - Dynamic! Depends on the currently set treatment variable; - To get an array of all treatment variables (independent of the currently set treatment variable) - call ``obj.data[obj.d_cols].values``. - """ - return self._d.values - - @property - def z(self): - """ - Array of instrumental variables. - """ - if self.z_cols is not None: - return self._z.values - else: - return None - - @property - def t(self): - """ - Array of time variable. - """ - if self.t_col is not None: - return self._t.values - else: - return None - - @property - def s(self): - """ - Array of score or selection variable. - """ - if self.s_col is not None: - return self._s.values - else: - return None - - @property - def p_cols(self): - """ - The column(s) containing the probabilities of the outcome (only for simulated data). - """ - return self._p_cols - - @p_cols.setter - def p_cols(self, value): - if value is not None: - if isinstance(value, str): - value = [value] - if not isinstance(value, list): - raise TypeError('The probability column(s) p_cols must be of str or list type (or None). 
' - f'{str(value)} of type {str(type(value))} was passed.') - if not len(set(value)) == len(value): - raise ValueError('Invalid probability column(s) p_cols: ' - 'Contains duplicate values.') - if not set(value).issubset(set(self.all_variables)): - raise ValueError('Invalid probability column(s) p_cols. ' - 'At least one probability column is not a data column.') - self._p_cols = value - else: - self._p_cols = None - - @property - def p(self): - """ - Array of probabilities of the outcome (only for simulated data). - """ - if self.p_cols is not None: - return self._p.values - else: - return None - - @property - def n_treat(self): - """ - The number of treatment variables. - """ - return len(self.d_cols) - - @property - def n_coefs(self): - """ - The number of coefficients to be estimated. - """ - return self.n_treat - - @property - def n_instr(self): - """ - The number of instruments. - """ - if self.z_cols is not None: - n_instr = len(self.z_cols) - else: - n_instr = 0 - return n_instr - - @property - def binary_treats(self): - """ - Series with logical(s) indicating whether the treatment variable(s) are binary with values 0 and 1. - """ - return self._binary_treats - - @property - def binary_outcome(self): - """ - Logical indicating whether the outcome variable is binary with values 0 and 1. - """ - return self._binary_outcome - - @property - def x_cols(self): - """ - The covariates. - """ - return self._x_cols - - @x_cols.setter - def x_cols(self, value): - reset_value = hasattr(self, '_x_cols') - if value is not None: - if isinstance(value, str): - value = [value] - if not isinstance(value, list): - raise TypeError('The covariates x_cols must be of str or list type (or None). ' - f'{str(value)} of type {str(type(value))} was passed.') - if not len(set(value)) == len(value): - raise ValueError('Invalid covariates x_cols: ' - 'Contains duplicate values.') - if not set(value).issubset(set(self.all_variables)): - raise ValueError('Invalid covariates x_cols. 
' - 'At least one covariate is no data column.') - assert set(value).issubset(set(self.all_variables)) - self._x_cols = value - else: - excluded_cols = set.union({self.y_col}, set(self.d_cols)) - if (self.z_cols is not None): - excluded_cols = set.union(excluded_cols, set(self.z_cols)) - for col in [self.t_col, self.s_col]: - col = _check_set(col) - excluded_cols = set.union(excluded_cols, col) - self._x_cols = [col for col in self.data.columns if col not in excluded_cols] - if reset_value: - self._check_disjoint_sets() - # by default, we initialize to the first treatment variable - self.set_x_d(self.d_cols[0]) - - @property - def d_cols(self): - """ - The treatment variable(s). - """ - return self._d_cols - - @d_cols.setter - def d_cols(self, value): - reset_value = hasattr(self, '_d_cols') - if isinstance(value, str): - value = [value] - if not isinstance(value, list): - raise TypeError('The treatment variable(s) d_cols must be of str or list type. ' - f'{str(value)} of type {str(type(value))} was passed.') - if not len(set(value)) == len(value): - raise ValueError('Invalid treatment variable(s) d_cols: ' - 'Contains duplicate values.') - if not set(value).issubset(set(self.all_variables)): - raise ValueError('Invalid treatment variable(s) d_cols. ' - 'At least one treatment variable is no data column.') - self._d_cols = value - if reset_value: - self._check_disjoint_sets() - # by default, we initialize to the first treatment variable - self.set_x_d(self.d_cols[0]) - - @property - def y_col(self): - """ - The outcome variable. - """ - return self._y_col - - @y_col.setter - def y_col(self, value): - reset_value = hasattr(self, '_y_col') - if not isinstance(value, str): - raise TypeError('The outcome variable y_col must be of str type. ' - f'{str(value)} of type {str(type(value))} was passed.') - if value not in self.all_variables: - raise ValueError('Invalid outcome variable y_col. 
' - f'{value} is no data column.') - self._y_col = value - if reset_value: - self._check_disjoint_sets() - self._set_y_z_t_s() - - @property - def z_cols(self): - """ - The instrumental variable(s). - """ - return self._z_cols - - @z_cols.setter - def z_cols(self, value): - reset_value = hasattr(self, '_z_cols') - if value is not None: - if isinstance(value, str): - value = [value] - if not isinstance(value, list): - raise TypeError('The instrumental variable(s) z_cols must be of str or list type (or None). ' - f'{str(value)} of type {str(type(value))} was passed.') - if not len(set(value)) == len(value): - raise ValueError('Invalid instrumental variable(s) z_cols: ' - 'Contains duplicate values.') - if not set(value).issubset(set(self.all_variables)): - raise ValueError('Invalid instrumental variable(s) z_cols. ' - 'At least one instrumental variable is no data column.') - self._z_cols = value - else: - self._z_cols = None - if reset_value: - self._check_disjoint_sets() - self._set_y_z_t_s() - - @property - def t_col(self): - """ - The time variable. - """ - return self._t_col - - @t_col.setter - def t_col(self, value): - reset_value = hasattr(self, '_t_col') - if value is not None: - if not isinstance(value, str): - raise TypeError('The time variable t_col must be of str type (or None). ' - f'{str(value)} of type {str(type(value))} was passed.') - if value not in self.all_variables: - raise ValueError('Invalid time variable t_col. ' - f'{value} is no data column.') - self._t_col = value - else: - self._t_col = None - if reset_value: - self._check_disjoint_sets() - self._set_y_z_t_s() - - @property - def s_col(self): - """ - The score or selection variable. - """ - return self._s_col - - @s_col.setter - def s_col(self, value): - reset_value = hasattr(self, '_s_col') - if value is not None: - if not isinstance(value, str): - raise TypeError('The score or selection variable s_col must be of str type (or None). 
' - f'{str(value)} of type {str(type(value))} was passed.') - if value not in self.all_variables: - raise ValueError('Invalid score or selection variable s_col. ' - f'{value} is no data column.') - self._s_col = value - else: - self._s_col = None - if reset_value: - self._check_disjoint_sets() - self._set_y_z_t_s() - - @property - def use_other_treat_as_covariate(self): - """ - Indicates whether in the multiple-treatment case the other treatment variables should be added as covariates. - """ - return self._use_other_treat_as_covariate - - @use_other_treat_as_covariate.setter - def use_other_treat_as_covariate(self, value): - reset_value = hasattr(self, '_use_other_treat_as_covariate') - if not isinstance(value, bool): - raise TypeError('use_other_treat_as_covariate must be True or False. ' - f'Got {str(value)}.') - self._use_other_treat_as_covariate = value - if reset_value: - # by default, we initialize to the first treatment variable - self.set_x_d(self.d_cols[0]) - - @property - def force_all_x_finite(self): - """ - Indicates whether to raise an error on infinite values and / or missings in the covariates ``x``. - """ - return self._force_all_x_finite - - @force_all_x_finite.setter - def force_all_x_finite(self, value): - reset_value = hasattr(self, '_force_all_x_finite') - if isinstance(value, str): - if value != 'allow-nan': - raise ValueError("Invalid force_all_x_finite " + value + ". " + - "force_all_x_finite must be True, False or 'allow-nan'.") - elif not isinstance(value, bool): - raise TypeError("Invalid force_all_x_finite. 
" + - "force_all_x_finite must be True, False or 'allow-nan'.") - self._force_all_x_finite = value - if reset_value: - # by default, we initialize to the first treatment variable - self.set_x_d(self.d_cols[0]) - - def _set_y_z_t_s(self): - assert_all_finite(self.data.loc[:, self.y_col]) - self._y = self.data.loc[:, self.y_col] - if self.z_cols is None: - self._z = None - else: - assert_all_finite(self.data.loc[:, self.z_cols]) - self._z = self.data.loc[:, self.z_cols] - - if self.t_col is None: - self._t = None - else: - assert_all_finite(self.data.loc[:, self.t_col]) - self._t = self.data.loc[:, self.t_col] - - if self.s_col is None: - self._s = None - else: - assert_all_finite(self.data.loc[:, self.s_col]) - self._s = self.data.loc[:, self.s_col] - - def set_x_d(self, treatment_var): - """ - Function that assigns the role for the treatment variables in the multiple-treatment case. - - Parameters - ---------- - treatment_var : str - Active treatment variable that will be set to d. - """ - if not isinstance(treatment_var, str): - raise TypeError('treatment_var must be of str type. ' - f'{str(treatment_var)} of type {str(type(treatment_var))} was passed.') - if treatment_var not in self.d_cols: - raise ValueError('Invalid treatment_var. 
' - f'{treatment_var} is not in d_cols.') - if self.use_other_treat_as_covariate: - # note that the following line needs to be adapted in case an intersection of x_cols and d_cols as allowed - # (see https://github.com/DoubleML/doubleml-for-py/issues/83) - xd_list = self.x_cols + self.d_cols - xd_list.remove(treatment_var) - else: - xd_list = self.x_cols - assert_all_finite(self.data.loc[:, treatment_var]) - if self.force_all_x_finite: - assert_all_finite(self.data.loc[:, xd_list], - allow_nan=self.force_all_x_finite == 'allow-nan') - self._d = self.data.loc[:, treatment_var] - self._X = self.data.loc[:, xd_list] - - def _check_binary_treats(self): - is_binary = pd.Series(dtype=bool, index=self.d_cols) - for treatment_var in self.d_cols: - this_d = self.data.loc[:, treatment_var] - binary_treat = (type_of_target(this_d) == 'binary') - zero_one_treat = np.all((np.power(this_d, 2) - this_d) == 0) - is_binary[treatment_var] = (binary_treat & zero_one_treat) - return is_binary - - def _check_binary_outcome(self): - y = self.data.loc[:, self.y_col] - binary_outcome = (type_of_target(y) == 'binary') - zero_one_outcome = np.all((np.power(y, 2) - y) == 0) - is_binary = (binary_outcome & zero_one_outcome) - return is_binary - - def _check_disjoint_sets(self): - # this function can be extended in inherited subclasses - self._check_disjoint_sets_y_d_x_z_t_s() - - def _check_disjoint_sets_y_d_x_z_t_s(self): - y_col_set = {self.y_col} - x_cols_set = set(self.x_cols) - d_cols_set = set(self.d_cols) - - if not y_col_set.isdisjoint(x_cols_set): - raise ValueError(f'{str(self.y_col)} cannot be set as outcome variable ``y_col`` and covariate in ' - '``x_cols``.') - if not y_col_set.isdisjoint(d_cols_set): - raise ValueError(f'{str(self.y_col)} cannot be set as outcome variable ``y_col`` and treatment variable in ' - '``d_cols``.') - # note that the line xd_list = self.x_cols + self.d_cols in method set_x_d needs adaption if an intersection of - # x_cols and d_cols as allowed (see 
https://github.com/DoubleML/doubleml-for-py/issues/83) - if not d_cols_set.isdisjoint(x_cols_set): - raise ValueError('At least one variable/column is set as treatment variable (``d_cols``) and as covariate' - '(``x_cols``). Consider using parameter ``use_other_treat_as_covariate``.') - - if self.z_cols is not None: - z_cols_set = set(self.z_cols) - if not y_col_set.isdisjoint(z_cols_set): - raise ValueError(f'{str(self.y_col)} cannot be set as outcome variable ``y_col`` and instrumental ' - 'variable in ``z_cols``.') - if not d_cols_set.isdisjoint(z_cols_set): - raise ValueError('At least one variable/column is set as treatment variable (``d_cols``) and ' - 'instrumental variable in ``z_cols``.') - if not x_cols_set.isdisjoint(z_cols_set): - raise ValueError('At least one variable/column is set as covariate (``x_cols``) and instrumental ' - 'variable in ``z_cols``.') - - self._check_disjoint_sets_t_s() - - def _check_disjoint_sets_t_s(self): - y_col_set = {self.y_col} - x_cols_set = set(self.x_cols) - d_cols_set = set(self.d_cols) - - if self.t_col is not None: - t_col_set = {self.t_col} - if not t_col_set.isdisjoint(x_cols_set): - raise ValueError(f'{str(self.t_col)} cannot be set as time variable ``t_col`` and covariate in ' - '``x_cols``.') - if not t_col_set.isdisjoint(d_cols_set): - raise ValueError(f'{str(self.t_col)} cannot be set as time variable ``t_col`` and treatment variable in ' - '``d_cols``.') - if not t_col_set.isdisjoint(y_col_set): - raise ValueError(f'{str(self.t_col)} cannot be set as time variable ``t_col`` and outcome variable ' - '``y_col``.') - if self.z_cols is not None: - z_cols_set = set(self.z_cols) - if not t_col_set.isdisjoint(z_cols_set): - raise ValueError(f'{str(self.t_col)} cannot be set as time variable ``t_col`` and instrumental ' - 'variable in ``z_cols``.') - - if self.s_col is not None: - s_col_set = {self.s_col} - if not s_col_set.isdisjoint(x_cols_set): - raise ValueError(f'{str(self.s_col)} cannot be set as score or 
selection variable ``s_col`` and covariate in ' - '``x_cols``.') - if not s_col_set.isdisjoint(d_cols_set): - raise ValueError(f'{str(self.s_col)} cannot be set as score or selection variable ``s_col`` and treatment ' - 'variable in ``d_cols``.') - if not s_col_set.isdisjoint(y_col_set): - raise ValueError(f'{str(self.s_col)} cannot be set as score or selection variable ``s_col`` and outcome ' - 'variable ``y_col``.') - if self.z_cols is not None: - z_cols_set = set(self.z_cols) - if not s_col_set.isdisjoint(z_cols_set): - raise ValueError(f'{str(self.s_col)} cannot be set as score or selection variable ``s_col`` and ' - 'instrumental variable in ``z_cols``.') - if self.t_col is not None: - t_col_set = {self.t_col} - if not s_col_set.isdisjoint(t_col_set): - raise ValueError(f'{str(self.s_col)} cannot be set as score or selection variable ``s_col`` and time ' - 'variable ``t_col``.') - - -class DoubleMLClusterData(DoubleMLData): - """Double machine learning data-backend for data with cluster variables. - - :class:`DoubleMLClusterData` objects can be initialized from - :class:`pandas.DataFrame`'s as well as :class:`numpy.ndarray`'s. - - Parameters - ---------- - data : :class:`pandas.DataFrame` - The data. - - y_col : str - The outcome variable. - - d_cols : str or list - The treatment variable(s). - - cluster_cols : str or list - The cluster variable(s). - - x_cols : None, str or list - The covariates. - If ``None``, all variables (columns of ``data``) which are neither specified as outcome variable ``y_col``, nor - treatment variables ``d_cols``, nor instrumental variables ``z_cols`` are used as covariates. - Default is ``None``. - - z_cols : None, str or list - The instrumental variable(s). - Default is ``None``. - - t_col : None or str - The time variable (only relevant/used for DiD Estimators). - Default is ``None``. - - s_col : None or str - The score or selection variable (only relevant/used for RDD and SSM Estimatiors). - Default is ``None``. 
- - use_other_treat_as_covariate : bool - Indicates whether in the multiple-treatment case the other treatment variables should be added as covariates. - Default is ``True``. - - force_all_x_finite : bool or str - Indicates whether to raise an error on infinite values and / or missings in the covariates ``x``. - Possible values are: ``True`` (neither missings ``np.nan``, ``pd.NA`` nor infinite values ``np.inf`` are - allowed), ``False`` (missings and infinite values are allowed), ``'allow-nan'`` (only missings are allowed). - Note that the choice ``False`` and ``'allow-nan'`` are only reasonable if the machine learning methods used - for the nuisance functions are capable to provide valid predictions with missings and / or infinite values - in the covariates ``x``. - Default is ``True``. - - Examples - -------- - >>> from doubleml import DoubleMLClusterData - >>> from doubleml.datasets import make_pliv_multiway_cluster_CKMS2021 - >>> # initialization from pandas.DataFrame - >>> df = make_pliv_multiway_cluster_CKMS2021(return_type='DataFrame') - >>> obj_dml_data_from_df = DoubleMLClusterData(df, 'Y', 'D', ['cluster_var_i', 'cluster_var_j'], z_cols='Z') - >>> # initialization from np.ndarray - >>> (x, y, d, cluster_vars, z) = make_pliv_multiway_cluster_CKMS2021(return_type='array') - >>> obj_dml_data_from_array = DoubleMLClusterData.from_arrays(x, y, d, cluster_vars, z) - """ - def __init__(self, - data, - y_col, - d_cols, - cluster_cols, - x_cols=None, - z_cols=None, - t_col=None, - s_col=None, - use_other_treat_as_covariate=True, - force_all_x_finite=True): - DoubleMLBaseData.__init__(self, data) - - # we need to set cluster_cols (needs _data) before call to the super __init__ because of the x_cols setter - self.cluster_cols = cluster_cols - self._set_cluster_vars() - DoubleMLData.__init__(self, - data, - y_col, - d_cols, - x_cols, - z_cols, - t_col, - s_col, - use_other_treat_as_covariate, - force_all_x_finite) - self._check_disjoint_sets_cluster_cols() - - def 
__str__(self): - data_summary = self._data_summary_str() - buf = io.StringIO() - self.data.info(verbose=False, buf=buf) - df_info = buf.getvalue() - res = '================== DoubleMLClusterData Object ==================\n' + \ - '\n------------------ Data summary ------------------\n' + data_summary + \ - '\n------------------ DataFrame info ------------------\n' + df_info - return res - - def _data_summary_str(self): - data_summary = f'Outcome variable: {self.y_col}\n' \ - f'Treatment variable(s): {self.d_cols}\n' \ - f'Cluster variable(s): {self.cluster_cols}\n' \ - f'Covariates: {self.x_cols}\n' \ - f'Instrument variable(s): {self.z_cols}\n' - if self.t_col is not None: - data_summary += f'Time variable: {self.t_col}\n' - if self.s_col is not None: - data_summary += f'Score/Selection variable: {self.s_col}\n' - - data_summary += f'No. Observations: {self.n_obs}\n' - return data_summary - - @classmethod - def from_arrays(cls, x, y, d, cluster_vars, z=None, t=None, s=None, use_other_treat_as_covariate=True, - force_all_x_finite=True): - """ - Initialize :class:`DoubleMLClusterData` from :class:`numpy.ndarray`'s. - - Parameters - ---------- - x : :class:`numpy.ndarray` - Array of covariates. - - y : :class:`numpy.ndarray` - Array of the outcome variable. - - d : :class:`numpy.ndarray` - Array of treatment variables. - - cluster_vars : :class:`numpy.ndarray` - Array of cluster variables. - - z : None or :class:`numpy.ndarray` - Array of instrumental variables. - Default is ``None``. - - t : :class:`numpy.ndarray` - Array of the time variable (only relevant/used for DiD models). - Default is ``None``. - - s : :class:`numpy.ndarray` - Array of the score or selection variable (only relevant/used for RDD or SSM models). - Default is ``None``. - - use_other_treat_as_covariate : bool - Indicates whether in the multiple-treatment case the other treatment variables should be added as covariates. - Default is ``True``. 
- - force_all_x_finite : bool or str - Indicates whether to raise an error on infinite values and / or missings in the covariates ``x``. - Possible values are: ``True`` (neither missings ``np.nan``, ``pd.NA`` nor infinite values ``np.inf`` are - allowed), ``False`` (missings and infinite values are allowed), ``'allow-nan'`` (only missings are allowed). - Note that the choice ``False`` and ``'allow-nan'`` are only reasonable if the machine learning methods used - for the nuisance functions are capable to provide valid predictions with missings and / or infinite values - in the covariates ``x``. - Default is ``True``. - - Examples - -------- - >>> from doubleml import DoubleMLClusterData - >>> from doubleml.datasets import make_pliv_multiway_cluster_CKMS2021 - >>> (x, y, d, cluster_vars, z) = make_pliv_multiway_cluster_CKMS2021(return_type='array') - >>> obj_dml_data_from_array = DoubleMLClusterData.from_arrays(x, y, d, cluster_vars, z) - """ - dml_data = DoubleMLData.from_arrays(x, y, d, z, t, s, use_other_treat_as_covariate, force_all_x_finite) - cluster_vars = check_array(cluster_vars, ensure_2d=False, allow_nd=False) - cluster_vars = _assure_2d_array(cluster_vars) - if cluster_vars.shape[1] == 1: - cluster_cols = ['cluster_var'] - else: - cluster_cols = [f'cluster_var{i + 1}' for i in np.arange(cluster_vars.shape[1])] - - data = pd.concat((pd.DataFrame(cluster_vars, columns=cluster_cols), dml_data.data), axis=1) - - return (cls(data, dml_data.y_col, dml_data.d_cols, cluster_cols, - dml_data.x_cols, dml_data.z_cols, dml_data.t_col, dml_data.s_col, - dml_data.use_other_treat_as_covariate, dml_data.force_all_x_finite)) - - @property - def cluster_cols(self): - """ - The cluster variable(s). 
- """ - return self._cluster_cols - - @cluster_cols.setter - def cluster_cols(self, value): - reset_value = hasattr(self, '_cluster_cols') - if isinstance(value, str): - value = [value] - if not isinstance(value, list): - raise TypeError('The cluster variable(s) cluster_cols must be of str or list type. ' - f'{str(value)} of type {str(type(value))} was passed.') - if not len(set(value)) == len(value): - raise ValueError('Invalid cluster variable(s) cluster_cols: ' - 'Contains duplicate values.') - if not set(value).issubset(set(self.all_variables)): - raise ValueError('Invalid cluster variable(s) cluster_cols. ' - 'At least one cluster variable is no data column.') - self._cluster_cols = value - if reset_value: - self._check_disjoint_sets() - self._set_cluster_vars() - - @property - def n_cluster_vars(self): - """ - The number of cluster variables. - """ - return len(self.cluster_cols) - - @property - def cluster_vars(self): - """ - Array of cluster variable(s). - """ - return self._cluster_vars.values - - @DoubleMLData.x_cols.setter - def x_cols(self, value): - if value is not None: - # this call might become much easier with https://github.com/python/cpython/pull/26194 - super(self.__class__, self.__class__).x_cols.__set__(self, value) - else: - if self.s_col is None: - if (self.z_cols is not None) & (self.t_col is not None): - y_d_z_t = set.union({self.y_col}, set(self.d_cols), set(self.z_cols), {self.t_col}, set(self.cluster_cols)) - x_cols = [col for col in self.data.columns if col not in y_d_z_t] - elif self.z_cols is not None: - y_d_z = set.union({self.y_col}, set(self.d_cols), set(self.z_cols), set(self.cluster_cols)) - x_cols = [col for col in self.data.columns if col not in y_d_z] - elif self.t_col is not None: - y_d_t = set.union({self.y_col}, set(self.d_cols), {self.t_col}, set(self.cluster_cols)) - x_cols = [col for col in self.data.columns if col not in y_d_t] - else: - y_d = set.union({self.y_col}, set(self.d_cols), set(self.cluster_cols)) - x_cols = 
[col for col in self.data.columns if col not in y_d] - else: - if (self.z_cols is not None) & (self.t_col is not None): - y_d_z_t_s = set.union({self.y_col}, set(self.d_cols), set(self.z_cols), {self.t_col}, {self.s_col}, - set(self.cluster_cols)) - x_cols = [col for col in self.data.columns if col not in y_d_z_t_s] - elif self.z_cols is not None: - y_d_z_s = set.union({self.y_col}, set(self.d_cols), set(self.z_cols), {self.s_col}, set(self.cluster_cols)) - x_cols = [col for col in self.data.columns if col not in y_d_z_s] - elif self.t_col is not None: - y_d_t_s = set.union({self.y_col}, set(self.d_cols), {self.t_col}, {self.s_col}, set(self.cluster_cols)) - x_cols = [col for col in self.data.columns if col not in y_d_t_s] - else: - y_d_s = set.union({self.y_col}, set(self.d_cols), {self.s_col}, set(self.cluster_cols)) - x_cols = [col for col in self.data.columns if col not in y_d_s] - # this call might become much easier with https://github.com/python/cpython/pull/26194 - super(self.__class__, self.__class__).x_cols.__set__(self, x_cols) - - def _check_disjoint_sets(self): - # apply the standard checks from the DoubleMLData class - super(DoubleMLClusterData, self)._check_disjoint_sets() - self._check_disjoint_sets_cluster_cols() - - def _check_disjoint_sets_cluster_cols(self): - # apply the standard checks from the DoubleMLData class - super(DoubleMLClusterData, self)._check_disjoint_sets() - - # special checks for the additional cluster variables - cluster_cols_set = set(self.cluster_cols) - y_col_set = {self.y_col} - x_cols_set = set(self.x_cols) - d_cols_set = set(self.d_cols) - t_col_set = {self.t_col} - s_col_set = {self.s_col} - - if not y_col_set.isdisjoint(cluster_cols_set): - raise ValueError(f'{str(self.y_col)} cannot be set as outcome variable ``y_col`` and cluster ' - 'variable in ``cluster_cols``.') - if not d_cols_set.isdisjoint(cluster_cols_set): - raise ValueError('At least one variable/column is set as treatment variable (``d_cols``) and ' - 
'cluster variable in ``cluster_cols``.') - # TODO: Is the following combination allowed, or not? - if not x_cols_set.isdisjoint(cluster_cols_set): - raise ValueError('At least one variable/column is set as covariate (``x_cols``) and cluster ' - 'variable in ``cluster_cols``.') - if self.z_cols is not None: - z_cols_set = set(self.z_cols) - if not z_cols_set.isdisjoint(cluster_cols_set): - raise ValueError('At least one variable/column is set as instrumental variable (``z_cols``) and ' - 'cluster variable in ``cluster_cols``.') - if self.t_col is not None: - if not t_col_set.isdisjoint(cluster_cols_set): - raise ValueError(f'{str(self.t_col)} cannot be set as time variable ``t_col`` and ' - 'cluster variable in ``cluster_cols``.') - if self.s_col is not None: - if not s_col_set.isdisjoint(cluster_cols_set): - raise ValueError(f'{str(self.s_col)} cannot be set as score or selection variable ``s_col`` and ' - 'cluster variable in ``cluster_cols``.') - - def _set_cluster_vars(self): - assert_all_finite(self.data.loc[:, self.cluster_cols]) - self._cluster_vars = self.data.loc[:, self.cluster_cols] diff --git a/doubleml/utils/_estimation.py b/doubleml/utils/_estimation.py index 8086322a8..7f24fde5f 100644 --- a/doubleml/utils/_estimation.py +++ b/doubleml/utils/_estimation.py @@ -187,22 +187,6 @@ def _draw_weights(method, n_rep_boot, n_obs): return weights -def _trimm(preds, trimming_rule, trimming_threshold): - if trimming_rule == 'truncate': - preds[preds < trimming_threshold] = trimming_threshold - preds[preds > 1 - trimming_threshold] = 1 - trimming_threshold - return preds - - -def _normalize_ipw(propensity, treatment): - mean_treat1 = np.mean(np.divide(treatment, propensity)) - mean_treat0 = np.mean(np.divide(1.0 - treatment, 1.0 - propensity)) - normalized_weights = np.multiply(treatment, np.multiply(propensity, mean_treat1)) \ - + np.multiply(1.0 - treatment, 1.0 - np.multiply(1.0 - propensity, mean_treat0)) - - return normalized_weights - - def _rmse(y_true, 
y_pred): subset = np.logical_not(np.isnan(y_true)) rmse = root_mean_squared_error(y_true[subset], y_pred[subset]) From 29114ce4ac7663618b5113285f660b86c46298fe Mon Sep 17 00:00:00 2001 From: Julius Herzig Date: Mon, 27 Oct 2025 14:35:46 -0700 Subject: [PATCH 16/48] Ruff checks and formatting --- doubleml/__init__.py | 4 +- doubleml/double_ml_score_mixins.py | 31 +- doubleml/plm/datasets/dgp_lplr_LZZ2020.py | 98 ++-- doubleml/plm/lplr.py | 523 +++++++++++-------- doubleml/plm/tests/_utils_logistic_manual.py | 313 ----------- doubleml/plm/tests/_utils_lplr_manual.py | 1 - doubleml/plm/tests/test_lplr_exceptions.py | 6 +- doubleml/plm/tests/tests_logistic.py | 307 ----------- doubleml/utils/_estimation.py | 59 ++- doubleml/utils/resampling.py | 38 +- 10 files changed, 447 insertions(+), 933 deletions(-) delete mode 100644 doubleml/plm/tests/_utils_logistic_manual.py delete mode 100644 doubleml/plm/tests/tests_logistic.py diff --git a/doubleml/__init__.py b/doubleml/__init__.py index 7c8ead970..cb3891bac 100644 --- a/doubleml/__init__.py +++ b/doubleml/__init__.py @@ -13,11 +13,9 @@ from .irm.pq import DoubleMLPQ from .irm.qte import DoubleMLQTE from .irm.ssm import DoubleMLSSM -from doubleml.plm.lplr import DoubleMLLPLR - +from .plm.lplr import DoubleMLLPLR from .plm.pliv import DoubleMLPLIV from .plm.plr import DoubleMLPLR -from .logistic.logistic import DoubleMLLogit from .utils.blp import DoubleMLBLP from .utils.policytree import DoubleMLPolicyTree diff --git a/doubleml/double_ml_score_mixins.py b/doubleml/double_ml_score_mixins.py index b0c69c25e..f1112db9c 100644 --- a/doubleml/double_ml_score_mixins.py +++ b/doubleml/double_ml_score_mixins.py @@ -150,10 +150,12 @@ def score_deriv(theta): theta_hat = root_res.root if not root_res.converged: score_val = score(theta_hat) - msg = ('Could not find a root of the score function.\n ' - f'Flag: {root_res.flag}.\n' - f'Score value found is {score_val} ' - f'for parameter theta equal to {theta_hat}.') + msg = ( + "Could 
not find a root of the score function.\n " + f"Flag: {root_res.flag}.\n" + f"Score value found is {score_val} " + f"for parameter theta equal to {theta_hat}." + ) if self._error_on_convergence_failure: raise ValueError(msg) else: @@ -185,15 +187,16 @@ def score_squared(theta): else: score_val_sign = np.sign(score(alt_coef_start)) if score_val_sign > 0: - theta_hat_array, score_val, _ = fmin_l_bfgs_b( score, self._coef_start_val, approx_grad=True, bounds=[self._coef_bounds] ) theta_hat = theta_hat_array.item() - msg = ('Could not find a root of the score function.\n ' - f'Minimum score value found is {score_val} ' - f'for parameter theta equal to {theta_hat}.\n ' - 'No theta found such that the score function evaluates to a negative value.') + msg = ( + "Could not find a root of the score function.\n " + f"Minimum score value found is {score_val} " + f"for parameter theta equal to {theta_hat}.\n " + "No theta found such that the score function evaluates to a negative value." + ) if self._error_on_convergence_failure: raise ValueError(msg) else: @@ -208,10 +211,12 @@ def neg_score(theta): neg_score, self._coef_start_val, approx_grad=True, bounds=[self._coef_bounds] ) theta_hat = theta_hat_array.item() - msg = ('Could not find a root of the score function. ' - f'Maximum score value found is {-1*neg_score_val} ' - f'for parameter theta equal to {theta_hat}. ' - 'No theta found such that the score function evaluates to a positive value.') + msg = ( + "Could not find a root of the score function. " + f"Maximum score value found is {-1 * neg_score_val} " + f"for parameter theta equal to {theta_hat}. " + "No theta found such that the score function evaluates to a positive value." 
+ ) if self._error_on_convergence_failure: raise ValueError(msg) else: diff --git a/doubleml/plm/datasets/dgp_lplr_LZZ2020.py b/doubleml/plm/datasets/dgp_lplr_LZZ2020.py index 007e2b918..3d6d71277 100644 --- a/doubleml/plm/datasets/dgp_lplr_LZZ2020.py +++ b/doubleml/plm/datasets/dgp_lplr_LZZ2020.py @@ -9,28 +9,32 @@ _data_frame_alias = _get_data_frame_alias() _dml_data_alias = _get_dml_data_alias() -def make_lplr_LZZ2020(n_obs=500, dim_x=20, alpha=0.5, return_type='DoubleMLData', balanced_r0=True, treatment="continuous", **kwargs): - """ + +def make_lplr_LZZ2020( + n_obs=500, dim_x=20, alpha=0.5, return_type="DoubleMLData", balanced_r0=True, treatment="continuous", **kwargs +): + r""" Generates synthetic data for a logistic partially linear regression model, as in Liu et al. (2021), designed for use in double/debiased machine learning applications. The data generating process is defined as follows: - - Covariates \( x_i \sim \mathcal{N}(0, \Sigma) \), where \( \Sigma_{kj} = 0.7^{|j-k|} \). - - Treatment \( d_i = a_0(x_i) \). - - Propensity score \( p_i = \sigma(\alpha d_i + r_0(x_i)) \), where \( \sigma(\cdot) \) is the logistic function. - - Outcome \( y_i \sim \text{Bernoulli}(p_i) \). + - Covariates :math:`x_i \sim \mathcal{N}(0, \Sigma)`, where :math:`\Sigma_{kj} = 0.7^{|j-k|}`. + - Treatment :math:`d_i = a_0(x_i)`. + - Propensity score :math:`p_i = \sigma(\alpha d_i + r_0(x_i))`, where :math:`\sigma(\cdot)` is the logistic function. + - Outcome :math:`y_i \sim \text{Bernoulli}(p_i)`. The nuisance functions are defined as: .. 
math:: - + \begin{aligned} a_0(x_i) &= \frac{2}{1 + \exp(x_{i,1})} - \frac{2}{1 + \exp(x_{i,2})} + \sin(x_{i,3}) + \cos(x_{i,4}) \\ - &+ 0.5 \cdot \mathbb{1}(x_{i,5} > 0) - 0.5 \cdot \mathbb{1}(x_{i,6} > 0) + 0.2 x_{i,7} x_{i,8} - 0.2 x_{i,9} x_{i,10} \\ - - r_0(x_i) &= 0.1 x_{i,1} x_{i,2} x_{i,3} + 0.1 x_{i,4} x_{i,5} + 0.1 x_{i,6}^3 - 0.5 \sin^2(x_{i,7}) \\ - &+ 0.5 \cos(x_{i,8}) + \frac{1}{1 + x_{i,9}^2} - \frac{1}{1 + \exp(x_{i,10})} \\ - &+ 0.25 \cdot \mathbb{1}(x_{i,11} > 0) - 0.25 \cdot \mathbb{1}(x_{i,13} > 0) + &\quad + 0.5 \cdot \mathbb{1}(x_{i,5} > 0) - 0.5 \cdot \mathbb{1}(x_{i,6} > 0) + 0.2\, x_{i,7} x_{i,8} + - 0.2\, x_{i,9} x_{i,10} \\ + r_0(x_i) &= 0.1\, x_{i,1} x_{i,2} x_{i,3} + 0.1\, x_{i,4} x_{i,5} + 0.1\, x_{i,6}^3 - 0.5 \sin^2(x_{i,7}) \\ + &\quad + 0.5 \cos(x_{i,8}) + \frac{1}{1 + x_{i,9}^2} - \frac{1}{1 + \exp(x_{i,10})} \\ + &\quad + 0.25 \cdot \mathbb{1}(x_{i,11} > 0) - 0.25 \cdot \mathbb{1}(x_{i,13} > 0) + \end{aligned} Parameters ---------- @@ -73,38 +77,45 @@ def make_lplr_LZZ2020(n_obs=500, dim_x=20, alpha=0.5, return_type='DoubleMLData' """ if balanced_r0: + def r_0(X): - return 0.1 * X[:, 0] * X[:, 1] * X[:, 2] + \ - 0.1 * X[:, 3] * X[:, 4] + \ - 0.1 * X[:, 5] ** 3 + \ - -0.5 * np.sin(X[:, 6]) ** 2 + \ - 0.5 * np.cos(X[:, 7]) + \ - 1 / (1 + X[:, 8] ** 2) + \ - -1 / (1 + np.exp(X[:, 9])) + \ - 0.25 * np.where(X[:, 10] > 0, 1, 0) + \ - -0.25 * np.where(X[:, 12] > 0, 1, 0) + return ( + 0.1 * X[:, 0] * X[:, 1] * X[:, 2] + + 0.1 * X[:, 3] * X[:, 4] + + 0.1 * X[:, 5] ** 3 + + -0.5 * np.sin(X[:, 6]) ** 2 + + 0.5 * np.cos(X[:, 7]) + + 1 / (1 + X[:, 8] ** 2) + + -1 / (1 + np.exp(X[:, 9])) + + 0.25 * np.where(X[:, 10] > 0, 1, 0) + + -0.25 * np.where(X[:, 12] > 0, 1, 0) + ) else: + def r_0(X): - return 0.1 * X[:, 0] * X[:, 1] * X[:, 2] + \ - 0.1 * X[:, 3] * X[:, 4] + \ - 0.1 * X[:, 5] ** 3 + \ - -0.5 * np.sin(X[:, 6]) ** 2 + \ - 0.5 * np.cos(X[:, 7]) + \ - 4 / (1 + X[:, 8] ** 2) + \ - -1 / (1 + np.exp(X[:, 9])) + \ - 1.5 * np.where(X[:, 10] > 0, 
1, 0) + \ - -0.25 * np.where(X[:, 12] > 0, 1, 0) + return ( + 0.1 * X[:, 0] * X[:, 1] * X[:, 2] + + 0.1 * X[:, 3] * X[:, 4] + + 0.1 * X[:, 5] ** 3 + + -0.5 * np.sin(X[:, 6]) ** 2 + + 0.5 * np.cos(X[:, 7]) + + 4 / (1 + X[:, 8] ** 2) + + -1 / (1 + np.exp(X[:, 9])) + + 1.5 * np.where(X[:, 10] > 0, 1, 0) + + -0.25 * np.where(X[:, 12] > 0, 1, 0) + ) def a_0(X): - return 2 / (1 + np.exp(X[:, 0])) + \ - -2 / (1 + np.exp(X[:, 1])) + \ - 1 * np.sin(X[:, 2]) + \ - 1 * np.cos(X[:, 3]) + \ - 0.5 * np.where(X[:, 4] > 0, 1, 0) + \ - -0.5 * np.where(X[:, 5] > 0, 1, 0) + \ - 0.2 * X[:, 6] * X[:, 7] + \ - -0.2 * X[:, 8] * X[:, 9] - + return ( + 2 / (1 + np.exp(X[:, 0])) + + -2 / (1 + np.exp(X[:, 1])) + + 1 * np.sin(X[:, 2]) + + 1 * np.cos(X[:, 3]) + + 0.5 * np.where(X[:, 4] > 0, 1, 0) + + -0.5 * np.where(X[:, 5] > 0, 1, 0) + + 0.2 * X[:, 6] * X[:, 7] + + -0.2 * X[:, 8] * X[:, 9] + ) sigma = np.full((dim_x, dim_x), 0.2) np.fill_diagonal(sigma, 1) @@ -128,12 +139,11 @@ def a_0(X): if return_type in _array_alias: return x, y, d, p elif return_type in _data_frame_alias + _dml_data_alias: - x_cols = [f'X{i + 1}' for i in np.arange(dim_x)] - data = pd.DataFrame(np.column_stack((x, y, d, p)), - columns=x_cols + ['y', 'd', 'p']) + x_cols = [f"X{i + 1}" for i in np.arange(dim_x)] + data = pd.DataFrame(np.column_stack((x, y, d, p)), columns=x_cols + ["y", "d", "p"]) if return_type in _data_frame_alias: return data else: - return DoubleMLData(data, 'y', 'd', x_cols) + return DoubleMLData(data, "y", "d", x_cols) else: - raise ValueError('Invalid return_type.') \ No newline at end of file + raise ValueError("Invalid return_type.") diff --git a/doubleml/plm/lplr.py b/doubleml/plm/lplr.py index 1ed00810a..edf17f082 100644 --- a/doubleml/plm/lplr.py +++ b/doubleml/plm/lplr.py @@ -1,34 +1,22 @@ import inspect import numpy as np - -from doubleml.utils._estimation import ( - _dml_cv_predict, - _trimm, - _predict_zero_one_propensity, - _cond_targets, - _get_bracket_guess, - _default_kde, - 
_normalize_ipw, - _dml_tune, - _solve_ipw_score, -) +import scipy from sklearn.base import clone from sklearn.utils import check_X_y -import scipy from sklearn.utils.multiclass import type_of_target from doubleml import DoubleMLData from doubleml.double_ml import DoubleML from doubleml.double_ml_score_mixins import NonLinearScoreMixin -from doubleml.utils import DoubleMLClusterResampling -from doubleml.utils._checks import _check_score, _check_finite_predictions, _check_is_propensity +from doubleml.utils._checks import _check_finite_predictions, _check_is_propensity, _check_score +from doubleml.utils._estimation import ( + _dml_cv_predict, + _dml_tune, +) from doubleml.utils.resampling import DoubleMLDoubleResampling - - - class DoubleMLLPLR(NonLinearScoreMixin, DoubleML): """Double machine learning for partially logistic models (binary outcomes) @@ -89,24 +77,22 @@ class DoubleMLLPLR(NonLinearScoreMixin, DoubleML): The high-dimensional vector :math:`X = (X_1, \\ldots, X_p)` consists of other confounding covariates. 
""" - def __init__(self, - obj_dml_data, - ml_M, - ml_t, - ml_m, - ml_a=None, - n_folds=5, - n_folds_inner=5, - n_rep=1, - score='nuisance_space', - draw_sample_splitting=True, - error_on_convergence_failure=False,): + def __init__( + self, + obj_dml_data, + ml_M, + ml_t, + ml_m, + ml_a=None, + n_folds=5, + n_folds_inner=5, + n_rep=1, + score="nuisance_space", + draw_sample_splitting=True, + error_on_convergence_failure=False, + ): self.n_folds_inner = n_folds_inner - super().__init__(obj_dml_data, - n_folds, - n_rep, - score, - draw_sample_splitting) + super().__init__(obj_dml_data, n_folds, n_rep, score, draw_sample_splitting) # Ensure outcome only contains 0 and 1 (validate early in constructor) if not np.array_equal(np.unique(obj_dml_data.y), [0, 1]): @@ -117,208 +103,264 @@ def __init__(self, self._coef_start_val = 1.0 self._check_data(self._dml_data) - valid_scores = ['nuisance_space', 'instrument'] + valid_scores = ["nuisance_space", "instrument"] _check_score(self.score, valid_scores, allow_callable=False) - _ = self._check_learner(ml_t, 'ml_t', regressor=True, classifier=False) - _ = self._check_learner(ml_M, 'ml_M', regressor=False, classifier=True) + _ = self._check_learner(ml_t, "ml_t", regressor=True, classifier=False) + _ = self._check_learner(ml_M, "ml_M", regressor=False, classifier=True) if np.array_equal(np.unique(obj_dml_data.d), [0, 1]): - ml_m_is_classifier = self._check_learner(ml_m, 'ml_m', regressor=False, classifier=True) + ml_m_is_classifier = self._check_learner(ml_m, "ml_m", regressor=False, classifier=True) else: - ml_m_is_classifier = self._check_learner(ml_m, 'ml_m', regressor=True, classifier=False) - self._learner = {'ml_m': ml_m, 'ml_t': ml_t, 'ml_M': ml_M} + ml_m_is_classifier = self._check_learner(ml_m, "ml_m", regressor=True, classifier=False) + self._learner = {"ml_m": ml_m, "ml_t": ml_t, "ml_M": ml_M} if ml_a is not None: - ml_a_is_classifier = self._check_learner(ml_a, 'ml_a', regressor=True, classifier=True) - 
self._learner['ml_a'] = ml_a + ml_a_is_classifier = self._check_learner(ml_a, "ml_a", regressor=True, classifier=True) + self._learner["ml_a"] = ml_a self._ml_a_provided = True else: - self._learner['ml_a'] = clone(ml_m) + self._learner["ml_a"] = clone(ml_m) ml_a_is_classifier = ml_m_is_classifier self._ml_a_provided = False - self._predict_method = {'ml_t': 'predict', 'ml_M': 'predict_proba'} + self._predict_method = {"ml_t": "predict", "ml_M": "predict_proba"} if ml_m_is_classifier: if self._dml_data.binary_treats.all(): - self._predict_method['ml_m'] = 'predict_proba' + self._predict_method["ml_m"] = "predict_proba" else: - raise ValueError(f'The ml_m learner {str(ml_m)} was identified as classifier ' - 'but at least one treatment variable is not binary with values 0 and 1.') + raise ValueError( + f"The ml_m learner {str(ml_m)} was identified as classifier " + "but at least one treatment variable is not binary with values 0 and 1." + ) else: - self._predict_method['ml_m'] = 'predict' + self._predict_method["ml_m"] = "predict" if ml_a_is_classifier: if self._dml_data.binary_treats.all(): - self._predict_method['ml_a'] = 'predict_proba' + self._predict_method["ml_a"] = "predict_proba" else: - raise ValueError(f'The ml_a learner {str(ml_a)} was identified as classifier ' - 'but at least one treatment variable is not binary with values 0 and 1.') + raise ValueError( + f"The ml_a learner {str(ml_a)} was identified as classifier " + "but at least one treatment variable is not binary with values 0 and 1." 
+ ) else: - self._predict_method['ml_a'] = 'predict' + self._predict_method["ml_a"] = "predict" - if score == 'instrument': - sig = inspect.signature(self.learner['ml_a'].fit) - if not 'sample_weight' in sig.parameters: - raise ValueError('Learner \"ml_a\" who supports sample_weight is required for score type \"instrument\"') + if score == "instrument": + sig = inspect.signature(self.learner["ml_a"].fit) + if "sample_weight" not in sig.parameters: + raise ValueError('Learner "ml_a" who supports sample_weight is required for score type "instrument"') self._initialize_ml_nuisance_params() self._external_predictions_implemented = True def _initialize_ml_nuisance_params(self): - self._params = {learner: {key: [None] * self.n_rep for key in self._dml_data.d_cols} - for learner in self._learner} + self._params = {learner: {key: [None] * self.n_rep for key in self._dml_data.d_cols} for learner in self._learner} def _check_data(self, obj_dml_data): if not isinstance(obj_dml_data, DoubleMLData): - raise TypeError('The data must be of DoubleMLData type. ' - f'{str(obj_dml_data)} of type {str(type(obj_dml_data))} was passed.') + raise TypeError( + f"The data must be of DoubleMLData type. {str(obj_dml_data)} of type {str(type(obj_dml_data))} was passed." 
+ ) if not np.array_equal(np.unique(obj_dml_data.y), [0, 1]): - raise TypeError('The outcome variable y must be binary with values 0 and 1.') + raise TypeError("The outcome variable y must be binary with values 0 and 1.") return - - def _double_dml_cv_predict(self, estimator, estimator_name, x, y, smpls=None, smpls_inner=None, - n_jobs=None, est_params=None, method='predict', sample_weights=None): + def _double_dml_cv_predict( + self, + estimator, + estimator_name, + x, + y, + smpls=None, + smpls_inner=None, + n_jobs=None, + est_params=None, + method="predict", + sample_weights=None, + ): res = {} - res['preds'] = np.zeros(y.shape, dtype=float) - res['preds_inner'] = [] - res['models'] = [] + res["preds"] = np.zeros(y.shape, dtype=float) + res["preds_inner"] = [] + res["models"] = [] for smpls_single_split, smpls_double_split in zip(smpls, smpls_inner): - res_inner = _dml_cv_predict(estimator, x, y, smpls=smpls_double_split, n_jobs=n_jobs, - est_params=est_params, method=method, - return_models=True, smpls_is_partition=True, sample_weights=sample_weights) - _check_finite_predictions(res_inner['preds'], estimator, estimator_name, smpls_double_split) - - res['preds_inner'].append(res_inner['preds']) - for model in res_inner['models']: - res['models'].append(model) - if method == 'predict_proba': - res['preds'][smpls_single_split[1]] += model.predict_proba(x[smpls_single_split[1]])[:, 1] + res_inner = _dml_cv_predict( + estimator, + x, + y, + smpls=smpls_double_split, + n_jobs=n_jobs, + est_params=est_params, + method=method, + return_models=True, + smpls_is_partition=True, + sample_weights=sample_weights, + ) + _check_finite_predictions(res_inner["preds"], estimator, estimator_name, smpls_double_split) + + res["preds_inner"].append(res_inner["preds"]) + for model in res_inner["models"]: + res["models"].append(model) + if method == "predict_proba": + res["preds"][smpls_single_split[1]] += model.predict_proba(x[smpls_single_split[1]])[:, 1] else: - 
res['preds'][smpls_single_split[1]] += model.predict(x[smpls_single_split[1]]) + res["preds"][smpls_single_split[1]] += model.predict(x[smpls_single_split[1]]) res["preds"] /= len(smpls) - res['targets'] = np.copy(y) + res["targets"] = np.copy(y) return res - - def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False): - x, y = check_X_y(self._dml_data.x, self._dml_data.y, - force_all_finite=False) - x, d = check_X_y(x, self._dml_data.d, - force_all_finite=False) - x_d_concat = np.hstack((d.reshape(-1,1), x)) - m_external = external_predictions['ml_m'] is not None - M_external = external_predictions['ml_M'] is not None - t_external = external_predictions['ml_t'] is not None - if 'ml_a' in self._learner: - a_external = external_predictions['ml_a'] is not None + x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False) + x, d = check_X_y(x, self._dml_data.d, force_all_finite=False) + x_d_concat = np.hstack((d.reshape(-1, 1), x)) + m_external = external_predictions["ml_m"] is not None + M_external = external_predictions["ml_M"] is not None + t_external = external_predictions["ml_t"] is not None + if "ml_a" in self._learner: + a_external = external_predictions["ml_a"] is not None else: a_external = False if M_external: - M_hat = {'preds': external_predictions['ml_M'], - 'targets': None, - 'models': None} + M_hat = {"preds": external_predictions["ml_M"], "targets": None, "models": None} else: - M_hat = (self._double_dml_cv_predict(self._learner['ml_M'], 'ml_M', x_d_concat, y, smpls=smpls, smpls_inner=self.__smpls__inner, - n_jobs=n_jobs_cv, - est_params=self._get_params('ml_M'), method=self._predict_method['ml_M'])) - + M_hat = self._double_dml_cv_predict( + self._learner["ml_M"], + "ml_M", + x_d_concat, + y, + smpls=smpls, + smpls_inner=self.__smpls__inner, + n_jobs=n_jobs_cv, + est_params=self._get_params("ml_M"), + method=self._predict_method["ml_M"], + ) # nuisance m if m_external: - m_hat = {'preds': 
external_predictions['ml_m'], - 'targets': None, - 'models': None} + m_hat = {"preds": external_predictions["ml_m"], "targets": None, "models": None} else: - if self.score == 'instrument': + if self.score == "instrument": weights = [] for i, (train, test) in enumerate(smpls): - weights.append( M_hat['preds_inner'][i][train] * (1-M_hat['preds_inner'][i][train])) - m_hat = _dml_cv_predict(self._learner['ml_m'], x, d, smpls=smpls, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_m'), method=self._predict_method['ml_m'], - return_models=return_models, sample_weights=weights) - - elif self.score == 'nuisance_space': + weights.append(M_hat["preds_inner"][i][train] * (1 - M_hat["preds_inner"][i][train])) + m_hat = _dml_cv_predict( + self._learner["ml_m"], + x, + d, + smpls=smpls, + n_jobs=n_jobs_cv, + est_params=self._get_params("ml_m"), + method=self._predict_method["ml_m"], + return_models=return_models, + sample_weights=weights, + ) + + elif self.score == "nuisance_space": filtered_smpls = [] for train, test in smpls: train_filtered = train[y[train] == 0] filtered_smpls.append((train_filtered, test)) - m_hat = _dml_cv_predict(self._learner['ml_m'], x, d, smpls=filtered_smpls, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_m'), method=self._predict_method['ml_m'], - return_models=return_models) + m_hat = _dml_cv_predict( + self._learner["ml_m"], + x, + d, + smpls=filtered_smpls, + n_jobs=n_jobs_cv, + est_params=self._get_params("ml_m"), + method=self._predict_method["ml_m"], + return_models=return_models, + ) else: raise NotImplementedError - _check_finite_predictions(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls) + _check_finite_predictions(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls) - if self._check_learner(self._learner['ml_m'], 'ml_m', regressor=True, classifier=True): - _check_is_propensity(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls, eps=1e-12) + if self._check_learner(self._learner["ml_m"], "ml_m", regressor=True, 
classifier=True): + _check_is_propensity(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls, eps=1e-12) if self._dml_data.binary_treats[self._dml_data.d_cols[self._i_treat]]: - binary_preds = (type_of_target(m_hat['preds']) == 'binary') - zero_one_preds = np.all((np.power(m_hat['preds'], 2) - m_hat['preds']) == 0) + binary_preds = type_of_target(m_hat["preds"]) == "binary" + zero_one_preds = np.all((np.power(m_hat["preds"], 2) - m_hat["preds"]) == 0) if binary_preds & zero_one_preds: - raise ValueError(f'For the binary treatment variable {self._dml_data.d_cols[self._i_treat]}, ' - f'predictions obtained with the ml_m learner {str(self._learner["ml_m"])} are also ' - 'observed to be binary with values 0 and 1. Make sure that for classifiers ' - 'probabilities and not labels are predicted.') + raise ValueError( + f"For the binary treatment variable {self._dml_data.d_cols[self._i_treat]}, " + f"predictions obtained with the ml_m learner {str(self._learner['ml_m'])} are also " + "observed to be binary with values 0 and 1. Make sure that for classifiers " + "probabilities and not labels are predicted." 
+ ) if a_external: - a_hat = {'preds': external_predictions['ml_a'], - 'targets': None, - 'models': None} + a_hat = {"preds": external_predictions["ml_a"], "targets": None, "models": None} else: - a_hat = (self._double_dml_cv_predict(self._learner['ml_a'], 'ml_a', x, d, smpls=smpls, smpls_inner=self.__smpls__inner, - n_jobs=n_jobs_cv, - est_params=self._get_params('ml_a'), method=self._predict_method['ml_a'])) + a_hat = self._double_dml_cv_predict( + self._learner["ml_a"], + "ml_a", + x, + d, + smpls=smpls, + smpls_inner=self.__smpls__inner, + n_jobs=n_jobs_cv, + est_params=self._get_params("ml_a"), + method=self._predict_method["ml_a"], + ) W_inner = [] beta = np.zeros(d.shape, dtype=float) for i, (train, test) in enumerate(smpls): - M_iteration = M_hat['preds_inner'][i][train] + M_iteration = M_hat["preds_inner"][i][train] M_iteration = np.clip(M_iteration, 1e-8, 1 - 1e-8) w = scipy.special.logit(M_iteration) W_inner.append(w) - d_tilde = (d - a_hat['preds_inner'][i])[train] - beta[test] = np.sum(d_tilde * w) / np.sum(d_tilde ** 2) - + d_tilde = (d - a_hat["preds_inner"][i])[train] + beta[test] = np.sum(d_tilde * w) / np.sum(d_tilde**2) # nuisance t if t_external: - t_hat = {'preds': external_predictions['ml_t'], - 'targets': None, - 'models': None} + t_hat = {"preds": external_predictions["ml_t"], "targets": None, "models": None} else: - t_hat = _dml_cv_predict(self._learner['ml_t'], x, W_inner, smpls=smpls, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_t'), method=self._predict_method['ml_t'], - return_models=return_models) - _check_finite_predictions(t_hat['preds'], self._learner['ml_t'], 'ml_t', smpls) - + t_hat = _dml_cv_predict( + self._learner["ml_t"], + x, + W_inner, + smpls=smpls, + n_jobs=n_jobs_cv, + est_params=self._get_params("ml_t"), + method=self._predict_method["ml_t"], + return_models=return_models, + ) + _check_finite_predictions(t_hat["preds"], self._learner["ml_t"], "ml_t", smpls) r_hat = {} - r_hat['preds'] = t_hat['preds'] - beta * 
a_hat['preds'] - - psi_elements = self._score_elements(y, d, r_hat['preds'], m_hat['preds']) - - preds = {'predictions': {'ml_r': r_hat['preds'], - 'ml_m': m_hat['preds'], - 'ml_a': a_hat['preds'], - 'ml_t': t_hat['preds'], - 'ml_M': M_hat['preds']}, - 'targets': {'ml_r': None, - 'ml_m': m_hat['targets'], - 'ml_a': a_hat['targets'], - 'ml_t': t_hat['targets'], - 'ml_M': M_hat['targets']}, - 'models': {'ml_r': None, - 'ml_m': m_hat['models'], - 'ml_a': a_hat['models'], - 'ml_t': t_hat['models'], - 'ml_M': M_hat['models']}} + r_hat["preds"] = t_hat["preds"] - beta * a_hat["preds"] + + psi_elements = self._score_elements(y, d, r_hat["preds"], m_hat["preds"]) + + preds = { + "predictions": { + "ml_r": r_hat["preds"], + "ml_m": m_hat["preds"], + "ml_a": a_hat["preds"], + "ml_t": t_hat["preds"], + "ml_M": M_hat["preds"], + }, + "targets": { + "ml_r": None, + "ml_m": m_hat["targets"], + "ml_a": a_hat["targets"], + "ml_t": t_hat["targets"], + "ml_M": M_hat["targets"], + }, + "models": { + "ml_r": None, + "ml_m": m_hat["models"], + "ml_a": a_hat["models"], + "ml_t": t_hat["models"], + "ml_M": M_hat["models"], + }, + } return psi_elements, preds @@ -327,90 +369,128 @@ def _score_elements(self, y, d, r_hat, m_hat): d_tilde = d - m_hat psi_hat = scipy.special.expit(-r_hat) score_const = d_tilde * (1 - y) * np.exp(r_hat) - psi_elements = {"y": y, "d": d, "d_tilde": d_tilde, "r_hat": r_hat, "m_hat": m_hat, "psi_hat": psi_hat, "score_const": score_const} + psi_elements = { + "y": y, + "d": d, + "d_tilde": d_tilde, + "r_hat": r_hat, + "m_hat": m_hat, + "psi_hat": psi_hat, + "score_const": score_const, + } return psi_elements @property def _score_element_names(self): - return ['y', 'd', 'd_tilde', 'r_hat', 'm_hat', 'psi_hat', 'score_const'] + return ["y", "d", "d_tilde", "r_hat", "m_hat", "psi_hat", "score_const"] def _sensitivity_element_est(self, preds): - pass + pass - def _nuisance_tuning(self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, - search_mode, 
n_iter_randomized_search): + def _nuisance_tuning( + self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search + ): # TODO: test - x, y = check_X_y(self._dml_data.x, self._dml_data.y, - force_all_finite=False) - x, d = check_X_y(x, self._dml_data.d, - force_all_finite=False) + x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False) + x, d = check_X_y(x, self._dml_data.d, force_all_finite=False) x_d_concat = np.hstack((d.reshape(-1, 1), x)) if scoring_methods is None: - scoring_methods = {'ml_m': None, - 'ml_M': None, - 'ml_a': None, - 'ml_t': None} + scoring_methods = {"ml_m": None, "ml_M": None, "ml_a": None, "ml_t": None} train_inds = [train_index for (train_index, _) in smpls] - M_tune_res = _dml_tune(y, x_d_concat, train_inds, - self._learner['ml_M'], param_grids['ml_M'], scoring_methods['ml_M'], - n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search) + M_tune_res = _dml_tune( + y, + x_d_concat, + train_inds, + self._learner["ml_M"], + param_grids["ml_M"], + scoring_methods["ml_M"], + n_folds_tune, + n_jobs_cv, + search_mode, + n_iter_randomized_search, + ) filtered_train_inds = [] - if self.score == 'nuisance_space': + if self.score == "nuisance_space": for train, test in smpls: train_filtered = train[y[train] == 0] filtered_train_inds.append(train_filtered) - elif self.score == 'instrument': + elif self.score == "instrument": filtered_train_inds = train_inds else: raise NotImplementedError - m_tune_res = _dml_tune(d, x, filtered_train_inds, - self._learner['ml_m'], param_grids['ml_m'], scoring_methods['ml_m'], - n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search) - - a_tune_res = _dml_tune(d, x, train_inds, - self._learner['ml_a'], param_grids['ml_a'], scoring_methods['ml_a'], - n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search) + m_tune_res = _dml_tune( + d, + x, + filtered_train_inds, + self._learner["ml_m"], + param_grids["ml_m"], + 
scoring_methods["ml_m"], + n_folds_tune, + n_jobs_cv, + search_mode, + n_iter_randomized_search, + ) + + a_tune_res = _dml_tune( + d, + x, + train_inds, + self._learner["ml_a"], + param_grids["ml_a"], + scoring_methods["ml_a"], + n_folds_tune, + n_jobs_cv, + search_mode, + n_iter_randomized_search, + ) M_best_params = [xx.best_params_ for xx in M_tune_res] m_best_params = [xx.best_params_ for xx in m_tune_res] a_best_params = [xx.best_params_ for xx in a_tune_res] # Create targets for tuning ml_t - M_hat = (self._double_dml_cv_predict(self._learner['ml_M'], 'ml_M', x_d_concat, y, smpls=smpls, - smpls_inner=self.__smpls__inner, - n_jobs=n_jobs_cv, - est_params=M_best_params, method=self._predict_method['ml_M'])) + M_hat = self._double_dml_cv_predict( + self._learner["ml_M"], + "ml_M", + x_d_concat, + y, + smpls=smpls, + smpls_inner=self.__smpls__inner, + n_jobs=n_jobs_cv, + est_params=M_best_params, + method=self._predict_method["ml_M"], + ) W_inner = [] for i, (train, test) in enumerate(smpls): - M_iteration = M_hat['preds_inner'][i][train] + M_iteration = M_hat["preds_inner"][i][train] M_iteration = np.clip(M_iteration, 1e-8, 1 - 1e-8) w = scipy.special.logit(M_iteration) W_inner.append(w) - t_tune_res = _dml_tune(W_inner, x, train_inds, - self._learner['ml_t'], param_grids['ml_t'], scoring_methods['ml_t'], - n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search) + t_tune_res = _dml_tune( + W_inner, + x, + train_inds, + self._learner["ml_t"], + param_grids["ml_t"], + scoring_methods["ml_t"], + n_folds_tune, + n_jobs_cv, + search_mode, + n_iter_randomized_search, + ) t_best_params = [xx.best_params_ for xx in t_tune_res] - - # Update params and tune_res to include ml_a and ml_t - params = {'ml_M': M_best_params, - 'ml_m': m_best_params, - 'ml_a': a_best_params, - 'ml_t': t_best_params} - tune_res = {'M_tune': M_tune_res, - 'm_tune': m_tune_res, - 'a_tune': a_tune_res, - 't_tune': t_tune_res} - - res = {'params': params, - 'tune_res': tune_res} + params = 
{"ml_M": M_best_params, "ml_m": m_best_params, "ml_a": a_best_params, "ml_t": t_best_params} + tune_res = {"M_tune": M_tune_res, "m_tune": m_tune_res, "a_tune": a_tune_res, "t_tune": t_tune_res} + + res = {"params": params, "tune_res": tune_res} return res @@ -430,37 +510,40 @@ def draw_sample_splitting(self): self : object """ - obj_dml_resampling = DoubleMLDoubleResampling(n_folds=self.n_folds, - n_folds_inner=self.n_folds_inner, - n_rep=self.n_rep, - n_obs=self._dml_data.n_obs, - stratify=self._strata) + obj_dml_resampling = DoubleMLDoubleResampling( + n_folds=self.n_folds, + n_folds_inner=self.n_folds_inner, + n_rep=self.n_rep, + n_obs=self._dml_data.n_obs, + stratify=self._strata, + ) self._smpls, self._smpls_inner = obj_dml_resampling.split_samples() return self def set_sample_splitting(self): - raise NotImplementedError('set_sample_splitting is not implemented for DoubleMLLPLR.') + raise NotImplementedError("set_sample_splitting is not implemented for DoubleMLLPLR.") def _compute_score(self, psi_elements, coef): - - if self.score == 'nuisance_space': + if self.score == "nuisance_space": score_1 = psi_elements["y"] * np.exp(-coef * psi_elements["d"]) * psi_elements["d_tilde"] score = psi_elements["psi_hat"] * (score_1 - psi_elements["score_const"]) - elif self.score == 'instrument': - score = (psi_elements["y"] - scipy.special.expit(coef * psi_elements["d"]+ psi_elements["r_hat"])) * psi_elements["d_tilde"] + elif self.score == "instrument": + score = (psi_elements["y"] - scipy.special.expit(coef * psi_elements["d"] + psi_elements["r_hat"])) * psi_elements[ + "d_tilde" + ] else: raise NotImplementedError return score def _compute_score_deriv(self, psi_elements, coef, inds=None): - if self.score == 'nuisance_space': - deriv_1 = - psi_elements["y"] * np.exp(-coef * psi_elements["d"]) * psi_elements["d"] - deriv = psi_elements["psi_hat"] * psi_elements["d_tilde"] * deriv_1 - elif self.score == 'instrument': - expit = scipy.special.expit(coef * psi_elements["d"]+ 
psi_elements["r_hat"]) - deriv = - psi_elements["d"] * expit * (1-expit) * psi_elements["d_tilde"] + if self.score == "nuisance_space": + deriv_1 = -psi_elements["y"] * np.exp(-coef * psi_elements["d"]) * psi_elements["d"] + deriv = psi_elements["psi_hat"] * psi_elements["d_tilde"] * deriv_1 + elif self.score == "instrument": + expit = scipy.special.expit(coef * psi_elements["d"] + psi_elements["r_hat"]) + deriv = -psi_elements["d"] * expit * (1 - expit) * psi_elements["d_tilde"] else: raise NotImplementedError diff --git a/doubleml/plm/tests/_utils_logistic_manual.py b/doubleml/plm/tests/_utils_logistic_manual.py deleted file mode 100644 index af4d034eb..000000000 --- a/doubleml/plm/tests/_utils_logistic_manual.py +++ /dev/null @@ -1,313 +0,0 @@ -import numpy as np -import scipy -from sklearn.base import clone, is_classifier - -from doubleml.tests._utils_boot import boot_manual, draw_weights -from doubleml.tests._utils import fit_predict, fit_predict_proba, tune_grid_search - - -def fit_logistic_multitreat(y, x, d, learner_l, learner_m, learner_g, all_smpls, score, - n_rep=1, l_params=None, m_params=None, g_params=None, - use_other_treat_as_covariate=True): - n_obs = len(y) - n_d = d.shape[1] - - thetas = list() - ses = list() - all_l_hat = list() - all_m_hat = list() - all_g_hat = list() - for i_rep in range(n_rep): - smpls = all_smpls[i_rep] - thetas_this_rep = np.full(n_d, np.nan) - ses_this_rep = np.full(n_d, np.nan) - all_l_hat_this_rep = list() - all_m_hat_this_rep = list() - all_g_hat_this_rep = list() - - for i_d in range(n_d): - if use_other_treat_as_covariate: - xd = np.hstack((x, np.delete(d, i_d, axis=1))) - else: - xd = x - - l_hat, m_hat, g_hat, thetas_this_rep[i_d], ses_this_rep[i_d] = fit_plr_single_split( - y, xd, d[:, i_d], - learner_l, learner_m, learner_g, - smpls, score, - l_params, m_params, g_params) - all_l_hat_this_rep.append(l_hat) - all_m_hat_this_rep.append(m_hat) - all_g_hat_this_rep.append(g_hat) - - thetas.append(thetas_this_rep) - 
ses.append(ses_this_rep) - all_l_hat.append(all_l_hat_this_rep) - all_m_hat.append(all_m_hat_this_rep) - all_g_hat.append(all_g_hat_this_rep) - - theta = np.full(n_d, np.nan) - se = np.full(n_d, np.nan) - for i_d in range(n_d): - theta_vec = np.array([xx[i_d] for xx in thetas]) - se_vec = np.array([xx[i_d] for xx in ses]) - theta[i_d] = np.median(theta_vec) - se[i_d] = np.sqrt(np.median(np.power(se_vec, 2) * n_obs + np.power(theta_vec - theta[i_d], 2)) / n_obs) - - res = {'theta': theta, 'se': se, - 'thetas': thetas, 'ses': ses, - 'all_l_hat': all_l_hat, 'all_m_hat': all_m_hat, 'all_g_hat': all_g_hat} - - return res - - -def fit_logistic(y, x, d, learner_l, learner_m, learner_g, all_smpls, score, - n_rep=1, l_params=None, m_params=None, g_params=None): - n_obs = len(y) - - thetas = np.zeros(n_rep) - ses = np.zeros(n_rep) - all_l_hat = list() - all_m_hat = list() - all_g_hat = list() - for i_rep in range(n_rep): - smpls = all_smpls[i_rep] - l_hat, m_hat, g_hat, thetas[i_rep], ses[i_rep] = fit_plr_single_split( - y, x, d, - learner_l, learner_m, learner_g, - smpls, score, - l_params, m_params, g_params) - all_l_hat.append(l_hat) - all_m_hat.append(m_hat) - all_g_hat.append(g_hat) - - theta = np.median(thetas) - se = np.sqrt(np.median(np.power(ses, 2) * n_obs + np.power(thetas - theta, 2)) / n_obs) - - res = {'theta': theta, 'se': se, - 'thetas': thetas, 'ses': ses, - 'all_l_hat': all_l_hat, 'all_m_hat': all_m_hat, 'all_g_hat': all_g_hat} - - return res - - -def fit_plr_logistic_split(y, x, d, learner_l, learner_m, learner_g, smpls, score, - l_params=None, m_params=None, g_params=None): - fit_g = (score == 'IV-type') | callable(score) - if is_classifier(learner_m): - l_hat, m_hat, g_hat = fit_nuisance_plr_classifier(y, x, d, - learner_l, learner_m, learner_g, - smpls, fit_g, - l_params, m_params, g_params) - else: - l_hat, m_hat, g_hat = fit_nuisance_plr(y, x, d, - learner_l, learner_m, learner_g, - smpls, fit_g, - l_params, m_params, g_params) - - theta, se = 
plr_dml2(y, x, d, l_hat, m_hat, g_hat, - smpls, score) - - return l_hat, m_hat, g_hat, theta, se - - -def fit_nuisance_logistic(y, x, d, learner_l, learner_m, learner_g, smpls, fit_g=True, - l_params=None, m_params=None, g_params=None): - ml_l = clone(learner_l) - l_hat = fit_predict(y, x, ml_l, l_params, smpls) - - ml_m = clone(learner_m) - m_hat = fit_predict(d, x, ml_m, m_params, smpls) - - if fit_g: - y_minus_l_hat, d_minus_m_hat, _ = compute_plr_residuals(y, d, l_hat, m_hat, [], smpls) - psi_a = -np.multiply(d_minus_m_hat, d_minus_m_hat) - psi_b = np.multiply(d_minus_m_hat, y_minus_l_hat) - theta_initial = -np.nanmean(psi_b) / np.nanmean(psi_a) - - ml_g = clone(learner_g) - g_hat = fit_predict(y - theta_initial*d, x, ml_g, g_params, smpls) - else: - g_hat = [] - - return l_hat, m_hat, g_hat - - -def fit_nuisance_logistic_classifier(y, x, d, learner_l, learner_m, learner_g, smpls, fit_g=True, - l_params=None, m_params=None, g_params=None): - ml_l = clone(learner_l) - l_hat = fit_predict(y, x, ml_l, l_params, smpls) - - ml_m = clone(learner_m) - m_hat = fit_predict_proba(d, x, ml_m, m_params, smpls) - - if fit_g: - y_minus_l_hat, d_minus_m_hat, _ = compute_plr_residuals(y, d, l_hat, m_hat, [], smpls) - psi_a = -np.multiply(d_minus_m_hat, d_minus_m_hat) - psi_b = np.multiply(d_minus_m_hat, y_minus_l_hat) - theta_initial = -np.mean(psi_b) / np.mean(psi_a) - - ml_g = clone(learner_g) - g_hat = fit_predict(y - theta_initial*d, x, ml_g, g_params, smpls) - else: - g_hat = [] - - return l_hat, m_hat, g_hat - - -def compute_plr_residuals(y, d, l_hat, m_hat, g_hat, smpls): - y_minus_l_hat = np.full_like(y, np.nan, dtype='float64') - d_minus_m_hat = np.full_like(d, np.nan, dtype='float64') - y_minus_g_hat = np.full_like(y, np.nan, dtype='float64') - for idx, (_, test_index) in enumerate(smpls): - y_minus_l_hat[test_index] = y[test_index] - l_hat[idx] - if len(g_hat) > 0: - y_minus_g_hat[test_index] = y[test_index] - g_hat[idx] - d_minus_m_hat[test_index] = d[test_index] - 
m_hat[idx] - return y_minus_l_hat, d_minus_m_hat, y_minus_g_hat - - - - -def var_plr(theta, d, y_minus_l_hat, d_minus_m_hat, y_minus_g_hat, score, n_obs): - if score == 'partialling out': - var = 1/n_obs * 1/np.power(np.mean(np.multiply(d_minus_m_hat, d_minus_m_hat)), 2) * \ - np.mean(np.power(np.multiply(y_minus_l_hat - d_minus_m_hat*theta, d_minus_m_hat), 2)) - else: - assert score == 'IV-type' - var = 1/n_obs * 1/np.power(np.mean(np.multiply(d_minus_m_hat, d)), 2) * \ - np.mean(np.power(np.multiply(y_minus_g_hat - d*theta, d_minus_m_hat), 2)) - - return var - - -def plr_orth(y_minus_l_hat, d_minus_m_hat, y_minus_g_hat, d, score): - if score == 'IV-type': - res = np.mean(np.multiply(d_minus_m_hat, y_minus_g_hat))/np.mean(np.multiply(d_minus_m_hat, d)) - else: - assert score == 'partialling out' - res = scipy.linalg.lstsq(d_minus_m_hat.reshape(-1, 1), y_minus_l_hat)[0] - - return res - - -def boot_plr(y, d, thetas, ses, all_l_hat, all_m_hat, all_g_hat, - all_smpls, score, bootstrap, n_rep_boot, - n_rep=1, apply_cross_fitting=True): - all_boot_t_stat = list() - for i_rep in range(n_rep): - smpls = all_smpls[i_rep] - if apply_cross_fitting: - n_obs = len(y) - else: - test_index = smpls[0][1] - n_obs = len(test_index) - weights = draw_weights(bootstrap, n_rep_boot, n_obs) - - boot_t_stat = boot_plr_single_split( - thetas[i_rep], y, d, all_l_hat[i_rep], all_m_hat[i_rep], all_g_hat[i_rep], smpls, - score, ses[i_rep], - weights, n_rep_boot, apply_cross_fitting) - all_boot_t_stat.append(boot_t_stat) - - # differently for plr because of n_rep_boot and multiple treatmentsa - boot_t_stat = np.transpose(np.vstack(all_boot_t_stat)) - - return boot_t_stat - - -def boot_plr_multitreat(y, d, thetas, ses, all_l_hat, all_m_hat, all_g_hat, - all_smpls, score, bootstrap, n_rep_boot, - n_rep=1, apply_cross_fitting=True): - n_d = d.shape[1] - all_boot_t_stat = list() - for i_rep in range(n_rep): - smpls = all_smpls[i_rep] - if apply_cross_fitting: - n_obs = len(y) - else: - test_index 
= smpls[0][1] - n_obs = len(test_index) - weights = draw_weights(bootstrap, n_rep_boot, n_obs) - - boot_t_stat = np.full((n_d, n_rep_boot), np.nan) - for i_d in range(n_d): - boot_t_stat[i_d, :] = boot_plr_single_split( - thetas[i_rep][i_d], y, d[:, i_d], - all_l_hat[i_rep][i_d], all_m_hat[i_rep][i_d], all_g_hat[i_rep][i_d], - smpls, score, ses[i_rep][i_d], - weights, n_rep_boot, apply_cross_fitting) - - # transpose for shape (n_rep_boot, n_d) - boot_t_stat = np.transpose(boot_t_stat) - all_boot_t_stat.append(boot_t_stat) - - # stack repetitions along the last axis - boot_t_stat = np.stack(all_boot_t_stat, axis=2) - - return boot_t_stat - - -def boot_plr_single_split(theta, y, d, l_hat, m_hat, g_hat, - smpls, score, se, weights, n_rep, apply_cross_fitting): - y_minus_l_hat, d_minus_m_hat, y_minus_g_hat = compute_plr_residuals(y, d, l_hat, m_hat, g_hat, smpls) - - if apply_cross_fitting: - if score == 'partialling out': - J = np.mean(-np.multiply(d_minus_m_hat, d_minus_m_hat)) - else: - assert score == 'IV-type' - J = np.mean(-np.multiply(d_minus_m_hat, d)) - else: - test_index = smpls[0][1] - if score == 'partialling out': - J = np.mean(-np.multiply(d_minus_m_hat[test_index], d_minus_m_hat[test_index])) - else: - assert score == 'IV-type' - J = np.mean(-np.multiply(d_minus_m_hat[test_index], d[test_index])) - - if score == 'partialling out': - psi = np.multiply(y_minus_l_hat - d_minus_m_hat * theta, d_minus_m_hat) - else: - assert score == 'IV-type' - psi = np.multiply(y_minus_g_hat - d * theta, d_minus_m_hat) - - boot_t_stat = boot_manual(psi, J, smpls, se, weights, n_rep, apply_cross_fitting) - - return boot_t_stat - - -def fit_sensitivity_elements_plr(y, d, all_coef, predictions, score, n_rep): - n_treat = d.shape[1] - n_obs = len(y) - - sigma2 = np.full(shape=(1, n_rep, n_treat), fill_value=np.nan) - nu2 = np.full(shape=(1, n_rep, n_treat), fill_value=np.nan) - psi_sigma2 = np.full(shape=(n_obs, n_rep, n_treat), fill_value=np.nan) - psi_nu2 = 
np.full(shape=(n_obs, n_rep, n_treat), fill_value=np.nan) - - for i_rep in range(n_rep): - for i_treat in range(n_treat): - d_tilde = d[:, i_treat] - m_hat = predictions['ml_m'][:, i_rep, i_treat] - theta = all_coef[i_treat, i_rep] - if score == 'partialling out': - l_hat = predictions['ml_l'][:, i_rep, i_treat] - sigma2_score_element = np.square(y - l_hat - np.multiply(theta, d_tilde-m_hat)) - else: - assert score == 'IV-type' - g_hat = predictions['ml_g'][:, i_rep, i_treat] - sigma2_score_element = np.square(y - g_hat - np.multiply(theta, d_tilde)) - - sigma2[0, i_rep, i_treat] = np.mean(sigma2_score_element) - psi_sigma2[:, i_rep, i_treat] = sigma2_score_element - sigma2[0, i_rep, i_treat] - - nu2[0, i_rep, i_treat] = np.divide(1.0, np.mean(np.square(d_tilde-m_hat))) - psi_nu2[:, i_rep, i_treat] = nu2[0, i_rep, i_treat] - \ - np.multiply(np.square(d_tilde-m_hat), np.square(nu2[0, i_rep, i_treat])) - - element_dict = {'sigma2': sigma2, - 'nu2': nu2, - 'psi_sigma2': psi_sigma2, - 'psi_nu2': psi_nu2} - return element_dict diff --git a/doubleml/plm/tests/_utils_lplr_manual.py b/doubleml/plm/tests/_utils_lplr_manual.py index f14a1f66c..8f45b5b08 100644 --- a/doubleml/plm/tests/_utils_lplr_manual.py +++ b/doubleml/plm/tests/_utils_lplr_manual.py @@ -297,7 +297,6 @@ def tune_nuisance_ssm_mar(y, x, d, z, s, ml_g, ml_pi, ml_m, smpls, n_folds_tune, def tune_nuisance_ssm_nonignorable( y, x, d, z, s, ml_g, ml_pi, ml_m, smpls, n_folds_tune, param_grid_g, param_grid_pi, param_grid_m ): - train_inds = [tr for (tr, _) in smpls] inner0_list, inner1_list = [], [] diff --git a/doubleml/plm/tests/test_lplr_exceptions.py b/doubleml/plm/tests/test_lplr_exceptions.py index 4361e7c7b..8a55fe595 100644 --- a/doubleml/plm/tests/test_lplr_exceptions.py +++ b/doubleml/plm/tests/test_lplr_exceptions.py @@ -6,7 +6,6 @@ from sklearn.linear_model import Lasso, LogisticRegression from doubleml import DoubleMLLPLR -from doubleml.data.base_data import DoubleMLBaseData, DoubleMLData from 
doubleml.plm.datasets import make_lplr_LZZ2020 np.random.seed(3141) @@ -19,6 +18,7 @@ dml_lplr = DoubleMLLPLR(dml_data, ml_M, ml_t, ml_m) dml_lplr_instrument = DoubleMLLPLR(dml_data, ml_M, ml_t, ml_m, score="instrument") + @pytest.mark.ci def test_lplr_exception_data(): msg = ( @@ -45,6 +45,7 @@ def test_lplr_exception_scores(): with pytest.raises(TypeError, match=msg): _ = DoubleMLLPLR(dml_data, ml_M, ml_t, ml_m, score=0) + @pytest.mark.ci def test_ssm_exception_resampling(): msg = "The number of folds must be of int type. 1.5 of type was passed." @@ -74,6 +75,7 @@ def test_lplr_exception_get_params(): with pytest.raises(ValueError, match=msg): dml_lplr.get_params("ml_x") + @pytest.mark.ci def test_lplr_exception_smpls(): msg = ( @@ -84,6 +86,7 @@ def test_lplr_exception_smpls(): with pytest.raises(ValueError, match=msg): _ = dml_plr_no_smpls.smpls + @pytest.mark.ci def test_lplr_exception_fit(): msg = "The number of CPUs used to fit the learners must be of int type. 5 of type was passed." 
@@ -96,6 +99,7 @@ def test_lplr_exception_fit(): with pytest.raises(TypeError, match=msg): dml_lplr.fit(store_models=1) + @pytest.mark.ci def test_lplr_exception_bootstrap(): dml_lplr_boot = DoubleMLLPLR(dml_data, ml_M, ml_t, ml_m) diff --git a/doubleml/plm/tests/tests_logistic.py b/doubleml/plm/tests/tests_logistic.py deleted file mode 100644 index a77db7a67..000000000 --- a/doubleml/plm/tests/tests_logistic.py +++ /dev/null @@ -1,307 +0,0 @@ -import pytest -import math -import scipy -import numpy as np -import pandas as pd - -from sklearn.base import clone - -from sklearn.linear_model import LinearRegression, Lasso -from sklearn.ensemble import RandomForestRegressor - -import doubleml as dml - -from doubleml.tests._utils import draw_smpls -from ._utils_logistic_manual import fit_logistic, boot_plr - - -@pytest.fixture(scope='module', - params=[RandomForestRegressor(max_depth=2, n_estimators=10), - LinearRegression(), - Lasso(alpha=0.1)]) -def learner(request): - return request.param - - -@pytest.fixture(scope='module', - params=['IV-type', 'partialling out']) -def score(request): - return request.param - - -@pytest.fixture(scope="module") -def dml_plr_fixture(generate_data1, learner, score): - boot_methods = ['normal'] - n_folds = 2 - n_rep_boot = 502 - - # collect data - data = generate_data1 - x_cols = data.columns[data.columns.str.startswith('X')].tolist() - - # Set machine learning methods for m & g - ml_l = clone(learner) - ml_m = clone(learner) - ml_g = clone(learner) - - np.random.seed(3141) - obj_dml_data = dml.DoubleMLData(data, 'y', ['d'], x_cols) - if score == 'partialling out': - dml_plr_obj = dml.DoubleMLPLR(obj_dml_data, - ml_l, ml_m, - n_folds=n_folds, - score=score) - else: - assert score == 'IV-type' - dml_plr_obj = dml.DoubleMLPLR(obj_dml_data, - ml_l, ml_m, ml_g, - n_folds, - score=score) - - dml_plr_obj.fit() - - np.random.seed(3141) - y = data['y'].values - x = data.loc[:, x_cols].values - d = data['d'].values - n_obs = len(y) - all_smpls = 
draw_smpls(n_obs, n_folds) - - res_manual = fit_plr(y, x, d, clone(learner), clone(learner), clone(learner), - all_smpls, score) - - np.random.seed(3141) - # test with external nuisance predictions - if score == 'partialling out': - dml_plr_obj_ext = dml.DoubleMLPLR(obj_dml_data, - ml_l, ml_m, - n_folds, - score=score) - else: - assert score == 'IV-type' - dml_plr_obj_ext = dml.DoubleMLPLR(obj_dml_data, - ml_l, ml_m, ml_g, - n_folds, - score=score) - - # synchronize the sample splitting - dml_plr_obj_ext.set_sample_splitting(all_smpls=all_smpls) - - if score == 'partialling out': - prediction_dict = {'d': {'ml_l': dml_plr_obj.predictions['ml_l'].reshape(-1, 1), - 'ml_m': dml_plr_obj.predictions['ml_m'].reshape(-1, 1)}} - else: - assert score == 'IV-type' - prediction_dict = {'d': {'ml_l': dml_plr_obj.predictions['ml_l'].reshape(-1, 1), - 'ml_m': dml_plr_obj.predictions['ml_m'].reshape(-1, 1), - 'ml_g': dml_plr_obj.predictions['ml_g'].reshape(-1, 1)}} - - dml_plr_obj_ext.fit(external_predictions=prediction_dict) - - res_dict = {'coef': dml_plr_obj.coef, - 'coef_manual': res_manual['theta'], - 'coef_ext': dml_plr_obj_ext.coef, - 'se': dml_plr_obj.se, - 'se_manual': res_manual['se'], - 'se_ext': dml_plr_obj_ext.se, - 'boot_methods': boot_methods} - - for bootstrap in boot_methods: - np.random.seed(3141) - boot_t_stat = boot_plr(y, d, res_manual['thetas'], res_manual['ses'], - res_manual['all_l_hat'], res_manual['all_m_hat'], res_manual['all_g_hat'], - all_smpls, score, bootstrap, n_rep_boot) - - np.random.seed(3141) - dml_plr_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot) - np.random.seed(3141) - dml_plr_obj_ext.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot) - res_dict['boot_t_stat' + bootstrap] = dml_plr_obj.boot_t_stat - res_dict['boot_t_stat' + bootstrap + '_manual'] = boot_t_stat.reshape(-1, 1, 1) - res_dict['boot_t_stat' + bootstrap + '_ext'] = dml_plr_obj_ext.boot_t_stat - - # sensitivity tests - res_dict['sensitivity_elements'] = 
dml_plr_obj.sensitivity_elements - res_dict['sensitivity_elements_manual'] = fit_sensitivity_elements_plr(y, d.reshape(-1, 1), - all_coef=dml_plr_obj.all_coef, - predictions=dml_plr_obj.predictions, - score=score, - n_rep=1) - # check if sensitivity score with rho=0 gives equal asymptotic standard deviation - dml_plr_obj.sensitivity_analysis(rho=0.0) - res_dict['sensitivity_ses'] = dml_plr_obj.sensitivity_params['se'] - return res_dict - - -@pytest.mark.ci -def test_dml_plr_coef(dml_plr_fixture): - assert math.isclose(dml_plr_fixture['coef'], - dml_plr_fixture['coef_manual'], - rel_tol=1e-9, abs_tol=1e-4) - assert math.isclose(dml_plr_fixture['coef'], - dml_plr_fixture['coef_ext'], - rel_tol=1e-9, abs_tol=1e-4) - - -@pytest.mark.ci -def test_dml_plr_se(dml_plr_fixture): - assert math.isclose(dml_plr_fixture['se'], - dml_plr_fixture['se_manual'], - rel_tol=1e-9, abs_tol=1e-4) - assert math.isclose(dml_plr_fixture['se'], - dml_plr_fixture['se_ext'], - rel_tol=1e-9, abs_tol=1e-4) - - -@pytest.mark.ci -def test_dml_plr_boot(dml_plr_fixture): - for bootstrap in dml_plr_fixture['boot_methods']: - assert np.allclose(dml_plr_fixture['boot_t_stat' + bootstrap], - dml_plr_fixture['boot_t_stat' + bootstrap + '_manual'], - rtol=1e-9, atol=1e-4) - assert np.allclose(dml_plr_fixture['boot_t_stat' + bootstrap], - dml_plr_fixture['boot_t_stat' + bootstrap + '_ext'], - rtol=1e-9, atol=1e-4) - - -@pytest.mark.ci -def test_dml_plr_sensitivity(dml_plr_fixture): - sensitivity_element_names = ['sigma2', 'nu2', 'psi_sigma2', 'psi_nu2'] - for sensitivity_element in sensitivity_element_names: - assert np.allclose(dml_plr_fixture['sensitivity_elements'][sensitivity_element], - dml_plr_fixture['sensitivity_elements_manual'][sensitivity_element]) - - -@pytest.mark.ci -def test_dml_plr_sensitivity_rho0(dml_plr_fixture): - assert np.allclose(dml_plr_fixture['se'], - dml_plr_fixture['sensitivity_ses']['lower'], - rtol=1e-9, atol=1e-4) - assert np.allclose(dml_plr_fixture['se'], - 
dml_plr_fixture['sensitivity_ses']['upper'], - rtol=1e-9, atol=1e-4) - - -@pytest.fixture(scope="module") -def dml_plr_ols_manual_fixture(generate_data1, score): - learner = LinearRegression() - boot_methods = ['Bayes', 'normal', 'wild'] - n_folds = 2 - n_rep_boot = 501 - - # collect data - data = generate_data1 - x_cols = data.columns[data.columns.str.startswith('X')].tolist() - - # Set machine learning methods for m & g - ml_l = clone(learner) - ml_g = clone(learner) - ml_m = clone(learner) - - obj_dml_data = dml.DoubleMLData(data, 'y', ['d'], x_cols) - if score == 'partialling out': - dml_plr_obj = dml.DoubleMLPLR(obj_dml_data, - ml_l, ml_m, - n_folds=n_folds, - score=score) - else: - assert score == 'IV-type' - dml_plr_obj = dml.DoubleMLPLR(obj_dml_data, - ml_l, ml_m, ml_g, - n_folds, - score=score) - - n = data.shape[0] - this_smpl = list() - xx = int(n/2) - this_smpl.append((np.arange(xx, n), np.arange(0, xx))) - this_smpl.append((np.arange(0, xx), np.arange(xx, n))) - smpls = [this_smpl] - dml_plr_obj.set_sample_splitting(smpls) - - dml_plr_obj.fit() - - y = data['y'].values - x = data.loc[:, x_cols].values - d = data['d'].values - - # add column of ones for intercept - o = np.ones((n, 1)) - x = np.append(x, o, axis=1) - - smpls = dml_plr_obj.smpls[0] - - l_hat = [] - l_hat_vec = np.full_like(y, np.nan) - for (train_index, test_index) in smpls: - ols_est = scipy.linalg.lstsq(x[train_index], y[train_index])[0] - preds = np.dot(x[test_index], ols_est) - l_hat.append(preds) - l_hat_vec[test_index] = preds - - m_hat = [] - m_hat_vec = np.full_like(d, np.nan) - for (train_index, test_index) in smpls: - ols_est = scipy.linalg.lstsq(x[train_index], d[train_index])[0] - preds = np.dot(x[test_index], ols_est) - m_hat.append(preds) - m_hat_vec[test_index] = preds - - g_hat = [] - if score == 'IV-type': - theta_initial = scipy.linalg.lstsq((d - m_hat_vec).reshape(-1, 1), y - l_hat_vec)[0] - for (train_index, test_index) in smpls: - ols_est = 
scipy.linalg.lstsq(x[train_index], - y[train_index] - d[train_index] * theta_initial)[0] - g_hat.append(np.dot(x[test_index], ols_est)) - - res_manual, se_manual = plr_dml2(y, x, d, - l_hat, m_hat, g_hat, - smpls, score) - - res_dict = {'coef': dml_plr_obj.coef, - 'coef_manual': res_manual, - 'se': dml_plr_obj.se, - 'se_manual': se_manual, - 'boot_methods': boot_methods} - - for bootstrap in boot_methods: - np.random.seed(3141) - boot_t_stat = boot_plr(y, d, [res_manual], [se_manual], - [l_hat], [m_hat], [g_hat], - [smpls], score, bootstrap, n_rep_boot) - - np.random.seed(3141) - dml_plr_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot) - res_dict['boot_t_stat' + bootstrap] = dml_plr_obj.boot_t_stat - res_dict['boot_t_stat' + bootstrap + '_manual'] = boot_t_stat.reshape(-1, 1, 1) - - return res_dict - - -@pytest.mark.ci -def test_dml_plr_ols_manual_coef(dml_plr_ols_manual_fixture): - assert math.isclose(dml_plr_ols_manual_fixture['coef'], - dml_plr_ols_manual_fixture['coef_manual'], - rel_tol=1e-9, abs_tol=1e-4) - - -@pytest.mark.ci -def test_dml_plr_ols_manual_se(dml_plr_ols_manual_fixture): - assert math.isclose(dml_plr_ols_manual_fixture['se'], - dml_plr_ols_manual_fixture['se_manual'], - rel_tol=1e-9, abs_tol=1e-4) - - -@pytest.mark.ci -def test_dml_plr_ols_manual_boot(dml_plr_ols_manual_fixture): - for bootstrap in dml_plr_ols_manual_fixture['boot_methods']: - assert np.allclose(dml_plr_ols_manual_fixture['boot_t_stat' + bootstrap], - dml_plr_ols_manual_fixture['boot_t_stat' + bootstrap + '_manual'], - rtol=1e-9, atol=1e-4) - - -@pytest.fixture(scope='module', - params=["nonrobust", "HC0", "HC1", "HC2", "HC3"]) -def cov_type(request): - return request.param \ No newline at end of file diff --git a/doubleml/utils/_estimation.py b/doubleml/utils/_estimation.py index 7f24fde5f..d10ae48bc 100644 --- a/doubleml/utils/_estimation.py +++ b/doubleml/utils/_estimation.py @@ -43,9 +43,19 @@ def _fit(estimator, x, y, train_index, idx=None): return estimator, idx 
-def _dml_cv_predict(estimator, x, y, smpls=None, - n_jobs=None, est_params=None, method='predict', return_train_preds=False, return_models=False, - smpls_is_partition=None, sample_weights=None): +def _dml_cv_predict( + estimator, + x, + y, + smpls=None, + n_jobs=None, + est_params=None, + method="predict", + return_train_preds=False, + return_models=False, + smpls_is_partition=None, + sample_weights=None, +): n_obs = x.shape[0] # TODO: Better name for smples_is_partition @@ -53,9 +63,15 @@ def _dml_cv_predict(estimator, x, y, smpls=None, smpls_is_partition = _check_is_partition(smpls, n_obs) fold_specific_params = (est_params is not None) & (not isinstance(est_params, dict)) fold_specific_target = isinstance(y, list) - manual_cv_predict = (not smpls_is_partition) | return_train_preds | fold_specific_params | fold_specific_target \ - | return_models | bool(sample_weights) - #TODO: Check if cross_val_predict supports weights + manual_cv_predict = ( + (not smpls_is_partition) + | return_train_preds + | fold_specific_params + | fold_specific_target + | return_models + | bool(sample_weights) + ) + # TODO: Check if cross_val_predict supports weights res = {"models": None} if not manual_cv_predict: @@ -149,21 +165,34 @@ def _dml_cv_predict(estimator, x, y, smpls=None, return res -def _dml_tune(y, x, train_inds, - learner, param_grid, scoring_method, - n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search, fold_specific_target=False): +def _dml_tune( + y, + x, + train_inds, + learner, + param_grid, + scoring_method, + n_folds_tune, + n_jobs_cv, + search_mode, + n_iter_randomized_search, + fold_specific_target=False, +): tune_res = list() for i, train_index in enumerate(train_inds): tune_resampling = KFold(n_splits=n_folds_tune, shuffle=True) if search_mode == "grid_search": g_grid_search = GridSearchCV(learner, param_grid, scoring=scoring_method, cv=tune_resampling, n_jobs=n_jobs_cv) else: - assert search_mode == 'randomized_search' - g_grid_search = 
RandomizedSearchCV(learner, param_grid, - scoring=scoring_method, - cv=tune_resampling, - n_jobs=n_jobs_cv, - n_iter=n_iter_randomized_search) + assert search_mode == "randomized_search" + g_grid_search = RandomizedSearchCV( + learner, + param_grid, + scoring=scoring_method, + cv=tune_resampling, + n_jobs=n_jobs_cv, + n_iter=n_iter_randomized_search, + ) if fold_specific_target: tune_res.append(g_grid_search.fit(x[train_index, :], y[i])) else: diff --git a/doubleml/utils/resampling.py b/doubleml/utils/resampling.py index d10145176..38c1ac595 100644 --- a/doubleml/utils/resampling.py +++ b/doubleml/utils/resampling.py @@ -26,12 +26,7 @@ def split_samples(self): class DoubleMLDoubleResampling: - def __init__(self, - n_folds, - n_folds_inner, - n_rep, - n_obs, - stratify=None): + def __init__(self, n_folds, n_folds_inner, n_rep, n_obs, stratify=None): self.n_folds = n_folds self.n_folds_inner = n_folds_inner self.n_rep = n_rep @@ -39,12 +34,13 @@ def __init__(self, self.stratify = stratify if n_folds < 2: - raise ValueError('n_folds must be greater than 1. ' - 'You can use set_sample_splitting with a tuple to only use one fold.') + raise ValueError( + "n_folds must be greater than 1. You can use set_sample_splitting with a tuple to only use one fold." + ) if n_folds_inner < 2: - raise ValueError('n_folds_inner must be greater than 1. ' - 'You can use set_sample_splitting with a tuple to only use one fold.') - + raise ValueError( + "n_folds_inner must be greater than 1. You can use set_sample_splitting with a tuple to only use one fold." 
+ ) if self.stratify is None: self.resampling = RepeatedKFold(n_splits=n_folds, n_repeats=n_rep) @@ -55,17 +51,27 @@ def __init__(self, def split_samples(self): all_smpls = [(train, test) for train, test in self.resampling.split(X=np.zeros(self.n_obs), y=self.stratify)] - smpls = [all_smpls[(i_repeat * self.n_folds):((i_repeat + 1) * self.n_folds)] - for i_repeat in range(self.n_rep)] + smpls = [all_smpls[(i_repeat * self.n_folds) : ((i_repeat + 1) * self.n_folds)] for i_repeat in range(self.n_rep)] smpls_inner = [] for _ in range(self.n_rep): smpls_inner_rep = [] for train, test in all_smpls: if self.stratify is None: - smpls_inner_rep.append([(train[train_inner], train[test_inner]) for train_inner, test_inner in self.resampling_inner.split(X=train)]) + smpls_inner_rep.append( + [ + (train[train_inner], train[test_inner]) + for train_inner, test_inner in self.resampling_inner.split(X=train) + ] + ) else: - smpls_inner_rep.append([(train[train_inner], train[test_inner]) for train_inner, test_inner in - self.resampling_inner.split(X=np.zeros(len(train)), y=self.stratify[train])]) + smpls_inner_rep.append( + [ + (train[train_inner], train[test_inner]) + for train_inner, test_inner in self.resampling_inner.split( + X=np.zeros(len(train)), y=self.stratify[train] + ) + ] + ) smpls_inner.append(smpls_inner_rep) return smpls, smpls_inner From 5d2d1ed24deec8ca565b9ebe1260e3f9b0584b94 Mon Sep 17 00:00:00 2001 From: Julius Herzig Date: Mon, 27 Oct 2025 21:56:22 -0700 Subject: [PATCH 17/48] Unit tests work and bug fix in lplr --- doubleml/plm/lplr.py | 6 +- doubleml/plm/tests/_utils_lplr_manual.py | 371 +++++++-------------- doubleml/plm/tests/test_lplr.py | 31 +- doubleml/plm/tests/test_lplr_exceptions.py | 18 +- doubleml/plm/tests/test_lplr_tune.py | 163 +++------ 5 files changed, 205 insertions(+), 384 deletions(-) diff --git a/doubleml/plm/lplr.py b/doubleml/plm/lplr.py index edf17f082..08a6bbfac 100644 --- a/doubleml/plm/lplr.py +++ b/doubleml/plm/lplr.py @@ -109,10 
+109,8 @@ def __init__( _ = self._check_learner(ml_t, "ml_t", regressor=True, classifier=False) _ = self._check_learner(ml_M, "ml_M", regressor=False, classifier=True) - if np.array_equal(np.unique(obj_dml_data.d), [0, 1]): - ml_m_is_classifier = self._check_learner(ml_m, "ml_m", regressor=False, classifier=True) - else: - ml_m_is_classifier = self._check_learner(ml_m, "ml_m", regressor=True, classifier=False) + + ml_m_is_classifier = self._check_learner(ml_m, "ml_m", regressor=True, classifier=True) self._learner = {"ml_m": ml_m, "ml_t": ml_t, "ml_M": ml_M} if ml_a is not None: diff --git a/doubleml/plm/tests/_utils_lplr_manual.py b/doubleml/plm/tests/_utils_lplr_manual.py index 8f45b5b08..699047019 100644 --- a/doubleml/plm/tests/_utils_lplr_manual.py +++ b/doubleml/plm/tests/_utils_lplr_manual.py @@ -8,74 +8,54 @@ def fit_selection( - y, - x, - d, - z, - s, - learner_g, - learner_pi, - learner_m, - all_smpls, - score, - trimming_rule="truncate", - trimming_threshold=1e-2, - normalize_ipw=True, - n_rep=1, - g_d0_params=None, - g_d1_params=None, - pi_params=None, - m_params=None, + y, + x, + d, + learner_M, + learner_t, + learner_m, + all_smpls, + score, + trimming_rule="truncate", + trimming_threshold=1e-2, + n_rep=1, + M_params=None, + t_params=None, + m_params=None, ): n_obs = len(y) thetas = np.zeros(n_rep) ses = np.zeros(n_rep) - all_g_d1_hat = list() - all_g_d0_hat = list() - all_pi_hat = list() + all_M_hat = list() + all_t_hat = list() all_m_hat = list() - all_psi_a = list() - all_psi_b = list() - for i_rep in range(n_rep): smpls = all_smpls[i_rep] - g_hat_d1_list, g_hat_d0_list, pi_hat_list, m_hat_list = fit_nuisance_selection( + M_hat_list, t_hat_list, m_hat_list = fit_nuisance_selection( y, x, d, - z, - s, - learner_g, - learner_pi, + learner_M, + learner_t, learner_m, smpls, score, trimming_rule=trimming_rule, trimming_threshold=trimming_threshold, - g_d0_params=g_d0_params, - g_d1_params=g_d1_params, - pi_params=pi_params, + M_params=M_params, + 
t_params=t_params, m_params=m_params, ) - all_g_d1_hat.append(g_hat_d1_list) - all_g_d0_hat.append(g_hat_d0_list) - all_pi_hat.append(pi_hat_list) - all_m_hat.append(m_hat_list) - - g_hat_d1, g_hat_d0, pi_hat, m_hat = compute_selection(y, g_hat_d1_list, g_hat_d0_list, pi_hat_list, m_hat_list, smpls) - - dtreat = d == 1 - dcontrol = d == 0 - psi_a, psi_b = selection_score_elements(dtreat, dcontrol, g_hat_d1, g_hat_d0, pi_hat, m_hat, s, y, normalize_ipw) - all_psi_a.append(psi_a) - all_psi_b.append(psi_b) + all_M_hat.append(M_hat) + all_t_hat.append(t_hat) + all_m_hat.append(m_hat) - thetas[i_rep], ses[i_rep] = selection_dml2(psi_a, psi_b) + thetas[i_rep], ses[i_rep] = solve_score(M_hat_list, t_hat_list, m_hat_list) theta = np.median(thetas) se = np.sqrt(np.median(np.power(ses, 2) * n_obs + np.power(thetas - theta, 2)) / n_obs) @@ -85,9 +65,8 @@ def fit_selection( "se": se, "thetas": thetas, "ses": ses, - "all_g_d1_hat": all_g_d1_hat, - "all_g_d0_hat": all_g_d0_hat, - "all_pi_hat": all_pi_hat, + "all_M_hat": all_M_hat, + "all_t_hat": all_t_hat, "all_m_hat": all_m_hat, "all_psi_a": all_psi_a, "all_psi_b": all_psi_b, @@ -95,176 +74,125 @@ def fit_selection( return res +def solve_score(M_hat, t_hat, m_hat): + pass def fit_nuisance_selection( - y, - x, - d, - z, - s, - learner_g, - learner_pi, - learner_m, - smpls, - score, - trimming_rule="truncate", - trimming_threshold=1e-2, - g_d0_params=None, - g_d1_params=None, - pi_params=None, - m_params=None, + y, + x, + d, + learner_M, + learner_t, + learner_m, + smpls, + score, + trimming_rule="truncate", + trimming_threshold=1e-2, + M_params=None, + t_params=None, + m_params=None, ): - ml_g_d1 = clone(learner_g) - ml_g_d0 = clone(learner_g) - ml_pi = clone(learner_pi) + # TODO: complete for lplr + n_obs = len(y) + ml_M = clone(learner_M) + ml_t = clone(learner_t) ml_m = clone(learner_m) - if z is None: - dx = np.column_stack((d, x)) - else: - dx = np.column_stack((d, x, z)) - - if score == "missing-at-random": - pi_hat_list = 
fit_predict_proba(s, dx, ml_pi, pi_params, smpls, trimming_threshold=trimming_threshold) - - m_hat_list = fit_predict_proba(d, x, ml_m, m_params, smpls) - - train_cond_d1_s1 = np.intersect1d(np.where(d == 1)[0], np.where(s == 1)[0]) - g_hat_d1_list = fit_predict(y, x, ml_g_d1, g_d1_params, smpls, train_cond=train_cond_d1_s1) - - train_cond_d0_s1 = np.intersect1d(np.where(d == 0)[0], np.where(s == 1)[0]) - g_hat_d0_list = fit_predict(y, x, ml_g_d0, g_d0_params, smpls, train_cond=train_cond_d0_s1) - else: - # initialize empty lists - g_hat_d1_list = [] - g_hat_d0_list = [] - pi_hat_list = [] - m_hat_list = [] - - # create strata for splitting - strata = d.reshape(-1, 1) + 2 * s.reshape(-1, 1) - - # POTENTIAL OUTCOME Y(1) - for i_fold, _ in enumerate(smpls): - ml_g_d1 = clone(learner_g) - ml_pi = clone(learner_pi) - ml_m = clone(learner_m) - - # set the params for the nuisance learners - if g_d1_params is not None: - ml_g_d1.set_params(**g_d1_params[i_fold]) - if g_d0_params is not None: - ml_g_d0.set_params(**g_d0_params[i_fold]) - if pi_params is not None: - ml_pi.set_params(**pi_params[i_fold]) - if m_params is not None: - ml_m.set_params(**m_params[i_fold]) - - train_inds = smpls[i_fold][0] - test_inds = smpls[i_fold][1] - - # start nested crossfitting - train_inds_1, train_inds_2 = train_test_split( - train_inds, test_size=0.5, random_state=42, stratify=strata[train_inds] - ) + dx = np.column_stack((d, x)) + + # initialize empty lists + g_hat_d1_list = [] + g_hat_d0_list = [] + pi_hat_list = [] + m_hat_list = [] + + # create strata for splitting + strata = d.reshape(-1, 1) + 2 * s.reshape(-1, 1) + + # POTENTIAL OUTCOME Y(1) + for i_fold, _ in enumerate(smpls): + ml_g_d1 = clone(learner_g) + ml_pi = clone(learner_pi) + ml_m = clone(learner_m) + + # set the params for the nuisance learners + if g_d1_params is not None: + ml_g_d1.set_params(**g_d1_params[i_fold]) + if g_d0_params is not None: + ml_g_d0.set_params(**g_d0_params[i_fold]) + if pi_params is not None: + 
ml_pi.set_params(**pi_params[i_fold]) + if m_params is not None: + ml_m.set_params(**m_params[i_fold]) + + train_inds = smpls[i_fold][0] + test_inds = smpls[i_fold][1] + + # start nested crossfitting + train_inds_1, train_inds_2 = train_test_split( + train_inds, test_size=0.5, random_state=42, stratify=strata[train_inds] + ) - s_train_1 = s[train_inds_1] - dx_train_1 = dx[train_inds_1, :] + s_train_1 = s[train_inds_1] + dx_train_1 = dx[train_inds_1, :] - # preliminary propensity score for selection - ml_pi_prelim = clone(ml_pi) - # fit on first part of training set - ml_pi_prelim.fit(dx_train_1, s_train_1) - pi_hat_prelim = _predict_zero_one_propensity(ml_pi_prelim, dx) + # preliminary propensity score for selection + ml_pi_prelim = clone(ml_pi) + # fit on first part of training set + ml_pi_prelim.fit(dx_train_1, s_train_1) + pi_hat_prelim = _predict_zero_one_propensity(ml_pi_prelim, dx) - # predictions for small pi in denominator - pi_hat = pi_hat_prelim[test_inds] + # predictions for small pi in denominator + pi_hat = pi_hat_prelim[test_inds] - # add selection indicator to covariates - xpi = np.column_stack((x, pi_hat_prelim)) + # add selection indicator to covariates + xpi = np.column_stack((x, pi_hat_prelim)) - # estimate propensity score p using the second training sample - xpi_train_2 = xpi[train_inds_2, :] - d_train_2 = d[train_inds_2] - xpi_test = xpi[test_inds, :] + # estimate propensity score p using the second training sample + xpi_train_2 = xpi[train_inds_2, :] + d_train_2 = d[train_inds_2] + xpi_test = xpi[test_inds, :] - ml_m.fit(xpi_train_2, d_train_2) + ml_m.fit(xpi_train_2, d_train_2) - m_hat = _predict_zero_one_propensity(ml_m, xpi_test) + m_hat = _predict_zero_one_propensity(ml_m, xpi_test) - # estimate conditional outcome on second training sample -- treatment - s1_d1_train_2_indices = np.intersect1d(np.where(d == 1)[0], np.intersect1d(np.where(s == 1)[0], train_inds_2)) - xpi_s1_d1_train_2 = xpi[s1_d1_train_2_indices, :] - y_s1_d1_train_2 = 
y[s1_d1_train_2_indices] + # estimate conditional outcome on second training sample -- treatment + s1_d1_train_2_indices = np.intersect1d(np.where(d == 1)[0], np.intersect1d(np.where(s == 1)[0], train_inds_2)) + xpi_s1_d1_train_2 = xpi[s1_d1_train_2_indices, :] + y_s1_d1_train_2 = y[s1_d1_train_2_indices] - ml_g_d1.fit(xpi_s1_d1_train_2, y_s1_d1_train_2) + ml_g_d1.fit(xpi_s1_d1_train_2, y_s1_d1_train_2) - # predict conditional outcome - g_hat_d1 = ml_g_d1.predict(xpi_test) + # predict conditional outcome + g_hat_d1 = ml_g_d1.predict(xpi_test) - # estimate conditional outcome on second training sample -- control - s1_d0_train_2_indices = np.intersect1d(np.where(d == 0)[0], np.intersect1d(np.where(s == 1)[0], train_inds_2)) - xpi_s1_d0_train_2 = xpi[s1_d0_train_2_indices, :] - y_s1_d0_train_2 = y[s1_d0_train_2_indices] + # estimate conditional outcome on second training sample -- control + s1_d0_train_2_indices = np.intersect1d(np.where(d == 0)[0], np.intersect1d(np.where(s == 1)[0], train_inds_2)) + xpi_s1_d0_train_2 = xpi[s1_d0_train_2_indices, :] + y_s1_d0_train_2 = y[s1_d0_train_2_indices] - ml_g_d0.fit(xpi_s1_d0_train_2, y_s1_d0_train_2) + ml_g_d0.fit(xpi_s1_d0_train_2, y_s1_d0_train_2) - # predict conditional outcome - g_hat_d0 = ml_g_d0.predict(xpi_test) + # predict conditional outcome + g_hat_d0 = ml_g_d0.predict(xpi_test) - m_hat = _trimm(m_hat, trimming_rule, trimming_threshold) + m_hat = _trimm(m_hat, trimming_rule, trimming_threshold) - # append predictions on test sample to final list of predictions - g_hat_d1_list.append(g_hat_d1) - g_hat_d0_list.append(g_hat_d0) - pi_hat_list.append(pi_hat) - m_hat_list.append(m_hat) + # append predictions on test sample to final list of predictions + g_hat_d1_list.append(g_hat_d1) + g_hat_d0_list.append(g_hat_d0) + pi_hat_list.append(pi_hat) + m_hat_list.append(m_hat) - return g_hat_d1_list, g_hat_d0_list, pi_hat_list, m_hat_list -def compute_selection(y, g_hat_d1_list, g_hat_d0_list, pi_hat_list, m_hat_list, smpls): 
- g_hat_d1 = np.full_like(y, np.nan, dtype="float64") - g_hat_d0 = np.full_like(y, np.nan, dtype="float64") - pi_hat = np.full_like(y, np.nan, dtype="float64") m_hat = np.full_like(y, np.nan, dtype="float64") - for idx, (_, test_index) in enumerate(smpls): - g_hat_d1[test_index] = g_hat_d1_list[idx] - g_hat_d0[test_index] = g_hat_d0_list[idx] - pi_hat[test_index] = pi_hat_list[idx] + M_hat[test_index] = M_hat_list[idx] + t_hat[test_index] = t_hat_list[idx] m_hat[test_index] = m_hat_list[idx] - - return g_hat_d1, g_hat_d0, pi_hat, m_hat - - -def selection_score_elements(dtreat, dcontrol, g_d1, g_d0, pi, m, s, y, normalize_ipw): - # psi_a - psi_a = -1 * np.ones_like(y) - - # psi_b - if normalize_ipw: - weight_treat = sum(dtreat) / sum((dtreat * s) / (m * pi)) - weight_control = sum(dcontrol) / sum((dcontrol * s) / ((1 - m) * pi)) - - psi_b1 = weight_treat * ((dtreat * s * (y - g_d1)) / (m * pi)) + g_d1 - psi_b0 = weight_control * ((dcontrol * s * (y - g_d0)) / ((1 - m) * pi)) + g_d0 - - else: - psi_b1 = (dtreat * s * (y - g_d1)) / (m * pi) + g_d1 - psi_b0 = (dcontrol * s * (y - g_d0)) / ((1 - m) * pi) + g_d0 - - psi_b = psi_b1 - psi_b0 - - return psi_a, psi_b - - -def selection_dml2(psi_a, psi_b): - n_obs = len(psi_a) - theta_hat = -np.mean(psi_b) / np.mean(psi_a) - se = np.sqrt(var_selection(theta_hat, psi_a, psi_b, n_obs)) - - return theta_hat, se + return M_hat, t_hat, m_hat def var_selection(theta, psi_a, psi_b, n_obs): @@ -273,62 +201,17 @@ def var_selection(theta, psi_a, psi_b, n_obs): return var -def tune_nuisance_ssm_mar(y, x, d, z, s, ml_g, ml_pi, ml_m, smpls, n_folds_tune, param_grid_g, param_grid_pi, param_grid_m): - d0_s1 = np.intersect1d(np.where(d == 0)[0], np.where(s == 1)[0]) - d1_s1 = np.intersect1d(np.where(d == 1)[0], np.where(s == 1)[0]) - - g0_tune_res = tune_grid_search(y, x, ml_g, smpls, param_grid_g, n_folds_tune, train_cond=d0_s1) - g1_tune_res = tune_grid_search(y, x, ml_g, smpls, param_grid_g, n_folds_tune, train_cond=d1_s1) - +def 
tune_nuisance(y, x, d, ml_M, ml_t, ml_m, smpls, n_folds_tune, param_grid_M, param_grid_t, param_grid_m): dx = np.column_stack((x, d)) - pi_tune_res = tune_grid_search(s, dx, ml_pi, smpls, param_grid_pi, n_folds_tune) + M_tune_res = tune_grid_search(y, dx, ml_M, smpls, param_grid_M, n_folds_tune) m_tune_res = tune_grid_search(d, x, ml_m, smpls, param_grid_m, n_folds_tune) - g0_best_params = [xx.best_params_ for xx in g0_tune_res] - g1_best_params = [xx.best_params_ for xx in g1_tune_res] - pi_best_params = [xx.best_params_ for xx in pi_tune_res] - m_best_params = [xx.best_params_ for xx in m_tune_res] - - return g0_best_params, g1_best_params, pi_best_params, m_best_params + t_tune_res = tune_grid_search(d, x, ml_t, smpls, param_grid_t, n_folds_tune) + M_best_params = [xx.best_params_ for xx in M_tune_res] + t_best_params = [xx.best_params_ for xx in t_tune_res] + m_best_params = [xx.best_params_ for xx in m_tune_res] -def tune_nuisance_ssm_nonignorable( - y, x, d, z, s, ml_g, ml_pi, ml_m, smpls, n_folds_tune, param_grid_g, param_grid_pi, param_grid_m -): - train_inds = [tr for (tr, _) in smpls] - - inner0_list, inner1_list = [], [] - for tr in train_inds: - i0, i1 = train_test_split(tr, test_size=0.5, stratify=d[tr] + 2 * s[tr], random_state=42) - inner0_list.append(i0) - inner1_list.append(i1) - - X_dz = np.c_[x, d.reshape(-1, 1), z.reshape(-1, 1)] - pi_tune_res = tune_grid_search(s, X_dz, ml_pi, [(i0, np.array([])) for i0 in inner0_list], param_grid_pi, n_folds_tune) - pi_best_params = [gs.best_params_ for gs in pi_tune_res] - - pi_hat_full = np.full_like(s, np.nan, dtype=float) - for i0, i1, gs in zip(inner0_list, inner1_list, pi_tune_res): - ml_pi_temp = clone(ml_pi) - ml_pi_temp.set_params(**gs.best_params_) - ml_pi_temp.fit(X_dz[i0], s[i0]) - ph = _predict_zero_one_propensity(ml_pi_temp, X_dz) - pi_hat_full[i1] = ph[i1] - - X_pi = np.c_[x, pi_hat_full] - m_tune_res = tune_grid_search(d, X_pi, ml_m, [(i1, np.array([])) for i1 in inner1_list], param_grid_m, 
n_folds_tune) - m_best_params = [gs.best_params_ for gs in m_tune_res] - - X_pi_d = np.c_[x, d.reshape(-1, 1), pi_hat_full.reshape(-1, 1)] - inner1_d0_s1 = [i1[(d[i1] == 0) & (s[i1] == 1)] for i1 in inner1_list] - inner1_d1_s1 = [i1[(d[i1] == 1) & (s[i1] == 1)] for i1 in inner1_list] - - g0_tune_res = tune_grid_search(y, X_pi_d, ml_g, [(idx, np.array([])) for idx in inner1_d0_s1], param_grid_g, n_folds_tune) - g1_tune_res = tune_grid_search(y, X_pi_d, ml_g, [(idx, np.array([])) for idx in inner1_d1_s1], param_grid_g, n_folds_tune) - - g0_best_params = [gs.best_params_ for gs in g0_tune_res] - g1_best_params = [gs.best_params_ for gs in g1_tune_res] - - return g0_best_params, g1_best_params, pi_best_params, m_best_params + t_tune_res = tune_grid_search(t_targets, x, ml_t, smpls, param_grid_t, n_folds_tune) diff --git a/doubleml/plm/tests/test_lplr.py b/doubleml/plm/tests/test_lplr.py index c561d9fe8..8e551cab9 100644 --- a/doubleml/plm/tests/test_lplr.py +++ b/doubleml/plm/tests/test_lplr.py @@ -3,7 +3,8 @@ import numpy as np import pytest from sklearn.base import clone -from sklearn.linear_model import LassoCV, LogisticRegressionCV +from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor +from sklearn.linear_model import LassoCV, LogisticRegressionCV, LogisticRegression import doubleml as dml @@ -11,38 +12,36 @@ from ._utils_ssm_manual import fit_selection -@pytest.fixture(scope="module", params=[[LassoCV(), LogisticRegressionCV(penalty="l1", solver="liblinear")]]) -def learner(request): +@pytest.fixture(scope="module", params=[RandomForestClassifier(random_state=42)]) +def learner_M(request): return request.param - -@pytest.fixture(scope="module", params=["missing-at-random", "nonignorable"]) -def score(request): +@pytest.fixture(scope="module", params=[RandomForestRegressor(random_state=42)]) +def learner_t(request): return request.param -@pytest.fixture(scope="module", params=[True, False]) -def normalize_ipw(request): 
+@pytest.fixture(scope="module", params=[LogisticRegression(random_state=42)]) +def learner_m(request): return request.param - -@pytest.fixture(scope="module", params=[0.01]) -def trimming_threshold(request): +@pytest.fixture(scope="module", params=["nuisance_space", "instrument"]) +def score(request): return request.param @pytest.fixture(scope="module") def dml_selection_fixture( - generate_data_selection_mar, generate_data_selection_nonignorable, learner, score, trimming_threshold, normalize_ipw + generate_data_selection, learner, score, learner_M, + learner_t, + learner_m, ): n_folds = 3 # collect data np.random.seed(42) - if score == "missing-at-random": - (x, y, d, z, s) = generate_data_selection_mar - else: - (x, y, d, z, s) = generate_data_selection_nonignorable + (x, y, d, z, s) = generate_data_selection + ml_g = clone(learner[0]) ml_pi = clone(learner[1]) diff --git a/doubleml/plm/tests/test_lplr_exceptions.py b/doubleml/plm/tests/test_lplr_exceptions.py index 8a55fe595..cfe9f0679 100644 --- a/doubleml/plm/tests/test_lplr_exceptions.py +++ b/doubleml/plm/tests/test_lplr_exceptions.py @@ -11,7 +11,7 @@ np.random.seed(3141) n = 100 # create test data and basic learners -dml_data = make_lplr_LZZ2020(alpha=0.5, n_obs=n, dim_x=10) +dml_data = make_lplr_LZZ2020(alpha=0.5, n_obs=n, dim_x=20) ml_M = RandomForestClassifier() ml_t = RandomForestRegressor() ml_m = RandomForestRegressor() @@ -22,13 +22,13 @@ @pytest.mark.ci def test_lplr_exception_data(): msg = ( - r"The data must be of DoubleMLData type\. .* of type " + r"The data must be of DoubleMLData.* type\.[\s\S]* of type " r" was passed\." 
) with pytest.raises(TypeError, match=msg): _ = DoubleMLLPLR(pd.DataFrame(), ml_M, ml_t, ml_m) - dml_data_nb = make_lplr_LZZ2020(alpha=0.5, n_obs=50, dim_x=5) + dml_data_nb = make_lplr_LZZ2020(alpha=0.5, n_obs=50, dim_x=20) dml_data_nb.data[dml_data_nb.y_col] = dml_data_nb.data[dml_data_nb.y_col] + 1 dml_data_nb._set_y_z() with pytest.raises(TypeError, match="The outcome variable y must be binary with values 0 and 1."): @@ -41,7 +41,7 @@ def test_lplr_exception_scores(): msg = "Invalid score MAR" with pytest.raises(ValueError, match=msg): _ = DoubleMLLPLR(dml_data, ml_M, ml_t, ml_m, score="MAR") - msg = "score should be string. 0 was passed." + msg = "score should be a string. 0 was passed." with pytest.raises(TypeError, match=msg): _ = DoubleMLLPLR(dml_data, ml_M, ml_t, ml_m, score=0) @@ -71,7 +71,7 @@ def test_ssm_exception_resampling(): @pytest.mark.ci def test_lplr_exception_get_params(): - msg = "Invalid nuisance learner ml_x. Valid nuisance learner ml_M or ml_g_t or ml_m or ml_a." + msg = "Invalid nuisance learner ml_x. Valid nuisance learner ml_m or ml_t or ml_M or ml_a." with pytest.raises(ValueError, match=msg): dml_lplr.get_params("ml_x") @@ -148,7 +148,7 @@ def test_lplr_exception_confint(): @pytest.mark.ci def test_lplr_exception_set_ml_nuisance_params(): # invalid learner name - msg = "Invalid nuisance learner g. Valid nuisance learner ml_M or ml_t or ml_m or ml_a." + msg = "Invalid nuisance learner g. Valid nuisance learner ml_m or ml_t or ml_M or ml_a." with pytest.raises(ValueError, match=msg): dml_lplr.set_ml_nuisance_params("g", "d", {"alpha": 0.1}) # invalid treatment variable @@ -171,7 +171,7 @@ class _DummyNoClassifier(_DummyNoGetParams): def get_params(self): pass - def predict_proba(self): + def predict(self): pass @@ -216,7 +216,7 @@ def test_lplr_exception_learner(): log_reg._estimator_type = None msg = ( r"Learner provided for ml_m is probably invalid: LogisticRegressionManipulatedType\(\) is \(probably\) " - r"no classifier\." 
+ r"neither a regressor nor a classifier. Method predict is used for prediction\." ) with pytest.warns(UserWarning, match=msg): _ = DoubleMLLPLR(dml_data, ml_M, ml_t, log_reg) @@ -284,7 +284,7 @@ def test_double_ml_exception_evaluate_learner(): dml_lplr_obj.evaluate_learners(metric="mse") msg = ( - r"The learners have to be a subset of \['ml_M', 'ml_t', 'ml_m', 'ml_a'\]\. " + r"The learners have to be a subset of \['ml_m', 'ml_t', 'ml_M', 'ml_a'\]\. " r"Learners \['ml_mu', 'ml_p'\] provided." ) with pytest.raises(ValueError, match=msg): diff --git a/doubleml/plm/tests/test_lplr_tune.py b/doubleml/plm/tests/test_lplr_tune.py index 0e0fa7bfd..28aa387f5 100644 --- a/doubleml/plm/tests/test_lplr_tune.py +++ b/doubleml/plm/tests/test_lplr_tune.py @@ -3,17 +3,20 @@ import numpy as np import pytest from sklearn.base import clone -from sklearn.ensemble import RandomForestRegressor -from sklearn.linear_model import LogisticRegression +from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier +from sklearn.linear_model import Lasso, LogisticRegression import doubleml as dml from ...tests._utils import draw_smpls -from ._utils_lplr_manual import fit_selection, tune_nuisance_ssm_mar, tune_nuisance_ssm_nonignorable +from ._utils_lplr_manual import fit_selection, tune_nuisance +@pytest.fixture(scope="module", params=[RandomForestClassifier(random_state=42)]) +def learner_M(request): + return request.param @pytest.fixture(scope="module", params=[RandomForestRegressor(random_state=42)]) -def learner_g(request): +def learner_t(request): return request.param @@ -22,84 +25,63 @@ def learner_m(request): return request.param -@pytest.fixture(scope="module", params=["missing-at-random", "nonignorable"]) +@pytest.fixture(scope="module", params=["nuisance_space", "instrument"]) def score(request): return request.param -@pytest.fixture(scope="module", params=[True, False]) -def normalize_ipw(request): - return request.param - - @pytest.fixture(scope="module", params=[True, 
False]) def tune_on_folds(request): return request.param def get_par_grid(learner): - if learner.__class__ in [RandomForestRegressor]: + if learner.__class__ in [RandomForestRegressor, RandomForestClassifier]: par_grid = {"n_estimators": [5, 10, 20]} else: - assert learner.__class__ in [LogisticRegression] + assert learner.__class__ in [LogisticRegression, Lasso] par_grid = {"C": np.logspace(-2, 2, 10)} return par_grid @pytest.fixture(scope="module") -def dml_ssm_fixture( - generate_data_selection_mar, - generate_data_selection_nonignorable, - learner_g, +def dml_lplr_fixture( + generate_data_selection, + learner_M, + learner_t, learner_m, score, - normalize_ipw, tune_on_folds, ): - par_grid = {"ml_g": get_par_grid(learner_g), "ml_pi": get_par_grid(learner_m), "ml_m": get_par_grid(learner_m)} + par_grid = {"ml_M": get_par_grid(learner_M), "ml_t": get_par_grid(learner_t), "ml_m": get_par_grid(learner_m)} n_folds_tune = 4 n_folds = 2 # collect data np.random.seed(42) - if score == "missing-at-random": - (x, y, d, z, s) = generate_data_selection_mar - else: - (x, y, d, z, s) = generate_data_selection_nonignorable + x, y, d = generate_data_selection + n_obs = len(y) all_smpls = draw_smpls(n_obs, n_folds) - ml_g = clone(learner_g) - ml_pi = clone(learner_m) + ml_M = clone(learner_M) + ml_t = clone(learner_t) ml_m = clone(learner_m) np.random.seed(42) - if score == "missing-at-random": - obj_dml_data = dml.DoubleMLSSMData.from_arrays(x, y, d, z=None, s=s) - dml_sel_obj = dml.DoubleMLSSM( - obj_dml_data, - ml_g, - ml_pi, - ml_m, - n_folds=n_folds, - score=score, - normalize_ipw=normalize_ipw, - draw_sample_splitting=False, - ) - else: - assert score == "nonignorable" - obj_dml_data = dml.DoubleMLSSMData.from_arrays(x, y, d, z=z, s=s) - dml_sel_obj = dml.DoubleMLSSM( - obj_dml_data, - ml_g, - ml_pi, - ml_m, - n_folds=n_folds, - score=score, - normalize_ipw=normalize_ipw, - draw_sample_splitting=False, - ) + + obj_dml_data = dml.DoubleMLData.from_arrays(x, y, d) + 
dml_sel_obj = dml.DoubleMLLPLR( + obj_dml_data, + ml_M, + ml_t, + ml_m, + n_folds=n_folds, + score=score, + draw_sample_splitting=False, + ) + # synchronize the sample splitting np.random.seed(42) @@ -115,95 +97,54 @@ def dml_ssm_fixture( np.random.seed(42) smpls = all_smpls[0] if tune_on_folds: - if score == "missing-at-random": - g0_best_params, g1_best_params, pi_best_params, m_best_params = tune_nuisance_ssm_mar( - y, - x, - d, - z, - s, - clone(learner_g), - clone(learner_m), - clone(learner_m), - smpls, - n_folds_tune, - par_grid["ml_g"], - par_grid["ml_pi"], - par_grid["ml_m"], - ) - elif score == "nonignorable": - g0_best_params, g1_best_params, pi_best_params, m_best_params = tune_nuisance_ssm_nonignorable( + + M_best_params, t_best_params, m_best_params = tune_nuisance( y, x, d, - z, - s, - clone(learner_g), - clone(learner_m), + clone(learner_M), + clone(learner_t), clone(learner_m), smpls, n_folds_tune, - par_grid["ml_g"], - par_grid["ml_pi"], + par_grid["ml_M"], + par_grid["ml_t"], par_grid["ml_m"], ) else: xx = [(np.arange(len(y)), np.array([]))] - if score == "missing-at-random": - g0_best_params, g1_best_params, pi_best_params, m_best_params = tune_nuisance_ssm_mar( + g0_best_params, g1_best_params, pi_best_params, m_best_params = tune_nuisance( y, x, d, - z, - s, - clone(learner_g), - clone(learner_m), + clone(learner_M), + clone(learner_t), clone(learner_m), xx, n_folds_tune, - par_grid["ml_g"], - par_grid["ml_pi"], - par_grid["ml_m"], - ) - elif score == "nonignorable": - g0_best_params, g1_best_params, pi_best_params, m_best_params = tune_nuisance_ssm_nonignorable( - y, - x, - d, - z, - s, - clone(learner_g), - clone(learner_m), - clone(learner_m), - xx, - n_folds_tune, - par_grid["ml_g"], - par_grid["ml_pi"], + par_grid["ml_M"], + par_grid["ml_t"], par_grid["ml_m"], ) - g0_best_params = g0_best_params * n_folds - g1_best_params = g1_best_params * n_folds - pi_best_params = pi_best_params * n_folds - m_best_params = m_best_params * n_folds + + 
M_best_params = M_best_params * n_folds + t_best_params = t_best_params * n_folds + m_best_params = m_best_params * n_folds np.random.seed(42) res_manual = fit_selection( y, x, d, - z, - s, - clone(learner_g), - clone(learner_m), + clone(learner_M), + clone(learner_t), clone(learner_m), all_smpls, score, - normalize_ipw=normalize_ipw, - g_d0_params=g0_best_params, - g_d1_params=g1_best_params, - pi_params=pi_best_params, + M_params=M_best_params, + t_params=t_best_params, m_params=m_best_params, ) @@ -219,9 +160,9 @@ def dml_ssm_fixture( @pytest.mark.ci def test_dml_ssm_coef(dml_ssm_fixture): - assert math.isclose(dml_ssm_fixture["coef"], dml_ssm_fixture["coef_manual"], rel_tol=1e-9, abs_tol=1e-4) + assert math.isclose(dml_lplr_fixture["coef"], dml_lplr_fixture["coef_manual"], rel_tol=1e-9, abs_tol=1e-4) @pytest.mark.ci def test_dml_ssm_se(dml_ssm_fixture): - assert math.isclose(dml_ssm_fixture["se"], dml_ssm_fixture["se_manual"], rel_tol=1e-9, abs_tol=1e-4) + assert math.isclose(dml_lplr_fixture["se"], dml_lplr_fixture["se_manual"], rel_tol=1e-9, abs_tol=1e-4) From 2c626a011bb2d68f658f2113eaff47a37dcbcd8a Mon Sep 17 00:00:00 2001 From: Julius Herzig Date: Mon, 27 Oct 2025 22:07:08 -0700 Subject: [PATCH 18/48] Cleanup --- doubleml/plm/__init__.py | 2 +- doubleml/plm/datasets/__init__.py | 2 +- doubleml/plm/datasets/dgp_lplr_LZZ2020.py | 2 ++ doubleml/plm/tests/_utils_lplr_manual.py | 2 +- doubleml/plm/tests/test_lplr.py | 2 +- doubleml/plm/tests/test_lplr_tune.py | 3 ++- 6 files changed, 8 insertions(+), 5 deletions(-) diff --git a/doubleml/plm/__init__.py b/doubleml/plm/__init__.py index 37262ed93..f5e135e3a 100644 --- a/doubleml/plm/__init__.py +++ b/doubleml/plm/__init__.py @@ -2,9 +2,9 @@ The :mod:`doubleml.plm` module implements double machine learning estimates based on partially linear models. 
""" +from .lplr import DoubleMLLPLR from .pliv import DoubleMLPLIV from .plr import DoubleMLPLR -from .lplr import DoubleMLLPLR __all__ = [ "DoubleMLPLR", diff --git a/doubleml/plm/datasets/__init__.py b/doubleml/plm/datasets/__init__.py index 5f433ae79..6e8e9bb51 100644 --- a/doubleml/plm/datasets/__init__.py +++ b/doubleml/plm/datasets/__init__.py @@ -4,11 +4,11 @@ from ._make_pliv_data import _make_pliv_data from .dgp_confounded_plr_data import make_confounded_plr_data +from .dgp_lplr_LZZ2020 import make_lplr_LZZ2020 from .dgp_pliv_CHS2015 import make_pliv_CHS2015 from .dgp_pliv_multiway_cluster_CKMS2021 import make_pliv_multiway_cluster_CKMS2021 from .dgp_plr_CCDDHNR2018 import make_plr_CCDDHNR2018 from .dgp_plr_turrell2018 import make_plr_turrell2018 -from .dgp_lplr_LZZ2020 import make_lplr_LZZ2020 __all__ = [ "make_plr_CCDDHNR2018", diff --git a/doubleml/plm/datasets/dgp_lplr_LZZ2020.py b/doubleml/plm/datasets/dgp_lplr_LZZ2020.py index 3d6d71277..a9b4ece9b 100644 --- a/doubleml/plm/datasets/dgp_lplr_LZZ2020.py +++ b/doubleml/plm/datasets/dgp_lplr_LZZ2020.py @@ -131,6 +131,8 @@ def a_0(X): elif treatment == "binary_unbalanced": d_cont = a_0(x) d = np.random.binomial(1, expit(d_cont)) + else: + raise ValueError("Invalid treatment type.") p = expit(alpha * d[:] + r_0(x)) diff --git a/doubleml/plm/tests/_utils_lplr_manual.py b/doubleml/plm/tests/_utils_lplr_manual.py index 699047019..072eb2b56 100644 --- a/doubleml/plm/tests/_utils_lplr_manual.py +++ b/doubleml/plm/tests/_utils_lplr_manual.py @@ -2,7 +2,7 @@ from sklearn.base import clone from sklearn.model_selection import train_test_split -from ...tests._utils import fit_predict, fit_predict_proba, tune_grid_search +from ...tests._utils import tune_grid_search from ...utils._estimation import _predict_zero_one_propensity from ...utils._propensity_score import _trimm diff --git a/doubleml/plm/tests/test_lplr.py b/doubleml/plm/tests/test_lplr.py index 8e551cab9..9ef7ec732 100644 --- 
a/doubleml/plm/tests/test_lplr.py +++ b/doubleml/plm/tests/test_lplr.py @@ -4,7 +4,7 @@ import pytest from sklearn.base import clone from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor -from sklearn.linear_model import LassoCV, LogisticRegressionCV, LogisticRegression +from sklearn.linear_model import LogisticRegression import doubleml as dml diff --git a/doubleml/plm/tests/test_lplr_tune.py b/doubleml/plm/tests/test_lplr_tune.py index 28aa387f5..6d13e5d18 100644 --- a/doubleml/plm/tests/test_lplr_tune.py +++ b/doubleml/plm/tests/test_lplr_tune.py @@ -3,7 +3,7 @@ import numpy as np import pytest from sklearn.base import clone -from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier +from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor from sklearn.linear_model import Lasso, LogisticRegression import doubleml as dml @@ -11,6 +11,7 @@ from ...tests._utils import draw_smpls from ._utils_lplr_manual import fit_selection, tune_nuisance + @pytest.fixture(scope="module", params=[RandomForestClassifier(random_state=42)]) def learner_M(request): return request.param From 98194367463f0e382726c8a01dbb05a7d5ff9f19 Mon Sep 17 00:00:00 2001 From: Julius Herzig Date: Wed, 5 Nov 2025 18:41:19 -0800 Subject: [PATCH 19/48] Tests updated --- doubleml/plm/lplr.py | 10 + doubleml/plm/tests/_utils_lplr_manual.py | 217 --------------------- doubleml/plm/tests/test_lplr.py | 79 ++------ doubleml/plm/tests/test_lplr_exceptions.py | 1 + doubleml/plm/tests/test_lplr_tune.py | 129 ++++-------- 5 files changed, 67 insertions(+), 369 deletions(-) delete mode 100644 doubleml/plm/tests/_utils_lplr_manual.py diff --git a/doubleml/plm/lplr.py b/doubleml/plm/lplr.py index 08a6bbfac..468b93593 100644 --- a/doubleml/plm/lplr.py +++ b/doubleml/plm/lplr.py @@ -389,6 +389,8 @@ def _sensitivity_element_est(self, preds): def _nuisance_tuning( self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, 
n_iter_randomized_search ): + if self._i_rep is None: + raise ValueError("tune_on_folds must be True as targets have to be created for ml_t on folds.") # TODO: test x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False) x, d = check_X_y(x, self._dml_data.d, force_all_finite=False) @@ -470,6 +472,13 @@ def _nuisance_tuning( w = scipy.special.logit(M_iteration) W_inner.append(w) + # Reshape W_inner into full-length arrays per fold: fill train indices, others are NaN + W_targets = [] + for i, train in enumerate(train_inds): + wt = np.full(x.shape[0], np.nan, dtype=float) + wt[train] = W_inner[i] + W_targets.append(wt) + t_tune_res = _dml_tune( W_inner, x, @@ -481,6 +490,7 @@ def _nuisance_tuning( n_jobs_cv, search_mode, n_iter_randomized_search, + fold_specific_target=True ) t_best_params = [xx.best_params_ for xx in t_tune_res] diff --git a/doubleml/plm/tests/_utils_lplr_manual.py b/doubleml/plm/tests/_utils_lplr_manual.py deleted file mode 100644 index 072eb2b56..000000000 --- a/doubleml/plm/tests/_utils_lplr_manual.py +++ /dev/null @@ -1,217 +0,0 @@ -import numpy as np -from sklearn.base import clone -from sklearn.model_selection import train_test_split - -from ...tests._utils import tune_grid_search -from ...utils._estimation import _predict_zero_one_propensity -from ...utils._propensity_score import _trimm - - -def fit_selection( - y, - x, - d, - learner_M, - learner_t, - learner_m, - all_smpls, - score, - trimming_rule="truncate", - trimming_threshold=1e-2, - n_rep=1, - M_params=None, - t_params=None, - m_params=None, -): - n_obs = len(y) - - thetas = np.zeros(n_rep) - ses = np.zeros(n_rep) - - all_M_hat = list() - all_t_hat = list() - all_m_hat = list() - - for i_rep in range(n_rep): - smpls = all_smpls[i_rep] - - M_hat_list, t_hat_list, m_hat_list = fit_nuisance_selection( - y, - x, - d, - learner_M, - learner_t, - learner_m, - smpls, - score, - trimming_rule=trimming_rule, - trimming_threshold=trimming_threshold, - M_params=M_params, - 
t_params=t_params, - m_params=m_params, - ) - - all_M_hat.append(M_hat) - all_t_hat.append(t_hat) - all_m_hat.append(m_hat) - - thetas[i_rep], ses[i_rep] = solve_score(M_hat_list, t_hat_list, m_hat_list) - - theta = np.median(thetas) - se = np.sqrt(np.median(np.power(ses, 2) * n_obs + np.power(thetas - theta, 2)) / n_obs) - - res = { - "theta": theta, - "se": se, - "thetas": thetas, - "ses": ses, - "all_M_hat": all_M_hat, - "all_t_hat": all_t_hat, - "all_m_hat": all_m_hat, - "all_psi_a": all_psi_a, - "all_psi_b": all_psi_b, - } - - return res - -def solve_score(M_hat, t_hat, m_hat): - pass - -def fit_nuisance_selection( - y, - x, - d, - learner_M, - learner_t, - learner_m, - smpls, - score, - trimming_rule="truncate", - trimming_threshold=1e-2, - M_params=None, - t_params=None, - m_params=None, -): - # TODO: complete for lplr - n_obs = len(y) - ml_M = clone(learner_M) - ml_t = clone(learner_t) - ml_m = clone(learner_m) - - dx = np.column_stack((d, x)) - - # initialize empty lists - g_hat_d1_list = [] - g_hat_d0_list = [] - pi_hat_list = [] - m_hat_list = [] - - # create strata for splitting - strata = d.reshape(-1, 1) + 2 * s.reshape(-1, 1) - - # POTENTIAL OUTCOME Y(1) - for i_fold, _ in enumerate(smpls): - ml_g_d1 = clone(learner_g) - ml_pi = clone(learner_pi) - ml_m = clone(learner_m) - - # set the params for the nuisance learners - if g_d1_params is not None: - ml_g_d1.set_params(**g_d1_params[i_fold]) - if g_d0_params is not None: - ml_g_d0.set_params(**g_d0_params[i_fold]) - if pi_params is not None: - ml_pi.set_params(**pi_params[i_fold]) - if m_params is not None: - ml_m.set_params(**m_params[i_fold]) - - train_inds = smpls[i_fold][0] - test_inds = smpls[i_fold][1] - - # start nested crossfitting - train_inds_1, train_inds_2 = train_test_split( - train_inds, test_size=0.5, random_state=42, stratify=strata[train_inds] - ) - - s_train_1 = s[train_inds_1] - dx_train_1 = dx[train_inds_1, :] - - # preliminary propensity score for selection - ml_pi_prelim = 
clone(ml_pi) - # fit on first part of training set - ml_pi_prelim.fit(dx_train_1, s_train_1) - pi_hat_prelim = _predict_zero_one_propensity(ml_pi_prelim, dx) - - # predictions for small pi in denominator - pi_hat = pi_hat_prelim[test_inds] - - # add selection indicator to covariates - xpi = np.column_stack((x, pi_hat_prelim)) - - # estimate propensity score p using the second training sample - xpi_train_2 = xpi[train_inds_2, :] - d_train_2 = d[train_inds_2] - xpi_test = xpi[test_inds, :] - - ml_m.fit(xpi_train_2, d_train_2) - - m_hat = _predict_zero_one_propensity(ml_m, xpi_test) - - # estimate conditional outcome on second training sample -- treatment - s1_d1_train_2_indices = np.intersect1d(np.where(d == 1)[0], np.intersect1d(np.where(s == 1)[0], train_inds_2)) - xpi_s1_d1_train_2 = xpi[s1_d1_train_2_indices, :] - y_s1_d1_train_2 = y[s1_d1_train_2_indices] - - ml_g_d1.fit(xpi_s1_d1_train_2, y_s1_d1_train_2) - - # predict conditional outcome - g_hat_d1 = ml_g_d1.predict(xpi_test) - - # estimate conditional outcome on second training sample -- control - s1_d0_train_2_indices = np.intersect1d(np.where(d == 0)[0], np.intersect1d(np.where(s == 1)[0], train_inds_2)) - xpi_s1_d0_train_2 = xpi[s1_d0_train_2_indices, :] - y_s1_d0_train_2 = y[s1_d0_train_2_indices] - - ml_g_d0.fit(xpi_s1_d0_train_2, y_s1_d0_train_2) - - # predict conditional outcome - g_hat_d0 = ml_g_d0.predict(xpi_test) - - m_hat = _trimm(m_hat, trimming_rule, trimming_threshold) - - # append predictions on test sample to final list of predictions - g_hat_d1_list.append(g_hat_d1) - g_hat_d0_list.append(g_hat_d0) - pi_hat_list.append(pi_hat) - m_hat_list.append(m_hat) - - - - m_hat = np.full_like(y, np.nan, dtype="float64") - for idx, (_, test_index) in enumerate(smpls): - M_hat[test_index] = M_hat_list[idx] - t_hat[test_index] = t_hat_list[idx] - m_hat[test_index] = m_hat_list[idx] - return M_hat, t_hat, m_hat - - -def var_selection(theta, psi_a, psi_b, n_obs): - J = np.mean(psi_a) - var = 1 / n_obs * 
np.mean(np.power(np.multiply(psi_a, theta) + psi_b, 2)) / np.power(J, 2) - return var - - -def tune_nuisance(y, x, d, ml_M, ml_t, ml_m, smpls, n_folds_tune, param_grid_M, param_grid_t, param_grid_m): - dx = np.column_stack((x, d)) - - M_tune_res = tune_grid_search(y, dx, ml_M, smpls, param_grid_M, n_folds_tune) - - m_tune_res = tune_grid_search(d, x, ml_m, smpls, param_grid_m, n_folds_tune) - - t_tune_res = tune_grid_search(d, x, ml_t, smpls, param_grid_t, n_folds_tune) - - M_best_params = [xx.best_params_ for xx in M_tune_res] - t_best_params = [xx.best_params_ for xx in t_tune_res] - m_best_params = [xx.best_params_ for xx in m_tune_res] - - t_tune_res = tune_grid_search(t_targets, x, ml_t, smpls, param_grid_t, n_folds_tune) diff --git a/doubleml/plm/tests/test_lplr.py b/doubleml/plm/tests/test_lplr.py index 9ef7ec732..154c47633 100644 --- a/doubleml/plm/tests/test_lplr.py +++ b/doubleml/plm/tests/test_lplr.py @@ -1,15 +1,10 @@ -import math - import numpy as np import pytest from sklearn.base import clone from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor -from sklearn.linear_model import LogisticRegression import doubleml as dml - -from ...tests._utils import draw_smpls -from ._utils_ssm_manual import fit_selection +from ..datasets import make_lplr_LZZ2020 @pytest.fixture(scope="module", params=[RandomForestClassifier(random_state=42)]) @@ -21,7 +16,7 @@ def learner_t(request): return request.param -@pytest.fixture(scope="module", params=[LogisticRegression(random_state=42)]) +@pytest.fixture(scope="module", params=[RandomForestRegressor(random_state=42)]) def learner_m(request): return request.param @@ -31,74 +26,36 @@ def score(request): @pytest.fixture(scope="module") -def dml_selection_fixture( - generate_data_selection, learner, score, learner_M, - learner_t, - learner_m, +def dml_lplr_fixture( + score, learner_M, learner_t, learner_m, ): - n_folds = 3 + n_folds = 5 + alpha = 0.5 # collect data np.random.seed(42) - (x, y, d, z, s) = 
generate_data_selection - - - ml_g = clone(learner[0]) - ml_pi = clone(learner[1]) - ml_m = clone(learner[1]) - - np.random.seed(42) - n_obs = len(y) - all_smpls = draw_smpls(n_obs, n_folds) + obj_dml_data = make_lplr_LZZ2020(alpha=alpha) - np.random.seed(42) - if score == "missing-at-random": - obj_dml_data = dml.DoubleMLSSMData.from_arrays(x, y, d, z=None, s=s) - dml_sel_obj = dml.DoubleMLSSM(obj_dml_data, ml_g, ml_pi, ml_m, n_folds=n_folds, score=score) - else: - assert score == "nonignorable" - obj_dml_data = dml.DoubleMLSSMData.from_arrays(x, y, d, z=z, s=s) - dml_sel_obj = dml.DoubleMLSSM(obj_dml_data, ml_g, ml_pi, ml_m, n_folds=n_folds, score=score) + ml_M = clone(learner_M) + ml_t = clone(learner_t) + ml_m = clone(learner_m) - np.random.seed(42) - dml_sel_obj.set_sample_splitting(all_smpls=all_smpls) + dml_sel_obj = dml.DoubleMLLPLR(obj_dml_data, ml_M, ml_t, ml_m, n_folds=n_folds, score=score) dml_sel_obj.fit() - np.random.seed(42) - res_manual = fit_selection( - y, - x, - d, - z, - s, - clone(learner[0]), - clone(learner[1]), - clone(learner[1]), - all_smpls, - score, - trimming_rule="truncate", - trimming_threshold=trimming_threshold, - normalize_ipw=normalize_ipw, - ) - res_dict = { "coef": dml_sel_obj.coef[0], - "coef_manual": res_manual["theta"], "se": dml_sel_obj.se[0], - "se_manual": res_manual["se"], + "true_coef": alpha, } - # sensitivity tests - # TODO - return res_dict @pytest.mark.ci -def test_dml_selection_coef(dml_selection_fixture): - assert math.isclose(dml_selection_fixture["coef"], dml_selection_fixture["coef_manual"], rel_tol=1e-9, abs_tol=1e-2) - - -@pytest.mark.ci -def test_dml_selection_se(dml_selection_fixture): - assert math.isclose(dml_selection_fixture["se"], dml_selection_fixture["se_manual"], rel_tol=1e-9, abs_tol=5e-2) +def test_dml_lplr_coef(dml_lplr_fixture): + # true_coef should lie within three standard deviations of the estimate + coef = dml_lplr_fixture["coef"] + se = dml_lplr_fixture["se"] + true_coef = 
dml_lplr_fixture["true_coef"] +    assert abs(coef - true_coef) <= 3.0 * se diff --git a/doubleml/plm/tests/test_lplr_exceptions.py b/doubleml/plm/tests/test_lplr_exceptions.py index cfe9f0679..1be83c122 100644 --- a/doubleml/plm/tests/test_lplr_exceptions.py +++ b/doubleml/plm/tests/test_lplr_exceptions.py @@ -295,3 +295,4 @@ def eval_fct(y_pred, y_true):      with pytest.raises(ValueError):         dml_lplr_obj.evaluate_learners(metric=eval_fct) + diff --git a/doubleml/plm/tests/test_lplr_tune.py b/doubleml/plm/tests/test_lplr_tune.py index 6d13e5d18..2926d755e 100644 --- a/doubleml/plm/tests/test_lplr_tune.py +++ b/doubleml/plm/tests/test_lplr_tune.py @@ -7,10 +7,7 @@ from sklearn.linear_model import Lasso, LogisticRegression  import doubleml as dml - -from ...tests._utils import draw_smpls -from ._utils_lplr_manual import fit_selection, tune_nuisance - +from ..datasets import make_lplr_LZZ2020  @pytest.fixture(scope="module", params=[RandomForestClassifier(random_state=42)]) def learner_M(request): @@ -21,20 +18,19 @@ def learner_t(request):     return request.param -@pytest.fixture(scope="module", params=[LogisticRegression(random_state=42)]) +@pytest.fixture(scope="module", params=[RandomForestRegressor(random_state=42)]) def learner_m(request):     return request.param +@pytest.fixture(scope="module", params=[RandomForestRegressor(random_state=42)]) +def learner_a(request):     return request.param  @pytest.fixture(scope="module", params=["nuisance_space", "instrument"]) def score(request):     return request.param -@pytest.fixture(scope="module", params=[True, False]) -def tune_on_folds(request): -    return request.param -  def get_par_grid(learner):     if learner.__class__ in [RandomForestRegressor, RandomForestClassifier]: @@ -47,123 +43,74 @@  @pytest.fixture(scope="module") def dml_lplr_fixture( -    generate_data_selection,     learner_M,     learner_t,     learner_m, +    learner_a,     score, -    tune_on_folds, +    tune_on_folds=True, ): -    par_grid = {"ml_M": 
get_par_grid(learner_M), "ml_t": get_par_grid(learner_t), "ml_m": get_par_grid(learner_m)} + par_grid = {"ml_M": get_par_grid(learner_M), "ml_t": get_par_grid(learner_t), "ml_m": get_par_grid(learner_m), "ml_a": get_par_grid(learner_a)} n_folds_tune = 4 - n_folds = 2 - - # collect data - np.random.seed(42) - x, y, d = generate_data_selection - + n_folds = 5 + alpha = 0.5 - n_obs = len(y) - all_smpls = draw_smpls(n_obs, n_folds) ml_M = clone(learner_M) ml_t = clone(learner_t) ml_m = clone(learner_m) + ml_a = clone(learner_a) - np.random.seed(42) - - obj_dml_data = dml.DoubleMLData.from_arrays(x, y, d) + obj_dml_data = make_lplr_LZZ2020(alpha=alpha) dml_sel_obj = dml.DoubleMLLPLR( obj_dml_data, ml_M, ml_t, ml_m, + ml_a=ml_a, n_folds=n_folds, score=score, - draw_sample_splitting=False, ) - - # synchronize the sample splitting - np.random.seed(42) - dml_sel_obj.set_sample_splitting(all_smpls=all_smpls) - - np.random.seed(42) # tune hyperparameters tune_res = dml_sel_obj.tune(par_grid, tune_on_folds=tune_on_folds, n_folds_tune=n_folds_tune, return_tune_res=False) - assert isinstance(tune_res, dml.DoubleMLSSM) + assert isinstance(tune_res, dml.DoubleMLLPLR) dml_sel_obj.fit() - np.random.seed(42) - smpls = all_smpls[0] - if tune_on_folds: - - M_best_params, t_best_params, m_best_params = tune_nuisance( - y, - x, - d, - clone(learner_M), - clone(learner_t), - clone(learner_m), - smpls, - n_folds_tune, - par_grid["ml_M"], - par_grid["ml_t"], - par_grid["ml_m"], - ) - - else: - xx = [(np.arange(len(y)), np.array([]))] - g0_best_params, g1_best_params, pi_best_params, m_best_params = tune_nuisance( - y, - x, - d, - clone(learner_M), - clone(learner_t), - clone(learner_m), - xx, - n_folds_tune, - par_grid["ml_M"], - par_grid["ml_t"], - par_grid["ml_m"], - ) - - - M_best_params = M_best_params * n_folds - t_best_params = t_best_params * n_folds - m_best_params = m_best_params * n_folds - - np.random.seed(42) - res_manual = fit_selection( - y, - x, - d, - clone(learner_M), - 
clone(learner_t), -        clone(learner_m), -        all_smpls, -        score, -        M_params=M_best_params, -        t_params=t_best_params, -        m_params=m_best_params, -    ) -     res_dict = {         "coef": dml_sel_obj.coef[0], -        "coef_manual": res_manual["theta"],         "se": dml_sel_obj.se[0], -        "se_manual": res_manual["se"], +        "true_coef": alpha,     }      return res_dict  @pytest.mark.ci -def test_dml_ssm_coef(dml_ssm_fixture): -    assert math.isclose(dml_lplr_fixture["coef"], dml_lplr_fixture["coef_manual"], rel_tol=1e-9, abs_tol=1e-4) +def test_dml_selection_coef(dml_lplr_fixture): +    # true_coef should lie within three standard deviations of the estimate +    coef = dml_lplr_fixture["coef"] +    se = dml_lplr_fixture["se"] +    true_coef = dml_lplr_fixture["true_coef"] +    assert abs(coef - true_coef) <= 3.0 * se  @pytest.mark.ci -def test_dml_ssm_se(dml_ssm_fixture): -    assert math.isclose(dml_lplr_fixture["se"], dml_lplr_fixture["se_manual"], rel_tol=1e-9, abs_tol=1e-4) +def test_lplr_exception_tuning( +    learner_M, +    learner_t, +    learner_m, +    learner_a,): +    # LPLR valid scores are 'nuisance_space' and 'instrument' +    obj_dml_data = make_lplr_LZZ2020(alpha=0.5) +    ml_M = clone(learner_M) +    ml_t = clone(learner_t) +    ml_m = clone(learner_m) +    ml_a = clone(learner_a) +    dml_lplr_obj = dml.DoubleMLLPLR(obj_dml_data, ml_M, ml_t, ml_m) +    par_grid = {"ml_M": get_par_grid(learner_M), "ml_t": get_par_grid(learner_t), "ml_m": get_par_grid(learner_m), +                "ml_a": get_par_grid(learner_a)} +    msg = "tune_on_folds must be True as targets have to be created for ml_t on folds." 
+ with pytest.raises(ValueError, match=msg): + dml_lplr_obj.tune(par_grid, tune_on_folds=False) \ No newline at end of file From 5a7e2796fb35282e49c8ef23e6db95b6030a6d22 Mon Sep 17 00:00:00 2001 From: Julius Herzig Date: Wed, 5 Nov 2025 18:45:15 -0800 Subject: [PATCH 20/48] Pre-commit checks --- doubleml/plm/lplr.py | 3 +-- doubleml/plm/tests/test_lplr.py | 8 +++++- doubleml/plm/tests/test_lplr_exceptions.py | 1 - doubleml/plm/tests/test_lplr_tune.py | 31 +++++++++++++++------- 4 files changed, 29 insertions(+), 14 deletions(-) diff --git a/doubleml/plm/lplr.py b/doubleml/plm/lplr.py index 468b93593..af545216b 100644 --- a/doubleml/plm/lplr.py +++ b/doubleml/plm/lplr.py @@ -109,7 +109,6 @@ def __init__( _ = self._check_learner(ml_t, "ml_t", regressor=True, classifier=False) _ = self._check_learner(ml_M, "ml_M", regressor=False, classifier=True) - ml_m_is_classifier = self._check_learner(ml_m, "ml_m", regressor=True, classifier=True) self._learner = {"ml_m": ml_m, "ml_t": ml_t, "ml_M": ml_M} @@ -490,7 +489,7 @@ def _nuisance_tuning( n_jobs_cv, search_mode, n_iter_randomized_search, - fold_specific_target=True + fold_specific_target=True, ) t_best_params = [xx.best_params_ for xx in t_tune_res] diff --git a/doubleml/plm/tests/test_lplr.py b/doubleml/plm/tests/test_lplr.py index 154c47633..4eaf86136 100644 --- a/doubleml/plm/tests/test_lplr.py +++ b/doubleml/plm/tests/test_lplr.py @@ -4,6 +4,7 @@ from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor import doubleml as dml + from ..datasets import make_lplr_LZZ2020 @@ -11,6 +12,7 @@ def learner_M(request): return request.param + @pytest.fixture(scope="module", params=[RandomForestRegressor(random_state=42)]) def learner_t(request): return request.param @@ -20,6 +22,7 @@ def learner_t(request): def learner_m(request): return request.param + @pytest.fixture(scope="module", params=["nuisance_space", "instrument"]) def score(request): return request.param @@ -27,7 +30,10 @@ def score(request): 
@pytest.fixture(scope="module") def dml_lplr_fixture( - score, learner_M, learner_t, learner_m, + score, + learner_M, + learner_t, + learner_m, ): n_folds = 5 alpha = 0.5 diff --git a/doubleml/plm/tests/test_lplr_exceptions.py b/doubleml/plm/tests/test_lplr_exceptions.py index 1be83c122..cfe9f0679 100644 --- a/doubleml/plm/tests/test_lplr_exceptions.py +++ b/doubleml/plm/tests/test_lplr_exceptions.py @@ -295,4 +295,3 @@ def eval_fct(y_pred, y_true): with pytest.raises(ValueError): dml_lplr_obj.evaluate_learners(metric=eval_fct) - diff --git a/doubleml/plm/tests/test_lplr_tune.py b/doubleml/plm/tests/test_lplr_tune.py index 2926d755e..70ea63817 100644 --- a/doubleml/plm/tests/test_lplr_tune.py +++ b/doubleml/plm/tests/test_lplr_tune.py @@ -1,5 +1,3 @@ -import math - import numpy as np import pytest from sklearn.base import clone @@ -7,12 +5,15 @@ from sklearn.linear_model import Lasso, LogisticRegression import doubleml as dml + from ..datasets import make_lplr_LZZ2020 + @pytest.fixture(scope="module", params=[RandomForestClassifier(random_state=42)]) def learner_M(request): return request.param + @pytest.fixture(scope="module", params=[RandomForestRegressor(random_state=42)]) def learner_t(request): return request.param @@ -22,16 +23,17 @@ def learner_t(request): def learner_m(request): return request.param + @pytest.fixture(scope="module", params=[RandomForestRegressor(random_state=42)]) def learner_a(request): return request.param + @pytest.fixture(scope="module", params=["nuisance_space", "instrument"]) def score(request): return request.param - def get_par_grid(learner): if learner.__class__ in [RandomForestRegressor, RandomForestClassifier]: par_grid = {"n_estimators": [5, 10, 20]} @@ -50,12 +52,16 @@ def dml_lplr_fixture( score, tune_on_folds=True, ): - par_grid = {"ml_M": get_par_grid(learner_M), "ml_t": get_par_grid(learner_t), "ml_m": get_par_grid(learner_m), "ml_a": get_par_grid(learner_a)} + par_grid = { + "ml_M": get_par_grid(learner_M), + "ml_t": 
get_par_grid(learner_t), + "ml_m": get_par_grid(learner_m), + "ml_a": get_par_grid(learner_a), + } n_folds_tune = 4 n_folds = 5 alpha = 0.5 - ml_M = clone(learner_M) ml_t = clone(learner_t) ml_m = clone(learner_m) @@ -101,16 +107,21 @@ def test_lplr_exception_tuning( learner_M, learner_t, learner_m, - learner_a,): + learner_a, +): # LPLR valid scores are 'nuisance_space' and 'instrument' obj_dml_data = make_lplr_LZZ2020(alpha=0.5) ml_M = clone(learner_M) ml_t = clone(learner_t) ml_m = clone(learner_m) - ml_a = clone(learner_a) + dml_lplr_obj = dml.DoubleMLLPLR(obj_dml_data, ml_M, ml_t, ml_m) - par_grid = {"ml_M": get_par_grid(learner_M), "ml_t": get_par_grid(learner_t), "ml_m": get_par_grid(learner_m), - "ml_a": get_par_grid(learner_a)} + par_grid = { + "ml_M": get_par_grid(learner_M), + "ml_t": get_par_grid(learner_t), + "ml_m": get_par_grid(learner_m), + "ml_a": get_par_grid(learner_a), + } msg = "tune_on_folds must be True as targets have to be created for ml_t on folds." with pytest.raises(ValueError, match=msg): - dml_lplr_obj.tune(par_grid, tune_on_folds=False) \ No newline at end of file + dml_lplr_obj.tune(par_grid, tune_on_folds=False) From fc03cc65aaf2f216b8e44d2e5f4aee9adf8727ca Mon Sep 17 00:00:00 2001 From: Julius Herzig Date: Thu, 6 Nov 2025 11:39:25 -0800 Subject: [PATCH 21/48] Pre-commit checks on all files --- doubleml/plm/__init__.py | 6 +----- doubleml/plm/datasets/dgp_lplr_LZZ2020.py | 1 + doubleml/plm/tests/test_lplr_exceptions.py | 8 ++------ 3 files changed, 4 insertions(+), 11 deletions(-) diff --git a/doubleml/plm/__init__.py b/doubleml/plm/__init__.py index f5e135e3a..283bc91b4 100644 --- a/doubleml/plm/__init__.py +++ b/doubleml/plm/__init__.py @@ -6,8 +6,4 @@ from .pliv import DoubleMLPLIV from .plr import DoubleMLPLR -__all__ = [ - "DoubleMLPLR", - "DoubleMLPLIV", - "DoubleMLLPLR" -] +__all__ = ["DoubleMLPLR", "DoubleMLPLIV", "DoubleMLLPLR"] diff --git a/doubleml/plm/datasets/dgp_lplr_LZZ2020.py 
b/doubleml/plm/datasets/dgp_lplr_LZZ2020.py index a9b4ece9b..284da7d8b 100644 --- a/doubleml/plm/datasets/dgp_lplr_LZZ2020.py +++ b/doubleml/plm/datasets/dgp_lplr_LZZ2020.py @@ -90,6 +90,7 @@ def r_0(X):                 + 0.25 * np.where(X[:, 10] > 0, 1, 0)                 + -0.25 * np.where(X[:, 12] > 0, 1, 0)             ) +     else:          def r_0(X): diff --git a/doubleml/plm/tests/test_lplr_exceptions.py b/doubleml/plm/tests/test_lplr_exceptions.py index cfe9f0679..c4c57fd98 100644 --- a/doubleml/plm/tests/test_lplr_exceptions.py +++ b/doubleml/plm/tests/test_lplr_exceptions.py @@ -21,10 +21,7 @@  @pytest.mark.ci def test_lplr_exception_data(): -    msg = ( -        r"The data must be of DoubleMLData.* type\.[\s\S]* of type " -        r"<class 'pandas\.core\.frame\.DataFrame'> was passed\." -    ) +    msg = r"The data must be of DoubleMLData.* type\.[\s\S]* of type " r"<class 'pandas\.core\.frame\.DataFrame'> was passed\."     with pytest.raises(TypeError, match=msg):         _ = DoubleMLLPLR(pd.DataFrame(), ml_M, ml_t, ml_m) @@ -284,8 +281,7 @@ def test_double_ml_exception_evaluate_learner():         dml_lplr_obj.evaluate_learners(metric="mse")      msg = ( -        r"The learners have to be a subset of \['ml_m', 'ml_t', 'ml_M', 'ml_a'\]\. " -        r"Learners \['ml_mu', 'ml_p'\] provided." +        r"The learners have to be a subset of \['ml_m', 'ml_t', 'ml_M', 'ml_a'\]\. " r"Learners \['ml_mu', 'ml_p'\] provided." 
) with pytest.raises(ValueError, match=msg): dml_lplr_obj.evaluate_learners(learners=["ml_mu", "ml_p"]) From 5dae65189666090406604cafb3438e04dcfd1ebf Mon Sep 17 00:00:00 2001 From: Julius Herzig Date: Thu, 6 Nov 2025 16:06:48 -0800 Subject: [PATCH 22/48] Changed function signature, test --- doubleml/plm/lplr.py | 4 ++-- doubleml/plm/tests/test_lplr.py | 8 +++++++- doubleml/plm/tests/test_lplr_tune.py | 18 ++++++------------ 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/doubleml/plm/lplr.py b/doubleml/plm/lplr.py index af545216b..3ef6e4960 100644 --- a/doubleml/plm/lplr.py +++ b/doubleml/plm/lplr.py @@ -414,7 +414,7 @@ def _nuisance_tuning( filtered_train_inds = [] if self.score == "nuisance_space": - for train, test in smpls: + for train, _ in smpls: train_filtered = train[y[train] == 0] filtered_train_inds.append(train_filtered) elif self.score == "instrument": @@ -528,7 +528,7 @@ def draw_sample_splitting(self): return self - def set_sample_splitting(self): + def set_sample_splitting(self, all_smpls, all_smpls_cluster=None): raise NotImplementedError("set_sample_splitting is not implemented for DoubleMLLPLR.") def _compute_score(self, psi_elements, coef): diff --git a/doubleml/plm/tests/test_lplr.py b/doubleml/plm/tests/test_lplr.py index 4eaf86136..9c94a8a44 100644 --- a/doubleml/plm/tests/test_lplr.py +++ b/doubleml/plm/tests/test_lplr.py @@ -28,19 +28,25 @@ def score(request): return request.param +@pytest.fixture(scope="module", params=["continuous", "binary", "binary_unbalanced"]) +def treatment(request): + return request.param + + @pytest.fixture(scope="module") def dml_lplr_fixture( score, learner_M, learner_t, learner_m, + treatment, ): n_folds = 5 alpha = 0.5 # collect data np.random.seed(42) - obj_dml_data = make_lplr_LZZ2020(alpha=alpha) + obj_dml_data = make_lplr_LZZ2020(alpha=alpha, treatment=treatment) ml_M = clone(learner_M) ml_t = clone(learner_t) diff --git a/doubleml/plm/tests/test_lplr_tune.py 
b/doubleml/plm/tests/test_lplr_tune.py index 70ea63817..64653f5e8 100644 --- a/doubleml/plm/tests/test_lplr_tune.py +++ b/doubleml/plm/tests/test_lplr_tune.py @@ -2,7 +2,6 @@ import pytest from sklearn.base import clone from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor -from sklearn.linear_model import Lasso, LogisticRegression import doubleml as dml @@ -34,13 +33,8 @@ def score(request): return request.param -def get_par_grid(learner): - if learner.__class__ in [RandomForestRegressor, RandomForestClassifier]: - par_grid = {"n_estimators": [5, 10, 20]} - else: - assert learner.__class__ in [LogisticRegression, Lasso] - par_grid = {"C": np.logspace(-2, 2, 10)} - return par_grid +def get_par_grid(): + return {"n_estimators": [5, 10, 20]} @pytest.fixture(scope="module") @@ -53,10 +47,10 @@ def dml_lplr_fixture( tune_on_folds=True, ): par_grid = { - "ml_M": get_par_grid(learner_M), - "ml_t": get_par_grid(learner_t), - "ml_m": get_par_grid(learner_m), - "ml_a": get_par_grid(learner_a), + "ml_M": get_par_grid(), + "ml_t": get_par_grid(), + "ml_m": get_par_grid(), + "ml_a": get_par_grid(), } n_folds_tune = 4 n_folds = 5 From 13fca2f6b166e2550c586e6c548d65ddf67f9b62 Mon Sep 17 00:00:00 2001 From: Julius Herzig Date: Thu, 6 Nov 2025 16:09:35 -0800 Subject: [PATCH 23/48] Argument fix --- doubleml/plm/lplr.py | 2 +- doubleml/plm/tests/test_lplr_tune.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/doubleml/plm/lplr.py b/doubleml/plm/lplr.py index 3ef6e4960..8f609e04c 100644 --- a/doubleml/plm/lplr.py +++ b/doubleml/plm/lplr.py @@ -465,7 +465,7 @@ def _nuisance_tuning( ) W_inner = [] - for i, (train, test) in enumerate(smpls): + for i, (train, _) in enumerate(smpls): M_iteration = M_hat["preds_inner"][i][train] M_iteration = np.clip(M_iteration, 1e-8, 1 - 1e-8) w = scipy.special.logit(M_iteration) diff --git a/doubleml/plm/tests/test_lplr_tune.py b/doubleml/plm/tests/test_lplr_tune.py index 64653f5e8..7c7c4aebb 100644 --- 
a/doubleml/plm/tests/test_lplr_tune.py +++ b/doubleml/plm/tests/test_lplr_tune.py @@ -111,10 +111,10 @@ def test_lplr_exception_tuning( dml_lplr_obj = dml.DoubleMLLPLR(obj_dml_data, ml_M, ml_t, ml_m) par_grid = { - "ml_M": get_par_grid(learner_M), - "ml_t": get_par_grid(learner_t), - "ml_m": get_par_grid(learner_m), - "ml_a": get_par_grid(learner_a), + "ml_M": get_par_grid(), + "ml_t": get_par_grid(), + "ml_m": get_par_grid(), + "ml_a": get_par_grid(), } msg = "tune_on_folds must be True as targets have to be created for ml_t on folds." with pytest.raises(ValueError, match=msg): From ff4c75b9d4e881363492381d5e91730b1b26ea18 Mon Sep 17 00:00:00 2001 From: Julius Herzig Date: Fri, 7 Nov 2025 12:55:43 -0800 Subject: [PATCH 24/48] Updated tests for improved coverage --- doubleml/plm/lplr.py | 17 ++----- doubleml/plm/tests/test_lplr.py | 1 + doubleml/tests/test_datasets.py | 36 ++++++++++++++ doubleml/tests/test_nonlinear_score_mixin.py | 26 +++++++++++ doubleml/utils/resampling.py | 10 ++-- doubleml/utils/tests/test_resampling.py | 49 ++++++++++++++++++++ 6 files changed, 120 insertions(+), 19 deletions(-) create mode 100644 doubleml/utils/tests/test_resampling.py diff --git a/doubleml/plm/lplr.py b/doubleml/plm/lplr.py index 8f609e04c..c9b39c94e 100644 --- a/doubleml/plm/lplr.py +++ b/doubleml/plm/lplr.py @@ -215,10 +215,7 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa m_external = external_predictions["ml_m"] is not None M_external = external_predictions["ml_M"] is not None t_external = external_predictions["ml_t"] is not None - if "ml_a" in self._learner: - a_external = external_predictions["ml_a"] is not None - else: - a_external = False + a_external = external_predictions["ml_a"] is not None if M_external: M_hat = {"preds": external_predictions["ml_M"], "targets": None, "models": None} @@ -270,8 +267,7 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa method=self._predict_method["ml_m"], 
return_models=return_models, ) - else: - raise NotImplementedError + _check_finite_predictions(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls) if self._check_learner(self._learner["ml_m"], "ml_m", regressor=True, classifier=True): @@ -383,7 +379,7 @@ def _score_element_names(self): return ["y", "d", "d_tilde", "r_hat", "m_hat", "psi_hat", "score_const"] def _sensitivity_element_est(self, preds): - pass + raise NotImplementedError() def _nuisance_tuning( self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search @@ -419,8 +415,7 @@ def _nuisance_tuning( filtered_train_inds.append(train_filtered) elif self.score == "instrument": filtered_train_inds = train_inds - else: - raise NotImplementedError + m_tune_res = _dml_tune( d, x, @@ -539,8 +534,6 @@ def _compute_score(self, psi_elements, coef): score = (psi_elements["y"] - scipy.special.expit(coef * psi_elements["d"] + psi_elements["r_hat"])) * psi_elements[ "d_tilde" ] - else: - raise NotImplementedError return score @@ -551,7 +544,5 @@ def _compute_score_deriv(self, psi_elements, coef, inds=None): elif self.score == "instrument": expit = scipy.special.expit(coef * psi_elements["d"] + psi_elements["r_hat"]) deriv = -psi_elements["d"] * expit * (1 - expit) * psi_elements["d_tilde"] - else: - raise NotImplementedError return deriv diff --git a/doubleml/plm/tests/test_lplr.py b/doubleml/plm/tests/test_lplr.py index 9c94a8a44..2e58bfeaf 100644 --- a/doubleml/plm/tests/test_lplr.py +++ b/doubleml/plm/tests/test_lplr.py @@ -29,6 +29,7 @@ def score(request): @pytest.fixture(scope="module", params=["continuous", "binary", "binary_unbalanced"]) +# TODO: Error for continuous treatment? 
def treatment(request): return request.param diff --git a/doubleml/tests/test_datasets.py b/doubleml/tests/test_datasets.py index f69b681e2..05c75d00a 100644 --- a/doubleml/tests/test_datasets.py +++ b/doubleml/tests/test_datasets.py @@ -15,6 +15,7 @@ from doubleml.plm.datasets import ( _make_pliv_data, make_confounded_plr_data, + make_lplr_LZZ2020, make_pliv_CHS2015, make_pliv_multiway_cluster_CKMS2021, make_plr_CCDDHNR2018, @@ -294,3 +295,38 @@ def test_make_data_discrete_treatments(n_levels): msg = "n_levels must be an integer." with pytest.raises(ValueError, match=msg): _ = make_irm_data_discrete_treatments(n_obs=n, n_levels=1.1) + + +@pytest.mark.ci +def test_make_lplr_LZZ2020_return_types(): + np.random.seed(3141) + res = make_lplr_LZZ2020(n_obs=100, return_type="DoubleMLData") + assert isinstance(res, DoubleMLData) + res = make_lplr_LZZ2020(n_obs=100, return_type="DataFrame") + assert isinstance(res, pd.DataFrame) + x, y, d, z = make_lplr_LZZ2020(n_obs=100, return_type="array") + assert isinstance(x, np.ndarray) + assert isinstance(y, np.ndarray) + assert isinstance(d, np.ndarray) + assert isinstance(z, np.ndarray) + with pytest.raises(ValueError, match=msg_inv_return_type): + _ = make_lplr_LZZ2020(n_obs=100, return_type="matrix") + + +@pytest.mark.ci +def test_make_lplr_LZZ2020_variants(): + np.random.seed(3141) + res = make_lplr_LZZ2020(n_obs=100, treatment="binary") + assert np.array_equal(np.unique(res.d), np.array([0, 1])) + res = make_lplr_LZZ2020(n_obs=100, treatment="binary_unbalanced") + assert np.array_equal(np.unique(res.d), np.array([0, 1])) + res = make_lplr_LZZ2020(n_obs=100, treatment="continuous") + assert len(np.unique(res.d)) == 100 + + msg = "Invalid treatment type." 
+ with pytest.raises(ValueError, match=msg): + _ = make_lplr_LZZ2020(n_obs=100, treatment="colors") + + res = make_lplr_LZZ2020(n_obs=100, balanced_r0=False) + _, y_unique = np.unique(res.y, return_counts=True) + assert np.abs(y_unique[0] - y_unique[1]) > 10 diff --git a/doubleml/tests/test_nonlinear_score_mixin.py b/doubleml/tests/test_nonlinear_score_mixin.py index 0fce08c3b..d68785aa4 100644 --- a/doubleml/tests/test_nonlinear_score_mixin.py +++ b/doubleml/tests/test_nonlinear_score_mixin.py @@ -253,3 +253,29 @@ def test_nonlinear_warnings(generate_data1, coef_bounds): with pytest.warns(UserWarning, match=msg): dml_plr_obj._coef_bounds = coef_bounds dml_plr_obj.fit() + + +@pytest.mark.ci +def test_nonlinear_errors(generate_data1, coef_bounds): + # collect data + data = generate_data1 + x_cols = data.columns[data.columns.str.startswith("X")].tolist() + + np.random.seed(3141) + obj_dml_data = dml.DoubleMLData(data, "y", ["d"], x_cols) + + dml_plr_obj = DoubleMLPLRWithNonLinearScoreMixin(obj_dml_data, LinearRegression(), LinearRegression(), score="no_root_pos") + dml_plr_obj._error_on_convergence_failure = True + + msg = "Could not find a root of the score function." + with pytest.raises(ValueError, match=msg): + dml_plr_obj._coef_bounds = coef_bounds + dml_plr_obj.fit() + + dml_plr_obj = DoubleMLPLRWithNonLinearScoreMixin(obj_dml_data, LinearRegression(), LinearRegression(), score="no_root_neg") + dml_plr_obj._error_on_convergence_failure = True + + msg = "Could not find a root of the score function." 
+ with pytest.raises(ValueError, match=msg): + dml_plr_obj._coef_bounds = coef_bounds + dml_plr_obj.fit() diff --git a/doubleml/utils/resampling.py b/doubleml/utils/resampling.py index 38c1ac595..e0668ef25 100644 --- a/doubleml/utils/resampling.py +++ b/doubleml/utils/resampling.py @@ -31,7 +31,7 @@ def __init__(self, n_folds, n_folds_inner, n_rep, n_obs, stratify=None): self.n_folds_inner = n_folds_inner self.n_rep = n_rep self.n_obs = n_obs - self.stratify = stratify + self.stratify = np.array(stratify) if n_folds < 2: raise ValueError( @@ -53,9 +53,9 @@ def split_samples(self): all_smpls = [(train, test) for train, test in self.resampling.split(X=np.zeros(self.n_obs), y=self.stratify)] smpls = [all_smpls[(i_repeat * self.n_folds) : ((i_repeat + 1) * self.n_folds)] for i_repeat in range(self.n_rep)] smpls_inner = [] - for _ in range(self.n_rep): + for i_rep in range(self.n_rep): smpls_inner_rep = [] - for train, test in all_smpls: + for train, test in smpls[i_rep]: if self.stratify is None: smpls_inner_rep.append( [ @@ -67,9 +67,7 @@ def split_samples(self): smpls_inner_rep.append( [ (train[train_inner], train[test_inner]) - for train_inner, test_inner in self.resampling_inner.split( - X=np.zeros(len(train)), y=self.stratify[train] - ) + for train_inner, test_inner in self.resampling_inner.split(X=train, y=self.stratify[train]) ] ) smpls_inner.append(smpls_inner_rep) diff --git a/doubleml/utils/tests/test_resampling.py b/doubleml/utils/tests/test_resampling.py new file mode 100644 index 000000000..baab61b4c --- /dev/null +++ b/doubleml/utils/tests/test_resampling.py @@ -0,0 +1,49 @@ +import pytest + +from doubleml.utils.resampling import DoubleMLDoubleResampling + + +@pytest.mark.ci +def test_DoubleMLDoubleResampling_stratify(): + n_folds = 5 + n_folds_inner = 3 + n_rep = 2 + n_obs = 100 + stratify = [0] * 50 + [1] * 50 + + obj_dml_double_resampling = DoubleMLDoubleResampling( + n_folds=n_folds, + n_folds_inner=n_folds_inner, + n_rep=n_rep, + n_obs=n_obs, + 
stratify=stratify, + ) + smpls, smpls_inner = obj_dml_double_resampling.split_samples() + + assert len(smpls) == n_rep + assert len(smpls_inner) == n_rep + + for i_rep in range(n_rep): + assert len(smpls[i_rep]) == n_folds + assert len(smpls_inner[i_rep]) == n_folds + + for i_fold in range(n_folds): + train_ind, test_ind = smpls[i_rep][i_fold] + smpls_inner_rep_fold = smpls_inner[i_rep][i_fold] + assert len(smpls_inner_rep_fold) == n_folds_inner + + for i_fold_inner in range(n_folds_inner): + train_ind_inner, test_ind_inner = smpls_inner_rep_fold[i_fold_inner] + assert set(train_ind_inner).issubset(set(train_ind)) + assert set(test_ind_inner).issubset(set(train_ind)) + + +@pytest.mark.ci +def test_DoubleMLDoubleResampling_exceptions(): + msg = "n_folds must be greater than 1. You can use set_sample_splitting with a tuple to only use one fold." + with pytest.raises(ValueError, match=msg): + _ = DoubleMLDoubleResampling(1, 5, 1, 100) + + msg = "n_folds_inner must be greater than 1. You can use set_sample_splitting with a tuple to only use one fold." 
+ with pytest.raises(ValueError, match=msg): + _ = DoubleMLDoubleResampling(5, 1, 1, 100) From 8a181cd656df723eeabb7316ab4388a52ef62ec7 Mon Sep 17 00:00:00 2001 From: Julius Herzig Date: Fri, 7 Nov 2025 12:58:49 -0800 Subject: [PATCH 25/48] Unused var removed --- doubleml/utils/tests/test_resampling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doubleml/utils/tests/test_resampling.py b/doubleml/utils/tests/test_resampling.py index baab61b4c..3ecfbada0 100644 --- a/doubleml/utils/tests/test_resampling.py +++ b/doubleml/utils/tests/test_resampling.py @@ -28,7 +28,7 @@ def test_DoubleMLDoubleResampling_stratify(): assert len(smpls_inner[i_rep]) == n_folds for i_fold in range(n_folds): - train_ind, test_ind = smpls[i_rep][i_fold] + train_ind, _ = smpls[i_rep][i_fold] smpls_inner_rep_fold = smpls_inner[i_rep][i_fold] assert len(smpls_inner_rep_fold) == n_folds_inner From f2ecea799aac830c5dac42a37aa2ded19317a21e Mon Sep 17 00:00:00 2001 From: Julius Herzig Date: Fri, 7 Nov 2025 13:09:30 -0800 Subject: [PATCH 26/48] Fixed resampling --- doubleml/utils/resampling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doubleml/utils/resampling.py b/doubleml/utils/resampling.py index e0668ef25..4f49a3d2c 100644 --- a/doubleml/utils/resampling.py +++ b/doubleml/utils/resampling.py @@ -31,7 +31,7 @@ def __init__(self, n_folds, n_folds_inner, n_rep, n_obs, stratify=None): self.n_folds_inner = n_folds_inner self.n_rep = n_rep self.n_obs = n_obs - self.stratify = np.array(stratify) + self.stratify = np.array(stratify) if stratify is not None else None if n_folds < 2: raise ValueError( From a9a295993645df01e82d85eb1b1a555d76170dcc Mon Sep 17 00:00:00 2001 From: Julius Herzig Date: Fri, 7 Nov 2025 17:36:59 -0800 Subject: [PATCH 27/48] External predictions --- doubleml/double_ml.py | 17 +++- doubleml/plm/lplr.py | 24 ++++- doubleml/plm/tests/test_lplr.py | 3 +- .../tests/test_lplr_external_predictions.py | 90 +++++++++++++++++++ 
doubleml/utils/_estimation.py | 7 +- 5 files changed, 132 insertions(+), 9 deletions(-) create mode 100644 doubleml/plm/tests/test_lplr_external_predictions.py diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index 05481bf16..a95e2c7dc 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -263,6 +263,13 @@ def learner(self): """ return self._learner + @property + def predictions_names(self): + """ + The names of predictions for the nuisance functions. + """ + return list(self._learner.keys()) + @property def learner_names(self): """ @@ -1059,7 +1066,7 @@ def _check_fit(self, n_jobs_cv, store_predictions, external_predictions, store_m _check_external_predictions( external_predictions=external_predictions, valid_treatments=self._dml_data.d_cols, - valid_learners=self.params_names, + valid_learners=self.predictions_names, n_obs=self.n_obs, n_rep=self.n_rep, ) @@ -1146,8 +1153,10 @@ def _initialize_arrays(self): self._all_se = np.full((n_thetas, n_rep), np.nan) def _initialize_predictions_and_targets(self): - self._predictions = {learner: np.full(self._score_dim, np.nan) for learner in self.params_names} - self._nuisance_targets = {learner: np.full(self._score_dim, np.nan) for learner in self.params_names} + self._predictions = {learner: np.full(self._score_dim, np.nan, dtype=object) for learner in self.predictions_names} + self._nuisance_targets = { + learner: np.full(self._score_dim, np.nan, dtype=object) for learner in self.predictions_names + } def _initialize_nuisance_loss(self): self._nuisance_loss = {learner: np.full((self.n_rep, self._dml_data.n_coefs), np.nan) for learner in self.params_names} @@ -1158,7 +1167,7 @@ def _initialize_models(self): } def _store_predictions_and_targets(self, preds, targets): - for learner in self.params_names: + for learner in self.predictions_names: self._predictions[learner][:, self._i_rep, self._i_treat] = preds[learner] self._nuisance_targets[learner][:, self._i_rep, self._i_treat] = targets[learner] diff 
--git a/doubleml/plm/lplr.py b/doubleml/plm/lplr.py index c9b39c94e..1bd905367 100644 --- a/doubleml/plm/lplr.py +++ b/doubleml/plm/lplr.py @@ -111,6 +111,7 @@ def __init__( ml_m_is_classifier = self._check_learner(ml_m, "ml_m", regressor=True, classifier=True) self._learner = {"ml_m": ml_m, "ml_t": ml_t, "ml_M": ml_M} + self._predictions_names = ["ml_r", "ml_m", "ml_a", "ml_t", "ml_M", "ml_M_inner", "ml_a_inner"] if ml_a is not None: ml_a_is_classifier = self._check_learner(ml_a, "ml_a", regressor=True, classifier=True) @@ -181,6 +182,7 @@ def _double_dml_cv_predict( res = {} res["preds"] = np.zeros(y.shape, dtype=float) res["preds_inner"] = [] + res["targets_inner"] = [] res["models"] = [] for smpls_single_split, smpls_double_split in zip(smpls, smpls_inner): res_inner = _dml_cv_predict( @@ -198,6 +200,7 @@ def _double_dml_cv_predict( _check_finite_predictions(res_inner["preds"], estimator, estimator_name, smpls_double_split) res["preds_inner"].append(res_inner["preds"]) + res["targets_inner"].append(res_inner["targets"]) for model in res_inner["models"]: res["models"].append(model) if method == "predict_proba": @@ -218,7 +221,10 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa a_external = external_predictions["ml_a"] is not None if M_external: - M_hat = {"preds": external_predictions["ml_M"], "targets": None, "models": None} + if "ml_M_inner" not in external_predictions.keys(): + raise ValueError("When providing external predictions for ml_M, also inner predictions have to be provided.") + M_hat_inner = np.squeeze(np.array(external_predictions["ml_M_inner"].tolist())).T + M_hat = {"preds": external_predictions["ml_M"], "preds_inner": M_hat_inner, "targets": None, "models": None} else: M_hat = self._double_dml_cv_predict( self._learner["ml_M"], @@ -285,7 +291,10 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa ) if a_external: - a_hat = {"preds": external_predictions["ml_a"], "targets": None, 
"models": None} + if "ml_a_inner" not in external_predictions.keys(): + raise ValueError("When providing external predictions for ml_M, also inner predictions have to be provided.") + a_hat_inner = np.squeeze(np.array(external_predictions["ml_a_inner"].tolist())).T + a_hat = {"preds": external_predictions["ml_a"], "preds_inner": a_hat_inner, "targets": None, "models": None} else: a_hat = self._double_dml_cv_predict( self._learner["ml_a"], @@ -338,6 +347,8 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa "ml_a": a_hat["preds"], "ml_t": t_hat["preds"], "ml_M": M_hat["preds"], + "ml_M_inner": np.moveaxis(M_hat["preds_inner"], 0, -1).tolist(), + "ml_a_inner": np.moveaxis(a_hat["preds_inner"], 0, -1).tolist(), }, "targets": { "ml_r": None, @@ -345,6 +356,8 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa "ml_a": a_hat["targets"], "ml_t": t_hat["targets"], "ml_M": M_hat["targets"], + "ml_M_inner": np.moveaxis(M_hat["targets_inner"], 0, -1).tolist() if not M_external else None, + "ml_a_inner": np.moveaxis(a_hat["targets_inner"], 0, -1).tolist() if not a_external else None, }, "models": { "ml_r": None, @@ -357,6 +370,13 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa return psi_elements, preds + @property + def predictions_names(self): + """ + The names of predictions for the nuisance functions. 
+ """ + return self._predictions_names + def _score_elements(self, y, d, r_hat, m_hat): # compute residual d_tilde = d - m_hat diff --git a/doubleml/plm/tests/test_lplr.py b/doubleml/plm/tests/test_lplr.py index 2e58bfeaf..efba990d9 100644 --- a/doubleml/plm/tests/test_lplr.py +++ b/doubleml/plm/tests/test_lplr.py @@ -4,8 +4,7 @@ from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor import doubleml as dml - -from ..datasets import make_lplr_LZZ2020 +from doubleml.plm.datasets import make_lplr_LZZ2020 @pytest.fixture(scope="module", params=[RandomForestClassifier(random_state=42)]) diff --git a/doubleml/plm/tests/test_lplr_external_predictions.py b/doubleml/plm/tests/test_lplr_external_predictions.py new file mode 100644 index 000000000..670860386 --- /dev/null +++ b/doubleml/plm/tests/test_lplr_external_predictions.py @@ -0,0 +1,90 @@ +import math + +import numpy as np +import pytest +from sklearn.linear_model import LinearRegression, LogisticRegression + +from doubleml import DoubleMLData +from doubleml.plm.datasets import make_lplr_LZZ2020 +from doubleml.plm.lplr import DoubleMLLPLR +from doubleml.utils import DMLDummyClassifier, DMLDummyRegressor + + +@pytest.fixture(scope="module", params=["nuisance_space", "instrument"]) +def lplr_score(request): + return request.param + + +@pytest.fixture(scope="module", params=[1, 3]) +def n_rep(request): + return request.param + + +@pytest.fixture(scope="module", params=[True, False]) +def set_ml_m_ext(request): + return request.param + + +@pytest.fixture(scope="module", params=[True, False]) +def set_ml_t_ext(request): + return request.param + + +@pytest.fixture(scope="module", params=[True, False]) +def set_ml_M_ext(request): + return request.param + + +@pytest.fixture(scope="module") +def doubleml_lplr_fixture(lplr_score, n_rep, set_ml_m_ext, set_ml_t_ext, set_ml_M_ext): + ext_predictions = {"d": {}} + + x, y, d, _ = make_lplr_LZZ2020(n_obs=500, dim_x=20, alpha=0.5, return_type="np.array", 
treatment="continuous") + + np.random.seed(3141) + dml_data = DoubleMLData.from_arrays(x=x, y=y, d=d) + + kwargs = {"obj_dml_data": dml_data, "score": lplr_score, "n_rep": n_rep} + if lplr_score == "instrument": + # ensure ml_a supports sample_weight + kwargs["ml_a"] = LinearRegression() + + dml_lplr = DoubleMLLPLR(ml_M=LogisticRegression(max_iter=1000), ml_t=LinearRegression(), ml_m=LinearRegression(), **kwargs) + np.random.seed(3141) + dml_lplr.fit(store_predictions=True) + + # prepare external predictions and dummy learners + if set_ml_M_ext: + ext_predictions["d"]["ml_M"] = dml_lplr.predictions["ml_M"][:, :, 0] + ext_predictions["d"]["ml_M_inner"] = dml_lplr.predictions["ml_M_inner"][:, :, 0] + ml_M = DMLDummyClassifier() + else: + ml_M = LogisticRegression(max_iter=1000) + + if set_ml_t_ext: + ext_predictions["d"]["ml_t"] = dml_lplr.predictions["ml_t"][:, :, 0] + ml_t = DMLDummyRegressor() + else: + ml_t = LinearRegression() + + if set_ml_m_ext: + ext_predictions["d"]["ml_m"] = dml_lplr.predictions["ml_m"][:, :, 0] + ml_m = DMLDummyRegressor() + ext_predictions["d"]["ml_a"] = dml_lplr.predictions["ml_a"][:, :, 0] + ext_predictions["d"]["ml_a_inner"] = dml_lplr.predictions["ml_a_inner"][:, :, 0] + else: + ml_m = LinearRegression() + + # build second model with external predictions + dml_lplr_ext = DoubleMLLPLR(ml_M=ml_M, ml_t=ml_t, ml_m=ml_m, **kwargs) + + np.random.seed(3141) + dml_lplr_ext.fit(external_predictions=ext_predictions) + + res_dict = {"coef_normal": dml_lplr.coef[0], "coef_ext": dml_lplr_ext.coef[0]} + return res_dict + + +@pytest.mark.ci +def test_doubleml_lplr_coef(doubleml_lplr_fixture): + assert math.isclose(doubleml_lplr_fixture["coef_normal"], doubleml_lplr_fixture["coef_ext"], rel_tol=1e-9, abs_tol=1e-4) diff --git a/doubleml/utils/_estimation.py b/doubleml/utils/_estimation.py index d10ae48bc..1054feb35 100644 --- a/doubleml/utils/_estimation.py +++ b/doubleml/utils/_estimation.py @@ -363,9 +363,14 @@ def 
_set_external_predictions(external_predictions, learners, treatment, i_rep): ext_prediction_dict[learner] = None elif learner in external_predictions[treatment].keys(): if isinstance(external_predictions[treatment][learner], np.ndarray): - ext_prediction_dict[learner] = external_predictions[treatment][learner][:, i_rep] + ext_prediction_dict[learner] = external_predictions[treatment][learner][:, i_rep].astype(float) else: ext_prediction_dict[learner] = None + if f"{learner}_inner" in external_predictions[treatment].keys(): + if isinstance(external_predictions[treatment][f"{learner}_inner"], np.ndarray): + ext_prediction_dict[f"{learner}_inner"] = external_predictions[treatment][f"{learner}_inner"][:, i_rep] + else: + ext_prediction_dict[learner] = None else: ext_prediction_dict[learner] = None return ext_prediction_dict From cd6055b8f78db4bd6429a06ec2fd46c855f85ed8 Mon Sep 17 00:00:00 2001 From: Julius Herzig Date: Fri, 7 Nov 2025 18:05:07 -0800 Subject: [PATCH 28/48] Bugfix and addtl text --- doubleml/double_ml.py | 2 +- doubleml/plm/lplr.py | 11 +++++++++++ doubleml/plm/tests/test_lplr.py | 11 ++++++++++- doubleml/plm/tests/test_lplr_exceptions.py | 13 +++++++++++++ 4 files changed, 35 insertions(+), 2 deletions(-) diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index a95e2c7dc..1605391e1 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -268,7 +268,7 @@ def predictions_names(self): """ The names of predictions for the nuisance functions. """ - return list(self._learner.keys()) + return list(self.params_names) @property def learner_names(self): diff --git a/doubleml/plm/lplr.py b/doubleml/plm/lplr.py index 1bd905367..a0405032e 100644 --- a/doubleml/plm/lplr.py +++ b/doubleml/plm/lplr.py @@ -1,4 +1,5 @@ import inspect +import warnings import numpy as np import scipy @@ -133,6 +134,11 @@ def __init__( "but at least one treatment variable is not binary with values 0 and 1." 
) else: + if self._dml_data.binary_treats.any(): + warnings.warn( + f"The ml_m learner {str(ml_m)} was identified as regressor " + "but at least one treatment variable is binary with values 0 and 1." + ) self._predict_method["ml_m"] = "predict" if ml_a_is_classifier: @@ -144,6 +150,11 @@ def __init__( "but at least one treatment variable is not binary with values 0 and 1." ) else: + if self._dml_data.binary_treats.any(): + warnings.warn( + f"The ml_a learner {str(ml_a)} was identified as regressor but at least one treatment variable is " + f"binary with values 0 and 1." + ) self._predict_method["ml_a"] = "predict" if score == "instrument": diff --git a/doubleml/plm/tests/test_lplr.py b/doubleml/plm/tests/test_lplr.py index efba990d9..abd7adf55 100644 --- a/doubleml/plm/tests/test_lplr.py +++ b/doubleml/plm/tests/test_lplr.py @@ -22,6 +22,11 @@ def learner_m(request): return request.param +@pytest.fixture(scope="module", params=[RandomForestClassifier(random_state=42)]) +def learner_m_classifier(request): + return request.param + + @pytest.fixture(scope="module", params=["nuisance_space", "instrument"]) def score(request): return request.param @@ -39,6 +44,7 @@ def dml_lplr_fixture( learner_M, learner_t, learner_m, + learner_m_classifier, treatment, ): n_folds = 5 @@ -50,7 +56,10 @@ def dml_lplr_fixture( ml_M = clone(learner_M) ml_t = clone(learner_t) - ml_m = clone(learner_m) + if treatment == "continuous": + ml_m = clone(learner_m) + else: + ml_m = clone(learner_m_classifier) dml_sel_obj = dml.DoubleMLLPLR(obj_dml_data, ml_M, ml_t, ml_m, n_folds=n_folds, score=score) dml_sel_obj.fit() diff --git a/doubleml/plm/tests/test_lplr_exceptions.py b/doubleml/plm/tests/test_lplr_exceptions.py index c4c57fd98..03cb7158a 100644 --- a/doubleml/plm/tests/test_lplr_exceptions.py +++ b/doubleml/plm/tests/test_lplr_exceptions.py @@ -12,6 +12,7 @@ n = 100 # create test data and basic learners dml_data = make_lplr_LZZ2020(alpha=0.5, n_obs=n, dim_x=20) +dml_data_binary = 
make_lplr_LZZ2020(alpha=0.5, n_obs=n, treatment="binary", dim_x=20) ml_M = RandomForestClassifier() ml_t = RandomForestRegressor() ml_m = RandomForestRegressor() @@ -231,6 +232,18 @@ def test_lplr_exception_and_warning_learner(): msg = "Invalid learner provided for ml_M: " + r"Lasso\(\) has no method .predict_proba\(\)." with pytest.raises(TypeError, match=msg): _ = DoubleMLLPLR(dml_data, Lasso(), ml_t, ml_m) + msg = ( + r"The ml_m learner RandomForestRegressor\(\) was identified as regressor but at least one treatment " + r"variable is binary with values 0 and 1." + ) + with pytest.warns(match=msg): + _ = DoubleMLLPLR(dml_data_binary, ml_M, ml_t, ml_m) + msg = ( + r"The ml_a learner RandomForestRegressor\(\) was identified as regressor but at least one treatment " + r"variable is binary with values 0 and 1." + ) + with pytest.warns(match=msg): + _ = DoubleMLLPLR(dml_data_binary, ml_M, ml_t, ml_M, ml_a=ml_m) class LassoWithNanPred(Lasso): From 4a8be08efae7cb36ee46ad58a58edc43bb6e9bd6 Mon Sep 17 00:00:00 2001 From: Julius Herzig Date: Sun, 9 Nov 2025 23:45:48 -0800 Subject: [PATCH 29/48] Change to ext predictions --- doubleml/double_ml.py | 7 ++- doubleml/plm/lplr.py | 57 +++++++++++++++---- .../tests/test_lplr_external_predictions.py | 7 ++- doubleml/utils/_estimation.py | 5 -- 4 files changed, 56 insertions(+), 20 deletions(-) diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index 1605391e1..899bad4c0 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -1088,7 +1088,10 @@ def _initalize_fit(self, store_predictions, store_models): def _fit_nuisance_and_score_elements(self, n_jobs_cv, store_predictions, external_predictions, store_models): ext_prediction_dict = _set_external_predictions( - external_predictions, learners=self.params_names, treatment=self._dml_data.d_cols[self._i_treat], i_rep=self._i_rep + external_predictions, + learners=self.predictions_names, + treatment=self._dml_data.d_cols[self._i_treat], + i_rep=self._i_rep, ) # ml 
estimation of nuisance models and computation of score elements @@ -1153,7 +1156,7 @@ def _initialize_arrays(self): self._all_se = np.full((n_thetas, n_rep), np.nan) def _initialize_predictions_and_targets(self): - self._predictions = {learner: np.full(self._score_dim, np.nan, dtype=object) for learner in self.predictions_names} + self._predictions = {learner: np.full(self._score_dim, np.nan) for learner in self.predictions_names} self._nuisance_targets = { learner: np.full(self._score_dim, np.nan, dtype=object) for learner in self.predictions_names } diff --git a/doubleml/plm/lplr.py b/doubleml/plm/lplr.py index a0405032e..99ac77e08 100644 --- a/doubleml/plm/lplr.py +++ b/doubleml/plm/lplr.py @@ -112,7 +112,10 @@ def __init__( ml_m_is_classifier = self._check_learner(ml_m, "ml_m", regressor=True, classifier=True) self._learner = {"ml_m": ml_m, "ml_t": ml_t, "ml_M": ml_M} - self._predictions_names = ["ml_r", "ml_m", "ml_a", "ml_t", "ml_M", "ml_M_inner", "ml_a_inner"] + # replace aggregated inner names with per-inner-fold names + inner_M_names = [f"ml_M_inner_{i}" for i in range(self.n_folds_inner)] + inner_a_names = [f"ml_a_inner_{i}" for i in range(self.n_folds_inner)] + self._predictions_names = ["ml_r", "ml_m", "ml_a", "ml_t", "ml_M"] + inner_M_names + inner_a_names if ml_a is not None: ml_a_is_classifier = self._check_learner(ml_a, "ml_a", regressor=True, classifier=True) @@ -232,9 +235,14 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa a_external = external_predictions["ml_a"] is not None if M_external: - if "ml_M_inner" not in external_predictions.keys(): - raise ValueError("When providing external predictions for ml_M, also inner predictions have to be provided.") - M_hat_inner = np.squeeze(np.array(external_predictions["ml_M_inner"].tolist())).T + # expect per-inner-fold keys ml_M_inner_i + missing = [i for i in range(self.n_folds_inner) if f"ml_M_inner_{i}" not in external_predictions.keys()] + if len(missing) > 0: + 
raise ValueError( + "When providing external predictions for ml_M, also inner predictions for all inner folds " + f"have to be provided (missing: {', '.join([str(i) for i in missing])})." + ) + M_hat_inner = [external_predictions[f"ml_M_inner_{i}"] for i in range(self.n_folds_inner)] M_hat = {"preds": external_predictions["ml_M"], "preds_inner": M_hat_inner, "targets": None, "models": None} else: M_hat = self._double_dml_cv_predict( @@ -302,9 +310,14 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa ) if a_external: - if "ml_a_inner" not in external_predictions.keys(): - raise ValueError("When providing external predictions for ml_M, also inner predictions have to be provided.") - a_hat_inner = np.squeeze(np.array(external_predictions["ml_a_inner"].tolist())).T + # expect per-inner-fold keys ml_a_inner_i + missing = [i for i in range(self.n_folds_inner) if f"ml_a_inner_{i}" not in external_predictions.keys()] + if len(missing) > 0: + raise ValueError( + "When providing external predictions for ml_a, also inner predictions for all inner folds " + f"have to be provided (missing: {', '.join([str(i) for i in missing])})." 
+ ) + a_hat_inner = [external_predictions[f"ml_a_inner_{i}"] for i in range(self.n_folds_inner)] a_hat = {"preds": external_predictions["ml_a"], "preds_inner": a_hat_inner, "targets": None, "models": None} else: a_hat = self._double_dml_cv_predict( @@ -358,8 +371,11 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa "ml_a": a_hat["preds"], "ml_t": t_hat["preds"], "ml_M": M_hat["preds"], - "ml_M_inner": np.moveaxis(M_hat["preds_inner"], 0, -1).tolist(), - "ml_a_inner": np.moveaxis(a_hat["preds_inner"], 0, -1).tolist(), + # store inner predictions as separate keys per inner fold + # ml_M inner + **{f"ml_M_inner_{i}": M_hat["preds_inner"][i] for i in range(len(M_hat["preds_inner"]))}, + # ml_a inner + **{f"ml_a_inner_{i}": a_hat["preds_inner"][i] for i in range(len(a_hat["preds_inner"]))}, }, "targets": { "ml_r": None, @@ -367,8 +383,27 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa "ml_a": a_hat["targets"], "ml_t": t_hat["targets"], "ml_M": M_hat["targets"], - "ml_M_inner": np.moveaxis(M_hat["targets_inner"], 0, -1).tolist() if not M_external else None, - "ml_a_inner": np.moveaxis(a_hat["targets_inner"], 0, -1).tolist() if not a_external else None, + # store inner targets as separate keys per inner fold (None if external) + **( + { + f"ml_M_inner_{i}": ( + M_hat.get("targets_inner")[i] + if M_hat.get("targets_inner") is not None and i < len(M_hat["targets_inner"]) + else None + ) + for i in range(len(M_hat.get("preds_inner", []))) + } + ), + **( + { + f"ml_a_inner_{i}": ( + a_hat.get("targets_inner")[i] + if a_hat.get("targets_inner") is not None and i < len(a_hat["targets_inner"]) + else None + ) + for i in range(len(a_hat.get("preds_inner", []))) + } + ), }, "models": { "ml_r": None, diff --git a/doubleml/plm/tests/test_lplr_external_predictions.py b/doubleml/plm/tests/test_lplr_external_predictions.py index 670860386..5e9b66e87 100644 --- a/doubleml/plm/tests/test_lplr_external_predictions.py 
+++ b/doubleml/plm/tests/test_lplr_external_predictions.py @@ -56,7 +56,9 @@ def doubleml_lplr_fixture(lplr_score, n_rep, set_ml_m_ext, set_ml_t_ext, set_ml_ # prepare external predictions and dummy learners if set_ml_M_ext: ext_predictions["d"]["ml_M"] = dml_lplr.predictions["ml_M"][:, :, 0] - ext_predictions["d"]["ml_M_inner"] = dml_lplr.predictions["ml_M_inner"][:, :, 0] + # provide inner predictions per inner fold index + for i in range(dml_lplr.n_folds_inner): + ext_predictions["d"][f"ml_M_inner_{i}"] = dml_lplr.predictions[f"ml_M_inner_{i}"][:, :, 0] ml_M = DMLDummyClassifier() else: ml_M = LogisticRegression(max_iter=1000) @@ -71,7 +73,8 @@ def doubleml_lplr_fixture(lplr_score, n_rep, set_ml_m_ext, set_ml_t_ext, set_ml_ ext_predictions["d"]["ml_m"] = dml_lplr.predictions["ml_m"][:, :, 0] ml_m = DMLDummyRegressor() ext_predictions["d"]["ml_a"] = dml_lplr.predictions["ml_a"][:, :, 0] - ext_predictions["d"]["ml_a_inner"] = dml_lplr.predictions["ml_a_inner"][:, :, 0] + for i in range(dml_lplr.n_folds_inner): + ext_predictions["d"][f"ml_a_inner_{i}"] = dml_lplr.predictions[f"ml_a_inner_{i}"][:, :, 0] else: ml_m = LinearRegression() diff --git a/doubleml/utils/_estimation.py b/doubleml/utils/_estimation.py index 1054feb35..aaf21ea34 100644 --- a/doubleml/utils/_estimation.py +++ b/doubleml/utils/_estimation.py @@ -366,11 +366,6 @@ def _set_external_predictions(external_predictions, learners, treatment, i_rep): ext_prediction_dict[learner] = external_predictions[treatment][learner][:, i_rep].astype(float) else: ext_prediction_dict[learner] = None - if f"{learner}_inner" in external_predictions[treatment].keys(): - if isinstance(external_predictions[treatment][f"{learner}_inner"], np.ndarray): - ext_prediction_dict[f"{learner}_inner"] = external_predictions[treatment][f"{learner}_inner"][:, i_rep] - else: - ext_prediction_dict[learner] = None else: ext_prediction_dict[learner] = None return ext_prediction_dict From 0472f1cfe8d6fc74587dd02f0c3d3d98c2aeeb78 Mon Sep 17 
00:00:00 2001 From: Julius Herzig Date: Mon, 10 Nov 2025 07:41:10 -0800 Subject: [PATCH 30/48] Change to targets data type --- doubleml/double_ml.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index 899bad4c0..33e5e75e6 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -1157,9 +1157,7 @@ def _initialize_arrays(self): def _initialize_predictions_and_targets(self): self._predictions = {learner: np.full(self._score_dim, np.nan) for learner in self.predictions_names} - self._nuisance_targets = { - learner: np.full(self._score_dim, np.nan, dtype=object) for learner in self.predictions_names - } + self._nuisance_targets = {learner: np.full(self._score_dim, np.nan) for learner in self.predictions_names} def _initialize_nuisance_loss(self): self._nuisance_loss = {learner: np.full((self.n_rep, self._dml_data.n_coefs), np.nan) for learner in self.params_names} From 2fc1f538b8e921b04e1909e712c7e0a65aa42612 Mon Sep 17 00:00:00 2001 From: Julius Herzig Date: Mon, 10 Nov 2025 11:22:49 -0800 Subject: [PATCH 31/48] DoubleResamplin integrated into mixin, small changes --- doubleml/double_ml.py | 26 ++++++++++++++++- doubleml/double_ml_sampling_mixins.py | 25 +++++++++++++---- doubleml/plm/lplr.py | 40 ++++----------------------- doubleml/utils/_estimation.py | 2 +- 4 files changed, 51 insertions(+), 42 deletions(-) diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index 33e5e75e6..9295e0930 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -22,7 +22,7 @@ class DoubleML(SampleSplittingMixin, ABC): """Double Machine Learning.""" - def __init__(self, obj_dml_data, n_folds, n_rep, score, draw_sample_splitting): + def __init__(self, obj_dml_data, n_folds, n_rep, score, draw_sample_splitting, double_sample_splitting=False): # check and pick up obj_dml_data if not isinstance(obj_dml_data, DoubleMLBaseData): raise TypeError( @@ -108,6 +108,9 @@ def __init__(self, obj_dml_data, 
n_folds, n_rep, score, draw_sample_splitting): self._smpls = None self._smpls_cluster = None self._n_obs_sample_splitting = self.n_obs + self._double_sample_splitting = double_sample_splitting + if self._smpls_cluster is True: + self.__smpls__inner = None if draw_sample_splitting: self.draw_sample_splitting() self._score_dim = (self._dml_data.n_obs, self.n_rep, self._dml_data.n_coefs) @@ -366,6 +369,21 @@ def smpls(self): raise ValueError(err_msg) return self._smpls + @property + def smpls_inner(self): + """ + The partition used for cross-fitting. + """ + if not self._double_sample_splitting: + raise ValueError("smpls_inner is only available for double sample splitting.") + if self._smpls is None: + err_msg = ( + "Sample splitting not specified. Either draw samples via .draw_sample splitting() " + + "or set external samples via .set_sample_splitting()." + ) + raise ValueError(err_msg) + return self._smpls + @property def smpls_cluster(self): """ @@ -514,6 +532,12 @@ def summary(self): def __smpls(self): return self._smpls[self._i_rep] + @property + def __smpls__inner(self): + if not self._smpls_inner[self._i_rep]: + raise ValueError("smpls_inner is only available for double sample splitting.") + return self._smpls_inner[self._i_rep] + @property def __smpls_cluster(self): return self._smpls_cluster[self._i_rep] diff --git a/doubleml/double_ml_sampling_mixins.py b/doubleml/double_ml_sampling_mixins.py index d7d8b2e14..97ed5aa59 100644 --- a/doubleml/double_ml_sampling_mixins.py +++ b/doubleml/double_ml_sampling_mixins.py @@ -1,7 +1,7 @@ from abc import abstractmethod from doubleml.utils._checks import _check_sample_splitting -from doubleml.utils.resampling import DoubleMLClusterResampling, DoubleMLResampling +from doubleml.utils.resampling import DoubleMLClusterResampling, DoubleMLDoubleResampling, DoubleMLResampling class SampleSplittingMixin: @@ -29,6 +29,8 @@ def draw_sample_splitting(self): self : object """ if self._is_cluster_data: + if 
self._double_sample_splitting: + raise ValueError("Cluster data not supported for double sample splitting.") obj_dml_resampling = DoubleMLClusterResampling( n_folds=self._n_folds_per_cluster, n_rep=self.n_rep, @@ -38,10 +40,20 @@ def draw_sample_splitting(self): ) self._smpls, self._smpls_cluster = obj_dml_resampling.split_samples() else: - obj_dml_resampling = DoubleMLResampling( - n_folds=self.n_folds, n_rep=self.n_rep, n_obs=self._n_obs_sample_splitting, stratify=self._strata - ) - self._smpls = obj_dml_resampling.split_samples() + if self._double_sample_splitting: + obj_dml_resampling = DoubleMLDoubleResampling( + n_folds=self.n_folds, + n_folds_inner=self.n_folds_inner, + n_rep=self.n_rep, + n_obs=self._dml_data.n_obs, + stratify=self._strata, + ) + self._smpls, self._smpls_inner = obj_dml_resampling.split_samples() + else: + obj_dml_resampling = DoubleMLResampling( + n_folds=self.n_folds, n_rep=self.n_rep, n_obs=self._n_obs_sample_splitting, stratify=self._strata + ) + self._smpls = obj_dml_resampling.split_samples() return self @@ -104,6 +116,9 @@ def set_sample_splitting(self, all_smpls, all_smpls_cluster=None): >>> dml_plr_obj.set_sample_splitting(smpls) # doctest: +ELLIPSIS """ + if self._double_sample_splitting: + raise ValueError("set_sample_splitting not supported for double sample splitting.") + self._smpls, self._smpls_cluster, self._n_rep, self._n_folds = _check_sample_splitting( all_smpls, all_smpls_cluster, self._dml_data, self._is_cluster_data, n_obs=self._n_obs_sample_splitting ) diff --git a/doubleml/plm/lplr.py b/doubleml/plm/lplr.py index 99ac77e08..d390067f8 100644 --- a/doubleml/plm/lplr.py +++ b/doubleml/plm/lplr.py @@ -15,7 +15,6 @@ _dml_cv_predict, _dml_tune, ) -from doubleml.utils.resampling import DoubleMLDoubleResampling class DoubleMLLPLR(NonLinearScoreMixin, DoubleML): @@ -91,14 +90,11 @@ def __init__( score="nuisance_space", draw_sample_splitting=True, error_on_convergence_failure=False, + double_sample_splitting=True, ): 
self.n_folds_inner = n_folds_inner super().__init__(obj_dml_data, n_folds, n_rep, score, draw_sample_splitting) - # Ensure outcome only contains 0 and 1 (validate early in constructor) - if not np.array_equal(np.unique(obj_dml_data.y), [0, 1]): - raise TypeError("The outcome variable y must be binary with values 0 and 1.") - self._error_on_convergence_failure = error_on_convergence_failure self._coef_bounds = (-1e-2, 1e2) self._coef_start_val = 1.0 @@ -167,11 +163,15 @@ def __init__( self._initialize_ml_nuisance_params() self._external_predictions_implemented = True + self._sensitivity_implemented = False def _initialize_ml_nuisance_params(self): self._params = {learner: {key: [None] * self.n_rep for key in self._dml_data.d_cols} for learner in self._learner} def _check_data(self, obj_dml_data): + # Ensure outcome only contains 0 and 1 (validate early in constructor) + if not np.array_equal(np.unique(obj_dml_data.y), [0, 1]): + raise TypeError("The outcome variable y must be binary with values 0 and 1.") if not isinstance(obj_dml_data, DoubleMLData): raise TypeError( f"The data must be of DoubleMLData type. {str(obj_dml_data)} of type {str(type(obj_dml_data))} was passed." @@ -562,36 +562,6 @@ def _nuisance_tuning( return res - @property - def __smpls__inner(self): - return self._smpls_inner[self._i_rep] - - def draw_sample_splitting(self): - """ - Draw sample splitting for DoubleML models. - - The samples are drawn according to the attributes - ``n_folds`` and ``n_rep``. 
- - Returns - ------- - self : object - """ - - obj_dml_resampling = DoubleMLDoubleResampling( - n_folds=self.n_folds, - n_folds_inner=self.n_folds_inner, - n_rep=self.n_rep, - n_obs=self._dml_data.n_obs, - stratify=self._strata, - ) - self._smpls, self._smpls_inner = obj_dml_resampling.split_samples() - - return self - - def set_sample_splitting(self, all_smpls, all_smpls_cluster=None): - raise NotImplementedError("set_sample_splitting is not implemented for DoubleMLLPLR.") - def _compute_score(self, psi_elements, coef): if self.score == "nuisance_space": score_1 = psi_elements["y"] * np.exp(-coef * psi_elements["d"]) * psi_elements["d_tilde"] diff --git a/doubleml/utils/_estimation.py b/doubleml/utils/_estimation.py index aaf21ea34..8dc631bcf 100644 --- a/doubleml/utils/_estimation.py +++ b/doubleml/utils/_estimation.py @@ -331,7 +331,7 @@ def _var_est(psi, psi_deriv, smpls, is_cluster_data, cluster_vars=None, smpls_cl J_l = test_cluster_inds[1] const = np.divide(min(len(I_k), len(J_l)), (np.square(len(I_k) * len(J_l)))) for cluster_value in I_k: - ind_cluster = (first_cluster_var == cluster_value) & np.in1d(second_cluster_var, J_l) + ind_cluster = (first_cluster_var == cluster_value) & np.isin(second_cluster_var, J_l) gamma_hat += const * np.sum(np.outer(psi[ind_cluster], psi[ind_cluster])) for cluster_value in J_l: ind_cluster = (second_cluster_var == cluster_value) & np.isin(first_cluster_var, I_k) From ecfe2c7b004930259495bcd94d1ae3d1fe15b768 Mon Sep 17 00:00:00 2001 From: Julius Herzig Date: Mon, 10 Nov 2025 12:02:40 -0800 Subject: [PATCH 32/48] Added attribute to sample mixin --- doubleml/double_ml.py | 4 ++-- doubleml/double_ml_sampling_mixins.py | 2 ++ doubleml/plm/lplr.py | 3 +-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index 9295e0930..6032fd789 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -109,8 +109,8 @@ def __init__(self, obj_dml_data, n_folds, n_rep, score, 
draw_sample_splitting, d self._smpls_cluster = None self._n_obs_sample_splitting = self.n_obs self._double_sample_splitting = double_sample_splitting - if self._smpls_cluster is True: - self.__smpls__inner = None + if self._double_sample_splitting: + self._smpls_inner = None if draw_sample_splitting: self.draw_sample_splitting() self._score_dim = (self._dml_data.n_obs, self.n_rep, self._dml_data.n_coefs) diff --git a/doubleml/double_ml_sampling_mixins.py b/doubleml/double_ml_sampling_mixins.py index 97ed5aa59..2f63d88e2 100644 --- a/doubleml/double_ml_sampling_mixins.py +++ b/doubleml/double_ml_sampling_mixins.py @@ -17,6 +17,8 @@ class SampleSplittingMixin: `sample splitting `_ in the DoubleML user guide. """ + _double_sample_splitting = False + def draw_sample_splitting(self): """ Draw sample splitting for DoubleML models. diff --git a/doubleml/plm/lplr.py b/doubleml/plm/lplr.py index d390067f8..c9580b840 100644 --- a/doubleml/plm/lplr.py +++ b/doubleml/plm/lplr.py @@ -90,10 +90,9 @@ def __init__( score="nuisance_space", draw_sample_splitting=True, error_on_convergence_failure=False, - double_sample_splitting=True, ): self.n_folds_inner = n_folds_inner - super().__init__(obj_dml_data, n_folds, n_rep, score, draw_sample_splitting) + super().__init__(obj_dml_data, n_folds, n_rep, score, draw_sample_splitting, double_sample_splitting=True) self._error_on_convergence_failure = error_on_convergence_failure self._coef_bounds = (-1e-2, 1e2) From a9c0debb509f42cab4d27663a7ec50c8da4d5e0b Mon Sep 17 00:00:00 2001 From: Julius Herzig Date: Mon, 10 Nov 2025 13:44:35 -0800 Subject: [PATCH 33/48] Smpls inner access adjusted --- doubleml/double_ml.py | 4 ++-- doubleml/plm/lplr.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index 6032fd789..19811cb87 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -376,13 +376,13 @@ def smpls_inner(self): """ if not self._double_sample_splitting: raise 
ValueError("smpls_inner is only available for double sample splitting.") - if self._smpls is None: + if self._smpls_inner is None: err_msg = ( "Sample splitting not specified. Either draw samples via .draw_sample splitting() " + "or set external samples via .set_sample_splitting()." ) raise ValueError(err_msg) - return self._smpls + return self._smpls_inner @property def smpls_cluster(self): diff --git a/doubleml/plm/lplr.py b/doubleml/plm/lplr.py index c9580b840..42336a619 100644 --- a/doubleml/plm/lplr.py +++ b/doubleml/plm/lplr.py @@ -250,7 +250,7 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa x_d_concat, y, smpls=smpls, - smpls_inner=self.__smpls__inner, + smpls_inner=self._DoubleML__smpls__inner, n_jobs=n_jobs_cv, est_params=self._get_params("ml_M"), method=self._predict_method["ml_M"], @@ -325,7 +325,7 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa x, d, smpls=smpls, - smpls_inner=self.__smpls__inner, + smpls_inner=self._DoubleML__smpls__inner, n_jobs=n_jobs_cv, est_params=self._get_params("ml_a"), method=self._predict_method["ml_a"], @@ -518,7 +518,7 @@ def _nuisance_tuning( x_d_concat, y, smpls=smpls, - smpls_inner=self.__smpls__inner, + smpls_inner=self._DoubleML__smpls__inner, n_jobs=n_jobs_cv, est_params=M_best_params, method=self._predict_method["ml_M"], From 6abff491210391cb4b6841644cc31edf40c49978 Mon Sep 17 00:00:00 2001 From: Julius Herzig Date: Mon, 10 Nov 2025 17:21:12 -0800 Subject: [PATCH 34/48] Docstring, complexity reduction --- doubleml/double_ml.py | 16 ++++------------ doubleml/plm/lplr.py | 6 +++--- 2 files changed, 7 insertions(+), 15 deletions(-) diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index 19811cb87..bdfabad18 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -34,18 +34,10 @@ def __init__(self, obj_dml_data, n_folds, n_rep, score, draw_sample_splitting, d if obj_dml_data.n_cluster_vars > 2: raise NotImplementedError("Multi-way 
(n_ways > 2) clustering not yet implemented.") self._is_cluster_data = True - self._is_panel_data = False - if isinstance(obj_dml_data, DoubleMLPanelData): - self._is_panel_data = True - self._is_did_data = False - if isinstance(obj_dml_data, DoubleMLDIDData): - self._is_did_data = True - self._is_ssm_data = False - if isinstance(obj_dml_data, DoubleMLSSMData): - self._is_ssm_data = True - self._is_rdd_data = False - if isinstance(obj_dml_data, DoubleMLRDDData): - self._is_rdd_data = True + self._is_panel_data = isinstance(obj_dml_data, DoubleMLPanelData) + self._is_did_data = isinstance(obj_dml_data, DoubleMLDIDData) + self._is_ssm_data = isinstance(obj_dml_data, DoubleMLSSMData) + self._is_rdd_data = isinstance(obj_dml_data, DoubleMLRDDData) self._dml_data = obj_dml_data self._n_obs = self._dml_data.n_obs diff --git a/doubleml/plm/lplr.py b/doubleml/plm/lplr.py index 42336a619..9e0ed4060 100644 --- a/doubleml/plm/lplr.py +++ b/doubleml/plm/lplr.py @@ -60,10 +60,10 @@ class DoubleMLLPLR(NonLinearScoreMixin, DoubleML): >>> ml_m = RandomForestRegressor(n_estimators=100, max_features=20, max_depth=5, min_samples_leaf=2) >>> ml_M = RandomForestClassifier(n_estimators=100, max_features=20, max_depth=5, min_samples_leaf=2) >>> obj_dml_data = make_lplr_LZZ2020(alpha=0.5, n_obs=500, dim_x=20) - >>> dml_lplr_obj = dml.DoubleMLPLR(obj_dml_data, ml_M, ml_t, ml_m) + >>> dml_lplr_obj = dml.DoubleMLLPLR(obj_dml_data, ml_M, ml_t, ml_m) >>> dml_lplr_obj.fit().summary - coef std err t P>|t| 2.5 % 97.5 % - d 0.480691 0.040533 11.859129 1.929729e-32 0.401247 0.560135 + coef std err t P>|t| 2.5 % 97.5 % + d 0.661166 0.172672 3.829038 0.000129 0.322736 0.999596 Notes ----- From 0f08e370b36a22062b762763c58203da6db99a56 Mon Sep 17 00:00:00 2001 From: Julius Herzig Date: Mon, 10 Nov 2025 18:42:31 -0800 Subject: [PATCH 35/48] Weights updated, seed corrected --- doubleml/plm/lplr.py | 48 ++++++++++++---------------- doubleml/utils/_estimation.py | 60 +++++++++++++++++++++++++---------- 2 
files changed, 63 insertions(+), 45 deletions(-) diff --git a/doubleml/plm/lplr.py b/doubleml/plm/lplr.py index 9e0ed4060..701a7fcd3 100644 --- a/doubleml/plm/lplr.py +++ b/doubleml/plm/lplr.py @@ -55,7 +55,7 @@ class DoubleMLLPLR(NonLinearScoreMixin, DoubleML): >>> from doubleml.plm.datasets import make_lplr_LZZ2020 >>> from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier >>> from sklearn.base import clone - >>> np.random.seed(3141) + >>> np.random.seed(42) >>> ml_t = RandomForestRegressor(n_estimators=100, max_features=20, max_depth=5, min_samples_leaf=2) >>> ml_m = RandomForestRegressor(n_estimators=100, max_features=20, max_depth=5, min_samples_leaf=2) >>> ml_M = RandomForestClassifier(n_estimators=100, max_features=20, max_depth=5, min_samples_leaf=2) @@ -95,7 +95,6 @@ def __init__( super().__init__(obj_dml_data, n_folds, n_rep, score, draw_sample_splitting, double_sample_splitting=True) self._error_on_convergence_failure = error_on_convergence_failure - self._coef_bounds = (-1e-2, 1e2) self._coef_start_val = 1.0 self._check_data(self._dml_data) @@ -207,7 +206,7 @@ def _double_dml_cv_predict( est_params=est_params, method=method, return_models=True, - smpls_is_partition=True, + smpls_is_partition_manual_set=True, sample_weights=sample_weights, ) _check_finite_predictions(res_inner["preds"], estimator, estimator_name, smpls_double_split) @@ -261,36 +260,26 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa m_hat = {"preds": external_predictions["ml_m"], "targets": None, "models": None} else: if self.score == "instrument": - weights = [] - for i, (train, test) in enumerate(smpls): - weights.append(M_hat["preds_inner"][i][train] * (1 - M_hat["preds_inner"][i][train])) - m_hat = _dml_cv_predict( - self._learner["ml_m"], - x, - d, - smpls=smpls, - n_jobs=n_jobs_cv, - est_params=self._get_params("ml_m"), - method=self._predict_method["ml_m"], - return_models=return_models, - sample_weights=weights, - ) - + 
weights = M_hat["preds"] * (1 - M_hat["preds"]) + filtered_smpls = smpls elif self.score == "nuisance_space": filtered_smpls = [] for train, test in smpls: train_filtered = train[y[train] == 0] filtered_smpls.append((train_filtered, test)) - m_hat = _dml_cv_predict( - self._learner["ml_m"], - x, - d, - smpls=filtered_smpls, - n_jobs=n_jobs_cv, - est_params=self._get_params("ml_m"), - method=self._predict_method["ml_m"], - return_models=return_models, - ) + weights = None + + m_hat = _dml_cv_predict( + self._learner["ml_m"], + x, + d, + smpls=smpls, + n_jobs=n_jobs_cv, + est_params=self._get_params("ml_m"), + method=self._predict_method["ml_m"], + return_models=return_models, + sample_weights=weights, + ) _check_finite_predictions(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls) @@ -342,6 +331,9 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa d_tilde = (d - a_hat["preds_inner"][i])[train] beta[test] = np.sum(d_tilde * w) / np.sum(d_tilde**2) + # Use preliminary beta estimates as starting value for root finding + self._coef_start_val = np.average(beta) + # nuisance t if t_external: t_hat = {"preds": external_predictions["ml_t"], "targets": None, "models": None} diff --git a/doubleml/utils/_estimation.py b/doubleml/utils/_estimation.py index 8dc631bcf..aff0729ec 100644 --- a/doubleml/utils/_estimation.py +++ b/doubleml/utils/_estimation.py @@ -38,8 +38,11 @@ def _get_cond_smpls_2d(smpls, bin_var1, bin_var2): return smpls_00, smpls_01, smpls_10, smpls_11 -def _fit(estimator, x, y, train_index, idx=None): - estimator.fit(x[train_index, :], y[train_index]) +def _fit(estimator, x, y, train_index, idx=None, sample_weights=None): + if sample_weights is not None: + estimator.fit(x[train_index, :], y[train_index], sample_weights=sample_weights[train_index]) + else: + estimator.fit(x[train_index, :], y[train_index]) return estimator, idx @@ -53,36 +56,50 @@ def _dml_cv_predict( method="predict", return_train_preds=False, 
return_models=False, - smpls_is_partition=None, + smpls_is_partition_manual_set=None, sample_weights=None, ): n_obs = x.shape[0] - # TODO: Better name for smples_is_partition - if smpls_is_partition is None: + if smpls_is_partition_manual_set is None: smpls_is_partition = _check_is_partition(smpls, n_obs) + else: + smpls_is_partition = smpls_is_partition_manual_set fold_specific_params = (est_params is not None) & (not isinstance(est_params, dict)) fold_specific_target = isinstance(y, list) manual_cv_predict = ( - (not smpls_is_partition) - | return_train_preds - | fold_specific_params - | fold_specific_target - | return_models - | bool(sample_weights) + (not smpls_is_partition) | return_train_preds | fold_specific_params | fold_specific_target | return_models ) - # TODO: Check if cross_val_predict supports weights res = {"models": None} if not manual_cv_predict: + # prepare fit_params for cross_val_predict + fit_params_for_cv = {"sample_weight": sample_weights} if sample_weights is not None else None + if est_params is None: # if there are no parameters set we redirect to the standard method - preds = cross_val_predict(clone(estimator), x, y, cv=smpls, n_jobs=n_jobs, method=method) + preds = cross_val_predict( + clone(estimator), + x, + y, + cv=smpls, + n_jobs=n_jobs, + method=method, + params=fit_params_for_cv, + ) else: assert isinstance(est_params, dict) # if no fold-specific parameters we redirect to the standard method # warnings.warn("Using the same (hyper-)parameters for all folds") - preds = cross_val_predict(clone(estimator).set_params(**est_params), x, y, cv=smpls, n_jobs=n_jobs, method=method) + preds = cross_val_predict( + clone(estimator).set_params(**est_params), + x, + y, + cv=smpls, + n_jobs=n_jobs, + method=method, + params=fit_params_for_cv, + ) if method == "predict_proba": res["preds"] = preds[:, 1] else: @@ -113,19 +130,28 @@ def _dml_cv_predict( if est_params is None: fitted_models = parallel( - delayed(_fit)(clone(estimator), x, y_list[idx], 
train_index, idx) + delayed(_fit)(clone(estimator), x, y_list[idx], train_index, idx, sample_weights=sample_weights) for idx, (train_index, test_index) in enumerate(smpls) ) elif isinstance(est_params, dict): # warnings.warn("Using the same (hyper-)parameters for all folds") fitted_models = parallel( - delayed(_fit)(clone(estimator).set_params(**est_params), x, y_list[idx], train_index, idx) + delayed(_fit)( + clone(estimator).set_params(**est_params), x, y_list[idx], train_index, idx, sample_weights=sample_weights + ) for idx, (train_index, test_index) in enumerate(smpls) ) else: assert len(est_params) == len(smpls), "provide one parameter setting per fold" fitted_models = parallel( - delayed(_fit)(clone(estimator).set_params(**est_params[idx]), x, y_list[idx], train_index, idx) + delayed(_fit)( + clone(estimator).set_params(**est_params[idx]), + x, + y_list[idx], + train_index, + idx, + sample_weights=sample_weights, + ) for idx, (train_index, test_index) in enumerate(smpls) ) From 430f4a6abc48853d5704676805a229722f21c0ed Mon Sep 17 00:00:00 2001 From: Julius Herzig Date: Mon, 10 Nov 2025 20:37:31 -0800 Subject: [PATCH 36/48] Fix --- doubleml/utils/_estimation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doubleml/utils/_estimation.py b/doubleml/utils/_estimation.py index aff0729ec..d4e8abc4d 100644 --- a/doubleml/utils/_estimation.py +++ b/doubleml/utils/_estimation.py @@ -40,7 +40,7 @@ def _get_cond_smpls_2d(smpls, bin_var1, bin_var2): def _fit(estimator, x, y, train_index, idx=None, sample_weights=None): if sample_weights is not None: - estimator.fit(x[train_index, :], y[train_index], sample_weights=sample_weights[train_index]) + estimator.fit(x[train_index, :], y[train_index], sample_weight=sample_weights[train_index]) else: estimator.fit(x[train_index, :], y[train_index]) return estimator, idx From 5b92395b47ce93ccf5271b63ce55cc84aa820067 Mon Sep 17 00:00:00 2001 From: Julius Herzig Date: Mon, 10 Nov 2025 20:50:44 -0800 Subject: [PATCH 
37/48] Renaming --- doubleml/plm/lplr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doubleml/plm/lplr.py b/doubleml/plm/lplr.py index 701a7fcd3..7b47f5665 100644 --- a/doubleml/plm/lplr.py +++ b/doubleml/plm/lplr.py @@ -273,7 +273,7 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa self._learner["ml_m"], x, d, - smpls=smpls, + smpls=filtered_smpls, n_jobs=n_jobs_cv, est_params=self._get_params("ml_m"), method=self._predict_method["ml_m"], From 042aa26e114e73852d0ad9a673a6c03b8c8d5d0b Mon Sep 17 00:00:00 2001 From: Julius Herzig Date: Mon, 10 Nov 2025 22:03:02 -0800 Subject: [PATCH 38/48] Doctest --- doubleml/plm/lplr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doubleml/plm/lplr.py b/doubleml/plm/lplr.py index 7b47f5665..67f907a40 100644 --- a/doubleml/plm/lplr.py +++ b/doubleml/plm/lplr.py @@ -61,7 +61,7 @@ class DoubleMLLPLR(NonLinearScoreMixin, DoubleML): >>> ml_M = RandomForestClassifier(n_estimators=100, max_features=20, max_depth=5, min_samples_leaf=2) >>> obj_dml_data = make_lplr_LZZ2020(alpha=0.5, n_obs=500, dim_x=20) >>> dml_lplr_obj = dml.DoubleMLLPLR(obj_dml_data, ml_M, ml_t, ml_m) - >>> dml_lplr_obj.fit().summary + >>> dml_lplr_obj.fit().summary # doctest: +SKIP coef std err t P>|t| 2.5 % 97.5 % d 0.661166 0.172672 3.829038 0.000129 0.322736 0.999596 From 3b6f3b732dfacd3dc8ff3a8d4d8995e6b3f3aad4 Mon Sep 17 00:00:00 2001 From: Julius Herzig Date: Wed, 12 Nov 2025 12:14:41 -0800 Subject: [PATCH 39/48] Test updated and comments implemented --- doubleml/double_ml.py | 2 +- doubleml/irm/tests/test_datasets.py | 156 ++++++++++ doubleml/plm/lplr.py | 18 +- doubleml/plm/tests/test_datasets.py | 151 +++++++++ doubleml/plm/tests/test_lplr_exceptions.py | 38 ++- .../tests/test_lplr_external_predictions.py | 57 +++- doubleml/tests/test_datasets.py | 294 ------------------ doubleml/tests/test_exceptions.py | 28 +- doubleml/tests/test_set_sample_splitting.py | 8 + 9 files changed, 445 
insertions(+), 307 deletions(-) create mode 100644 doubleml/irm/tests/test_datasets.py create mode 100644 doubleml/plm/tests/test_datasets.py diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index bdfabad18..638cb31ec 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -526,7 +526,7 @@ def __smpls(self): @property def __smpls__inner(self): - if not self._smpls_inner[self._i_rep]: + if not self.smpls_inner[self._i_rep]: raise ValueError("smpls_inner is only available for double sample splitting.") return self._smpls_inner[self._i_rep] diff --git a/doubleml/irm/tests/test_datasets.py b/doubleml/irm/tests/test_datasets.py new file mode 100644 index 000000000..79bf67940 --- /dev/null +++ b/doubleml/irm/tests/test_datasets.py @@ -0,0 +1,156 @@ +import numpy as np +import pandas as pd +import pytest + +from doubleml import DoubleMLData +from doubleml.irm.datasets import ( + make_confounded_irm_data, + make_heterogeneous_data, + make_iivm_data, + make_irm_data, + make_irm_data_discrete_treatments, + make_ssm_data, +) + +msg_inv_return_type = "Invalid return_type." 
+ + +@pytest.mark.ci +def test_make_irm_data_return_types(): + np.random.seed(3141) + res = make_irm_data(n_obs=100, return_type="DoubleMLData") + assert isinstance(res, DoubleMLData) + res = make_irm_data(n_obs=100, return_type="DataFrame") + assert isinstance(res, pd.DataFrame) + x, y, d = make_irm_data(n_obs=100, return_type="array") + assert isinstance(x, np.ndarray) + assert isinstance(y, np.ndarray) + assert isinstance(d, np.ndarray) + with pytest.raises(ValueError, match=msg_inv_return_type): + _ = make_irm_data(n_obs=100, return_type="matrix") + + +@pytest.mark.ci +def test_make_iivm_data_return_types(): + np.random.seed(3141) + res = make_iivm_data(n_obs=100, return_type="DoubleMLData") + assert isinstance(res, DoubleMLData) + res = make_iivm_data(n_obs=100, return_type="DataFrame") + assert isinstance(res, pd.DataFrame) + x, y, d, z = make_iivm_data(n_obs=100, return_type="array") + assert isinstance(x, np.ndarray) + assert isinstance(y, np.ndarray) + assert isinstance(d, np.ndarray) + assert isinstance(z, np.ndarray) + with pytest.raises(ValueError, match=msg_inv_return_type): + _ = make_iivm_data(n_obs=100, return_type="matrix") + + +@pytest.fixture(scope="function", params=[True, False]) +def linear(request): + return request.param + + +@pytest.mark.ci +def test_make_confounded_irm_data_return_types(linear): + np.random.seed(3141) + res = make_confounded_irm_data(linear=linear) + assert isinstance(res, dict) + assert isinstance(res["x"], np.ndarray) + assert isinstance(res["y"], np.ndarray) + assert isinstance(res["d"], np.ndarray) + + assert isinstance(res["oracle_values"], dict) + assert isinstance(res["oracle_values"]["g_long"], np.ndarray) + assert isinstance(res["oracle_values"]["g_short"], np.ndarray) + assert isinstance(res["oracle_values"]["m_long"], np.ndarray) + assert isinstance(res["oracle_values"]["m_short"], np.ndarray) + assert isinstance(res["oracle_values"]["gamma_a"], float) + assert isinstance(res["oracle_values"]["beta_a"], float) + 
assert isinstance(res["oracle_values"]["a"], np.ndarray) + assert isinstance(res["oracle_values"]["y_0"], np.ndarray) + assert isinstance(res["oracle_values"]["y_1"], np.ndarray) + assert isinstance(res["oracle_values"]["z"], np.ndarray) + assert isinstance(res["oracle_values"]["cf_y"], float) + assert isinstance(res["oracle_values"]["cf_d_ate"], float) + assert isinstance(res["oracle_values"]["cf_d_atte"], float) + assert isinstance(res["oracle_values"]["rho_ate"], float) + assert isinstance(res["oracle_values"]["rho_atte"], float) + + +@pytest.fixture(scope="function", params=[False, True]) +def binary_treatment(request): + return request.param + + +@pytest.fixture(scope="function", params=[1, 2]) +def n_x(request): + return request.param + + +@pytest.mark.ci +def test_make_heterogeneous_data_return_types(binary_treatment, n_x): + np.random.seed(3141) + res = make_heterogeneous_data(n_obs=100, n_x=n_x, binary_treatment=binary_treatment) + assert isinstance(res, dict) + assert isinstance(res["data"], pd.DataFrame) + assert isinstance(res["effects"], np.ndarray) + assert callable(res["treatment_effect"]) + + # test input checks + msg = "n_x must be either 1 or 2." + with pytest.raises(AssertionError, match=msg): + _ = make_heterogeneous_data(n_obs=100, n_x=0, binary_treatment=binary_treatment) + msg = "support_size must be smaller than p." + with pytest.raises(AssertionError, match=msg): + _ = make_heterogeneous_data(n_obs=100, n_x=n_x, support_size=31, binary_treatment=binary_treatment) + msg = "binary_treatment must be a boolean." 
+ with pytest.raises(AssertionError, match=msg): + _ = make_heterogeneous_data(n_obs=100, n_x=n_x, binary_treatment=2) + + +@pytest.mark.ci +def test_make_ssm_data_return_types(): + np.random.seed(3141) + res = make_ssm_data(n_obs=100) + assert isinstance(res, DoubleMLData) + res = make_ssm_data(n_obs=100, return_type="DataFrame") + assert isinstance(res, pd.DataFrame) + x, y, d, z, s = make_ssm_data(n_obs=100, return_type="array") + assert isinstance(x, np.ndarray) + assert isinstance(y, np.ndarray) + assert isinstance(d, np.ndarray) + assert isinstance(z, np.ndarray) + assert isinstance(s, np.ndarray) + with pytest.raises(ValueError, match=msg_inv_return_type): + _ = make_ssm_data(n_obs=100, return_type="matrix") + + +@pytest.fixture(scope="function", params=[3, 5]) +def n_levels(request): + return request.param + + +def test_make_data_discrete_treatments(n_levels): + np.random.seed(3141) + n = 100 + data_apo = make_irm_data_discrete_treatments(n_obs=n, n_levels=3) + assert isinstance(data_apo, dict) + assert isinstance(data_apo["y"], np.ndarray) + assert isinstance(data_apo["d"], np.ndarray) + assert isinstance(data_apo["x"], np.ndarray) + assert isinstance(data_apo["oracle_values"], dict) + + assert isinstance(data_apo["oracle_values"]["cont_d"], np.ndarray) + assert isinstance(data_apo["oracle_values"]["level_bounds"], np.ndarray) + assert isinstance(data_apo["oracle_values"]["potential_level"], np.ndarray) + assert isinstance(data_apo["oracle_values"]["ite"], np.ndarray) + assert isinstance(data_apo["oracle_values"]["y0"], np.ndarray) + + msg = "n_levels must be at least 2." + with pytest.raises(ValueError, match=msg): + _ = make_irm_data_discrete_treatments(n_obs=n, n_levels=1) + + msg = "n_levels must be an integer." 
+ with pytest.raises(ValueError, match=msg): + _ = make_irm_data_discrete_treatments(n_obs=n, n_levels=1.1) diff --git a/doubleml/plm/lplr.py b/doubleml/plm/lplr.py index 67f907a40..e7ff1c35f 100644 --- a/doubleml/plm/lplr.py +++ b/doubleml/plm/lplr.py @@ -41,7 +41,7 @@ class DoubleMLLPLR(NonLinearScoreMixin, DoubleML): Number of inner folds for nested resampling used internally. n_rep : int, default=1 Number of repetitions for sample splitting. - score : {'nuisance_space', 'instrument'} or callable, default='nuisance_space' + score : {'nuisance_space', 'instrument'}, default='nuisance_space' Score to use. 'nuisance_space' estimates m on subsamples with y=0; 'instrument' uses an instrument-type score. draw_sample_splitting : bool, default=True Whether to draw sample splitting during initialization. @@ -95,7 +95,6 @@ def __init__( super().__init__(obj_dml_data, n_folds, n_rep, score, draw_sample_splitting, double_sample_splitting=True) self._error_on_convergence_failure = error_on_convergence_failure - self._coef_start_val = 1.0 self._check_data(self._dml_data) valid_scores = ["nuisance_space", "instrument"] @@ -167,9 +166,6 @@ def _initialize_ml_nuisance_params(self): self._params = {learner: {key: [None] * self.n_rep for key in self._dml_data.d_cols} for learner in self._learner} def _check_data(self, obj_dml_data): - # Ensure outcome only contains 0 and 1 (validate early in constructor) - if not np.array_equal(np.unique(obj_dml_data.y), [0, 1]): - raise TypeError("The outcome variable y must be binary with values 0 and 1.") if not isinstance(obj_dml_data, DoubleMLData): raise TypeError( f"The data must be of DoubleMLData type. {str(obj_dml_data)} of type {str(type(obj_dml_data))} was passed." 
@@ -234,7 +230,11 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa if M_external: # expect per-inner-fold keys ml_M_inner_i - missing = [i for i in range(self.n_folds_inner) if f"ml_M_inner_{i}" not in external_predictions.keys()] + missing = [ + i + for i in range(self.n_folds_inner) + if f"ml_M_inner_{i}" not in external_predictions.keys() or external_predictions[f"ml_M_inner_{i}"] is None + ] if len(missing) > 0: raise ValueError( "When providing external predictions for ml_M, also inner predictions for all inner folds " @@ -299,7 +299,11 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa if a_external: # expect per-inner-fold keys ml_a_inner_i - missing = [i for i in range(self.n_folds_inner) if f"ml_a_inner_{i}" not in external_predictions.keys()] + missing = [ + i + for i in range(self.n_folds_inner) + if f"ml_a_inner_{i}" not in external_predictions.keys() or external_predictions[f"ml_a_inner_{i}"] is None + ] if len(missing) > 0: raise ValueError( "When providing external predictions for ml_a, also inner predictions for all inner folds " diff --git a/doubleml/plm/tests/test_datasets.py b/doubleml/plm/tests/test_datasets.py new file mode 100644 index 000000000..5e16b9acf --- /dev/null +++ b/doubleml/plm/tests/test_datasets.py @@ -0,0 +1,151 @@ +import numpy as np +import pandas as pd +import pytest + +from doubleml import DoubleMLData +from doubleml.plm.datasets import ( + _make_pliv_data, + make_confounded_plr_data, + make_lplr_LZZ2020, + make_pliv_CHS2015, + make_pliv_multiway_cluster_CKMS2021, + make_plr_CCDDHNR2018, + make_plr_turrell2018, +) + +msg_inv_return_type = "Invalid return_type." 
+ + +@pytest.mark.ci +def test_make_plr_CCDDHNR2018_return_types(): + np.random.seed(3141) + res = make_plr_CCDDHNR2018(n_obs=100, return_type=DoubleMLData) + assert isinstance(res, DoubleMLData) + res = make_plr_CCDDHNR2018(n_obs=100, return_type=pd.DataFrame) + assert isinstance(res, pd.DataFrame) + x, y, d = make_plr_CCDDHNR2018(n_obs=100, return_type=np.ndarray) + assert isinstance(x, np.ndarray) + assert isinstance(y, np.ndarray) + assert isinstance(d, np.ndarray) + with pytest.raises(ValueError, match=msg_inv_return_type): + _ = make_plr_CCDDHNR2018(n_obs=100, return_type="matrix") + + +@pytest.mark.ci +def test_make_plr_turrell2018_return_types(): + np.random.seed(3141) + res = make_plr_turrell2018(n_obs=100, return_type="DoubleMLData") + assert isinstance(res, DoubleMLData) + res = make_plr_turrell2018(n_obs=100, return_type="DataFrame") + assert isinstance(res, pd.DataFrame) + x, y, d = make_plr_turrell2018(n_obs=100, return_type="array") + assert isinstance(x, np.ndarray) + assert isinstance(y, np.ndarray) + assert isinstance(d, np.ndarray) + with pytest.raises(ValueError, match=msg_inv_return_type): + _ = make_plr_turrell2018(n_obs=100, return_type="matrix") + + +@pytest.mark.ci +def test_make_confounded_plr_data_return_types(): + np.random.seed(3141) + res = make_confounded_plr_data(theta=5.0) + assert isinstance(res, dict) + assert isinstance(res["x"], np.ndarray) + assert isinstance(res["y"], np.ndarray) + assert isinstance(res["d"], np.ndarray) + + assert isinstance(res["oracle_values"], dict) + assert isinstance(res["oracle_values"]["g_long"], np.ndarray) + assert isinstance(res["oracle_values"]["g_short"], np.ndarray) + assert isinstance(res["oracle_values"]["m_long"], np.ndarray) + assert isinstance(res["oracle_values"]["m_short"], np.ndarray) + assert isinstance(res["oracle_values"]["theta"], float) + assert isinstance(res["oracle_values"]["gamma_a"], float) + assert isinstance(res["oracle_values"]["beta_a"], float) + assert 
isinstance(res["oracle_values"]["a"], np.ndarray) + assert isinstance(res["oracle_values"]["z"], np.ndarray) + + +@pytest.mark.ci +def test_make_pliv_data_return_types(): + np.random.seed(3141) + res = _make_pliv_data(n_obs=100, return_type="DoubleMLData") + assert isinstance(res, DoubleMLData) + res = _make_pliv_data(n_obs=100, return_type="DataFrame") + assert isinstance(res, pd.DataFrame) + x, y, d, z = _make_pliv_data(n_obs=100, return_type="array") + assert isinstance(x, np.ndarray) + assert isinstance(y, np.ndarray) + assert isinstance(d, np.ndarray) + assert isinstance(z, np.ndarray) + with pytest.raises(ValueError, match=msg_inv_return_type): + _ = _make_pliv_data(n_obs=100, return_type="matrix") + + +@pytest.mark.ci +def test_make_pliv_CHS2015_return_types(): + np.random.seed(3141) + res = make_pliv_CHS2015(n_obs=100, return_type="DoubleMLData") + assert isinstance(res, DoubleMLData) + res = make_pliv_CHS2015(n_obs=100, return_type="DataFrame") + assert isinstance(res, pd.DataFrame) + x, y, d, z = make_pliv_CHS2015(n_obs=100, return_type="array") + assert isinstance(x, np.ndarray) + assert isinstance(y, np.ndarray) + assert isinstance(d, np.ndarray) + assert isinstance(z, np.ndarray) + with pytest.raises(ValueError, match=msg_inv_return_type): + _ = make_pliv_CHS2015(n_obs=100, return_type="matrix") + + +@pytest.mark.ci +def test_make_pliv_multiway_cluster_CKMS2021_return_types(): + np.random.seed(3141) + res = make_pliv_multiway_cluster_CKMS2021(N=10, M=10, return_type="DoubleMLData") + assert isinstance(res, DoubleMLData) + res = make_pliv_multiway_cluster_CKMS2021(N=10, M=10, return_type="DataFrame") + assert isinstance(res, pd.DataFrame) + x, y, d, cluster_vars, z = make_pliv_multiway_cluster_CKMS2021(N=10, M=10, return_type="array") + assert isinstance(x, np.ndarray) + assert isinstance(y, np.ndarray) + assert isinstance(d, np.ndarray) + assert isinstance(cluster_vars, np.ndarray) + assert isinstance(z, np.ndarray) + with pytest.raises(ValueError, 
match=msg_inv_return_type): + _ = make_pliv_multiway_cluster_CKMS2021(N=10, M=10, return_type="matrix") + + +@pytest.mark.ci +def test_make_lplr_LZZ2020_return_types(): + np.random.seed(3141) + res = make_lplr_LZZ2020(n_obs=100, return_type="DoubleMLData") + assert isinstance(res, DoubleMLData) + res = make_lplr_LZZ2020(n_obs=100, return_type="DataFrame") + assert isinstance(res, pd.DataFrame) + x, y, d, z = make_lplr_LZZ2020(n_obs=100, return_type="array") + assert isinstance(x, np.ndarray) + assert isinstance(y, np.ndarray) + assert isinstance(d, np.ndarray) + assert isinstance(z, np.ndarray) + with pytest.raises(ValueError, match=msg_inv_return_type): + _ = make_lplr_LZZ2020(n_obs=100, return_type="matrix") + + +@pytest.mark.ci +def test_make_lplr_LZZ2020_variants(): + np.random.seed(3141) + res = make_lplr_LZZ2020(n_obs=100, treatment="binary") + assert np.array_equal(np.unique(res.d), np.array([0, 1])) + res = make_lplr_LZZ2020(n_obs=100, treatment="binary_unbalanced") + assert np.array_equal(np.unique(res.d), np.array([0, 1])) + res = make_lplr_LZZ2020(n_obs=100, treatment="continuous") + assert len(np.unique(res.d)) == 100 + + msg = "Invalid treatment type." 
+ with pytest.raises(ValueError, match=msg): + _ = make_lplr_LZZ2020(n_obs=100, treatment="colors") + + res = make_lplr_LZZ2020(n_obs=100, balanced_r0=False) + _, y_unique = np.unique(res.y, return_counts=True) + assert np.abs(y_unique[0] - y_unique[1]) > 10 diff --git a/doubleml/plm/tests/test_lplr_exceptions.py b/doubleml/plm/tests/test_lplr_exceptions.py index 03cb7158a..f01cd8855 100644 --- a/doubleml/plm/tests/test_lplr_exceptions.py +++ b/doubleml/plm/tests/test_lplr_exceptions.py @@ -4,6 +4,7 @@ from sklearn.base import BaseEstimator from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor from sklearn.linear_model import Lasso, LogisticRegression +from sklearn.semi_supervised import LabelSpreading from doubleml import DoubleMLLPLR from doubleml.plm.datasets import make_lplr_LZZ2020 @@ -45,7 +46,7 @@ def test_lplr_exception_scores(): @pytest.mark.ci -def test_ssm_exception_resampling(): +def test_lplr_exception_resampling(): msg = "The number of folds must be of int type. 1.5 of type was passed." with pytest.raises(TypeError, match=msg): _ = DoubleMLLPLR(dml_data, ml_M, ml_t, ml_m, n_folds=1.5) @@ -208,10 +209,22 @@ def test_lplr_exception_learner(): with pytest.raises(ValueError, match=msg): _ = DoubleMLLPLR(dml_data, ml_M, ml_t, LogisticRegression()) + # ml_m may not be a classifier when treatment is not binary + msg = ( + r"The ml_a learner LogisticRegression\(\) was identified as classifier " + r"but at least one treatment variable is not binary with values 0 and 1\." 
+ ) + with pytest.raises(ValueError, match=msg): + _ = DoubleMLLPLR(dml_data, ml_M, ml_t, ml_m, ml_a=LogisticRegression()) + + # ml_m may not be a classifier when treatment is not binary + dml_data_binary = make_lplr_LZZ2020(treatment="binary") + msg = 'Learner "ml_a" who supports sample_weight is required for score type "instrument"' + with pytest.raises(ValueError, match=msg): + _ = DoubleMLLPLR(dml_data_binary, ml_M, ml_t, ml_m, ml_a=LabelSpreading(), score="instrument") + # construct a classifier which is not identifiable as classifier via is_classifier by sklearn log_reg = LogisticRegressionManipulatedType() - # TODO(0.11) can be removed if the sklearn dependency is bumped to 1.6.0 - log_reg._estimator_type = None msg = ( r"Learner provided for ml_m is probably invalid: LogisticRegressionManipulatedType\(\) is \(probably\) " r"neither a regressor nor a classifier. Method predict is used for prediction\." @@ -262,6 +275,13 @@ def predict(self, X): return preds +# Classifier that returns hard labels (0/1) via predict_proba to trigger the binary-predictions error +class HardLabelPredictProba(LogisticRegression): + def predict_proba(self, X): + labels = super().predict(X).astype(int) + return np.column_stack((1 - labels, labels)) + + @pytest.mark.ci def test_lplr_nan_prediction(): msg = r"Predictions from learner LassoWithNanPred\(\) for ml_t are not finite." @@ -304,3 +324,15 @@ def eval_fct(y_pred, y_true): with pytest.raises(ValueError): dml_lplr_obj.evaluate_learners(metric=eval_fct) + + +@pytest.mark.ci +def test_lplr_exception_binary_predictions_from_classifier(): + # Expect error because ml_m returns binary labels instead of probabilities for a binary treatment + msg = ( + r"For the binary treatment variable d, predictions obtained with the ml_m learner " + r"HardLabelPredictProba\(\) are also observed to be binary with values 0 and 1\. " + r"Make sure that for classifiers probabilities and not labels are predicted\." 
+ ) + with pytest.raises(ValueError, match=msg): + _ = DoubleMLLPLR(dml_data_binary, ml_M, ml_t, HardLabelPredictProba()).fit() diff --git a/doubleml/plm/tests/test_lplr_external_predictions.py b/doubleml/plm/tests/test_lplr_external_predictions.py index 5e9b66e87..cc8546a8f 100644 --- a/doubleml/plm/tests/test_lplr_external_predictions.py +++ b/doubleml/plm/tests/test_lplr_external_predictions.py @@ -84,10 +84,65 @@ def doubleml_lplr_fixture(lplr_score, n_rep, set_ml_m_ext, set_ml_t_ext, set_ml_ np.random.seed(3141) dml_lplr_ext.fit(external_predictions=ext_predictions) - res_dict = {"coef_normal": dml_lplr.coef[0], "coef_ext": dml_lplr_ext.coef[0]} + res_dict = { + "coef_normal": dml_lplr.coef[0], + "coef_ext": dml_lplr_ext.coef[0], + "se_normal": dml_lplr.se[0], + "se_ext": dml_lplr_ext.se[0], + } return res_dict @pytest.mark.ci def test_doubleml_lplr_coef(doubleml_lplr_fixture): assert math.isclose(doubleml_lplr_fixture["coef_normal"], doubleml_lplr_fixture["coef_ext"], rel_tol=1e-9, abs_tol=1e-4) + + +@pytest.mark.ci +def test_doubleml_lplr_se(doubleml_lplr_fixture): + assert math.isclose(doubleml_lplr_fixture["se_normal"], doubleml_lplr_fixture["se_ext"], rel_tol=1e-9, abs_tol=1e-4) + + +@pytest.mark.ci +def test_doubleml_lplr_exceptions(): + ext_predictions = {"d": {}} + + x, y, d, _ = make_lplr_LZZ2020(n_obs=500, dim_x=20, alpha=0.5, return_type="np.array", treatment="continuous") + + np.random.seed(3141) + dml_data = DoubleMLData.from_arrays(x=x, y=y, d=d) + + kwargs = {"obj_dml_data": dml_data} + + dml_lplr = DoubleMLLPLR(ml_M=LogisticRegression(max_iter=1000), ml_t=LinearRegression(), ml_m=LinearRegression(), **kwargs) + np.random.seed(3141) + dml_lplr.fit(store_predictions=True) + + # prepare external predictions and dummy learners + + ml_M = LogisticRegression(max_iter=1000) + ml_t = LinearRegression() + ml_m = LinearRegression() + + # build second model with external predictions + dml_lplr_ext = DoubleMLLPLR(ml_M=ml_M, ml_t=ml_t, ml_m=ml_m, **kwargs) 
+ + ext_predictions["d"]["ml_M"] = dml_lplr.predictions["ml_M"][:, :, 0] + # provide inner predictions per inner fold index + for i in range(dml_lplr.n_folds_inner - 1): + ext_predictions["d"][f"ml_M_inner_{i}"] = dml_lplr.predictions[f"ml_M_inner_{i}"][:, :, 0] + + msg = r"When providing external predictions for ml_M, also inner predictions for all inner folds" + with pytest.raises(ValueError, match=msg): + dml_lplr_ext.fit(external_predictions=ext_predictions) + + ext_predictions["d"][f"ml_M_inner_{dml_lplr.n_folds_inner-1}"] = (dml_lplr.predictions)[ + f"ml_M_inner_{dml_lplr.n_folds_inner-1}" + ][:, :, 0] + ext_predictions["d"]["ml_a"] = dml_lplr.predictions["ml_a"][:, :, 0] + for i in range(dml_lplr.n_folds_inner - 1): + ext_predictions["d"][f"ml_a_inner_{i}"] = dml_lplr.predictions[f"ml_a_inner_{i}"][:, :, 0] + + msg = r"When providing external predictions for ml_a, also inner predictions for all inner folds" + with pytest.raises(ValueError, match=msg): + dml_lplr_ext.fit(external_predictions=ext_predictions) diff --git a/doubleml/tests/test_datasets.py b/doubleml/tests/test_datasets.py index 05c75d00a..95b6ea53b 100644 --- a/doubleml/tests/test_datasets.py +++ b/doubleml/tests/test_datasets.py @@ -1,26 +1,8 @@ -import numpy as np import pandas as pd import pytest from doubleml import DoubleMLData from doubleml.datasets import fetch_401K, fetch_bonus -from doubleml.irm.datasets import ( - make_confounded_irm_data, - make_heterogeneous_data, - make_iivm_data, - make_irm_data, - make_irm_data_discrete_treatments, - make_ssm_data, -) -from doubleml.plm.datasets import ( - _make_pliv_data, - make_confounded_plr_data, - make_lplr_LZZ2020, - make_pliv_CHS2015, - make_pliv_multiway_cluster_CKMS2021, - make_plr_CCDDHNR2018, - make_plr_turrell2018, -) msg_inv_return_type = "Invalid return_type." 
@@ -54,279 +36,3 @@ def test_fetch_bonus_poly(): n_x = len(data_bonus_wo_poly.x_cols) data_bonus_w_poly = fetch_bonus(polynomial_features=True) assert len(data_bonus_w_poly.x_cols) == ((n_x + 1) * n_x / 2 + n_x) - - -@pytest.mark.ci -def test_make_plr_CCDDHNR2018_return_types(): - np.random.seed(3141) - res = make_plr_CCDDHNR2018(n_obs=100, return_type=DoubleMLData) - assert isinstance(res, DoubleMLData) - res = make_plr_CCDDHNR2018(n_obs=100, return_type=pd.DataFrame) - assert isinstance(res, pd.DataFrame) - x, y, d = make_plr_CCDDHNR2018(n_obs=100, return_type=np.ndarray) - assert isinstance(x, np.ndarray) - assert isinstance(y, np.ndarray) - assert isinstance(d, np.ndarray) - with pytest.raises(ValueError, match=msg_inv_return_type): - _ = make_plr_CCDDHNR2018(n_obs=100, return_type="matrix") - - -@pytest.mark.ci -def test_make_plr_turrell2018_return_types(): - np.random.seed(3141) - res = make_plr_turrell2018(n_obs=100, return_type="DoubleMLData") - assert isinstance(res, DoubleMLData) - res = make_plr_turrell2018(n_obs=100, return_type="DataFrame") - assert isinstance(res, pd.DataFrame) - x, y, d = make_plr_turrell2018(n_obs=100, return_type="array") - assert isinstance(x, np.ndarray) - assert isinstance(y, np.ndarray) - assert isinstance(d, np.ndarray) - with pytest.raises(ValueError, match=msg_inv_return_type): - _ = make_plr_turrell2018(n_obs=100, return_type="matrix") - - -@pytest.mark.ci -def test_make_irm_data_return_types(): - np.random.seed(3141) - res = make_irm_data(n_obs=100, return_type="DoubleMLData") - assert isinstance(res, DoubleMLData) - res = make_irm_data(n_obs=100, return_type="DataFrame") - assert isinstance(res, pd.DataFrame) - x, y, d = make_irm_data(n_obs=100, return_type="array") - assert isinstance(x, np.ndarray) - assert isinstance(y, np.ndarray) - assert isinstance(d, np.ndarray) - with pytest.raises(ValueError, match=msg_inv_return_type): - _ = make_irm_data(n_obs=100, return_type="matrix") - - -@pytest.mark.ci -def 
test_make_iivm_data_return_types(): - np.random.seed(3141) - res = make_iivm_data(n_obs=100, return_type="DoubleMLData") - assert isinstance(res, DoubleMLData) - res = make_iivm_data(n_obs=100, return_type="DataFrame") - assert isinstance(res, pd.DataFrame) - x, y, d, z = make_iivm_data(n_obs=100, return_type="array") - assert isinstance(x, np.ndarray) - assert isinstance(y, np.ndarray) - assert isinstance(d, np.ndarray) - assert isinstance(z, np.ndarray) - with pytest.raises(ValueError, match=msg_inv_return_type): - _ = make_iivm_data(n_obs=100, return_type="matrix") - - -@pytest.mark.ci -def test_make_pliv_data_return_types(): - np.random.seed(3141) - res = _make_pliv_data(n_obs=100, return_type="DoubleMLData") - assert isinstance(res, DoubleMLData) - res = _make_pliv_data(n_obs=100, return_type="DataFrame") - assert isinstance(res, pd.DataFrame) - x, y, d, z = _make_pliv_data(n_obs=100, return_type="array") - assert isinstance(x, np.ndarray) - assert isinstance(y, np.ndarray) - assert isinstance(d, np.ndarray) - assert isinstance(z, np.ndarray) - with pytest.raises(ValueError, match=msg_inv_return_type): - _ = _make_pliv_data(n_obs=100, return_type="matrix") - - -@pytest.mark.ci -def test_make_pliv_CHS2015_return_types(): - np.random.seed(3141) - res = make_pliv_CHS2015(n_obs=100, return_type="DoubleMLData") - assert isinstance(res, DoubleMLData) - res = make_pliv_CHS2015(n_obs=100, return_type="DataFrame") - assert isinstance(res, pd.DataFrame) - x, y, d, z = make_pliv_CHS2015(n_obs=100, return_type="array") - assert isinstance(x, np.ndarray) - assert isinstance(y, np.ndarray) - assert isinstance(d, np.ndarray) - assert isinstance(z, np.ndarray) - with pytest.raises(ValueError, match=msg_inv_return_type): - _ = make_pliv_CHS2015(n_obs=100, return_type="matrix") - - -@pytest.mark.ci -def test_make_pliv_multiway_cluster_CKMS2021_return_types(): - np.random.seed(3141) - res = make_pliv_multiway_cluster_CKMS2021(N=10, M=10, return_type="DoubleMLData") - assert 
isinstance(res, DoubleMLData) - res = make_pliv_multiway_cluster_CKMS2021(N=10, M=10, return_type="DataFrame") - assert isinstance(res, pd.DataFrame) - x, y, d, cluster_vars, z = make_pliv_multiway_cluster_CKMS2021(N=10, M=10, return_type="array") - assert isinstance(x, np.ndarray) - assert isinstance(y, np.ndarray) - assert isinstance(d, np.ndarray) - assert isinstance(cluster_vars, np.ndarray) - assert isinstance(z, np.ndarray) - with pytest.raises(ValueError, match=msg_inv_return_type): - _ = make_pliv_multiway_cluster_CKMS2021(N=10, M=10, return_type="matrix") - - -@pytest.fixture(scope="function", params=[True, False]) -def linear(request): - return request.param - - -@pytest.mark.ci -def test_make_confounded_irm_data_return_types(linear): - np.random.seed(3141) - res = make_confounded_irm_data(linear=linear) - assert isinstance(res, dict) - assert isinstance(res["x"], np.ndarray) - assert isinstance(res["y"], np.ndarray) - assert isinstance(res["d"], np.ndarray) - - assert isinstance(res["oracle_values"], dict) - assert isinstance(res["oracle_values"]["g_long"], np.ndarray) - assert isinstance(res["oracle_values"]["g_short"], np.ndarray) - assert isinstance(res["oracle_values"]["m_long"], np.ndarray) - assert isinstance(res["oracle_values"]["m_short"], np.ndarray) - assert isinstance(res["oracle_values"]["gamma_a"], float) - assert isinstance(res["oracle_values"]["beta_a"], float) - assert isinstance(res["oracle_values"]["a"], np.ndarray) - assert isinstance(res["oracle_values"]["y_0"], np.ndarray) - assert isinstance(res["oracle_values"]["y_1"], np.ndarray) - assert isinstance(res["oracle_values"]["z"], np.ndarray) - assert isinstance(res["oracle_values"]["cf_y"], float) - assert isinstance(res["oracle_values"]["cf_d_ate"], float) - assert isinstance(res["oracle_values"]["cf_d_atte"], float) - assert isinstance(res["oracle_values"]["rho_ate"], float) - assert isinstance(res["oracle_values"]["rho_atte"], float) - - -@pytest.mark.ci -def 
test_make_confounded_plr_data_return_types(): - np.random.seed(3141) - res = make_confounded_plr_data(theta=5.0) - assert isinstance(res, dict) - assert isinstance(res["x"], np.ndarray) - assert isinstance(res["y"], np.ndarray) - assert isinstance(res["d"], np.ndarray) - - assert isinstance(res["oracle_values"], dict) - assert isinstance(res["oracle_values"]["g_long"], np.ndarray) - assert isinstance(res["oracle_values"]["g_short"], np.ndarray) - assert isinstance(res["oracle_values"]["m_long"], np.ndarray) - assert isinstance(res["oracle_values"]["m_short"], np.ndarray) - assert isinstance(res["oracle_values"]["theta"], float) - assert isinstance(res["oracle_values"]["gamma_a"], float) - assert isinstance(res["oracle_values"]["beta_a"], float) - assert isinstance(res["oracle_values"]["a"], np.ndarray) - assert isinstance(res["oracle_values"]["z"], np.ndarray) - - -@pytest.fixture(scope="function", params=[False, True]) -def binary_treatment(request): - return request.param - - -@pytest.fixture(scope="function", params=[1, 2]) -def n_x(request): - return request.param - - -@pytest.mark.ci -def test_make_heterogeneous_data_return_types(binary_treatment, n_x): - np.random.seed(3141) - res = make_heterogeneous_data(n_obs=100, n_x=n_x, binary_treatment=binary_treatment) - assert isinstance(res, dict) - assert isinstance(res["data"], pd.DataFrame) - assert isinstance(res["effects"], np.ndarray) - assert callable(res["treatment_effect"]) - - # test input checks - msg = "n_x must be either 1 or 2." - with pytest.raises(AssertionError, match=msg): - _ = make_heterogeneous_data(n_obs=100, n_x=0, binary_treatment=binary_treatment) - msg = "support_size must be smaller than p." - with pytest.raises(AssertionError, match=msg): - _ = make_heterogeneous_data(n_obs=100, n_x=n_x, support_size=31, binary_treatment=binary_treatment) - msg = "binary_treatment must be a boolean." 
- with pytest.raises(AssertionError, match=msg): - _ = make_heterogeneous_data(n_obs=100, n_x=n_x, binary_treatment=2) - - -@pytest.mark.ci -def test_make_ssm_data_return_types(): - np.random.seed(3141) - res = make_ssm_data(n_obs=100) - assert isinstance(res, DoubleMLData) - res = make_ssm_data(n_obs=100, return_type="DataFrame") - assert isinstance(res, pd.DataFrame) - x, y, d, z, s = make_ssm_data(n_obs=100, return_type="array") - assert isinstance(x, np.ndarray) - assert isinstance(y, np.ndarray) - assert isinstance(d, np.ndarray) - assert isinstance(z, np.ndarray) - assert isinstance(s, np.ndarray) - with pytest.raises(ValueError, match=msg_inv_return_type): - _ = make_ssm_data(n_obs=100, return_type="matrix") - - -@pytest.fixture(scope="function", params=[3, 5]) -def n_levels(request): - return request.param - - -def test_make_data_discrete_treatments(n_levels): - np.random.seed(3141) - n = 100 - data_apo = make_irm_data_discrete_treatments(n_obs=n, n_levels=3) - assert isinstance(data_apo, dict) - assert isinstance(data_apo["y"], np.ndarray) - assert isinstance(data_apo["d"], np.ndarray) - assert isinstance(data_apo["x"], np.ndarray) - assert isinstance(data_apo["oracle_values"], dict) - - assert isinstance(data_apo["oracle_values"]["cont_d"], np.ndarray) - assert isinstance(data_apo["oracle_values"]["level_bounds"], np.ndarray) - assert isinstance(data_apo["oracle_values"]["potential_level"], np.ndarray) - assert isinstance(data_apo["oracle_values"]["ite"], np.ndarray) - assert isinstance(data_apo["oracle_values"]["y0"], np.ndarray) - - msg = "n_levels must be at least 2." - with pytest.raises(ValueError, match=msg): - _ = make_irm_data_discrete_treatments(n_obs=n, n_levels=1) - - msg = "n_levels must be an integer." 
- with pytest.raises(ValueError, match=msg): - _ = make_irm_data_discrete_treatments(n_obs=n, n_levels=1.1) - - -@pytest.mark.ci -def test_make_lplr_LZZ2020_return_types(): - np.random.seed(3141) - res = make_lplr_LZZ2020(n_obs=100, return_type="DoubleMLData") - assert isinstance(res, DoubleMLData) - res = make_lplr_LZZ2020(n_obs=100, return_type="DataFrame") - assert isinstance(res, pd.DataFrame) - x, y, d, z = make_lplr_LZZ2020(n_obs=100, return_type="array") - assert isinstance(x, np.ndarray) - assert isinstance(y, np.ndarray) - assert isinstance(d, np.ndarray) - assert isinstance(z, np.ndarray) - with pytest.raises(ValueError, match=msg_inv_return_type): - _ = make_lplr_LZZ2020(n_obs=100, return_type="matrix") - - -@pytest.mark.ci -def test_make_lplr_LZZ2020_variants(): - np.random.seed(3141) - res = make_lplr_LZZ2020(n_obs=100, treatment="binary") - assert np.array_equal(np.unique(res.d), np.array([0, 1])) - res = make_lplr_LZZ2020(n_obs=100, treatment="binary_unbalanced") - assert np.array_equal(np.unique(res.d), np.array([0, 1])) - res = make_lplr_LZZ2020(n_obs=100, treatment="continuous") - assert len(np.unique(res.d)) == 100 - - msg = "Invalid treatment type." 
- with pytest.raises(ValueError, match=msg): - _ = make_lplr_LZZ2020(n_obs=100, treatment="colors") - - res = make_lplr_LZZ2020(n_obs=100, balanced_r0=False) - _, y_unique = np.unique(res.y, return_counts=True) - assert np.abs(y_unique[0] - y_unique[1]) > 10 diff --git a/doubleml/tests/test_exceptions.py b/doubleml/tests/test_exceptions.py index e725a562e..13a1fdc89 100644 --- a/doubleml/tests/test_exceptions.py +++ b/doubleml/tests/test_exceptions.py @@ -15,6 +15,7 @@ DoubleMLDIDData, DoubleMLIIVM, DoubleMLIRM, + DoubleMLLPLR, DoubleMLLPQ, DoubleMLPLIV, DoubleMLPLR, @@ -23,7 +24,12 @@ ) from doubleml.did.datasets import make_did_SZ2020 from doubleml.irm.datasets import make_iivm_data, make_irm_data -from doubleml.plm.datasets import make_pliv_CHS2015, make_pliv_multiway_cluster_CKMS2021, make_plr_CCDDHNR2018 +from doubleml.plm.datasets import ( + make_lplr_LZZ2020, + make_pliv_CHS2015, + make_pliv_multiway_cluster_CKMS2021, + make_plr_CCDDHNR2018, +) from ._utils import DummyDataClass @@ -796,6 +802,26 @@ def test_doubleml_exception_smpls(): _ = dml_pliv_cluster.set_sample_splitting(all_smpls=dml_pliv_cluster.smpls, all_smpls_cluster=all_smpls_cluster) +@pytest.mark.ci +def test_doubleml_exception_smpls_inner(): + dml_plr_no_inner = DoubleMLPLR(dml_data, ml_l, ml_m) + msg = "smpls_inner is only available for double sample splitting." + with pytest.raises(ValueError, match=msg): + _ = dml_plr_no_inner.smpls_inner + with pytest.raises(ValueError, match=msg): + _ = dml_plr_no_inner._DoubleML__smpls__inner + + dml_data_lplr = make_lplr_LZZ2020() + ml_M = LogisticRegression() + dml_plr_inner_no_smpls = DoubleMLLPLR(dml_data_lplr, ml_M, ml_m, ml_m, draw_sample_splitting=False) + msg = ( + "Sample splitting not specified. " + r"Either draw samples via .draw_sample splitting\(\) or set external samples via .set_sample_splitting\(\)." 
+ ) + with pytest.raises(ValueError, match=msg): + _ = dml_plr_inner_no_smpls.smpls_inner + + @pytest.mark.ci def test_doubleml_exception_fit(): msg = "The number of CPUs used to fit the learners must be of int type. 5 of type was passed." diff --git a/doubleml/tests/test_set_sample_splitting.py b/doubleml/tests/test_set_sample_splitting.py index fa0a43945..fd44289b2 100644 --- a/doubleml/tests/test_set_sample_splitting.py +++ b/doubleml/tests/test_set_sample_splitting.py @@ -276,3 +276,11 @@ def test_doubleml_set_sample_splitting_shuffled_indices(): # Since predictions are stored by observation index, they should be identical np.testing.assert_allclose(sorted_preds_l, shuffled_preds_l, rtol=1e-10) np.testing.assert_allclose(sorted_preds_m, shuffled_preds_m, rtol=1e-10) + + +@pytest.mark.ci +def test_doubleml_exceptions_double_sample_splitting(): + smpls = (np.arange(n_obs), np.arange(n_obs)) + msg = "set_sample_splitting not supported for double sample splitting." + with pytest.raises(ValueError, match=msg): + dml_plr.set_sample_splitting(smpls) From 74b1caaf74f0b5f38979151970cbfe608b83ea5b Mon Sep 17 00:00:00 2001 From: Julius Herzig Date: Wed, 12 Nov 2025 13:46:58 -0800 Subject: [PATCH 40/48] Sample splitting exceptions --- doubleml/tests/test_set_sample_splitting.py | 8 ------- .../test_set_sample_splitting_exceptions.py | 23 +++++++++++++++++++ 2 files changed, 23 insertions(+), 8 deletions(-) create mode 100644 doubleml/tests/test_set_sample_splitting_exceptions.py diff --git a/doubleml/tests/test_set_sample_splitting.py b/doubleml/tests/test_set_sample_splitting.py index fd44289b2..fa0a43945 100644 --- a/doubleml/tests/test_set_sample_splitting.py +++ b/doubleml/tests/test_set_sample_splitting.py @@ -276,11 +276,3 @@ def test_doubleml_set_sample_splitting_shuffled_indices(): # Since predictions are stored by observation index, they should be identical np.testing.assert_allclose(sorted_preds_l, shuffled_preds_l, rtol=1e-10) 
np.testing.assert_allclose(sorted_preds_m, shuffled_preds_m, rtol=1e-10) - - -@pytest.mark.ci -def test_doubleml_exceptions_double_sample_splitting(): - smpls = (np.arange(n_obs), np.arange(n_obs)) - msg = "set_sample_splitting not supported for double sample splitting." - with pytest.raises(ValueError, match=msg): - dml_plr.set_sample_splitting(smpls) diff --git a/doubleml/tests/test_set_sample_splitting_exceptions.py b/doubleml/tests/test_set_sample_splitting_exceptions.py new file mode 100644 index 000000000..b58f6bdda --- /dev/null +++ b/doubleml/tests/test_set_sample_splitting_exceptions.py @@ -0,0 +1,23 @@ +import numpy as np +import pytest +from sklearn.linear_model import Lasso, LogisticRegression + +from doubleml import DoubleMLLPLR +from doubleml.plm.datasets import make_lplr_LZZ2020 + +np.random.seed(3141) + +dml_data_lplr = make_lplr_LZZ2020(n_obs=10) +n_obs = dml_data_lplr.n_obs +ml_M = LogisticRegression() +ml_t = Lasso() +ml_m = Lasso() +dml_lplr = DoubleMLLPLR(dml_data_lplr, ml_M, ml_t, ml_m, n_folds=7, n_rep=8, draw_sample_splitting=False) + + +@pytest.mark.ci +def test_doubleml_exceptions_double_sample_splitting(): + smpls = (np.arange(n_obs), np.arange(n_obs)) + msg = "set_sample_splitting not supported for double sample splitting." 
+ with pytest.raises(ValueError, match=msg): + dml_lplr.set_sample_splitting(smpls) From 72be054067b98a85048b36266164cd0b7df53099 Mon Sep 17 00:00:00 2001 From: Julius Herzig Date: Wed, 12 Nov 2025 15:25:39 -0800 Subject: [PATCH 41/48] Test coverage increase --- doubleml/double_ml.py | 2 +- doubleml/plm/lplr.py | 8 -------- doubleml/tests/test_set_sample_splitting_exceptions.py | 5 +++++ doubleml/utils/_estimation.py | 8 ++------ 4 files changed, 8 insertions(+), 15 deletions(-) diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index 638cb31ec..d084b4561 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -526,7 +526,7 @@ def __smpls(self): @property def __smpls__inner(self): - if not self.smpls_inner[self._i_rep]: + if self._smpls_inner is None: raise ValueError("smpls_inner is only available for double sample splitting.") return self._smpls_inner[self._i_rep] diff --git a/doubleml/plm/lplr.py b/doubleml/plm/lplr.py index e7ff1c35f..f452e02d4 100644 --- a/doubleml/plm/lplr.py +++ b/doubleml/plm/lplr.py @@ -7,7 +7,6 @@ from sklearn.utils import check_X_y from sklearn.utils.multiclass import type_of_target -from doubleml import DoubleMLData from doubleml.double_ml import DoubleML from doubleml.double_ml_score_mixins import NonLinearScoreMixin from doubleml.utils._checks import _check_finite_predictions, _check_is_propensity, _check_score @@ -166,13 +165,8 @@ def _initialize_ml_nuisance_params(self): self._params = {learner: {key: [None] * self.n_rep for key in self._dml_data.d_cols} for learner in self._learner} def _check_data(self, obj_dml_data): - if not isinstance(obj_dml_data, DoubleMLData): - raise TypeError( - f"The data must be of DoubleMLData type. {str(obj_dml_data)} of type {str(type(obj_dml_data))} was passed." 
- ) if not np.array_equal(np.unique(obj_dml_data.y), [0, 1]): raise TypeError("The outcome variable y must be binary with values 0 and 1.") - return def _double_dml_cv_predict( self, @@ -202,7 +196,6 @@ def _double_dml_cv_predict( est_params=est_params, method=method, return_models=True, - smpls_is_partition_manual_set=True, sample_weights=sample_weights, ) _check_finite_predictions(res_inner["preds"], estimator, estimator_name, smpls_double_split) @@ -447,7 +440,6 @@ def _nuisance_tuning( ): if self._i_rep is None: raise ValueError("tune_on_folds must be True as targets have to be created for ml_t on folds.") - # TODO: test x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False) x, d = check_X_y(x, self._dml_data.d, force_all_finite=False) x_d_concat = np.hstack((d.reshape(-1, 1), x)) diff --git a/doubleml/tests/test_set_sample_splitting_exceptions.py b/doubleml/tests/test_set_sample_splitting_exceptions.py index b58f6bdda..eba513ca3 100644 --- a/doubleml/tests/test_set_sample_splitting_exceptions.py +++ b/doubleml/tests/test_set_sample_splitting_exceptions.py @@ -21,3 +21,8 @@ def test_doubleml_exceptions_double_sample_splitting(): msg = "set_sample_splitting not supported for double sample splitting." with pytest.raises(ValueError, match=msg): dml_lplr.set_sample_splitting(smpls) + + dml_lplr._is_cluster_data = True + msg = "Cluster data not supported for double sample splitting." 
+ with pytest.raises(ValueError, match=msg): + dml_lplr.draw_sample_splitting() diff --git a/doubleml/utils/_estimation.py b/doubleml/utils/_estimation.py index d4e8abc4d..b79c7618a 100644 --- a/doubleml/utils/_estimation.py +++ b/doubleml/utils/_estimation.py @@ -56,15 +56,11 @@ def _dml_cv_predict( method="predict", return_train_preds=False, return_models=False, - smpls_is_partition_manual_set=None, sample_weights=None, ): n_obs = x.shape[0] - if smpls_is_partition_manual_set is None: - smpls_is_partition = _check_is_partition(smpls, n_obs) - else: - smpls_is_partition = smpls_is_partition_manual_set + smpls_is_partition = _check_is_partition(smpls, n_obs) fold_specific_params = (est_params is not None) & (not isinstance(est_params, dict)) fold_specific_target = isinstance(y, list) manual_cv_predict = ( @@ -108,7 +104,7 @@ def _dml_cv_predict( else: if not smpls_is_partition: assert not fold_specific_target, "combination of fold-specific y and no cross-fitting not implemented yet" - assert len(smpls) == 1 + # assert len(smpls) == 1 if method == "predict_proba": assert not fold_specific_target # fold_specific_target only needed for PLIV.partialXZ From 5d9e0eb7b3f43d2f1fbfa0f1e966d3b5fe471b0f Mon Sep 17 00:00:00 2001 From: Julius Herzig Date: Wed, 12 Nov 2025 16:20:32 -0800 Subject: [PATCH 42/48] Exception fixed --- doubleml/double_ml.py | 8 +++++++- doubleml/tests/test_exceptions.py | 6 ++++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index d084b4561..4e11b13c8 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -526,8 +526,14 @@ def __smpls(self): @property def __smpls__inner(self): - if self._smpls_inner is None: + if not self._double_sample_splitting: raise ValueError("smpls_inner is only available for double sample splitting.") + if self._smpls_inner is None: + err_msg = ( + "Sample splitting not specified. 
Either draw samples via .draw_sample splitting() " + + "or set external samples via .set_sample_splitting()." + ) + raise ValueError(err_msg) return self._smpls_inner[self._i_rep] @property diff --git a/doubleml/tests/test_exceptions.py b/doubleml/tests/test_exceptions.py index f4a267170..4fca5318b 100644 --- a/doubleml/tests/test_exceptions.py +++ b/doubleml/tests/test_exceptions.py @@ -706,13 +706,15 @@ def test_doubleml_exception_smpls_inner(): dml_data_lplr = make_lplr_LZZ2020() ml_M = LogisticRegression() - dml_plr_inner_no_smpls = DoubleMLLPLR(dml_data_lplr, ml_M, ml_m, ml_m, draw_sample_splitting=False) + dml_lplr_inner_no_smpls = DoubleMLLPLR(dml_data_lplr, ml_M, ml_m, ml_m, draw_sample_splitting=False) msg = ( "Sample splitting not specified. " r"Either draw samples via .draw_sample splitting\(\) or set external samples via .set_sample_splitting\(\)." ) with pytest.raises(ValueError, match=msg): - _ = dml_plr_inner_no_smpls.smpls_inner + _ = dml_lplr_inner_no_smpls.smpls_inner + with pytest.raises(ValueError, match=msg): + _ = dml_lplr_inner_no_smpls._DoubleML__smpls__inner @pytest.mark.ci From 99e78bf77015bff71a9bf90f62b2ecb66c0cf01c Mon Sep 17 00:00:00 2001 From: Julius Herzig Date: Thu, 13 Nov 2025 19:56:57 -0800 Subject: [PATCH 43/48] PR Review --- doubleml/double_ml.py | 17 +-- doubleml/plm/lplr.py | 114 +++++---------------- doubleml/plm/tests/test_lplr.py | 9 +- doubleml/plm/tests/test_lplr_exceptions.py | 6 +- doubleml/plm/tests/test_lplr_tune.py | 27 +---- doubleml/plm/tests/test_model_defaults.py | 51 +++++++++ doubleml/plm/tests/test_return_types.py | 73 +++++++++++++ doubleml/utils/_check_defaults.py | 3 +- doubleml/utils/_check_return_types.py | 4 +- doubleml/utils/_estimation.py | 46 ++++++++- 10 files changed, 212 insertions(+), 138 deletions(-) create mode 100644 doubleml/plm/tests/test_model_defaults.py create mode 100644 doubleml/plm/tests/test_return_types.py diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index 
4e11b13c8..df35bcb5e 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -258,13 +258,6 @@ def learner(self): """ return self._learner - @property - def predictions_names(self): - """ - The names of predictions for the nuisance functions. - """ - return list(self.params_names) - @property def learner_names(self): """ @@ -1088,7 +1081,7 @@ def _check_fit(self, n_jobs_cv, store_predictions, external_predictions, store_m _check_external_predictions( external_predictions=external_predictions, valid_treatments=self._dml_data.d_cols, - valid_learners=self.predictions_names, + valid_learners=self.params_names, n_obs=self.n_obs, n_rep=self.n_rep, ) @@ -1111,7 +1104,7 @@ def _initalize_fit(self, store_predictions, store_models): def _fit_nuisance_and_score_elements(self, n_jobs_cv, store_predictions, external_predictions, store_models): ext_prediction_dict = _set_external_predictions( external_predictions, - learners=self.predictions_names, + learners=self.params_names, treatment=self._dml_data.d_cols[self._i_treat], i_rep=self._i_rep, ) @@ -1178,8 +1171,8 @@ def _initialize_arrays(self): self._all_se = np.full((n_thetas, n_rep), np.nan) def _initialize_predictions_and_targets(self): - self._predictions = {learner: np.full(self._score_dim, np.nan) for learner in self.predictions_names} - self._nuisance_targets = {learner: np.full(self._score_dim, np.nan) for learner in self.predictions_names} + self._predictions = {learner: np.full(self._score_dim, np.nan) for learner in self.params_names} + self._nuisance_targets = {learner: np.full(self._score_dim, np.nan) for learner in self.params_names} def _initialize_nuisance_loss(self): self._nuisance_loss = {learner: np.full((self.n_rep, self._dml_data.n_coefs), np.nan) for learner in self.params_names} @@ -1190,7 +1183,7 @@ def _initialize_models(self): } def _store_predictions_and_targets(self, preds, targets): - for learner in self.predictions_names: + for learner in self.params_names: self._predictions[learner][:, 
self._i_rep, self._i_treat] = preds[learner] self._nuisance_targets[learner][:, self._i_rep, self._i_treat] = targets[learner] diff --git a/doubleml/plm/lplr.py b/doubleml/plm/lplr.py index f452e02d4..c3f6d5b56 100644 --- a/doubleml/plm/lplr.py +++ b/doubleml/plm/lplr.py @@ -13,6 +13,7 @@ from doubleml.utils._estimation import ( _dml_cv_predict, _dml_tune, + _double_dml_cv_predict, ) @@ -104,10 +105,6 @@ def __init__( ml_m_is_classifier = self._check_learner(ml_m, "ml_m", regressor=True, classifier=True) self._learner = {"ml_m": ml_m, "ml_t": ml_t, "ml_M": ml_M} - # replace aggregated inner names with per-inner-fold names - inner_M_names = [f"ml_M_inner_{i}" for i in range(self.n_folds_inner)] - inner_a_names = [f"ml_a_inner_{i}" for i in range(self.n_folds_inner)] - self._predictions_names = ["ml_r", "ml_m", "ml_a", "ml_t", "ml_M"] + inner_M_names + inner_a_names if ml_a is not None: ml_a_is_classifier = self._check_learner(ml_a, "ml_a", regressor=True, classifier=True) @@ -162,56 +159,15 @@ def __init__( self._sensitivity_implemented = False def _initialize_ml_nuisance_params(self): - self._params = {learner: {key: [None] * self.n_rep for key in self._dml_data.d_cols} for learner in self._learner} + inner_M_names = [f"ml_M_inner_{i}" for i in range(self.n_folds)] + inner_a_names = [f"ml_a_inner_{i}" for i in range(self.n_folds)] + params_names = ["ml_m", "ml_a", "ml_t", "ml_M"] + inner_M_names + inner_a_names + self._params = {learner: {key: [None] * self.n_rep for key in self._dml_data.d_cols} for learner in params_names} def _check_data(self, obj_dml_data): if not np.array_equal(np.unique(obj_dml_data.y), [0, 1]): raise TypeError("The outcome variable y must be binary with values 0 and 1.") - def _double_dml_cv_predict( - self, - estimator, - estimator_name, - x, - y, - smpls=None, - smpls_inner=None, - n_jobs=None, - est_params=None, - method="predict", - sample_weights=None, - ): - res = {} - res["preds"] = np.zeros(y.shape, dtype=float) - res["preds_inner"] 
= [] - res["targets_inner"] = [] - res["models"] = [] - for smpls_single_split, smpls_double_split in zip(smpls, smpls_inner): - res_inner = _dml_cv_predict( - estimator, - x, - y, - smpls=smpls_double_split, - n_jobs=n_jobs, - est_params=est_params, - method=method, - return_models=True, - sample_weights=sample_weights, - ) - _check_finite_predictions(res_inner["preds"], estimator, estimator_name, smpls_double_split) - - res["preds_inner"].append(res_inner["preds"]) - res["targets_inner"].append(res_inner["targets"]) - for model in res_inner["models"]: - res["models"].append(model) - if method == "predict_proba": - res["preds"][smpls_single_split[1]] += model.predict_proba(x[smpls_single_split[1]])[:, 1] - else: - res["preds"][smpls_single_split[1]] += model.predict(x[smpls_single_split[1]]) - res["preds"] /= len(smpls) - res["targets"] = np.copy(y) - return res - def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False): x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False) x, d = check_X_y(x, self._dml_data.d, force_all_finite=False) @@ -234,9 +190,14 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa f"have to be provided (missing: {', '.join([str(i) for i in missing])})." 
) M_hat_inner = [external_predictions[f"ml_M_inner_{i}"] for i in range(self.n_folds_inner)] - M_hat = {"preds": external_predictions["ml_M"], "preds_inner": M_hat_inner, "targets": None, "models": None} + M_hat = { + "preds": external_predictions["ml_M"], + "preds_inner": M_hat_inner, + "targets": self._dml_data.y, + "models": None, + } else: - M_hat = self._double_dml_cv_predict( + M_hat = _double_dml_cv_predict( self._learner["ml_M"], "ml_M", x_d_concat, @@ -250,7 +211,7 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa # nuisance m if m_external: - m_hat = {"preds": external_predictions["ml_m"], "targets": None, "models": None} + m_hat = {"preds": external_predictions["ml_m"], "targets": self._dml_data.d, "models": None} else: if self.score == "instrument": weights = M_hat["preds"] * (1 - M_hat["preds"]) @@ -303,9 +264,14 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa f"have to be provided (missing: {', '.join([str(i) for i in missing])})." ) a_hat_inner = [external_predictions[f"ml_a_inner_{i}"] for i in range(self.n_folds_inner)] - a_hat = {"preds": external_predictions["ml_a"], "preds_inner": a_hat_inner, "targets": None, "models": None} + a_hat = { + "preds": external_predictions["ml_a"], + "preds_inner": a_hat_inner, + "targets": self._dml_data.d, + "models": None, + } else: - a_hat = self._double_dml_cv_predict( + a_hat = _double_dml_cv_predict( self._learner["ml_a"], "ml_a", x, @@ -404,13 +370,6 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa return psi_elements, preds - @property - def predictions_names(self): - """ - The names of predictions for the nuisance functions. 
- """ - return self._predictions_names - def _score_elements(self, y, d, r_hat, m_hat): # compute residual d_tilde = d - m_hat @@ -438,8 +397,6 @@ def _sensitivity_element_est(self, preds): def _nuisance_tuning( self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search ): - if self._i_rep is None: - raise ValueError("tune_on_folds must be True as targets have to be created for ml_t on folds.") x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False) x, d = check_X_y(x, self._dml_data.d, force_all_finite=False) x_d_concat = np.hstack((d.reshape(-1, 1), x)) @@ -500,34 +457,16 @@ def _nuisance_tuning( a_best_params = [xx.best_params_ for xx in a_tune_res] # Create targets for tuning ml_t - M_hat = self._double_dml_cv_predict( - self._learner["ml_M"], - "ml_M", - x_d_concat, - y, - smpls=smpls, - smpls_inner=self._DoubleML__smpls__inner, - n_jobs=n_jobs_cv, - est_params=M_best_params, - method=self._predict_method["ml_M"], - ) - W_inner = [] - for i, (train, _) in enumerate(smpls): - M_iteration = M_hat["preds_inner"][i][train] - M_iteration = np.clip(M_iteration, 1e-8, 1 - 1e-8) - w = scipy.special.logit(M_iteration) - W_inner.append(w) + M_hat = np.full_like(y, np.nan) + for idx, (train_index, _) in enumerate(smpls): + M_hat[train_index] = M_tune_res[idx].predict_proba(x_d_concat[train_index, :])[:, 1] - # Reshape W_inner into full-length arrays per fold: fill train indices, others are NaN - W_targets = [] - for i, train in enumerate(train_inds): - wt = np.full(x.shape[0], np.nan, dtype=float) - wt[train] = W_inner[i] - W_targets.append(wt) + M_hat = np.clip(M_hat, 1e-8, 1 - 1e-8) + W_hat = scipy.special.logit(M_hat) t_tune_res = _dml_tune( - W_inner, + W_hat, x, train_inds, self._learner["ml_t"], @@ -537,7 +476,6 @@ def _nuisance_tuning( n_jobs_cv, search_mode, n_iter_randomized_search, - fold_specific_target=True, ) t_best_params = [xx.best_params_ for xx in t_tune_res] diff --git 
a/doubleml/plm/tests/test_lplr.py b/doubleml/plm/tests/test_lplr.py index abd7adf55..6ddbba6bc 100644 --- a/doubleml/plm/tests/test_lplr.py +++ b/doubleml/plm/tests/test_lplr.py @@ -7,22 +7,22 @@ from doubleml.plm.datasets import make_lplr_LZZ2020 -@pytest.fixture(scope="module", params=[RandomForestClassifier(random_state=42)]) +@pytest.fixture(scope="module", params=[RandomForestClassifier(random_state=42, max_depth=2, n_estimators=10)]) def learner_M(request): return request.param -@pytest.fixture(scope="module", params=[RandomForestRegressor(random_state=42)]) +@pytest.fixture(scope="module", params=[RandomForestRegressor(random_state=42, max_depth=2, n_estimators=10)]) def learner_t(request): return request.param -@pytest.fixture(scope="module", params=[RandomForestRegressor(random_state=42)]) +@pytest.fixture(scope="module", params=[RandomForestRegressor(random_state=42, max_depth=2, n_estimators=10)]) def learner_m(request): return request.param -@pytest.fixture(scope="module", params=[RandomForestClassifier(random_state=42)]) +@pytest.fixture(scope="module", params=[RandomForestClassifier(random_state=42, max_depth=2, n_estimators=10)]) def learner_m_classifier(request): return request.param @@ -33,7 +33,6 @@ def score(request): @pytest.fixture(scope="module", params=["continuous", "binary", "binary_unbalanced"]) -# TODO: Error for continuous treatment? 
def treatment(request): return request.param diff --git a/doubleml/plm/tests/test_lplr_exceptions.py b/doubleml/plm/tests/test_lplr_exceptions.py index f01cd8855..32a8103e9 100644 --- a/doubleml/plm/tests/test_lplr_exceptions.py +++ b/doubleml/plm/tests/test_lplr_exceptions.py @@ -14,9 +14,9 @@ # create test data and basic learners dml_data = make_lplr_LZZ2020(alpha=0.5, n_obs=n, dim_x=20) dml_data_binary = make_lplr_LZZ2020(alpha=0.5, n_obs=n, treatment="binary", dim_x=20) -ml_M = RandomForestClassifier() -ml_t = RandomForestRegressor() -ml_m = RandomForestRegressor() +ml_M = RandomForestClassifier(max_depth=2, n_estimators=10) +ml_t = RandomForestRegressor(max_depth=2, n_estimators=10) +ml_m = RandomForestRegressor(max_depth=2, n_estimators=10) dml_lplr = DoubleMLLPLR(dml_data, ml_M, ml_t, ml_m) dml_lplr_instrument = DoubleMLLPLR(dml_data, ml_M, ml_t, ml_m, score="instrument") diff --git a/doubleml/plm/tests/test_lplr_tune.py b/doubleml/plm/tests/test_lplr_tune.py index 7c7c4aebb..78ad050ee 100644 --- a/doubleml/plm/tests/test_lplr_tune.py +++ b/doubleml/plm/tests/test_lplr_tune.py @@ -44,7 +44,7 @@ def dml_lplr_fixture( learner_m, learner_a, score, - tune_on_folds=True, + tune_on_folds=False, ): par_grid = { "ml_M": get_par_grid(), @@ -94,28 +94,3 @@ def test_dml_selection_coef(dml_lplr_fixture): se = dml_lplr_fixture["se"] true_coef = dml_lplr_fixture["true_coef"] assert abs(coef - true_coef) <= 3.0 * np.sqrt(se) - - -@pytest.mark.ci -def test_lplr_exception_tuning( - learner_M, - learner_t, - learner_m, - learner_a, -): - # LPLR valid scores are 'nuisance_space' and 'instrument' - obj_dml_data = make_lplr_LZZ2020(alpha=0.5) - ml_M = clone(learner_M) - ml_t = clone(learner_t) - ml_m = clone(learner_m) - - dml_lplr_obj = dml.DoubleMLLPLR(obj_dml_data, ml_M, ml_t, ml_m) - par_grid = { - "ml_M": get_par_grid(), - "ml_t": get_par_grid(), - "ml_m": get_par_grid(), - "ml_a": get_par_grid(), - } - msg = "tune_on_folds must be True as targets have to be created for 
ml_t on folds." - with pytest.raises(ValueError, match=msg): - dml_lplr_obj.tune(par_grid, tune_on_folds=False) diff --git a/doubleml/plm/tests/test_model_defaults.py b/doubleml/plm/tests/test_model_defaults.py new file mode 100644 index 000000000..3e9bc430d --- /dev/null +++ b/doubleml/plm/tests/test_model_defaults.py @@ -0,0 +1,51 @@ +import pytest +from sklearn.linear_model import LinearRegression, LogisticRegression + +from doubleml import DoubleMLLPLR +from doubleml.plm.datasets import make_lplr_LZZ2020 +from doubleml.utils._check_defaults import _check_basic_defaults_after_fit, _check_basic_defaults_before_fit, _fit_bootstrap + +dml_data_lplr = make_lplr_LZZ2020(n_obs=100) + +dml_lplr_obj = DoubleMLLPLR(dml_data_lplr, LogisticRegression(), LinearRegression(), LinearRegression()) + + +@pytest.mark.ci +def test_lplr_defaults(): + _check_basic_defaults_before_fit(dml_lplr_obj) + + _fit_bootstrap(dml_lplr_obj) + + _check_basic_defaults_after_fit(dml_lplr_obj) + + +@pytest.mark.ci +def test_did_multi_str(): + # Test the string representation before fitting + dml_str = str(dml_lplr_obj) + + # Check that all important sections are present + assert "================== DoubleMLLPLR Object ==================" in dml_str + assert "------------------ Data Summary ------------------" in dml_str + assert "------------------ Score & Algorithm ------------------" in dml_str + assert "------------------ Machine Learner ------------------" in dml_str + assert "------------------ Resampling ------------------" in dml_str + assert "------------------ Fit Summary ------------------" in dml_str + + # Check specific content before fitting + assert "No. folds: 5" in dml_str + assert "No. 
repeated sample splits: 1" in dml_str + assert "Learner ml_M:" in dml_str + assert "Learner ml_m:" in dml_str + assert "Learner ml_t:" in dml_str + + # Fit the model + dml_lplr_obj_fit = dml_lplr_obj.fit() + dml_str_after_fit = str(dml_lplr_obj_fit) + + # Check that additional information is present after fitting + assert "coef" in dml_str_after_fit + assert "std err" in dml_str_after_fit + assert "t" in dml_str_after_fit + assert "P>|t|" in dml_str_after_fit + assert "Out-of-sample Performance:" in dml_str_after_fit diff --git a/doubleml/plm/tests/test_return_types.py b/doubleml/plm/tests/test_return_types.py new file mode 100644 index 000000000..cb32f5433 --- /dev/null +++ b/doubleml/plm/tests/test_return_types.py @@ -0,0 +1,73 @@ +import numpy as np +import pytest +from sklearn.linear_model import LinearRegression, LogisticRegression + +from doubleml import DoubleMLLPLR +from doubleml.plm.datasets import make_lplr_LZZ2020 +from doubleml.utils._check_return_types import ( + check_basic_predictions_and_targets, + check_basic_property_types_and_shapes, + check_basic_return_types, + check_sensitivity_return_types, +) + +# Test constants +N_OBS = 200 +N_TREAT = 1 +N_REP = 1 +N_FOLDS = 3 +N_REP_BOOT = 314 + +dml_args = { + "n_rep": N_REP, + "n_folds": N_FOLDS, +} + + +# create all datasets +np.random.seed(3141) +datasets = {} + + +datasets["lplr"] = make_lplr_LZZ2020(n_obs=N_OBS) +datasets["lplr_binary"] = make_lplr_LZZ2020(n_obs=N_OBS, treatment="binary") + +dml_lplr_obj = DoubleMLLPLR(datasets["lplr"], LogisticRegression(), LinearRegression(), LinearRegression(), **dml_args) +dml_lplr_obj_binary = DoubleMLLPLR( + datasets["lplr_binary"], LogisticRegression(), LinearRegression(), LogisticRegression(), **dml_args +) + +dml_objs = [ + (dml_lplr_obj, DoubleMLLPLR), + (dml_lplr_obj_binary, DoubleMLLPLR), +] + + +@pytest.mark.ci +@pytest.mark.parametrize("dml_obj, cls", dml_objs) +def test_return_types(dml_obj, cls): + check_basic_return_types(dml_obj, cls) + + # further 
return type tests + assert isinstance(dml_obj.get_params("ml_m"), dict) + + +@pytest.fixture(params=dml_objs) +def fitted_dml_obj(request): + dml_obj, _ = request.param + dml_obj.fit() + dml_obj.bootstrap(n_rep_boot=N_REP_BOOT) + return dml_obj + + +@pytest.mark.ci +def test_property_types_and_shapes(fitted_dml_obj): + check_basic_property_types_and_shapes(fitted_dml_obj, N_OBS, N_TREAT, N_REP, N_FOLDS, N_REP_BOOT) + check_basic_predictions_and_targets(fitted_dml_obj, N_OBS, N_TREAT, N_REP) + + +@pytest.mark.ci +def test_sensitivity_return_types(fitted_dml_obj): + if fitted_dml_obj._sensitivity_implemented: + benchmarking_set = [fitted_dml_obj._dml_data.x_cols[0]] + check_sensitivity_return_types(fitted_dml_obj, N_OBS, N_REP, N_TREAT, benchmarking_set=benchmarking_set) diff --git a/doubleml/utils/_check_defaults.py b/doubleml/utils/_check_defaults.py index 5f376000a..d374ff313 100644 --- a/doubleml/utils/_check_defaults.py +++ b/doubleml/utils/_check_defaults.py @@ -47,7 +47,8 @@ def _check_basic_defaults_after_fit(dml_obj): # sensitivity assert dml_obj.sensitivity_params is None - assert isinstance(dml_obj.sensitivity_elements, dict) + if dml_obj._sensitivity_implemented: + assert isinstance(dml_obj.sensitivity_elements, dict) # fit method if isinstance(dml_obj, DoubleML): diff --git a/doubleml/utils/_check_return_types.py b/doubleml/utils/_check_return_types.py index b73e2e04e..633eb1c67 100644 --- a/doubleml/utils/_check_return_types.py +++ b/doubleml/utils/_check_return_types.py @@ -11,9 +11,9 @@ def check_basic_return_types(dml_obj, cls): assert isinstance(dml_obj.__str__(), str) assert isinstance(dml_obj.summary, pd.DataFrame) assert isinstance(dml_obj.draw_sample_splitting(), cls) - if not dml_obj._is_cluster_data: + if not dml_obj._is_cluster_data and not hasattr(dml_obj, "n_folds_inner"): assert isinstance(dml_obj.set_sample_splitting(dml_obj.smpls), cls) - else: + elif dml_obj._is_cluster_data: assert dml_obj._dml_data.is_cluster_data assert 
isinstance(dml_obj.fit(), cls) assert isinstance(dml_obj.__str__(), str) # called again after fit, now with numbers diff --git a/doubleml/utils/_estimation.py b/doubleml/utils/_estimation.py index b79c7618a..d548ca14a 100644 --- a/doubleml/utils/_estimation.py +++ b/doubleml/utils/_estimation.py @@ -9,7 +9,7 @@ from sklearn.preprocessing import LabelEncoder from statsmodels.nonparametric.kde import KDEUnivariate -from ._checks import _check_is_partition +from ._checks import _check_finite_predictions, _check_is_partition def _assure_2d_array(x): @@ -187,6 +187,50 @@ def _dml_cv_predict( return res +def _double_dml_cv_predict( + estimator, + estimator_name, + x, + y, + smpls=None, + smpls_inner=None, + n_jobs=None, + est_params=None, + method="predict", + sample_weights=None, +): + res = {} + res["preds"] = np.zeros(y.shape, dtype=float) + res["preds_inner"] = [] + res["targets_inner"] = [] + res["models"] = [] + for smpls_single_split, smpls_double_split in zip(smpls, smpls_inner): + res_inner = _dml_cv_predict( + estimator, + x, + y, + smpls=smpls_double_split, + n_jobs=n_jobs, + est_params=est_params, + method=method, + return_models=True, + sample_weights=sample_weights, + ) + _check_finite_predictions(res_inner["preds"], estimator, estimator_name, smpls_double_split) + + res["preds_inner"].append(res_inner["preds"]) + res["targets_inner"].append(res_inner["targets"]) + for model in res_inner["models"]: + res["models"].append(model) + if method == "predict_proba": + res["preds"][smpls_single_split[1]] += model.predict_proba(x[smpls_single_split[1]])[:, 1] + else: + res["preds"][smpls_single_split[1]] += model.predict(x[smpls_single_split[1]]) + res["preds"] /= len(smpls) + res["targets"] = np.copy(y) + return res + + def _dml_tune( y, x, From 8f7125f7794fb7e27c98454588c04c9b66465313 Mon Sep 17 00:00:00 2001 From: Julius Herzig Date: Thu, 13 Nov 2025 21:10:57 -0800 Subject: [PATCH 44/48] Exceptions fixed --- doubleml/plm/tests/test_lplr_exceptions.py | 11 
++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/doubleml/plm/tests/test_lplr_exceptions.py b/doubleml/plm/tests/test_lplr_exceptions.py index 32a8103e9..404770fa0 100644 --- a/doubleml/plm/tests/test_lplr_exceptions.py +++ b/doubleml/plm/tests/test_lplr_exceptions.py @@ -70,7 +70,7 @@ def test_lplr_exception_resampling(): @pytest.mark.ci def test_lplr_exception_get_params(): - msg = "Invalid nuisance learner ml_x. Valid nuisance learner ml_m or ml_t or ml_M or ml_a." + msg = r"Invalid nuisance learner ml_x. Valid nuisance learner ml_m or ml_a or ml_t or ml_M.*" with pytest.raises(ValueError, match=msg): dml_lplr.get_params("ml_x") @@ -147,7 +147,7 @@ def test_lplr_exception_confint(): @pytest.mark.ci def test_lplr_exception_set_ml_nuisance_params(): # invalid learner name - msg = "Invalid nuisance learner g. Valid nuisance learner ml_m or ml_t or ml_M or ml_a." + msg = "Invalid nuisance learner g. Valid nuisance learner ml_m or ml_a or ml_t or ml_M.*" with pytest.raises(ValueError, match=msg): dml_lplr.set_ml_nuisance_params("g", "d", {"alpha": 0.1}) # invalid treatment variable @@ -246,13 +246,13 @@ def test_lplr_exception_and_warning_learner(): with pytest.raises(TypeError, match=msg): _ = DoubleMLLPLR(dml_data, Lasso(), ml_t, ml_m) msg = ( - r"The ml_m learner RandomForestRegressor\(\) was identified as regressor but at least one treatment " + r"The ml_m learner RandomForestRegressor\(.*\) was identified as regressor but at least one treatment " r"variable is binary with values 0 and 1." ) with pytest.warns(match=msg): _ = DoubleMLLPLR(dml_data_binary, ml_M, ml_t, ml_m) msg = ( - r"The ml_a learner RandomForestRegressor\(\) was identified as regressor but at least one treatment " + r"The ml_a learner RandomForestRegressor\(.*\) was identified as regressor but at least one treatment " r"variable is binary with values 0 and 1." 
) with pytest.warns(match=msg): @@ -314,7 +314,8 @@ def test_double_ml_exception_evaluate_learner(): dml_lplr_obj.evaluate_learners(metric="mse") msg = ( - r"The learners have to be a subset of \['ml_m', 'ml_t', 'ml_M', 'ml_a'\]\. " r"Learners \['ml_mu', 'ml_p'\] provided." + r"The learners have to be a subset of \['ml_m', 'ml_a', 'ml_t', 'ml_M'.*\]\. " + r"Learners \['ml_mu', 'ml_p'\] provided." ) with pytest.raises(ValueError, match=msg): dml_lplr_obj.evaluate_learners(learners=["ml_mu", "ml_p"]) From 03fd19179bcc909d3199ddc0c3e3fe376c86b0b1 Mon Sep 17 00:00:00 2001 From: Julius Herzig Date: Thu, 13 Nov 2025 22:23:07 -0800 Subject: [PATCH 45/48] Test fixed --- doubleml/utils/_check_defaults.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doubleml/utils/_check_defaults.py b/doubleml/utils/_check_defaults.py index d374ff313..bb3f8ba49 100644 --- a/doubleml/utils/_check_defaults.py +++ b/doubleml/utils/_check_defaults.py @@ -47,7 +47,7 @@ def _check_basic_defaults_after_fit(dml_obj): # sensitivity assert dml_obj.sensitivity_params is None - if dml_obj._sensitivity_implemented: + if dml_obj.sensitivity_params is not None: assert isinstance(dml_obj.sensitivity_elements, dict) # fit method From 33a86d0f70d7c5775ba51d897a32696bd37247eb Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Fri, 14 Nov 2025 08:53:39 +0100 Subject: [PATCH 46/48] Skip doctests for summary outputs --- doubleml/did/did.py | 2 +- doubleml/did/did_cs.py | 2 +- doubleml/did/did_multi.py | 2 +- doubleml/double_ml.py | 2 +- doubleml/irm/cvar.py | 2 +- doubleml/irm/iivm.py | 2 +- doubleml/irm/irm.py | 2 +- doubleml/irm/lpq.py | 2 +- doubleml/irm/pq.py | 2 +- doubleml/irm/qte.py | 2 +- doubleml/irm/ssm.py | 2 +- doubleml/plm/lplr.py | 2 +- doubleml/plm/pliv.py | 2 +- doubleml/plm/plr.py | 2 +- doubleml/rdd/rdd.py | 1 + doubleml/utils/_check_return_types.py | 2 +- 16 files changed, 16 insertions(+), 15 deletions(-) diff --git a/doubleml/did/did.py b/doubleml/did/did.py index 
87eb4aaa8..5e86d52eb 100644 --- a/doubleml/did/did.py +++ b/doubleml/did/did.py @@ -70,7 +70,7 @@ class DoubleMLDID(LinearScoreMixin, DoubleML): >>> data = make_did_SZ2020(n_obs=500, return_type='DataFrame') >>> obj_dml_data = dml.DoubleMLDIDData(data, 'y', 'd') >>> dml_did_obj = dml.DoubleMLDID(obj_dml_data, ml_g, ml_m) - >>> dml_did_obj.fit().summary + >>> dml_did_obj.fit().summary # doctest: +SKIP coef std err t P>|t| 2.5 % 97.5 % d -2.840718 1.760386 -1.613691 0.106595 -6.291011 0.609575 diff --git a/doubleml/did/did_cs.py b/doubleml/did/did_cs.py index da833fd56..f2f2b0543 100644 --- a/doubleml/did/did_cs.py +++ b/doubleml/did/did_cs.py @@ -69,7 +69,7 @@ class DoubleMLDIDCS(LinearScoreMixin, DoubleML): >>> data = make_did_SZ2020(n_obs=500, cross_sectional_data=True, return_type='DataFrame') >>> obj_dml_data = dml.DoubleMLDIDData(data, 'y', 'd', t_col='t') >>> dml_did_obj = dml.DoubleMLDIDCS(obj_dml_data, ml_g, ml_m) - >>> dml_did_obj.fit().summary + >>> dml_did_obj.fit().summary # doctest: +SKIP coef std err t P>|t| 2.5 % 97.5 % d -4.9944 7.561785 -0.660479 0.508947 -19.815226 9.826426 """ diff --git a/doubleml/did/did_multi.py b/doubleml/did/did_multi.py index a9e9e7908..2b7aa9064 100644 --- a/doubleml/did/did_multi.py +++ b/doubleml/did/did_multi.py @@ -140,7 +140,7 @@ class DoubleMLDIDMulti: ... gt_combinations="standard", ... control_group="never_treated", ... ) - >>> print(dml_did_obj.fit().summary) + >>> print(dml_did_obj.fit().summary) # doctest: +SKIP coef std err ... 2.5 % 97.5 % ATT(2025-03,2025-01,2025-02) -0.797617 0.459617 ... -1.698450 0.103215 ATT(2025-03,2025-02,2025-03) 0.270311 0.456453 ... -0.624320 1.164941 diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index df35bcb5e..3b94a2cae 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -1255,7 +1255,7 @@ def evaluate_learners(self, learners=None, metric=_rmse): >>> def mae(y_true, y_pred): ... subset = np.logical_not(np.isnan(y_true)) ... 
return mean_absolute_error(y_true[subset], y_pred[subset]) - >>> dml_irm_obj.evaluate_learners(metric=mae) + >>> dml_irm_obj.evaluate_learners(metric=mae) # doctest: +SKIP {'ml_g0': array([[0.88173585]]), 'ml_g1': array([[0.83854057]]), 'ml_m': array([[0.35871235]])} """ # if no learners are provided try to evaluate all learners diff --git a/doubleml/irm/cvar.py b/doubleml/irm/cvar.py index 64e82ad85..6c6982933 100644 --- a/doubleml/irm/cvar.py +++ b/doubleml/irm/cvar.py @@ -97,7 +97,7 @@ class DoubleMLCVAR(LinearScoreMixin, DoubleML): >>> data = make_irm_data(theta=0.5, n_obs=500, dim_x=20, return_type='DataFrame') >>> obj_dml_data = dml.DoubleMLData(data, 'y', 'd') >>> dml_cvar_obj = dml.DoubleMLCVAR(obj_dml_data, ml_g, ml_m, treatment=1, quantile=0.5) - >>> dml_cvar_obj.fit().summary + >>> dml_cvar_obj.fit().summary # doctest: +SKIP coef std err t P>|t| 2.5 % 97.5 % d 1.588364 0.096616 16.43989 9.909942e-61 1.398999 1.777728 diff --git a/doubleml/irm/iivm.py b/doubleml/irm/iivm.py index 7f330cfb2..50513c0f4 100644 --- a/doubleml/irm/iivm.py +++ b/doubleml/irm/iivm.py @@ -95,7 +95,7 @@ class DoubleMLIIVM(LinearScoreMixin, DoubleML): >>> data = make_iivm_data(theta=0.5, n_obs=1000, dim_x=20, alpha_x=1.0, return_type='DataFrame') >>> obj_dml_data = dml.DoubleMLData(data, 'y', 'd', z_cols='z') >>> dml_iivm_obj = dml.DoubleMLIIVM(obj_dml_data, ml_g, ml_m, ml_r) - >>> dml_iivm_obj.fit().summary + >>> dml_iivm_obj.fit().summary # doctest: +SKIP coef std err t P>|t| 2.5 % 97.5 % d 0.362398 0.191578 1.891649 0.058538 -0.013088 0.737884 diff --git a/doubleml/irm/irm.py b/doubleml/irm/irm.py index 01d288bdf..f5abdbd95 100644 --- a/doubleml/irm/irm.py +++ b/doubleml/irm/irm.py @@ -96,7 +96,7 @@ class DoubleMLIRM(LinearScoreMixin, DoubleML): >>> data = make_irm_data(theta=0.5, n_obs=500, dim_x=20, return_type='DataFrame') >>> obj_dml_data = dml.DoubleMLData(data, 'y', 'd') >>> dml_irm_obj = dml.DoubleMLIRM(obj_dml_data, ml_g, ml_m) - >>> dml_irm_obj.fit().summary + >>> 
dml_irm_obj.fit().summary # doctest: +SKIP coef std err t P>|t| 2.5 % 97.5 % d 0.371972 0.206802 1.798685 0.072069 -0.033353 0.777297 diff --git a/doubleml/irm/lpq.py b/doubleml/irm/lpq.py index bd62794cd..5dd8ff379 100644 --- a/doubleml/irm/lpq.py +++ b/doubleml/irm/lpq.py @@ -99,7 +99,7 @@ class DoubleMLLPQ(NonLinearScoreMixin, DoubleML): >>> data = make_iivm_data(theta=0.5, n_obs=1000, dim_x=20, return_type='DataFrame') >>> obj_dml_data = dml.DoubleMLData(data, 'y', 'd', z_cols='z') >>> dml_lpq_obj = dml.DoubleMLLPQ(obj_dml_data, ml_g, ml_m, treatment=1, quantile=0.5) - >>> dml_lpq_obj.fit().summary + >>> dml_lpq_obj.fit().summary # doctest: +SKIP coef std err t P>|t| 2.5 % 97.5 % d 0.217244 0.636453 0.341336 0.73285 -1.03018 1.464668 """ diff --git a/doubleml/irm/pq.py b/doubleml/irm/pq.py index f3b72e2c8..901c07b7d 100644 --- a/doubleml/irm/pq.py +++ b/doubleml/irm/pq.py @@ -105,7 +105,7 @@ class DoubleMLPQ(NonLinearScoreMixin, DoubleML): >>> data = make_irm_data(theta=0.5, n_obs=500, dim_x=20, return_type='DataFrame') >>> obj_dml_data = dml.DoubleMLData(data, 'y', 'd') >>> dml_pq_obj = dml.DoubleMLPQ(obj_dml_data, ml_g, ml_m, treatment=1, quantile=0.5) - >>> dml_pq_obj.fit().summary + >>> dml_pq_obj.fit().summary # doctest: +SKIP coef std err t P>|t| 2.5 % 97.5 % d 0.553878 0.149858 3.696011 0.000219 0.260161 0.847595 """ diff --git a/doubleml/irm/qte.py b/doubleml/irm/qte.py index 46c8f3165..c3325e08d 100644 --- a/doubleml/irm/qte.py +++ b/doubleml/irm/qte.py @@ -88,7 +88,7 @@ class DoubleMLQTE(SampleSplittingMixin): >>> data = make_irm_data(theta=0.5, n_obs=500, dim_x=20, return_type='DataFrame') >>> obj_dml_data = dml.DoubleMLData(data, 'y', 'd') >>> dml_qte_obj = dml.DoubleMLQTE(obj_dml_data, ml_g, ml_m, quantiles=[0.25, 0.5, 0.75]) - >>> dml_qte_obj.fit().summary + >>> dml_qte_obj.fit().summary # doctest: +SKIP coef std err t P>|t| 2.5 % 97.5 % 0.25 0.274825 0.347310 0.791297 0.428771 -0.405890 0.955541 0.50 0.449150 0.192539 2.332782 0.019660 0.071782 
0.826519 diff --git a/doubleml/irm/ssm.py b/doubleml/irm/ssm.py index fdc2ab6ef..bc6cd739d 100644 --- a/doubleml/irm/ssm.py +++ b/doubleml/irm/ssm.py @@ -94,7 +94,7 @@ class DoubleMLSSM(LinearScoreMixin, DoubleML): >>> ml_pi_sim = clone(learner_class) >>> ml_m_sim = clone(learner_class) >>> obj_dml_sim = DoubleMLSSM(simul_data, ml_g_sim, ml_pi_sim, ml_m_sim) - >>> obj_dml_sim.fit().summary + >>> obj_dml_sim.fit().summary # doctest: +SKIP coef std err t P>|t| 2.5 % 97.5 % d 0.518517 0.065535 7.912033 2.532202e-15 0.39007 0.646963 diff --git a/doubleml/plm/lplr.py b/doubleml/plm/lplr.py index c3f6d5b56..0e5cb9965 100644 --- a/doubleml/plm/lplr.py +++ b/doubleml/plm/lplr.py @@ -61,7 +61,7 @@ class DoubleMLLPLR(NonLinearScoreMixin, DoubleML): >>> ml_M = RandomForestClassifier(n_estimators=100, max_features=20, max_depth=5, min_samples_leaf=2) >>> obj_dml_data = make_lplr_LZZ2020(alpha=0.5, n_obs=500, dim_x=20) >>> dml_lplr_obj = dml.DoubleMLLPLR(obj_dml_data, ml_M, ml_t, ml_m) - >>> dml_lplr_obj.fit().summary # doctest: +SKIP + >>> dml_lplr_obj.fit().summary # doctest: +SKIP coef std err t P>|t| 2.5 % 97.5 % d 0.661166 0.172672 3.829038 0.000129 0.322736 0.999596 diff --git a/doubleml/plm/pliv.py b/doubleml/plm/pliv.py index d2b348c58..e8fd0ed67 100644 --- a/doubleml/plm/pliv.py +++ b/doubleml/plm/pliv.py @@ -73,7 +73,7 @@ class DoubleMLPLIV(LinearScoreMixin, DoubleML): >>> data = make_pliv_CHS2015(alpha=0.5, n_obs=500, dim_x=20, dim_z=1, return_type='DataFrame') >>> obj_dml_data = dml.DoubleMLData(data, 'y', 'd', z_cols='Z1') >>> dml_pliv_obj = dml.DoubleMLPLIV(obj_dml_data, ml_l, ml_m, ml_r) - >>> dml_pliv_obj.fit().summary + >>> dml_pliv_obj.fit().summary # doctest: +SKIP coef std err t P>|t| 2.5 % 97.5 % d 0.511722 0.087184 5.869427 4.373034e-09 0.340844 0.6826 diff --git a/doubleml/plm/plr.py b/doubleml/plm/plr.py index 19ee9199b..9b28231f2 100644 --- a/doubleml/plm/plr.py +++ b/doubleml/plm/plr.py @@ -68,7 +68,7 @@ class DoubleMLPLR(LinearScoreMixin, DoubleML): 
>>> ml_m = RandomForestRegressor(n_estimators=100, max_features=20, max_depth=5, min_samples_leaf=2) >>> obj_dml_data = make_plr_CCDDHNR2018(alpha=0.5, n_obs=500, dim_x=20) >>> dml_plr_obj = dml.DoubleMLPLR(obj_dml_data, ml_g, ml_m) - >>> dml_plr_obj.fit().summary + >>> dml_plr_obj.fit().summary # doctest: +SKIP coef std err t P>|t| 2.5 % 97.5 % d 0.480691 0.040533 11.859129 1.929729e-32 0.401247 0.560135 diff --git a/doubleml/rdd/rdd.py b/doubleml/rdd/rdd.py index fffa9a0aa..d854a8bec 100644 --- a/doubleml/rdd/rdd.py +++ b/doubleml/rdd/rdd.py @@ -91,6 +91,7 @@ class RDFlex: >>> ml_g = RandomForestRegressor() >>> ml_m = RandomForestClassifier() >>> rdflex_obj = dml.rdd.RDFlex(obj_dml_data, ml_g, ml_m, fuzzy=True) + >>> print(rdflex_obj.fit()) # doctest: +SKIP """ def __init__( diff --git a/doubleml/utils/_check_return_types.py b/doubleml/utils/_check_return_types.py index 633eb1c67..5e6b207c6 100644 --- a/doubleml/utils/_check_return_types.py +++ b/doubleml/utils/_check_return_types.py @@ -11,7 +11,7 @@ def check_basic_return_types(dml_obj, cls): assert isinstance(dml_obj.__str__(), str) assert isinstance(dml_obj.summary, pd.DataFrame) assert isinstance(dml_obj.draw_sample_splitting(), cls) - if not dml_obj._is_cluster_data and not hasattr(dml_obj, "n_folds_inner"): + if not dml_obj._is_cluster_data and not hasattr(dml_obj, "n_folds_inner"): # set_sample_splitting is not available assert isinstance(dml_obj.set_sample_splitting(dml_obj.smpls), cls) elif dml_obj._is_cluster_data: assert dml_obj._dml_data.is_cluster_data From 96f33ae7dd91bcf3265adb3544238ece44370f48 Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Fri, 14 Nov 2025 09:09:13 +0100 Subject: [PATCH 47/48] Enhance learner evaluation checks and handle NaN targets in DoubleML class --- doubleml/double_ml.py | 19 +++++++++++++------ doubleml/utils/_check_return_types.py | 6 ++++++ 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index 
3b94a2cae..6293731a3 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -1274,12 +1274,19 @@ def evaluate_learners(self, learners=None, metric=_rmse): for learner in learners: for rep in range(self.n_rep): for coef_idx in range(self._dml_data.n_coefs): - res = metric( - y_pred=self.predictions[learner][:, rep, coef_idx].reshape(1, -1), - y_true=self.nuisance_targets[learner][:, rep, coef_idx].reshape(1, -1), - ) - if not np.isfinite(res): - raise ValueError(f"Evaluation from learner {str(learner)} is not finite.") + targets = self.nuisance_targets[learner][:, rep, coef_idx].reshape(1, -1) + + if np.all(np.isnan(targets)): + res = np.nan + else: + predictions = self.predictions[learner][:, rep, coef_idx].reshape(1, -1) + res = metric( + y_pred=predictions, + y_true=targets, + ) + if not np.isfinite(res): + raise ValueError(f"Evaluation from learner {str(learner)} is not finite.") + dist[learner][rep, coef_idx] = res return dist else: diff --git a/doubleml/utils/_check_return_types.py b/doubleml/utils/_check_return_types.py index 5e6b207c6..fc7aca0ec 100644 --- a/doubleml/utils/_check_return_types.py +++ b/doubleml/utils/_check_return_types.py @@ -113,6 +113,12 @@ def check_basic_predictions_and_targets(dml_obj, n_obs, n_treat, n_rep): assert isinstance(dml_obj.nuisance_loss[key], np.ndarray) assert dml_obj.nuisance_loss[key].shape == (n_rep, n_treat) + learner_eval = dml_obj.evaluate_learners() + assert isinstance(learner_eval, dict) + for key in expected_keys: + assert key in learner_eval + assert isinstance(learner_eval[key], np.ndarray) + assert learner_eval[key].shape == (n_rep, n_treat) return From 3d362aad41d1cffc1e33b11e07e2ba770e3382bb Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Fri, 14 Nov 2025 09:09:25 +0100 Subject: [PATCH 48/48] removed unnecessary test --- doubleml/plm/tests/test_model_defaults.py | 32 ----------------------- 1 file changed, 32 deletions(-) diff --git a/doubleml/plm/tests/test_model_defaults.py 
b/doubleml/plm/tests/test_model_defaults.py index 3e9bc430d..b555f5ad5 100644 --- a/doubleml/plm/tests/test_model_defaults.py +++ b/doubleml/plm/tests/test_model_defaults.py @@ -17,35 +17,3 @@ def test_lplr_defaults(): _fit_bootstrap(dml_lplr_obj) _check_basic_defaults_after_fit(dml_lplr_obj) - - -@pytest.mark.ci -def test_did_multi_str(): - # Test the string representation before fitting - dml_str = str(dml_lplr_obj) - - # Check that all important sections are present - assert "================== DoubleMLLPLR Object ==================" in dml_str - assert "------------------ Data Summary ------------------" in dml_str - assert "------------------ Score & Algorithm ------------------" in dml_str - assert "------------------ Machine Learner ------------------" in dml_str - assert "------------------ Resampling ------------------" in dml_str - assert "------------------ Fit Summary ------------------" in dml_str - - # Check specific content before fitting - assert "No. folds: 5" in dml_str - assert "No. repeated sample splits: 1" in dml_str - assert "Learner ml_M:" in dml_str - assert "Learner ml_m:" in dml_str - assert "Learner ml_t:" in dml_str - - # Fit the model - dml_lplr_obj_fit = dml_lplr_obj.fit() - dml_str_after_fit = str(dml_lplr_obj_fit) - - # Check that additional information is present after fitting - assert "coef" in dml_str_after_fit - assert "std err" in dml_str_after_fit - assert "t" in dml_str_after_fit - assert "P>|t|" in dml_str_after_fit - assert "Out-of-sample Performance:" in dml_str_after_fit