diff --git a/doubleml/__init__.py b/doubleml/__init__.py
index 6cf7de96..cb3891ba 100644
--- a/doubleml/__init__.py
+++ b/doubleml/__init__.py
@@ -13,6 +13,7 @@
 from .irm.pq import DoubleMLPQ
 from .irm.qte import DoubleMLQTE
 from .irm.ssm import DoubleMLSSM
+from .plm.lplr import DoubleMLLPLR
 from .plm.pliv import DoubleMLPLIV
 from .plm.plr import DoubleMLPLR
 from .utils.blp import DoubleMLBLP
@@ -42,6 +43,7 @@
     "DoubleMLBLP",
     "DoubleMLPolicyTree",
     "DoubleMLSSM",
+    "DoubleMLLPLR",
 ]

 __version__ = importlib.metadata.version("doubleml")
diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py
index 05481bf1..4e11b13c 100644
--- a/doubleml/double_ml.py
+++ b/doubleml/double_ml.py
@@ -22,7 +22,7 @@ class DoubleML(SampleSplittingMixin, ABC):
     """Double Machine Learning."""

-    def __init__(self, obj_dml_data, n_folds, n_rep, score, draw_sample_splitting):
+    def __init__(self, obj_dml_data, n_folds, n_rep, score, draw_sample_splitting, double_sample_splitting=False):
         # check and pick up obj_dml_data
         if not isinstance(obj_dml_data, DoubleMLBaseData):
             raise TypeError(
@@ -34,18 +34,10 @@ def __init__(self, obj_dml_data, n_folds, n_rep, score, draw_sample_splitting):
         if obj_dml_data.n_cluster_vars > 2:
             raise NotImplementedError("Multi-way (n_ways > 2) clustering not yet implemented.")
         self._is_cluster_data = True
-        self._is_panel_data = False
-        if isinstance(obj_dml_data, DoubleMLPanelData):
-            self._is_panel_data = True
-        self._is_did_data = False
-        if isinstance(obj_dml_data, DoubleMLDIDData):
-            self._is_did_data = True
-        self._is_ssm_data = False
-        if isinstance(obj_dml_data, DoubleMLSSMData):
-            self._is_ssm_data = True
-        self._is_rdd_data = False
-        if isinstance(obj_dml_data, DoubleMLRDDData):
-            self._is_rdd_data = True
+        self._is_panel_data = isinstance(obj_dml_data, DoubleMLPanelData)
+        self._is_did_data = isinstance(obj_dml_data, DoubleMLDIDData)
+        self._is_ssm_data = isinstance(obj_dml_data, DoubleMLSSMData)
+        self._is_rdd_data = isinstance(obj_dml_data, DoubleMLRDDData)

         self._dml_data = obj_dml_data
         self._n_obs = self._dml_data.n_obs
@@ -108,6 +100,9 @@ def __init__(self, obj_dml_data, n_folds, n_rep, score, draw_sample_splitting):
         self._smpls = None
         self._smpls_cluster = None
         self._n_obs_sample_splitting = self.n_obs
+        self._double_sample_splitting = double_sample_splitting
+        if self._double_sample_splitting:
+            self._smpls_inner = None
         if draw_sample_splitting:
             self.draw_sample_splitting()

         self._score_dim = (self._dml_data.n_obs, self.n_rep, self._dml_data.n_coefs)
@@ -263,6 +258,13 @@ def learner(self):
         """
         return self._learner

+    @property
+    def predictions_names(self):
+        """
+        The names of predictions for the nuisance functions.
+        """
+        return list(self.params_names)
+
     @property
     def learner_names(self):
         """
@@ -359,6 +361,21 @@ def smpls(self):
         """
         The partition used for cross-fitting.
         """
         if self._smpls is None:
             err_msg = (
                 "Sample splitting not specified. Either draw samples via .draw_sample_splitting() "
                 + "or set external samples via .set_sample_splitting()."
             )
             raise ValueError(err_msg)
         return self._smpls

+    @property
+    def smpls_inner(self):
+        """
+        The inner partitions used for nested cross-fitting when double sample splitting is enabled.
+        """
+        if not self._double_sample_splitting:
+            raise ValueError("smpls_inner is only available for double sample splitting.")
+        if self._smpls_inner is None:
+            err_msg = (
+                "Sample splitting not specified. Either draw samples via .draw_sample_splitting() "
+                + "or set external samples via .set_sample_splitting()."
+            )
+            raise ValueError(err_msg)
+        return self._smpls_inner
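+
+    # Layout note (inferred from its usage in DoubleMLLPLR._nuisance_est): per repetition,
+    # _smpls_inner holds one list of inner train/test splits for every outer fold, so that
+    # zip(smpls, smpls_inner) pairs each outer split with its inner splits.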

     @property
     def smpls_cluster(self):
         """
@@ -507,6 +524,18 @@ def summary(self):
     def __smpls(self):
         return self._smpls[self._i_rep]

+    @property
+    def __smpls__inner(self):
+        if not self._double_sample_splitting:
+            raise ValueError("smpls_inner is only available for double sample splitting.")
+        if self._smpls_inner is None:
+            err_msg = (
+                "Sample splitting not specified. Either draw samples via .draw_sample_splitting() "
+                + "or set external samples via .set_sample_splitting()."
+            )
+            raise ValueError(err_msg)
+        return self._smpls_inner[self._i_rep]
+
     @property
     def __smpls_cluster(self):
         return self._smpls_cluster[self._i_rep]
@@ -1059,7 +1088,7 @@ def _check_fit(self, n_jobs_cv, store_predictions, external_predictions, store_m
             _check_external_predictions(
                 external_predictions=external_predictions,
                 valid_treatments=self._dml_data.d_cols,
-                valid_learners=self.params_names,
+                valid_learners=self.predictions_names,
                 n_obs=self.n_obs,
                 n_rep=self.n_rep,
             )
@@ -1081,7 +1110,10 @@ def _initalize_fit(self, store_predictions, store_models):

     def _fit_nuisance_and_score_elements(self, n_jobs_cv, store_predictions, external_predictions, store_models):
         ext_prediction_dict = _set_external_predictions(
-            external_predictions, learners=self.params_names, treatment=self._dml_data.d_cols[self._i_treat], i_rep=self._i_rep
+            external_predictions,
+            learners=self.predictions_names,
+            treatment=self._dml_data.d_cols[self._i_treat],
+            i_rep=self._i_rep,
         )

         # ml estimation of nuisance models and computation of score elements
@@ -1146,8 +1178,8 @@ def _initialize_arrays(self):
         self._all_se = np.full((n_thetas, n_rep), np.nan)

     def _initialize_predictions_and_targets(self):
-        self._predictions = {learner: np.full(self._score_dim, np.nan) for learner in self.params_names}
-        self._nuisance_targets = {learner: np.full(self._score_dim, np.nan) for learner in self.params_names}
+        self._predictions = {learner: np.full(self._score_dim, np.nan) for learner in self.predictions_names}
+        self._nuisance_targets = {learner: np.full(self._score_dim, np.nan) for learner in self.predictions_names}

     def _initialize_nuisance_loss(self):
         self._nuisance_loss = {learner: np.full((self.n_rep, self._dml_data.n_coefs), np.nan) for learner in self.params_names}
@@ -1158,7 +1190,7 @@ def _initialize_models(self):
         }

     def _store_predictions_and_targets(self, preds, targets):
-        for learner in self.params_names:
+        for learner in self.predictions_names:
             self._predictions[learner][:, self._i_rep, self._i_treat] = preds[learner]
             self._nuisance_targets[learner][:, self._i_rep, self._i_treat] = targets[learner]

diff --git a/doubleml/double_ml_sampling_mixins.py b/doubleml/double_ml_sampling_mixins.py
index d7d8b2e1..2f63d88e 100644
--- a/doubleml/double_ml_sampling_mixins.py
+++ b/doubleml/double_ml_sampling_mixins.py
@@ -1,7 +1,7 @@
 from abc import abstractmethod

 from doubleml.utils._checks import _check_sample_splitting
-from doubleml.utils.resampling import DoubleMLClusterResampling, DoubleMLResampling
+from doubleml.utils.resampling import DoubleMLClusterResampling, DoubleMLDoubleResampling, DoubleMLResampling


 class SampleSplittingMixin:
@@ -17,6 +17,8 @@ class SampleSplittingMixin:
     `sample splitting <https://docs.doubleml.org/stable/guide/resampling.html>`_ in the DoubleML user guide.
     """

+    _double_sample_splitting = False
+
     def draw_sample_splitting(self):
         """
         Draw sample splitting for DoubleML models.
@@ -29,6 +31,8 @@ def draw_sample_splitting(self): self : object """ if self._is_cluster_data: + if self._double_sample_splitting: + raise ValueError("Cluster data not supported for double sample splitting.") obj_dml_resampling = DoubleMLClusterResampling( n_folds=self._n_folds_per_cluster, n_rep=self.n_rep, @@ -38,10 +42,20 @@ def draw_sample_splitting(self): ) self._smpls, self._smpls_cluster = obj_dml_resampling.split_samples() else: - obj_dml_resampling = DoubleMLResampling( - n_folds=self.n_folds, n_rep=self.n_rep, n_obs=self._n_obs_sample_splitting, stratify=self._strata - ) - self._smpls = obj_dml_resampling.split_samples() + if self._double_sample_splitting: + obj_dml_resampling = DoubleMLDoubleResampling( + n_folds=self.n_folds, + n_folds_inner=self.n_folds_inner, + n_rep=self.n_rep, + n_obs=self._dml_data.n_obs, + stratify=self._strata, + ) + self._smpls, self._smpls_inner = obj_dml_resampling.split_samples() + else: + obj_dml_resampling = DoubleMLResampling( + n_folds=self.n_folds, n_rep=self.n_rep, n_obs=self._n_obs_sample_splitting, stratify=self._strata + ) + self._smpls = obj_dml_resampling.split_samples() return self @@ -104,6 +118,9 @@ def set_sample_splitting(self, all_smpls, all_smpls_cluster=None): >>> dml_plr_obj.set_sample_splitting(smpls) # doctest: +ELLIPSIS """ + if self._double_sample_splitting: + raise ValueError("set_sample_splitting not supported for double sample splitting.") + self._smpls, self._smpls_cluster, self._n_rep, self._n_folds = _check_sample_splitting( all_smpls, all_smpls_cluster, self._dml_data, self._is_cluster_data, n_obs=self._n_obs_sample_splitting ) diff --git a/doubleml/double_ml_score_mixins.py b/doubleml/double_ml_score_mixins.py index 57dd6e62..f1112db9 100644 --- a/doubleml/double_ml_score_mixins.py +++ b/doubleml/double_ml_score_mixins.py @@ -86,6 +86,7 @@ class NonLinearScoreMixin: _score_type = "nonlinear" _coef_start_val = np.nan _coef_bounds = None + _error_on_convergence_failure = False @property @abstractmethod @@ -149,12 +150,16 @@ def score_deriv(theta): theta_hat = root_res.root if not root_res.converged: score_val = score(theta_hat) - warnings.warn( + msg = ( "Could not find a root of the score function.\n " f"Flag: {root_res.flag}.\n" f"Score value found is {score_val} " f"for parameter theta equal to {theta_hat}." ) + if self._error_on_convergence_failure: + raise ValueError(msg) + else: + warnings.warn(msg) else: signs_different, bracket_guess = _get_bracket_guess(score, self._coef_start_val, self._coef_bounds) @@ -186,12 +191,16 @@ def score_squared(theta): score, self._coef_start_val, approx_grad=True, bounds=[self._coef_bounds] ) theta_hat = theta_hat_array.item() - warnings.warn( + msg = ( "Could not find a root of the score function.\n " f"Minimum score value found is {score_val} " f"for parameter theta equal to {theta_hat}.\n " "No theta found such that the score function evaluates to a negative value." ) + if self._error_on_convergence_failure: + raise ValueError(msg) + else: + warnings.warn(msg) else: def neg_score(theta): @@ -202,11 +211,15 @@ def neg_score(theta): neg_score, self._coef_start_val, approx_grad=True, bounds=[self._coef_bounds] ) theta_hat = theta_hat_array.item() - warnings.warn( + msg = ( "Could not find a root of the score function. " f"Maximum score value found is {-1 * neg_score_val} " f"for parameter theta equal to {theta_hat}. " "No theta found such that the score function evaluates to a positive value." 
) + if self._error_on_convergence_failure: + raise ValueError(msg) + else: + warnings.warn(msg) return theta_hat diff --git a/doubleml/irm/tests/test_datasets.py b/doubleml/irm/tests/test_datasets.py new file mode 100644 index 00000000..79bf6794 --- /dev/null +++ b/doubleml/irm/tests/test_datasets.py @@ -0,0 +1,156 @@ +import numpy as np +import pandas as pd +import pytest + +from doubleml import DoubleMLData +from doubleml.irm.datasets import ( + make_confounded_irm_data, + make_heterogeneous_data, + make_iivm_data, + make_irm_data, + make_irm_data_discrete_treatments, + make_ssm_data, +) + +msg_inv_return_type = "Invalid return_type." + + +@pytest.mark.ci +def test_make_irm_data_return_types(): + np.random.seed(3141) + res = make_irm_data(n_obs=100, return_type="DoubleMLData") + assert isinstance(res, DoubleMLData) + res = make_irm_data(n_obs=100, return_type="DataFrame") + assert isinstance(res, pd.DataFrame) + x, y, d = make_irm_data(n_obs=100, return_type="array") + assert isinstance(x, np.ndarray) + assert isinstance(y, np.ndarray) + assert isinstance(d, np.ndarray) + with pytest.raises(ValueError, match=msg_inv_return_type): + _ = make_irm_data(n_obs=100, return_type="matrix") + + +@pytest.mark.ci +def test_make_iivm_data_return_types(): + np.random.seed(3141) + res = make_iivm_data(n_obs=100, return_type="DoubleMLData") + assert isinstance(res, DoubleMLData) + res = make_iivm_data(n_obs=100, return_type="DataFrame") + assert isinstance(res, pd.DataFrame) + x, y, d, z = make_iivm_data(n_obs=100, return_type="array") + assert isinstance(x, np.ndarray) + assert isinstance(y, np.ndarray) + assert isinstance(d, np.ndarray) + assert isinstance(z, np.ndarray) + with pytest.raises(ValueError, match=msg_inv_return_type): + _ = make_iivm_data(n_obs=100, return_type="matrix") + + +@pytest.fixture(scope="function", params=[True, False]) +def linear(request): + return request.param + + +@pytest.mark.ci +def test_make_confounded_irm_data_return_types(linear): + np.random.seed(3141) + res = make_confounded_irm_data(linear=linear) + assert isinstance(res, dict) + assert isinstance(res["x"], np.ndarray) + assert isinstance(res["y"], np.ndarray) + assert isinstance(res["d"], np.ndarray) + + assert isinstance(res["oracle_values"], dict) + assert isinstance(res["oracle_values"]["g_long"], np.ndarray) + assert isinstance(res["oracle_values"]["g_short"], np.ndarray) + assert isinstance(res["oracle_values"]["m_long"], np.ndarray) + assert isinstance(res["oracle_values"]["m_short"], np.ndarray) + assert isinstance(res["oracle_values"]["gamma_a"], float) + assert isinstance(res["oracle_values"]["beta_a"], float) + assert isinstance(res["oracle_values"]["a"], np.ndarray) + assert isinstance(res["oracle_values"]["y_0"], np.ndarray) + assert isinstance(res["oracle_values"]["y_1"], np.ndarray) + assert isinstance(res["oracle_values"]["z"], np.ndarray) + assert isinstance(res["oracle_values"]["cf_y"], float) + assert isinstance(res["oracle_values"]["cf_d_ate"], float) + assert isinstance(res["oracle_values"]["cf_d_atte"], float) + assert isinstance(res["oracle_values"]["rho_ate"], float) + assert isinstance(res["oracle_values"]["rho_atte"], float) + + +@pytest.fixture(scope="function", params=[False, True]) +def binary_treatment(request): + return request.param + + +@pytest.fixture(scope="function", params=[1, 2]) +def n_x(request): + return request.param + + +@pytest.mark.ci +def test_make_heterogeneous_data_return_types(binary_treatment, n_x): + np.random.seed(3141) + res = make_heterogeneous_data(n_obs=100, 
+        n_x=n_x, binary_treatment=binary_treatment)
+    assert isinstance(res, dict)
+    assert isinstance(res["data"], pd.DataFrame)
+    assert isinstance(res["effects"], np.ndarray)
+    assert callable(res["treatment_effect"])
+
+    # test input checks
+    msg = "n_x must be either 1 or 2."
+    with pytest.raises(AssertionError, match=msg):
+        _ = make_heterogeneous_data(n_obs=100, n_x=0, binary_treatment=binary_treatment)
+    msg = "support_size must be smaller than p."
+    with pytest.raises(AssertionError, match=msg):
+        _ = make_heterogeneous_data(n_obs=100, n_x=n_x, support_size=31, binary_treatment=binary_treatment)
+    msg = "binary_treatment must be a boolean."
+    with pytest.raises(AssertionError, match=msg):
+        _ = make_heterogeneous_data(n_obs=100, n_x=n_x, binary_treatment=2)
+
+
+@pytest.mark.ci
+def test_make_ssm_data_return_types():
+    np.random.seed(3141)
+    res = make_ssm_data(n_obs=100)
+    assert isinstance(res, DoubleMLData)
+    res = make_ssm_data(n_obs=100, return_type="DataFrame")
+    assert isinstance(res, pd.DataFrame)
+    x, y, d, z, s = make_ssm_data(n_obs=100, return_type="array")
+    assert isinstance(x, np.ndarray)
+    assert isinstance(y, np.ndarray)
+    assert isinstance(d, np.ndarray)
+    assert isinstance(z, np.ndarray)
+    assert isinstance(s, np.ndarray)
+    with pytest.raises(ValueError, match=msg_inv_return_type):
+        _ = make_ssm_data(n_obs=100, return_type="matrix")
+
+
+@pytest.fixture(scope="function", params=[3, 5])
+def n_levels(request):
+    return request.param
+
+
+@pytest.mark.ci
+def test_make_data_discrete_treatments(n_levels):
+    np.random.seed(3141)
+    n = 100
+    data_apo = make_irm_data_discrete_treatments(n_obs=n, n_levels=n_levels)
+    assert isinstance(data_apo, dict)
+    assert isinstance(data_apo["y"], np.ndarray)
+    assert isinstance(data_apo["d"], np.ndarray)
+    assert isinstance(data_apo["x"], np.ndarray)
+    assert isinstance(data_apo["oracle_values"], dict)
+
+    assert isinstance(data_apo["oracle_values"]["cont_d"], np.ndarray)
+    assert isinstance(data_apo["oracle_values"]["level_bounds"], np.ndarray)
+    assert isinstance(data_apo["oracle_values"]["potential_level"], np.ndarray)
+    assert isinstance(data_apo["oracle_values"]["ite"], np.ndarray)
+    assert isinstance(data_apo["oracle_values"]["y0"], np.ndarray)
+
+    msg = "n_levels must be at least 2."
+    with pytest.raises(ValueError, match=msg):
+        _ = make_irm_data_discrete_treatments(n_obs=n, n_levels=1)
+
+    msg = "n_levels must be an integer."
+    with pytest.raises(ValueError, match=msg):
+        _ = make_irm_data_discrete_treatments(n_obs=n, n_levels=1.1)
diff --git a/doubleml/plm/__init__.py b/doubleml/plm/__init__.py
index e81f00c5..283bc91b 100644
--- a/doubleml/plm/__init__.py
+++ b/doubleml/plm/__init__.py
@@ -2,10 +2,8 @@
 The :mod:`doubleml.plm` module implements double machine learning estimates based on partially linear models.
""" +from .lplr import DoubleMLLPLR from .pliv import DoubleMLPLIV from .plr import DoubleMLPLR -__all__ = [ - "DoubleMLPLR", - "DoubleMLPLIV", -] +__all__ = ["DoubleMLPLR", "DoubleMLPLIV", "DoubleMLLPLR"] diff --git a/doubleml/plm/datasets/__init__.py b/doubleml/plm/datasets/__init__.py index b2bb7df0..6e8e9bb5 100644 --- a/doubleml/plm/datasets/__init__.py +++ b/doubleml/plm/datasets/__init__.py @@ -4,6 +4,7 @@ from ._make_pliv_data import _make_pliv_data from .dgp_confounded_plr_data import make_confounded_plr_data +from .dgp_lplr_LZZ2020 import make_lplr_LZZ2020 from .dgp_pliv_CHS2015 import make_pliv_CHS2015 from .dgp_pliv_multiway_cluster_CKMS2021 import make_pliv_multiway_cluster_CKMS2021 from .dgp_plr_CCDDHNR2018 import make_plr_CCDDHNR2018 @@ -15,5 +16,6 @@ "make_confounded_plr_data", "make_pliv_CHS2015", "make_pliv_multiway_cluster_CKMS2021", + "make_lplr_LZZ2020", "_make_pliv_data", ] diff --git a/doubleml/plm/datasets/dgp_lplr_LZZ2020.py b/doubleml/plm/datasets/dgp_lplr_LZZ2020.py new file mode 100644 index 00000000..284da7d8 --- /dev/null +++ b/doubleml/plm/datasets/dgp_lplr_LZZ2020.py @@ -0,0 +1,152 @@ +import numpy as np +import pandas as pd +from scipy.special import expit + +from doubleml.data import DoubleMLData +from doubleml.utils._aliases import _get_array_alias, _get_data_frame_alias, _get_dml_data_alias + +_array_alias = _get_array_alias() +_data_frame_alias = _get_data_frame_alias() +_dml_data_alias = _get_dml_data_alias() + + +def make_lplr_LZZ2020( + n_obs=500, dim_x=20, alpha=0.5, return_type="DoubleMLData", balanced_r0=True, treatment="continuous", **kwargs +): + r""" + Generates synthetic data for a logistic partially linear regression model, as in Liu et al. (2021), + designed for use in double/debiased machine learning applications. + + The data generating process is defined as follows: + + - Covariates :math:`x_i \sim \mathcal{N}(0, \Sigma)`, where :math:`\Sigma_{kj} = 0.7^{|j-k|}`. + - Treatment :math:`d_i = a_0(x_i)`. + - Propensity score :math:`p_i = \sigma(\alpha d_i + r_0(x_i))`, where :math:`\sigma(\cdot)` is the logistic function. + - Outcome :math:`y_i \sim \text{Bernoulli}(p_i)`. + + The nuisance functions are defined as: + + .. math:: + \begin{aligned} + a_0(x_i) &= \frac{2}{1 + \exp(x_{i,1})} - \frac{2}{1 + \exp(x_{i,2})} + \sin(x_{i,3}) + \cos(x_{i,4}) \\ + &\quad + 0.5 \cdot \mathbb{1}(x_{i,5} > 0) - 0.5 \cdot \mathbb{1}(x_{i,6} > 0) + 0.2\, x_{i,7} x_{i,8} + - 0.2\, x_{i,9} x_{i,10} \\ + r_0(x_i) &= 0.1\, x_{i,1} x_{i,2} x_{i,3} + 0.1\, x_{i,4} x_{i,5} + 0.1\, x_{i,6}^3 - 0.5 \sin^2(x_{i,7}) \\ + &\quad + 0.5 \cos(x_{i,8}) + \frac{1}{1 + x_{i,9}^2} - \frac{1}{1 + \exp(x_{i,10})} \\ + &\quad + 0.25 \cdot \mathbb{1}(x_{i,11} > 0) - 0.25 \cdot \mathbb{1}(x_{i,13} > 0) + \end{aligned} + + Parameters + ---------- + n_obs : int + Number of observations to simulate. + dim_x : int + Number of covariates. + alpha : float + Value of the causal parameter. + return_type : str + Determines the return format. One of: + + - 'DoubleMLData' or DoubleMLData: returns a ``DoubleMLData`` object. + - 'DataFrame', 'pd.DataFrame' or pd.DataFrame: returns a ``pandas.DataFrame``. + - 'array', 'np.ndarray', 'np.array' or np.ndarray: returns tuple of numpy arrays (x, y, d, p). + balanced_r0 : bool, default True + If True, uses the "balanced" r_0 specification (smaller magnitude / more balanced + heterogeneity). If False, uses an "unbalanced" r_0 specification with larger + share of Y=0. 
+    treatment : {'continuous', 'binary', 'binary_unbalanced'}, default 'continuous'
+        Determines how the treatment d is generated from a_0(x):
+
+        - 'continuous': d = a_0(x) (continuous treatment).
+        - 'binary': d ~ Bernoulli(sigmoid(a_0(x) - mean(a_0(x)))).
+        - 'binary_unbalanced': d ~ Bernoulli(sigmoid(a_0(x))).
+    **kwargs
+        Optional keyword arguments (currently unused in this implementation).
+
+    Returns
+    -------
+    Union[DoubleMLData, pd.DataFrame, Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]]
+        The generated data in the specified format.
+
+    References
+    ----------
+    Liu, Molei, Yi Zhang, and Doudou Zhou. 2021.
+    "Double/Debiased Machine Learning for Logistic Partially Linear Model."
+    The Econometrics Journal 24 (3): 559–88. https://doi.org/10.1093/ectj/utab019.
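+
+    Examples
+    --------
+    A minimal usage sketch (estimates depend on the random state, so value-specific outputs are skipped):
+
+    >>> import numpy as np
+    >>> from doubleml.plm.datasets import make_lplr_LZZ2020
+    >>> np.random.seed(3141)
+    >>> dml_data = make_lplr_LZZ2020(n_obs=500, dim_x=20, alpha=0.5)
+    >>> dml_data.d_cols
+    ['d']
+    >>> x, y, d, p = make_lplr_LZZ2020(n_obs=500, return_type="array")
+    >>> sorted(np.unique(y).tolist())  # doctest: +SKIP
+    [0, 1]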
+    """
+
+    if balanced_r0:
+
+        def r_0(X):
+            return (
+                0.1 * X[:, 0] * X[:, 1] * X[:, 2]
+                + 0.1 * X[:, 3] * X[:, 4]
+                + 0.1 * X[:, 5] ** 3
+                + -0.5 * np.sin(X[:, 6]) ** 2
+                + 0.5 * np.cos(X[:, 7])
+                + 1 / (1 + X[:, 8] ** 2)
+                + -1 / (1 + np.exp(X[:, 9]))
+                + 0.25 * np.where(X[:, 10] > 0, 1, 0)
+                + -0.25 * np.where(X[:, 12] > 0, 1, 0)
+            )
+
+    else:
+
+        def r_0(X):
+            return (
+                0.1 * X[:, 0] * X[:, 1] * X[:, 2]
+                + 0.1 * X[:, 3] * X[:, 4]
+                + 0.1 * X[:, 5] ** 3
+                + -0.5 * np.sin(X[:, 6]) ** 2
+                + 0.5 * np.cos(X[:, 7])
+                + 4 / (1 + X[:, 8] ** 2)
+                + -1 / (1 + np.exp(X[:, 9]))
+                + 1.5 * np.where(X[:, 10] > 0, 1, 0)
+                + -0.25 * np.where(X[:, 12] > 0, 1, 0)
+            )
+
+    def a_0(X):
+        return (
+            2 / (1 + np.exp(X[:, 0]))
+            + -2 / (1 + np.exp(X[:, 1]))
+            + 1 * np.sin(X[:, 2])
+            + 1 * np.cos(X[:, 3])
+            + 0.5 * np.where(X[:, 4] > 0, 1, 0)
+            + -0.5 * np.where(X[:, 5] > 0, 1, 0)
+            + 0.2 * X[:, 6] * X[:, 7]
+            + -0.2 * X[:, 8] * X[:, 9]
+        )
+
+    sigma = np.full((dim_x, dim_x), 0.2)
+    np.fill_diagonal(sigma, 1)
+
+    x = np.random.multivariate_normal(np.zeros(dim_x), sigma, size=n_obs)
+    np.clip(x, -2, 2, out=x)
+
+    if treatment == "continuous":
+        d = a_0(x)
+    elif treatment == "binary":
+        d_cont = a_0(x)
+        d = np.random.binomial(1, expit(d_cont - d_cont.mean()))
+    elif treatment == "binary_unbalanced":
+        d_cont = a_0(x)
+        d = np.random.binomial(1, expit(d_cont))
+    else:
+        raise ValueError("Invalid treatment type.")
+
+    p = expit(alpha * d + r_0(x))
+
+    y = np.random.binomial(1, p)
+
+    if return_type in _array_alias:
+        return x, y, d, p
+    elif return_type in _data_frame_alias + _dml_data_alias:
+        x_cols = [f"X{i + 1}" for i in np.arange(dim_x)]
+        data = pd.DataFrame(np.column_stack((x, y, d, p)), columns=x_cols + ["y", "d", "p"])
+        if return_type in _data_frame_alias:
+            return data
+        else:
+            return DoubleMLData(data, "y", "d", x_cols)
+    else:
+        raise ValueError("Invalid return_type.")
diff --git a/doubleml/plm/lplr.py b/doubleml/plm/lplr.py
new file mode 100644
index 00000000..f452e02d
--- /dev/null
+++ b/doubleml/plm/lplr.py
@@ -0,0 +1,571 @@
+import inspect
+import warnings
+
+import numpy as np
+import scipy
+from sklearn.base import clone
+from sklearn.utils import check_X_y
+from sklearn.utils.multiclass import type_of_target
+
+from doubleml.double_ml import DoubleML
+from doubleml.double_ml_score_mixins import NonLinearScoreMixin
+from doubleml.utils._checks import _check_finite_predictions, _check_is_propensity, _check_score
+from doubleml.utils._estimation import (
+    _dml_cv_predict,
+    _dml_tune,
+)
+
+
+class DoubleMLLPLR(NonLinearScoreMixin, DoubleML):
+    """Double machine learning for logistic partially linear regression models (binary outcomes).
+
+    Parameters
+    ----------
+    obj_dml_data : DoubleMLData
+        The DoubleMLData object providing the data and variable specification.
+        The outcome variable y must be binary with values {0, 1}.
+    ml_M : estimator
+        Classifier for M_0(D, X) = P[Y = 1 | D, X]. Must implement fit() and predict_proba().
+    ml_t : estimator
+        Regressor for the auxiliary regression used to predict log-odds. Must implement fit() and predict().
+    ml_m : estimator
+        Learner for m_0(X) = E[D | X]. For binary treatments a classifier with predict_proba() is expected;
+        for continuous treatments a regressor with predict() is expected.
+    ml_a : estimator, optional
+        Optional alternative learner for E[D | X]. If not provided, a clone of ml_m is used.
+        Must support the same prediction interface as ml_m.
+    n_folds : int, default=5
+        Number of outer cross-fitting folds.
+    n_folds_inner : int, default=5
+        Number of inner folds for nested resampling used internally.
+    n_rep : int, default=1
+        Number of repetitions for sample splitting.
+    score : {'nuisance_space', 'instrument'}, default='nuisance_space'
+        Score to use. 'nuisance_space' estimates m on subsamples with y=0; 'instrument' uses an instrument-type score.
+    draw_sample_splitting : bool, default=True
+        Whether to draw sample splitting during initialization.
+    error_on_convergence_failure : bool, default=False
+        If True, raise an error on convergence failure of the score root search.
+
+    Examples
+    --------
+    >>> import numpy as np
+    >>> import doubleml as dml
+    >>> from doubleml.plm.datasets import make_lplr_LZZ2020
+    >>> from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
+    >>> from sklearn.base import clone
+    >>> np.random.seed(42)
+    >>> ml_t = RandomForestRegressor(n_estimators=100, max_features=20, max_depth=5, min_samples_leaf=2)
+    >>> ml_m = RandomForestRegressor(n_estimators=100, max_features=20, max_depth=5, min_samples_leaf=2)
+    >>> ml_M = RandomForestClassifier(n_estimators=100, max_features=20, max_depth=5, min_samples_leaf=2)
+    >>> obj_dml_data = make_lplr_LZZ2020(alpha=0.5, n_obs=500, dim_x=20)
+    >>> dml_lplr_obj = dml.DoubleMLLPLR(obj_dml_data, ml_M, ml_t, ml_m)
+    >>> dml_lplr_obj.fit().summary  # doctest: +SKIP
+           coef   std err         t     P>|t|     2.5 %    97.5 %
+    d  0.661166  0.172672  3.829038  0.000129  0.322736  0.999596
+
+    Notes
+    -----
+    **Logistic partially linear regression (LPLR)** models take the form
+
+    .. math::
+
+        P(Y = 1 \\mid D, X) = \\text{expit}(D \\theta_0 + r_0(X)),
+
+    where :math:`Y` is the binary outcome variable and :math:`D` is the policy variable of interest.
+    The high-dimensional vector :math:`X = (X_1, \\ldots, X_p)` consists of other confounding covariates.
+ """ + + def __init__( + self, + obj_dml_data, + ml_M, + ml_t, + ml_m, + ml_a=None, + n_folds=5, + n_folds_inner=5, + n_rep=1, + score="nuisance_space", + draw_sample_splitting=True, + error_on_convergence_failure=False, + ): + self.n_folds_inner = n_folds_inner + super().__init__(obj_dml_data, n_folds, n_rep, score, draw_sample_splitting, double_sample_splitting=True) + + self._error_on_convergence_failure = error_on_convergence_failure + + self._check_data(self._dml_data) + valid_scores = ["nuisance_space", "instrument"] + _check_score(self.score, valid_scores, allow_callable=False) + + _ = self._check_learner(ml_t, "ml_t", regressor=True, classifier=False) + _ = self._check_learner(ml_M, "ml_M", regressor=False, classifier=True) + + ml_m_is_classifier = self._check_learner(ml_m, "ml_m", regressor=True, classifier=True) + self._learner = {"ml_m": ml_m, "ml_t": ml_t, "ml_M": ml_M} + # replace aggregated inner names with per-inner-fold names + inner_M_names = [f"ml_M_inner_{i}" for i in range(self.n_folds_inner)] + inner_a_names = [f"ml_a_inner_{i}" for i in range(self.n_folds_inner)] + self._predictions_names = ["ml_r", "ml_m", "ml_a", "ml_t", "ml_M"] + inner_M_names + inner_a_names + + if ml_a is not None: + ml_a_is_classifier = self._check_learner(ml_a, "ml_a", regressor=True, classifier=True) + self._learner["ml_a"] = ml_a + self._ml_a_provided = True + else: + self._learner["ml_a"] = clone(ml_m) + ml_a_is_classifier = ml_m_is_classifier + self._ml_a_provided = False + + self._predict_method = {"ml_t": "predict", "ml_M": "predict_proba"} + + if ml_m_is_classifier: + if self._dml_data.binary_treats.all(): + self._predict_method["ml_m"] = "predict_proba" + else: + raise ValueError( + f"The ml_m learner {str(ml_m)} was identified as classifier " + "but at least one treatment variable is not binary with values 0 and 1." + ) + else: + if self._dml_data.binary_treats.any(): + warnings.warn( + f"The ml_m learner {str(ml_m)} was identified as regressor " + "but at least one treatment variable is binary with values 0 and 1." + ) + self._predict_method["ml_m"] = "predict" + + if ml_a_is_classifier: + if self._dml_data.binary_treats.all(): + self._predict_method["ml_a"] = "predict_proba" + else: + raise ValueError( + f"The ml_a learner {str(ml_a)} was identified as classifier " + "but at least one treatment variable is not binary with values 0 and 1." + ) + else: + if self._dml_data.binary_treats.any(): + warnings.warn( + f"The ml_a learner {str(ml_a)} was identified as regressor but at least one treatment variable is " + f"binary with values 0 and 1." 
+            )
+            self._predict_method["ml_a"] = "predict"
+
+        if score == "instrument":
+            sig = inspect.signature(self.learner["ml_a"].fit)
+            if "sample_weight" not in sig.parameters:
+                raise ValueError('Learner "ml_a" that supports sample_weight is required for score type "instrument".')
+
+        self._initialize_ml_nuisance_params()
+        self._external_predictions_implemented = True
+        self._sensitivity_implemented = False

+    def _initialize_ml_nuisance_params(self):
+        self._params = {learner: {key: [None] * self.n_rep for key in self._dml_data.d_cols} for learner in self._learner}
+
+    def _check_data(self, obj_dml_data):
+        if not np.array_equal(np.unique(obj_dml_data.y), [0, 1]):
+            raise TypeError("The outcome variable y must be binary with values 0 and 1.")
+
+    def _double_dml_cv_predict(
+        self,
+        estimator,
+        estimator_name,
+        x,
+        y,
+        smpls=None,
+        smpls_inner=None,
+        n_jobs=None,
+        est_params=None,
+        method="predict",
+        sample_weights=None,
+    ):
+        res = {}
+        res["preds"] = np.zeros(y.shape, dtype=float)
+        res["preds_inner"] = []
+        res["targets_inner"] = []
+        res["models"] = []
+        for smpls_single_split, smpls_double_split in zip(smpls, smpls_inner):
+            # cross-fit the nuisance on the inner splits of this outer fold
+            res_inner = _dml_cv_predict(
+                estimator,
+                x,
+                y,
+                smpls=smpls_double_split,
+                n_jobs=n_jobs,
+                est_params=est_params,
+                method=method,
+                return_models=True,
+                sample_weights=sample_weights,
+            )
+            _check_finite_predictions(res_inner["preds"], estimator, estimator_name, smpls_double_split)
+
+            res["preds_inner"].append(res_inner["preds"])
+            res["targets_inner"].append(res_inner["targets"])
+            for model in res_inner["models"]:
+                res["models"].append(model)
+                if method == "predict_proba":
+                    res["preds"][smpls_single_split[1]] += model.predict_proba(x[smpls_single_split[1]])[:, 1]
+                else:
+                    res["preds"][smpls_single_split[1]] += model.predict(x[smpls_single_split[1]])
+            # average the inner models' predictions on the outer test fold; each test index
+            # received exactly one prediction per inner model
+            res["preds"][smpls_single_split[1]] /= len(res_inner["models"])
+        res["targets"] = np.copy(y)
+        return res
+
+    def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False):
+        x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False)
+        x, d = check_X_y(x, self._dml_data.d, force_all_finite=False)
+        x_d_concat = np.hstack((d.reshape(-1, 1), x))
+
+        m_external = external_predictions["ml_m"] is not None
+        M_external = external_predictions["ml_M"] is not None
+        t_external = external_predictions["ml_t"] is not None
+        a_external = external_predictions["ml_a"] is not None
+
+        if M_external:
+            # expect per-inner-fold keys ml_M_inner_i
+            missing = [
+                i
+                for i in range(self.n_folds_inner)
+                if f"ml_M_inner_{i}" not in external_predictions.keys() or external_predictions[f"ml_M_inner_{i}"] is None
+            ]
+            if len(missing) > 0:
+                raise ValueError(
+                    "When providing external predictions for ml_M, also inner predictions for all inner folds "
+                    f"have to be provided (missing: {', '.join([str(i) for i in missing])})."
+                )
+            M_hat_inner = [external_predictions[f"ml_M_inner_{i}"] for i in range(self.n_folds_inner)]
+            M_hat = {"preds": external_predictions["ml_M"], "preds_inner": M_hat_inner, "targets": None, "models": None}
+        else:
+            M_hat = self._double_dml_cv_predict(
+                self._learner["ml_M"],
+                "ml_M",
+                x_d_concat,
+                y,
+                smpls=smpls,
+                smpls_inner=self._DoubleML__smpls__inner,
+                n_jobs=n_jobs_cv,
+                est_params=self._get_params("ml_M"),
+                method=self._predict_method["ml_M"],
+            )
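+
+        # Illustrative layout of `external_predictions` as consumed here (flat, per treatment;
+        # a hypothetical sketch with n_folds_inner = 2 and per-observation arrays as values):
+        #     {"ml_M": ..., "ml_M_inner_0": ..., "ml_M_inner_1": ...,
+        #      "ml_a": ..., "ml_a_inner_0": ..., "ml_a_inner_1": ...,
+        #      "ml_m": ..., "ml_t": ...}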
+
+        # nuisance m
+        if m_external:
+            m_hat = {"preds": external_predictions["ml_m"], "targets": None, "models": None}
+        else:
+            if self.score == "instrument":
+                weights = M_hat["preds"] * (1 - M_hat["preds"])
+                filtered_smpls = smpls
+            elif self.score == "nuisance_space":
+                filtered_smpls = []
+                for train, test in smpls:
+                    train_filtered = train[y[train] == 0]
+                    filtered_smpls.append((train_filtered, test))
+                weights = None
+
+            m_hat = _dml_cv_predict(
+                self._learner["ml_m"],
+                x,
+                d,
+                smpls=filtered_smpls,
+                n_jobs=n_jobs_cv,
+                est_params=self._get_params("ml_m"),
+                method=self._predict_method["ml_m"],
+                return_models=return_models,
+                sample_weights=weights,
+            )
+
+            _check_finite_predictions(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls)
+
+        if self._check_learner(self._learner["ml_m"], "ml_m", regressor=True, classifier=True):
+            _check_is_propensity(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls, eps=1e-12)
+
+        if self._dml_data.binary_treats[self._dml_data.d_cols[self._i_treat]]:
+            binary_preds = type_of_target(m_hat["preds"]) == "binary"
+            # predictions lie exactly in {0, 1} iff p**2 - p == 0 element-wise
+            zero_one_preds = np.all((np.power(m_hat["preds"], 2) - m_hat["preds"]) == 0)
+            if binary_preds & zero_one_preds:
+                raise ValueError(
+                    f"For the binary treatment variable {self._dml_data.d_cols[self._i_treat]}, "
+                    f"predictions obtained with the ml_m learner {str(self._learner['ml_m'])} are also "
+                    "observed to be binary with values 0 and 1. Make sure that for classifiers "
+                    "probabilities and not labels are predicted."
+                )
+
+        if a_external:
+            # expect per-inner-fold keys ml_a_inner_i
+            missing = [
+                i
+                for i in range(self.n_folds_inner)
+                if f"ml_a_inner_{i}" not in external_predictions.keys() or external_predictions[f"ml_a_inner_{i}"] is None
+            ]
+            if len(missing) > 0:
+                raise ValueError(
+                    "When providing external predictions for ml_a, also inner predictions for all inner folds "
+                    f"have to be provided (missing: {', '.join([str(i) for i in missing])})."
+ ) + a_hat_inner = [external_predictions[f"ml_a_inner_{i}"] for i in range(self.n_folds_inner)] + a_hat = {"preds": external_predictions["ml_a"], "preds_inner": a_hat_inner, "targets": None, "models": None} + else: + a_hat = self._double_dml_cv_predict( + self._learner["ml_a"], + "ml_a", + x, + d, + smpls=smpls, + smpls_inner=self._DoubleML__smpls__inner, + n_jobs=n_jobs_cv, + est_params=self._get_params("ml_a"), + method=self._predict_method["ml_a"], + ) + + W_inner = [] + beta = np.zeros(d.shape, dtype=float) + + for i, (train, test) in enumerate(smpls): + M_iteration = M_hat["preds_inner"][i][train] + M_iteration = np.clip(M_iteration, 1e-8, 1 - 1e-8) + w = scipy.special.logit(M_iteration) + W_inner.append(w) + d_tilde = (d - a_hat["preds_inner"][i])[train] + beta[test] = np.sum(d_tilde * w) / np.sum(d_tilde**2) + + # Use preliminary beta estimates as starting value for root finding + self._coef_start_val = np.average(beta) + + # nuisance t + if t_external: + t_hat = {"preds": external_predictions["ml_t"], "targets": None, "models": None} + else: + t_hat = _dml_cv_predict( + self._learner["ml_t"], + x, + W_inner, + smpls=smpls, + n_jobs=n_jobs_cv, + est_params=self._get_params("ml_t"), + method=self._predict_method["ml_t"], + return_models=return_models, + ) + _check_finite_predictions(t_hat["preds"], self._learner["ml_t"], "ml_t", smpls) + + r_hat = {} + r_hat["preds"] = t_hat["preds"] - beta * a_hat["preds"] + + psi_elements = self._score_elements(y, d, r_hat["preds"], m_hat["preds"]) + + preds = { + "predictions": { + "ml_r": r_hat["preds"], + "ml_m": m_hat["preds"], + "ml_a": a_hat["preds"], + "ml_t": t_hat["preds"], + "ml_M": M_hat["preds"], + # store inner predictions as separate keys per inner fold + # ml_M inner + **{f"ml_M_inner_{i}": M_hat["preds_inner"][i] for i in range(len(M_hat["preds_inner"]))}, + # ml_a inner + **{f"ml_a_inner_{i}": a_hat["preds_inner"][i] for i in range(len(a_hat["preds_inner"]))}, + }, + "targets": { + "ml_r": None, + "ml_m": m_hat["targets"], + "ml_a": a_hat["targets"], + "ml_t": t_hat["targets"], + "ml_M": M_hat["targets"], + # store inner targets as separate keys per inner fold (None if external) + **( + { + f"ml_M_inner_{i}": ( + M_hat.get("targets_inner")[i] + if M_hat.get("targets_inner") is not None and i < len(M_hat["targets_inner"]) + else None + ) + for i in range(len(M_hat.get("preds_inner", []))) + } + ), + **( + { + f"ml_a_inner_{i}": ( + a_hat.get("targets_inner")[i] + if a_hat.get("targets_inner") is not None and i < len(a_hat["targets_inner"]) + else None + ) + for i in range(len(a_hat.get("preds_inner", []))) + } + ), + }, + "models": { + "ml_r": None, + "ml_m": m_hat["models"], + "ml_a": a_hat["models"], + "ml_t": t_hat["models"], + "ml_M": M_hat["models"], + }, + } + + return psi_elements, preds + + @property + def predictions_names(self): + """ + The names of predictions for the nuisance functions. 
+ """ + return self._predictions_names + + def _score_elements(self, y, d, r_hat, m_hat): + # compute residual + d_tilde = d - m_hat + psi_hat = scipy.special.expit(-r_hat) + score_const = d_tilde * (1 - y) * np.exp(r_hat) + psi_elements = { + "y": y, + "d": d, + "d_tilde": d_tilde, + "r_hat": r_hat, + "m_hat": m_hat, + "psi_hat": psi_hat, + "score_const": score_const, + } + + return psi_elements + + @property + def _score_element_names(self): + return ["y", "d", "d_tilde", "r_hat", "m_hat", "psi_hat", "score_const"] + + def _sensitivity_element_est(self, preds): + raise NotImplementedError() + + def _nuisance_tuning( + self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search + ): + if self._i_rep is None: + raise ValueError("tune_on_folds must be True as targets have to be created for ml_t on folds.") + x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False) + x, d = check_X_y(x, self._dml_data.d, force_all_finite=False) + x_d_concat = np.hstack((d.reshape(-1, 1), x)) + + if scoring_methods is None: + scoring_methods = {"ml_m": None, "ml_M": None, "ml_a": None, "ml_t": None} + + train_inds = [train_index for (train_index, _) in smpls] + M_tune_res = _dml_tune( + y, + x_d_concat, + train_inds, + self._learner["ml_M"], + param_grids["ml_M"], + scoring_methods["ml_M"], + n_folds_tune, + n_jobs_cv, + search_mode, + n_iter_randomized_search, + ) + + filtered_train_inds = [] + if self.score == "nuisance_space": + for train, _ in smpls: + train_filtered = train[y[train] == 0] + filtered_train_inds.append(train_filtered) + elif self.score == "instrument": + filtered_train_inds = train_inds + + m_tune_res = _dml_tune( + d, + x, + filtered_train_inds, + self._learner["ml_m"], + param_grids["ml_m"], + scoring_methods["ml_m"], + n_folds_tune, + n_jobs_cv, + search_mode, + n_iter_randomized_search, + ) + + a_tune_res = _dml_tune( + d, + x, + train_inds, + self._learner["ml_a"], + param_grids["ml_a"], + scoring_methods["ml_a"], + n_folds_tune, + n_jobs_cv, + search_mode, + n_iter_randomized_search, + ) + + M_best_params = [xx.best_params_ for xx in M_tune_res] + m_best_params = [xx.best_params_ for xx in m_tune_res] + a_best_params = [xx.best_params_ for xx in a_tune_res] + + # Create targets for tuning ml_t + M_hat = self._double_dml_cv_predict( + self._learner["ml_M"], + "ml_M", + x_d_concat, + y, + smpls=smpls, + smpls_inner=self._DoubleML__smpls__inner, + n_jobs=n_jobs_cv, + est_params=M_best_params, + method=self._predict_method["ml_M"], + ) + + W_inner = [] + for i, (train, _) in enumerate(smpls): + M_iteration = M_hat["preds_inner"][i][train] + M_iteration = np.clip(M_iteration, 1e-8, 1 - 1e-8) + w = scipy.special.logit(M_iteration) + W_inner.append(w) + + # Reshape W_inner into full-length arrays per fold: fill train indices, others are NaN + W_targets = [] + for i, train in enumerate(train_inds): + wt = np.full(x.shape[0], np.nan, dtype=float) + wt[train] = W_inner[i] + W_targets.append(wt) + + t_tune_res = _dml_tune( + W_inner, + x, + train_inds, + self._learner["ml_t"], + param_grids["ml_t"], + scoring_methods["ml_t"], + n_folds_tune, + n_jobs_cv, + search_mode, + n_iter_randomized_search, + fold_specific_target=True, + ) + t_best_params = [xx.best_params_ for xx in t_tune_res] + + # Update params and tune_res to include ml_a and ml_t + params = {"ml_M": M_best_params, "ml_m": m_best_params, "ml_a": a_best_params, "ml_t": t_best_params} + tune_res = {"M_tune": M_tune_res, "m_tune": m_tune_res, "a_tune": a_tune_res, 
"t_tune": t_tune_res} + + res = {"params": params, "tune_res": tune_res} + + return res + + def _compute_score(self, psi_elements, coef): + if self.score == "nuisance_space": + score_1 = psi_elements["y"] * np.exp(-coef * psi_elements["d"]) * psi_elements["d_tilde"] + score = psi_elements["psi_hat"] * (score_1 - psi_elements["score_const"]) + elif self.score == "instrument": + score = (psi_elements["y"] - scipy.special.expit(coef * psi_elements["d"] + psi_elements["r_hat"])) * psi_elements[ + "d_tilde" + ] + + return score + + def _compute_score_deriv(self, psi_elements, coef, inds=None): + if self.score == "nuisance_space": + deriv_1 = -psi_elements["y"] * np.exp(-coef * psi_elements["d"]) * psi_elements["d"] + deriv = psi_elements["psi_hat"] * psi_elements["d_tilde"] * deriv_1 + elif self.score == "instrument": + expit = scipy.special.expit(coef * psi_elements["d"] + psi_elements["r_hat"]) + deriv = -psi_elements["d"] * expit * (1 - expit) * psi_elements["d_tilde"] + + return deriv diff --git a/doubleml/plm/tests/test_datasets.py b/doubleml/plm/tests/test_datasets.py new file mode 100644 index 00000000..5e16b9ac --- /dev/null +++ b/doubleml/plm/tests/test_datasets.py @@ -0,0 +1,151 @@ +import numpy as np +import pandas as pd +import pytest + +from doubleml import DoubleMLData +from doubleml.plm.datasets import ( + _make_pliv_data, + make_confounded_plr_data, + make_lplr_LZZ2020, + make_pliv_CHS2015, + make_pliv_multiway_cluster_CKMS2021, + make_plr_CCDDHNR2018, + make_plr_turrell2018, +) + +msg_inv_return_type = "Invalid return_type." + + +@pytest.mark.ci +def test_make_plr_CCDDHNR2018_return_types(): + np.random.seed(3141) + res = make_plr_CCDDHNR2018(n_obs=100, return_type=DoubleMLData) + assert isinstance(res, DoubleMLData) + res = make_plr_CCDDHNR2018(n_obs=100, return_type=pd.DataFrame) + assert isinstance(res, pd.DataFrame) + x, y, d = make_plr_CCDDHNR2018(n_obs=100, return_type=np.ndarray) + assert isinstance(x, np.ndarray) + assert isinstance(y, np.ndarray) + assert isinstance(d, np.ndarray) + with pytest.raises(ValueError, match=msg_inv_return_type): + _ = make_plr_CCDDHNR2018(n_obs=100, return_type="matrix") + + +@pytest.mark.ci +def test_make_plr_turrell2018_return_types(): + np.random.seed(3141) + res = make_plr_turrell2018(n_obs=100, return_type="DoubleMLData") + assert isinstance(res, DoubleMLData) + res = make_plr_turrell2018(n_obs=100, return_type="DataFrame") + assert isinstance(res, pd.DataFrame) + x, y, d = make_plr_turrell2018(n_obs=100, return_type="array") + assert isinstance(x, np.ndarray) + assert isinstance(y, np.ndarray) + assert isinstance(d, np.ndarray) + with pytest.raises(ValueError, match=msg_inv_return_type): + _ = make_plr_turrell2018(n_obs=100, return_type="matrix") + + +@pytest.mark.ci +def test_make_confounded_plr_data_return_types(): + np.random.seed(3141) + res = make_confounded_plr_data(theta=5.0) + assert isinstance(res, dict) + assert isinstance(res["x"], np.ndarray) + assert isinstance(res["y"], np.ndarray) + assert isinstance(res["d"], np.ndarray) + + assert isinstance(res["oracle_values"], dict) + assert isinstance(res["oracle_values"]["g_long"], np.ndarray) + assert isinstance(res["oracle_values"]["g_short"], np.ndarray) + assert isinstance(res["oracle_values"]["m_long"], np.ndarray) + assert isinstance(res["oracle_values"]["m_short"], np.ndarray) + assert isinstance(res["oracle_values"]["theta"], float) + assert isinstance(res["oracle_values"]["gamma_a"], float) + assert isinstance(res["oracle_values"]["beta_a"], float) + assert 
isinstance(res["oracle_values"]["a"], np.ndarray) + assert isinstance(res["oracle_values"]["z"], np.ndarray) + + +@pytest.mark.ci +def test_make_pliv_data_return_types(): + np.random.seed(3141) + res = _make_pliv_data(n_obs=100, return_type="DoubleMLData") + assert isinstance(res, DoubleMLData) + res = _make_pliv_data(n_obs=100, return_type="DataFrame") + assert isinstance(res, pd.DataFrame) + x, y, d, z = _make_pliv_data(n_obs=100, return_type="array") + assert isinstance(x, np.ndarray) + assert isinstance(y, np.ndarray) + assert isinstance(d, np.ndarray) + assert isinstance(z, np.ndarray) + with pytest.raises(ValueError, match=msg_inv_return_type): + _ = _make_pliv_data(n_obs=100, return_type="matrix") + + +@pytest.mark.ci +def test_make_pliv_CHS2015_return_types(): + np.random.seed(3141) + res = make_pliv_CHS2015(n_obs=100, return_type="DoubleMLData") + assert isinstance(res, DoubleMLData) + res = make_pliv_CHS2015(n_obs=100, return_type="DataFrame") + assert isinstance(res, pd.DataFrame) + x, y, d, z = make_pliv_CHS2015(n_obs=100, return_type="array") + assert isinstance(x, np.ndarray) + assert isinstance(y, np.ndarray) + assert isinstance(d, np.ndarray) + assert isinstance(z, np.ndarray) + with pytest.raises(ValueError, match=msg_inv_return_type): + _ = make_pliv_CHS2015(n_obs=100, return_type="matrix") + + +@pytest.mark.ci +def test_make_pliv_multiway_cluster_CKMS2021_return_types(): + np.random.seed(3141) + res = make_pliv_multiway_cluster_CKMS2021(N=10, M=10, return_type="DoubleMLData") + assert isinstance(res, DoubleMLData) + res = make_pliv_multiway_cluster_CKMS2021(N=10, M=10, return_type="DataFrame") + assert isinstance(res, pd.DataFrame) + x, y, d, cluster_vars, z = make_pliv_multiway_cluster_CKMS2021(N=10, M=10, return_type="array") + assert isinstance(x, np.ndarray) + assert isinstance(y, np.ndarray) + assert isinstance(d, np.ndarray) + assert isinstance(cluster_vars, np.ndarray) + assert isinstance(z, np.ndarray) + with pytest.raises(ValueError, match=msg_inv_return_type): + _ = make_pliv_multiway_cluster_CKMS2021(N=10, M=10, return_type="matrix") + + +@pytest.mark.ci +def test_make_lplr_LZZ2020_return_types(): + np.random.seed(3141) + res = make_lplr_LZZ2020(n_obs=100, return_type="DoubleMLData") + assert isinstance(res, DoubleMLData) + res = make_lplr_LZZ2020(n_obs=100, return_type="DataFrame") + assert isinstance(res, pd.DataFrame) + x, y, d, z = make_lplr_LZZ2020(n_obs=100, return_type="array") + assert isinstance(x, np.ndarray) + assert isinstance(y, np.ndarray) + assert isinstance(d, np.ndarray) + assert isinstance(z, np.ndarray) + with pytest.raises(ValueError, match=msg_inv_return_type): + _ = make_lplr_LZZ2020(n_obs=100, return_type="matrix") + + +@pytest.mark.ci +def test_make_lplr_LZZ2020_variants(): + np.random.seed(3141) + res = make_lplr_LZZ2020(n_obs=100, treatment="binary") + assert np.array_equal(np.unique(res.d), np.array([0, 1])) + res = make_lplr_LZZ2020(n_obs=100, treatment="binary_unbalanced") + assert np.array_equal(np.unique(res.d), np.array([0, 1])) + res = make_lplr_LZZ2020(n_obs=100, treatment="continuous") + assert len(np.unique(res.d)) == 100 + + msg = "Invalid treatment type." 
+    with pytest.raises(ValueError, match=msg):
+        _ = make_lplr_LZZ2020(n_obs=100, treatment="colors")
+
+    res = make_lplr_LZZ2020(n_obs=100, balanced_r0=False)
+    _, y_counts = np.unique(res.y, return_counts=True)
+    assert np.abs(y_counts[0] - y_counts[1]) > 10
diff --git a/doubleml/plm/tests/test_lplr.py b/doubleml/plm/tests/test_lplr.py
new file mode 100644
index 00000000..abd7adf5
--- /dev/null
+++ b/doubleml/plm/tests/test_lplr.py
@@ -0,0 +1,82 @@
+import numpy as np
+import pytest
+from sklearn.base import clone
+from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
+
+import doubleml as dml
+from doubleml.plm.datasets import make_lplr_LZZ2020
+
+
+@pytest.fixture(scope="module", params=[RandomForestClassifier(random_state=42)])
+def learner_M(request):
+    return request.param
+
+
+@pytest.fixture(scope="module", params=[RandomForestRegressor(random_state=42)])
+def learner_t(request):
+    return request.param
+
+
+@pytest.fixture(scope="module", params=[RandomForestRegressor(random_state=42)])
+def learner_m(request):
+    return request.param
+
+
+@pytest.fixture(scope="module", params=[RandomForestClassifier(random_state=42)])
+def learner_m_classifier(request):
+    return request.param
+
+
+@pytest.fixture(scope="module", params=["nuisance_space", "instrument"])
+def score(request):
+    return request.param
+
+
+# TODO: Error for continuous treatment?
+@pytest.fixture(scope="module", params=["continuous", "binary", "binary_unbalanced"])
+def treatment(request):
+    return request.param
+
+
+@pytest.fixture(scope="module")
+def dml_lplr_fixture(
+    score,
+    learner_M,
+    learner_t,
+    learner_m,
+    learner_m_classifier,
+    treatment,
+):
+    n_folds = 5
+    alpha = 0.5
+
+    # collect data
+    np.random.seed(42)
+    obj_dml_data = make_lplr_LZZ2020(alpha=alpha, treatment=treatment)
+
+    ml_M = clone(learner_M)
+    ml_t = clone(learner_t)
+    if treatment == "continuous":
+        ml_m = clone(learner_m)
+    else:
+        ml_m = clone(learner_m_classifier)
+
+    dml_lplr_obj = dml.DoubleMLLPLR(obj_dml_data, ml_M, ml_t, ml_m, n_folds=n_folds, score=score)
+    dml_lplr_obj.fit()
+
+    res_dict = {
+        "coef": dml_lplr_obj.coef[0],
+        "se": dml_lplr_obj.se[0],
+        "true_coef": alpha,
+    }
+
+    return res_dict
+
+
+@pytest.mark.ci
+def test_dml_lplr_coef(dml_lplr_fixture):
+    # true_coef should lie within three standard errors of the estimate
+    coef = dml_lplr_fixture["coef"]
+    se = dml_lplr_fixture["se"]
+    true_coef = dml_lplr_fixture["true_coef"]
+    assert abs(coef - true_coef) <= 3.0 * se
diff --git a/doubleml/plm/tests/test_lplr_exceptions.py b/doubleml/plm/tests/test_lplr_exceptions.py
new file mode 100644
index 00000000..f01cd885
--- /dev/null
+++ b/doubleml/plm/tests/test_lplr_exceptions.py
@@ -0,0 +1,338 @@
+import numpy as np
+import pandas as pd
+import pytest
+from sklearn.base import BaseEstimator
+from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
+from sklearn.linear_model import Lasso, LogisticRegression
+from sklearn.semi_supervised import LabelSpreading
+
+from doubleml import DoubleMLLPLR
+from doubleml.plm.datasets import make_lplr_LZZ2020
+
+np.random.seed(3141)
+n = 100
+# create test data and basic learners
+dml_data = make_lplr_LZZ2020(alpha=0.5, n_obs=n, dim_x=20)
+dml_data_binary = make_lplr_LZZ2020(alpha=0.5, n_obs=n, treatment="binary", dim_x=20)
+ml_M = RandomForestClassifier()
+ml_t = RandomForestRegressor()
+ml_m = RandomForestRegressor()
+dml_lplr = DoubleMLLPLR(dml_data, ml_M, ml_t, ml_m)
+dml_lplr_instrument = DoubleMLLPLR(dml_data, ml_M, ml_t, ml_m, score="instrument")
score="instrument") + + +@pytest.mark.ci +def test_lplr_exception_data(): + msg = r"The data must be of DoubleMLData.* type\.[\s\S]* of type " r" was passed\." + with pytest.raises(TypeError, match=msg): + _ = DoubleMLLPLR(pd.DataFrame(), ml_M, ml_t, ml_m) + + dml_data_nb = make_lplr_LZZ2020(alpha=0.5, n_obs=50, dim_x=20) + dml_data_nb.data[dml_data_nb.y_col] = dml_data_nb.data[dml_data_nb.y_col] + 1 + dml_data_nb._set_y_z() + with pytest.raises(TypeError, match="The outcome variable y must be binary with values 0 and 1."): + _ = DoubleMLLPLR(dml_data_nb, ml_M, ml_t, ml_m) + + +@pytest.mark.ci +def test_lplr_exception_scores(): + # LPLR valid scores are 'nuisance_space' and 'instrument' + msg = "Invalid score MAR" + with pytest.raises(ValueError, match=msg): + _ = DoubleMLLPLR(dml_data, ml_M, ml_t, ml_m, score="MAR") + msg = "score should be a string. 0 was passed." + with pytest.raises(TypeError, match=msg): + _ = DoubleMLLPLR(dml_data, ml_M, ml_t, ml_m, score=0) + + +@pytest.mark.ci +def test_lplr_exception_resampling(): + msg = "The number of folds must be of int type. 1.5 of type was passed." + with pytest.raises(TypeError, match=msg): + _ = DoubleMLLPLR(dml_data, ml_M, ml_t, ml_m, n_folds=1.5) + + msg = "The number of repetitions for the sample splitting must be of int type. 1.5 of type was passed." + with pytest.raises(TypeError, match=msg): + _ = DoubleMLLPLR(dml_data, ml_M, ml_t, ml_m, n_rep=1.5) + + msg = "The number of folds must be positive. 0 was passed." + with pytest.raises(ValueError, match=msg): + _ = DoubleMLLPLR(dml_data, ml_M, ml_t, ml_m, n_folds=0) + + msg = "The number of repetitions for the sample splitting must be positive. 0 was passed." + with pytest.raises(ValueError, match=msg): + _ = DoubleMLLPLR(dml_data, ml_M, ml_t, ml_m, n_rep=0) + + msg = "draw_sample_splitting must be True or False. Got true." + with pytest.raises(TypeError, match=msg): + _ = DoubleMLLPLR(dml_data, ml_M, ml_t, ml_m, draw_sample_splitting="true") + + +@pytest.mark.ci +def test_lplr_exception_get_params(): + msg = "Invalid nuisance learner ml_x. Valid nuisance learner ml_m or ml_t or ml_M or ml_a." + with pytest.raises(ValueError, match=msg): + dml_lplr.get_params("ml_x") + + +@pytest.mark.ci +def test_lplr_exception_smpls(): + msg = ( + "Sample splitting not specified. " + r"Either draw samples via .draw_sample splitting\(\) or set external samples via .set_sample_splitting\(\)." + ) + dml_plr_no_smpls = DoubleMLLPLR(dml_data, ml_M, ml_t, ml_m, draw_sample_splitting=False) + with pytest.raises(ValueError, match=msg): + _ = dml_plr_no_smpls.smpls + + +@pytest.mark.ci +def test_lplr_exception_fit(): + msg = "The number of CPUs used to fit the learners must be of int type. 5 of type was passed." + with pytest.raises(TypeError, match=msg): + dml_lplr.fit(n_jobs_cv="5") + msg = "store_predictions must be True or False. Got 1." + with pytest.raises(TypeError, match=msg): + dml_lplr.fit(store_predictions=1) + msg = "store_models must be True or False. Got 1." + with pytest.raises(TypeError, match=msg): + dml_lplr.fit(store_models=1) + + +@pytest.mark.ci +def test_lplr_exception_bootstrap(): + dml_lplr_boot = DoubleMLLPLR(dml_data, ml_M, ml_t, ml_m) + msg = r"Apply fit\(\) before bootstrap\(\)." + with pytest.raises(ValueError, match=msg): + dml_lplr_boot.bootstrap() + + dml_lplr_boot.fit() + msg = 'Method must be "Bayes", "normal" or "wild". Got Gaussian.' 
+    with pytest.raises(ValueError, match=msg):
+        dml_lplr_boot.bootstrap(method="Gaussian")
+    msg = "The number of bootstrap replications must be of int type. 500 of type <class 'str'> was passed."
+    with pytest.raises(TypeError, match=msg):
+        dml_lplr_boot.bootstrap(n_rep_boot="500")
+    msg = "The number of bootstrap replications must be positive. 0 was passed."
+    with pytest.raises(ValueError, match=msg):
+        dml_lplr_boot.bootstrap(n_rep_boot=0)
+
+
+@pytest.mark.ci
+def test_lplr_exception_confint():
+    dml_lplr_conf = DoubleMLLPLR(dml_data, ml_M, ml_t, ml_m)
+    msg = r"Apply fit\(\) before confint\(\)."
+    with pytest.raises(ValueError, match=msg):
+        dml_lplr_conf.confint()
+    dml_lplr_conf.fit()
+
+    msg = "joint must be True or False. Got 1."
+    with pytest.raises(TypeError, match=msg):
+        dml_lplr_conf.confint(joint=1)
+    msg = "The confidence level must be of float type. 5% of type <class 'str'> was passed."
+    with pytest.raises(TypeError, match=msg):
+        dml_lplr_conf.confint(level="5%")
+    msg = r"The confidence level must be in \(0,1\). 0.0 was passed."
+    with pytest.raises(ValueError, match=msg):
+        dml_lplr_conf.confint(level=0.0)
+
+    msg = r"Apply bootstrap\(\) before confint\(joint=True\)."
+    with pytest.raises(ValueError, match=msg):
+        dml_lplr_conf.confint(joint=True)
+    dml_lplr_conf.bootstrap()
+    df_lplr_ci = dml_lplr_conf.confint(joint=True)
+    assert isinstance(df_lplr_ci, pd.DataFrame)
+
+
+@pytest.mark.ci
+def test_lplr_exception_set_ml_nuisance_params():
+    # invalid learner name
+    msg = "Invalid nuisance learner g. Valid nuisance learner ml_m or ml_t or ml_M or ml_a."
+    with pytest.raises(ValueError, match=msg):
+        dml_lplr.set_ml_nuisance_params("g", "d", {"alpha": 0.1})
+    # invalid treatment variable
+    msg = "Invalid treatment variable y. Valid treatment variable d."
+    with pytest.raises(ValueError, match=msg):
+        dml_lplr.set_ml_nuisance_params("ml_M", "y", {"alpha": 0.1})
+
+
+class _DummyNoSetParams:
+    def fit(self):
+        pass
+
+
+class _DummyNoGetParams(_DummyNoSetParams):
+    def set_params(self):
+        pass
+
+
+class _DummyNoClassifier(_DummyNoGetParams):
+    def get_params(self):
+        pass
+
+    def predict(self):
+        pass
+
+
+class LogisticRegressionManipulatedType(LogisticRegression):
+    def __sklearn_tags__(self):
+        tags = super().__sklearn_tags__()
+        tags.estimator_type = None
+        return tags
+
+
+@pytest.mark.ci
+@pytest.mark.filterwarnings(
+    r"ignore:.*is \(probably\) neither a regressor nor a classifier.*:UserWarning",
+)
+def test_lplr_exception_learner():
+    err_msg_prefix = "Invalid learner provided for ml_t: "
+
+    msg = err_msg_prefix + "provide an instance of a learner instead of a class."
+    with pytest.raises(TypeError, match=msg):
+        _ = DoubleMLLPLR(dml_data, ml_M, Lasso, ml_m)
+    msg = err_msg_prefix + r"BaseEstimator\(\) has no method .fit\(\)."
+    with pytest.raises(TypeError, match=msg):
+        _ = DoubleMLLPLR(dml_data, ml_M, BaseEstimator(), ml_m)
+    msg = r"has no method .set_params\(\)."
+    with pytest.raises(TypeError, match=msg):
+        _ = DoubleMLLPLR(dml_data, ml_M, _DummyNoSetParams(), ml_m)
+    msg = r"has no method .get_params\(\)."
+    with pytest.raises(TypeError, match=msg):
+        _ = DoubleMLLPLR(dml_data, ml_M, _DummyNoGetParams(), ml_m)
+
+    # ml_m may not be a classifier when the treatment is not binary
+    msg = (
+        r"The ml_m learner LogisticRegression\(\) was identified as classifier "
+        r"but at least one treatment variable is not binary with values 0 and 1\."
+    )
+    with pytest.raises(ValueError, match=msg):
+        _ = DoubleMLLPLR(dml_data, ml_M, ml_t, LogisticRegression())
+
+    # ml_a may not be a classifier when treatment is not binary
+    msg = (
+        r"The ml_a learner LogisticRegression\(\) was identified as classifier "
+        r"but at least one treatment variable is not binary with values 0 and 1\."
+    )
+    with pytest.raises(ValueError, match=msg):
+        _ = DoubleMLLPLR(dml_data, ml_M, ml_t, ml_m, ml_a=LogisticRegression())
+
+    # for score "instrument", ml_a must support sample_weight
+    dml_data_binary = make_lplr_LZZ2020(treatment="binary")
+    msg = 'Learner "ml_a" who supports sample_weight is required for score type "instrument"'
+    with pytest.raises(ValueError, match=msg):
+        _ = DoubleMLLPLR(dml_data_binary, ml_M, ml_t, ml_m, ml_a=LabelSpreading(), score="instrument")
+
+    # construct a classifier that sklearn's is_classifier cannot identify as a classifier
+    log_reg = LogisticRegressionManipulatedType()
+    msg = (
+        r"Learner provided for ml_m is probably invalid: LogisticRegressionManipulatedType\(\) is \(probably\) "
+        r"neither a regressor nor a classifier. Method predict is used for prediction\."
+    )
+    with pytest.warns(UserWarning, match=msg):
+        _ = DoubleMLLPLR(dml_data, ml_M, ml_t, log_reg)
+
+
+@pytest.mark.ci
+@pytest.mark.filterwarnings(
+    r"ignore:.*is \(probably\) neither a regressor nor a classifier.*:UserWarning",
+    r"ignore: Learner provided for ml_m is probably invalid.*is \(probably\) no classifier.*:UserWarning",
+)
+def test_lplr_exception_and_warning_learner():
+    # invalid ml_M (must be a classifier with predict_proba)
+    with pytest.raises(TypeError):
+        _ = DoubleMLLPLR(dml_data, _DummyNoClassifier(), ml_t, ml_m)
+    msg = "Invalid learner provided for ml_M: " r"Lasso\(\) has no method .predict_proba\(\)."
+    with pytest.raises(TypeError, match=msg):
+        _ = DoubleMLLPLR(dml_data, Lasso(), ml_t, ml_m)
+    msg = (
+        r"The ml_m learner RandomForestRegressor\(\) was identified as regressor but at least one treatment "
+        r"variable is binary with values 0 and 1."
+    )
+    with pytest.warns(match=msg):
+        _ = DoubleMLLPLR(dml_data_binary, ml_M, ml_t, ml_m)
+    msg = (
+        r"The ml_a learner RandomForestRegressor\(\) was identified as regressor but at least one treatment "
+        r"variable is binary with values 0 and 1."
+    )
+    with pytest.warns(match=msg):
+        _ = DoubleMLLPLR(dml_data_binary, ml_M, ml_t, ml_M, ml_a=ml_m)
+
+
+class LassoWithNanPred(Lasso):
+    def predict(self, X):
+        preds = super().predict(X)
+        n_obs = len(preds)
+        preds[np.random.randint(0, n_obs, 1)] = np.nan
+        return preds
+
+
+class LassoWithInfPred(Lasso):
+    def predict(self, X):
+        preds = super().predict(X)
+        n_obs = len(preds)
+        preds[np.random.randint(0, n_obs, 1)] = np.inf
+        return preds
+
+
+# Classifier that returns hard labels (0/1) via predict_proba to trigger the binary-predictions error
+class HardLabelPredictProba(LogisticRegression):
+    def predict_proba(self, X):
+        labels = super().predict(X).astype(int)
+        return np.column_stack((1 - labels, labels))
+
+
+@pytest.mark.ci
+def test_lplr_nan_prediction():
+    msg = r"Predictions from learner LassoWithNanPred\(\) for ml_t are not finite."
+    with pytest.raises(ValueError, match=msg):
+        _ = DoubleMLLPLR(dml_data, ml_M, LassoWithNanPred(), ml_m).fit()
+    msg = r"Predictions from learner LassoWithInfPred\(\) for ml_t are not finite."
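+    # infinite predictions are rejected by the same finiteness check as NaN predictions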
+    with pytest.raises(ValueError, match=msg):
+        _ = DoubleMLLPLR(dml_data, ml_M, LassoWithInfPred(), ml_m).fit()
+
+
+@pytest.mark.ci
+def test_lplr_exception_evaluate_learner():
+    dml_lplr_obj = DoubleMLLPLR(
+        dml_data,
+        ml_M=LogisticRegression(),
+        ml_t=Lasso(),
+        ml_m=RandomForestRegressor(),
+        n_folds=5,
+        score="nuisance_space",
+    )
+
+    msg = r"Apply fit\(\) before evaluate_learners\(\)."
+    with pytest.raises(ValueError, match=msg):
+        dml_lplr_obj.evaluate_learners()
+
+    dml_lplr_obj.fit()
+
+    msg = "metric should be a callable. 'mse' was passed."
+    with pytest.raises(TypeError, match=msg):
+        dml_lplr_obj.evaluate_learners(metric="mse")
+
+    msg = (
+        r"The learners have to be a subset of \['ml_m', 'ml_t', 'ml_M', 'ml_a'\]\. "
+        r"Learners \['ml_mu', 'ml_p'\] provided."
+    )
+    with pytest.raises(ValueError, match=msg):
+        dml_lplr_obj.evaluate_learners(learners=["ml_mu", "ml_p"])
+
+    def eval_fct(y_pred, y_true):
+        return np.nan
+
+    with pytest.raises(ValueError):
+        dml_lplr_obj.evaluate_learners(metric=eval_fct)
+
+
+@pytest.mark.ci
+def test_lplr_exception_binary_predictions_from_classifier():
+    # Expect error because ml_m returns binary labels instead of probabilities for a binary treatment
+    msg = (
+        r"For the binary treatment variable d, predictions obtained with the ml_m learner "
+        r"HardLabelPredictProba\(\) are also observed to be binary with values 0 and 1\. "
+        r"Make sure that for classifiers probabilities and not labels are predicted\."
+    )
+    with pytest.raises(ValueError, match=msg):
+        _ = DoubleMLLPLR(dml_data_binary, ml_M, ml_t, HardLabelPredictProba()).fit()
diff --git a/doubleml/plm/tests/test_lplr_external_predictions.py b/doubleml/plm/tests/test_lplr_external_predictions.py
new file mode 100644
index 00000000..cc8546a8
--- /dev/null
+++ b/doubleml/plm/tests/test_lplr_external_predictions.py
@@ -0,0 +1,148 @@
+import math
+
+import numpy as np
+import pytest
+from sklearn.linear_model import LinearRegression, LogisticRegression
+
+from doubleml import DoubleMLData
+from doubleml.plm.datasets import make_lplr_LZZ2020
+from doubleml.plm.lplr import DoubleMLLPLR
+from doubleml.utils import DMLDummyClassifier, DMLDummyRegressor
+
+
+@pytest.fixture(scope="module", params=["nuisance_space", "instrument"])
+def lplr_score(request):
+    return request.param
+
+
+@pytest.fixture(scope="module", params=[1, 3])
+def n_rep(request):
+    return request.param
+
+
+@pytest.fixture(scope="module", params=[True, False])
+def set_ml_m_ext(request):
+    return request.param
+
+
+@pytest.fixture(scope="module", params=[True, False])
+def set_ml_t_ext(request):
+    return request.param
+
+
+@pytest.fixture(scope="module", params=[True, False])
+def set_ml_M_ext(request):
+    return request.param
+
+
+@pytest.fixture(scope="module")
+def doubleml_lplr_fixture(lplr_score, n_rep, set_ml_m_ext, set_ml_t_ext, set_ml_M_ext):
+    ext_predictions = {"d": {}}
+
+    x, y, d, _ = make_lplr_LZZ2020(n_obs=500, dim_x=20, alpha=0.5, return_type="np.array", treatment="continuous")
+
+    np.random.seed(3141)
+    dml_data = DoubleMLData.from_arrays(x=x, y=y, d=d)
+
+    kwargs = {"obj_dml_data": dml_data, "score": lplr_score, "n_rep": n_rep}
+    if lplr_score == "instrument":
+        # ensure ml_a supports sample_weight
+        kwargs["ml_a"] = LinearRegression()
+
+    dml_lplr = DoubleMLLPLR(ml_M=LogisticRegression(max_iter=1000), ml_t=LinearRegression(), ml_m=LinearRegression(), **kwargs)
+    np.random.seed(3141)
+    dml_lplr.fit(store_predictions=True)
+
+    # prepare external predictions and dummy learners
+    if set_ml_M_ext:
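+        # reuse the cross-fitted predictions of the first model as external predictions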
ext_predictions["d"]["ml_M"] = dml_lplr.predictions["ml_M"][:, :, 0] + # provide inner predictions per inner fold index + for i in range(dml_lplr.n_folds_inner): + ext_predictions["d"][f"ml_M_inner_{i}"] = dml_lplr.predictions[f"ml_M_inner_{i}"][:, :, 0] + ml_M = DMLDummyClassifier() + else: + ml_M = LogisticRegression(max_iter=1000) + + if set_ml_t_ext: + ext_predictions["d"]["ml_t"] = dml_lplr.predictions["ml_t"][:, :, 0] + ml_t = DMLDummyRegressor() + else: + ml_t = LinearRegression() + + if set_ml_m_ext: + ext_predictions["d"]["ml_m"] = dml_lplr.predictions["ml_m"][:, :, 0] + ml_m = DMLDummyRegressor() + ext_predictions["d"]["ml_a"] = dml_lplr.predictions["ml_a"][:, :, 0] + for i in range(dml_lplr.n_folds_inner): + ext_predictions["d"][f"ml_a_inner_{i}"] = dml_lplr.predictions[f"ml_a_inner_{i}"][:, :, 0] + else: + ml_m = LinearRegression() + + # build second model with external predictions + dml_lplr_ext = DoubleMLLPLR(ml_M=ml_M, ml_t=ml_t, ml_m=ml_m, **kwargs) + + np.random.seed(3141) + dml_lplr_ext.fit(external_predictions=ext_predictions) + + res_dict = { + "coef_normal": dml_lplr.coef[0], + "coef_ext": dml_lplr_ext.coef[0], + "se_normal": dml_lplr.se[0], + "se_ext": dml_lplr_ext.se[0], + } + return res_dict + + +@pytest.mark.ci +def test_doubleml_lplr_coef(doubleml_lplr_fixture): + assert math.isclose(doubleml_lplr_fixture["coef_normal"], doubleml_lplr_fixture["coef_ext"], rel_tol=1e-9, abs_tol=1e-4) + + +@pytest.mark.ci +def test_doubleml_lplr_se(doubleml_lplr_fixture): + assert math.isclose(doubleml_lplr_fixture["se_normal"], doubleml_lplr_fixture["se_ext"], rel_tol=1e-9, abs_tol=1e-4) + + +@pytest.mark.ci +def test_doubleml_lplr_exceptions(): + ext_predictions = {"d": {}} + + x, y, d, _ = make_lplr_LZZ2020(n_obs=500, dim_x=20, alpha=0.5, return_type="np.array", treatment="continuous") + + np.random.seed(3141) + dml_data = DoubleMLData.from_arrays(x=x, y=y, d=d) + + kwargs = {"obj_dml_data": dml_data} + + dml_lplr = DoubleMLLPLR(ml_M=LogisticRegression(max_iter=1000), ml_t=LinearRegression(), ml_m=LinearRegression(), **kwargs) + np.random.seed(3141) + dml_lplr.fit(store_predictions=True) + + # prepare external predictions and dummy learners + + ml_M = LogisticRegression(max_iter=1000) + ml_t = LinearRegression() + ml_m = LinearRegression() + + # build second model with external predictions + dml_lplr_ext = DoubleMLLPLR(ml_M=ml_M, ml_t=ml_t, ml_m=ml_m, **kwargs) + + ext_predictions["d"]["ml_M"] = dml_lplr.predictions["ml_M"][:, :, 0] + # provide inner predictions per inner fold index + for i in range(dml_lplr.n_folds_inner - 1): + ext_predictions["d"][f"ml_M_inner_{i}"] = dml_lplr.predictions[f"ml_M_inner_{i}"][:, :, 0] + + msg = r"When providing external predictions for ml_M, also inner predictions for all inner folds" + with pytest.raises(ValueError, match=msg): + dml_lplr_ext.fit(external_predictions=ext_predictions) + + ext_predictions["d"][f"ml_M_inner_{dml_lplr.n_folds_inner-1}"] = (dml_lplr.predictions)[ + f"ml_M_inner_{dml_lplr.n_folds_inner-1}" + ][:, :, 0] + ext_predictions["d"]["ml_a"] = dml_lplr.predictions["ml_a"][:, :, 0] + for i in range(dml_lplr.n_folds_inner - 1): + ext_predictions["d"][f"ml_a_inner_{i}"] = dml_lplr.predictions[f"ml_a_inner_{i}"][:, :, 0] + + msg = r"When providing external predictions for ml_a, also inner predictions for all inner folds" + with pytest.raises(ValueError, match=msg): + dml_lplr_ext.fit(external_predictions=ext_predictions) diff --git a/doubleml/plm/tests/test_lplr_tune.py b/doubleml/plm/tests/test_lplr_tune.py new file mode 100644 
index 00000000..7c7c4aeb
--- /dev/null
+++ b/doubleml/plm/tests/test_lplr_tune.py
@@ -0,0 +1,121 @@
+import numpy as np
+import pytest
+from sklearn.base import clone
+from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
+
+import doubleml as dml
+
+from ..datasets import make_lplr_LZZ2020
+
+
+@pytest.fixture(scope="module", params=[RandomForestClassifier(random_state=42)])
+def learner_M(request):
+    return request.param
+
+
+@pytest.fixture(scope="module", params=[RandomForestRegressor(random_state=42)])
+def learner_t(request):
+    return request.param
+
+
+@pytest.fixture(scope="module", params=[RandomForestRegressor(random_state=42)])
+def learner_m(request):
+    return request.param
+
+
+@pytest.fixture(scope="module", params=[RandomForestRegressor(random_state=42)])
+def learner_a(request):
+    return request.param
+
+
+@pytest.fixture(scope="module", params=["nuisance_space", "instrument"])
+def score(request):
+    return request.param
+
+
+def get_par_grid():
+    return {"n_estimators": [5, 10, 20]}
+
+
+@pytest.fixture(scope="module")
+def dml_lplr_fixture(
+    learner_M,
+    learner_t,
+    learner_m,
+    learner_a,
+    score,
+    tune_on_folds=True,
+):
+    par_grid = {
+        "ml_M": get_par_grid(),
+        "ml_t": get_par_grid(),
+        "ml_m": get_par_grid(),
+        "ml_a": get_par_grid(),
+    }
+    n_folds_tune = 4
+    n_folds = 5
+    alpha = 0.5
+
+    ml_M = clone(learner_M)
+    ml_t = clone(learner_t)
+    ml_m = clone(learner_m)
+    ml_a = clone(learner_a)
+
+    obj_dml_data = make_lplr_LZZ2020(alpha=alpha)
+    dml_lplr_obj = dml.DoubleMLLPLR(
+        obj_dml_data,
+        ml_M,
+        ml_t,
+        ml_m,
+        ml_a=ml_a,
+        n_folds=n_folds,
+        score=score,
+    )
+
+    # tune hyperparameters
+    tune_res = dml_lplr_obj.tune(par_grid, tune_on_folds=tune_on_folds, n_folds_tune=n_folds_tune, return_tune_res=False)
+    assert isinstance(tune_res, dml.DoubleMLLPLR)
+
+    dml_lplr_obj.fit()
+
+    res_dict = {
+        "coef": dml_lplr_obj.coef[0],
+        "se": dml_lplr_obj.se[0],
+        "true_coef": alpha,
+    }
+
+    return res_dict
+
+
+@pytest.mark.ci
+def test_dml_lplr_tune_coef(dml_lplr_fixture):
+    # true_coef should lie within three standard errors of the estimate
+    coef = dml_lplr_fixture["coef"]
+    se = dml_lplr_fixture["se"]
+    true_coef = dml_lplr_fixture["true_coef"]
+    assert abs(coef - true_coef) <= 3.0 * se
+
+
+@pytest.mark.ci
+def test_lplr_exception_tuning(
+    learner_M,
+    learner_t,
+    learner_m,
+    learner_a,
+):
+    # tuning on the full sample is not supported for LPLR
+    obj_dml_data = make_lplr_LZZ2020(alpha=0.5)
+    ml_M = clone(learner_M)
+    ml_t = clone(learner_t)
+    ml_m = clone(learner_m)
+
+    dml_lplr_obj = dml.DoubleMLLPLR(obj_dml_data, ml_M, ml_t, ml_m)
+    par_grid = {
+        "ml_M": get_par_grid(),
+        "ml_t": get_par_grid(),
+        "ml_m": get_par_grid(),
+        "ml_a": get_par_grid(),
+    }
+    msg = "tune_on_folds must be True as targets have to be created for ml_t on folds."
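+    # tuning on the full sample is impossible because targets for ml_t are only constructed per fold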
+ with pytest.raises(ValueError, match=msg): + dml_lplr_obj.tune(par_grid, tune_on_folds=False) diff --git a/doubleml/tests/test_datasets.py b/doubleml/tests/test_datasets.py index f69b681e..95b6ea53 100644 --- a/doubleml/tests/test_datasets.py +++ b/doubleml/tests/test_datasets.py @@ -1,25 +1,8 @@ -import numpy as np import pandas as pd import pytest from doubleml import DoubleMLData from doubleml.datasets import fetch_401K, fetch_bonus -from doubleml.irm.datasets import ( - make_confounded_irm_data, - make_heterogeneous_data, - make_iivm_data, - make_irm_data, - make_irm_data_discrete_treatments, - make_ssm_data, -) -from doubleml.plm.datasets import ( - _make_pliv_data, - make_confounded_plr_data, - make_pliv_CHS2015, - make_pliv_multiway_cluster_CKMS2021, - make_plr_CCDDHNR2018, - make_plr_turrell2018, -) msg_inv_return_type = "Invalid return_type." @@ -53,244 +36,3 @@ def test_fetch_bonus_poly(): n_x = len(data_bonus_wo_poly.x_cols) data_bonus_w_poly = fetch_bonus(polynomial_features=True) assert len(data_bonus_w_poly.x_cols) == ((n_x + 1) * n_x / 2 + n_x) - - -@pytest.mark.ci -def test_make_plr_CCDDHNR2018_return_types(): - np.random.seed(3141) - res = make_plr_CCDDHNR2018(n_obs=100, return_type=DoubleMLData) - assert isinstance(res, DoubleMLData) - res = make_plr_CCDDHNR2018(n_obs=100, return_type=pd.DataFrame) - assert isinstance(res, pd.DataFrame) - x, y, d = make_plr_CCDDHNR2018(n_obs=100, return_type=np.ndarray) - assert isinstance(x, np.ndarray) - assert isinstance(y, np.ndarray) - assert isinstance(d, np.ndarray) - with pytest.raises(ValueError, match=msg_inv_return_type): - _ = make_plr_CCDDHNR2018(n_obs=100, return_type="matrix") - - -@pytest.mark.ci -def test_make_plr_turrell2018_return_types(): - np.random.seed(3141) - res = make_plr_turrell2018(n_obs=100, return_type="DoubleMLData") - assert isinstance(res, DoubleMLData) - res = make_plr_turrell2018(n_obs=100, return_type="DataFrame") - assert isinstance(res, pd.DataFrame) - x, y, d = make_plr_turrell2018(n_obs=100, return_type="array") - assert isinstance(x, np.ndarray) - assert isinstance(y, np.ndarray) - assert isinstance(d, np.ndarray) - with pytest.raises(ValueError, match=msg_inv_return_type): - _ = make_plr_turrell2018(n_obs=100, return_type="matrix") - - -@pytest.mark.ci -def test_make_irm_data_return_types(): - np.random.seed(3141) - res = make_irm_data(n_obs=100, return_type="DoubleMLData") - assert isinstance(res, DoubleMLData) - res = make_irm_data(n_obs=100, return_type="DataFrame") - assert isinstance(res, pd.DataFrame) - x, y, d = make_irm_data(n_obs=100, return_type="array") - assert isinstance(x, np.ndarray) - assert isinstance(y, np.ndarray) - assert isinstance(d, np.ndarray) - with pytest.raises(ValueError, match=msg_inv_return_type): - _ = make_irm_data(n_obs=100, return_type="matrix") - - -@pytest.mark.ci -def test_make_iivm_data_return_types(): - np.random.seed(3141) - res = make_iivm_data(n_obs=100, return_type="DoubleMLData") - assert isinstance(res, DoubleMLData) - res = make_iivm_data(n_obs=100, return_type="DataFrame") - assert isinstance(res, pd.DataFrame) - x, y, d, z = make_iivm_data(n_obs=100, return_type="array") - assert isinstance(x, np.ndarray) - assert isinstance(y, np.ndarray) - assert isinstance(d, np.ndarray) - assert isinstance(z, np.ndarray) - with pytest.raises(ValueError, match=msg_inv_return_type): - _ = make_iivm_data(n_obs=100, return_type="matrix") - - -@pytest.mark.ci -def test_make_pliv_data_return_types(): - np.random.seed(3141) - res = _make_pliv_data(n_obs=100, 
return_type="DoubleMLData") - assert isinstance(res, DoubleMLData) - res = _make_pliv_data(n_obs=100, return_type="DataFrame") - assert isinstance(res, pd.DataFrame) - x, y, d, z = _make_pliv_data(n_obs=100, return_type="array") - assert isinstance(x, np.ndarray) - assert isinstance(y, np.ndarray) - assert isinstance(d, np.ndarray) - assert isinstance(z, np.ndarray) - with pytest.raises(ValueError, match=msg_inv_return_type): - _ = _make_pliv_data(n_obs=100, return_type="matrix") - - -@pytest.mark.ci -def test_make_pliv_CHS2015_return_types(): - np.random.seed(3141) - res = make_pliv_CHS2015(n_obs=100, return_type="DoubleMLData") - assert isinstance(res, DoubleMLData) - res = make_pliv_CHS2015(n_obs=100, return_type="DataFrame") - assert isinstance(res, pd.DataFrame) - x, y, d, z = make_pliv_CHS2015(n_obs=100, return_type="array") - assert isinstance(x, np.ndarray) - assert isinstance(y, np.ndarray) - assert isinstance(d, np.ndarray) - assert isinstance(z, np.ndarray) - with pytest.raises(ValueError, match=msg_inv_return_type): - _ = make_pliv_CHS2015(n_obs=100, return_type="matrix") - - -@pytest.mark.ci -def test_make_pliv_multiway_cluster_CKMS2021_return_types(): - np.random.seed(3141) - res = make_pliv_multiway_cluster_CKMS2021(N=10, M=10, return_type="DoubleMLData") - assert isinstance(res, DoubleMLData) - res = make_pliv_multiway_cluster_CKMS2021(N=10, M=10, return_type="DataFrame") - assert isinstance(res, pd.DataFrame) - x, y, d, cluster_vars, z = make_pliv_multiway_cluster_CKMS2021(N=10, M=10, return_type="array") - assert isinstance(x, np.ndarray) - assert isinstance(y, np.ndarray) - assert isinstance(d, np.ndarray) - assert isinstance(cluster_vars, np.ndarray) - assert isinstance(z, np.ndarray) - with pytest.raises(ValueError, match=msg_inv_return_type): - _ = make_pliv_multiway_cluster_CKMS2021(N=10, M=10, return_type="matrix") - - -@pytest.fixture(scope="function", params=[True, False]) -def linear(request): - return request.param - - -@pytest.mark.ci -def test_make_confounded_irm_data_return_types(linear): - np.random.seed(3141) - res = make_confounded_irm_data(linear=linear) - assert isinstance(res, dict) - assert isinstance(res["x"], np.ndarray) - assert isinstance(res["y"], np.ndarray) - assert isinstance(res["d"], np.ndarray) - - assert isinstance(res["oracle_values"], dict) - assert isinstance(res["oracle_values"]["g_long"], np.ndarray) - assert isinstance(res["oracle_values"]["g_short"], np.ndarray) - assert isinstance(res["oracle_values"]["m_long"], np.ndarray) - assert isinstance(res["oracle_values"]["m_short"], np.ndarray) - assert isinstance(res["oracle_values"]["gamma_a"], float) - assert isinstance(res["oracle_values"]["beta_a"], float) - assert isinstance(res["oracle_values"]["a"], np.ndarray) - assert isinstance(res["oracle_values"]["y_0"], np.ndarray) - assert isinstance(res["oracle_values"]["y_1"], np.ndarray) - assert isinstance(res["oracle_values"]["z"], np.ndarray) - assert isinstance(res["oracle_values"]["cf_y"], float) - assert isinstance(res["oracle_values"]["cf_d_ate"], float) - assert isinstance(res["oracle_values"]["cf_d_atte"], float) - assert isinstance(res["oracle_values"]["rho_ate"], float) - assert isinstance(res["oracle_values"]["rho_atte"], float) - - -@pytest.mark.ci -def test_make_confounded_plr_data_return_types(): - np.random.seed(3141) - res = make_confounded_plr_data(theta=5.0) - assert isinstance(res, dict) - assert isinstance(res["x"], np.ndarray) - assert isinstance(res["y"], np.ndarray) - assert isinstance(res["d"], np.ndarray) - - 
assert isinstance(res["oracle_values"], dict) - assert isinstance(res["oracle_values"]["g_long"], np.ndarray) - assert isinstance(res["oracle_values"]["g_short"], np.ndarray) - assert isinstance(res["oracle_values"]["m_long"], np.ndarray) - assert isinstance(res["oracle_values"]["m_short"], np.ndarray) - assert isinstance(res["oracle_values"]["theta"], float) - assert isinstance(res["oracle_values"]["gamma_a"], float) - assert isinstance(res["oracle_values"]["beta_a"], float) - assert isinstance(res["oracle_values"]["a"], np.ndarray) - assert isinstance(res["oracle_values"]["z"], np.ndarray) - - -@pytest.fixture(scope="function", params=[False, True]) -def binary_treatment(request): - return request.param - - -@pytest.fixture(scope="function", params=[1, 2]) -def n_x(request): - return request.param - - -@pytest.mark.ci -def test_make_heterogeneous_data_return_types(binary_treatment, n_x): - np.random.seed(3141) - res = make_heterogeneous_data(n_obs=100, n_x=n_x, binary_treatment=binary_treatment) - assert isinstance(res, dict) - assert isinstance(res["data"], pd.DataFrame) - assert isinstance(res["effects"], np.ndarray) - assert callable(res["treatment_effect"]) - - # test input checks - msg = "n_x must be either 1 or 2." - with pytest.raises(AssertionError, match=msg): - _ = make_heterogeneous_data(n_obs=100, n_x=0, binary_treatment=binary_treatment) - msg = "support_size must be smaller than p." - with pytest.raises(AssertionError, match=msg): - _ = make_heterogeneous_data(n_obs=100, n_x=n_x, support_size=31, binary_treatment=binary_treatment) - msg = "binary_treatment must be a boolean." - with pytest.raises(AssertionError, match=msg): - _ = make_heterogeneous_data(n_obs=100, n_x=n_x, binary_treatment=2) - - -@pytest.mark.ci -def test_make_ssm_data_return_types(): - np.random.seed(3141) - res = make_ssm_data(n_obs=100) - assert isinstance(res, DoubleMLData) - res = make_ssm_data(n_obs=100, return_type="DataFrame") - assert isinstance(res, pd.DataFrame) - x, y, d, z, s = make_ssm_data(n_obs=100, return_type="array") - assert isinstance(x, np.ndarray) - assert isinstance(y, np.ndarray) - assert isinstance(d, np.ndarray) - assert isinstance(z, np.ndarray) - assert isinstance(s, np.ndarray) - with pytest.raises(ValueError, match=msg_inv_return_type): - _ = make_ssm_data(n_obs=100, return_type="matrix") - - -@pytest.fixture(scope="function", params=[3, 5]) -def n_levels(request): - return request.param - - -def test_make_data_discrete_treatments(n_levels): - np.random.seed(3141) - n = 100 - data_apo = make_irm_data_discrete_treatments(n_obs=n, n_levels=3) - assert isinstance(data_apo, dict) - assert isinstance(data_apo["y"], np.ndarray) - assert isinstance(data_apo["d"], np.ndarray) - assert isinstance(data_apo["x"], np.ndarray) - assert isinstance(data_apo["oracle_values"], dict) - - assert isinstance(data_apo["oracle_values"]["cont_d"], np.ndarray) - assert isinstance(data_apo["oracle_values"]["level_bounds"], np.ndarray) - assert isinstance(data_apo["oracle_values"]["potential_level"], np.ndarray) - assert isinstance(data_apo["oracle_values"]["ite"], np.ndarray) - assert isinstance(data_apo["oracle_values"]["y0"], np.ndarray) - - msg = "n_levels must be at least 2." - with pytest.raises(ValueError, match=msg): - _ = make_irm_data_discrete_treatments(n_obs=n, n_levels=1) - - msg = "n_levels must be an integer." 
-    with pytest.raises(ValueError, match=msg):
-        _ = make_irm_data_discrete_treatments(n_obs=n, n_levels=1.1)
diff --git a/doubleml/tests/test_exceptions.py b/doubleml/tests/test_exceptions.py
index 94b5f824..4fca5318 100644
--- a/doubleml/tests/test_exceptions.py
+++ b/doubleml/tests/test_exceptions.py
@@ -15,6 +15,7 @@
     DoubleMLDIDData,
     DoubleMLIIVM,
     DoubleMLIRM,
+    DoubleMLLPLR,
     DoubleMLLPQ,
     DoubleMLPLIV,
     DoubleMLPLR,
@@ -23,7 +24,12 @@
 )
 from doubleml.did.datasets import make_did_SZ2020
 from doubleml.irm.datasets import make_iivm_data, make_irm_data
-from doubleml.plm.datasets import make_pliv_CHS2015, make_pliv_multiway_cluster_CKMS2021, make_plr_CCDDHNR2018
+from doubleml.plm.datasets import (
+    make_lplr_LZZ2020,
+    make_pliv_CHS2015,
+    make_pliv_multiway_cluster_CKMS2021,
+    make_plr_CCDDHNR2018,
+)
 from doubleml.utils import PSProcessorConfig

 from ._utils import DummyDataClass

@@ -689,6 +695,28 @@ def test_doubleml_exception_smpls():
         _ = dml_pliv_cluster.set_sample_splitting(all_smpls=dml_pliv_cluster.smpls, all_smpls_cluster=all_smpls_cluster)


+@pytest.mark.ci
+def test_doubleml_exception_smpls_inner():
+    dml_plr_no_inner = DoubleMLPLR(dml_data, ml_l, ml_m)
+    msg = "smpls_inner is only available for double sample splitting."
+    with pytest.raises(ValueError, match=msg):
+        _ = dml_plr_no_inner.smpls_inner
+    with pytest.raises(ValueError, match=msg):
+        _ = dml_plr_no_inner._DoubleML__smpls__inner
+
+    dml_data_lplr = make_lplr_LZZ2020()
+    ml_M = LogisticRegression()
+    dml_lplr_inner_no_smpls = DoubleMLLPLR(dml_data_lplr, ml_M, ml_m, ml_m, draw_sample_splitting=False)
+    msg = (
+        "Sample splitting not specified. "
+        r"Either draw samples via .draw_sample_splitting\(\) or set external samples via .set_sample_splitting\(\)."
+    )
+    with pytest.raises(ValueError, match=msg):
+        _ = dml_lplr_inner_no_smpls.smpls_inner
+    with pytest.raises(ValueError, match=msg):
+        _ = dml_lplr_inner_no_smpls._DoubleML__smpls__inner
+
+
 @pytest.mark.ci
 def test_doubleml_exception_fit():
     msg = "The number of CPUs used to fit the learners must be of int type. 5 of type <class 'str'> was passed."
diff --git a/doubleml/tests/test_nonlinear_score_mixin.py b/doubleml/tests/test_nonlinear_score_mixin.py
index 0fce08c3..d68785aa 100644
--- a/doubleml/tests/test_nonlinear_score_mixin.py
+++ b/doubleml/tests/test_nonlinear_score_mixin.py
@@ -253,3 +253,29 @@ def test_nonlinear_warnings(generate_data1, coef_bounds):
     with pytest.warns(UserWarning, match=msg):
         dml_plr_obj._coef_bounds = coef_bounds
         dml_plr_obj.fit()
+
+
+@pytest.mark.ci
+def test_nonlinear_errors(generate_data1, coef_bounds):
+    # collect data
+    data = generate_data1
+    x_cols = data.columns[data.columns.str.startswith("X")].tolist()
+
+    np.random.seed(3141)
+    obj_dml_data = dml.DoubleMLData(data, "y", ["d"], x_cols)
+
+    dml_plr_obj = DoubleMLPLRWithNonLinearScoreMixin(obj_dml_data, LinearRegression(), LinearRegression(), score="no_root_pos")
+    dml_plr_obj._error_on_convergence_failure = True
+
+    msg = "Could not find a root of the score function."
+    with pytest.raises(ValueError, match=msg):
+        dml_plr_obj._coef_bounds = coef_bounds
+        dml_plr_obj.fit()
+
+    dml_plr_obj = DoubleMLPLRWithNonLinearScoreMixin(obj_dml_data, LinearRegression(), LinearRegression(), score="no_root_neg")
+    dml_plr_obj._error_on_convergence_failure = True
+
+    msg = "Could not find a root of the score function."
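+    # the "no_root_neg" score likewise admits no root, so fit() must raise again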
+    with pytest.raises(ValueError, match=msg):
+        dml_plr_obj._coef_bounds = coef_bounds
+        dml_plr_obj.fit()
diff --git a/doubleml/tests/test_set_sample_splitting_exceptions.py b/doubleml/tests/test_set_sample_splitting_exceptions.py
new file mode 100644
index 00000000..eba513ca
--- /dev/null
+++ b/doubleml/tests/test_set_sample_splitting_exceptions.py
@@ -0,0 +1,28 @@
+import numpy as np
+import pytest
+from sklearn.linear_model import Lasso, LogisticRegression
+
+from doubleml import DoubleMLLPLR
+from doubleml.plm.datasets import make_lplr_LZZ2020
+
+np.random.seed(3141)
+
+dml_data_lplr = make_lplr_LZZ2020(n_obs=10)
+n_obs = dml_data_lplr.n_obs
+ml_M = LogisticRegression()
+ml_t = Lasso()
+ml_m = Lasso()
+dml_lplr = DoubleMLLPLR(dml_data_lplr, ml_M, ml_t, ml_m, n_folds=7, n_rep=8, draw_sample_splitting=False)
+
+
+@pytest.mark.ci
+def test_doubleml_exceptions_double_sample_splitting():
+    smpls = (np.arange(n_obs), np.arange(n_obs))
+    msg = "set_sample_splitting not supported for double sample splitting."
+    with pytest.raises(ValueError, match=msg):
+        dml_lplr.set_sample_splitting(smpls)
+
+    dml_lplr._is_cluster_data = True
+    msg = "Cluster data not supported for double sample splitting."
+    with pytest.raises(ValueError, match=msg):
+        dml_lplr.draw_sample_splitting()
diff --git a/doubleml/utils/_estimation.py b/doubleml/utils/_estimation.py
index 3d99d93a..b79c7618 100644
--- a/doubleml/utils/_estimation.py
+++ b/doubleml/utils/_estimation.py
@@ -38,13 +38,25 @@ def _get_cond_smpls_2d(smpls, bin_var1, bin_var2):
     return smpls_00, smpls_01, smpls_10, smpls_11


-def _fit(estimator, x, y, train_index, idx=None):
-    estimator.fit(x[train_index, :], y[train_index])
+def _fit(estimator, x, y, train_index, idx=None, sample_weights=None):
+    if sample_weights is not None:
+        estimator.fit(x[train_index, :], y[train_index], sample_weight=sample_weights[train_index])
+    else:
+        estimator.fit(x[train_index, :], y[train_index])
     return estimator, idx


 def _dml_cv_predict(
-    estimator, x, y, smpls=None, n_jobs=None, est_params=None, method="predict", return_train_preds=False, return_models=False
+    estimator,
+    x,
+    y,
+    smpls=None,
+    n_jobs=None,
+    est_params=None,
+    method="predict",
+    return_train_preds=False,
+    return_models=False,
+    sample_weights=None,
 ):
     n_obs = x.shape[0]
@@ -57,14 +69,33 @@ def _dml_cv_predict(
     res = {"models": None}

     if not manual_cv_predict:
+        # prepare fit_params for cross_val_predict
+        fit_params_for_cv = {"sample_weight": sample_weights} if sample_weights is not None else None
+
         if est_params is None:
             # if there are no parameters set we redirect to the standard method
-            preds = cross_val_predict(clone(estimator), x, y, cv=smpls, n_jobs=n_jobs, method=method)
+            preds = cross_val_predict(
+                clone(estimator),
+                x,
+                y,
+                cv=smpls,
+                n_jobs=n_jobs,
+                method=method,
+                params=fit_params_for_cv,
+            )
         else:
             assert isinstance(est_params, dict)
             # if no fold-specific parameters we redirect to the standard method
             # warnings.warn("Using the same (hyper-)parameters for all folds")
-            preds = cross_val_predict(clone(estimator).set_params(**est_params), x, y, cv=smpls, n_jobs=n_jobs, method=method)
+            preds = cross_val_predict(
+                clone(estimator).set_params(**est_params),
+                x,
+                y,
+                cv=smpls,
+                n_jobs=n_jobs,
+                method=method,
+                params=fit_params_for_cv,
+            )
         if method == "predict_proba":
             res["preds"] = preds[:, 1]
         else:
@@ -73,7 +104,7 @@ def _dml_cv_predict(
     else:
         if not smpls_is_partition:
             assert not fold_specific_target, "combination of fold-specific y and no cross-fitting not implemented yet"
-            assert len(smpls) == 1
+            # more than one fold is possible here, e.g. for the inner folds used with double sample splitting
         if method == "predict_proba":
             assert not fold_specific_target  # fold_specific_target only needed for PLIV.partialXZ
@@ -95,19 +126,28 @@

         if est_params is None:
             fitted_models = parallel(
-                delayed(_fit)(clone(estimator), x, y_list[idx], train_index, idx)
+                delayed(_fit)(clone(estimator), x, y_list[idx], train_index, idx, sample_weights=sample_weights)
                 for idx, (train_index, test_index) in enumerate(smpls)
             )
         elif isinstance(est_params, dict):
             # warnings.warn("Using the same (hyper-)parameters for all folds")
             fitted_models = parallel(
-                delayed(_fit)(clone(estimator).set_params(**est_params), x, y_list[idx], train_index, idx)
+                delayed(_fit)(
+                    clone(estimator).set_params(**est_params), x, y_list[idx], train_index, idx, sample_weights=sample_weights
+                )
                 for idx, (train_index, test_index) in enumerate(smpls)
             )
         else:
             assert len(est_params) == len(smpls), "provide one parameter setting per fold"
             fitted_models = parallel(
-                delayed(_fit)(clone(estimator).set_params(**est_params[idx]), x, y_list[idx], train_index, idx)
+                delayed(_fit)(
+                    clone(estimator).set_params(**est_params[idx]),
+                    x,
+                    y_list[idx],
+                    train_index,
+                    idx,
+                    sample_weights=sample_weights,
+                )
                 for idx, (train_index, test_index) in enumerate(smpls)
             )
@@ -148,10 +188,20 @@

 def _dml_tune(
-    y, x, train_inds, learner, param_grid, scoring_method, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search
+    y,
+    x,
+    train_inds,
+    learner,
+    param_grid,
+    scoring_method,
+    n_folds_tune,
+    n_jobs_cv,
+    search_mode,
+    n_iter_randomized_search,
+    fold_specific_target=False,
 ):
     tune_res = list()
-    for train_index in train_inds:
+    for i, train_index in enumerate(train_inds):
         tune_resampling = KFold(n_splits=n_folds_tune, shuffle=True)
         if search_mode == "grid_search":
             g_grid_search = GridSearchCV(learner, param_grid, scoring=scoring_method, cv=tune_resampling, n_jobs=n_jobs_cv)
@@ -165,7 +215,10 @@
                 n_jobs=n_jobs_cv,
                 n_iter=n_iter_randomized_search,
             )
-        tune_res.append(g_grid_search.fit(x[train_index, :], y[train_index]))
+        if fold_specific_target:
+            # fold-specific targets come as a list with one entry per training set
+            tune_res.append(g_grid_search.fit(x[train_index, :], y[i]))
+        else:
+            tune_res.append(g_grid_search.fit(x[train_index, :], y[train_index]))
     return tune_res
@@ -332,7 +385,7 @@ def _set_external_predictions(external_predictions, learners, treatment, i_rep):
             ext_prediction_dict[learner] = None
         elif learner in external_predictions[treatment].keys():
             if isinstance(external_predictions[treatment][learner], np.ndarray):
-                ext_prediction_dict[learner] = external_predictions[treatment][learner][:, i_rep]
+                ext_prediction_dict[learner] = external_predictions[treatment][learner][:, i_rep].astype(float)
             else:
                 ext_prediction_dict[learner] = None
         else:
diff --git a/doubleml/utils/resampling.py b/doubleml/utils/resampling.py
index 188d2f24..4f49a3d2 100644
--- a/doubleml/utils/resampling.py
+++ b/doubleml/utils/resampling.py
@@ -25,6 +25,56 @@ def split_samples(self):
         return smpls


+class DoubleMLDoubleResampling:
+    def __init__(self, n_folds, n_folds_inner, n_rep, n_obs, stratify=None):
+        self.n_folds = n_folds
+        self.n_folds_inner = n_folds_inner
+        self.n_rep = n_rep
+        self.n_obs = n_obs
+        self.stratify = np.array(stratify) if stratify is not None else None
+
+        if n_folds < 2:
+            raise ValueError(
+                "n_folds must be greater than 1. You can use set_sample_splitting with a tuple to only use one fold."
+            )
+        if n_folds_inner < 2:
+            raise ValueError(
+                "n_folds_inner must be greater than 1. You can use set_sample_splitting with a tuple to only use one fold."
+            )
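+        # the outer splitter draws n_folds x n_rep folds; the inner splitter re-partitions each outer training set once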
+        if self.stratify is None:
+            self.resampling = RepeatedKFold(n_splits=n_folds, n_repeats=n_rep)
+            self.resampling_inner = RepeatedKFold(n_splits=n_folds_inner, n_repeats=1)
+        else:
+            self.resampling = RepeatedStratifiedKFold(n_splits=n_folds, n_repeats=n_rep)
+            self.resampling_inner = RepeatedStratifiedKFold(n_splits=n_folds_inner, n_repeats=1)
+
+    def split_samples(self):
+        all_smpls = [(train, test) for train, test in self.resampling.split(X=np.zeros(self.n_obs), y=self.stratify)]
+        smpls = [all_smpls[(i_repeat * self.n_folds) : ((i_repeat + 1) * self.n_folds)] for i_repeat in range(self.n_rep)]
+        smpls_inner = []
+        for i_rep in range(self.n_rep):
+            smpls_inner_rep = []
+            for train, _ in smpls[i_rep]:
+                if self.stratify is None:
+                    smpls_inner_rep.append(
+                        [
+                            (train[train_inner], train[test_inner])
+                            for train_inner, test_inner in self.resampling_inner.split(X=train)
+                        ]
+                    )
+                else:
+                    smpls_inner_rep.append(
+                        [
+                            (train[train_inner], train[test_inner])
+                            for train_inner, test_inner in self.resampling_inner.split(X=train, y=self.stratify[train])
+                        ]
+                    )
+            smpls_inner.append(smpls_inner_rep)
+
+        return smpls, smpls_inner
+
+
 class DoubleMLClusterResampling:
     def __init__(self, n_folds, n_rep, n_obs, n_cluster_vars, cluster_vars):
         self.n_folds = n_folds
diff --git a/doubleml/utils/tests/test_resampling.py b/doubleml/utils/tests/test_resampling.py
new file mode 100644
index 00000000..3ecfbada
--- /dev/null
+++ b/doubleml/utils/tests/test_resampling.py
@@ -0,0 +1,49 @@
+import pytest
+
+from doubleml.utils.resampling import DoubleMLDoubleResampling
+
+
+@pytest.mark.ci
+def test_DoubleMLDoubleResampling_stratify():
+    n_folds = 5
+    n_folds_inner = 3
+    n_rep = 2
+    n_obs = 100
+    stratify = [0] * 50 + [1] * 50
+
+    obj_dml_double_resampling = DoubleMLDoubleResampling(
+        n_folds=n_folds,
+        n_folds_inner=n_folds_inner,
+        n_rep=n_rep,
+        n_obs=n_obs,
+        stratify=stratify,
+    )
+    smpls, smpls_inner = obj_dml_double_resampling.split_samples()
+
+    assert len(smpls) == n_rep
+    assert len(smpls_inner) == n_rep
+
+    for i_rep in range(n_rep):
+        assert len(smpls[i_rep]) == n_folds
+        assert len(smpls_inner[i_rep]) == n_folds
+
+        for i_fold in range(n_folds):
+            train_ind, _ = smpls[i_rep][i_fold]
+            smpls_inner_rep_fold = smpls_inner[i_rep][i_fold]
+            assert len(smpls_inner_rep_fold) == n_folds_inner
+
+            for i_fold_inner in range(n_folds_inner):
+                train_ind_inner, test_ind_inner = smpls_inner_rep_fold[i_fold_inner]
+                assert set(train_ind_inner).issubset(set(train_ind))
+                assert set(test_ind_inner).issubset(set(train_ind))
+
+
+@pytest.mark.ci
+def test_DoubleMLDoubleResampling_exceptions():
+    msg = "n_folds must be greater than 1. You can use set_sample_splitting with a tuple to only use one fold."
+    with pytest.raises(ValueError, match=msg):
+        _ = DoubleMLDoubleResampling(1, 5, 1, 100)
+
+    msg = "n_folds_inner must be greater than 1. You can use set_sample_splitting with a tuple to only use one fold."
+    with pytest.raises(ValueError, match=msg):
+        _ = DoubleMLDoubleResampling(5, 1, 1, 100)
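+
+
+# Minimal usage sketch (illustrative only, not executed in CI): the outer splits
+# have the same layout as DoubleMLResampling, while smpls_inner[i_rep][i_fold]
+# re-partitions the corresponding outer training indices into n_folds_inner folds.
+#
+#     smpls, smpls_inner = DoubleMLDoubleResampling(
+#         n_folds=2, n_folds_inner=2, n_rep=1, n_obs=6
+#     ).split_samples()
+#     train_0, _ = smpls[0][0]
+#     inner_train, inner_test = smpls_inner[0][0][0]
+#     assert set(inner_train) | set(inner_test) == set(train_0)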