
Commit 891a455

update unit tests
1 parent f6fdbc2 commit 891a455

8 files changed: +128 additions, -42 deletions


doubleml/double_ml_did.py

Lines changed: 6 additions & 6 deletions
@@ -21,7 +21,7 @@ class DoubleMLDID(LinearScoreMixin, DoubleML):
 
     ml_g : estimator implementing ``fit()`` and ``predict()``
         A machine learner implementing ``fit()`` and ``predict()`` methods (e.g.
-        :py:class:`sklearn.ensemble.RandomForestRegressor`) for the nuisance function :math:`g_0(d,X) = E[\Delta Y|D=d, X]`.
+        :py:class:`sklearn.ensemble.RandomForestRegressor`) for the nuisance function :math:`g_0(d,X) = E[Y_1-Y_0|D=d, X]`.
         For a binary outcome variable :math:`Y` (with values 0 and 1), a classifier implementing ``fit()`` and
         ``predict_proba()`` can also be specified. If :py:func:`sklearn.base.is_classifier` returns ``True``,
         ``predict_proba()`` is used otherwise ``predict()``.

@@ -126,19 +126,19 @@ def __init__(self,
         else:
             assert self.score == 'experimental'
             if ml_m is not None:
-                warnings.warn(('A learner ml_m has been provided for score = "experimental" but will be ignored. "'
+                warnings.warn(('A learner ml_m has been provided for score = "experimental" but will be ignored. '
                                'A learner ml_m is not required for estimation.'))
             self._learner = {'ml_g': ml_g}
 
         if ml_g_is_classifier:
-            if obj_dml_data.binary_outcome:
+            if obj_dml_data.binary_outcome:
                 self._predict_method = {'ml_g': 'predict_proba'}
             else:
                 raise ValueError(f'The ml_g learner {str(ml_g)} was identified as classifier '
                                  'but the outcome variable is not binary with values 0 and 1.')
         else:
             self._predict_method = {'ml_g': 'predict'}
-
+
         if 'ml_m' in self._learner:
             self._predict_method['ml_m'] = 'predict_proba'
         self._initialize_ml_nuisance_params()

@@ -313,8 +313,8 @@ def _nuisance_tuning(self, smpls, param_grids, scoring_methods, n_folds_tune, n_
         m_tune_res = list()
         if self.score == 'observational':
             m_tune_res = _dml_tune(d, x, train_inds,
-                                self._learner['ml_m'], param_grids['ml_m'], scoring_methods['ml_m'],
-                                n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search)
+                                   self._learner['ml_m'], param_grids['ml_m'], scoring_methods['ml_m'],
+                                   n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search)
         g1_tune_res = list()
         if self.score == 'experimental':
             g1_tune_res = _dml_tune(y, x, train_inds_d1,
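
A minimal usage sketch of the behaviour exercised by the new tests further down (not part of this commit; the data are synthetic and RandomForestRegressor is only one admissible choice for ml_g, named as an example in the docstring above). With score='experimental' the propensity learner ml_m can be omitted, and supplying one only triggers the warning whose stray quote is corrected in this commit.

# Hypothetical example, not from the repository's test suite.
import numpy as np
import doubleml as dml
from sklearn.ensemble import RandomForestRegressor

np.random.seed(3141)
n = 500
x = np.random.normal(size=(n, 5))
d = np.random.binomial(1, 0.5, size=n)        # randomized treatment indicator
y = d * x[:, 0] + np.random.normal(size=n)    # synthetic outcome change (post minus pre)

obj_dml_data = dml.DoubleMLData.from_arrays(x, y, d)
dml_did_obj = dml.DoubleMLDID(obj_dml_data,
                              ml_g=RandomForestRegressor(n_estimators=100),
                              score='experimental')   # no ml_m required for this score
dml_did_obj.fit()
print(dml_did_obj.summary)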

doubleml/double_ml_did_cs.py

Lines changed: 10 additions & 10 deletions
@@ -126,7 +126,7 @@ def __init__(self,
         else:
             assert self.score == 'experimental'
             if ml_m is not None:
-                warnings.warn(('A learner ml_m has been provided for score = "experimental" but will be ignored. "'
+                warnings.warn(('A learner ml_m has been provided for score = "experimental" but will be ignored. '
                                'A learner ml_m is not required for estimation.'))
             self._learner = {'ml_g': ml_g}
 

@@ -427,8 +427,8 @@ def _nuisance_tuning(self, smpls, param_grids, scoring_methods, n_folds_tune, n_
         m_tune_res = list()
         if self.score == 'observational':
             m_tune_res = _dml_tune(d, x, train_inds,
-                                self._learner['ml_m'], param_grids['ml_m'], scoring_methods['ml_m'],
-                                n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search)
+                                   self._learner['ml_m'], param_grids['ml_m'], scoring_methods['ml_m'],
+                                   n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search)
 
         g_d0_t0_best_params = [xx.best_params_ for xx in g_d0_t0_tune_res]
         g_d0_t1_best_params = [xx.best_params_ for xx in g_d0_t1_tune_res]

@@ -438,20 +438,20 @@ def _nuisance_tuning(self, smpls, param_grids, scoring_methods, n_folds_tune, n_
         if self.score == 'observational':
             m_best_params = [xx.best_params_ for xx in m_tune_res]
             params = {'ml_g_d0_t0': g_d0_t0_best_params,
-                      'ml_g_d0_t1': g_d0_t1_best_params,
-                      'ml_g_d1_t0': g_d1_t0_best_params,
-                      'ml_g_d1_t1': g_d1_t1_best_params,
-                      'ml_m': m_best_params}
+                      'ml_g_d0_t1': g_d0_t1_best_params,
+                      'ml_g_d1_t0': g_d1_t0_best_params,
+                      'ml_g_d1_t1': g_d1_t1_best_params,
+                      'ml_m': m_best_params}
             tune_res = {'g_d0_t0_tune': g_d0_t0_tune_res,
                         'g_d0_t1_tune': g_d0_t1_tune_res,
                         'g_d1_t0_tune': g_d1_t0_tune_res,
                         'g_d1_t1_tune': g_d1_t1_tune_res,
                         'm_tune': m_tune_res}
         else:
             params = {'ml_g_d0_t0': g_d0_t0_best_params,
-                      'ml_g_d0_t1': g_d0_t1_best_params,
-                      'ml_g_d1_t0': g_d1_t0_best_params,
-                      'ml_g_d1_t1': g_d1_t1_best_params}
+                      'ml_g_d0_t1': g_d0_t1_best_params,
+                      'ml_g_d1_t0': g_d1_t0_best_params,
+                      'ml_g_d1_t1': g_d1_t1_best_params}
             tune_res = {'g_d0_t0_tune': g_d0_t0_tune_res,
                         'g_d0_t1_tune': g_d0_t1_tune_res,
                         'g_d1_t0_tune': g_d1_t0_tune_res,
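
The repeated cross-section model changed in this file follows the same pattern. A minimal sketch (again not from the commit, synthetic data, arbitrary learner choice) differs only in passing the time indicator t:

# Hypothetical example for DoubleMLDIDCS with the experimental score.
import numpy as np
import doubleml as dml
from sklearn.ensemble import RandomForestRegressor

np.random.seed(3141)
n = 1000
x = np.random.normal(size=(n, 5))
d = np.random.binomial(1, 0.5, size=n)   # treatment group indicator
t = np.random.binomial(1, 0.5, size=n)   # time period (0 = pre, 1 = post)
y = 0.5 * d * t + x[:, 0] + np.random.normal(size=n)

obj_dml_data = dml.DoubleMLData.from_arrays(x, y, d, t=t)
dml_did_cs_obj = dml.DoubleMLDIDCS(obj_dml_data,
                                   ml_g=RandomForestRegressor(n_estimators=100),
                                   score='experimental')
dml_did_cs_obj.fit()
print(dml_did_cs_obj.summary)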

doubleml/tests/_utils_did_cs_manual.py

Lines changed: 17 additions & 10 deletions
@@ -4,8 +4,6 @@
 from ._utils import fit_predict, fit_predict_proba, tune_grid_search
 from ._utils_did_manual import did_dml1, did_dml2
 
-from .._utils import _check_is_propensity
-
 
 def fit_did_cs(y, x, d, t,
                learner_g, learner_m, all_smpls, dml_procedure, score, in_sample_normalization,

@@ -105,10 +103,16 @@ def fit_nuisance_did_cs(y, x, d, t,
     train_cond_d1_t1 = np.intersect1d(np.where(d == 1)[0], np.where(t == 1)[0])
     g_hat_d1_t1_list = fit_predict(y, x, ml_g_d1_t1, g_d1_t1_params, smpls,
                                    train_cond=train_cond_d1_t1)
-
-    ml_m = clone(learner_m)
-    m_hat_list = fit_predict_proba(d, x, ml_m, m_params, smpls,
-                                   trimming_threshold=trimming_threshold)
+    if score == 'observational':
+        ml_m = clone(learner_m)
+        m_hat_list = fit_predict_proba(d, x, ml_m, m_params, smpls,
+                                       trimming_threshold=trimming_threshold)
+    else:
+        assert score == 'experimental'
+        m_hat_list = list()
+        for idx, _ in enumerate(smpls):
+            # fill it up, but its not further used
+            m_hat_list.append(np.zeros_like(g_hat_d1_t1_list[idx], dtype='float64'))
 
     p_hat_list = []
     for (train_index, _) in smpls:

@@ -145,7 +149,6 @@ def compute_did_cs_residuals(y, g_hat_d0_t0_list, g_hat_d0_t1_list,
     resid_d0_t1 = y - g_hat_d0_t1
     resid_d1_t0 = y - g_hat_d1_t0
     resid_d1_t1 = y - g_hat_d1_t1
-    _check_is_propensity(m_hat, 'learner_m', 'ml_m', smpls, eps=1e-12)
     return resid_d0_t0, resid_d0_t1, resid_d1_t0, resid_d1_t1, \
         g_hat_d0_t0, g_hat_d0_t1, g_hat_d1_t0, g_hat_d1_t1, \
         m_hat, p_hat, lambda_hat

@@ -259,13 +262,17 @@ def tune_nuisance_did_cs(y, x, d, t, ml_g, ml_m, smpls, score, n_folds_tune,
     g_d1_t1_tune_res = tune_grid_search(y, x, ml_g, smpls, param_grid_g, n_folds_tune,
                                         train_cond=smpls_d1_t1)
 
-    m_tune_res = tune_grid_search(d, x, ml_m, smpls, param_grid_m, n_folds_tune)
-
     g_d0_t0_best_params = [xx.best_params_ for xx in g_d0_t0_tune_res]
     g_d0_t1_best_params = [xx.best_params_ for xx in g_d0_t1_tune_res]
     g_d1_t0_best_params = [xx.best_params_ for xx in g_d1_t0_tune_res]
     g_d1_t1_best_params = [xx.best_params_ for xx in g_d1_t1_tune_res]
-    m_best_params = [xx.best_params_ for xx in m_tune_res]
+
+    if score == 'observational':
+        m_tune_res = tune_grid_search(d, x, ml_m, smpls, param_grid_m, n_folds_tune)
+        m_best_params = [xx.best_params_ for xx in m_tune_res]
+    else:
+        assert score == 'experimental'
+        m_best_params = None
 
     return g_d0_t0_best_params, g_d0_t1_best_params, \
         g_d1_t0_best_params, g_d1_t1_best_params, m_best_params

doubleml/tests/_utils_did_manual.py

Lines changed: 14 additions & 14 deletions
@@ -4,8 +4,6 @@
 from ._utils_boot import boot_manual, draw_weights
 from ._utils import fit_predict, fit_predict_proba, tune_grid_search
 
-from .._utils import _check_is_propensity
-
 
 def fit_did(y, x, d,
             learner_g, learner_m, all_smpls, dml_procedure, score, in_sample_normalization,

@@ -75,6 +73,10 @@ def fit_nuisance_did(y, x, d, learner_g, learner_m, smpls, score,
         train_cond1 = np.where(d == 1)[0]
         g_hat1_list = fit_predict(y, x, ml_g1, g1_params, smpls,
                                   train_cond=train_cond1)
+        m_hat_list = list()
+        for idx, _ in enumerate(smpls):
+            # fill it up, but its not further used
+            m_hat_list.append(np.zeros_like(g_hat0_list[idx], dtype='float64'))
 
     else:
         assert score == 'observational'

@@ -83,9 +85,9 @@ def fit_nuisance_did(y, x, d, learner_g, learner_m, smpls, score,
             # fill it up, but its not further used
             g_hat1_list.append(np.zeros_like(g_hat0_list[idx], dtype='float64'))
 
-    ml_m = clone(learner_m)
-    m_hat_list = fit_predict_proba(d, x, ml_m, m_params, smpls,
-                                   trimming_threshold=trimming_threshold)
+        ml_m = clone(learner_m)
+        m_hat_list = fit_predict_proba(d, x, ml_m, m_params, smpls,
+                                       trimming_threshold=trimming_threshold)
 
     p_hat_list = []
     for (train_index, _) in smpls:

@@ -107,7 +109,6 @@ def compute_did_residuals(y, g_hat0_list, g_hat1_list, m_hat_list, p_hat_list, s
         m_hat[test_index] = m_hat_list[idx]
         p_hat[test_index] = p_hat_list[idx]
 
-    _check_is_propensity(m_hat, 'learner_m', 'ml_m', smpls, eps=1e-12)
     return resid_d0, g_hat0, g_hat1, m_hat, p_hat
 
 

@@ -157,13 +158,12 @@ def did_score_elements(g_hat0, g_hat1, m_hat, p_hat, resid_d0, d, score, in_samp
         weight_psi_a = np.ones_like(d)
         weight_g0 = np.divide(d, np.mean(d)) - 1.0
         weight_g1 = 1.0 - np.divide(d, np.mean(d))
-        propensity_weight = np.multiply(1.0-d, np.divide(m_hat, 1.0-m_hat))
-        weight_resid_d0 = np.divide(d, np.mean(d)) - np.divide(propensity_weight, np.mean(propensity_weight))
+        weight_resid_d0 = np.divide(d, np.mean(d)) - np.divide(1.0-d, np.mean(1.0-d))
     else:
         weight_psi_a = np.ones_like(d)
         weight_g0 = np.divide(d, p_hat) - 1.0
         weight_g1 = 1.0 - np.divide(d, p_hat)
-        weight_resid_d0 = np.divide(d-m_hat, np.multiply(p_hat, 1.0-m_hat))
+        weight_resid_d0 = np.divide(d-p_hat, np.multiply(p_hat, 1.0-p_hat))
 
     psi_b_1 = np.multiply(weight_g0, g_hat0) + np.multiply(weight_g1, g_hat1)
 

@@ -223,18 +223,18 @@ def tune_nuisance_did(y, x, d, ml_g, ml_m, smpls, score, n_folds_tune,
     train_cond0 = np.where(d == 0)[0]
     g0_tune_res = tune_grid_search(y, x, ml_g, smpls, param_grid_g, n_folds_tune,
                                    train_cond=train_cond0)
-
+    g0_best_params = [xx.best_params_ for xx in g0_tune_res]
     if score == 'experimental':
         train_cond1 = np.where(d == 1)[0]
         g1_tune_res = tune_grid_search(y, x, ml_g, smpls, param_grid_g, n_folds_tune,
                                        train_cond=train_cond1)
         g1_best_params = [xx.best_params_ for xx in g1_tune_res]
+        m_best_params = None
     else:
+        assert score == 'observational'
         g1_best_params = None
 
-    m_tune_res = tune_grid_search(d, x, ml_m, smpls, param_grid_m, n_folds_tune)
-
-    g0_best_params = [xx.best_params_ for xx in g0_tune_res]
-    m_best_params = [xx.best_params_ for xx in m_tune_res]
+        m_tune_res = tune_grid_search(d, x, ml_m, smpls, param_grid_m, n_folds_tune)
+        m_best_params = [xx.best_params_ for xx in m_tune_res]
 
     return g0_best_params, g1_best_params, m_best_params
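
Reading the did_score_elements change above (a summary of the new code, not text from the commit): with score = 'experimental' the weight on the outcome-regression residual no longer involves the conditional propensity m_hat. Writing \bar{d} for the in-sample treatment share and \hat{p} for the value passed as p_hat, the two variants in the new code appear to be

% with in_sample_normalization:
\omega_i = \frac{d_i}{\bar{d}} - \frac{1 - d_i}{1 - \bar{d}},
% without in_sample_normalization:
\omega_i = \frac{d_i - \hat{p}}{\hat{p}\,(1 - \hat{p})}.

Both expressions depend only on the treatment indicator and its unconditional share, which is consistent with ml_m becoming optional for score = 'experimental' elsewhere in this commit.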

doubleml/tests/test_did.py

Lines changed: 37 additions & 0 deletions
@@ -130,3 +130,40 @@ def test_dml_did_boot(dml_did_fixture):
         assert np.allclose(dml_did_fixture['boot_t_stat' + bootstrap],
                            dml_did_fixture['boot_t_stat' + bootstrap + '_manual'],
                            rtol=1e-9, atol=1e-4)
+
+
+@pytest.mark.ci
+def test_dml_did_experimental(generate_data_did, in_sample_normalization, learner):
+    # collect data
+    (x, y, d) = generate_data_did
+
+    # Set machine learning methods for m & g
+    ml_g = clone(learner[0])
+    ml_m = clone(learner[1])
+
+    np.random.seed(3141)
+    obj_dml_data = dml.DoubleMLData.from_arrays(x, y, d)
+
+    np.random.seed(3141)
+    dml_did_obj_without_ml_m = dml.DoubleMLDID(obj_dml_data,
+                                               ml_g,
+                                               score='experimental',
+                                               in_sample_normalization=in_sample_normalization)
+    dml_did_obj_without_ml_m.fit()
+
+    np.random.seed(3141)
+    dml_did_obj_with_ml_m = dml.DoubleMLDID(obj_dml_data,
+                                            ml_g, ml_m,
+                                            score='experimental',
+                                            in_sample_normalization=in_sample_normalization)
+    dml_did_obj_with_ml_m.fit()
+    assert math.isclose(dml_did_obj_with_ml_m.coef,
+                        dml_did_obj_without_ml_m.coef,
+                        rel_tol=1e-9, abs_tol=1e-4)
+
+    msg = ('A learner ml_m has been provided for score = "experimental" but will be ignored. '
+           'A learner ml_m is not required for estimation.')
+    with pytest.warns(UserWarning, match=msg):
+        dml.DoubleMLDID(obj_dml_data, ml_g, ml_m,
+                        score='experimental',
+                        in_sample_normalization=in_sample_normalization)

doubleml/tests/test_did_cs.py

Lines changed: 37 additions & 0 deletions
@@ -131,3 +131,40 @@ def test_dml_did_cs_boot(dml_did_cs_fixture):
         assert np.allclose(dml_did_cs_fixture['boot_t_stat' + bootstrap],
                            dml_did_cs_fixture['boot_t_stat' + bootstrap + '_manual'],
                            rtol=1e-9, atol=1e-4)
+
+
+@pytest.mark.ci
+def test_dml_did_cs_experimental(generate_data_did_cs, in_sample_normalization, learner):
+    # collect data
+    (x, y, d, t) = generate_data_did_cs
+
+    # Set machine learning methods for m & g
+    ml_g = clone(learner[0])
+    ml_m = clone(learner[1])
+
+    np.random.seed(3141)
+    obj_dml_data = dml.DoubleMLData.from_arrays(x, y, d, t=t)
+
+    np.random.seed(3141)
+    dml_did_obj_without_ml_m = dml.DoubleMLDIDCS(obj_dml_data,
+                                                 ml_g,
+                                                 score='experimental',
+                                                 in_sample_normalization=in_sample_normalization)
+    dml_did_obj_without_ml_m.fit()
+
+    np.random.seed(3141)
+    dml_did_obj_with_ml_m = dml.DoubleMLDIDCS(obj_dml_data,
+                                              ml_g, ml_m,
+                                              score='experimental',
+                                              in_sample_normalization=in_sample_normalization)
+    dml_did_obj_with_ml_m.fit()
+    assert math.isclose(dml_did_obj_with_ml_m.coef,
+                        dml_did_obj_without_ml_m.coef,
+                        rel_tol=1e-9, abs_tol=1e-4)
+
+    msg = ('A learner ml_m has been provided for score = "experimental" but will be ignored. '
+           'A learner ml_m is not required for estimation.')
+    with pytest.warns(UserWarning, match=msg):
+        dml.DoubleMLDIDCS(obj_dml_data, ml_g, ml_m,
+                          score='experimental',
+                          in_sample_normalization=in_sample_normalization)

doubleml/tests/test_did_cs_tune.py

Lines changed: 5 additions & 1 deletion
@@ -116,7 +116,11 @@ def dml_did_cs_fixture(generate_data_did_cs, learner_g, learner_m, score, in_sam
     g_d0_t1_params = g_d0_t1_params * n_folds
     g_d1_t0_params = g_d1_t0_params * n_folds
     g_d1_t1_params = g_d1_t1_params * n_folds
-    m_params = m_params * n_folds
+    if score == 'observational':
+        m_params = m_params * n_folds
+    else:
+        assert score == 'experimental'
+        m_params = None
 
     res_manual = fit_did_cs(y, x, d, t, clone(learner_g), clone(learner_m),
                             all_smpls, dml_procedure, score, in_sample_normalization,

doubleml/tests/test_did_tune.py

Lines changed: 2 additions & 1 deletion
@@ -109,12 +109,13 @@ def dml_did_fixture(generate_data_did, learner_g, learner_m, score, in_sample_no
                                               n_folds_tune,
                                               par_grid['ml_g'], par_grid['ml_m'])
     g0_params = g0_params * n_folds
-    m_params = m_params * n_folds
     if score == 'experimental':
         g1_params = g1_params * n_folds
+        m_params = None
     else:
         assert score == 'observational'
         g1_params = None
+        m_params = m_params * n_folds
 
     res_manual = fit_did(y, x, d, clone(learner_g), clone(learner_m),
                          all_smpls, dml_procedure, score, in_sample_normalization,
