11import numpy as np
22import pandas as pd
3- from scipy .stats import norm
43
54from sklearn .base import clone
65
1211from .cvar import DoubleMLCVAR
1312from ..double_ml_framework import concat
1413
15- from ..utils ._estimation import _draw_weights , _default_kde
14+ from ..utils ._estimation import _default_kde
1615from ..utils .resampling import DoubleMLResampling
1716from ..utils ._checks import _check_score , _check_trimming , _check_zero_one_treatment
1817
@@ -155,13 +154,6 @@ def __init__(self,
155154 # initialize all models
156155 self ._modellist_0 , self ._modellist_1 = self ._initialize_models ()
157156
158- # initialize arrays according to obj_dml_data and the resampling settings
159- self ._psi0 , self ._psi1 , self ._psi0_deriv , self ._psi1_deriv , \
160- self ._coef , self ._se , self ._all_coef , self ._all_se = self ._initialize_arrays ()
161-
162- # also initialize bootstrap arrays with the default number of bootstrap replications
163- self ._n_rep_boot , self ._boot_coef , self ._boot_t_stat = self ._initialize_boot_arrays (n_rep_boot = 500 )
164-
165157 def __str__ (self ):
166158 class_name = self .__class__ .__name__
167159 header = f'================== { class_name } Object ==================\n '
@@ -276,52 +268,69 @@ def trimming_threshold(self):
276268 @property
277269 def coef (self ):
278270 """
279- Estimates for the causal parameter(s) after calling :meth:`fit`.
271+ Estimates for the causal parameter(s) after calling :meth:`fit` (shape (``n_quantiles``,)) .
280272 """
281- return self ._coef
282-
283- @ coef . setter
284- def coef ( self , value ):
285- self . _coef = value
273+ if self ._framework is None :
274+ coef = None
275+ else :
276+ coef = self . framework . thetas
277+ return coef
286278
287279 @property
288280 def all_coef (self ):
289281 """
290- Estimates of the causal parameter(s) for the ``n_rep`` different sample splits after calling :meth:`fit`.
282+ Estimates of the causal parameter(s) for the ``n_rep`` different sample splits after calling :meth:`fit`
283+ (shape (``n_quantiles``, ``n_rep``)).
291284 """
292- return self ._all_coef
285+ if self ._framework is None :
286+ all_coef = None
287+ else :
288+ all_coef = self .framework .all_thetas
289+ return all_coef
293290
294291 @property
295292 def se (self ):
296293 """
297- Standard errors for the causal parameter(s) after calling :meth:`fit`.
294+ Standard errors for the causal parameter(s) after calling :meth:`fit` (shape (``n_quantiles``,)) .
298295 """
299- return self ._se
296+ if self ._framework is None :
297+ se = None
298+ else :
299+ se = self .framework .ses
300+ return se
300301
301- @se .setter
302- def se (self , value ):
303- self ._se = value
302+ @property
303+ def all_se (self ):
304+ """
305+ Standard errors of the causal parameter(s) for the ``n_rep`` different sample splits after calling :meth:`fit`
306+ (shape (``n_quantiles``, ``n_rep``)).
307+ """
308+ if self ._framework is None :
309+ all_se = None
310+ else :
311+ all_se = self .framework .all_ses
312+ return all_se
304313
305314 @property
306315 def t_stat (self ):
307316 """
308- t-statistics for the causal parameter(s) after calling :meth:`fit`.
317+ t-statistics for the causal parameter(s) after calling :meth:`fit` (shape (``n_quantiles``,)) .
309318 """
310319 t_stat = self .coef / self .se
311320 return t_stat
312321
313322 @property
314323 def pval (self ):
315324 """
316- p-values for the causal parameter(s) after calling :meth:`fit` .
325+ p-values for the causal parameter(s) (shape (``n_quantiles``,)) .
317326 """
318- pval = 2 * norm .cdf (- np .abs (self .t_stat ))
319- return pval
327+ return self .framework .pvals
320328
321329 @property
322330 def boot_t_stat (self ):
323331 """
324- Bootstrapped t-statistics for the causal parameter(s) after calling :meth:`fit` and :meth:`bootstrap`.
332+ Bootstrapped t-statistics for the causal parameter(s) after calling :meth:`fit` and :meth:`bootstrap`
333+ (shape (``n_rep_boot``, ``n_quantiles``, ``n_rep``)).
325334 """
326335 if self ._framework is None :
327336 boot_t_stat = None
@@ -362,30 +371,6 @@ def summary(self):
362371 df_summary = df_summary .join (ci )
363372 return df_summary
364373
365- # The private properties with __ always deliver the single treatment, single (cross-fitting) sample subselection.
366- # The slicing is based on the two properties self._i_quant, the index of the quantile, and
367- # self._i_rep, the index of the cross-fitting sample.
368-
369- @property
370- def __psi0 (self ):
371- return self ._psi0 [:, self ._i_rep , self ._i_quant ]
372-
373- @property
374- def __psi0_deriv (self ):
375- return self ._psi0_deriv [:, self ._i_rep , self ._i_quant ]
376-
377- @property
378- def __psi1 (self ):
379- return self ._psi1 [:, self ._i_rep , self ._i_quant ]
380-
381- @property
382- def __psi1_deriv (self ):
383- return self ._psi1_deriv [:, self ._i_rep , self ._i_quant ]
384-
385- @property
386- def __all_se (self ):
387- return self ._all_se [self ._i_quant , self ._i_rep ]
388-
389374 def fit (self , n_jobs_models = None , n_jobs_cv = None , store_predictions = True , store_models = False , external_predictions = None ):
390375 """
391376 Estimate DoubleMLQTE models.
@@ -436,26 +421,6 @@ def fit(self, n_jobs_models=None, n_jobs_cv=None, store_predictions=True, store_
436421 framework_list [self ._i_quant ] = self ._modellist_1 [self ._i_quant ].framework - \
437422 self ._modellist_0 [self ._i_quant ].framework
438423
439- # treatment Effects
440- self ._all_coef [self ._i_quant , :] = self .modellist_1 [self ._i_quant ].all_coef - \
441- self .modellist_0 [self ._i_quant ].all_coef
442-
443- # save scores and derivatives
444- self ._psi0 [:, :, self ._i_quant ] = np .squeeze (self .modellist_0 [i_quant ].psi , 2 )
445- self ._psi1 [:, :, self ._i_quant ] = np .squeeze (self .modellist_1 [i_quant ].psi , 2 )
446-
447- self ._psi0_deriv [:, :, self ._i_quant ] = np .squeeze (self .modellist_0 [i_quant ].psi_deriv , 2 )
448- self ._psi1_deriv [:, :, self ._i_quant ] = np .squeeze (self .modellist_1 [i_quant ].psi_deriv , 2 )
449-
450- # Estimate the variance
451- for i_rep in range (self .n_rep ):
452- self ._i_rep = i_rep
453-
454- self ._all_se [self ._i_quant , self ._i_rep ] = self ._se_causal_pars ()
455-
456- # aggregated parameter estimates and standard errors from repeated cross-fitting
457- self ._agg_cross_fit ()
458-
459424 # aggregate all frameworks
460425 self ._framework = concat (framework_list )
461426
@@ -478,33 +443,10 @@ def bootstrap(self, method='normal', n_rep_boot=500):
478443 -------
479444 self : object
480445 """
481- if np . isnan ( self .coef ). all () :
446+ if self ._framework is None :
482447 raise ValueError ('Apply fit() before bootstrap().' )
448+ self ._framework .bootstrap (method = method , n_rep_boot = n_rep_boot )
483449
484- if (not isinstance (method , str )) | (method not in ['Bayes' , 'normal' , 'wild' ]):
485- raise ValueError ('Method must be "Bayes", "normal" or "wild". '
486- f'Got { str (method )} .' )
487-
488- if not isinstance (n_rep_boot , int ):
489- raise TypeError ('The number of bootstrap replications must be of int type. '
490- f'{ str (n_rep_boot )} of type { str (type (n_rep_boot ))} was passed.' )
491- if n_rep_boot < 1 :
492- raise ValueError ('The number of bootstrap replications must be positive. '
493- f'{ str (n_rep_boot )} was passed.' )
494-
495- self ._n_rep_boot , self ._boot_coef , self ._boot_t_stat = self ._initialize_boot_arrays (n_rep_boot )
496-
497- for i_rep in range (self .n_rep ):
498- self ._i_rep = i_rep
499-
500- n_obs = self ._dml_data .n_obs
501- weights = _draw_weights (method , n_rep_boot , n_obs )
502- for i_quant in range (self .n_quantiles ):
503- self ._i_quant = i_quant
504- i_start = self ._i_rep * self .n_rep_boot
505- i_end = (self ._i_rep + 1 ) * self .n_rep_boot
506- self ._boot_coef [self ._i_quant , i_start :i_end ], self ._boot_t_stat [self ._i_quant , i_start :i_end ] = \
507- self ._compute_bootstrap (weights )
508450 return self
509451
510452 def draw_sample_splitting (self ):
@@ -526,15 +468,6 @@ def draw_sample_splitting(self):
526468
527469 return self
528470
529- def _compute_bootstrap (self , weights ):
530- J0 = np .mean (self .__psi0_deriv )
531- J1 = np .mean (self .__psi1_deriv )
532- scaled_score = self .__psi1 / J1 - self .__psi0 / J0
533-
534- boot_coef = np .matmul (weights , scaled_score ) / self ._dml_data .n_obs
535- boot_t_stat = np .matmul (weights , scaled_score ) / (self ._dml_data .n_obs * self .__all_se )
536- return boot_coef , boot_t_stat
537-
538471 def confint (self , joint = False , level = 0.95 ):
539472 """
540473 Confidence intervals for DoubleML models.
@@ -555,97 +488,49 @@ def confint(self, joint=False, level=0.95):
555488 A data frame with the confidence interval(s).
556489 """
557490
558- if not isinstance (joint , bool ):
559- raise TypeError ('joint must be True or False. '
560- f'Got { str (joint )} .' )
561-
562- if not isinstance (level , float ):
563- raise TypeError ('The confidence level must be of float type. '
564- f'{ str (level )} of type { str (type (level ))} was passed.' )
565- if (level <= 0 ) | (level >= 1 ):
566- raise ValueError ('The confidence level must be in (0,1). '
567- f'{ str (level )} was passed.' )
568-
569- a = (1 - level )
570- ab = np .array ([a / 2 , 1. - a / 2 ])
571- if joint :
572- if np .isnan (self .boot_coef ).all ():
573- raise ValueError ('Apply fit() & bootstrap() before confint(joint=True).' )
574- sim = np .amax (np .abs (self .boot_t_stat ), 0 )
575- hatc = np .quantile (sim , 1 - a )
576- ci = np .vstack ((self .coef - self .se * hatc , self .coef + self .se * hatc )).T
577- else :
578- if np .isnan (self .coef ).all ():
579- raise ValueError ('Apply fit() before confint().' )
580- fac = norm .ppf (ab )
581- ci = np .vstack ((self .coef + self .se * fac [0 ], self .coef + self .se * fac [1 ])).T
582-
583- df_ci = pd .DataFrame (ci ,
584- columns = ['{:.1f} %' .format (i * 100 ) for i in ab ],
585- index = self ._quantiles )
586- return df_ci
587-
588- def _fit_quantile (self , i_quant , n_jobs_cv = None , store_predictions = True , store_models = False ):
589-
590- model_0 = self .modellist_0 [i_quant ]
591- model_1 = self .modellist_1 [i_quant ]
592-
593- model_0 .fit (n_jobs_cv = n_jobs_cv , store_predictions = store_predictions , store_models = store_models )
594- model_1 .fit (n_jobs_cv = n_jobs_cv , store_predictions = store_predictions , store_models = store_models )
595-
596- return model_0 , model_1
491+ if self .framework is None :
492+ raise ValueError ('Apply fit() before confint().' )
597493
598- def _agg_cross_fit (self ):
599- # aggregate parameters from the repeated cross-fitting
600- # don't use the getter (always for one treatment variable and one sample), but the private variable
601- self .coef = np .median (self ._all_coef , 1 )
494+ df_ci = self .framework .confint (joint = joint , level = level )
495+ df_ci .set_index (pd .Index (self ._quantiles ), inplace = True )
602496
603- # TODO: In the documentation of standard errors we need to cleary state what we return here, i.e.,
604- # the asymptotic variance sigma_hat/N and not sigma_hat (which sometimes is also called the asympt var)!
605- # TODO: In the edge case of repeated no-cross-fitting, the test sets might have different size and therefore
606- # it would note be valid to always use the same self._var_scaling_factor
607- xx = np .tile (self .coef .reshape (- 1 , 1 ), self .n_rep )
608- self .se = np .sqrt (np .divide (np .median (np .multiply (np .power (self ._all_se , 2 ), self ._var_scaling_factor ) +
609- np .power (self ._all_coef - xx , 2 ), 1 ), self ._var_scaling_factor ))
497+ return df_ci
610498
611- def _var_est (self ):
612- """
613- Estimate the standard errors of the structural parameter
499+ def p_adjust (self , method = 'romano-wolf' ):
614500 """
615- J0 = self ._psi0_deriv [:, self ._i_rep , self ._i_quant ].mean ()
616- J1 = self ._psi1_deriv [:, self ._i_rep , self ._i_quant ].mean ()
617- score0 = self ._psi0 [:, self ._i_rep , self ._i_quant ]
618- score1 = self ._psi1 [:, self ._i_rep , self ._i_quant ]
619- omega = score1 / J1 - score0 / J0
501+ Multiple testing adjustment for DoubleML models.
620502
621- self ._var_scaling_factor = self ._dml_data .n_obs
622- sigma2_hat = 1 / self ._var_scaling_factor * np .mean (np .power (omega , 2 ))
503+ Parameters
504+ ----------
505+ method : str
506+ A str (``'romano-wolf''``, ``'bonferroni'``, ``'holm'``, etc) specifying the adjustment method.
507+ In addition to ``'romano-wolf''``, all methods implemented in
508+ :py:func:`statsmodels.stats.multitest.multipletests` can be applied.
509+ Default is ``'romano-wolf'``.
623510
624- return sigma2_hat
511+ Returns
512+ -------
513+ p_val : pd.DataFrame
514+ A data frame with adjusted p-values.
515+ """
625516
626- def _se_causal_pars (self ):
627- se = np .sqrt (self ._var_est ())
628- return se
517+ if self .framework is None :
518+ raise ValueError ('Apply fit() before p_adjust().' )
629519
630- def _initialize_arrays (self ):
631- psi0 = np .full ((self ._dml_data .n_obs , self .n_rep , self .n_quantiles ), np .nan )
632- psi0_deriv = np .full ((self ._dml_data .n_obs , self .n_rep , self .n_quantiles ), np .nan )
520+ p_val , _ = self .framework .p_adjust (method = method )
521+ p_val .set_index (pd .Index (self ._quantiles ), inplace = True )
633522
634- psi1 = np .full ((self ._dml_data .n_obs , self .n_rep , self .n_quantiles ), np .nan )
635- psi1_deriv = np .full ((self ._dml_data .n_obs , self .n_rep , self .n_quantiles ), np .nan )
523+ return p_val
636524
637- coef = np .full (self .n_quantiles , np .nan )
638- se = np .full (self .n_quantiles , np .nan )
525+ def _fit_quantile (self , i_quant , n_jobs_cv = None , store_predictions = True , store_models = False ):
639526
640- all_coef = np . full (( self .n_quantiles , self . n_rep ), np . nan )
641- all_se = np . full (( self .n_quantiles , self . n_rep ), np . nan )
527+ model_0 = self .modellist_0 [ i_quant ]
528+ model_1 = self .modellist_1 [ i_quant ]
642529
643- return psi0 , psi1 , psi0_deriv , psi1_deriv , coef , se , all_coef , all_se
530+ model_0 .fit (n_jobs_cv = n_jobs_cv , store_predictions = store_predictions , store_models = store_models )
531+ model_1 .fit (n_jobs_cv = n_jobs_cv , store_predictions = store_predictions , store_models = store_models )
644532
645- def _initialize_boot_arrays (self , n_rep_boot ):
646- boot_coef = np .full ((self .n_quantiles , n_rep_boot * self .n_rep ), np .nan )
647- boot_t_stat = np .full ((self .n_quantiles , n_rep_boot * self .n_rep ), np .nan )
648- return n_rep_boot , boot_coef , boot_t_stat
533+ return model_0 , model_1
649534
650535 def _check_data (self , obj_dml_data ):
651536 if not isinstance (obj_dml_data , DoubleMLData ):
0 commit comments