@@ -486,11 +486,14 @@ def _load_best_individual_model(self) -> SingleBest:
486486
487487 return ensemble
488488
489- def _do_dummy_prediction (self , num_run : int ) -> None :
489+ def _do_dummy_prediction (self ) -> None :
490490
491491 assert self ._metric is not None
492492 assert self ._logger is not None
493493
494+ # For dummy estimator, we always expect the num_run to be 1
495+ num_run = 1
496+
494497 self ._logger .info ("Starting to create dummy predictions." )
495498
496499 memory_limit = self ._memory_limit
@@ -551,29 +554,20 @@ def _do_dummy_prediction(self, num_run: int) -> None:
551554 % (str (status ), str (additional_info ))
552555 )
553556
554- def _do_traditional_prediction (self , num_run : int , time_left : int , func_eval_time_limit_secs : int
555- ) -> int :
557+ def _do_traditional_prediction (self , time_left : int , func_eval_time_limit_secs : int ) -> None :
556558 """
557559 Fits traditional machine learning algorithms to the provided dataset, while
558560 complying with time resource allocation.
559561
560562 This method currently only supports classification.
561563
562564 Args:
563- num_run: (int)
564- An identifier to indicate the current machine learning algorithm
565- being processed
566565 time_left: (int)
567566 Time budget, in seconds, that bounds how many machine learning algorithms can be
568567 fit. Depending on how fast a traditional machine learning algorithm trains, it
569568 will allow multiple models to be fitted.
570569 func_eval_time_limit_secs: (int)
571570 Maximum training time each algorithm is allowed to take, during training
572-
573- Returns:
574- num_run: (int)
575- The incremented identifier index. This depends on how many machine learning
576- models were fitted.
577571 """
578572
579573 # Mypy checks -- traditional prediction is only called for search
@@ -592,8 +586,8 @@ def _do_traditional_prediction(self, num_run: int, time_left: int, func_eval_tim
592586 available_classifiers = get_available_classifiers ()
593587 dask_futures = []
594588
595- total_number_classifiers = len (available_classifiers ) + num_run
596- for n_r , classifier in enumerate (available_classifiers , start = num_run ):
589+ total_number_classifiers = len (available_classifiers )
590+ for n_r , classifier in enumerate (available_classifiers ):
597591
598592 # Only launch a task if there is time
599593 start_time = time .time ()
@@ -612,7 +606,7 @@ def _do_traditional_prediction(self, num_run: int, time_left: int, func_eval_tim
612606 logger_port = self ._logger_port ,
613607 cost_for_crash = get_cost_of_crash (self ._metric ),
614608 abort_on_first_run_crash = False ,
615- initial_num_run = n_r ,
609+ initial_num_run = self . _backend . get_next_num_run () ,
616610 stats = stats ,
617611 memory_limit = memory_limit ,
618612 disable_file_output = True if len (self ._disable_file_output ) > 0 else False ,
@@ -626,9 +620,6 @@ def _do_traditional_prediction(self, num_run: int, time_left: int, func_eval_tim
626620 )
627621 ])
628622
629- # Increment the launched job index
630- num_run = n_r
631-
632623 # When managing time, we need to take into account the allocated time resources,
633624 # which are dependent on the number of cores. 'dask_futures' is a proxy to the number
634625 # of workers /n_jobs that we have, in that if there are 4 cores allocated, we can run at most
@@ -691,7 +682,7 @@ def _do_traditional_prediction(self, num_run: int, time_left: int, func_eval_tim
691682 self .run_history .update (run_history , DataOrigin .EXTERNAL_SAME_INSTANCES )
692683 run_history .save_json (os .path .join (self ._backend .internals_directory , 'traditional_run_history.json' ),
693684 save_external = True )
694- return num_run
685+ return
695686
696687 def _search (
697688 self ,
@@ -861,10 +852,9 @@ def _search(
861852 )
862853
863854 # ============> Run dummy predictions
864- num_run = 1
865855 dummy_task_name = 'runDummy'
866856 self ._stopwatch .start_task (dummy_task_name )
867- self ._do_dummy_prediction (num_run )
857+ self ._do_dummy_prediction ()
868858 self ._stopwatch .stop_task (dummy_task_name )
869859
870860 # ============> Run traditional ml
@@ -880,8 +870,8 @@ def _search(
880870 time_for_traditional = int (
881871 self ._time_for_task - elapsed_time - func_eval_time_limit_secs
882872 )
883- num_run = self ._do_traditional_prediction (
884- num_run = num_run + 1 , func_eval_time_limit_secs = func_eval_time_limit_secs ,
873+ self ._do_traditional_prediction (
874+ func_eval_time_limit_secs = func_eval_time_limit_secs ,
885875 time_left = time_for_traditional ,
886876 )
887877 self ._stopwatch .stop_task (traditional_task_name )
@@ -957,7 +947,9 @@ def _search(
957947 pipeline_config = {** self .pipeline_options , ** budget_config },
958948 ensemble_callback = proc_ensemble ,
959949 logger_port = self ._logger_port ,
960- start_num_run = num_run ,
950+ # We do not increase the num_run here, this is something
951+ # smac does internally
952+ start_num_run = self ._backend .get_next_num_run (peek = True ),
961953 search_space_updates = self .search_space_updates
962954 )
963955 try :
@@ -1063,7 +1055,7 @@ def refit(
10631055 'train_indices' : dataset .splits [split_id ][0 ],
10641056 'val_indices' : dataset .splits [split_id ][1 ],
10651057 'split_id' : split_id ,
1066- 'num_run' : 0
1058+ 'num_run' : self . _backend . get_next_num_run (),
10671059 })
10681060 X .update ({** self .pipeline_options , ** budget_config })
10691061 if self .models_ is None or len (self .models_ ) == 0 or self .ensemble_ is None :
@@ -1140,7 +1132,7 @@ def fit(self,
11401132 'train_indices' : dataset .splits [split_id ][0 ],
11411133 'val_indices' : dataset .splits [split_id ][1 ],
11421134 'split_id' : split_id ,
1143- 'num_run' : 0
1135+ 'num_run' : self . _backend . get_next_num_run (),
11441136 })
11451137 X .update ({** self .pipeline_options , ** budget_config })
11461138
0 commit comments