@@ -1550,7 +1550,7 @@ def _aggregate_series_fast(self, obj, func):
15501550
15511551 # avoids object / Series creation overhead
15521552 dummy = obj ._get_values (slice (None , 0 )).to_dense ()
1553- indexer = _algos . groupsort_indexer (group_index , ngroups )[ 0 ]
1553+ indexer = _get_group_index_sorter (group_index , ngroups )
15541554 obj = obj .take (indexer , convert = False )
15551555 group_index = com .take_nd (group_index , indexer , allow_fill = False )
15561556 grouper = lib .SeriesGrouper (obj , func , group_index , ngroups ,
@@ -3262,7 +3262,7 @@ def slabels(self):
@cache_readonly
def sort_idx(self):
    """Indexer that sorts ``self.labels``.

    The sorting strategy is chosen by ``_get_group_index_sorter`` based on
    ``self.ngroups`` and the number of labels.
    """
    sorter = _get_group_index_sorter(self.labels, self.ngroups)
    return sorter
32663266
32673267 def __iter__ (self ):
32683268 sdata = self ._get_sorted_data ()
@@ -3534,23 +3534,39 @@ def get_key(self, comp_id):
35343534
35353535
def _get_indices_dict(label_list, keys):
    """Sort the group labels and hand them to ``lib.indices_fast``.

    Parameters
    ----------
    label_list : sequence of label arrays, one per grouping key
        (each must support ``take``)
    keys : sequence of sized containers, one per grouping key; their
        lengths give the shape of the group space

    Returns
    -------
    Whatever ``lib.indices_fast`` returns for the sorted inputs
    (presumably a mapping from group key to row positions; confirm
    against ``lib.indices_fast``).
    """
    shape = [len(key) for key in keys]
    # Number of possible key combinations (cartesian product of the keys).
    ngroups = np.prod(shape)

    group_index = get_group_index(label_list, shape)
    order = _get_group_index_sorter(group_index, ngroups)

    # Bring every label array and the flat group index into sorted order.
    ordered_labels = [labels.take(order) for labels in label_list]
    ordered_index = group_index.take(order)

    return lib.indices_fast(order, ordered_index, keys, ordered_labels)
35493547
35503548
35513549#----------------------------------------------------------------------
35523550# sorting levels...cleverly?
35533551
def _get_group_index_sorter(group_index, ngroups):
    """
    Return an indexer that sorts ``group_index``, using the cheaper algorithm.

    ``_algos.groupsort_indexer`` is at least O(ngroups), where
        ngroups = prod(shape)
        shape = map(len, keys)
    that is, linear in the number of combinations (cartesian product) of
    unique values of the groupby keys. This can be huge when doing a
    multi-key groupby. ``np.argsort`` is (presumably) O(count * log(count)),
    where count is the length of the data-frame, so it is used instead
    whenever ``ngroups`` is not smaller than that estimate.

    Parameters
    ----------
    group_index : array of group ids; must support ``len`` and ``argsort``
    ngroups : number of possible groups

    Returns
    -------
    sorter : indexer such that ``group_index.take(sorter)`` is sorted
    """
    count = len(group_index)
    # Guard count > 0: np.log(0) is -inf (with a RuntimeWarning) and
    # 0 * -inf is nan, which would make the comparison meaningless.
    # Taking the complexities literally for the comparison itself.
    use_groupsort = count > 0 and ngroups < count * np.log(count)
    if use_groupsort:
        sorter, _ = _algos.groupsort_indexer(com._ensure_int64(group_index),
                                             ngroups)
        return com._ensure_platform_int(sorter)
    # Request a stable sort so equal group ids keep their original relative
    # order; the default argsort kind does not guarantee that, which would
    # make within-group ordering depend on which branch was taken
    # (presumably the counting-sort path preserves it; confirm in _algos).
    return group_index.argsort(kind='mergesort')
3569+
35543570
35553571def _compress_group_index (group_index , sort = True ):
35563572 """
0 commit comments