@@ -1559,7 +1559,7 @@ def _aggregate_series_fast(self, obj, func):
15591559
15601560 # avoids object / Series creation overhead
15611561 dummy = obj ._get_values (slice (None , 0 )).to_dense ()
1562- indexer = _algos . groupsort_indexer (group_index , ngroups )[ 0 ]
1562+ indexer = _get_group_index_sorter (group_index , ngroups )
15631563 obj = obj .take (indexer , convert = False )
15641564 group_index = com .take_nd (group_index , indexer , allow_fill = False )
15651565 grouper = lib .SeriesGrouper (obj , func , group_index , ngroups ,
@@ -3271,7 +3271,7 @@ def slabels(self):
32713271 @cache_readonly
32723272 def sort_idx (self ):
32733273 # Counting sort indexer
3274- return _algos . groupsort_indexer (self .labels , self .ngroups )[ 0 ]
3274+ return _get_group_index_sorter (self .labels , self .ngroups )
32753275
32763276 def __iter__ (self ):
32773277 sdata = self ._get_sorted_data ()
@@ -3543,23 +3543,39 @@ def get_key(self, comp_id):
35433543
35443544
35453545def _get_indices_dict (label_list , keys ):
3546- shape = [len (x ) for x in keys ]
3547- group_index = get_group_index (label_list , shape )
3548-
3549- sorter , _ = _algos .groupsort_indexer (com ._ensure_int64 (group_index ),
3550- np .prod (shape ))
3546+ shape = list (map (len , keys ))
3547+ ngroups = np .prod (shape )
35513548
3552- sorter_int = com ._ensure_platform_int (sorter )
3549+ group_index = get_group_index (label_list , shape )
3550+ sorter = _get_group_index_sorter (group_index , ngroups )
35533551
3554- sorted_labels = [lab .take (sorter_int ) for lab in label_list ]
3555- group_index = group_index .take (sorter_int )
3552+ sorted_labels = [lab .take (sorter ) for lab in label_list ]
3553+ group_index = group_index .take (sorter )
35563554
35573555 return lib .indices_fast (sorter , group_index , keys , sorted_labels )
35583556
35593557
35603558#----------------------------------------------------------------------
35613559# sorting levels...cleverly?
35623560
def _get_group_index_sorter(group_index, ngroups):
    """
    Return an indexer that sorts ``group_index``, picking the cheaper of two
    sorting algorithms.

    _algos.groupsort_indexer (a counting sort) is at least O(ngroups), where
        ngroups = prod(shape)
        shape = map(len, keys)
    that is, linear in the number of combinations (cartesian product) of
    unique values of groupby keys. This can be huge when doing multi-key
    groupby.
    np.argsort(kind='mergesort') is O(count * log(count)) where count is the
    length of the data-frame.

    The argsort fallback uses mergesort because it is a stable sort: rows
    that share a group id keep their original relative order, as they do in
    the counting-sort branch. numpy's default quicksort gives no such
    guarantee, so the within-group row order would otherwise depend on which
    branch the size heuristic happens to pick.

    Parameters
    ----------
    group_index : ndarray
        integer group id of every row
    ngroups : int
        upper bound on the number of distinct group ids

    Returns
    -------
    sorter : ndarray
        positions that put ``group_index`` in ascending order
    """
    count = len(group_index)
    # Taking the complexities literally: counting sort wins only while
    # ngroups stays below count * log(count). The ``count > 0`` guard keeps
    # np.log(0) (-inf plus a RuntimeWarning) out of the comparison; empty
    # input falls through to argsort, which returns an empty indexer.
    if count > 0 and ngroups < count * np.log(count):
        sorter, _ = _algos.groupsort_indexer(com._ensure_int64(group_index),
                                             ngroups)
        return com._ensure_platform_int(sorter)
    return group_index.argsort(kind='mergesort')
3578+
35633579
35643580def _compress_group_index (group_index , sort = True ):
35653581 """
0 commit comments