✨ [parallelisation] Define a transformation group (#693)

acabarbaye · web-flow · commit 6325e3ccbc3c · 2025-09-08T13:33:14.000+01:00
<!--
Copyright (C) 2020-2022 Arm Limited or its affiliates and Contributors.
All rights reserved.
SPDX-License-Identifier: Apache-2.0
-->
### Description

- Add a transformation group (`TransformGroup`)
- Reuse groups to implement worker pools (`WorkerPool`, `Map`, `OrderedMap`)



### Test Coverage

<!--
Please put an `x` in the correct box e.g. `[x]` to indicate the testing
coverage of this change.
-->

- [x] This change is covered by existing or additional automated tests.
- [ ] Manual testing has been performed (and evidence provided) as
automated testing was not feasible.
- [ ] Additional tests are not required for this change (e.g.
documentation update).
diff --git a/changes/20250905171217.feature b/changes/20250905171217.feature
@@ -0,0 +1 @@
+:sparkles: `[parallelisation]` Define a transformation group
diff --git a/changes/20250908111211.feature b/changes/20250908111211.feature
@@ -0,0 +1 @@
+:sparkles: `[collection]` Added a `Range` function to populate slices of integers
diff --git a/utils/collection/range.go b/utils/collection/range.go
@@ -0,0 +1,33 @@
+package collection
+
+import "github.com/ARM-software/golang-utils/utils/field"
+
+func sign(x int) int {
+	if x >= 0 { // zero counts as positive
+		return 1
+	}
+	return -1
+}
+
+// Range returns a slice of integers similar to Python's built-in range(); a nil step defaults to 1.
+// https://docs.python.org/2/library/functions.html#range
+//
+//	Note: The stop value is always exclusive and, unlike Python, a step of 0 yields an empty slice.
+func Range(start, stop int, step *int) []int {
+	s := field.OptionalInt(step, 1)
+	if s == 0 {
+		return []int{}
+	}
+
+	// Compute length as ceil(|stop-start| / |s|). The span is shrunk by one unit before dividing rather than grown by s, as `stop - start + s` can overflow for large steps.
+	length := 0
+	if (s > 0 && start < stop) || (s < 0 && start > stop) {
+		length = (stop-start-sign(s))/s + 1
+	}
+
+	result := make([]int, length)
+	for i, v := 0, start; i < length; i, v = i+1, v+s {
+		result[i] = v
+	}
+	return result
+}
diff --git a/utils/collection/range_test.go b/utils/collection/range_test.go
@@ -0,0 +1,40 @@
+package collection
+
+import (
+	"fmt"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+
+	"github.com/ARM-software/golang-utils/utils/field"
+)
+
+func TestRange(t *testing.T) {
+	tests := []struct {
+		start    int
+		stop     int
+		step     *int
+		expected []int
+	}{
+		// A nil step exercises the default step of 1.
+		{2, 5, nil, []int{2, 3, 4}},
+		{5, 2, nil, []int{}}, // empty, since stop < start
+		{2, 10, field.ToOptionalInt(2), []int{2, 4, 6, 8}},
+		{0, 10, field.ToOptionalInt(3), []int{0, 3, 6, 9}},
+		{1, 10, field.ToOptionalInt(3), []int{1, 4, 7}},
+		{10, 2, field.ToOptionalInt(-2), []int{10, 8, 6, 4}},
+		{5, -1, field.ToOptionalInt(-1), []int{5, 4, 3, 2, 1, 0}},
+		{0, -5, field.ToOptionalInt(-2), []int{0, -2, -4}},
+		{0, 5, nil, []int{0, 1, 2, 3, 4}},
+		{0, 5, field.ToOptionalInt(0), []int{}},
+		{2, 2, field.ToOptionalInt(1), []int{}},
+		{2, 2, field.ToOptionalInt(-1), []int{}},
+	}
+
+	for _, test := range tests {
+		// Name the sub-test with the effective step value: formatting the *int directly would print a pointer address.
+		t.Run(fmt.Sprintf("[%v,%v,%v]", test.start, test.stop, field.OptionalInt(test.step, 1)), func(t *testing.T) {
+			assert.Equal(t, test.expected, Range(test.start, test.stop, test.step))
+		})
+	}
+}
diff --git a/utils/collection/search.go b/utils/collection/search.go
@@ -71,8 +71,10 @@ func AnyFunc[S ~[]E, E any](s S, f func(E) bool) bool {
 	return conditions.Any()
 }
 
+// FilterFunc is a predicate over elements of type E: Filter keeps the elements for which it returns true, Reject the ones for which it returns false.
+type FilterFunc[E any] func(E) bool
+
 // Filter returns a new slice that contains elements from the input slice which return true when they’re passed as a parameter to the provided filtering function f.
-func Filter[S ~[]E, E any](s S, f func(E) bool) (result S) {
+func Filter[S ~[]E, E any](s S, f FilterFunc[E]) (result S) {
 	result = make(S, 0, len(s))
 
 	for i := range s {
@@ -84,8 +86,16 @@ func Filter[S ~[]E, E any](s S, f func(E) bool) (result S) {
 	return result
 }
 
+// MapFunc converts a value of type T1 into a value of type T2.
+type MapFunc[T1, T2 any] func(T1) T2
+
+// IdentityMapFunc returns a MapFunc which hands back its argument untouched.
+func IdentityMapFunc[T any]() MapFunc[T, T] {
+	identity := func(value T) T { return value }
+	return identity
+}
+
 // Map creates a new slice and populates it with the results of calling the provided function on every element in input slice.
-func Map[T1 any, T2 any](s []T1, f func(T1) T2) (result []T2) {
+func Map[T1 any, T2 any](s []T1, f MapFunc[T1, T2]) (result []T2) {
 	result = make([]T2, len(s))
 
 	for i := range s {
@@ -97,12 +107,14 @@ func Map[T1 any, T2 any](s []T1, f func(T1) T2) (result []T2) {
 
 // Reject is the opposite of Filter and returns the elements of collection for which the filtering function f returns false.
 // This is functionally equivalent to slices.DeleteFunc but it returns a new slice.
-func Reject[S ~[]E, E any](s S, f func(E) bool) S {
+func Reject[S ~[]E, E any](s S, f FilterFunc[E]) S {
 	return Filter(s, func(e E) bool { return !f(e) })
 }
 
+// ReduceFunc combines the accumulated value (of type T2) with the next element (of type T1) and returns the updated accumulated value.
+type ReduceFunc[T1, T2 any] func(T2, T1) T2
+
 // Reduce runs a reducer function f over all elements in the array, in ascending-index order, and accumulates them into a single value.
-func Reduce[T1, T2 any](s []T1, accumulator T2, f func(T2, T1) T2) (result T2) {
+func Reduce[T1, T2 any](s []T1, accumulator T2, f ReduceFunc[T1, T2]) (result T2) {
 	result = accumulator
 	for i := range s {
 		result = f(result, s[i])
diff --git a/utils/parallelisation/group.go b/utils/parallelisation/group.go
@@ -224,6 +224,14 @@ type ICompoundExecutionGroup[T any] interface {
 // NewExecutionGroup returns an execution group which executes functions according to store options.
 func NewExecutionGroup[T any](executeFunc ExecuteFunc[T], options ...StoreOption) *ExecutionGroup[T] {
 
+	return NewOrderedExecutionGroup(func(ctx context.Context, index int, element T) error {
+		return executeFunc(ctx, element)
+	}, options...)
+}
+
+// NewOrderedExecutionGroup returns an execution group which executes functions according to store options. It also keeps track of the input index.
+func NewOrderedExecutionGroup[T any](executeFunc OrderedExecuteFunc[T], options ...StoreOption) *ExecutionGroup[T] {
+
 	opts := WithOptions(options...)
 	return &ExecutionGroup[T]{
 		mu:          deadlock.RWMutex{},
@@ -235,10 +243,12 @@ func NewExecutionGroup[T any](executeFunc ExecuteFunc[T], options ...StoreOption
 
 type ExecuteFunc[T any] func(ctx context.Context, element T) error
 
+// OrderedExecuteFunc is similar to ExecuteFunc but additionally receives the index of the element within the group.
+type OrderedExecuteFunc[T any] func(ctx context.Context, index int, element T) error
+
 type ExecutionGroup[T any] struct {
 	mu          deadlock.RWMutex
 	functions   []wrappedElement[T]
-	executeFunc ExecuteFunc[T]
+	executeFunc OrderedExecuteFunc[T]
 	options     StoreOptions
 }
 
@@ -294,7 +304,7 @@ func (s *ExecutionGroup[T]) executeConcurrently(ctx context.Context, stopOnFirst
 	g.SetLimit(workers)
 	for i := range s.functions {
 		g.Go(func() error {
-			_, subErr := s.executeFunction(gCtx, s.functions[i])
+			_, subErr := s.executeFunction(gCtx, i, s.functions[i])
 			errCh <- subErr
 			return subErr
 		})
@@ -323,7 +333,7 @@ func (s *ExecutionGroup[T]) executeSequentially(ctx context.Context, stopOnFirst
 	collateErr := make([]error, funcNum)
 	if reverse {
 		for i := funcNum - 1; i >= 0; i-- {
-			shouldBreak, subErr := s.executeFunction(ctx, s.functions[i])
+			shouldBreak, subErr := s.executeFunction(ctx, i, s.functions[i])
 			collateErr[funcNum-i-1] = subErr
 			if shouldBreak {
 				err = subErr
@@ -338,7 +348,7 @@ func (s *ExecutionGroup[T]) executeSequentially(ctx context.Context, stopOnFirst
 		}
 	} else {
 		for i := range s.functions {
-			shouldBreak, subErr := s.executeFunction(ctx, s.functions[i])
+			shouldBreak, subErr := s.executeFunction(ctx, i, s.functions[i])
 			collateErr[i] = subErr
 			if shouldBreak {
 				err = subErr
@@ -359,7 +369,7 @@ func (s *ExecutionGroup[T]) executeSequentially(ctx context.Context, stopOnFirst
 	return
 }
 
-func (s *ExecutionGroup[T]) executeFunction(ctx context.Context, w wrappedElement[T]) (mustBreak bool, err error) {
+func (s *ExecutionGroup[T]) executeFunction(ctx context.Context, index int, w wrappedElement[T]) (mustBreak bool, err error) {
 	err = DetermineContextError(ctx)
 	if err != nil {
 		mustBreak = true
@@ -370,7 +380,9 @@ func (s *ExecutionGroup[T]) executeFunction(ctx context.Context, w wrappedElemen
 		mustBreak = true
 		return
 	}
-	err = w.Execute(ctx, s.executeFunc)
+	err = w.Execute(ctx, func(ctx context.Context, element T) error {
+		return s.executeFunc(ctx, index, element)
+	})
 
 	return
 }
diff --git a/utils/parallelisation/parallelisation.go b/utils/parallelisation/parallelisation.go
@@ -12,8 +12,8 @@ import (
 	"time"
 
 	"go.uber.org/atomic"
-	"golang.org/x/sync/errgroup"
 
+	"github.com/ARM-software/golang-utils/utils/collection"
 	"github.com/ARM-software/golang-utils/utils/commonerrors"
 )
 
@@ -265,69 +265,35 @@ func WaitUntil(ctx context.Context, evalCondition func(ctx2 context.Context) (bo
 	}
 }
 
-func newWorker[JobType, ResultType any](ctx context.Context, f func(context.Context, JobType) (ResultType, bool, error), jobs chan JobType, results chan ResultType) (err error) {
-	for job := range jobs {
-		result, ok, subErr := f(ctx, job)
-		if subErr != nil {
-			err = commonerrors.WrapError(commonerrors.ErrUnexpected, subErr, "an error occurred whilst handling a job")
-			return
-		}
-
-		err = DetermineContextError(ctx)
-		if err != nil {
-			return
-		}
-
-		if ok {
-			results <- result
-		}
+// WorkerPool parallelises an action using a worker pool of the size provided by numWorkers and retrieves all the results when all the actions have completed. It is similar to Parallelise but it uses generics instead of reflection and allows you to control the pool size
+func WorkerPool[InputType, ResultType any](ctx context.Context, numWorkers int, jobs []InputType, f TransformFunc[InputType, ResultType]) (results []ResultType, err error) {
+	g, err := workerPoolGroup[InputType, ResultType](ctx, numWorkers, jobs, f)
+	if err != nil {
+		return
 	}
-
+	results, err = g.Outputs(ctx)
 	return
 }
 
-// WorkerPool parallelises an action using a worker pool of the size provided by numWorkers and retrieves all the results when all the actions have completed. It is similar to Parallelise but it uses generics instead of reflection and allows you to control the pool size
-func WorkerPool[InputType, ResultType any](ctx context.Context, numWorkers int, jobs []InputType, f func(context.Context, InputType) (ResultType, bool, error)) (results []ResultType, err error) {
+func workerPoolGroup[I, O any](ctx context.Context, numWorkers int, jobs []I, f TransformFunc[I, O]) (g *TransformGroup[I, O], err error) {
 	if numWorkers < 1 {
 		err = commonerrors.New(commonerrors.ErrInvalid, "numWorkers must be greater than or equal to 1")
 		return
 	}
-
-	numJobs := len(jobs)
-	jobsChan := make(chan InputType, numJobs)
-	resultsChan := make(chan ResultType, numJobs)
-
-	g, gCtx := errgroup.WithContext(ctx)
-	g.SetLimit(numWorkers)
-	for range numWorkers {
-		g.Go(func() error { return newWorker(gCtx, f, jobsChan, resultsChan) })
-	}
-	for i := range jobs {
-		if DetermineContextError(ctx) != nil {
-			break
-		}
-		jobsChan <- jobs[i]
-	}
-
-	close(jobsChan)
-	err = g.Wait()
-	close(resultsChan)
-	if err == nil {
-		err = DetermineContextError(ctx)
-	}
+	g = NewTransformGroup[I, O](f, Workers(numWorkers), JoinErrors)
+	err = g.Inputs(ctx, jobs...)
 	if err != nil {
 		return
 	}
-
-	for result := range resultsChan {
-		results = append(results, result)
+	err = g.Transform(ctx)
+	if err != nil {
+		return
 	}
-
 	return
 }
 
 // Filter is similar to collection.Filter but uses parallelisation.
-func Filter[T any](ctx context.Context, numWorkers int, s []T, f func(T) bool) (result []T, err error) {
+func Filter[T any](ctx context.Context, numWorkers int, s []T, f collection.FilterFunc[T]) (result []T, err error) {
 	result, err = WorkerPool[T, T](ctx, numWorkers, s, func(fCtx context.Context, item T) (r T, ok bool, fErr error) {
 		fErr = DetermineContextError(fCtx)
 		if fErr != nil {
@@ -340,9 +306,8 @@ func Filter[T any](ctx context.Context, numWorkers int, s []T, f func(T) bool) (
 	return
 }
 
-// Map is similar to collection.Map but uses parallelisation.
-func Map[T1 any, T2 any](ctx context.Context, numWorkers int, s []T1, f func(T1) T2) (result []T2, err error) {
-	result, err = WorkerPool[T1, T2](ctx, numWorkers, s, func(fCtx context.Context, item T1) (r T2, ok bool, fErr error) {
+func mapGroup[T1 any, T2 any](ctx context.Context, numWorkers int, s []T1, f collection.MapFunc[T1, T2]) (*TransformGroup[T1, T2], error) {
+	return workerPoolGroup[T1, T2](ctx, numWorkers, s, func(fCtx context.Context, item T1) (r T2, ok bool, fErr error) {
 		fErr = DetermineContextError(fCtx)
 		if fErr != nil {
 			return
@@ -351,10 +316,29 @@ func Map[T1 any, T2 any](ctx context.Context, numWorkers int, s []T1, f func(T1)
 		ok = true
 		return
 	})
+}
+
+// Map is similar to collection.Map but uses parallelisation.
+// The results are not guaranteed to be in the same order as the input s; use OrderedMap when the order matters.
+func Map[T1 any, T2 any](ctx context.Context, numWorkers int, s []T1, f collection.MapFunc[T1, T2]) (result []T2, err error) {
+	g, err := mapGroup[T1, T2](ctx, numWorkers, s, f)
+	if err != nil {
+		return
+	}
+	result, err = g.Outputs(ctx)
+	return
+}
+
+// OrderedMap is similar to Map but ensures the results are in the same order as the input.
+func OrderedMap[T1 any, T2 any](ctx context.Context, numWorkers int, s []T1, f collection.MapFunc[T1, T2]) (result []T2, err error) {
+	g, err := mapGroup[T1, T2](ctx, numWorkers, s, f)
+	if err != nil {
+		return
+	}
+	result, err = g.OrderedOutputs(ctx)
 	return
 }
 
 // Reject is the opposite of Filter and returns the elements of collection for which the filtering function f returns false.
-func Reject[T any](ctx context.Context, numWorkers int, s []T, f func(T) bool) ([]T, error) {
+func Reject[T any](ctx context.Context, numWorkers int, s []T, f collection.FilterFunc[T]) ([]T, error) {
 	return Filter[T](ctx, numWorkers, s, func(e T) bool { return !f(e) })
 }
diff --git a/utils/parallelisation/parallelisation_test.go b/utils/parallelisation/parallelisation_test.go
@@ -19,8 +19,10 @@ import (
 	"go.uber.org/atomic"
 	"go.uber.org/goleak"
 
+	"github.com/ARM-software/golang-utils/utils/collection"
 	"github.com/ARM-software/golang-utils/utils/commonerrors"
 	"github.com/ARM-software/golang-utils/utils/commonerrors/errortest"
+	"github.com/ARM-software/golang-utils/utils/field"
 )
 
 var (
@@ -636,3 +638,35 @@ func TestMap(t *testing.T) {
 		errortest.AssertError(t, err, commonerrors.ErrCancelled)
 	})
 }
+
+// TestMapAndOrderedMap checks that OrderedMap returns its results in input order, whereas Map returns the same elements without any ordering guarantee.
+func TestMapAndOrderedMap(t *testing.T) {
+	defer goleak.VerifyNone(t)
+	ctx := context.Background()
+	mapped, err := OrderedMap(ctx, 3, []int{1, 2}, func(i int) string {
+		return fmt.Sprintf("Hello world %v", i)
+	})
+	require.NoError(t, err)
+	assert.Equal(t, []string{"Hello world 1", "Hello world 2"}, mapped)
+	mapped, err = OrderedMap(ctx, 3, []int64{1, 2, 3, 4}, func(x int64) string {
+		return strconv.FormatInt(x, 10)
+	})
+	require.NoError(t, err)
+	assert.Equal(t, []string{"1", "2", "3", "4"}, mapped)
+	t.Run("cancelled context", func(t *testing.T) {
+		cancelledCtx, cancel := context.WithCancel(context.Background())
+		cancel()
+		_, err := Map(cancelledCtx, 3, []int{1, 2}, func(i int) string {
+			return fmt.Sprintf("Hello world %v", i)
+		})
+		errortest.AssertError(t, err, commonerrors.ErrCancelled)
+	})
+
+	// 200 inputs (0, 5, ..., 995) processed by 3 workers.
+	in := collection.Range(0, 1000, field.ToOptionalInt(5))
+	mappedInt, err := OrderedMap(ctx, 3, in, collection.IdentityMapFunc[int]())
+	require.NoError(t, err)
+	assert.Equal(t, in, mappedInt)
+	mappedInt, err = Map(ctx, 3, in, collection.IdentityMapFunc[int]())
+	require.NoError(t, err)
+	// NOTE(review): this asserts that the unordered Map did NOT happen to preserve the input order, which depends on goroutine scheduling and could in principle make the test flaky — confirm this is intended.
+	assert.NotEqual(t, in, mappedInt)
+	assert.ElementsMatch(t, in, mappedInt)
+}
diff --git a/utils/parallelisation/transform.go b/utils/parallelisation/transform.go
diff --git a/utils/parallelisation/transform_test.go b/utils/parallelisation/transform_test.go
diff --git a/utils/signing/signing_test.go b/utils/signing/signing_test.go

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	+:sparkles: `[parallelisation]` Define a transformation group
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	+:sparkles: `[collection]` Added a `Range` function to populate slices of integers