This repository was archived by the owner on Jul 19, 2023. It is now read-only.

Commit f125450

Ingester limits (#523)
This brings ingester limits:
- Global & local maximum of active series
- Out-of-order rejection

These are intentionally tracked outside of the storage package so that the accounting is not reset when data is flushed.

Co-authored-by: Christian Simon <simon@swine.de>
1 parent e425b2a · commit f125450

18 files changed (+399, -61 lines)

docs/sources/operators-guide/configure/reference-configuration-parameters/index.md

Lines changed: 3 additions & 3 deletions
@@ -193,16 +193,16 @@ limits:
 
 # Maximum number of active series of profiles per tenant, per ingester. 0 to
 # disable.
-# CLI flag: -ingester.max-series-per-tenant
-[max_series_per_tenant: <int> | default = 0]
+# CLI flag: -ingester.max-local-series-per-tenant
+[max_local_series_per_tenant: <int> | default = 0]
 
 # Maximum number of active series of profiles per tenant, across the cluster.
 # 0 to disable. When the global limit is enabled, each ingester is configured
 # with a dynamic local limit based on the replication factor and the current
 # number of healthy ingesters, and is kept updated whenever the number of
 # ingesters change.
 # CLI flag: -ingester.max-global-series-per-tenant
-[max_global_series_per_user: <int> | default = 5000]
+[max_global_series_per_tenant: <int> | default = 5000]
 
 # Limit how far back in profiling data can be queried, up until lookback
 # duration ago. This limit is enforced in the query frontend. If the requested
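The two renamed options above feed the new Limits interface that the ingester consumes (defined in pkg/ingester/limiter.go further down). As a quick illustration of that mapping, a fixed-value implementation might look like the following sketch; the staticLimits type and its fields are assumptions for illustration, not the repository's actual overrides plumbing.

package main

import "fmt"

// staticLimits is a hypothetical, fixed-value implementation of the ingester
// Limits interface (MaxLocalSeriesPerTenant / MaxGlobalSeriesPerTenant).
type staticLimits struct {
	maxLocalSeriesPerTenant  int // max_local_series_per_tenant, 0 = disabled
	maxGlobalSeriesPerTenant int // max_global_series_per_tenant, 0 = disabled
}

func (s staticLimits) MaxLocalSeriesPerTenant(tenantID string) int {
	return s.maxLocalSeriesPerTenant
}

func (s staticLimits) MaxGlobalSeriesPerTenant(tenantID string) int {
	return s.maxGlobalSeriesPerTenant
}

func main() {
	l := staticLimits{maxGlobalSeriesPerTenant: 5000} // the documented default
	fmt.Println(l.MaxLocalSeriesPerTenant("tenant-a"), l.MaxGlobalSeriesPerTenant("tenant-a"))
}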

pkg/ingester/ingester.go

Lines changed: 19 additions & 5 deletions
@@ -24,6 +24,7 @@ import (
 	"github.com/grafana/phlare/pkg/tenant"
 	"github.com/grafana/phlare/pkg/usagestats"
 	"github.com/grafana/phlare/pkg/util"
+	"github.com/grafana/phlare/pkg/validation"
 )
 
 var activeTenantsStats = usagestats.NewInt("ingester_active_tenants")
@@ -57,7 +58,8 @@ type Ingester struct {
 	instances    map[string]*instance
 	instancesMtx sync.RWMutex
 
-	reg prometheus.Registerer
+	limits Limits
+	reg    prometheus.Registerer
 }
 
 type ingesterFlusherCompat struct {
@@ -71,7 +73,7 @@ func (i *ingesterFlusherCompat) Flush() {
 	}
 }
 
-func New(phlarectx context.Context, cfg Config, dbConfig phlaredb.Config, storageBucket phlareobjstore.Bucket) (*Ingester, error) {
+func New(phlarectx context.Context, cfg Config, dbConfig phlaredb.Config, storageBucket phlareobjstore.Bucket, limits Limits) (*Ingester, error) {
 	i := &Ingester{
 		cfg:       cfg,
 		phlarectx: phlarectx,
@@ -80,6 +82,7 @@ func New(phlarectx context.Context, cfg Config, dbConfig phlaredb.Config, storag
 		instances:     map[string]*instance{},
 		dbConfig:      dbConfig,
 		storageBucket: storageBucket,
+		limits:        limits,
 	}
 
 	var err error
@@ -135,7 +138,8 @@ func (i *Ingester) GetOrCreateInstance(tenantID string) (*instance, error) { //n
 	inst, ok = i.instances[tenantID]
 	if !ok {
 		var err error
-		inst, err = newInstance(i.phlarectx, i.dbConfig, tenantID, i.storageBucket)
+
+		inst, err = newInstance(i.phlarectx, i.dbConfig, tenantID, i.storageBucket, NewLimiter(tenantID, i.limits, i.lifecycler, i.cfg.LifecyclerConfig.RingConfig.ReplicationFactor))
 		if err != nil {
 			return nil, err
 		}
@@ -184,16 +188,26 @@ func (i *Ingester) Push(ctx context.Context, req *connect.Request[pushv1.PushReq
 	level.Debug(instance.logger).Log("msg", "message received by ingester push")
 	for _, series := range req.Msg.Series {
 		for _, sample := range series.Samples {
-			p, err := pprof.FromBytes(sample.RawProfile)
+			p, size, err := pprof.FromBytes(sample.RawProfile)
 			if err != nil {
 				return nil, err
 			}
-
 			id, err := uuid.Parse(sample.ID)
 			if err != nil {
 				return nil, err
 			}
 			if err := instance.Head().Ingest(ctx, p, id, series.Labels...); err != nil {
+				reason := validation.ReasonOf(err)
+				if reason != validation.Unknown {
+					validation.DiscardedProfiles.WithLabelValues(string(reason), instance.tenantID).Add(float64(1))
+					validation.DiscardedBytes.WithLabelValues(string(reason), instance.tenantID).Add(float64(size))
+					switch validation.ReasonOf(err) {
+					case validation.OutOfOrder:
+						return nil, connect.NewError(connect.CodeInvalidArgument, err)
+					case validation.SeriesLimit:
+						return nil, connect.NewError(connect.CodeResourceExhausted, err)
+					}
+				}
 				return nil, err
 			}
 			p.ReturnToVTPool()
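With this change Push maps the two validation reasons onto distinct connect error codes: out-of-order profiles become CodeInvalidArgument (retrying the same payload can never succeed), while hitting the series limit becomes CodeResourceExhausted (worth retrying after backoff). A minimal sketch of how a caller could branch on those codes, assuming the same connect-go module the ingester imports (github.com/bufbuild/connect-go); handlePushError is an illustrative helper, not part of the repository:

package main

import (
	"errors"
	"fmt"

	"github.com/bufbuild/connect-go"
)

// handlePushError decides whether a failed push is worth retrying,
// based on the connect code set by the ingester.
func handlePushError(err error) {
	if err == nil {
		return
	}
	switch connect.CodeOf(err) {
	case connect.CodeInvalidArgument:
		fmt.Println("drop profile, it is out of order:", err)
	case connect.CodeResourceExhausted:
		fmt.Println("series limit hit, back off and retry later:", err)
	default:
		fmt.Println("unexpected push failure:", err)
	}
}

func main() {
	handlePushError(connect.NewError(connect.CodeResourceExhausted, errors.New("max series reached")))
}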

pkg/ingester/ingester_test.go

Lines changed: 7 additions & 6 deletions
@@ -57,12 +57,13 @@ func Test_MultitenantReadWrite(t *testing.T) {
 	reg := prometheus.NewRegistry()
 	ctx := phlarecontext.WithLogger(context.Background(), logger)
 	ctx = phlarecontext.WithRegistry(ctx, reg)
-	cfg := client.Config{StorageBackendConfig: client.StorageBackendConfig{
-		Backend: client.Filesystem,
-		Filesystem: filesystem.Config{
-			Directory: dbPath,
+	cfg := client.Config{
+		StorageBackendConfig: client.StorageBackendConfig{
+			Backend: client.Filesystem,
+			Filesystem: filesystem.Config{
+				Directory: dbPath,
+			},
 		},
-	},
 	}
 
 	fs, err := client.NewBucket(ctx, cfg, "storage")
@@ -71,7 +72,7 @@ func Test_MultitenantReadWrite(t *testing.T) {
 	ing, err := New(ctx, defaultIngesterTestConfig(t), phlaredb.Config{
 		DataPath:         dbPath,
 		MaxBlockDuration: 30 * time.Hour,
-	}, fs)
+	}, fs, &fakeLimits{})
 	require.NoError(t, err)
 	require.NoError(t, services.StartAndAwaitRunning(context.Background(), ing))
 
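The updated test passes &fakeLimits{} as the new Limits argument to New. The fakeLimits definition is not part of this excerpt; a minimal stand-in satisfying the interface could look like this sketch (the field names are assumptions):

package ingester

// fakeLimits is a hypothetical test double for the Limits interface;
// the real one in the repository may carry different fields.
type fakeLimits struct {
	maxLocalSeries  int
	maxGlobalSeries int
}

func (f *fakeLimits) MaxLocalSeriesPerTenant(tenantID string) int  { return f.maxLocalSeries }
func (f *fakeLimits) MaxGlobalSeriesPerTenant(tenantID string) int { return f.maxGlobalSeries }

With the zero values, both limits are disabled (0), so the existing test behaviour is unchanged.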
pkg/ingester/instance.go

Lines changed: 6 additions & 4 deletions
@@ -24,15 +24,16 @@ type instance struct {
 	logger log.Logger
 	reg    prometheus.Registerer
 
-	cancel context.CancelFunc
-	wg     sync.WaitGroup
+	cancel   context.CancelFunc
+	wg       sync.WaitGroup
+	tenantID string
 }
 
-func newInstance(phlarectx context.Context, cfg phlaredb.Config, tenantID string, storageBucket phlareobjstore.Bucket) (*instance, error) {
+func newInstance(phlarectx context.Context, cfg phlaredb.Config, tenantID string, storageBucket phlareobjstore.Bucket, limiter Limiter) (*instance, error) {
 	cfg.DataPath = path.Join(cfg.DataPath, tenantID)
 
 	phlarectx = phlarecontext.WrapTenant(phlarectx, tenantID)
-	db, err := phlaredb.New(phlarectx, cfg)
+	db, err := phlaredb.New(phlarectx, cfg, limiter)
 	if err != nil {
 		return nil, err
 	}
@@ -42,6 +43,7 @@ func newInstance(phlarectx context.Context, cfg phlaredb.Config, tenantID string
 		logger:   phlarecontext.Logger(phlarectx),
 		reg:      phlarecontext.Registry(phlarectx),
 		cancel:   cancel,
+		tenantID: tenantID,
 	}
 	if storageBucket != nil {
 		inst.shipper = shipper.New(
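newInstance now threads the per-tenant Limiter into phlaredb.New, so the storage layer can reject a profile before anything is written, while the accounting itself lives outside the storage package and survives flushes. How phlaredb wires this in is not shown in this excerpt; a rough sketch of the idea, with Head and ingest as stand-ins rather than the actual phlaredb types:

package sketch

import (
	"github.com/prometheus/common/model"

	phlaremodel "github.com/grafana/phlare/pkg/model"
)

// Limiter mirrors the interface from pkg/ingester/limiter.go.
type Limiter interface {
	AllowProfile(fp model.Fingerprint, lbs phlaremodel.Labels, tsNano int64) error
	Stop()
}

// Head is a stand-in for the phlaredb head block; the real type differs.
type Head struct {
	limiter Limiter
}

// ingest asks the limiter first, so limit accounting is kept outside the
// storage package and is not reset when the head is flushed to a block.
func (h *Head) ingest(fp model.Fingerprint, lbs phlaremodel.Labels, tsNano int64) error {
	if err := h.limiter.AllowProfile(fp, lbs, tsNano); err != nil {
		return err
	}
	// ... append the profile to the head ...
	return nil
}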

pkg/ingester/limiter.go

Lines changed: 193 additions & 0 deletions
@@ -0,0 +1,193 @@
+package ingester
+
+import (
+	"context"
+	"sync"
+	"time"
+
+	"github.com/prometheus/common/model"
+
+	phlaremodel "github.com/grafana/phlare/pkg/model"
+	"github.com/grafana/phlare/pkg/validation"
+)
+
+var (
+	activeSeriesTimeout = 10 * time.Minute
+	activeSeriesCleanup = time.Minute
+)
+
+type RingCount interface {
+	HealthyInstancesCount() int
+}
+
+type Limits interface {
+	MaxLocalSeriesPerTenant(tenantID string) int
+	MaxGlobalSeriesPerTenant(tenantID string) int
+}
+
+type Limiter interface {
+	// AllowProfile returns an error if the profile is not allowed to be ingested.
+	// The error is a validation error and can be out of order or max series limit reached.
+	AllowProfile(fp model.Fingerprint, lbs phlaremodel.Labels, tsNano int64) error
+	Stop()
+}
+
+type limiter struct {
+	limits            Limits
+	ring              RingCount
+	replicationFactor int
+	tenantID          string
+
+	activeSeries  map[model.Fingerprint]int64
+	lastTimestamp map[model.Fingerprint]int64
+
+	mtx sync.Mutex // todo: may be shard the lock to avoid latency spikes.
+
+	ctx    context.Context
+	cancel context.CancelFunc
+	wg     sync.WaitGroup
+}
+
+func NewLimiter(tenantID string, limits Limits, ring RingCount, replicationFactor int) Limiter {
+	ctx, cancel := context.WithCancel(context.Background())
+
+	l := &limiter{
+		tenantID:          tenantID,
+		limits:            limits,
+		ring:              ring,
+		replicationFactor: replicationFactor,
+		activeSeries:      map[model.Fingerprint]int64{},
+		lastTimestamp:     map[model.Fingerprint]int64{},
+		cancel:            cancel,
+		ctx:               ctx,
+	}
+
+	l.wg.Add(1)
+	go l.loop()
+
+	return l
+}
+
+func (l *limiter) Stop() {
+	l.cancel()
+	l.wg.Wait()
+}
+
+func (l *limiter) loop() {
+	defer l.wg.Done()
+
+	ticker := time.NewTicker(activeSeriesCleanup)
+	defer ticker.Stop()
+
+	for {
+		select {
+		case <-ticker.C:
+			l.cleanup()
+		case <-l.ctx.Done():
+			return
+		}
+	}
+}
+
+// cleanup removes the series that have not been used for a while.
+func (l *limiter) cleanup() {
+	now := time.Now().UnixNano()
+	l.mtx.Lock()
+	defer l.mtx.Unlock()
+
+	for fp, lastUsed := range l.activeSeries {
+		if now-lastUsed > int64(activeSeriesTimeout) {
+			delete(l.activeSeries, fp)
+		}
+	}
+}
+
+func (l *limiter) AllowProfile(fp model.Fingerprint, lbs phlaremodel.Labels, tsNano int64) error {
+	l.mtx.Lock()
+	defer l.mtx.Unlock()
+	if err := l.allowNewProfile(fp, lbs, tsNano); err != nil {
+		return err
+	}
+	return l.allowNewSeries(fp)
+}
+
+func (l *limiter) allowNewProfile(fp model.Fingerprint, lbs phlaremodel.Labels, tsNano int64) error {
+	max, ok := l.lastTimestamp[fp]
+	if ok {
+		// profile is before the last timestamp
+		if tsNano < max {
+			return validation.NewErrorf(validation.OutOfOrder, "profile for series %s out of order (received %s last %s)", phlaremodel.LabelPairsString(lbs), time.Unix(0, tsNano), time.Unix(0, max))
+		}
+	}
+
+	// set the last timestamp
+	l.lastTimestamp[fp] = tsNano
+	return nil
+}
+
+func (l *limiter) allowNewSeries(fp model.Fingerprint) error {
+	_, ok := l.activeSeries[fp]
+	series := len(l.activeSeries)
+	if !ok {
+		// can this series be added?
+		if err := l.assertMaxSeriesPerUser(l.tenantID, series); err != nil {
+			return err
+		}
+	}
+
+	// update time or add it
+	l.activeSeries[fp] = time.Now().UnixNano()
+	return nil
+}
+
+func (l *limiter) assertMaxSeriesPerUser(tenantID string, series int) error {
+	// Start by setting the local limit either from override or default
+	localLimit := l.limits.MaxLocalSeriesPerTenant(tenantID)
+
+	// We can assume that series are evenly distributed across ingesters
+	// so we do convert the global limit into a local limit
+	globalLimit := l.limits.MaxGlobalSeriesPerTenant(tenantID)
+	adjustedGlobalLimit := convertGlobalToLocalLimit(globalLimit, l.ring, l.replicationFactor)
+
+	// Set the calculated limit to the lesser of the local limit or the new calculated global limit
+	calculatedLimit := minNonZero(localLimit, adjustedGlobalLimit)
+
+	// If both the local and global limits are disabled, we just
+	// use the largest int value
+	if calculatedLimit == 0 {
+		return nil
+	}
+
+	if series < calculatedLimit {
+		return nil
+	}
+	return validation.NewErrorf(validation.SeriesLimit, validation.SeriesLimitErrorMsg, series, calculatedLimit)
+}
+
+func convertGlobalToLocalLimit(globalLimit int, ringCount RingCount, replicationFactor int) int {
+	if globalLimit == 0 {
+		return 0
+	}
+
+	// Given we don't need a super accurate count (ie. when the ingesters
+	// topology changes) and we prefer to always be in favor of the tenant,
+	// we can use a per-ingester limit equal to:
+	// (global limit / number of ingesters) * replication factor
+	numIngesters := ringCount.HealthyInstancesCount()
+
+	// May happen because the number of ingesters is asynchronously updated.
+	// If happens, we just temporarily ignore the global limit.
+	if numIngesters > 0 {
+		return int((float64(globalLimit) / float64(numIngesters)) * float64(replicationFactor))
+	}
+
+	return 0
+}
+
+func minNonZero(first, second int) int {
+	if first == 0 || (second != 0 && first > second) {
+		return second
+	}
+
+	return first
+}
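To make the comment in convertGlobalToLocalLimit concrete, here is a small worked example in test form; it is a sketch in package ingester so it can reach the unexported helpers, and the staticRing stub is an assumption standing in for the ring's RingCount dependency:

package ingester

import "testing"

// staticRing reports a fixed number of healthy ingesters.
type staticRing int

func (s staticRing) HealthyInstancesCount() int { return int(s) }

func TestConvertGlobalToLocalLimitSketch(t *testing.T) {
	// Global limit 6000, 4 healthy ingesters, replication factor 3:
	// (6000 / 4) * 3 = 4500 active series allowed on each ingester.
	if got := convertGlobalToLocalLimit(6000, staticRing(4), 3); got != 4500 {
		t.Fatalf("expected 4500, got %d", got)
	}
	// With no healthy ingesters reported yet, the global limit is temporarily ignored.
	if got := convertGlobalToLocalLimit(6000, staticRing(0), 3); got != 0 {
		t.Fatalf("expected 0, got %d", got)
	}
	// The effective limit is the lesser non-zero of the local and adjusted global limits.
	if got := minNonZero(2000, 4500); got != 2000 {
		t.Fatalf("expected 2000, got %d", got)
	}
	if got := minNonZero(0, 4500); got != 4500 {
		t.Fatalf("expected 4500, got %d", got)
	}
}

In other words, with the documented default max_global_series_per_tenant of 5000 and three-way replication, each of ten healthy ingesters would locally allow 1500 active series for that tenant.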
