Skip to content

Commit 740ef90

Browse files
authored
ESQL: Add exponential_histogram support to AVG and SUM (#138177)
1 parent 5679795 commit 740ef90

File tree

9 files changed

+184
-50
lines changed

9 files changed

+184
-50
lines changed

x-pack/plugin/esql/qa/testFixtures/src/main/resources/exponential_histogram.csv-spec

Lines changed: 102 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -17,52 +17,54 @@ dummy-zero_threshold_only | "{""scale"":0,""sum"":0.0,""zero"":{""threshold"":2.
1717

1818

1919
allAggsGrouped
20-
required_capability: exponential_histogram_minmax_support
20+
required_capability: exponential_histogram_sum_avg_support
2121

2222
FROM exp_histo_sample
2323
| EVAL instance = CASE(STARTS_WITH(instance, "dummy"), "dummy-grouped", instance)
24-
| STATS min = MIN(responseTime), max = MAX(responseTime), p75 = PERCENTILE(responseTime,75) BY instance
24+
| STATS min = MIN(responseTime), max = MAX(responseTime), p75 = PERCENTILE(responseTime,75), sum = SUM(responseTime), avg = AVG(responseTime) BY instance
2525
| EVAL p75 = ROUND(p75, 7) // rounding to avoid floating point precision issues
26-
| KEEP instance, min, max, p75
26+
| KEEP instance, min, max, p75, sum, avg
2727
| SORT instance
2828
;
2929

30-
instance:keyword | min:double | max:double | p75:double
31-
dummy-grouped | -100.0 | 50.0 | 8.3457089
32-
instance-0 | 2.4E-4 | 6.786232 | 0.2608237
33-
instance-1 | 2.17E-4 | 3.190723 | 0.0016068
34-
instance-2 | 2.2E-4 | 2.744054 | 0.0016068
30+
instance:keyword | min:double | max:double | p75:double | sum:double | avg:double
31+
dummy-grouped | -100.0 | 50.0 | 8.3457089 | -7550.0 | -15.0398406374502
32+
instance-0 | 2.4E-4 | 6.786232 | 0.2608237 | 1472.744209 | 0.1665811796176903
33+
instance-1 | 2.17E-4 | 3.190723 | 0.0016068 | 36.198484 | 0.011137995076923077
34+
instance-2 | 2.2E-4 | 2.744054 | 0.0016068 | 27.706021000000003 | 0.008197047633136096
3535
;
3636

3737

3838

3939
allAggsInlineGrouped
40-
required_capability: exponential_histogram_minmax_support
40+
required_capability: exponential_histogram_sum_avg_support
4141

4242
FROM exp_histo_sample
43-
| INLINE STATS min = MIN(responseTime), max = MAX(responseTime), p75 = PERCENTILE(responseTime,75) BY instance
43+
| INLINE STATS min = MIN(responseTime), max = MAX(responseTime), p75 = PERCENTILE(responseTime,75), sum = SUM(responseTime), avg = AVG(responseTime) BY instance
4444
| EVAL p75 = ROUND(p75, 7) // rounding to avoid floating point precision issues
45-
| KEEP instance, min, max, p75
45+
| KEEP instance, min, max, p75, sum , avg
4646
| SORT instance
4747
| Limit 15
4848
;
49+
warningRegex: Line 2:132: evaluation of \[AVG\(responseTime\)\] failed, treating result as null. Only first 20 failures recorded
50+
warningRegex: Line 2:132: java.lang.ArithmeticException: / by zero
4951

50-
instance:keyword | min:double | max:double | p75:double
51-
dummy-empty | null | null | null
52-
dummy-full | -100.0 | 50.0 | 10.6666667
53-
dummy-negative_only | -50.0 | -1.0 | -12.8729318
54-
dummy-no_zero_bucket | -100.0 | 50.0 | 10.6666667
55-
dummy-positive_only | 1.0 | 50.0 | 34.7656715
56-
dummy-zero_count_only | 0.0 | 0.0 | 0.0
57-
dummy-zero_threshold_only | null | null | null
58-
instance-0 | 2.4E-4 | 6.786232 | 0.2608237
59-
instance-0 | 2.4E-4 | 6.786232 | 0.2608237
60-
instance-0 | 2.4E-4 | 6.786232 | 0.2608237
61-
instance-0 | 2.4E-4 | 6.786232 | 0.2608237
62-
instance-0 | 2.4E-4 | 6.786232 | 0.2608237
63-
instance-0 | 2.4E-4 | 6.786232 | 0.2608237
64-
instance-0 | 2.4E-4 | 6.786232 | 0.2608237
65-
instance-0 | 2.4E-4 | 6.786232 | 0.2608237
52+
instance:keyword | min:double | max:double | p75:double | sum:double | avg:double
53+
dummy-empty | null | null | null | 0.0 | null
54+
dummy-full | -100.0 | 50.0 | 10.6666667 | -3775.0 | -25.0
55+
dummy-negative_only | -50.0 | -1.0 | -12.8729318 | -1275.0 | -25.5
56+
dummy-no_zero_bucket | -100.0 | 50.0 | 10.6666667 | -3775.0 | -25.166666666666668
57+
dummy-positive_only | 1.0 | 50.0 | 34.7656715 | 1275.0 | 25.5
58+
dummy-zero_count_only | 0.0 | 0.0 | 0.0 | 0.0 | 0.0
59+
dummy-zero_threshold_only | null | null | null | 0.0 | null
60+
instance-0 | 2.4E-4 | 6.786232 | 0.2608237 | 1472.744209 | 0.1665811796176903
61+
instance-0 | 2.4E-4 | 6.786232 | 0.2608237 | 1472.744209 | 0.1665811796176903
62+
instance-0 | 2.4E-4 | 6.786232 | 0.2608237 | 1472.744209 | 0.1665811796176903
63+
instance-0 | 2.4E-4 | 6.786232 | 0.2608237 | 1472.744209 | 0.1665811796176903
64+
instance-0 | 2.4E-4 | 6.786232 | 0.2608237 | 1472.744209 | 0.1665811796176903
65+
instance-0 | 2.4E-4 | 6.786232 | 0.2608237 | 1472.744209 | 0.1665811796176903
66+
instance-0 | 2.4E-4 | 6.786232 | 0.2608237 | 1472.744209 | 0.1665811796176903
67+
instance-0 | 2.4E-4 | 6.786232 | 0.2608237 | 1472.744209 | 0.1665811796176903
6668
;
6769

6870

@@ -170,3 +172,76 @@ FROM exp_histo_sample | WHERE instance == "dummy-empty"
170172
min:double | max:double
171173
NULL | NULL
172174
;
175+
176+
177+
ungroupedAvg
178+
required_capability: exponential_histogram_sum_avg_support
179+
180+
FROM exp_histo_sample | WHERE NOT STARTS_WITH(instance, "dummy")
181+
| STATS avg = AVG(responseTime)
182+
| KEEP avg
183+
;
184+
185+
avg:double
186+
0.09932445956951716
187+
;
188+
189+
190+
groupedAvg
191+
required_capability: exponential_histogram_sum_avg_support
192+
193+
FROM exp_histo_sample | WHERE NOT STARTS_WITH(instance, "dummy")
194+
| STATS avg = AVG(responseTime) BY instance
195+
| KEEP instance, avg
196+
| SORT instance
197+
;
198+
199+
instance:keyword | avg:double
200+
instance-0 | 0.1665811796176903
201+
instance-1 | 0.011137995076923077
202+
instance-2 | 0.008197047633136096
203+
;
204+
205+
206+
avgOnEmptyHistogram
207+
required_capability: exponential_histogram_sum_avg_support
208+
209+
FROM exp_histo_sample | WHERE instance == "dummy-empty"
210+
| STATS avg = AVG(responseTime)
211+
| KEEP avg // TODO: handle empty histograms without triggering warnings; doing so currently triggers a planner issue
212+
;
213+
warningRegex: Line 2:16: evaluation of \[AVG\(responseTime\)\] failed, treating result as null. Only first 20 failures recorded
214+
warningRegex: Line 2:16: java.lang.ArithmeticException: / by zero
215+
216+
avg:double
217+
NULL
218+
;
219+
220+
221+
ungroupedSum
222+
required_capability: exponential_histogram_sum_avg_support
223+
224+
FROM exp_histo_sample | WHERE NOT STARTS_WITH(instance, "dummy")
225+
| STATS sum = SUM(responseTime)
226+
| KEEP sum
227+
;
228+
229+
sum:double
230+
1536.648714
231+
;
232+
233+
234+
groupedSum
235+
required_capability: exponential_histogram_sum_avg_support
236+
237+
FROM exp_histo_sample | WHERE NOT STARTS_WITH(instance, "dummy")
238+
| STATS sum = SUM(responseTime) BY instance
239+
| KEEP instance, sum
240+
| SORT instance
241+
;
242+
243+
instance:keyword | sum:double
244+
instance-0 | 1472.744209
245+
instance-1 | 36.198484
246+
instance-2 | 27.706021000000003
247+
;

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1573,6 +1573,11 @@ public enum Cap {
15731573
*/
15741574
EXPONENTIAL_HISTOGRAM_MINMAX_SUPPORT(EXPONENTIAL_HISTOGRAM_FEATURE_FLAG),
15751575

1576+
/**
1577+
* Support for the exponential_histogram type in the SUM and AVG aggregations.
1578+
*/
1579+
EXPONENTIAL_HISTOGRAM_SUM_AVG_SUPPORT(EXPONENTIAL_HISTOGRAM_FEATURE_FLAG),
1580+
15761581
/**
15771582
* Create new block when filtering OrdinalBytesRefBlock
15781583
*/

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Avg.java

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
1111
import org.elasticsearch.common.io.stream.StreamInput;
12+
import org.elasticsearch.compute.data.ExponentialHistogramBlock;
1213
import org.elasticsearch.xpack.esql.core.expression.Expression;
1314
import org.elasticsearch.xpack.esql.core.expression.Literal;
1415
import org.elasticsearch.xpack.esql.core.tree.NodeInfo;
@@ -19,6 +20,7 @@
1920
import org.elasticsearch.xpack.esql.expression.function.FunctionInfo;
2021
import org.elasticsearch.xpack.esql.expression.function.FunctionType;
2122
import org.elasticsearch.xpack.esql.expression.function.Param;
23+
import org.elasticsearch.xpack.esql.expression.function.scalar.histogram.ExtractHistogramComponent;
2224
import org.elasticsearch.xpack.esql.expression.function.scalar.multivalue.MvAvg;
2325
import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.Div;
2426
import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput;
@@ -29,6 +31,7 @@
2931
import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.DEFAULT;
3032
import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isType;
3133
import static org.elasticsearch.xpack.esql.core.type.DataType.AGGREGATE_METRIC_DOUBLE;
34+
import static org.elasticsearch.xpack.esql.core.type.DataType.EXPONENTIAL_HISTOGRAM;
3235

3336
public class Avg extends AggregateFunction implements SurrogateExpression {
3437
public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(Expression.class, "Avg", Avg::new);
@@ -52,7 +55,7 @@ public Avg(
5255
Source source,
5356
@Param(
5457
name = "number",
55-
type = { "aggregate_metric_double", "double", "integer", "long" },
58+
type = { "aggregate_metric_double", "exponential_histogram", "double", "integer", "long" },
5659
description = "Expression that outputs values to average."
5760
) Expression field
5861
) {
@@ -72,10 +75,10 @@ public Expression summationMode() {
7275
protected Expression.TypeResolution resolveType() {
7376
return isType(
7477
field(),
75-
dt -> (dt.isNumeric() && dt != DataType.UNSIGNED_LONG) || dt == AGGREGATE_METRIC_DOUBLE,
78+
dt -> (dt.isNumeric() && dt != DataType.UNSIGNED_LONG) || dt == AGGREGATE_METRIC_DOUBLE || dt == EXPONENTIAL_HISTOGRAM,
7679
sourceText(),
7780
DEFAULT,
78-
"aggregate_metric_double or numeric except unsigned_long or counter types"
81+
"aggregate_metric_double, exponential_histogram or numeric except unsigned_long or counter types"
7982
);
8083
}
8184

@@ -130,6 +133,18 @@ public Expression surrogate() {
130133
new Count(s, field, filter(), window()).surrogate()
131134
);
132135
}
136+
if (field.dataType() == EXPONENTIAL_HISTOGRAM) {
137+
Sum valuesSum = new Sum(s, field, filter(), window(), summationMode);
138+
Sum totalCount = new Sum(
139+
s,
140+
ExtractHistogramComponent.create(s, field, ExponentialHistogramBlock.Component.COUNT),
141+
filter(),
142+
window(),
143+
summationMode
144+
);
145+
// TODO: handle empty histograms (=> 0.0 / 0.0) gracefully, without triggering a warning
146+
return new Div(s, valuesSum, totalCount);
147+
}
133148
if (field.foldable()) {
134149
return new MvAvg(s, field);
135150
}

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Sum.java

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
import org.elasticsearch.compute.aggregation.SumIntAggregatorFunctionSupplier;
1515
import org.elasticsearch.compute.aggregation.SumLongAggregatorFunctionSupplier;
1616
import org.elasticsearch.compute.data.AggregateMetricDoubleBlockBuilder;
17+
import org.elasticsearch.compute.data.ExponentialHistogramBlock;
1718
import org.elasticsearch.xpack.esql.core.expression.Expression;
1819
import org.elasticsearch.xpack.esql.core.expression.Literal;
1920
import org.elasticsearch.xpack.esql.core.expression.TypeResolutions;
@@ -27,6 +28,7 @@
2728
import org.elasticsearch.xpack.esql.expression.function.FunctionType;
2829
import org.elasticsearch.xpack.esql.expression.function.Param;
2930
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.FromAggregateMetricDouble;
31+
import org.elasticsearch.xpack.esql.expression.function.scalar.histogram.ExtractHistogramComponent;
3032
import org.elasticsearch.xpack.esql.expression.function.scalar.multivalue.MvSum;
3133
import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.Mul;
3234
import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput;
@@ -38,6 +40,7 @@
3840
import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isType;
3941
import static org.elasticsearch.xpack.esql.core.type.DataType.AGGREGATE_METRIC_DOUBLE;
4042
import static org.elasticsearch.xpack.esql.core.type.DataType.DOUBLE;
43+
import static org.elasticsearch.xpack.esql.core.type.DataType.EXPONENTIAL_HISTOGRAM;
4144
import static org.elasticsearch.xpack.esql.core.type.DataType.LONG;
4245
import static org.elasticsearch.xpack.esql.core.type.DataType.UNSIGNED_LONG;
4346

@@ -63,7 +66,10 @@ public class Sum extends NumericAggregate implements SurrogateExpression {
6366
tag = "docsStatsSumNestedExpression"
6467
) }
6568
)
66-
public Sum(Source source, @Param(name = "number", type = { "aggregate_metric_double", "double", "integer", "long" }) Expression field) {
69+
public Sum(
70+
Source source,
71+
@Param(name = "number", type = { "aggregate_metric_double", "exponential_histogram", "double", "integer", "long" }) Expression field
72+
) {
6773
this(source, field, Literal.TRUE, NO_WINDOW, SummationMode.COMPENSATED_LITERAL);
6874
}
6975

@@ -141,19 +147,24 @@ protected TypeResolution resolveType() {
141147
if (supportsDates()) {
142148
return TypeResolutions.isType(
143149
this,
144-
e -> e == DataType.DATETIME || e == DataType.AGGREGATE_METRIC_DOUBLE || e.isNumeric() && e != DataType.UNSIGNED_LONG,
150+
e -> e == DataType.DATETIME
151+
|| e == DataType.AGGREGATE_METRIC_DOUBLE
152+
|| e == DataType.EXPONENTIAL_HISTOGRAM
153+
|| e.isNumeric() && e != DataType.UNSIGNED_LONG,
145154
sourceText(),
146155
DEFAULT,
147156
"datetime",
148-
"aggregate_metric_double or numeric except unsigned_long or counter types"
157+
"aggregate_metric_double, exponential_histogram or numeric except unsigned_long or counter types"
149158
);
150159
}
151160
return isType(
152161
field(),
153-
dt -> dt == DataType.AGGREGATE_METRIC_DOUBLE || dt.isNumeric() && dt != DataType.UNSIGNED_LONG,
162+
dt -> dt == DataType.AGGREGATE_METRIC_DOUBLE
163+
|| dt == DataType.EXPONENTIAL_HISTOGRAM
164+
|| dt.isNumeric() && dt != DataType.UNSIGNED_LONG,
154165
sourceText(),
155166
DEFAULT,
156-
"aggregate_metric_double or numeric except unsigned_long or counter types"
167+
"aggregate_metric_double, exponential_histogram or numeric except unsigned_long or counter types"
157168
);
158169
}
159170

@@ -170,6 +181,15 @@ public Expression surrogate() {
170181
summationMode
171182
);
172183
}
184+
if (field.dataType() == EXPONENTIAL_HISTOGRAM) {
185+
return new Sum(
186+
s,
187+
ExtractHistogramComponent.create(source(), field, ExponentialHistogramBlock.Component.SUM),
188+
filter(),
189+
window(),
190+
summationMode
191+
);
192+
}
173193

174194
// SUM(const) is equivalent to MV_SUM(const)*COUNT(*).
175195
return field.foldable()

x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2044,7 +2044,8 @@ public void testUnsupportedTypesInStats() {
20442044
| stats avg(x), count_distinct(x), max(x), median(x), median_absolute_deviation(x), min(x), percentile(x, 10), sum(x)
20452045
""", """
20462046
Found 6 problems
2047-
line 2:12: argument of [avg(x)] must be [aggregate_metric_double or numeric except unsigned_long or counter types],\
2047+
line 2:12: argument of [avg(x)] must be [aggregate_metric_double,\
2048+
exponential_histogram or numeric except unsigned_long or counter types],\
20482049
found value [x] type [unsigned_long]
20492050
line 2:20: argument of [count_distinct(x)] must be [any exact type except unsigned_long, _source, or counter types],\
20502051
found value [x] type [unsigned_long]
@@ -2054,23 +2055,26 @@ public void testUnsupportedTypesInStats() {
20542055
found value [x] type [unsigned_long]
20552056
line 2:96: first argument of [percentile(x, 10)] must be [exponential_histogram or numeric except unsigned_long],\
20562057
found value [x] type [unsigned_long]
2057-
line 2:115: argument of [sum(x)] must be [aggregate_metric_double or numeric except unsigned_long or counter types],\
2058+
line 2:115: argument of [sum(x)] must be [aggregate_metric_double,\
2059+
exponential_histogram or numeric except unsigned_long or counter types],\
20582060
found value [x] type [unsigned_long]""");
20592061

20602062
verifyUnsupported("""
20612063
row x = to_version("1.2")
20622064
| stats avg(x), median(x), median_absolute_deviation(x), percentile(x, 10), sum(x)
20632065
""", """
20642066
Found 5 problems
2065-
line 2:10: argument of [avg(x)] must be [aggregate_metric_double or numeric except unsigned_long or counter types],\
2067+
line 2:10: argument of [avg(x)] must be [aggregate_metric_double,\
2068+
exponential_histogram or numeric except unsigned_long or counter types],\
20662069
found value [x] type [version]
20672070
line 2:18: argument of [median(x)] must be [numeric except unsigned_long or counter types],\
20682071
found value [x] type [version]
20692072
line 2:29: argument of [median_absolute_deviation(x)] must be [numeric except unsigned_long or counter types],\
20702073
found value [x] type [version]
20712074
line 2:59: first argument of [percentile(x, 10)] must be [exponential_histogram or numeric except unsigned_long],\
20722075
found value [x] type [version]
2073-
line 2:78: argument of [sum(x)] must be [aggregate_metric_double or numeric except unsigned_long or counter types],\
2076+
line 2:78: argument of [sum(x)] must be [aggregate_metric_double,\
2077+
exponential_histogram or numeric except unsigned_long or counter types],\
20742078
found value [x] type [version]""");
20752079
}
20762080

x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -409,7 +409,8 @@ public void testAggsExpressionsInStatsAggs() {
409409
error("from test | stats max(max(salary)) by first_name")
410410
);
411411
assertEquals(
412-
"1:25: argument of [avg(first_name)] must be [aggregate_metric_double or numeric except unsigned_long or counter types],"
412+
"1:25: argument of [avg(first_name)] must be [aggregate_metric_double,"
413+
+ " exponential_histogram or numeric except unsigned_long or counter types],"
413414
+ " found value [first_name] type [keyword]",
414415
error("from test | stats count(avg(first_name)) by first_name")
415416
);
@@ -837,7 +838,8 @@ public void testUnsignedLongNegation() {
837838

838839
public void testSumOnDate() {
839840
assertEquals(
840-
"1:19: argument of [sum(hire_date)] must be [aggregate_metric_double or numeric except unsigned_long or counter types],"
841+
"1:19: argument of [sum(hire_date)] must be [aggregate_metric_double,"
842+
+ " exponential_histogram or numeric except unsigned_long or counter types],"
841843
+ " found value [hire_date] type [datetime]",
842844
error("from test | stats sum(hire_date)")
843845
);

x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/aggregate/AvgErrorTests.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ protected Matcher<String> expectedTypeErrorMatcher(List<Set<DataType>> validPerP
3737
false,
3838
validPerPosition,
3939
signature,
40-
(v, p) -> "aggregate_metric_double or numeric except unsigned_long or counter types"
40+
(v, p) -> "aggregate_metric_double, exponential_histogram or numeric except unsigned_long or counter types"
4141
)
4242
);
4343
}

0 commit comments

Comments
 (0)