Skip to content

Commit 156fc5d

Browse files
romseygeek authored and Kubik42 committed
Speed up sorts on secondary sort fields (elastic#137533)
This adds a competitive iterator implementation that takes advantage of doc value skippers when all of the following hold:
* the index is sorted by a low-cardinality field like hostname, and then by a high-cardinality field like timestamp
* skippers are enabled on both of these fields
* the query is sorted by the high-cardinality field

To be able to plug this new implementation into the Lucene sort architecture, we need to fork NumericComparator and some associated classes. LongValuesComparatorSource now returns the forked version with the new competitive iterator builder.
1 parent f7e96f0 commit 156fc5d

File tree

21 files changed

+1139
-26
lines changed

21 files changed

+1139
-26
lines changed

docs/changelog/137533.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 137533
2+
summary: Speed up sorts on secondary sort fields
3+
area: Search
4+
type: enhancement
5+
issues: []

server/src/main/java/org/elasticsearch/common/lucene/Lucene.java

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -634,12 +634,8 @@ private static SortField rewriteMergeSortField(SortField sortField) {
634634
SortField newSortField = new SortField(sortField.getField(), SortField.Type.STRING, sortField.getReverse());
635635
newSortField.setMissingValue(sortField.getMissingValue());
636636
return newSortField;
637-
} else if (sortField.getClass() == SortedNumericSortField.class) {
638-
SortField newSortField = new SortField(
639-
sortField.getField(),
640-
((SortedNumericSortField) sortField).getNumericType(),
641-
sortField.getReverse()
642-
);
637+
} else if (sortField instanceof SortedNumericSortField snsf) {
638+
SortField newSortField = new SortField(sortField.getField(), snsf.getNumericType(), sortField.getReverse());
643639
newSortField.setMissingValue(sortField.getMissingValue());
644640
return newSortField;
645641
} else if (sortField.getClass() == ShardDocSortField.class) {
@@ -651,9 +647,6 @@ private static SortField rewriteMergeSortField(SortField sortField) {
651647

652648
static void writeSortField(StreamOutput out, SortField sortField) throws IOException {
653649
sortField = rewriteMergeSortField(sortField);
654-
if (sortField.getClass() != SortField.class) {
655-
throw new IllegalArgumentException("Cannot serialize SortField impl [" + sortField + "]");
656-
}
657650
out.writeOptionalString(sortField.getField());
658651
if (sortField.getComparatorSource() != null) {
659652
IndexFieldData.XFieldComparatorSource comparatorSource = (IndexFieldData.XFieldComparatorSource) sortField

server/src/main/java/org/elasticsearch/index/fielddata/fieldcomparator/DoubleValuesComparatorSource.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,8 @@ public FieldComparator<?> newComparator(String fieldname, int numHits, Pruning e
8080
final double dMissingValue = (Double) missingObject(missingValue, reversed);
8181
// NOTE: it's important to pass null as a missing value in the constructor so that
8282
// the comparator doesn't check docsWithField since we replace missing values in select()
83-
return new DoubleComparator(numHits, null, null, reversed, Pruning.NONE) {
83+
// TODO we can re-enable pruning here if we allow NumericDoubleValues to expose an iterator
84+
return new DoubleComparator(numHits, fieldname, null, reversed, Pruning.NONE) {
8485
@Override
8586
public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
8687
return new DoubleLeafComparator(context) {

server/src/main/java/org/elasticsearch/index/fielddata/fieldcomparator/FloatValuesComparatorSource.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,8 @@ public FieldComparator<?> newComparator(String fieldname, int numHits, Pruning e
7373
final float fMissingValue = (Float) missingObject(missingValue, reversed);
7474
// NOTE: it's important to pass null as a missing value in the constructor so that
7575
// the comparator doesn't check docsWithField since we replace missing values in select()
76-
return new FloatComparator(numHits, null, null, reversed, Pruning.NONE) {
76+
// TODO we can re-enable pruning here if we allow NumericDoubleValues to expose an iterator
77+
return new FloatComparator(numHits, fieldname, null, reversed, Pruning.NONE) {
7778
@Override
7879
public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
7980
return new FloatLeafComparator(context) {

server/src/main/java/org/elasticsearch/index/fielddata/fieldcomparator/HalfFloatValuesComparatorSource.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,8 @@ public FieldComparator<?> newComparator(String fieldname, int numHits, Pruning e
3939
final float fMissingValue = (Float) missingObject(missingValue, reversed);
4040
// NOTE: it's important to pass null as a missing value in the constructor so that
4141
// the comparator doesn't check docsWithField since we replace missing values in select()
42-
return new HalfFloatComparator(numHits, fieldname, null, reversed, enableSkipping) {
42+
// TODO we can re-enable pruning here if we allow NumericDoubleValues to expose an iterator
43+
return new HalfFloatComparator(numHits, fieldname, null, reversed, Pruning.NONE) {
4344
@Override
4445
public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
4546
return new HalfFloatLeafComparator(context) {

server/src/main/java/org/elasticsearch/index/fielddata/fieldcomparator/IntValuesComparatorSource.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws I
6060
return new IntLeafComparator(context) {
6161
@Override
6262
protected NumericDocValues getNumericDocValues(LeafReaderContext context, String field) throws IOException {
63-
return wrap(getLongValues(context, iMissingValue));
63+
return wrap(getLongValues(context, iMissingValue), context.reader().maxDoc());
6464
}
6565
};
6666
}

server/src/main/java/org/elasticsearch/index/fielddata/fieldcomparator/LongValuesComparatorSource.java

Lines changed: 57 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,15 +8,16 @@
88
*/
99
package org.elasticsearch.index.fielddata.fieldcomparator;
1010

11+
import org.apache.lucene.index.DocValuesSkipper;
1112
import org.apache.lucene.index.LeafReaderContext;
1213
import org.apache.lucene.index.NumericDocValues;
1314
import org.apache.lucene.search.DocIdSetIterator;
1415
import org.apache.lucene.search.FieldComparator;
1516
import org.apache.lucene.search.LeafFieldComparator;
1617
import org.apache.lucene.search.LongValues;
1718
import org.apache.lucene.search.Pruning;
19+
import org.apache.lucene.search.Sort;
1820
import org.apache.lucene.search.SortField;
19-
import org.apache.lucene.search.comparators.LongComparator;
2021
import org.apache.lucene.util.BitSet;
2122
import org.elasticsearch.common.time.DateUtils;
2223
import org.elasticsearch.common.util.BigArrays;
@@ -28,6 +29,8 @@
2829
import org.elasticsearch.index.fielddata.LeafNumericFieldData;
2930
import org.elasticsearch.index.fielddata.SortedNumericLongValues;
3031
import org.elasticsearch.index.fielddata.plain.SortedNumericIndexFieldData;
32+
import org.elasticsearch.lucene.comparators.XLongComparator;
33+
import org.elasticsearch.lucene.comparators.XNumericComparator;
3134
import org.elasticsearch.search.DocValueFormat;
3235
import org.elasticsearch.search.MultiValueMode;
3336
import org.elasticsearch.search.sort.BucketedSort;
@@ -103,13 +106,48 @@ public FieldComparator<?> newComparator(String fieldname, int numHits, Pruning e
103106
final long lMissingValue = (Long) missingObject(missingValue, reversed);
104107
// NOTE: it's important to pass null as a missing value in the constructor so that
105108
// the comparator doesn't check docsWithField since we replace missing values in select()
106-
return new LongComparator(numHits, null, null, reversed, Pruning.NONE) {
109+
return new XLongComparator(numHits, fieldname, null, reversed, enableSkipping) {
107110
@Override
108111
public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
112+
final int maxDoc = context.reader().maxDoc();
109113
return new LongLeafComparator(context) {
110114
@Override
111115
protected NumericDocValues getNumericDocValues(LeafReaderContext context, String field) throws IOException {
112-
return wrap(getLongValues(context, lMissingValue));
116+
return wrap(getLongValues(context, lMissingValue), maxDoc);
117+
}
118+
119+
@Override
120+
protected XNumericComparator<Long>.CompetitiveDISIBuilder buildCompetitiveDISIBuilder(LeafReaderContext context)
121+
throws IOException {
122+
Sort indexSort = context.reader().getMetaData().sort();
123+
if (indexSort == null) {
124+
return super.buildCompetitiveDISIBuilder(context);
125+
}
126+
SortField[] sortFields = indexSort.getSort();
127+
if (sortFields.length != 2) {
128+
return super.buildCompetitiveDISIBuilder(context);
129+
}
130+
if (sortFields[1].getField().equals(field) == false) {
131+
return super.buildCompetitiveDISIBuilder(context);
132+
}
133+
DocValuesSkipper skipper = context.reader().getDocValuesSkipper(field);
134+
DocValuesSkipper primaryFieldSkipper = context.reader().getDocValuesSkipper(sortFields[0].getField());
135+
if (primaryFieldSkipper == null || skipper.docCount() != maxDoc || primaryFieldSkipper.docCount() != maxDoc) {
136+
return super.buildCompetitiveDISIBuilder(context);
137+
}
138+
return new CompetitiveDISIBuilder(this) {
139+
@Override
140+
protected int docCount() {
141+
return skipper.docCount();
142+
}
143+
144+
@Override
145+
protected void doUpdateCompetitiveIterator() {
146+
competitiveIterator.update(
147+
new SecondarySortIterator(docValues, skipper, primaryFieldSkipper, minValueAsLong, maxValueAsLong)
148+
);
149+
}
150+
};
113151
}
114152
};
115153
}
@@ -163,31 +201,43 @@ public Object missingObject(Object missingValue, boolean reversed) {
163201
return super.missingObject(missingValue, reversed);
164202
}
165203

166-
protected static NumericDocValues wrap(LongValues longValues) {
204+
protected static NumericDocValues wrap(LongValues longValues, int maxDoc) {
167205
return new NumericDocValues() {
206+
207+
int doc = -1;
208+
168209
@Override
169210
public long longValue() throws IOException {
170211
return longValues.longValue();
171212
}
172213

173214
@Override
174215
public boolean advanceExact(int target) throws IOException {
216+
doc = target;
175217
return longValues.advanceExact(target);
176218
}
177219

178220
@Override
179221
public int docID() {
180-
throw new UnsupportedOperationException();
222+
return doc;
181223
}
182224

183225
@Override
184226
public int nextDoc() throws IOException {
185-
throw new UnsupportedOperationException();
227+
return advance(doc + 1);
186228
}
187229

188230
@Override
189231
public int advance(int target) throws IOException {
190-
throw new UnsupportedOperationException();
232+
if (target >= maxDoc) {
233+
return doc = NO_MORE_DOCS;
234+
}
235+
// All documents are guaranteed to have a value, as all invocations of getLongValues
236+
// always return `true` from `advanceExact()`
237+
boolean hasValue = longValues.advanceExact(target);
238+
assert hasValue : "LongValuesComparatorSource#wrap called with a LongValues that has missing values";
239+
doc = target;
240+
return target;
191241
}
192242

193243
@Override
Lines changed: 168 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,168 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.index.fielddata.fieldcomparator;
11+
12+
import org.apache.lucene.index.DocValuesSkipper;
13+
import org.apache.lucene.index.NumericDocValues;
14+
import org.apache.lucene.search.DocIdSetIterator;
15+
16+
import java.io.IOException;
17+
18+
/**
19+
* A competitive DocIdSetIterator that examines the values of a secondary
20+
* sort field and tries to exclude documents with values outside a given
21+
* range, using DocValueSkippers on the primary sort field to advance rapidly
22+
* to the next block of values.
23+
*/
24+
class SecondarySortIterator extends DocIdSetIterator {
25+
26+
final NumericDocValues values;
27+
28+
final DocValuesSkipper valueSkipper;
29+
final DocValuesSkipper primaryFieldSkipper;
30+
final long minValue;
31+
final long maxValue;
32+
33+
int docID = -1;
34+
boolean skipperMatch;
35+
int primaryFieldUpTo = -1;
36+
int valueFieldUpTo = -1;
37+
38+
SecondarySortIterator(
39+
NumericDocValues values,
40+
DocValuesSkipper valueSkipper,
41+
DocValuesSkipper primaryFieldSkipper,
42+
long minValue,
43+
long maxValue
44+
) {
45+
this.values = values;
46+
this.valueSkipper = valueSkipper;
47+
this.primaryFieldSkipper = primaryFieldSkipper;
48+
this.minValue = minValue;
49+
this.maxValue = maxValue;
50+
51+
valueFieldUpTo = valueSkipper.maxDocID(0);
52+
primaryFieldUpTo = primaryFieldSkipper.maxDocID(0);
53+
}
54+
55+
@Override
56+
public int docID() {
57+
return docID;
58+
}
59+
60+
@Override
61+
public int nextDoc() throws IOException {
62+
return advance(docID + 1);
63+
}
64+
65+
@Override
66+
public int advance(int target) throws IOException {
67+
skipperMatch = false;
68+
target = values.advance(target);
69+
if (target == DocIdSetIterator.NO_MORE_DOCS) {
70+
return docID = target;
71+
}
72+
while (true) {
73+
if (target > valueFieldUpTo) {
74+
valueSkipper.advance(target);
75+
valueFieldUpTo = valueSkipper.maxDocID(0);
76+
long minValue = valueSkipper.minValue(0);
77+
long maxValue = valueSkipper.maxValue(0);
78+
if (minValue > this.maxValue || maxValue < this.minValue) {
79+
// outside the desired range, skip forward
80+
for (int level = 1; level < valueSkipper.numLevels(); level++) {
81+
minValue = valueSkipper.minValue(level);
82+
maxValue = valueSkipper.maxValue(level);
83+
if (minValue > this.maxValue || maxValue < this.minValue) {
84+
valueFieldUpTo = valueSkipper.maxDocID(level);
85+
} else {
86+
break;
87+
}
88+
}
89+
90+
int upTo = valueFieldUpTo;
91+
if (maxValue < this.minValue) {
92+
// We've moved past the end of the valid values in the secondary sort field
93+
// for this primary value. Advance the primary skipper to find the starting point
94+
// for the next primary value, where the secondary field values will have reset
95+
primaryFieldSkipper.advance(target);
96+
primaryFieldUpTo = primaryFieldSkipper.maxDocID(0);
97+
if (primaryFieldSkipper.minValue(0) == primaryFieldSkipper.maxValue(0)) {
98+
for (int level = 1; level < primaryFieldSkipper.numLevels(); level++) {
99+
if (primaryFieldSkipper.minValue(level) == primaryFieldSkipper.maxValue(level)) {
100+
primaryFieldUpTo = primaryFieldSkipper.maxDocID(level);
101+
} else {
102+
break;
103+
}
104+
}
105+
}
106+
if (primaryFieldUpTo > upTo) {
107+
upTo = primaryFieldUpTo;
108+
}
109+
}
110+
111+
target = values.advance(upTo + 1);
112+
if (target == DocIdSetIterator.NO_MORE_DOCS) {
113+
return docID = target;
114+
}
115+
} else if (minValue >= this.minValue && maxValue <= this.maxValue) {
116+
assert valueSkipper.docCount(0) == valueSkipper.maxDocID(0) - valueSkipper.minDocID(0) + 1;
117+
skipperMatch = true;
118+
return docID = target;
119+
}
120+
}
121+
122+
long value = values.longValue();
123+
if (value < minValue && target > primaryFieldUpTo) {
124+
primaryFieldSkipper.advance(target);
125+
primaryFieldUpTo = primaryFieldSkipper.maxDocID(0);
126+
if (primaryFieldSkipper.minValue(0) == primaryFieldSkipper.maxValue(0)) {
127+
for (int level = 1; level < primaryFieldSkipper.numLevels(); level++) {
128+
if (primaryFieldSkipper.minValue(level) == primaryFieldSkipper.maxValue(level)) {
129+
primaryFieldUpTo = primaryFieldSkipper.maxDocID(level);
130+
} else {
131+
break;
132+
}
133+
}
134+
target = values.advance(primaryFieldUpTo + 1);
135+
if (target == DocIdSetIterator.NO_MORE_DOCS) {
136+
return docID = target;
137+
}
138+
} else {
139+
target = values.nextDoc();
140+
if (target == DocIdSetIterator.NO_MORE_DOCS) {
141+
return docID = target;
142+
}
143+
}
144+
} else if (value >= minValue && value <= maxValue) {
145+
return docID = target;
146+
} else {
147+
target = values.nextDoc();
148+
if (target == DocIdSetIterator.NO_MORE_DOCS) {
149+
return docID = target;
150+
}
151+
}
152+
}
153+
}
154+
155+
@Override
156+
public int docIDRunEnd() throws IOException {
157+
if (skipperMatch) {
158+
return valueFieldUpTo + 1;
159+
}
160+
return super.docIDRunEnd();
161+
}
162+
163+
@Override
164+
public long cost() {
165+
return values.cost();
166+
}
167+
168+
}

0 commit comments

Comments
 (0)