Skip to content

Commit ffb3722

Browse files
committed
Improve HashBuilderOperator unspill parallelism
HashBuilderOperator is unspilling sequentially partition by partition. This commit improves join unspilling performance by making FileSingleStreamSpiller unspill single partition in parallel. FileSingleStreamSpiller is enhanced so it can spill to/unspill from multiple files.
1 parent 53f6c27 commit ffb3722

File tree

12 files changed

+353
-76
lines changed

12 files changed

+353
-76
lines changed

core/trino-main/src/main/java/io/trino/operator/join/HashBuilderOperator.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -411,7 +411,8 @@ private ListenableFuture<Void> spillIndex()
411411
spiller = Optional.of(singleStreamSpillerFactory.create(
412412
index.getTypes(),
413413
operatorContext.getSpillContext().newLocalSpillContext(),
414-
operatorContext.newLocalUserMemoryContext(HashBuilderOperator.class.getSimpleName())));
414+
operatorContext.newLocalUserMemoryContext(HashBuilderOperator.class.getSimpleName()),
415+
true));
415416
long spillStartNanos = System.nanoTime();
416417
ListenableFuture<DataSize> spillFuture = getSpiller().spill(index.getPages());
417418
addSuccessCallback(spillFuture, dataSize -> {

core/trino-main/src/main/java/io/trino/spiller/FileSingleStreamSpiller.java

Lines changed: 132 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -13,16 +13,13 @@
1313
*/
1414
package io.trino.spiller;
1515

16-
import com.google.common.annotations.VisibleForTesting;
1716
import com.google.common.collect.AbstractIterator;
1817
import com.google.common.collect.ImmutableList;
1918
import com.google.common.io.Closer;
2019
import com.google.common.util.concurrent.Futures;
2120
import com.google.common.util.concurrent.ListenableFuture;
2221
import com.google.common.util.concurrent.ListeningExecutorService;
23-
import io.airlift.slice.OutputStreamSliceOutput;
2422
import io.airlift.slice.Slice;
25-
import io.airlift.slice.SliceOutput;
2623
import io.airlift.units.DataSize;
2724
import io.trino.annotation.NotThreadSafe;
2825
import io.trino.execution.buffer.PageDeserializer;
@@ -42,28 +39,29 @@
4239
import java.io.UncheckedIOException;
4340
import java.nio.file.Files;
4441
import java.nio.file.Path;
42+
import java.util.ArrayList;
4543
import java.util.Iterator;
4644
import java.util.List;
4745
import java.util.Optional;
4846
import java.util.concurrent.atomic.AtomicBoolean;
4947
import java.util.concurrent.atomic.AtomicLong;
5048

49+
import static com.google.common.base.Preconditions.checkArgument;
5150
import static com.google.common.base.Preconditions.checkState;
51+
import static com.google.common.collect.Iterators.transform;
5252
import static com.google.common.util.concurrent.Futures.immediateFuture;
5353
import static io.trino.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR;
5454
import static io.trino.spiller.FileSingleStreamSpillerFactory.SPILL_FILE_PREFIX;
5555
import static io.trino.spiller.FileSingleStreamSpillerFactory.SPILL_FILE_SUFFIX;
56-
import static java.nio.file.StandardOpenOption.APPEND;
5756
import static java.util.Objects.requireNonNull;
5857

5958
@NotThreadSafe
6059
public class FileSingleStreamSpiller
6160
implements SingleStreamSpiller
6261
{
63-
@VisibleForTesting
64-
static final int BUFFER_SIZE = 4 * 1024;
62+
private final List<SpillFile> spillFiles;
63+
private volatile int currentFileIndex;
6564

66-
private final FileHolder targetFile;
6765
private final Closer closer = Closer.create();
6866
private final PagesSerdeFactory serdeFactory;
6967
private volatile Optional<SecretKey> encryptionKey;
@@ -84,12 +82,15 @@ public FileSingleStreamSpiller(
8482
PagesSerdeFactory serdeFactory,
8583
Optional<SecretKey> encryptionKey,
8684
ListeningExecutorService executor,
87-
Path spillPath,
85+
List<Path> spillPaths,
8886
SpillerStats spillerStats,
8987
SpillContext spillContext,
9088
LocalMemoryContext memoryContext,
9189
Runnable fileSystemErrorHandler)
9290
{
91+
requireNonNull(spillPaths, "spillPaths is null");
92+
checkArgument(!spillPaths.isEmpty(), "spillPaths is empty");
93+
9394
this.serdeFactory = requireNonNull(serdeFactory, "serdeFactory is null");
9495
this.encryptionKey = requireNonNull(encryptionKey, "encryptionKey is null");
9596
this.encrypted = encryptionKey.isPresent();
@@ -107,10 +108,14 @@ public FileSingleStreamSpiller(
107108
// This means we start accounting for the memory before the spiller thread allocates it, and we release the memory reservation
108109
// before/after the spiller thread allocates that memory -- -- whether before or after depends on whether writePages() is in the
109110
// middle of execution when close() is called (note that this applies to both readPages() and writePages() methods).
110-
this.memoryContext.setBytes(BUFFER_SIZE);
111+
this.memoryContext.setBytes((long) SpillFile.BUFFER_SIZE * spillPaths.size());
111112
this.fileSystemErrorHandler = requireNonNull(fileSystemErrorHandler, "filesystemErrorHandler is null");
112113
try {
113-
this.targetFile = closer.register(new FileHolder(Files.createTempFile(spillPath, SPILL_FILE_PREFIX, SPILL_FILE_SUFFIX)));
114+
ImmutableList.Builder<SpillFile> builder = ImmutableList.builderWithExpectedSize(spillPaths.size());
115+
for (Path path : spillPaths) {
116+
builder.add(closer.register(new SpillFile(Files.createTempFile(path, SPILL_FILE_PREFIX, SPILL_FILE_SUFFIX))));
117+
}
118+
this.spillFiles = builder.build();
114119
}
115120
catch (IOException e) {
116121
this.fileSystemErrorHandler.run();
@@ -137,61 +142,136 @@ public long getSpilledPagesInMemorySize()
137142
public Iterator<Page> getSpilledPages()
138143
{
139144
checkNoSpillInProgress();
140-
return readPages();
145+
checkState(writable.getAndSet(false), "Repeated reads are disallowed to prevent potential resource leaks");
146+
147+
try {
148+
Optional<SecretKey> encryptionKey = this.encryptionKey;
149+
checkState(encrypted == encryptionKey.isPresent(), "encryptionKey has been discarded");
150+
151+
PageDeserializer deserializer = serdeFactory.createDeserializer(encryptionKey);
152+
this.encryptionKey = Optional.empty();
153+
154+
int fileCount = spillFiles.size();
155+
List<Iterator<Page>> iterators = new ArrayList<>(fileCount);
156+
for (SpillFile file : spillFiles) {
157+
iterators.add(readFilePages(deserializer, file, closer));
158+
}
159+
160+
return new AbstractIterator<>()
161+
{
162+
int fileIndex;
163+
164+
@Override
165+
protected Page computeNext()
166+
{
167+
Iterator<Page> iterator = iterators.get(fileIndex);
168+
if (!iterator.hasNext()) {
169+
checkAllIteratorsExhausted(iterators);
170+
return endOfData();
171+
}
172+
173+
Page page = iterator.next();
174+
fileIndex = (fileIndex + 1) % fileCount;
175+
return page;
176+
}
177+
};
178+
}
179+
catch (IOException e) {
180+
fileSystemErrorHandler.run();
181+
throw new TrinoException(GENERIC_INTERNAL_ERROR, "Failed to read spilled pages", e);
182+
}
141183
}
142184

143185
@Override
144186
public ListenableFuture<List<Page>> getAllSpilledPages()
145187
{
146-
return executor.submit(() -> ImmutableList.copyOf(getSpilledPages()));
188+
checkNoSpillInProgress();
189+
checkState(writable.getAndSet(false), "Repeated reads are disallowed to prevent potential resource leaks");
190+
191+
Optional<SecretKey> encryptionKey = this.encryptionKey;
192+
checkState(encrypted == encryptionKey.isPresent(), "encryptionKey has been discarded");
193+
194+
this.encryptionKey = Optional.empty();
195+
196+
List<ListenableFuture<List<Page>>> futures = new ArrayList<>();
197+
for (SpillFile file : spillFiles) {
198+
futures.add(executor.submit(() -> {
199+
PageDeserializer deserializer = serdeFactory.createDeserializer(encryptionKey);
200+
ImmutableList.Builder<Page> pages = ImmutableList.builder();
201+
try (Closer closer = Closer.create()) {
202+
readFilePages(deserializer, file, closer).forEachRemaining(pages::add);
203+
}
204+
return pages.build();
205+
}));
206+
}
207+
208+
// Combine pages from all spill files according to the round-robin order.
209+
return Futures.transform(Futures.allAsList(futures), pagesPerFile -> {
210+
ImmutableList.Builder<Page> builder = ImmutableList.builderWithExpectedSize(pagesPerFile.stream().mapToInt(List::size).sum());
211+
int fileCount = spillFiles.size();
212+
213+
List<Iterator<Page>> iterators = new ArrayList<>(fileCount);
214+
for (List<Page> pages : pagesPerFile) {
215+
iterators.add(pages.iterator());
216+
}
217+
218+
int fileIndex = 0;
219+
while (iterators.get(fileIndex).hasNext()) {
220+
builder.add(iterators.get(fileIndex).next());
221+
fileIndex = (fileIndex + 1) % fileCount;
222+
}
223+
checkAllIteratorsExhausted(iterators);
224+
return builder.build();
225+
}, executor);
226+
}
227+
228+
private static void checkAllIteratorsExhausted(List<Iterator<Page>> iterators)
229+
{
230+
iterators.forEach(iterator -> checkState(!iterator.hasNext(), "spill file iterator not fully consumed"));
147231
}
148232

149-
private DataSize writePages(Iterator<Page> pageIterator)
233+
private DataSize writePages(Iterator<Page> pages)
150234
{
151235
checkState(writable.get(), "Spilling no longer allowed. The spiller has been made non-writable on first read for subsequent reads to be consistent");
152236

153237
Optional<SecretKey> encryptionKey = this.encryptionKey;
154238
checkState(encrypted == encryptionKey.isPresent(), "encryptionKey has been discarded");
155239
PageSerializer serializer = serdeFactory.createSerializer(encryptionKey);
240+
156241
long spilledPagesBytes = 0;
157-
try (SliceOutput output = new OutputStreamSliceOutput(targetFile.newOutputStream(APPEND), BUFFER_SIZE)) {
158-
while (pageIterator.hasNext()) {
159-
Page page = pageIterator.next();
242+
int fileIndex = currentFileIndex;
243+
int fileCount = spillFiles.size();
244+
245+
try {
246+
while (pages.hasNext()) {
247+
Page page = pages.next();
160248
long pageSizeInBytes = page.getSizeInBytes();
249+
Slice serialized = serializer.serialize(page);
250+
long serializedPageSize = serialized.length();
251+
252+
spillFiles.get(fileIndex).writeBytes(serialized);
253+
161254
spilledPagesBytes += pageSizeInBytes;
255+
162256
spilledPagesInMemorySize.addAndGet(pageSizeInBytes);
163-
Slice serializedPage = serializer.serialize(page);
164-
long pageSize = serializedPage.length();
165-
localSpillContext.updateBytes(pageSize);
166-
spillerStats.addToTotalSpilledBytes(pageSize);
167-
output.writeBytes(serializedPage);
257+
localSpillContext.updateBytes(serializedPageSize);
258+
spillerStats.addToTotalSpilledBytes(serializedPageSize);
259+
260+
fileIndex = (fileIndex + 1) % fileCount;
261+
}
262+
263+
currentFileIndex = fileIndex;
264+
265+
for (SpillFile file : spillFiles) {
266+
file.closeOutput();
168267
}
169268
}
170269
catch (UncheckedIOException | IOException e) {
171270
fileSystemErrorHandler.run();
172271
throw new TrinoException(GENERIC_INTERNAL_ERROR, "Failed to spill pages", e);
173272
}
174-
return DataSize.ofBytes(spilledPagesBytes);
175-
}
176273

177-
private Iterator<Page> readPages()
178-
{
179-
checkState(writable.getAndSet(false), "Repeated reads are disallowed to prevent potential resource leaks");
180-
181-
try {
182-
Optional<SecretKey> encryptionKey = this.encryptionKey;
183-
checkState(encrypted == encryptionKey.isPresent(), "encryptionKey has been discarded");
184-
PageDeserializer deserializer = serdeFactory.createDeserializer(encryptionKey);
185-
// encryption key is safe to discard since it now belongs to the PageDeserializer and repeated reads are disallowed
186-
this.encryptionKey = Optional.empty();
187-
InputStream input = closer.register(targetFile.newInputStream());
188-
Iterator<Page> pages = PagesSerdeUtil.readPages(deserializer, input);
189-
return closeWhenExhausted(pages, input);
190-
}
191-
catch (IOException e) {
192-
fileSystemErrorHandler.run();
193-
throw new TrinoException(GENERIC_INTERNAL_ERROR, "Failed to read spilled pages", e);
194-
}
274+
return DataSize.ofBytes(spilledPagesBytes);
195275
}
196276

197277
@Override
@@ -215,6 +295,17 @@ private void checkNoSpillInProgress()
215295
checkState(spillInProgress.isDone(), "spill in progress");
216296
}
217297

298+
/**
299+
* Returns an iterator that exposes all pages stored in the given file.
300+
* Pages are lazily deserialized as the iterator is consumed.
301+
*/
302+
private Iterator<Page> readFilePages(PageDeserializer deserializer, SpillFile file, Closer closer)
303+
throws IOException
304+
{
305+
InputStream input = closer.register(file.newInputStream());
306+
return transform(closeWhenExhausted(PagesSerdeUtil.readSerializedPages(input), input), deserializer::deserialize);
307+
}
308+
218309
private static <T> Iterator<T> closeWhenExhausted(Iterator<T> iterator, Closeable resource)
219310
{
220311
requireNonNull(iterator, "iterator is null");

core/trino-main/src/main/java/io/trino/spiller/FileSingleStreamSpillerFactory.java

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import io.airlift.log.Logger;
2323
import io.trino.FeaturesConfig;
2424
import io.trino.cache.NonKeyEvictableLoadingCache;
25+
import io.trino.execution.TaskManagerConfig;
2526
import io.trino.execution.buffer.CompressionCodec;
2627
import io.trino.execution.buffer.PagesSerdeFactory;
2728
import io.trino.memory.context.LocalMemoryContext;
@@ -80,11 +81,12 @@ public class FileSingleStreamSpillerFactory
8081
private final SpillerStats spillerStats;
8182
private final double maxUsedSpaceThreshold;
8283
private final boolean spillEncryptionEnabled;
84+
private final int spillFileCount;
8385
private int roundRobinIndex;
8486
private final NonKeyEvictableLoadingCache<Path, Boolean> spillPathHealthCache;
8587

8688
@Inject
87-
public FileSingleStreamSpillerFactory(BlockEncodingSerde blockEncodingSerde, SpillerStats spillerStats, FeaturesConfig featuresConfig, NodeSpillConfig nodeSpillConfig)
89+
public FileSingleStreamSpillerFactory(BlockEncodingSerde blockEncodingSerde, SpillerStats spillerStats, FeaturesConfig featuresConfig, NodeSpillConfig nodeSpillConfig, TaskManagerConfig taskManagerConfig)
8890
{
8991
this(
9092
listeningDecorator(newFixedThreadPool(
@@ -93,6 +95,7 @@ public FileSingleStreamSpillerFactory(BlockEncodingSerde blockEncodingSerde, Spi
9395
requireNonNull(blockEncodingSerde, "blockEncodingSerde is null"),
9496
spillerStats,
9597
featuresConfig.getSpillerSpillPaths(),
98+
Math.min(featuresConfig.getSpillerThreads(), taskManagerConfig.getTaskConcurrency()),
9699
featuresConfig.getSpillMaxUsedSpaceThreshold(),
97100
nodeSpillConfig.getSpillCompressionCodec(),
98101
nodeSpillConfig.isSpillEncryptionEnabled());
@@ -104,6 +107,7 @@ public FileSingleStreamSpillerFactory(
104107
BlockEncodingSerde blockEncodingSerde,
105108
SpillerStats spillerStats,
106109
List<Path> spillPaths,
110+
int spillFileCount,
107111
double maxUsedSpaceThreshold,
108112
CompressionCodec compressionCodec,
109113
boolean spillEncryptionEnabled)
@@ -124,6 +128,7 @@ public FileSingleStreamSpillerFactory(
124128
throw new IllegalArgumentException(format("spill path %s is not accessible, it must be +rwx; adjust %s config property or filesystem permissions", path, SPILLER_SPILL_PATH));
125129
}
126130
});
131+
this.spillFileCount = spillFileCount;
127132
this.maxUsedSpaceThreshold = maxUsedSpaceThreshold;
128133
this.spillEncryptionEnabled = spillEncryptionEnabled;
129134
this.roundRobinIndex = 0;
@@ -165,14 +170,19 @@ private static void cleanupOldSpillFiles(Path path)
165170
}
166171

167172
@Override
168-
public SingleStreamSpiller create(List<Type> types, SpillContext spillContext, LocalMemoryContext memoryContext)
173+
public SingleStreamSpiller create(List<Type> types, SpillContext spillContext, LocalMemoryContext memoryContext, boolean parallelSpill)
169174
{
170175
Optional<SecretKey> encryptionKey = spillEncryptionEnabled ? Optional.of(createRandomAesEncryptionKey()) : Optional.empty();
176+
int spillFileCount = parallelSpill ? this.spillFileCount : 1;
177+
ImmutableList.Builder<Path> paths = ImmutableList.builderWithExpectedSize(spillFileCount);
178+
for (int i = 0; i < spillFileCount; i++) {
179+
paths.add(getNextSpillPath());
180+
}
171181
return new FileSingleStreamSpiller(
172182
serdeFactory,
173183
encryptionKey,
174184
executor,
175-
getNextSpillPath(),
185+
paths.build(),
176186
spillerStats,
177187
spillContext,
178188
memoryContext,

core/trino-main/src/main/java/io/trino/spiller/SingleStreamSpillerFactory.java

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,16 @@
2121

2222
public interface SingleStreamSpillerFactory
2323
{
24-
SingleStreamSpiller create(List<Type> types, SpillContext spillContext, LocalMemoryContext memoryContext);
24+
default SingleStreamSpiller create(List<Type> types, SpillContext spillContext, LocalMemoryContext memoryContext)
25+
{
26+
return create(types, spillContext, memoryContext, false);
27+
}
28+
29+
SingleStreamSpiller create(List<Type> types, SpillContext spillContext, LocalMemoryContext memoryContext, boolean parallelSpill);
2530

2631
static SingleStreamSpillerFactory unsupportedSingleStreamSpillerFactory()
2732
{
28-
return (types, spillContext, memoryContext) -> {
33+
return (types, spillContext, memoryContext, parallelSpill) -> {
2934
throw new UnsupportedOperationException();
3035
};
3136
}

0 commit comments

Comments
 (0)