112 changes: 112 additions & 0 deletions src/main/java/io/confluent/connect/jdbc/sink/BatchPKCompaction.java
@@ -0,0 +1,112 @@
/*
* Copyright 2018 Confluent Inc.
*
* Licensed under the Confluent Community License (the "License"); you may not use
* this file except in compliance with the License. You may obtain a copy of the
* License at
*
* http://www.confluent.io/confluent-community-license
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
*/

package io.confluent.connect.jdbc.sink;

import io.confluent.connect.jdbc.sink.metadata.FieldsMetadata;
import io.confluent.connect.jdbc.sink.metadata.SchemaPair;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.List;
import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.data.Struct;
import org.apache.kafka.connect.sink.SinkRecord;


/**
 * Deduplicates a batch of {@link SinkRecord}s before it is flushed, keeping
 * only the last record seen for each primary key while preserving the
 * relative order of the surviving records (last-write-wins).
 */
public class BatchPKCompaction {

private final JdbcSinkConfig.PrimaryKeyMode pkMode;
private final FieldsMetadata fieldsMetadata;
private final SchemaPair schemaPair;

public BatchPKCompaction(
JdbcSinkConfig.PrimaryKeyMode pkMode,
FieldsMetadata fieldsMetadata,
SchemaPair schemaPair) {
this.pkMode = pkMode;
this.fieldsMetadata = fieldsMetadata;
this.schemaPair = schemaPair;
}

  List<SinkRecord> applyCompaction(List<SinkRecord> records) {
    // Walk the batch backwards so putIfAbsent keeps the last record seen for
    // each primary key.
    LinkedHashMap<PrimaryKeyValue, SinkRecord> lastValues = new LinkedHashMap<>();
    for (int i = records.size() - 1; i >= 0; i--) {
      lastValues.putIfAbsent(getPK(records.get(i)), records.get(i));
    }
    // The map now holds the survivors in reverse batch order; restore the
    // original relative order before flushing.
    List<SinkRecord> result = new ArrayList<>(lastValues.values());
    Collections.reverse(result);
    return result;
  }

private PrimaryKeyValue getPK(SinkRecord record) {
switch (pkMode) {
case KAFKA: {
return new PrimaryKeyValue(record.topic(), record.kafkaPartition(), record.kafkaOffset());
}
case RECORD_KEY: {
return getPK(schemaPair.keySchema, record.key());
}
case RECORD_VALUE: {
return getPK(schemaPair.valueSchema, record.value());
}
case NONE: {
throw new UnsupportedOperationException("Compaction isn't supported for pkMode NONE");
}
default:
throw new UnsupportedOperationException("Compaction isn't supported for pkMode: " + pkMode);
}
}

  private PrimaryKeyValue getPK(Schema schema, Object value) {
    if (schema.type().isPrimitive()) {
      // A primitive key or value maps onto exactly one PK column.
      assert fieldsMetadata.keyFieldNames.size() == 1;
      return new PrimaryKeyValue(value);
    } else {
      // A struct key or value: extract each configured PK field in order.
      return new PrimaryKeyValue(
          fieldsMetadata.keyFieldNames.stream()
              .map(fieldName -> ((Struct) value).get(schema.field(fieldName)))
              .toArray()
      );
    }
  }

private static class PrimaryKeyValue {

private final Object[] values;

private PrimaryKeyValue(Object... values) {
this.values = values;
}

@Override
public boolean equals(Object obj) {
if (this == obj) {
return true;
}
if (obj == null || getClass() != obj.getClass()) {
return false;
}
PrimaryKeyValue that = (PrimaryKeyValue) obj;
return Arrays.equals(this.values, that.values);
}

@Override
public int hashCode() {
return Arrays.hashCode(values);
}
}
}
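The heart of applyCompaction is a reverse scan with putIfAbsent into a LinkedHashMap, followed by a final reverse: the last record per key wins, and the survivors keep the order of their last occurrence in the batch. A minimal standalone sketch of the same last-write-wins idiom, with illustrative names (not part of this PR):

import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.function.Function;

public class LastWins {
  // compact([a1, b1, a2], key) -> [b1, a2]: only the last occurrence of each
  // key survives, ordered by where that last occurrence appeared.
  static <K, V> List<V> compact(List<V> items, Function<V, K> keyFn) {
    LinkedHashMap<K, V> last = new LinkedHashMap<>();
    for (int i = items.size() - 1; i >= 0; i--) {
      last.putIfAbsent(keyFn.apply(items.get(i)), items.get(i));
    }
    List<V> result = new ArrayList<>(last.values());
    Collections.reverse(result);
    return result;
  }
}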
src/main/java/io/confluent/connect/jdbc/sink/BufferedRecords.java
@@ -43,6 +43,7 @@
import static java.util.Objects.nonNull;

public class BufferedRecords {

private static final Logger log = LoggerFactory.getLogger(BufferedRecords.class);

private final TableId tableId;
@@ -61,6 +62,7 @@ public class BufferedRecords
private StatementBinder updateStatementBinder;
private StatementBinder deleteStatementBinder;
private boolean deletesInBatch = false;
private BatchPKCompaction batchPKCompaction = null;

public BufferedRecords(
JdbcSinkConfig config,
@@ -178,7 +180,17 @@ public List<SinkRecord> flush() throws SQLException {
return new ArrayList<>();
}
log.debug("Flushing {} buffered records", records.size());
for (SinkRecord record : records) {

    List<SinkRecord> batch = records;
    if (config.batchPkCompactionEnabled) {
      // Created lazily: the batch's key and value schemas are only known once
      // records have been buffered.
      if (batchPKCompaction == null) {
        batchPKCompaction = new BatchPKCompaction(config.pkMode, fieldsMetadata,
            new SchemaPair(keySchema, valueSchema));
      }
      batch = batchPKCompaction.applyCompaction(records);
    }

for (SinkRecord record : batch) {
if (isNull(record.value()) && nonNull(deleteStatementBinder)) {
deleteStatementBinder.bindRecord(record);
} else {
src/main/java/io/confluent/connect/jdbc/sink/JdbcSinkConfig.java
@@ -301,6 +301,13 @@ public enum DateTimezone {
private static final String MSSQL_USE_MERGE_HOLDLOCK_DISPLAY =
"SQL Server - Use HOLDLOCK in MERGE";

public static final String BATCH_PK_COMPACTION = "batch.pk.compaction";
private static final String BATCH_PK_COMPACTION_DEFAULT = "false";
  private static final String BATCH_PK_COMPACTION_DOC =
      "Whether to compact each batch before it is flushed, retaining only the "
          + "latest record for each unique primary key.";
private static final String BATCH_PK_COMPACTION_DISPLAY =
"Retain only the latest record for each unique key";

/**
* The properties that begin with this prefix will be used to configure a class, specified by
* {@code jdbc.credentials.provider.class} if it implements {@link Configurable}.
@@ -457,6 +464,17 @@ public enum DateTimezone {
ConfigDef.Width.MEDIUM,
REPLACE_NULL_WITH_DEFAULT_DISPLAY
)
.define(
BATCH_PK_COMPACTION,
ConfigDef.Type.BOOLEAN,
BATCH_PK_COMPACTION_DEFAULT,
ConfigDef.Importance.LOW,
BATCH_PK_COMPACTION_DOC,
WRITES_GROUP,
6,
ConfigDef.Width.LONG,
BATCH_PK_COMPACTION_DISPLAY
)
// Data Mapping
.define(
TABLE_NAME_FORMAT,
@@ -614,6 +632,7 @@ public enum DateTimezone {
public final int batchSize;
public final boolean deleteEnabled;
public final boolean replaceNullWithDefault;
public final boolean batchPkCompactionEnabled;
public final int maxRetries;
public final int retryBackoffMs;
public final boolean autoCreate;
@@ -642,6 +661,7 @@ public JdbcSinkConfig(Map<?, ?> props) {
batchSize = getInt(BATCH_SIZE);
deleteEnabled = getBoolean(DELETE_ENABLED);
replaceNullWithDefault = getBoolean(REPLACE_NULL_WITH_DEFAULT);
batchPkCompactionEnabled = getBoolean(BATCH_PK_COMPACTION);
maxRetries = getInt(MAX_RETRIES);
retryBackoffMs = getInt(RETRY_BACKOFF_MS);
autoCreate = getBoolean(AUTO_CREATE);
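For illustration, a minimal sketch of turning the new flag on through the properties map that JdbcSinkConfig consumes. The connection URL and PK settings are placeholders, and this assumes connection.url is the only property without a default; compaction additionally requires a pk.mode other than none at flush time:

import io.confluent.connect.jdbc.sink.JdbcSinkConfig;
import java.util.HashMap;
import java.util.Map;

public class CompactionConfigExample {
  public static void main(String[] args) {
    Map<String, String> props = new HashMap<>();
    props.put("connection.url", "jdbc:postgresql://localhost:5432/mydb"); // placeholder
    props.put("pk.mode", "record_key"); // compaction throws for pk.mode=none
    props.put("pk.fields", "id");
    props.put("batch.pk.compaction", "true");

    JdbcSinkConfig config = new JdbcSinkConfig(props);
    System.out.println(config.batchPkCompactionEnabled); // prints: true
  }
}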
67 changes: 67 additions & 0 deletions src/test/java/io/confluent/connect/jdbc/sink/BatchPKCompactionTest.java
@@ -0,0 +1,67 @@
package io.confluent.connect.jdbc.sink;

import io.confluent.connect.jdbc.sink.metadata.FieldsMetadata;
import io.confluent.connect.jdbc.sink.metadata.SchemaPair;
import io.confluent.connect.jdbc.sink.metadata.SinkRecordField;
import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.data.SchemaBuilder;
import org.apache.kafka.connect.data.Struct;
import org.apache.kafka.connect.sink.SinkRecord;
import org.junit.Test;

import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicLong;

import static org.junit.Assert.assertEquals;

public class BatchPKCompactionTest {

private static final String FIELD_ID = "id";
private static final String FIELD_VALUE = "value";

  @Test
  public void retainsOnlyLatestRecordPerKey() {
final Map<String, SinkRecordField> allFields = new HashMap<>();
allFields.put(FIELD_ID, new SinkRecordField(Schema.INT64_SCHEMA, FIELD_ID, true));
allFields.put(FIELD_VALUE, new SinkRecordField(Schema.STRING_SCHEMA, FIELD_VALUE, false));
BatchPKCompaction compaction = new BatchPKCompaction(
JdbcSinkConfig.PrimaryKeyMode.RECORD_KEY,
new FieldsMetadata(
new HashSet<>(Arrays.asList(FIELD_ID)),
new HashSet<>(Arrays.asList(FIELD_VALUE)),
allFields
),
new SchemaPair(Schema.STRING_SCHEMA, schemaIdValue)
);

    final SinkRecord sinkRecord1 = createRecord(1L, "value1");
    final SinkRecord sinkRecord2 = createRecord(2L, "value2");
    final SinkRecord sinkRecord3 = createRecord(1L, "value3");
    List<SinkRecord> result =
        compaction.applyCompaction(Arrays.asList(sinkRecord1, sinkRecord2, sinkRecord3));

    // Record 1 (key 1) is superseded by record 3; survivors keep their order.
    assertEquals(2, result.size());
assertEquals(sinkRecord2, result.get(0));
assertEquals(sinkRecord3, result.get(1));
}


  private final Schema schemaIdValue = SchemaBuilder.struct()
      .field("id", Schema.INT64_SCHEMA)
      .field("value", Schema.STRING_SCHEMA)
      .build();

private final AtomicLong kafkaOffset = new AtomicLong(0);

private SinkRecord createRecord(Long id, String value) {
return new SinkRecord(
"topic",
0,
Schema.STRING_SCHEMA,
id.toString(),
schemaIdValue,
new Struct(schemaIdValue)
.put("id", id)
.put("value", value),
kafkaOffset.incrementAndGet()
);
}
}
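A case the test above does not exercise: with pk.mode=kafka the primary key is the (topic, partition, offset) triple, which is unique per record, so compaction should keep every record. A sketch of a companion test that could sit in the same class, reusing schemaIdValue and createRecord (not part of this PR; fieldsMetadata is not consulted in KAFKA mode):

  @Test
  public void kafkaPkModeKeepsEveryRecord() {
    Map<String, SinkRecordField> allFields = new HashMap<>();
    allFields.put(FIELD_ID, new SinkRecordField(Schema.INT64_SCHEMA, FIELD_ID, true));
    allFields.put(FIELD_VALUE, new SinkRecordField(Schema.STRING_SCHEMA, FIELD_VALUE, false));
    BatchPKCompaction compaction = new BatchPKCompaction(
        JdbcSinkConfig.PrimaryKeyMode.KAFKA,
        new FieldsMetadata(
            new HashSet<>(Arrays.asList(FIELD_ID)),
            new HashSet<>(Arrays.asList(FIELD_VALUE)),
            allFields
        ),
        new SchemaPair(Schema.STRING_SCHEMA, schemaIdValue)
    );

    // Same logical key twice, but distinct Kafka offsets: nothing is dropped.
    List<SinkRecord> batch = Arrays.asList(createRecord(1L, "a"), createRecord(1L, "b"));
    assertEquals(batch, compaction.applyCompaction(batch));
  }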