From 749a33985b9c01d2e7cdcabcd1629e741e1eec0c Mon Sep 17 00:00:00 2001 From: GoGoWen2021 Date: Mon, 9 Oct 2023 20:18:32 +0800 Subject: [PATCH 1/2] support export doris hll/bitmap data type --- .../org/apache/doris/spark/serialization/RowBatch.java | 10 ++++++++++ .../scala/org/apache/doris/spark/sql/SchemaUtils.scala | 4 ++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/spark-doris-connector/src/main/java/org/apache/doris/spark/serialization/RowBatch.java b/spark-doris-connector/src/main/java/org/apache/doris/spark/serialization/RowBatch.java index cb4d303c..eb2a741b 100644 --- a/spark-doris-connector/src/main/java/org/apache/doris/spark/serialization/RowBatch.java +++ b/spark-doris-connector/src/main/java/org/apache/doris/spark/serialization/RowBatch.java @@ -377,6 +377,16 @@ public void convertArrowToRowBatch() throws DorisException { addValueToRow(rowIndex, value); } break; + case "HLL": + case "BITMAP": + Preconditions.checkArgument(mt.equals(Types.MinorType.VARCHAR), + typeMismatchMessage(currentType, mt)); + VarCharVector varcharVector = (VarCharVector) curFieldVector; + for (int rowIndex = 0; rowIndex < rowCountInOneBatch; rowIndex++) { + Object fieldValue = varcharVector.isNull(rowIndex) ? null : varcharVector.get(rowIndex); + addValueToRow(rowIndex, fieldValue); + } + break; default: String errMsg = "Unsupported type " + schema.get(col).getType(); logger.error(errMsg); diff --git a/spark-doris-connector/src/main/scala/org/apache/doris/spark/sql/SchemaUtils.scala b/spark-doris-connector/src/main/scala/org/apache/doris/spark/sql/SchemaUtils.scala index 982e580e..2770f742 100644 --- a/spark-doris-connector/src/main/scala/org/apache/doris/spark/sql/SchemaUtils.scala +++ b/spark-doris-connector/src/main/scala/org/apache/doris/spark/sql/SchemaUtils.scala @@ -130,8 +130,8 @@ private[spark] object SchemaUtils { case "ARRAY" => DataTypes.StringType case "MAP" => MapType(DataTypes.StringType, DataTypes.StringType) case "STRUCT" => DataTypes.StringType - case "HLL" => - throw new DorisException("Unsupported type " + dorisType) + case "HLL" => DataTypes.BinaryType + case "BITMAP" => DataTypes.BinaryType case _ => throw new DorisException("Unrecognized Doris type " + dorisType) } From 4989cba4539e78614d693d1c239545ed0ec5d2dd Mon Sep 17 00:00:00 2001 From: GoGoWen2021 Date: Thu, 9 Nov 2023 18:29:05 +0800 Subject: [PATCH 2/2] support read BITMAP --- .../java/org/apache/doris/spark/serialization/RowBatch.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spark-doris-connector/src/main/java/org/apache/doris/spark/serialization/RowBatch.java b/spark-doris-connector/src/main/java/org/apache/doris/spark/serialization/RowBatch.java index eb2a741b..b67f3d97 100644 --- a/spark-doris-connector/src/main/java/org/apache/doris/spark/serialization/RowBatch.java +++ b/spark-doris-connector/src/main/java/org/apache/doris/spark/serialization/RowBatch.java @@ -378,7 +378,7 @@ public void convertArrowToRowBatch() throws DorisException { } break; case "HLL": - case "BITMAP": + case "OBJECT"://BITMAP Preconditions.checkArgument(mt.equals(Types.MinorType.VARCHAR), typeMismatchMessage(currentType, mt)); VarCharVector varcharVector = (VarCharVector) curFieldVector;