diff --git a/CHANGELOG.md b/CHANGELOG.md index 57c19a20c..35405c2c8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,12 @@ All notable changes to this project will be documented in this file. ## [Unreleased] +### Added + +- hive: Build [hive-metastore-opa-authorizer](https://github.com/boschglobal/hive-metastore-opa-authorizer) from source and add to image ([#1340]). + +[#1340]: https://github.com/stackabletech/docker-images/pull/1340 + ## [25.11.0] - 2025-11-07 ## [25.11.0-rc1] - 2025-11-06 diff --git a/hive/Dockerfile b/hive/Dockerfile index 46ab0412f..fb33fd8f0 100644 --- a/hive/Dockerfile +++ b/hive/Dockerfile @@ -2,6 +2,7 @@ # check=error=true FROM local-image/hadoop/hadoop AS hadoop-builder +FROM local-image/hive/hive-metastore-opa-authorizer AS hive-metastore-opa-authorizer-builder FROM local-image/java-devel AS hive-builder @@ -184,6 +185,8 @@ COPY --chown=${STACKABLE_USER_UID}:0 --from=hive-builder /stackable/hive-${PRODU COPY --chown=${STACKABLE_USER_UID}:0 --from=hive-builder /stackable/hadoop-${HADOOP_VERSION}-stackable${RELEASE_VERSION} /stackable/hadoop-${HADOOP_VERSION}-stackable${RELEASE_VERSION} COPY --chown=${STACKABLE_USER_UID}:0 --from=hadoop-builder /stackable/*-src.tar.gz /stackable COPY --chown=${STACKABLE_USER_UID}:0 --from=hive-builder /stackable/jmx /stackable/jmx +COPY --chown=${STACKABLE_USER_UID}:0 --from=hive-metastore-opa-authorizer-builder /stackable/opa-authorizer-bin /stackable/apache-hive-metastore-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}-bin/lib +COPY --chown=${STACKABLE_USER_UID}:0 --from=hive-metastore-opa-authorizer-builder /stackable/opa-authorizer-src /stackable COPY --chown=${STACKABLE_USER_UID}:0 hive/stackable/jmx /stackable/jmx COPY --chown=${STACKABLE_USER_UID}:0 hive/stackable/bin/start-metastore /stackable/apache-hive-metastore-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}-bin/bin @@ -230,8 +233,8 @@ EOF USER ${STACKABLE_USER_UID} -ENV HADOOP_HOME=/stackable/hadoop ENV HIVE_HOME=/stackable/hive-metastore 
+ENV HADOOP_HOME=/stackable/hadoop ENV PATH="${PATH}":/stackable/hadoop/bin:/stackable/hive-metastore/bin # The following 2 env-vars are required for common hadoop scripts even if the respective libraries are never used. diff --git a/hive/boil-config.toml b/hive/boil-config.toml index 2c99809a8..f4d4ed316 100644 --- a/hive/boil-config.toml +++ b/hive/boil-config.toml @@ -3,6 +3,8 @@ java-base = "11" java-devel = "8" "hadoop/hadoop" = "3.3.6" +# hive-metastore-opa-authorizer from: https://github.com/boschglobal/hive-metastore-opa-authorizer +"hive/hive-metastore-opa-authorizer" = "v1.0.0-hive-3.1.3-hadoop-3.3.6" [versions."3.1.3".build-arguments] jmx-exporter-version = "1.3.0" @@ -11,24 +13,13 @@ aws-java-sdk-bundle-version = "1.12.367" azure-storage-version = "7.0.1" azure-keyvault-core-version = "1.0.0" -[versions."4.0.0".local-images] -# Hive 4 must be built with Java 8 (according to GitHub README) but seems to run on Java 11 -java-base = "11" -java-devel = "8" -"hadoop/hadoop" = "3.3.6" - -[versions."4.0.0".build-arguments] -jmx-exporter-version = "1.3.0" -# Keep consistent with the dependency from Hadoop: https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-aws/3.3.6 -aws-java-sdk-bundle-version = "1.12.367" -azure-storage-version = "7.0.1" -azure-keyvault-core-version = "1.0.0" - [versions."4.0.1".local-images] # Hive 4.0 must be built with Java 8 (according to GitHub README) but seems to run on Java 11 java-base = "11" java-devel = "8" "hadoop/hadoop" = "3.3.6" +# hive-metastore-opa-authorizer from: https://github.com/boschglobal/hive-metastore-opa-authorizer +"hive/hive-metastore-opa-authorizer" = "v1.0.0-hive-4.0.1-hadoop-3.3.6" [versions."4.0.1".build-arguments] jmx-exporter-version = "1.3.0" @@ -42,6 +33,8 @@ azure-keyvault-core-version = "1.0.0" java-base = "17" java-devel = "17" "hadoop/hadoop" = "3.4.2" +# hive-metastore-opa-authorizer from: https://github.com/boschglobal/hive-metastore-opa-authorizer +"hive/hive-metastore-opa-authorizer" = 
"v1.0.0-hive-4.1.0-hadoop-3.4.2" [versions."4.1.0".build-arguments] jmx-exporter-version = "1.3.0" diff --git a/hive/hive-metastore-opa-authorizer/Dockerfile b/hive/hive-metastore-opa-authorizer/Dockerfile new file mode 100644 index 000000000..ba7db4911 --- /dev/null +++ b/hive/hive-metastore-opa-authorizer/Dockerfile @@ -0,0 +1,58 @@ +# syntax=docker/dockerfile:1.16.0@sha256:e2dd261f92e4b763d789984f6eab84be66ab4f5f08052316d8eb8f173593acf7 +# check=error=true + +FROM local-image/java-devel + +ARG AUTHORIZER_VERSION +ARG HIVE_VERSION +ARG HADOOP_VERSION +ARG STACKABLE_USER_UID +# Setting this to anything other than "true" will keep the cache folders around (e.g. for Maven, NPM etc.) +# This can be used to speed up builds when disk space is of no concern. +ARG DELETE_CACHES="true" + +USER ${STACKABLE_USER_UID} +WORKDIR /stackable + +COPY --chown=${STACKABLE_USER_UID}:0 hive/hive-metastore-opa-authorizer/stackable/patches/patchable.toml /stackable/src/hive/hive-metastore-opa-authorizer/stackable/patches/patchable.toml +COPY --chown=${STACKABLE_USER_UID}:0 hive/hive-metastore-opa-authorizer/stackable/patches/${AUTHORIZER_VERSION} /stackable/src/hive/hive-metastore-opa-authorizer/stackable/patches/${AUTHORIZER_VERSION} + +RUN <<'EOF' +set -euo pipefail + +# for moving nested artifacts out of target folder +mkdir -p /stackable/opa-authorizer-bin +# containing sources +mkdir -p /stackable/opa-authorizer-src + +cd "$(/stackable/patchable --images-repo-root=src checkout hive/hive-metastore-opa-authorizer ${AUTHORIZER_VERSION})" + +# Create snapshot of the source code including custom patches +tar -czf /stackable/opa-authorizer-src/hive-metastore-opa-authorizer-${AUTHORIZER_VERSION}-hive-${HIVE_VERSION}-hadoop-${HADOOP_VERSION}-src.tar.gz . 
+
+# Hive 3.x.x is built from the hms-v3 module, everything else from hms-v4. The Hive 3 branch can be removed once we no longer support Hive 3.x.x
+if [[ "${HIVE_VERSION}" =~ ^3\. ]]; then
+  mvn clean package -DskipTests "-Dhive.version=${HIVE_VERSION}" "-Dhadoop.version=${HADOOP_VERSION}" -f hms-v3/pom.xml
+  mv "hms-v3/target/com.bosch.bdps.hms3-${HIVE_VERSION}-${HADOOP_VERSION}-dev.jar" /stackable/opa-authorizer-bin
+else
+  mvn clean package -DskipTests "-Dhive.version=${HIVE_VERSION}" "-Dhadoop.version=${HADOOP_VERSION}" -f hms-v4/pom.xml
+
+  # The hive-metastore-opa-authorizer offers a shaded jar from version 4.x.x. Using the shaded jar leads to problems with the schema tool at pod startup.
+  # mv hms-v4/target/com.bosch.bdps.hms4-${HIVE_VERSION}-${HADOOP_VERSION}-dev.jar /stackable/opa-authorizer-bin
+  mv hms-v4/target/hms4-dev.jar /stackable/opa-authorizer-bin
+fi
+
+# We're removing these to make the intermediate layer smaller
+# This can be necessary even though it's only a builder image because the GitHub Action Runners only have very limited space available
+# and we are sometimes running into errors because we're out of space.
+# Therefore, we try to clean up all layers as much as possible.
+if [ "${DELETE_CACHES}" = "true" ] ; then + rm -rf /stackable/.m2/repository/* + rm -rf /stackable/.npm/* + rm -rf /stackable/.cache/* + rm -rf /stackable/src +fi + +# fix permissions +chmod --recursive g=u /stackable/opa-authorizer-bin +EOF diff --git a/hive/hive-metastore-opa-authorizer/boil-config.toml b/hive/hive-metastore-opa-authorizer/boil-config.toml new file mode 100644 index 000000000..5eff87f4d --- /dev/null +++ b/hive/hive-metastore-opa-authorizer/boil-config.toml @@ -0,0 +1,26 @@ +[versions."v1.0.0-hive-3.1.3-hadoop-3.3.6".local-images] +"java-devel" = "11" + +[versions."v1.0.0-hive-3.1.3-hadoop-3.3.6".build-arguments] +authorizer-version = "v1.0.0" +hive-version = "3.1.3" +hadoop-version = "3.3.6" +delete-caches = "true" + +[versions."v1.0.0-hive-4.0.1-hadoop-3.3.6".local-images] +"java-devel" = "11" + +[versions."v1.0.0-hive-4.0.1-hadoop-3.3.6".build-arguments] +authorizer-version = "v1.0.0" +hive-version = "4.0.1" +hadoop-version = "3.3.6" +delete-caches = "true" + +[versions."v1.0.0-hive-4.1.0-hadoop-3.4.2".local-images] +"java-devel" = "17" + +[versions."v1.0.0-hive-4.1.0-hadoop-3.4.2".build-arguments] +authorizer-version = "v1.0.0" +hive-version = "4.1.0" +hadoop-version = "3.4.2" +delete-caches = "true" diff --git a/hive/hive-metastore-opa-authorizer/stackable/patches/patchable.toml b/hive/hive-metastore-opa-authorizer/stackable/patches/patchable.toml new file mode 100644 index 000000000..c8755e3a6 --- /dev/null +++ b/hive/hive-metastore-opa-authorizer/stackable/patches/patchable.toml @@ -0,0 +1,2 @@ +upstream = "https://github.com/boschglobal/hive-metastore-opa-authorizer" +default-mirror = "https://github.com/stackabletech/hive-metastore-opa-authorizer" diff --git a/hive/hive-metastore-opa-authorizer/stackable/patches/v1.0.0/patchable.toml b/hive/hive-metastore-opa-authorizer/stackable/patches/v1.0.0/patchable.toml new file mode 100644 index 000000000..93c50d9ff --- /dev/null +++ 
b/hive/hive-metastore-opa-authorizer/stackable/patches/v1.0.0/patchable.toml @@ -0,0 +1,2 @@ +mirror = "https://github.com/stackabletech/hive-metastore-opa-authorizer" +base = "1925fee7512d4afba4a9d83c303aa241d0e5412e"