From 04eae573ce21585613a36cbbc952683afa2e71f0 Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Wed, 5 Nov 2025 17:57:49 +0100 Subject: [PATCH 01/11] extension to list for 3.0.6 --- airflow/Dockerfile | 3 +++ airflow/boil-config.toml | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/airflow/Dockerfile b/airflow/Dockerfile index 9165d2918..b65a8666b 100644 --- a/airflow/Dockerfile +++ b/airflow/Dockerfile @@ -58,6 +58,9 @@ RUN microdnf module enable -y nodejs:${NODEJS_VERSION} && \ microdnf update && \ microdnf install \ cyrus-sasl-devel \ + # Needed for kerberos + cyrus-sasl-gssapi \ + krb5-devel\ # Needed by ./configure to build gevent, see snippet [1] at the end of file diffutils \ # Needed to build gevent, see snippet [1] at the end of file diff --git a/airflow/boil-config.toml b/airflow/boil-config.toml index 54d5f62c2..1272e3a05 100644 --- a/airflow/boil-config.toml +++ b/airflow/boil-config.toml @@ -58,6 +58,6 @@ s3fs-version = "2024.9.0" cyclonedx-bom-version = "6.0.0" tini-version = "0.19.0" uv-version = "0.7.8" -airflow-extras = "amazon,apache-kafka,async,celery,cncf-kubernetes,common-messaging,docker,elasticsearch,fab,ftp,grpc,hashicorp,http,ldap,google,microsoft-azure,odbc,pandas,postgres,redis,sendgrid,sftp,slack,ssh,statsd,trino" +airflow-extras = "amazon,apache-druid,apache-flink,apache-hdfs,apache-hive,apache-iceberg,apache-kafka,apache-webhdfs,async,celery,cncf-kubernetes,common-messaging,databricks,datadog,discord,docker,elasticsearch,exasol,fab,ftp,google,grpc,hashicorp,http,influxdb,kerberos,ldap,microsoft-azure,mongodb,neo4,odbc,openai,opsgenie,oracle,otel,pandas,polars,postgres,rabbitmq,redis,salesforce,sendgrid,sftp,slack,ssh,statsd,tableau,teradata,trino" opa-auth-manager = "airflow-3" nodejs-version = "20" From e565e891168f0dcb040ae185f5aa94bcb7e6f7cf Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Thu, 6 Nov 2025 12:13:08 +0100 Subject: [PATCH 02/11] full list based on constraints file --- 
airflow/boil-config.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/airflow/boil-config.toml b/airflow/boil-config.toml index 1272e3a05..430f07ddf 100644 --- a/airflow/boil-config.toml +++ b/airflow/boil-config.toml @@ -42,7 +42,7 @@ s3fs-version = "2024.9.0" cyclonedx-bom-version = "6.0.0" tini-version = "0.19.0" uv-version = "0.7.8" -airflow-extras = "async,amazon,celery,cncf-kubernetes,docker,elasticsearch,fab,ftp,grpc,hashicorp,http,ldap,google,microsoft-azure,odbc,pandas,postgres,redis,sendgrid,sftp,slack,ssh,statsd,trino" +airflow-extras = "airbyte,alibaba,amazon,apache-beam,apache-cassandra,apache-drill,apache-druid,apache-flink,apache-hdfs,apache-hive,apache-iceberg,apache-impala,apache-kafka,apache-kylin,apache-livy,apache-pig,apache-pinot,apache-spark,apprise,arangodb,asana,atlassian-jira,celery,cloudant,cncf-kubernetes,cohere,common-compat,common-io,common-messaging,common-sql,databricks,datadog,dbt-cloud,dingding,discord,docker,edge3,elasticsearch,exasol,fab,facebook,ftp,git,github,google,grpc,hashicorp,http,imap,influxdb,jdbc,jenkins,microsoft-azure,microsoft-mssql,microsoft-psrp,microsoft-winrm,mongo,neo4j,odbc,openai,openfaas,openlineage,opensearch,opsgenie,oracle,pagerduty,papermill,pgvector,pinecone,postgres,presto,qdrant,redis,salesforce,samba,segment,sendgrid,sftp,singularity,slack,smtp,snowflake,sqlite,ssh,standard,tableau,telegram,teradata,trino,vertica,weaviate,yandex,ydb,zendesk" opa-auth-manager = "airflow-3" nodejs-version = "20" @@ -58,6 +58,6 @@ s3fs-version = "2024.9.0" cyclonedx-bom-version = "6.0.0" tini-version = "0.19.0" uv-version = "0.7.8" -airflow-extras = 
"amazon,apache-druid,apache-flink,apache-hdfs,apache-hive,apache-iceberg,apache-kafka,apache-webhdfs,async,celery,cncf-kubernetes,common-messaging,databricks,datadog,discord,docker,elasticsearch,exasol,fab,ftp,google,grpc,hashicorp,http,influxdb,kerberos,ldap,microsoft-azure,mongodb,neo4,odbc,openai,opsgenie,oracle,otel,pandas,polars,postgres,rabbitmq,redis,salesforce,sendgrid,sftp,slack,ssh,statsd,tableau,teradata,trino" +airflow-extras = "airbyte,alibaba,amazon,apache-beam,apache-cassandra,apache-drill,apache-druid,apache-flink,apache-hdfs,apache-hive,apache-iceberg,apache-impala,apache-kafka,apache-kylin,apache-livy,apache-pig,apache-pinot,apache-spark,apprise,arangodb,asana,atlassian-jira,celery,cloudant,cncf-kubernetes,cohere,common-compat,common-io,common-messaging,common-sql,databricks,datadog,dbt-cloud,dingding,discord,docker,edge3,elasticsearch,exasol,fab,facebook,ftp,git,github,google,grpc,hashicorp,http,imap,influxdb,jdbc,jenkins,microsoft-azure,microsoft-mssql,microsoft-psrp,microsoft-winrm,mongo,neo4j,odbc,openai,openfaas,openlineage,opensearch,opsgenie,oracle,pagerduty,papermill,pgvector,pinecone,postgres,presto,qdrant,redis,salesforce,samba,segment,sendgrid,sftp,singularity,slack,smtp,snowflake,sqlite,ssh,standard,tableau,telegram,teradata,trino,vertica,weaviate,yandex,ydb,zendesk" opa-auth-manager = "airflow-3" nodejs-version = "20" From c76471e326701ed7056886f4e5293b1e70515fa9 Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Thu, 6 Nov 2025 15:54:40 +0100 Subject: [PATCH 03/11] revert 3.0.1, drop pyspark --- airflow/README.md | 7 +++++++ airflow/boil-config.toml | 4 ++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/airflow/README.md b/airflow/README.md index ff942ce46..961c206b7 100644 --- a/airflow/README.md +++ b/airflow/README.md @@ -16,3 +16,10 @@ Example output: Downloading constraints file for Airflow 3.0.6 (Python 3.12) Successfully pulled new constraints file: constraints-3.0.6-python3.12.txt ``` + +## Airflow 
providers/extras + +The providers are released independently of Airflow. +The expected versions are listed in the constraints files, but these change over time. +To keep the installation tightly coupled to the associated constraints it is best to extract the list of providers from the specific constraints file being used to build the product image. +The only provider that is currently excluded is mysql, as it requires an implementation of: https://github.com/apache/airflow/blob/main/scripts/docker/install_mysql.sh. \ No newline at end of file diff --git a/airflow/boil-config.toml b/airflow/boil-config.toml index 430f07ddf..ed88746d7 100644 --- a/airflow/boil-config.toml +++ b/airflow/boil-config.toml @@ -42,7 +42,7 @@ s3fs-version = "2024.9.0" cyclonedx-bom-version = "6.0.0" tini-version = "0.19.0" uv-version = "0.7.8" -airflow-extras = "airbyte,alibaba,amazon,apache-beam,apache-cassandra,apache-drill,apache-druid,apache-flink,apache-hdfs,apache-hive,apache-iceberg,apache-impala,apache-kafka,apache-kylin,apache-livy,apache-pig,apache-pinot,apache-spark,apprise,arangodb,asana,atlassian-jira,celery,cloudant,cncf-kubernetes,cohere,common-compat,common-io,common-messaging,common-sql,databricks,datadog,dbt-cloud,dingding,discord,docker,edge3,elasticsearch,exasol,fab,facebook,ftp,git,github,google,grpc,hashicorp,http,imap,influxdb,jdbc,jenkins,microsoft-azure,microsoft-mssql,microsoft-psrp,microsoft-winrm,mongo,neo4j,odbc,openai,openfaas,openlineage,opensearch,opsgenie,oracle,pagerduty,papermill,pgvector,pinecone,postgres,presto,qdrant,redis,salesforce,samba,segment,sendgrid,sftp,singularity,slack,smtp,snowflake,sqlite,ssh,standard,tableau,telegram,teradata,trino,vertica,weaviate,yandex,ydb,zendesk" +airflow-extras = "async,amazon,celery,cncf-kubernetes,docker,elasticsearch,fab,ftp,grpc,hashicorp,http,ldap,google,microsoft-azure,odbc,pandas,postgres,redis,sendgrid,sftp,slack,ssh,statsd,trino" opa-auth-manager = "airflow-3" nodejs-version = "20" @@ -58,6 +58,6 @@ 
s3fs-version = "2024.9.0" cyclonedx-bom-version = "6.0.0" tini-version = "0.19.0" uv-version = "0.7.8" -airflow-extras = "airbyte,alibaba,amazon,apache-beam,apache-cassandra,apache-drill,apache-druid,apache-flink,apache-hdfs,apache-hive,apache-iceberg,apache-impala,apache-kafka,apache-kylin,apache-livy,apache-pig,apache-pinot,apache-spark,apprise,arangodb,asana,atlassian-jira,celery,cloudant,cncf-kubernetes,cohere,common-compat,common-io,common-messaging,common-sql,databricks,datadog,dbt-cloud,dingding,discord,docker,edge3,elasticsearch,exasol,fab,facebook,ftp,git,github,google,grpc,hashicorp,http,imap,influxdb,jdbc,jenkins,microsoft-azure,microsoft-mssql,microsoft-psrp,microsoft-winrm,mongo,neo4j,odbc,openai,openfaas,openlineage,opensearch,opsgenie,oracle,pagerduty,papermill,pgvector,pinecone,postgres,presto,qdrant,redis,salesforce,samba,segment,sendgrid,sftp,singularity,slack,smtp,snowflake,sqlite,ssh,standard,tableau,telegram,teradata,trino,vertica,weaviate,yandex,ydb,zendesk" +airflow-extras = 
"async,graphviz,kerberos,otel,sentry,standard,statsd,aiobotocore,cloudpickle,github-enterprise,google-auth,ldap,leveldb,pandas,polars,rabbitmq,s3fs,saml,uv,airbyte,alibaba,amazon,apache-beam,apache-cassandra,apache-drill,apache-druid,apache-flink,apache-hdfs,apache-hive,apache-iceberg,apache-impala,apache-kafka,apache-kylin,apache-livy,apache-pig,apache-pinot,apprise,arangodb,asana,atlassian-jira,celery,cloudant,cncf-kubernetes,cohere,common-compat,common-io,common-messaging,common-sql,databricks,datadog,dbt-cloud,dingding,discord,docker,edge3,elasticsearch,exasol,fab,facebook,ftp,git,github,google,grpc,hashicorp,http,imap,influxdb,jdbc,jenkins,microsoft-azure,microsoft-mssql,microsoft-psrp,microsoft-winrm,mongo,neo4j,odbc,openai,openfaas,openlineage,opensearch,opsgenie,oracle,pagerduty,papermill,pgvector,pinecone,postgres,presto,qdrant,redis,salesforce,samba,segment,sendgrid,sftp,singularity,slack,smtp,snowflake,sqlite,ssh,standard,tableau,telegram,teradata,trino,vertica,weaviate,yandex,ydb,zendesk" opa-auth-manager = "airflow-3" nodejs-version = "20" From 565659933ae92488db241a15677580188ea0c171 Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Thu, 6 Nov 2025 16:20:12 +0100 Subject: [PATCH 04/11] extend readme comment --- airflow/README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/airflow/README.md b/airflow/README.md index 961c206b7..5b8590a03 100644 --- a/airflow/README.md +++ b/airflow/README.md @@ -22,4 +22,6 @@ Successfully pulled new constraints file: constraints-3.0.6-python3.12.txt The providers are released independently of Airflow. The expected versions are listed in the constraints files, but these change over time. To keep the installation tightly coupled to the associated constraints it is best to extract the list of providers from the specific constraints file being used to build the product image. 
-The only provider that is currently excluded is mysql, as it requires an implementation of: https://github.com/apache/airflow/blob/main/scripts/docker/install_mysql.sh. \ No newline at end of file +The only providers that are currently excluded are: +- `mysql`, as it requires an implementation of: https://github.com/apache/airflow/blob/main/scripts/docker/install_mysql.sh +- `apache-spark`, due to the size (roughly 500MB) and the number high/critical CVEs it adds to the image From 086df15c51588450fa27ed11b40e51940d4ed2e1 Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Thu, 6 Nov 2025 17:26:52 +0100 Subject: [PATCH 05/11] linting --- airflow/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/airflow/README.md b/airflow/README.md index 5b8590a03..9d2656ac8 100644 --- a/airflow/README.md +++ b/airflow/README.md @@ -23,5 +23,6 @@ The providers are released independently of Airflow. The expected versions are listed in the constraints files, but these change over time. To keep the installation tightly coupled to the associated constraints it is best to extract the list of providers from the specific constraints file being used to build the product image. 
The only providers that are currently excluded are: + - `mysql`, as it requires an implementation of: https://github.com/apache/airflow/blob/main/scripts/docker/install_mysql.sh - `apache-spark`, due to the size (roughly 500MB) and the number high/critical CVEs it adds to the image From 20af2bb25272956d05569e9cc051a3da01d6cdaf Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Thu, 6 Nov 2025 17:40:36 +0100 Subject: [PATCH 06/11] linting --- airflow/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airflow/README.md b/airflow/README.md index 9d2656ac8..7e8952ded 100644 --- a/airflow/README.md +++ b/airflow/README.md @@ -24,5 +24,5 @@ The expected versions are listed in the constraints files, but these change over To keep the installation tightly coupled to the associated constraints it is best to extract the list of providers from the specific constraints file being used to build the product image. The only providers that are currently excluded are: -- `mysql`, as it requires an implementation of: https://github.com/apache/airflow/blob/main/scripts/docker/install_mysql.sh +- `mysql`, as it requires an implementation of: <https://github.com/apache/airflow/blob/main/scripts/docker/install_mysql.sh> - `apache-spark`, due to the size (roughly 500MB) and the number high/critical CVEs it adds to the image From 2bfbb6c14f20254266ef3beaf42addf5b3507c5f Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Thu, 6 Nov 2025 17:50:49 +0100 Subject: [PATCH 07/11] changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index e619fe4be..806c6b41e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -58,6 +58,7 @@ All notable changes to this project will be documented in this file.
- nifi: Use a patched version of logback to fix corrupted logs ([#1314]) - zookeeper: Use a patched version of logback to fix corrupted logs ([#1320]) - kafka: Use patched version of reload4j to fix corrupted logs ([#1330]) +- airflow: Extend list of providers for 3.0.6 ([#1336]) ### Fixed @@ -126,6 +127,7 @@ All notable changes to this project will be documented in this file. [#1323]: https://github.com/stackabletech/docker-images/pull/1323 [#1326]: https://github.com/stackabletech/docker-images/pull/1326 [#1330]: https://github.com/stackabletech/docker-images/pull/1330 +[#1336]: https://github.com/stackabletech/docker-images/pull/1336 ## [25.7.0] - 2025-07-23 From 8a2aa8acd0cc050c3d0c8658778c8dcf593491dd Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Fri, 7 Nov 2025 12:35:48 +0100 Subject: [PATCH 08/11] typo --- airflow/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airflow/README.md b/airflow/README.md index 7e8952ded..7ca3a8d66 100644 --- a/airflow/README.md +++ b/airflow/README.md @@ -25,4 +25,4 @@ To keep the installation tightly coupled to the associated constraints it is bes The only providers that are currently excluded are: - `mysql`, as it requires an implementation of: <https://github.com/apache/airflow/blob/main/scripts/docker/install_mysql.sh> -- `apache-spark`, due to the size (roughly 500MB) and the number high/critical CVEs it adds to the image +- `apache-spark`, due to the size (roughly 500MB) and the number of high/critical CVEs it adds to the image From 522efd8d975340a254161efa452e59d3db2b960b Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Fri, 7 Nov 2025 12:37:16 +0100 Subject: [PATCH 09/11] corrected changelog --- CHANGELOG.md | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d82551d71..2b88a8c43 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,12 @@ All notable changes to this project will be documented in this file.
## [Unreleased] +### Changed + +- airflow: Extend list of providers for 3.0.6 ([#1336]) + +[#1336]: https://github.com/stackabletech/docker-images/pull/1336 + ## [25.11.0] - 2025-11-07 ## [25.11.0-rc1] - 2025-11-06 @@ -62,7 +68,6 @@ All notable changes to this project will be documented in this file. - nifi: Use a patched version of logback to fix corrupted logs ([#1314]) - zookeeper: Use a patched version of logback to fix corrupted logs ([#1320]) - kafka: Use patched version of reload4j to fix corrupted logs ([#1330]) -- airflow: Extend list of providers for 3.0.6 ([#1336]) ### Fixed @@ -131,7 +136,6 @@ All notable changes to this project will be documented in this file. [#1323]: https://github.com/stackabletech/docker-images/pull/1323 [#1326]: https://github.com/stackabletech/docker-images/pull/1326 [#1330]: https://github.com/stackabletech/docker-images/pull/1330 -[#1336]: https://github.com/stackabletech/docker-images/pull/1336 ## [25.7.0] - 2025-07-23 From 4b1d9328483fae5e8030fac076c4a19e6169fdd5 Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Mon, 10 Nov 2025 17:18:24 +0100 Subject: [PATCH 10/11] split extras into separate lists --- airflow/Dockerfile | 19 +++++++++++++++++-- airflow/README.md | 7 ++++--- airflow/boil-config.toml | 27 +++++++++++++++++++++++---- 3 files changed, 44 insertions(+), 9 deletions(-) diff --git a/airflow/Dockerfile b/airflow/Dockerfile index b65a8666b..f518903fc 100644 --- a/airflow/Dockerfile +++ b/airflow/Dockerfile @@ -51,8 +51,16 @@ ARG UV_VERSION # Airflow "extras" packages are listed here: https://airflow.apache.org/docs/apache-airflow/stable/extra-packages-ref.html # They evolve over time and thus belong to the version-specific arguments. # The mysql provider is currently excluded. 
-# Requires implementation of https://github.com/apache/airflow/blob/2.2.5/scripts/docker/install_mysql.sh -ARG AIRFLOW_EXTRAS +# Requires implementation of https://github.com/apache/airflow/blob/main/scripts/docker/install_mysql.sh +# The providers are split into separate lists to make it easier to manage +# (and to compare to the online links). Default values are provided for +# backwards compatibility. +ARG AIRFLOW_EXTRAS_CORE="" +ARG AIRFLOW_EXTRAS_META="" +ARG AIRFLOW_EXTRAS_PROVIDER_APACHE="" +ARG AIRFLOW_EXTRAS_EXTERNAL_SERVICES="" +ARG AIRFLOW_EXTRAS_LOCALLY_INSTALLED_SOFTWARE="" +ARG AIRFLOW_EXTRAS_OTHER="" RUN microdnf module enable -y nodejs:${NODEJS_VERSION} && \ microdnf update && \ @@ -96,6 +104,13 @@ COPY --chown=${STACKABLE_USER_UID}:0 airflow/stackable/patches/${PRODUCT_VERSION WORKDIR /stackable RUN < 0 {if (!seen[$0]++) print $0}' | tr '\n' ',' | sed 's/,$//') + python${PYTHON_VERSION} -m venv --system-site-packages /stackable/app source /stackable/app/bin/activate diff --git a/airflow/README.md b/airflow/README.md index 7ca3a8d66..570f64296 100644 --- a/airflow/README.md +++ b/airflow/README.md @@ -20,9 +20,10 @@ Successfully pulled new constraints file: constraints-3.0.6-python3.12.txt ## Airflow providers/extras The providers are released independently of Airflow. -The expected versions are listed in the constraints files, but these change over time. -To keep the installation tightly coupled to the associated constraints it is best to extract the list of providers from the specific constraints file being used to build the product image. -The only providers that are currently excluded are: +The list of provider packages are listed in the build configuration file, matching the groups used in the online documentation to make them easier to compare and manage (these will be concatentated into a single list in the Dockerfile). +The expected versions are listed in the constraints files, but these can change over time.
+To keep the installation tightly coupled to the associated constraints it is best to only use providers listed in the relevant constraints file. +Other than the above filter, the only providers that are currently excluded are: - `mysql`, as it requires an implementation of: <https://github.com/apache/airflow/blob/main/scripts/docker/install_mysql.sh> - `apache-spark`, due to the size (roughly 500MB) and the number of high/critical CVEs it adds to the image diff --git a/airflow/boil-config.toml b/airflow/boil-config.toml index ed88746d7..324abdffe 100644 --- a/airflow/boil-config.toml +++ b/airflow/boil-config.toml @@ -10,7 +10,7 @@ s3fs-version = "2024.9.0" cyclonedx-bom-version = "6.0.0" tini-version = "0.19.0" uv-version = "0.7.8" -airflow-extras = "async,amazon,celery,cncf.kubernetes,docker,dask,elasticsearch,ftp,grpc,hashicorp,http,ldap,google,google_auth,microsoft.azure,odbc,pandas,postgres,redis,sendgrid,sftp,slack,ssh,statsd,virtualenv,trino" +airflow-extras-other = "async,amazon,celery,cncf.kubernetes,docker,dask,elasticsearch,ftp,grpc,hashicorp,http,ldap,google,google_auth,microsoft.azure,odbc,pandas,postgres,redis,sendgrid,sftp,slack,ssh,statsd,virtualenv,trino" opa-auth-manager = "airflow-2" nodejs-version = "20" @@ -26,7 +26,7 @@ s3fs-version = "2024.9.0" cyclonedx-bom-version = "6.0.0" tini-version = "0.19.0" uv-version = "0.7.8" -airflow-extras = "async,amazon,celery,cncf.kubernetes,docker,dask,elasticsearch,ftp,grpc,hashicorp,http,ldap,google,google_auth,microsoft.azure,odbc,pandas,postgres,redis,sendgrid,sftp,slack,ssh,statsd,virtualenv,trino" +airflow-extras-other = "async,amazon,celery,cncf.kubernetes,docker,dask,elasticsearch,ftp,grpc,hashicorp,http,ldap,google,google_auth,microsoft.azure,odbc,pandas,postgres,redis,sendgrid,sftp,slack,ssh,statsd,virtualenv,trino" opa-auth-manager = "airflow-2" nodejs-version = "20" @@ -42,7 +42,7 @@ s3fs-version = "2024.9.0" cyclonedx-bom-version = "6.0.0" tini-version = "0.19.0" uv-version = "0.7.8" -airflow-extras =
"async,amazon,celery,cncf-kubernetes,docker,elasticsearch,fab,ftp,grpc,hashicorp,http,ldap,google,microsoft-azure,odbc,pandas,postgres,redis,sendgrid,sftp,slack,ssh,statsd,trino" +airflow-extras-other = "async,amazon,celery,cncf-kubernetes,docker,elasticsearch,fab,ftp,grpc,hashicorp,http,ldap,google,microsoft-azure,odbc,pandas,postgres,redis,sendgrid,sftp,slack,ssh,statsd,trino" opa-auth-manager = "airflow-3" nodejs-version = "20" @@ -58,6 +58,25 @@ s3fs-version = "2024.9.0" cyclonedx-bom-version = "6.0.0" tini-version = "0.19.0" uv-version = "0.7.8" -airflow-extras = "async,graphviz,kerberos,otel,sentry,standard,statsd,aiobotocore,cloudpickle,github-enterprise,google-auth,ldap,leveldb,pandas,polars,rabbitmq,s3fs,saml,uv,airbyte,alibaba,amazon,apache-beam,apache-cassandra,apache-drill,apache-druid,apache-flink,apache-hdfs,apache-hive,apache-iceberg,apache-impala,apache-kafka,apache-kylin,apache-livy,apache-pig,apache-pinot,apprise,arangodb,asana,atlassian-jira,celery,cloudant,cncf-kubernetes,cohere,common-compat,common-io,common-messaging,common-sql,databricks,datadog,dbt-cloud,dingding,discord,docker,edge3,elasticsearch,exasol,fab,facebook,ftp,git,github,google,grpc,hashicorp,http,imap,influxdb,jdbc,jenkins,microsoft-azure,microsoft-mssql,microsoft-psrp,microsoft-winrm,mongo,neo4j,odbc,openai,openfaas,openlineage,opensearch,opsgenie,oracle,pagerduty,papermill,pgvector,pinecone,postgres,presto,qdrant,redis,salesforce,samba,segment,sendgrid,sftp,singularity,slack,smtp,snowflake,sqlite,ssh,standard,tableau,telegram,teradata,trino,vertica,weaviate,yandex,ydb,zendesk" + +# Airflow extras are defined in separate lists to make them easier to check against the links below. The lists will be concatenated and duplicates removed in the dockerfile. 
+# See https://airflow.apache.org/docs/apache-airflow/3.0.6/extra-packages-ref.html#core-airflow-extras +airflow-extras-core="async,graphviz,kerberos,otel,sentry,standard,statsd" + +# See https://airflow.apache.org/docs/apache-airflow/3.0.6/extra-packages-ref.html#meta-airflow-package-extras +airflow-extras-meta="aiobotocore,cloudpickle,github-enterprise,google-auth,graphviz,ldap,leveldb,pandas,polars,rabbitmq,s3fs,saml,uv" + +# See https://airflow.apache.org/docs/apache-airflow/3.0.6/extra-packages-ref.html#apache-software-extras +airflow-extras-provider-apache="apache-beam,apache-cassandra,apache-drill,apache-druid,apache-flink,apache-hdfs,apache-hive,apache-iceberg,apache-impala,apache-kafka,apache-kylin,apache-livy,apache-pig,apache-pinot" + +# See https://airflow.apache.org/docs/apache-airflow/3.0.6/extra-packages-ref.html#external-services-extras +airflow-extras-external-services="airbyte,alibaba,apprise,amazon,asana,atlassian-jira,microsoft-azure,cloudant,cohere,databricks,datadog,dbt-cloud,dingding,discord,facebook,github,google,hashicorp,openai,opsgenie,pagerduty,pgvector,pinecone,qdrant,salesforce,sendgrid,segment,slack,snowflake,tableau,tabular,telegram,vertica,weaviate,yandex,ydb,zendesk" + +# See https://airflow.apache.org/docs/apache-airflow/3.0.6/extra-packages-ref.html#locally-installed-software-extras +airflow-extras-locally-installed-software="arangodb,celery,cncf-kubernetes,docker,edge3,elasticsearch,exasol,fab,git,github,influxdb,jenkins,mongo,microsoft-mssql,neo4j,odbc,openfaas,oracle,postgres,presto,redis,samba,singularity,teradata,trino" + +# See https://airflow.apache.org/docs/apache-airflow/3.0.6/extra-packages-ref.html#other-extras +airflow-extras-other="common-compat,common-io,common-messaging,common-sql,ftp,grpc,http,imap,jdbc,microsoft-psrp,microsoft-winrm,openlineage,opensearch,papermill,sftp,smtp,sqlite,ssh" + opa-auth-manager = "airflow-3" nodejs-version = "20" From 71ddeb947a395dac44cf8d2753ed510668294991 Mon Sep 17 00:00:00 2001 
From: Andrew Kenworthy Date: Tue, 11 Nov 2025 09:48:07 +0100 Subject: [PATCH 11/11] updated list of exclusions --- airflow/README.md | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/airflow/README.md b/airflow/README.md index 570f64296..cec43b269 100644 --- a/airflow/README.md +++ b/airflow/README.md @@ -23,7 +23,15 @@ The providers are released independently of Airflow. The list of provider packages are listed in the build configuration file, matching the groups used in the online documentation to make them easier to compare and manage (these will be concatentated into a single list in the Dockerfile). The expected versions are listed in the constraints files, but these can change over time. To keep the installation tightly coupled to the associated constraints it is best to only use providers listed in the relevant constraints file. -Other than the above filter, the only providers that are currently excluded are: + +### Version 3.0.6 + +Applying the filter above results in the omission of the following providers: + +- `apache-atlas` +- `apache-webhdfs` + +Other than the above, the only other providers that are currently excluded are: - `mysql`, as it requires an implementation of: <https://github.com/apache/airflow/blob/main/scripts/docker/install_mysql.sh> - `apache-spark`, due to the size (roughly 500MB) and the number of high/critical CVEs it adds to the image