diff --git a/CHANGELOG.md b/CHANGELOG.md
index f64b05bf..259da2fc 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,12 @@ All notable changes to this project will be documented in this file.
 
 ## [Unreleased]
 
+### Added
+
+- Add OPA authorization using the operator-rs `OpaConfig` ([#652]).
+
+[#652]: https://github.com/stackabletech/hive-operator/pull/652
+
 ## [25.11.0] - 2025-11-07
 
 ## [25.11.0-rc1] - 2025-11-06
diff --git a/deploy/helm/hive-operator/crds/crds.yaml b/deploy/helm/hive-operator/crds/crds.yaml
index b6fd37be..3d07e4ad 100644
--- a/deploy/helm/hive-operator/crds/crds.yaml
+++ b/deploy/helm/hive-operator/crds/crds.yaml
@@ -49,6 +49,33 @@ spec:
             required:
               - kerberos
             type: object
+          authorization:
+            description: |-
+              Authorization options for Hive.
+              Learn more in the [Hive authorization usage guide](https://docs.stackable.tech/home/nightly/hive/usage-guide/security#authorization).
+            nullable: true
+            properties:
+              opa:
+                description: |-
+                  Configure the OPA stacklet [discovery ConfigMap](https://docs.stackable.tech/home/nightly/concepts/service_discovery)
+                  and the name of the Rego package containing your authorization rules.
+                  Consult the [OPA authorization documentation](https://docs.stackable.tech/home/nightly/concepts/opa)
+                  to learn how to deploy Rego authorization rules with OPA.
+                nullable: true
+                properties:
+                  configMapName:
+                    description: |-
+                      The [discovery ConfigMap](https://docs.stackable.tech/home/nightly/concepts/service_discovery)
+                      for the OPA stacklet that should be used for authorization requests.
+                    type: string
+                  package:
+                    description: The name of the Rego package containing the Rego rules for the product.
+                    nullable: true
+                    type: string
+                required:
+                  - configMapName
+                type: object
+            type: object
           database:
             description: Database connection specification for the metadata database.
             properties:
diff --git a/docs/modules/hive/pages/usage-guide/security.adoc b/docs/modules/hive/pages/usage-guide/security.adoc
index 2c500038..fcc282c3 100644
--- a/docs/modules/hive/pages/usage-guide/security.adoc
+++ b/docs/modules/hive/pages/usage-guide/security.adoc
@@ -45,3 +45,118 @@ The `kerberos.secretClass` is used to give Hive the possibility to request keyta
 === 5. Access Hive
 In case you want to access Hive it is recommended to start up a client Pod that connects to Hive, rather than shelling into the master.
 We have an https://github.com/stackabletech/hive-operator/blob/main/tests/templates/kuttl/kerberos/70-install-access-hive.yaml.j2[integration test] for this exact purpose, where you can see how to connect and get a valid keytab.
+
+
+== Authorization
+The Stackable Operator for Apache Hive supports the following authorization methods.
+
+=== Open Policy Agent (OPA)
+The Apache Hive metastore can be configured to delegate authorization decisions to an Open Policy Agent (OPA) instance.
+More information on the setup and configuration of OPA can be found in the xref:opa:index.adoc[OPA Operator documentation].
+A Hive cluster can be configured to use OPA authorization by adding this section to the configuration:
+
+[source,yaml]
+----
+spec:
+  clusterConfig:
+    authorization:
+      opa:
+        configMapName: opa # <1>
+        package: hms # <2>
+----
+<1> The name of your OPA Stacklet (`opa` in this case).
+<2> The Rego rule package to use for policy decisions.
+This is optional and defaults to the name of the Hive Stacklet.
+
+==== Defining Rego rules
+For a general explanation of how rules are written, please refer to the {opa-rego-docs}[OPA documentation].
+Authorization with OPA is done using the https://github.com/boschglobal/hive-metastore-opa-authorizer[hive-metastore-opa-authorizer] plugin.
+
+===== OPA Inputs
+The payload sent by Hive with each request to OPA, which is accessible within the Rego rules, has the following structure:
+
+[source,json]
+----
+{
+  "identity": {
+    "username": "",
+    "groups": ["", ""]
+  },
+  "resources": {
+    "database": null,
+    "table": null,
+    "partition": null,
+    "columns": ["col1", "col2"]
+  },
+  "privileges": {
+    "readRequiredPriv": [],
+    "writeRequiredPriv": [],
+    "inputs": null,
+    "outputs": null
+  }
+}
+----
+* `identity`: Contains user information.
+** `username`: The name of the user.
+** `groups`: A list of groups the user belongs to.
+* `resources`: Specifies the resources involved in the request.
+** `database`: The database object.
+** `table`: The table object.
+** `partition`: The partition object.
+** `columns`: A list of column names involved in the request.
+* `privileges`: Details the privileges required for the request.
+** `readRequiredPriv`: A list of required read privileges.
+** `writeRequiredPriv`: A list of required write privileges.
+** `inputs`: Input tables for the request.
+** `outputs`: Output tables for the request.
+
+===== Example OPA Rego Rule
+Below is a basic Rego rule that demonstrates how to handle the input dictionary sent from the Hive authorizer to OPA:
+
+[source,rego]
+----
+package hms
+
+default database_allow = false
+default table_allow = false
+default column_allow = false
+default partition_allow = false
+default user_allow = false
+
+database_allow if {
+    input.identity.username == "stackable"
+    input.resources.database.name == "test_db"
+}
+
+table_allow if {
+    input.identity.username == "stackable"
+    input.resources.table.dbName == "test_db"
+    input.resources.table.tableName == "test_table"
+    input.privileges.readRequiredPriv[0].priv == "SELECT"
+}
+
+table_allow if {
+    input.identity.username == "stackable"
+    input.resources.table.dbName == "test_db"
+    input.privileges.writeRequiredPriv[0].priv == "CREATE"
+}
+----
+* `database_allow` grants access if the user is `stackable` and the database is `test_db`.
+* `table_allow` grants access if the user is `stackable`, the database is `test_db`, and either:
+** the table is `test_table` and the required read privilege is `SELECT`, or
+** the required write privilege is `CREATE`, without any table restriction.
+
+==== Configuring policy URLs
+
+The `database_allow`, `table_allow`, `column_allow`, `partition_allow`, and `user_allow` policy URLs can be overridden (via config overrides) using the following properties in `hive-site.xml`:
+
+* `com.bosch.bdps.opa.authorization.policy.url.database`
+* `com.bosch.bdps.opa.authorization.policy.url.table`
+* `com.bosch.bdps.opa.authorization.policy.url.column`
+* `com.bosch.bdps.opa.authorization.policy.url.partition`
+* `com.bosch.bdps.opa.authorization.policy.url.user`
+
+An example override is shown below.
+
+==== TLS-secured OPA cluster
+
+Stackable OPA clusters secured via TLS are supported and no further configuration is required.
+The Stackable Hive operator automatically adds the certificate from the SecretClass used to secure the OPA cluster to its truststore.
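+
+==== Example: overriding a policy URL
+
+Below is a minimal sketch of such a config override on the metastore role.
+It assumes your Rego package defines a rule named `custom_table_allow`; the rule name is purely illustrative and not shipped with the plugin:
+
+[source,yaml]
+----
+spec:
+  metastore:
+    configOverrides:
+      hive-site.xml:
+        com.bosch.bdps.opa.authorization.policy.url.table: custom_table_allow
+----
+
+With this override, `table` decisions are requested from the `custom_table_allow` rule relative to the configured base endpoint, instead of the default `table_allow` rule.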
diff --git a/examples/hive-opa-cluster.yaml b/examples/hive-opa-cluster.yaml
new file mode 100644
index 00000000..994bedaa
--- /dev/null
+++ b/examples/hive-opa-cluster.yaml
@@ -0,0 +1,89 @@
+# helm install postgresql oci://registry-1.docker.io/bitnamicharts/postgresql \
+#   --version 16.5.0 \
+#   --namespace default \
+#   --set image.repository=bitnamilegacy/postgresql \
+#   --set volumePermissions.image.repository=bitnamilegacy/os-shell \
+#   --set metrics.image.repository=bitnamilegacy/postgres-exporter \
+#   --set global.security.allowInsecureImages=true \
+#   --set auth.username=hive \
+#   --set auth.password=hive \
+#   --set auth.database=hive \
+#   --set primary.extendedConfiguration="password_encryption=md5" \
+#   --wait
+---
+apiVersion: hive.stackable.tech/v1alpha1
+kind: HiveCluster
+metadata:
+  name: hive
+spec:
+  image:
+    productVersion: 4.1.0
+    pullPolicy: IfNotPresent
+  clusterConfig:
+    authorization:
+      opa:
+        configMapName: opa
+        package: hms
+    database:
+      connString: jdbc:postgresql://postgresql:5432/hive
+      credentialsSecret: hive-postgresql-credentials
+      dbType: postgres
+  metastore:
+    roleGroups:
+      default:
+        replicas: 1
+        config:
+          resources:
+            cpu:
+              min: 300m
+              max: "2"
+            memory:
+              limit: 5Gi
+---
+apiVersion: v1
+kind: Secret
+metadata:
+  name: hive-postgresql-credentials
+type: Opaque
+stringData:
+  username: hive
+  password: hive
+---
+apiVersion: opa.stackable.tech/v1alpha1
+kind: OpaCluster
+metadata:
+  name: opa
+spec:
+  image:
+    productVersion: 1.8.0
+  servers:
+    config:
+      logging:
+        enableVectorAgent: false
+        containers:
+          opa:
+            console:
+              level: INFO
+            file:
+              level: INFO
+            loggers:
+              decision:
+                level: INFO
+    roleGroups:
+      default: {}
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: hive-opa-bundle
+  labels:
+    opa.stackable.tech/bundle: "hms"
+data:
+  hive.rego: |
+    package hms
+
+    database_allow = true
+    table_allow = true
+    column_allow = true
+    partition_allow = true
+    user_allow = true
diff --git a/rust/operator-binary/src/command.rs b/rust/operator-binary/src/command.rs
index 4f8d1135..ee08f56d 100644
--- a/rust/operator-binary/src/command.rs
+++ b/rust/operator-binary/src/command.rs
@@ -1,16 +1,20 @@
 use stackable_operator::crd::s3;
 
-use crate::crd::{
-    DB_PASSWORD_ENV, DB_PASSWORD_PLACEHOLDER, DB_USERNAME_ENV, DB_USERNAME_PLACEHOLDER,
-    HIVE_METASTORE_LOG4J2_PROPERTIES, HIVE_SITE_XML, STACKABLE_CONFIG_DIR,
-    STACKABLE_CONFIG_MOUNT_DIR, STACKABLE_LOG_CONFIG_MOUNT_DIR, STACKABLE_TRUST_STORE,
-    STACKABLE_TRUST_STORE_PASSWORD, v1alpha1,
+use crate::{
+    config::opa::HiveOpaConfig,
+    crd::{
+        DB_PASSWORD_ENV, DB_PASSWORD_PLACEHOLDER, DB_USERNAME_ENV, DB_USERNAME_PLACEHOLDER,
+        HIVE_METASTORE_LOG4J2_PROPERTIES, HIVE_SITE_XML, STACKABLE_CONFIG_DIR,
+        STACKABLE_CONFIG_MOUNT_DIR, STACKABLE_LOG_CONFIG_MOUNT_DIR, STACKABLE_TRUST_STORE,
+        STACKABLE_TRUST_STORE_PASSWORD, v1alpha1,
+    },
 };
 
 pub fn build_container_command_args(
     hive: &v1alpha1::HiveCluster,
     start_command: String,
     s3_connection_spec: Option<&s3::v1alpha1::ConnectionSpec>,
+    hive_opa_config: Option<&HiveOpaConfig>,
 ) -> Vec<String> {
     let mut args = vec![
         // copy config files to a writeable empty folder in order to set s3 access and secret keys
@@ -51,6 +55,14 @@ pub fn build_container_command_args(
         }
     }
 
+    if let Some(opa) = hive_opa_config {
+        if let Some(ca_cert_dir) = opa.tls_ca_cert_mount_path() {
+            args.push(format!(
+                "cert-tools generate-pkcs12-truststore --pkcs12 {STACKABLE_TRUST_STORE}:{STACKABLE_TRUST_STORE_PASSWORD} --pem {ca_cert_dir}/ca.crt --out {STACKABLE_TRUST_STORE} --out-password {STACKABLE_TRUST_STORE_PASSWORD}"
+            ));
+        }
+    }
+
     // db credentials
     args.extend([
         format!("echo replacing {DB_USERNAME_PLACEHOLDER} and {DB_PASSWORD_PLACEHOLDER} with secret values."),
diff --git a/rust/operator-binary/src/config/mod.rs b/rust/operator-binary/src/config/mod.rs
index 271c6d99..4069348d 100644
--- a/rust/operator-binary/src/config/mod.rs
+++ b/rust/operator-binary/src/config/mod.rs
@@ -1 +1,2 @@
 pub mod jvm;
+pub mod opa;
diff --git a/rust/operator-binary/src/config/opa.rs b/rust/operator-binary/src/config/opa.rs
new file mode 100644
index 00000000..411e4558
--- /dev/null
+++ b/rust/operator-binary/src/config/opa.rs
@@ -0,0 +1,129 @@
+use std::collections::BTreeMap;
+
+use stackable_operator::{
+    client::Client,
+    commons::opa::{OpaApiVersion, OpaConfig},
+    k8s_openapi::api::core::v1::ConfigMap,
+    kube::ResourceExt,
+};
+
+use crate::crd::v1alpha1::HiveCluster;
+
+const HIVE_METASTORE_PRE_EVENT_LISTENERS: &str = "hive.metastore.pre.event.listeners";
+const HIVE_SECURITY_METASTORE_AUTHORIZATION_MANAGER: &str =
+    "hive.security.metastore.authorization.manager";
+
+const OPA_AUTHORIZATION_PRE_EVENT_LISTENER_V3: &str =
+    "com.bosch.bdps.hms3.OpaAuthorizationPreEventListener";
+const OPA_BASED_AUTHORIZATION_PROVIDER_V3: &str =
+    "com.bosch.bdps.hms3.OpaBasedAuthorizationProvider";
+const OPA_AUTHORIZATION_PRE_EVENT_LISTENER_V4: &str =
+    "com.bosch.bdps.hms4.OpaAuthorizationPreEventListener";
+const OPA_BASED_AUTHORIZATION_PROVIDER_V4: &str =
+    "com.bosch.bdps.hms4.OpaBasedAuthorizationProvider";
+
+const OPA_AUTHORIZATION_BASE_ENDPOINT: &str = "com.bosch.bdps.opa.authorization.base.endpoint";
+const OPA_AUTHORIZATION_POLICY_URL_DATA_BASE: &str =
+    "com.bosch.bdps.opa.authorization.policy.url.database";
+const OPA_AUTHORIZATION_POLICY_URL_TABLE: &str =
+    "com.bosch.bdps.opa.authorization.policy.url.table";
+const OPA_AUTHORIZATION_POLICY_URL_COLUMN: &str =
+    "com.bosch.bdps.opa.authorization.policy.url.column";
+const OPA_AUTHORIZATION_POLICY_URL_PARTITION: &str =
+    "com.bosch.bdps.opa.authorization.policy.url.partition";
+const OPA_AUTHORIZATION_POLICY_URL_USER: &str = "com.bosch.bdps.opa.authorization.policy.url.user";
+
+pub const OPA_TLS_VOLUME_NAME: &str = "opa-tls";
+
+pub struct HiveOpaConfig {
+    /// Endpoint for OPA, e.g.
+    /// `http://localhost:8081/v1/data/`
+    pub(crate) base_endpoint: String,
+    /// Optional TLS secret class for OPA communication.
+    /// If set, the CA certificate from this secret class will be added
+    /// to Hive's truststore to make it trust OPA's TLS certificate.
+    pub(crate) tls_secret_class: Option<String>,
+}
+
+impl HiveOpaConfig {
+    pub async fn from_opa_config(
+        client: &Client,
+        hive: &HiveCluster,
+        opa_config: &OpaConfig,
+    ) -> Result<Self, stackable_operator::commons::opa::Error> {
+        // See: https://github.com/boschglobal/hive-metastore-opa-authorizer?tab=readme-ov-file#configuration
+        let base_endpoint = opa_config
+            .full_document_url_from_config_map(client, hive, None, OpaApiVersion::V1)
+            .await?;
+
+        let tls_secret_class = client
+            .get::<ConfigMap>(
+                &opa_config.config_map_name,
+                hive.namespace().as_deref().unwrap_or("default"),
+            )
+            .await
+            .ok()
+            .and_then(|cm| cm.data)
+            .and_then(|mut data| data.remove("OPA_SECRET_CLASS"));
+
+        Ok(HiveOpaConfig {
+            base_endpoint,
+            tls_secret_class,
+        })
+    }
+
+    pub fn as_config(&self, product_version: &str) -> BTreeMap<String, String> {
+        let (pre_event_listener, authorization_provider) = if product_version.starts_with("3.") {
+            (
+                OPA_AUTHORIZATION_PRE_EVENT_LISTENER_V3,
+                OPA_BASED_AUTHORIZATION_PROVIDER_V3,
+            )
+        } else {
+            (
+                OPA_AUTHORIZATION_PRE_EVENT_LISTENER_V4,
+                OPA_BASED_AUTHORIZATION_PROVIDER_V4,
+            )
+        };
+
+        BTreeMap::from([
+            (
+                HIVE_METASTORE_PRE_EVENT_LISTENERS.to_string(),
+                pre_event_listener.to_string(),
+            ),
+            (
+                HIVE_SECURITY_METASTORE_AUTHORIZATION_MANAGER.to_string(),
+                authorization_provider.to_string(),
+            ),
+            (
+                OPA_AUTHORIZATION_BASE_ENDPOINT.to_string(),
+                self.base_endpoint.to_owned(),
+            ),
+            (
+                OPA_AUTHORIZATION_POLICY_URL_DATA_BASE.to_string(),
+                "database_allow".to_string(),
+            ),
+            (
+                OPA_AUTHORIZATION_POLICY_URL_TABLE.to_string(),
+                "table_allow".to_string(),
+            ),
+            (
+                OPA_AUTHORIZATION_POLICY_URL_COLUMN.to_string(),
+                "column_allow".to_string(),
+            ),
+            (
+                OPA_AUTHORIZATION_POLICY_URL_PARTITION.to_string(),
+                "partition_allow".to_string(),
+            ),
+            (
+                OPA_AUTHORIZATION_POLICY_URL_USER.to_string(),
+                "user_allow".to_string(),
+            ),
+        ])
+    }
+
+    pub fn tls_ca_cert_mount_path(&self) -> Option<String> {
+        self.tls_secret_class
+            .as_ref()
+            .map(|_| format!("/stackable/secrets/{OPA_TLS_VOLUME_NAME}"))
+    }
+}
diff --git a/rust/operator-binary/src/controller.rs b/rust/operator-binary/src/controller.rs
index 96cdc955..9229e619 100644
--- a/rust/operator-binary/src/controller.rs
+++ b/rust/operator-binary/src/controller.rs
@@ -28,7 +28,7 @@ use stackable_operator::{
             security::PodSecurityContextBuilder,
             volume::{
                 ListenerOperatorVolumeSourceBuilder, ListenerOperatorVolumeSourceBuilderError,
-                ListenerReference, VolumeBuilder,
+                ListenerReference, SecretOperatorVolumeSourceBuilder, VolumeBuilder,
             },
         },
     },
@@ -36,7 +36,6 @@ use stackable_operator::{
     commons::{
         product_image_selection::{self, ResolvedProductImage},
         rbac::build_rbac_resources,
-        tls_verification::TlsClientDetailsError,
     },
     crd::{listener::v1alpha1::Listener, s3},
     k8s_openapi::{
@@ -85,7 +84,10 @@ use tracing::warn;
 use crate::{
     OPERATOR_NAME,
     command::build_container_command_args,
-    config::jvm::{construct_hadoop_heapsize_env, construct_non_heap_jvm_args},
+    config::{
+        jvm::{construct_hadoop_heapsize_env, construct_non_heap_jvm_args},
+        opa::{HiveOpaConfig, OPA_TLS_VOLUME_NAME},
+    },
     crd::{
         APP_NAME, CORE_SITE_XML, Container, DB_PASSWORD_ENV, DB_USERNAME_ENV, HIVE_PORT,
         HIVE_PORT_NAME, HIVE_SITE_XML, HiveClusterStatus, HiveRole, JVM_SECURITY_PROPERTIES_FILE,
@@ -131,16 +133,6 @@ pub enum Error {
     #[snafu(display("object defines no metastore role"))]
     NoMetaStoreRole,
 
-    #[snafu(display("failed to calculate service name for role {rolegroup}"))]
-    RoleGroupServiceNameNotFound {
-        rolegroup: RoleGroupRef<v1alpha1::HiveCluster>,
-    },
-
-    #[snafu(display("failed to apply global Service"))]
-    ApplyRoleService {
-        source: stackable_operator::cluster_resources::Error,
-    },
-
     #[snafu(display("failed to apply Service for {rolegroup}"))]
     ApplyRoleGroupService {
         source: stackable_operator::cluster_resources::Error,
@@ -198,9 +190,6 @@ pub enum Error {
         source: stackable_operator::crd::s3::v1alpha1::ConnectionError,
     },
 
-    #[snafu(display("failed to configure S3 TLS client details"))]
-    ConfigureS3TlsClientDetails { source: TlsClientDetailsError },
-
     #[snafu(display(
         "Hive does not support skipping the verification of the tls enabled S3 server"
     ))]
@@ -209,15 +198,6 @@ pub enum Error {
     #[snafu(display("failed to resolve and merge resource config for role and role group"))]
     FailedToResolveResourceConfig { source: crate::crd::Error },
 
-    #[snafu(display("invalid java heap config - missing default or value in crd?"))]
-    InvalidJavaHeapConfig,
-
-    #[snafu(display("failed to convert java heap config to unit [{unit}]"))]
-    FailedToConvertJavaHeap {
-        source: stackable_operator::memory::Error,
-        unit: String,
-    },
-
     #[snafu(display("failed to create hive container [{name}]"))]
     FailedToCreateHiveContainer {
         source: stackable_operator::builder::pod::container::Error,
@@ -259,7 +239,7 @@ pub enum Error {
     },
 
     #[snafu(display("internal operator failure"))]
-    InternalOperatorError { source: crate::crd::Error },
+    InternalOperatorFailure { source: crate::crd::Error },
 
     #[snafu(display(
         "failed to serialize [{JVM_SECURITY_PROPERTIES_FILE}] for {}",
@@ -280,16 +260,6 @@ pub enum Error {
         source: crate::operations::graceful_shutdown::Error,
     },
 
-    #[snafu(display("failed to build TLS certificate SecretClass Volume"))]
-    TlsCertSecretClassVolumeBuild {
-        source: stackable_operator::builder::pod::volume::SecretOperatorVolumeSourceBuilderError,
-    },
-
-    #[snafu(display("failed to build S3 credentials SecretClass Volume"))]
-    S3CredentialsSecretClassVolumeBuild {
-        source: stackable_operator::commons::secret_class::SecretClassVolumeError,
-    },
-
     #[snafu(display("failed to build Labels"))]
     LabelBuild {
         source: stackable_operator::kvp::LabelError,
@@ -306,13 +276,6 @@ pub enum Error {
         stackable_operator::kvp::KeyValuePairError,
     },
 
-    #[snafu(display(
-        "there was an error adding LDAP Volumes and VolumeMounts to the Pod and Containers"
-    ))]
-    AddLdapVolumes {
-        source: stackable_operator::crd::authentication::ldap::v1alpha1::Error,
-    },
-
     #[snafu(display("failed to add kerberos config"))]
     AddKerberosConfig { source: kerberos::Error },
 
@@ -355,6 +318,16 @@ pub enum Error {
     ResolveProductImage {
         source: product_image_selection::Error,
     },
+
+    #[snafu(display("invalid OpaConfig"))]
+    InvalidOpaConfig {
+        source: stackable_operator::commons::opa::Error,
+    },
+
+    #[snafu(display("failed to build TLS certificate SecretClass Volume"))]
+    TlsCertSecretClassVolumeBuild {
+        source: stackable_operator::builder::pod::volume::SecretOperatorVolumeSourceBuilderError,
+    },
 }
 
 type Result<T, E = Error> = std::result::Result<T, E>;
 
@@ -458,6 +431,15 @@ pub async fn reconcile_hive(
         .await
         .context(ApplyRoleBindingSnafu)?;
 
+    let hive_opa_config = match hive.get_opa_config() {
+        Some(opa_config) => Some(
+            HiveOpaConfig::from_opa_config(client, hive, opa_config)
+                .await
+                .context(InvalidOpaConfigSnafu)?,
+        ),
+        None => None,
+    };
+
     let mut ss_cond_builder = StatefulSetConditionBuilder::default();
 
     for (rolegroup_name, rolegroup_config) in metastore_config.iter() {
@@ -484,6 +466,7 @@ pub async fn reconcile_hive(
             s3_connection_spec.as_ref(),
             &config,
             &client.kubernetes_cluster_info,
+            hive_opa_config.as_ref(),
         )?;
         let rg_statefulset = build_metastore_rolegroup_statefulset(
             hive,
@@ -494,6 +477,7 @@ pub async fn reconcile_hive(
             s3_connection_spec.as_ref(),
             &config,
             &rbac_sa.name_any(),
+            hive_opa_config.as_ref(),
         )?;
 
         cluster_resources
@@ -609,6 +593,7 @@ fn build_metastore_rolegroup_config_map(
     s3_connection_spec: Option<&s3::v1alpha1::ConnectionSpec>,
     merged_config: &MetaStoreConfig,
     cluster_info: &KubernetesClusterInfo,
+    hive_opa_config: Option<&HiveOpaConfig>,
 ) -> Result<ConfigMap> {
     let mut hive_site_data = String::new();
 
@@ -665,6 +650,17 @@ fn build_metastore_rolegroup_config_map(
                     data.insert(property_name.to_string(), Some(property_value.to_string()));
                 }
 
+                // OPA settings
+                if let Some(opa_config) = hive_opa_config {
+                    data.extend(
+                        opa_config
+                            .as_config(&resolved_product_image.product_version)
+                            .into_iter()
+                            .map(|(k, v)| (k, Some(v)))
+                            .collect::<BTreeMap<String, Option<String>>>(),
+                    );
+                }
+
                 // overrides
                 for (property_name, property_value) in config {
                     data.insert(property_name.to_string(), Some(property_value.to_string()));
@@ -752,11 +748,12 @@ fn build_metastore_rolegroup_statefulset(
     s3_connection: Option<&s3::v1alpha1::ConnectionSpec>,
     merged_config: &MetaStoreConfig,
     sa_name: &str,
+    hive_opa_config: Option<&HiveOpaConfig>,
 ) -> Result<StatefulSet> {
-    let role = hive.role(hive_role).context(InternalOperatorSnafu)?;
+    let role = hive.role(hive_role).context(InternalOperatorFailureSnafu)?;
     let rolegroup = hive
         .rolegroup(rolegroup_ref)
-        .context(InternalOperatorSnafu)?;
+        .context(InternalOperatorFailureSnafu)?;
 
     let mut container_builder =
         ContainerBuilder::new(APP_NAME).context(FailedToCreateHiveContainerSnafu {
@@ -825,6 +822,32 @@ fn build_metastore_rolegroup_statefulset(
         }
     }
 
+    // Add OPA TLS certs if configured
+    if let Some((tls_secret_class, tls_mount_path)) =
+        hive_opa_config.as_ref().and_then(|opa_config| {
+            opa_config
+                .tls_secret_class
+                .as_ref()
+                .zip(opa_config.tls_ca_cert_mount_path())
+        })
+    {
+        container_builder
+            .add_volume_mount(OPA_TLS_VOLUME_NAME, &tls_mount_path)
+            .context(AddVolumeMountSnafu)?;
+
+        let opa_tls_volume = VolumeBuilder::new(OPA_TLS_VOLUME_NAME)
+            .ephemeral(
+                SecretOperatorVolumeSourceBuilder::new(tls_secret_class)
+                    .build()
+                    .context(TlsCertSecretClassVolumeBuildSnafu)?,
+            )
+            .build();
+
+        pod_builder
+            .add_volume(opa_tls_volume)
+            .context(AddVolumeSnafu)?;
+    }
+
     let db_type = hive.db_type();
     let start_command = if resolved_product_image.product_version.starts_with("3.") {
         // The schematool version in 3.1.x does *not* support the `-initOrUpgradeSchema` flag yet, so we can not use that.
@@ -876,6 +899,7 @@ fn build_metastore_rolegroup_statefulset(
                 create_vector_shutdown_file_command(STACKABLE_LOG_DIR),
             },
             s3_connection,
+            hive_opa_config,
         ))
         .add_volume_mount(STACKABLE_CONFIG_DIR_NAME, STACKABLE_CONFIG_DIR)
         .context(AddVolumeMountSnafu)?
diff --git a/rust/operator-binary/src/crd/mod.rs b/rust/operator-binary/src/crd/mod.rs
index 8af89d72..e9e0f216 100644
--- a/rust/operator-binary/src/crd/mod.rs
+++ b/rust/operator-binary/src/crd/mod.rs
@@ -7,6 +7,7 @@ use stackable_operator::{
     commons::{
         affinity::StackableAffinity,
         cluster_operation::ClusterOperation,
+        opa::OpaConfig,
         product_image_selection::ProductImage,
         resources::{
             CpuLimitsFragment, MemoryLimitsFragment, NoRuntimeLimits, NoRuntimeLimitsFragment,
@@ -151,6 +152,13 @@ pub mod versioned {
     #[derive(Clone, Debug, Deserialize, Eq, JsonSchema, PartialEq, Serialize)]
     #[serde(rename_all = "camelCase")]
     pub struct HiveClusterConfig {
+        /// Settings related to user [authentication](DOCS_BASE_URL_PLACEHOLDER/usage-guide/security).
+        pub authentication: Option<AuthenticationConfig>,
+
+        /// Authorization options for Hive.
+        /// Learn more in the [Hive authorization usage guide](DOCS_BASE_URL_PLACEHOLDER/hive/usage-guide/security#authorization).
+        pub authorization: Option<AuthorizationConfig>,
+
         // no doc - docs in DatabaseConnectionSpec struct.
         pub database: DatabaseConnectionSpec,
 
@@ -169,9 +177,6 @@ pub mod versioned {
         /// to learn how to configure log aggregation with Vector.
         #[serde(skip_serializing_if = "Option::is_none")]
         pub vector_aggregator_config_map_name: Option<String>,
-
-        /// Settings related to user [authentication](DOCS_BASE_URL_PLACEHOLDER/usage-guide/security).
-        pub authentication: Option<AuthenticationConfig>,
     }
 }
 
@@ -289,6 +294,14 @@ impl v1alpha1::HiveCluster {
         &self.spec.cluster_config.database.db_type
     }
 
+    pub fn get_opa_config(&self) -> Option<&OpaConfig> {
+        self.spec
+            .cluster_config
+            .authorization
+            .as_ref()
+            .and_then(|a| a.opa.as_ref())
+    }
+
     /// Retrieve and merge resource configs for role and role groups
     pub fn merged_config(
         &self,
diff --git a/rust/operator-binary/src/crd/security.rs b/rust/operator-binary/src/crd/security.rs
index ce279097..ffad541a 100644
--- a/rust/operator-binary/src/crd/security.rs
+++ b/rust/operator-binary/src/crd/security.rs
@@ -1,5 +1,8 @@
 use serde::{Deserialize, Serialize};
-use stackable_operator::schemars::{self, JsonSchema};
+use stackable_operator::{
+    commons::opa::OpaConfig,
+    schemars::{self, JsonSchema},
+};
 
 #[derive(Clone, Debug, Deserialize, Eq, Hash, JsonSchema, PartialEq, Serialize)]
 #[serde(rename_all = "camelCase")]
@@ -8,6 +11,14 @@ pub struct AuthenticationConfig {
     pub kerberos: KerberosConfig,
 }
 
+#[derive(Clone, Debug, Deserialize, Eq, JsonSchema, PartialEq, Serialize)]
+#[serde(rename_all = "camelCase")]
+pub struct AuthorizationConfig {
+    // no doc - it's in the struct.
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub opa: Option<OpaConfig>,
+}
+
 #[derive(Clone, Debug, Deserialize, Eq, Hash, JsonSchema, PartialEq, Serialize)]
 #[serde(rename_all = "camelCase")]
 pub struct KerberosConfig {
diff --git a/tests/templates/kuttl/logging/test_log_aggregation.py b/tests/templates/kuttl/logging/test_log_aggregation.py
index 80c87d09..ee20e27c 100755
--- a/tests/templates/kuttl/logging/test_log_aggregation.py
+++ b/tests/templates/kuttl/logging/test_log_aggregation.py
@@ -23,9 +23,9 @@ def check_sent_events():
         },
     )
 
-    assert (
-        response.status_code == 200
-    ), "Cannot access the API of the vector aggregator."
+    assert response.status_code == 200, (
+        "Cannot access the API of the vector aggregator."
+    )
 
     result = response.json()
 
@@ -35,13 +35,13 @@ def check_sent_events():
         componentId = transform["componentId"]
 
         if componentId == "filteredInvalidEvents":
-            assert (
-                sentEvents is None or sentEvents["sentEventsTotal"] == 0
-            ), "Invalid log events were sent."
+            assert sentEvents is None or sentEvents["sentEventsTotal"] == 0, (
+                "Invalid log events were sent."
+            )
         else:
-            assert (
-                sentEvents is not None and sentEvents["sentEventsTotal"] > 0
-            ), f'No events were sent in "{componentId}".'
+            assert sentEvents is not None and sentEvents["sentEventsTotal"] > 0, (
+                f'No events were sent in "{componentId}".'
+            )
 
 
 if __name__ == "__main__":
diff --git a/tests/templates/kuttl/smoke/50-assert.yaml b/tests/templates/kuttl/smoke/50-assert.yaml
new file mode 100644
index 00000000..5b1731b6
--- /dev/null
+++ b/tests/templates/kuttl/smoke/50-assert.yaml
@@ -0,0 +1,13 @@
+---
+apiVersion: kuttl.dev/v1beta1
+kind: TestAssert
+timeout: 300
+commands:
+  - script: kubectl -n $NAMESPACE rollout status daemonset opa-server-default --timeout 300s
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: hive-opa-bundle
+  labels:
+    opa.stackable.tech/bundle: "hms"
diff --git a/tests/templates/kuttl/smoke/50-install-opa.yaml.j2 b/tests/templates/kuttl/smoke/50-install-opa.yaml.j2
new file mode 100644
index 00000000..a63a90b4
--- /dev/null
+++ b/tests/templates/kuttl/smoke/50-install-opa.yaml.j2
@@ -0,0 +1,108 @@
+---
+apiVersion: kuttl.dev/v1beta1
+kind: TestStep
+commands:
+  - script: |
+      kubectl apply -n $NAMESPACE -f - <<EOF
+      ---
+      apiVersion: opa.stackable.tech/v1alpha1
+      kind: OpaCluster
+      metadata:
+        name: opa
+      spec:
+        image:
+{% if test_scenario['values']['opa-latest'].find(",") > 0 %}
+          custom: "{{ test_scenario['values']['opa-latest'].split(',')[1] }}"
+          productVersion: "{{ test_scenario['values']['opa-latest'].split(',')[0] }}"
+{% else %}
+          productVersion: "{{ test_scenario['values']['opa-latest'] }}"
+{% endif %}
+          pullPolicy: IfNotPresent
+        clusterConfig:
+{% if test_scenario['values']['opa-use-tls'] == 'true' %}
+          tls:
+            serverSecretClass: opa-tls-$NAMESPACE
+{% endif %}
+{% if lookup('env', 'VECTOR_AGGREGATOR') %}
+          vectorAggregatorConfigMapName: vector-aggregator-discovery
+{% endif %}
+        servers:
+          config:
+            logging:
+              enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }}
+              containers:
+                opa:
+                  console:
+                    level: INFO
+                  file:
+                    level: INFO
+                  loggers:
+                    decision:
+                      level: INFO
+          roleGroups:
+            default: {}
+{% if test_scenario['values']['opa-use-tls'] == 'true' %}
+      ---
+      apiVersion: secrets.stackable.tech/v1alpha1
+      kind: SecretClass
+      metadata:
+        name: opa-tls-$NAMESPACE
+      spec:
+        backend:
+          autoTls:
+            ca:
+              autoGenerate: true
+              secret:
+                name: opa-tls-ca-$NAMESPACE
+                namespace: $NAMESPACE
+{% endif %}
+      EOF
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: hive-opa-bundle
+  labels:
+    opa.stackable.tech/bundle: "hms"
+data:
+  hive.rego: |
+    package hms
+
+    default database_allow = false
+    default table_allow = false
+    default column_allow = false
+    default partition_allow = false
+    default user_allow = false
+
+    stackable_user := "stackable"
+    db_name := "test_metastore"
+
+    database_allow if {
+      input.identity.username == stackable_user
+      input.resources.database.name == db_name
+    }
+
+    table_allow if {
+      input.identity.username == stackable_user
+      input.resources.table.dbName == db_name
+      input.privileges.writeRequiredPriv[0].priv == "CREATE"
+      input.resources.table.tableName in ["s3_one_column_table", "one_column_table"]
+    }
+
+    table_allow if {
+      input.identity.username == stackable_user
+      input.resources.table.dbName == db_name
+      input.privileges.readRequiredPriv[0].priv == "SELECT"
+      input.resources.table.tableName in ["s3_one_column_table", "one_column_table"]
+    }
+
+    column_allow if {
+      input.identity.username == stackable_user
+    }
+
+    partition_allow if {
+      input.identity.username == stackable_user
+    }
+
+    user_allow if {
+      input.identity.username == stackable_user
+    }
diff --git a/tests/templates/kuttl/smoke/60-install-hive.yaml.j2 b/tests/templates/kuttl/smoke/60-install-hive.yaml.j2
index 4db2575e..1f35f82a 100644
--- a/tests/templates/kuttl/smoke/60-install-hive.yaml.j2
+++ b/tests/templates/kuttl/smoke/60-install-hive.yaml.j2
@@ -13,6 +13,10 @@ spec:
 {% endif %}
     pullPolicy: IfNotPresent
   clusterConfig:
+    authorization:
+      opa:
+        configMapName: opa
+        package: hms
    database:
       connString: jdbc:postgresql://postgresql:5432/hive
       credentialsSecret: hive-credentials
diff --git a/tests/templates/kuttl/smoke/80-assert.yaml b/tests/templates/kuttl/smoke/80-assert.yaml
index 3d4bd846..ed78faa4 100644
--- a/tests/templates/kuttl/smoke/80-assert.yaml
+++ b/tests/templates/kuttl/smoke/80-assert.yaml
@@ -2,5 +2,6 @@
 apiVersion: kuttl.dev/v1beta1
 kind: TestAssert
 commands:
-  - script: kubectl exec -n "$NAMESPACE" test-metastore-0 -- python /tmp/test_metastore.py -m hive-metastore.$NAMESPACE.svc.cluster.local
-  - script: kubectl exec -n "$NAMESPACE" test-metastore-0 -- python /tmp/test_metastore.py -m hive-metastore-default-headless.$NAMESPACE.svc.cluster.local
+  - script: kubectl exec -n "$NAMESPACE" test-metastore-0 -- python /tmp/test_metastore.py -d test_metastore -m hive-metastore.$NAMESPACE.svc.cluster.local
+  - script: kubectl exec -n "$NAMESPACE" test-metastore-0 -- python /tmp/test_metastore.py -d test_metastore -m hive-metastore-default-headless.$NAMESPACE.svc.cluster.local
+  - script: kubectl exec -n "$NAMESPACE" test-metastore-0 -- python /tmp/test_metastore_opa.py -d db_not_allowed -m hive-metastore.$NAMESPACE.svc.cluster.local
diff --git a/tests/templates/kuttl/smoke/80-prepare-test-metastore.yaml b/tests/templates/kuttl/smoke/80-prepare-test-metastore.yaml
index 45da6773..3704aaba 100644
--- a/tests/templates/kuttl/smoke/80-prepare-test-metastore.yaml
+++ b/tests/templates/kuttl/smoke/80-prepare-test-metastore.yaml
@@ -3,3 +3,4 @@ apiVersion: kuttl.dev/v1beta1
 kind: TestStep
 commands:
   - script: kubectl cp -n "$NAMESPACE" ./test_metastore.py test-metastore-0:/tmp
+  - script: kubectl cp -n "$NAMESPACE" ./test_metastore_opa.py test-metastore-0:/tmp
diff --git a/tests/templates/kuttl/smoke/test_metastore_opa.py b/tests/templates/kuttl/smoke/test_metastore_opa.py
new file mode 100755
index 00000000..bde8b6db
--- /dev/null
+++ b/tests/templates/kuttl/smoke/test_metastore_opa.py
@@ -0,0 +1,75 @@
+#!/usr/bin/env python3
+from hive_metastore_client import HiveMetastoreClient
+from hive_metastore_client.builders import (
+    DatabaseBuilder,
+    ColumnBuilder,
+    SerDeInfoBuilder,
+    StorageDescriptorBuilder,
+    TableBuilder,
+)
+import argparse
+
+
+def table(db_name, table_name, location):
+    columns = [ColumnBuilder("id", "string", "col comment").build()]
+
+    serde_info = SerDeInfoBuilder(
+        serialization_lib="org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe"
+    ).build()
+
+    storage_descriptor = StorageDescriptorBuilder(
+        columns=columns,
+        location=location,
+        input_format="org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat",
+        output_format="org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat",
+        serde_info=serde_info,
+        compressed=True,
+    ).build()
+
+    test_table = TableBuilder(
+        db_name=db_name,
+        table_name=table_name,
+        storage_descriptor=storage_descriptor,
+    ).build()
+
+    return test_table
+
+
+if __name__ == "__main__":
+    all_args = argparse.ArgumentParser(
+        description="Test hive-metastore-opa-authorizer and rego rules."
+    )
+    all_args.add_argument("-p", "--port", help="Metastore server port", default="9083")
+    all_args.add_argument(
+        "-d", "--database", help="Test DB name", default="db_not_allowed"
+    )
+    all_args.add_argument(
+        "-m", "--metastore", help="The host or service to connect to", required=True
+    )
+    args = vars(all_args.parse_args())
+
+    database_name = args["database"]
+    port = args["port"]
+    host = args["metastore"]
+
+    # Creating database object using builder
+    database = DatabaseBuilder(database_name).build()
+
+    print(
+        f"[INFO] Trying to access '{database_name}', which is expected to fail due to the 'database_allow' authorization policy ..."
+    )
+
+    with HiveMetastoreClient(host, port) as hive_client:
+        try:
+            hive_client.create_database_if_not_exists(database)
+        except Exception as e:
+            print(f"[DENIED] {e}")
+            print(
+                f"[SUCCESS] Test hive-metastore-opa-authorizer succeeded. Could not access database '{database_name}'!"
+            )
+            exit(0)
+
+        print(
+            f"[ERROR] Test hive-metastore-opa-authorizer failed. Was able to access database '{database_name}'!"
+        )
+        exit(-1)
diff --git a/tests/test-definition.yaml b/tests/test-definition.yaml
index a8795502..12eb28f2 100644
--- a/tests/test-definition.yaml
+++ b/tests/test-definition.yaml
@@ -17,7 +17,6 @@ dimensions:
   - name: hive
     values:
       - 3.1.3
-      - 4.0.0
       - 4.0.1
       - 4.1.0
       # Alternatively, if you want to use a custom image, append a comma and the full image name to the product version
@@ -41,6 +40,9 @@ dimensions:
   - name: zookeeper-latest
     values:
      - 3.9.4
+  - name: opa-latest
+    values:
+      - 1.8.0
   - name: krb5
     values:
       - 1.21.1
@@ -51,6 +53,10 @@ dimensions:
     values:
       - "true"
       - "false"
+  - name: opa-use-tls
+    values:
+      - "true"
+      - "false"
   - name: kerberos-realm
     values:
       - "PROD.MYCORP"
@@ -62,7 +68,9 @@ tests:
   - name: smoke
     dimensions:
       - postgres
       - hive
+      - opa-latest
       - s3-use-tls
+      - opa-use-tls
       - openshift
   - name: upgrade
     dimensions: