Skip to content

Commit d3e23ec

Browse files
authored
fix: Pass KUBERNETES_NODE_NAME to product sidecars and other bugs (#744)
* fix: Pass KUBERNETES_NODE_NAME to product sidecars and other bugs * Unneccesary constification * changelog
1 parent 552d2fb commit d3e23ec

File tree

5 files changed

+111
-19
lines changed

5 files changed

+111
-19
lines changed

CHANGELOG.md

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ All notable changes to this project will be documented in this file.
66

77
### Added
88

9-
- Adds new telemetry CLI arguments and environment variables ([#715]).
9+
- Adds new telemetry CLI arguments and environment variables ([#715], [#744]).
1010
- Use `--file-log-max-files` (or `FILE_LOG_MAX_FILES`) to limit the number of log files kept.
1111
- Use `--file-log-rotation-period` (or `FILE_LOG_ROTATION_PERIOD`) to configure the frequency of rotation.
1212
- Use `--console-log-format` (or `CONSOLE_LOG_FORMAT`) to set the format to `plain` (default) or `json`.
@@ -17,7 +17,7 @@ All notable changes to this project will be documented in this file.
1717

1818
### Changed
1919

20-
- BREAKING: Replace stackable-operator `initialize_logging` with stackable-telemetry `Tracing` ([#703], [#710], [#715]).
20+
- BREAKING: Replace stackable-operator `initialize_logging` with stackable-telemetry `Tracing` ([#703], [#710], [#715], [#744]).
2121
- operator-binary:
2222
- The console log level was set by `OPA_OPERATOR_LOG`, and is now set by `CONSOLE_LOG_LEVEL`.
2323
- The file log level was set by `OPA_OPERATOR_LOG`, and is now set by `FILE_LOG_LEVEL`.
@@ -43,7 +43,7 @@ All notable changes to this project will be documented in this file.
4343
- The defaults from the docker images itself will now apply, which will be different from 1000/0 going forward
4444
- This is marked as breaking because tools and policies might exist, which require these fields to be set
4545
- user-info-fetcher: the AD backend now uses the Kerberos realm to expand the user search filter ([#737])
46-
- BREAKING: Bump stackable-operator to 0.94.0 and update other dependencies ([#743]).
46+
- BREAKING: Bump stackable-operator to 0.94.0 and update other dependencies ([#743], [#744]).
4747
- The default Kubernetes cluster domain name is now fetched from the kubelet API unless explicitly configured.
4848
- This requires operators to have the RBAC permission to get nodes/proxy in the apiGroup "". The helm-chart takes care of this.
4949
- The CLI argument `--kubernetes-node-name` or env variable `KUBERNETES_NODE_NAME` needs to be set. The helm-chart takes care of this.
@@ -52,6 +52,8 @@ All notable changes to this project will be documented in this file.
5252

5353
- Use `json` file extension for log files ([#709]).
5454
- Allow uppercase characters in domain names ([#743]).
55+
- Add missing RBAC permission to patch Kubernetes `events` used for error reporting to helm-chart ([#744]).
56+
- Correctly propagate the Kubernetes cluster domain to the `opa-bundle-builder` and `user-info-fetcher` sidecars ([#744]).
5557

5658
### Removed
5759

@@ -71,6 +73,7 @@ All notable changes to this project will be documented in this file.
7173
[#732]: https://github.com/stackabletech/opa-operator/pull/732
7274
[#737]: https://github.com/stackabletech/opa-operator/pull/737
7375
[#743]: https://github.com/stackabletech/opa-operator/pull/743
76+
[#744]: https://github.com/stackabletech/opa-operator/pull/744
7477

7578
## [25.3.0] - 2025-03-21
7679

deploy/helm/opa-operator/templates/roles.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ rules:
8585
- patch
8686
verbs:
8787
- create
88+
- patch
8889
- apiGroups:
8990
- {{ include "operator.name" . }}.stackable.tech
9091
resources:

rust/operator-binary/src/controller.rs

Lines changed: 98 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,9 @@ use stackable_operator::{
3838
api::{
3939
apps::v1::{DaemonSet, DaemonSetSpec},
4040
core::v1::{
41-
ConfigMap, EmptyDirVolumeSource, EnvVar, HTTPGetAction, Probe, SecretVolumeSource,
42-
Service, ServiceAccount, ServicePort, ServiceSpec,
41+
ConfigMap, EmptyDirVolumeSource, EnvVar, EnvVarSource, HTTPGetAction,
42+
ObjectFieldSelector, Probe, SecretVolumeSource, Service, ServiceAccount,
43+
ServicePort, ServiceSpec,
4344
},
4445
},
4546
apimachinery::pkg::{apis::meta::v1::LabelSelector, util::intstr::IntOrString},
@@ -69,7 +70,7 @@ use stackable_operator::{
6970
operations::ClusterOperationsConditionBuilder,
7071
},
7172
time::Duration,
72-
utils::COMMON_BASH_TRAP_FUNCTIONS,
73+
utils::{COMMON_BASH_TRAP_FUNCTIONS, cluster_info::KubernetesClusterInfo},
7374
};
7475
use strum::{EnumDiscriminants, IntoStaticStr};
7576

@@ -104,6 +105,12 @@ const USER_INFO_FETCHER_KERBEROS_DIR: &str = "/stackable/kerberos";
104105

105106
const DOCKER_IMAGE_BASE_NAME: &str = "opa";
106107

108+
const CONSOLE_LOG_LEVEL_ENV: &str = "CONSOLE_LOG_LEVEL";
109+
const FILE_LOG_LEVEL_ENV: &str = "FILE_LOG_LEVEL";
110+
const FILE_LOG_DIRECTORY_ENV: &str = "FILE_LOG_DIRECTORY";
111+
const KUBERNETES_NODE_NAME_ENV: &str = "KUBERNETES_NODE_NAME";
112+
const KUBERNETES_CLUSTER_DOMAIN_ENV: &str = "KUBERNETES_CLUSTER_DOMAIN";
113+
107114
// logging defaults
108115
const DEFAULT_DECISION_LOGGING_ENABLED: bool = false;
109116
const DEFAULT_FILE_LOG_LEVEL: LogLevel = LogLevel::INFO;
@@ -144,6 +151,7 @@ pub struct Ctx {
144151
pub product_config: ProductConfigManager,
145152
pub opa_bundle_builder_image: String,
146153
pub user_info_fetcher_image: String,
154+
pub cluster_info: KubernetesClusterInfo,
147155
}
148156

149157
#[derive(Snafu, Debug, EnumDiscriminants)]
@@ -492,6 +500,7 @@ pub async fn reconcile_opa(
492500
&ctx.opa_bundle_builder_image,
493501
&ctx.user_info_fetcher_image,
494502
&rbac_sa,
503+
&ctx.cluster_info,
495504
)?;
496505

497506
cluster_resources
@@ -714,6 +723,44 @@ fn build_server_rolegroup_config_map(
714723
})
715724
}
716725

726+
/// Env variables that are need to run stackable Rust binaries, such as
727+
/// * opa-bundle-builder
728+
/// * user-info-fetcher
729+
fn add_stackable_rust_cli_env_vars(
730+
container_builder: &mut ContainerBuilder,
731+
cluster_info: &KubernetesClusterInfo,
732+
log_level: impl Into<String>,
733+
container: &v1alpha1::Container,
734+
) {
735+
let log_level = log_level.into();
736+
container_builder
737+
.add_env_var(CONSOLE_LOG_LEVEL_ENV, log_level.clone())
738+
.add_env_var(FILE_LOG_LEVEL_ENV, log_level)
739+
.add_env_var(
740+
FILE_LOG_DIRECTORY_ENV,
741+
format!("{STACKABLE_LOG_DIR}/{container}",),
742+
)
743+
.add_env_var_from_source(
744+
KUBERNETES_NODE_NAME_ENV,
745+
EnvVarSource {
746+
field_ref: Some(ObjectFieldSelector {
747+
field_path: "spec.nodeName".to_owned(),
748+
..Default::default()
749+
}),
750+
..Default::default()
751+
},
752+
)
753+
// We set the cluster domain always explicitly, because the product Pods does not have the
754+
// RBAC permission to get the `nodes/proxy` resource at cluster scope. This is likely
755+
// because it only has a RoleBinding and no ClusterRoleBinding.
756+
// By setting the cluster domain explicitly we avoid that the sidecars try to look it up
757+
// based on some information coming from the node.
758+
.add_env_var(
759+
KUBERNETES_CLUSTER_DOMAIN_ENV,
760+
cluster_info.cluster_domain.to_string(),
761+
);
762+
}
763+
717764
/// The rolegroup [`DaemonSet`] runs the rolegroup, as configured by the administrator.
718765
///
719766
/// The [`Pod`](`stackable_operator::k8s_openapi::api::core::v1::Pod`)s are accessible through the
@@ -732,6 +779,7 @@ fn build_server_rolegroup_daemonset(
732779
opa_bundle_builder_image: &str,
733780
user_info_fetcher_image: &str,
734781
service_account: &ServiceAccount,
782+
cluster_info: &KubernetesClusterInfo,
735783
) -> Result<DaemonSet> {
736784
let opa_name = opa.metadata.name.as_deref().context(NoNameSnafu)?;
737785
let role = opa.role(opa_role);
@@ -782,8 +830,6 @@ fn build_server_rolegroup_daemonset(
782830
.context(AddVolumeMountSnafu)?
783831
.resources(merged_config.resources.to_owned().into());
784832

785-
let console_and_file_log_level = bundle_builder_log_level(merged_config);
786-
787833
cb_bundle_builder
788834
.image_from_product_image(resolved_product_image) // inherit the pull policy and pull secrets, and then...
789835
.image(opa_bundle_builder_image) // ...override the image
@@ -799,12 +845,6 @@ fn build_server_rolegroup_daemonset(
799845
&bundle_builder_container_name,
800846
)])
801847
.add_env_var_from_field_path("WATCH_NAMESPACE", FieldPathEnvVar::Namespace)
802-
.add_env_var("CONSOLE_LOG_LEVEL", console_and_file_log_level.to_string())
803-
.add_env_var("FILE_LOG_LEVEL", console_and_file_log_level.to_string())
804-
.add_env_var(
805-
"FILE_LOG_DIRECTORY",
806-
format!("{STACKABLE_LOG_DIR}/{bundle_builder_container_name}"),
807-
)
808848
.add_volume_mount(BUNDLES_VOLUME_NAME, BUNDLES_DIR)
809849
.context(AddVolumeMountSnafu)?
810850
.add_volume_mount(LOG_VOLUME_NAME, STACKABLE_LOG_DIR)
@@ -838,6 +878,12 @@ fn build_server_rolegroup_daemonset(
838878
}),
839879
..Probe::default()
840880
});
881+
add_stackable_rust_cli_env_vars(
882+
&mut cb_bundle_builder,
883+
cluster_info,
884+
sidecar_container_log_level(merged_config, &v1alpha1::Container::BundleBuilder).to_string(),
885+
&v1alpha1::Container::BundleBuilder,
886+
);
841887

842888
cb_opa
843889
.image_from_product_image(resolved_product_image)
@@ -949,6 +995,13 @@ fn build_server_rolegroup_daemonset(
949995
.with_memory_limit("128Mi")
950996
.build(),
951997
);
998+
add_stackable_rust_cli_env_vars(
999+
&mut cb_user_info_fetcher,
1000+
cluster_info,
1001+
sidecar_container_log_level(merged_config, &v1alpha1::Container::UserInfoFetcher)
1002+
.to_string(),
1003+
&v1alpha1::Container::UserInfoFetcher,
1004+
);
9521005

9531006
match &user_info.backend {
9541007
user_info_fetcher::v1alpha1::Backend::None {} => {}
@@ -1257,13 +1310,42 @@ fn build_bundle_builder_start_command(
12571310
}
12581311
}
12591312

1260-
fn bundle_builder_log_level(merged_config: &v1alpha1::OpaConfig) -> BundleBuilderLogLevel {
1313+
/// TODO: *Technically* this function would need to be way more complex.
1314+
/// For now it's a good-enough approximation, this is fine :D
1315+
///
1316+
/// The following config
1317+
///
1318+
/// ```
1319+
/// containers:
1320+
/// opa-bundle-builder:
1321+
/// console:
1322+
/// level: DEBUG
1323+
/// file:
1324+
/// level: INFO
1325+
/// loggers:
1326+
/// ROOT:
1327+
/// level: INFO
1328+
/// my.module:
1329+
/// level: DEBUG
1330+
/// some.chatty.module:
1331+
/// level: NONE
1332+
/// ```
1333+
///
1334+
/// should result in
1335+
/// `CONSOLE_LOG_LEVEL=info,my.module=debug,some.chatty.module=none`
1336+
/// and
1337+
/// `FILE_LOG_LEVEL=info,my.module=info,some.chatty.module=none`.
1338+
/// Note that `my.module` is `info` instead of `debug`, because it's clamped by the global file log
1339+
/// level.
1340+
///
1341+
/// Context: https://docs.stackable.tech/home/stable/concepts/logging/
1342+
fn sidecar_container_log_level(
1343+
merged_config: &v1alpha1::OpaConfig,
1344+
sidecar_container: &v1alpha1::Container,
1345+
) -> BundleBuilderLogLevel {
12611346
if let Some(ContainerLogConfig {
12621347
choice: Some(ContainerLogConfigChoice::Automatic(log_config)),
1263-
}) = merged_config
1264-
.logging
1265-
.containers
1266-
.get(&v1alpha1::Container::BundleBuilder)
1348+
}) = merged_config.logging.containers.get(sidecar_container)
12671349
{
12681350
if let Some(logger) = log_config
12691351
.loggers

rust/operator-binary/src/crd/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,7 @@ pub mod versioned {
168168
Vector,
169169
BundleBuilder,
170170
Opa,
171+
UserInfoFetcher,
171172
}
172173

173174
#[derive(Clone, Debug, Default, Fragment, JsonSchema, PartialEq)]

rust/operator-binary/src/main.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ use stackable_operator::{
2828
namespace::WatchNamespace,
2929
shared::yaml::SerializeOptions,
3030
telemetry::Tracing,
31+
utils::cluster_info::KubernetesClusterInfo,
3132
};
3233

3334
use crate::controller::OPA_FULL_CONTROLLER_NAME;
@@ -100,12 +101,14 @@ async fn main() -> anyhow::Result<()> {
100101
let client =
101102
client::initialize_operator(Some(OPERATOR_NAME.to_string()), &cluster_info_opts)
102103
.await?;
104+
let kubernetes_cluster_info = client.kubernetes_cluster_info.clone();
103105
create_controller(
104106
client,
105107
product_config,
106108
watch_namespace,
107109
operator_image.clone(),
108110
operator_image,
111+
kubernetes_cluster_info,
109112
)
110113
.await;
111114
}
@@ -123,6 +126,7 @@ async fn create_controller(
123126
watch_namespace: WatchNamespace,
124127
opa_bundle_builder_image: String,
125128
user_info_fetcher_image: String,
129+
cluster_info: KubernetesClusterInfo,
126130
) {
127131
let opa_api: Api<DeserializeGuard<v1alpha1::OpaCluster>> = watch_namespace.get_api(&client);
128132
let daemonsets_api: Api<DeserializeGuard<DaemonSet>> = watch_namespace.get_api(&client);
@@ -150,6 +154,7 @@ async fn create_controller(
150154
product_config,
151155
opa_bundle_builder_image,
152156
user_info_fetcher_image,
157+
cluster_info,
153158
}),
154159
)
155160
// We can let the reporting happen in the background

0 commit comments

Comments
 (0)