From 44ba48ee2f4298915af4952780583ce3ae6b28b3 Mon Sep 17 00:00:00 2001 From: Masaki Yatsu Date: Sun, 23 Nov 2025 15:02:04 +0900 Subject: [PATCH] fix(prometheus): fix Grafana auth and set pod security standards --- prometheus/README.md | 51 ++++++++++ ...grafana-oidc-external-secret.gomplate.yaml | 21 ++++ prometheus/justfile | 39 +++++++- prometheus/values.gomplate.yaml | 97 ++++++++++++++++++- 4 files changed, 204 insertions(+), 4 deletions(-) create mode 100644 prometheus/grafana-oidc-external-secret.gomplate.yaml diff --git a/prometheus/README.md b/prometheus/README.md index d3b8e1f..a8bab5b 100644 --- a/prometheus/README.md +++ b/prometheus/README.md @@ -72,6 +72,57 @@ kubectl port-forward -n monitoring svc/kube-prometheus-stack-alertmanager 9093:9 Then access at `http://localhost:9093` +## Pod Security Standards + +The monitoring namespace uses **privileged** Pod Security Standard enforcement. + +```bash +pod-security.kubernetes.io/enforce=privileged +``` + +#### Why Privileged Instead of Baseline or Restricted? + +The `prometheus-node-exporter` component requires the following privileged access to collect hardware and OS-level metrics: + +- `hostNetwork: true` - Access to host network namespace +- `hostPID: true` - Access to host process IDs +- `hostPath` volumes - Access to host filesystem paths (`/`, `/sys`, `/proc`) +- `hostPort: 9100` - Expose metrics on host port + +These requirements are incompatible with both `baseline` and `restricted` Pod Security Standards: +- **baseline** prohibits: `hostNetwork`, `hostPID`, `hostPath`, `hostPort` +- **restricted** has even stricter requirements + +While these settings may seem permissive, they are necessary for node-exporter to collect system-level metrics from the host. 
+ +#### Security Measures + +Although the namespace is enforced at the privileged level, every component other than node-exporter runs with a restricted-level security context: + +- **Grafana**: Non-root user (472), dropped capabilities, seccomp profile +- **Prometheus**: Non-root user (1000), read-only root filesystem, dropped capabilities +- **Alertmanager**: Non-root user (1000), read-only root filesystem, dropped capabilities +- **Prometheus Operator**: Non-root user (65534), read-only root filesystem, dropped capabilities +- **kube-state-metrics**: Non-root user (65534), read-only root filesystem, dropped capabilities + +#### Alternative: Restricted Mode Without Node Metrics + +To use `restricted` Pod Security Standard, disable node-exporter: + +1. Add to `values.gomplate.yaml`: + ```yaml + nodeExporter: + enabled: false + ``` + +2. Update justfile to use `restricted`: + ```bash + kubectl label namespace ${PROMETHEUS_NAMESPACE} \ + pod-security.kubernetes.io/enforce=restricted --overwrite + ``` + +**Trade-off**: You will lose node-level metrics (CPU, memory, disk, network at the host level), though pod-level metrics remain available. 
+ ## Configuration Environment variables (set in `.env.local` or override): diff --git a/prometheus/grafana-oidc-external-secret.gomplate.yaml b/prometheus/grafana-oidc-external-secret.gomplate.yaml new file mode 100644 index 0000000..79d3d6e --- /dev/null +++ b/prometheus/grafana-oidc-external-secret.gomplate.yaml @@ -0,0 +1,21 @@ +apiVersion: external-secrets.io/v1 +kind: ExternalSecret +metadata: + name: grafana-oidc-credentials + namespace: {{ .Env.PROMETHEUS_NAMESPACE }} +spec: + refreshInterval: 1h + secretStoreRef: + name: vault-secret-store + kind: ClusterSecretStore + target: + name: grafana-oidc-credentials + creationPolicy: Owner + template: + data: + client-secret: "{{ `{{ .client_secret }}` }}" + data: + - secretKey: client_secret + remoteRef: + key: grafana/oidc + property: client_secret diff --git a/prometheus/justfile b/prometheus/justfile index 6293fb8..d164963 100644 --- a/prometheus/justfile +++ b/prometheus/justfile @@ -101,6 +101,14 @@ install: check-env #!/bin/bash set -euo pipefail just create-namespace + + # Using 'privileged' because prometheus-node-exporter requires: + # - hostNetwork, hostPID (not allowed in baseline/restricted) + # - hostPath volumes (not allowed in baseline/restricted) + # - hostPort (not allowed in baseline/restricted) + kubectl label namespace ${PROMETHEUS_NAMESPACE} \ + pod-security.kubernetes.io/enforce=privileged --overwrite + just add-helm-repo # Create credentials if not exists @@ -182,6 +190,31 @@ setup-oidc: # Create admin group if it doesn't exist just keycloak::create-group "grafana-admins" "" "Grafana administrators group" || true + # Store OIDC client secret in Vault and create ExternalSecret + if helm status external-secrets -n ${EXTERNAL_SECRETS_NAMESPACE} &>/dev/null; then + echo "External Secrets Operator detected. Creating ExternalSecret..." 
+ just vault::put grafana/oidc client_secret="${oidc_client_secret}" + + kubectl delete secret grafana-oidc-credentials -n ${PROMETHEUS_NAMESPACE} --ignore-not-found + kubectl delete externalsecret grafana-oidc-credentials -n ${PROMETHEUS_NAMESPACE} --ignore-not-found + + gomplate -f grafana-oidc-external-secret.gomplate.yaml | kubectl apply -f - + + echo "Waiting for ExternalSecret to sync..." + kubectl wait --for=condition=Ready externalsecret/grafana-oidc-credentials \ + -n ${PROMETHEUS_NAMESPACE} --timeout=60s + else + echo "External Secrets Operator not found. Creating secret directly..." + kubectl delete secret grafana-oidc-credentials -n ${PROMETHEUS_NAMESPACE} --ignore-not-found + + kubectl create secret generic grafana-oidc-credentials -n ${PROMETHEUS_NAMESPACE} \ + --from-literal=client-secret="${oidc_client_secret}" + + if helm status vault -n ${K8S_VAULT_NAMESPACE} &>/dev/null; then + just vault::put grafana/oidc client_secret="${oidc_client_secret}" + fi + fi + # Update Helm values with OIDC configuration export GRAFANA_OIDC_CLIENT_SECRET="${oidc_client_secret}" export GRAFANA_OIDC_ENABLED="true" @@ -210,8 +243,12 @@ disable-oidc: set -euo pipefail echo "Disabling Keycloak OIDC authentication for Grafana..." 
+ # Clean up OIDC secrets + kubectl delete secret grafana-oidc-credentials -n ${PROMETHEUS_NAMESPACE} --ignore-not-found + kubectl delete externalsecret grafana-oidc-credentials -n ${PROMETHEUS_NAMESPACE} --ignore-not-found + # Update Helm values to disable OIDC - export GRAFANA_OIDC_ENABLED="false" + export GRAFANA_OIDC_ENABLED="" export GRAFANA_OIDC_CLIENT_SECRET="" gomplate -f values.gomplate.yaml -o values.yaml diff --git a/prometheus/values.gomplate.yaml b/prometheus/values.gomplate.yaml index e46cb22..75e9f1d 100644 --- a/prometheus/values.gomplate.yaml +++ b/prometheus/values.gomplate.yaml @@ -5,11 +5,40 @@ grafana: enabled: true + securityContext: + runAsNonRoot: true + runAsUser: 472 + runAsGroup: 472 + fsGroup: 472 + seccompProfile: + type: RuntimeDefault + + containerSecurityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: false + runAsNonRoot: true + runAsUser: 472 + runAsGroup: 472 + seccompProfile: + type: RuntimeDefault + admin: existingSecret: grafana-admin-credentials userKey: admin-user passwordKey: admin-password +{{- if .Env.GRAFANA_OIDC_ENABLED }} + # Reference OIDC client secret from Kubernetes Secret + envValueFrom: + GRAFANA_OIDC_CLIENT_SECRET: + secretKeyRef: + name: grafana-oidc-credentials + key: client-secret +{{- end }} + ingress: enabled: true ingressClassName: traefik @@ -25,14 +54,14 @@ grafana: grafana.ini: server: root_url: https://{{ .Env.GRAFANA_HOST }} -{{- if eq (.Env.GRAFANA_OIDC_ENABLED | default "false") "true" }} +{{- if .Env.GRAFANA_OIDC_ENABLED }} auth.generic_oauth: enabled: true name: Keycloak allow_sign_up: true client_id: grafana - client_secret: {{ .Env.GRAFANA_OIDC_CLIENT_SECRET }} - scopes: openid profile email groups + client_secret: $__env{GRAFANA_OIDC_CLIENT_SECRET} + scopes: openid profile email auth_url: https://{{ .Env.KEYCLOAK_HOST }}/realms/{{ .Env.KEYCLOAK_REALM }}/protocol/openid-connect/auth token_url: https://{{ .Env.KEYCLOAK_HOST }}/realms/{{ 
.Env.KEYCLOAK_REALM }}/protocol/openid-connect/token api_url: https://{{ .Env.KEYCLOAK_HOST }}/realms/{{ .Env.KEYCLOAK_REALM }}/protocol/openid-connect/userinfo @@ -67,6 +96,22 @@ grafana: # Prometheus Configuration prometheus: prometheusSpec: + securityContext: + runAsNonRoot: true + runAsUser: 1000 + runAsGroup: 2000 + fsGroup: 2000 + seccompProfile: + type: RuntimeDefault + + containers: + - name: prometheus + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true # Retention settings retention: 30d retentionSize: "50GB" @@ -112,6 +157,22 @@ prometheus: # Alertmanager Configuration alertmanager: alertmanagerSpec: + securityContext: + runAsNonRoot: true + runAsUser: 1000 + runAsGroup: 2000 + fsGroup: 2000 + seccompProfile: + type: RuntimeDefault + + containers: + - name: alertmanager + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true # Storage storage: volumeClaimTemplate: @@ -170,6 +231,21 @@ kubeStateMetrics: # kube-state-metrics subchart configuration kube-state-metrics: + securityContext: + runAsNonRoot: true + runAsUser: 65534 + runAsGroup: 65534 + fsGroup: 65534 + seccompProfile: + type: RuntimeDefault + + containerSecurityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + # Resource configuration based on Goldilocks/VPA recommendations resources: requests: @@ -196,6 +272,21 @@ prometheus-node-exporter: # Prometheus Operator Configuration # Resource configuration based on Goldilocks/VPA recommendations prometheusOperator: + securityContext: + runAsNonRoot: true + runAsUser: 65534 + runAsGroup: 65534 + fsGroup: 65534 + seccompProfile: + type: RuntimeDefault + + containerSecurityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + resources: requests: cpu: 15m