fix(prometheus): fix Grafana auth and set pod security standards
This commit is contained in:
@@ -72,6 +72,57 @@ kubectl port-forward -n monitoring svc/kube-prometheus-stack-alertmanager 9093:9
|
||||
|
||||
Then access at `http://localhost:9093`
|
||||
|
||||
## Pod Security Standards
|
||||
|
||||
The monitoring namespace enforces the **privileged** Pod Security Standard, applied through the following namespace label:
|
||||
|
||||
```bash
|
||||
pod-security.kubernetes.io/enforce=privileged
|
||||
```
|
||||
|
||||
#### Why Privileged Instead of Baseline or Restricted?
|
||||
|
||||
The `prometheus-node-exporter` component requires the following privileged access to collect hardware and OS-level metrics:
|
||||
|
||||
- `hostNetwork: true` - Access to host network namespace
|
||||
- `hostPID: true` - Access to host process IDs
|
||||
- `hostPath` volumes - Access to host filesystem paths (`/`, `/sys`, `/proc`)
|
||||
- `hostPort: 9100` - Expose metrics on host port
|
||||
|
||||
These requirements are incompatible with both `baseline` and `restricted` Pod Security Standards:
|
||||
- **baseline** prohibits: `hostNetwork`, `hostPID`, `hostPath`, `hostPort`
|
||||
- **restricted** has even stricter requirements
|
||||
|
||||
While these settings may seem permissive, they are necessary for node-exporter to collect system-level metrics from the host.
|
||||
|
||||
#### Security Measures
|
||||
|
||||
Although the namespace is enforced at the privileged level, every component other than node-exporter still runs with a restricted-level security context:
|
||||
|
||||
- **Grafana**: Non-root user (472), dropped capabilities, seccomp profile
|
||||
- **Prometheus**: Non-root user (1000), read-only root filesystem, dropped capabilities
|
||||
- **Alertmanager**: Non-root user (1000), read-only root filesystem, dropped capabilities
|
||||
- **Prometheus Operator**: Non-root user (65534), read-only root filesystem, dropped capabilities
|
||||
- **kube-state-metrics**: Non-root user (65534), read-only root filesystem, dropped capabilities
|
||||
|
||||
#### Alternative: Restricted Mode Without Node Metrics
|
||||
|
||||
To enforce the `restricted` Pod Security Standard instead, disable node-exporter:
|
||||
|
||||
1. Add to `values.gomplate.yaml`:
|
||||
```yaml
|
||||
nodeExporter:
|
||||
enabled: false
|
||||
```
|
||||
|
||||
2. Update the namespace label command in the justfile so it enforces `restricted`:
|
||||
```bash
|
||||
kubectl label namespace ${PROMETHEUS_NAMESPACE} \
|
||||
pod-security.kubernetes.io/enforce=restricted --overwrite
|
||||
```
|
||||
|
||||
**Trade-off**: You will lose node-level metrics (CPU, memory, disk, network at the host level), though pod-level metrics remain available.
|
||||
|
||||
## Configuration
|
||||
|
||||
Environment variables (set in `.env.local` or override):
|
||||
|
||||
21
prometheus/grafana-oidc-external-secret.gomplate.yaml
Normal file
21
prometheus/grafana-oidc-external-secret.gomplate.yaml
Normal file
@@ -0,0 +1,21 @@
|
||||
---
# ExternalSecret that materializes the Grafana OIDC client secret as a
# Kubernetes Secret named `grafana-oidc-credentials`.
#
# This file is a gomplate template: render it first, then apply the output.
# The only environment variable it reads is PROMETHEUS_NAMESPACE.
apiVersion: external-secrets.io/v1
kind: ExternalSecret
metadata:
  name: grafana-oidc-credentials
  # Quoted so a namespace that looks like a number or boolean still renders
  # as a YAML string.
  namespace: "{{ .Env.PROMETHEUS_NAMESPACE }}"
spec:
  refreshInterval: 1h
  secretStoreRef:
    name: vault-secret-store
    kind: ClusterSecretStore
  target:
    name: grafana-oidc-credentials
    creationPolicy: Owner
    template:
      data:
        # The backtick-quoted raw string makes gomplate emit the inner
        # template expression verbatim, so it survives rendering and is left
        # in the applied manifest. The resulting Secret key is `client-secret`
        # (hyphen), while the fetched value is named `client_secret`
        # (underscore).
        client-secret: "{{ `{{ .client_secret }}` }}"
  data:
    # Reads property `client_secret` of the store entry `grafana/oidc` and
    # exposes it to the template above under the name `client_secret`.
    - secretKey: client_secret
      remoteRef:
        key: grafana/oidc
        property: client_secret
|
||||
@@ -101,6 +101,14 @@ install: check-env
|
||||
#!/bin/bash
|
||||
set -euo pipefail
|
||||
just create-namespace
|
||||
|
||||
# Using 'privileged' because prometheus-node-exporter requires:
|
||||
# - hostNetwork, hostPID (not allowed in baseline/restricted)
|
||||
# - hostPath volumes (not allowed in baseline/restricted)
|
||||
# - hostPort (not allowed in baseline/restricted)
|
||||
kubectl label namespace ${PROMETHEUS_NAMESPACE} \
|
||||
pod-security.kubernetes.io/enforce=privileged --overwrite
|
||||
|
||||
just add-helm-repo
|
||||
|
||||
# Create credentials if not exists
|
||||
@@ -182,6 +190,31 @@ setup-oidc:
|
||||
# Create admin group if it doesn't exist
|
||||
just keycloak::create-group "grafana-admins" "" "Grafana administrators group" || true
|
||||
|
||||
# Store OIDC client secret in Vault and create ExternalSecret
|
||||
if helm status external-secrets -n ${EXTERNAL_SECRETS_NAMESPACE} &>/dev/null; then
|
||||
echo "External Secrets Operator detected. Creating ExternalSecret..."
|
||||
just vault::put grafana/oidc client_secret="${oidc_client_secret}"
|
||||
|
||||
kubectl delete secret grafana-oidc-credentials -n ${PROMETHEUS_NAMESPACE} --ignore-not-found
|
||||
kubectl delete externalsecret grafana-oidc-credentials -n ${PROMETHEUS_NAMESPACE} --ignore-not-found
|
||||
|
||||
gomplate -f grafana-oidc-external-secret.gomplate.yaml | kubectl apply -f -
|
||||
|
||||
echo "Waiting for ExternalSecret to sync..."
|
||||
kubectl wait --for=condition=Ready externalsecret/grafana-oidc-credentials \
|
||||
-n ${PROMETHEUS_NAMESPACE} --timeout=60s
|
||||
else
|
||||
echo "External Secrets Operator not found. Creating secret directly..."
|
||||
kubectl delete secret grafana-oidc-credentials -n ${PROMETHEUS_NAMESPACE} --ignore-not-found
|
||||
|
||||
kubectl create secret generic grafana-oidc-credentials -n ${PROMETHEUS_NAMESPACE} \
|
||||
--from-literal=client-secret="${oidc_client_secret}"
|
||||
|
||||
if helm status vault -n ${K8S_VAULT_NAMESPACE} &>/dev/null; then
|
||||
just vault::put grafana/oidc client_secret="${oidc_client_secret}"
|
||||
fi
|
||||
fi
|
||||
|
||||
# Update Helm values with OIDC configuration
|
||||
export GRAFANA_OIDC_CLIENT_SECRET="${oidc_client_secret}"
|
||||
export GRAFANA_OIDC_ENABLED="true"
|
||||
@@ -210,8 +243,12 @@ disable-oidc:
|
||||
set -euo pipefail
|
||||
echo "Disabling Keycloak OIDC authentication for Grafana..."
|
||||
|
||||
# Clean up OIDC secrets
|
||||
kubectl delete secret grafana-oidc-credentials -n ${PROMETHEUS_NAMESPACE} --ignore-not-found
|
||||
kubectl delete externalsecret grafana-oidc-credentials -n ${PROMETHEUS_NAMESPACE} --ignore-not-found
|
||||
|
||||
# Update Helm values to disable OIDC
|
||||
export GRAFANA_OIDC_ENABLED="false"
|
||||
export GRAFANA_OIDC_ENABLED=""
|
||||
export GRAFANA_OIDC_CLIENT_SECRET=""
|
||||
gomplate -f values.gomplate.yaml -o values.yaml
|
||||
|
||||
|
||||
@@ -5,11 +5,40 @@
|
||||
grafana:
|
||||
enabled: true
|
||||
|
||||
securityContext:
|
||||
runAsNonRoot: true
|
||||
runAsUser: 472
|
||||
runAsGroup: 472
|
||||
fsGroup: 472
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
|
||||
containerSecurityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: false
|
||||
runAsNonRoot: true
|
||||
runAsUser: 472
|
||||
runAsGroup: 472
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
|
||||
admin:
|
||||
existingSecret: grafana-admin-credentials
|
||||
userKey: admin-user
|
||||
passwordKey: admin-password
|
||||
|
||||
{{- if .Env.GRAFANA_OIDC_ENABLED }}
|
||||
# Reference OIDC client secret from Kubernetes Secret
|
||||
envValueFrom:
|
||||
GRAFANA_OIDC_CLIENT_SECRET:
|
||||
secretKeyRef:
|
||||
name: grafana-oidc-credentials
|
||||
key: client-secret
|
||||
{{- end }}
|
||||
|
||||
ingress:
|
||||
enabled: true
|
||||
ingressClassName: traefik
|
||||
@@ -25,14 +54,14 @@ grafana:
|
||||
grafana.ini:
|
||||
server:
|
||||
root_url: https://{{ .Env.GRAFANA_HOST }}
|
||||
{{- if eq (.Env.GRAFANA_OIDC_ENABLED | default "false") "true" }}
|
||||
{{- if .Env.GRAFANA_OIDC_ENABLED }}
|
||||
auth.generic_oauth:
|
||||
enabled: true
|
||||
name: Keycloak
|
||||
allow_sign_up: true
|
||||
client_id: grafana
|
||||
client_secret: {{ .Env.GRAFANA_OIDC_CLIENT_SECRET }}
|
||||
scopes: openid profile email groups
|
||||
client_secret: $__env{GRAFANA_OIDC_CLIENT_SECRET}
|
||||
scopes: openid profile email
|
||||
auth_url: https://{{ .Env.KEYCLOAK_HOST }}/realms/{{ .Env.KEYCLOAK_REALM }}/protocol/openid-connect/auth
|
||||
token_url: https://{{ .Env.KEYCLOAK_HOST }}/realms/{{ .Env.KEYCLOAK_REALM }}/protocol/openid-connect/token
|
||||
api_url: https://{{ .Env.KEYCLOAK_HOST }}/realms/{{ .Env.KEYCLOAK_REALM }}/protocol/openid-connect/userinfo
|
||||
@@ -67,6 +96,22 @@ grafana:
|
||||
# Prometheus Configuration
|
||||
prometheus:
|
||||
prometheusSpec:
|
||||
securityContext:
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1000
|
||||
runAsGroup: 2000
|
||||
fsGroup: 2000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
|
||||
containers:
|
||||
- name: prometheus
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: true
|
||||
# Retention settings
|
||||
retention: 30d
|
||||
retentionSize: "50GB"
|
||||
@@ -112,6 +157,22 @@ prometheus:
|
||||
# Alertmanager Configuration
|
||||
alertmanager:
|
||||
alertmanagerSpec:
|
||||
securityContext:
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1000
|
||||
runAsGroup: 2000
|
||||
fsGroup: 2000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
|
||||
containers:
|
||||
- name: alertmanager
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: true
|
||||
# Storage
|
||||
storage:
|
||||
volumeClaimTemplate:
|
||||
@@ -170,6 +231,21 @@ kubeStateMetrics:
|
||||
|
||||
# kube-state-metrics subchart configuration
|
||||
kube-state-metrics:
|
||||
securityContext:
|
||||
runAsNonRoot: true
|
||||
runAsUser: 65534
|
||||
runAsGroup: 65534
|
||||
fsGroup: 65534
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
|
||||
containerSecurityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: true
|
||||
|
||||
# Resource configuration based on Goldilocks/VPA recommendations
|
||||
resources:
|
||||
requests:
|
||||
@@ -196,6 +272,21 @@ prometheus-node-exporter:
|
||||
# Prometheus Operator Configuration
|
||||
# Resource configuration based on Goldilocks/VPA recommendations
|
||||
prometheusOperator:
|
||||
securityContext:
|
||||
runAsNonRoot: true
|
||||
runAsUser: 65534
|
||||
runAsGroup: 65534
|
||||
fsGroup: 65534
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
|
||||
containerSecurityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: true
|
||||
|
||||
resources:
|
||||
requests:
|
||||
cpu: 15m
|
||||
|
||||
Reference in New Issue
Block a user