diff --git a/CLAUDE.md b/CLAUDE.md
index 7b266de..6ca24dc 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -57,6 +57,14 @@ just vault::setup-oidc-auth # Configure Vault OIDC
 just k8s::setup-oidc-auth # Enable k8s OIDC auth
 ```
 
+### Observability Stack Installation (Optional)
+
+```bash
+just prometheus::install # Install kube-prometheus-stack (Prometheus + Grafana + Alertmanager)
+just prometheus::setup-oidc # Configure Grafana OIDC with Keycloak
+# Future: Jaeger and OpenTelemetry Collector
+```
+
 ### Common Operations
 
 ```bash
@@ -72,6 +80,10 @@ just vault::get # Retrieve secret
 just postgres::create-db # Create database
 just postgres::psql # PostgreSQL shell
 
+# Observability
+just prometheus::grafana-password # Get Grafana admin password
+just keycloak::add-user-to-group <username> grafana-admins # Grant Grafana admin access
+
 # Testing/validation
 kubectl --context -oidc get nodes # Test OIDC auth
 ```
@@ -85,6 +97,39 @@ kubectl --context -oidc get nodes # Test OIDC auth
 - **Templates**: `*.gomplate.yaml` files use environment variables from `.env.local`
 - **Custom Extensions**: `custom.just` can be created for additional workflows
 
+### Gomplate Template Pattern
+
+**Environment Variable Management:**
+- Justfile manages environment variables and their default values
+- Gomplate templates access variables using `{{ .Env.VAR }}`
+
+**Example justfile pattern:**
+```just
+# At the top of justfile - define variables with defaults
+export PROMETHEUS_NAMESPACE := env("PROMETHEUS_NAMESPACE", "monitoring")
+export GRAFANA_HOST := env("GRAFANA_HOST", "")
+
+# In recipes - export variables for gomplate
+install:
+    #!/bin/bash
+    set -euo pipefail
+    export GRAFANA_OIDC_ENABLED="${GRAFANA_OIDC_ENABLED:-false}"
+    gomplate -f values.gomplate.yaml -o values.yaml
+```
+
+**Example gomplate template:**
+```yaml
+# values.gomplate.yaml
+namespace: {{ .Env.PROMETHEUS_NAMESPACE }}
+ingress:
+  hosts:
+    - {{ .Env.GRAFANA_HOST }}
+{{- if eq .Env.GRAFANA_OIDC_ENABLED "true" }}
+  oidc:
+    enabled: true
+{{- end }}
+```
+
 ### Authentication Flow
 
 1. Keycloak provides OIDC identity for all services
diff --git a/justfile b/justfile
index ab7926b..b3dd54c 100644
--- a/justfile
+++ b/justfile
@@ -22,6 +22,7 @@ mod metabase
 mod minio
 mod oauth2-proxy
 mod postgres
+mod prometheus
 mod qdrant
 mod querybook
 mod superset
diff --git a/prometheus/.gitignore b/prometheus/.gitignore
new file mode 100644
index 0000000..7f47975
--- /dev/null
+++ b/prometheus/.gitignore
@@ -0,0 +1 @@
+values.yaml
diff --git a/prometheus/grafana-admin-external-secret.gomplate.yaml b/prometheus/grafana-admin-external-secret.gomplate.yaml
new file mode 100644
index 0000000..2ddef30
--- /dev/null
+++ b/prometheus/grafana-admin-external-secret.gomplate.yaml
@@ -0,0 +1,22 @@
+apiVersion: external-secrets.io/v1
+kind: ExternalSecret
+metadata:
+  name: grafana-admin-credentials
+  namespace: {{ .Env.PROMETHEUS_NAMESPACE }}
+spec:
+  refreshInterval: 1h
+  secretStoreRef:
+    name: vault-secret-store
+    kind: ClusterSecretStore
+  target:
+    name: grafana-admin-credentials
+    creationPolicy: Owner
+    template:
+      data:
+        admin-user: "admin"
+        admin-password: "{{ `{{ .password }}` }}"
+  data:
+    - secretKey: password
+      remoteRef:
+        key: grafana/admin
+        property: password
diff --git a/prometheus/justfile b/prometheus/justfile
new file mode 100644
index 0000000..07870c9
--- /dev/null
+++ b/prometheus/justfile
@@ -0,0 +1,273 @@
+set fallback := true
+
+# Module defaults: each value comes from the environment when set, and is
+# exported so recipe scripts and gomplate templates (`.Env.*`) can read it.
+export PROMETHEUS_NAMESPACE := env("PROMETHEUS_NAMESPACE", "monitoring")
+export PROMETHEUS_CHART_VERSION := env("PROMETHEUS_CHART_VERSION", "79.4.0")
+export GRAFANA_HOST := env("GRAFANA_HOST", "")
+export PROMETHEUS_HOST := env("PROMETHEUS_HOST", "")
+export ALERTMANAGER_HOST := env("ALERTMANAGER_HOST", "")
+export GRAFANA_ADMIN_PASSWORD := env("GRAFANA_ADMIN_PASSWORD", "")
+export GRAFANA_OIDC_ENABLED := env("GRAFANA_OIDC_ENABLED", "false")
+export GRAFANA_OIDC_CLIENT_SECRET := env("GRAFANA_OIDC_CLIENT_SECRET", "")
+export KEYCLOAK_NAMESPACE := env("KEYCLOAK_NAMESPACE", "keycloak")
+export KEYCLOAK_REALM := env("KEYCLOAK_REALM", "")
+export KEYCLOAK_HOST := env("KEYCLOAK_HOST", "")
+export K8S_VAULT_NAMESPACE := env("K8S_VAULT_NAMESPACE", "vault")
+export EXTERNAL_SECRETS_NAMESPACE := env("EXTERNAL_SECRETS_NAMESPACE", "external-secrets")
+
+[private]
+default:
+    @just --list --unsorted --list-submodules
+
+# Add Helm repository
+add-helm-repo:
+    helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
+    helm repo update
+
+# Remove Helm repository
+remove-helm-repo:
+    helm repo remove prometheus-community
+
+# Create namespace
+create-namespace:
+    @kubectl get namespace ${PROMETHEUS_NAMESPACE} &>/dev/null || \
+        kubectl create namespace ${PROMETHEUS_NAMESPACE}
+
+# Delete namespace
+delete-namespace:
+    @kubectl delete namespace ${PROMETHEUS_NAMESPACE} --ignore-not-found
+
+# Create Grafana admin credentials
+create-credentials:
+    #!/bin/bash
+    set -euo pipefail
+    password=$(
+        gum input --prompt="Grafana admin password: " --password --width=100 \
+            --placeholder="Empty to generate a random password"
+    )
+    if [ -z "${password}" ]; then
+        password=$(just utils::random-password)
+        echo "Generated password: ${password}"
+    fi
+    just create-namespace
+
+    # With External Secrets Operator installed, the password is written to
+    # Vault and the Kubernetes Secret is produced by an ExternalSecret;
+    # otherwise the Secret is created directly.
+    if helm status external-secrets -n ${EXTERNAL_SECRETS_NAMESPACE} &>/dev/null; then
+        echo "External Secrets Operator detected. Creating ExternalSecret..."
+        just put-admin-credentials-to-vault "${password}"
+
+        kubectl delete secret grafana-admin-credentials -n ${PROMETHEUS_NAMESPACE} --ignore-not-found
+        kubectl delete externalsecret grafana-admin-credentials -n ${PROMETHEUS_NAMESPACE} --ignore-not-found
+
+        gomplate -f grafana-admin-external-secret.gomplate.yaml | kubectl apply -f -
+
+        echo "Waiting for ExternalSecret to sync..."
+        kubectl wait --for=condition=Ready externalsecret/grafana-admin-credentials \
+            -n ${PROMETHEUS_NAMESPACE} --timeout=60s
+    else
+        echo "External Secrets Operator not found. Creating secret directly..."
+        kubectl delete secret grafana-admin-credentials -n ${PROMETHEUS_NAMESPACE} --ignore-not-found
+
+        kubectl create secret generic grafana-admin-credentials -n ${PROMETHEUS_NAMESPACE} \
+            --from-literal=admin-user="admin" \
+            --from-literal=admin-password="${password}"
+
+        # Also store the password in Vault when a Vault release is present.
+        if helm status vault -n ${K8S_VAULT_NAMESPACE} &>/dev/null; then
+            just put-admin-credentials-to-vault "${password}"
+        fi
+    fi
+
+# Delete Grafana admin credentials
+delete-credentials:
+    @kubectl delete secret grafana-admin-credentials -n ${PROMETHEUS_NAMESPACE} --ignore-not-found
+    @kubectl delete externalsecret grafana-admin-credentials -n ${PROMETHEUS_NAMESPACE} --ignore-not-found
+
+# `quote()` shell-escapes the value so passwords containing spaces or shell
+# metacharacters reach `vault::put` as a single, unmodified argument.
+# Put admin credentials to Vault
+put-admin-credentials-to-vault password:
+    @just vault::put grafana/admin password={{ quote(password) }}
+    @echo "Admin credentials stored in Vault under 'grafana/admin'."
+
+# Delete admin credentials from Vault
+delete-admin-credentials-from-vault:
+    @just vault::delete grafana/admin
+    @echo "Admin credentials deleted from Vault."
+
+# Get Grafana admin password from secret
+admin-password:
+    @kubectl get secret -n ${PROMETHEUS_NAMESPACE} grafana-admin-credentials \
+        -o jsonpath="{.data.admin-password}" | base64 --decode
+    @echo
+
+# Install kube-prometheus-stack
+install:
+    #!/bin/bash
+    set -euo pipefail
+    # Ask for the Grafana host here instead of in a dependency recipe: every
+    # recipe runs in its own process, so a value entered elsewhere would not
+    # reach the gomplate rendering below on a first install.
+    if [ -z "${GRAFANA_HOST}" ]; then
+        while [ -z "${GRAFANA_HOST}" ]; do
+            GRAFANA_HOST=$(
+                gum input --prompt="Grafana host: " --width=100 --placeholder="grafana.example.com"
+            )
+        done
+        export GRAFANA_HOST
+        just env::set GRAFANA_HOST="${GRAFANA_HOST}"
+    fi
+
+    just create-namespace
+    just add-helm-repo
+
+    # Create credentials if not exists
+    if ! kubectl get secret grafana-admin-credentials -n ${PROMETHEUS_NAMESPACE} &>/dev/null; then
+        just create-credentials
+    fi
+
+    export GRAFANA_OIDC_ENABLED="${GRAFANA_OIDC_ENABLED:-false}"
+    export GRAFANA_OIDC_CLIENT_SECRET="${GRAFANA_OIDC_CLIENT_SECRET:-}"
+
+    gomplate -f values.gomplate.yaml -o values.yaml
+    helm upgrade --cleanup-on-fail --install kube-prometheus-stack \
+        prometheus-community/kube-prometheus-stack \
+        --version ${PROMETHEUS_CHART_VERSION} \
+        -n ${PROMETHEUS_NAMESPACE} \
+        --wait \
+        -f values.yaml
+
+    admin_password=$(just admin-password)
+
+    echo ""
+    echo "=== kube-prometheus-stack installed ==="
+    echo "Components installed:"
+    echo "  - Prometheus Operator"
+    echo "  - Prometheus"
+    echo "  - Grafana"
+    echo "  - Alertmanager"
+    echo "  - Node Exporter"
+    echo "  - Kube State Metrics"
+    echo ""
+    echo "Grafana URL: https://${GRAFANA_HOST}"
+    echo "Grafana admin user: admin"
+    echo "Grafana admin password: ${admin_password}"
+    echo ""
+    echo "To setup Keycloak OIDC authentication for Grafana:"
+    echo "  just prometheus::setup-oidc"
+
+# Uninstall kube-prometheus-stack
+uninstall:
+    #!/bin/bash
+    set -euo pipefail
+    helm uninstall kube-prometheus-stack -n ${PROMETHEUS_NAMESPACE} --ignore-not-found --wait
+    # Clean up CRDs
+    kubectl delete crd \
+        alertmanagerconfigs.monitoring.coreos.com \
+        alertmanagers.monitoring.coreos.com \
+        podmonitors.monitoring.coreos.com \
+        probes.monitoring.coreos.com \
+        prometheusagents.monitoring.coreos.com \
+        prometheuses.monitoring.coreos.com \
+        prometheusrules.monitoring.coreos.com \
+        scrapeconfigs.monitoring.coreos.com \
+        servicemonitors.monitoring.coreos.com \
+        thanosrulers.monitoring.coreos.com \
+        --ignore-not-found
+    just delete-namespace
+
+# Setup Keycloak OIDC authentication for Grafana
+setup-oidc:
+    #!/bin/bash
+    set -euo pipefail
+    # The rendered Grafana config embeds these values in URLs, so refuse to
+    # continue when any of them is empty.
+    for required in GRAFANA_HOST KEYCLOAK_HOST KEYCLOAK_REALM; do
+        if [ -z "${!required}" ]; then
+            echo "Error: ${required} is not set" >&2
+            exit 1
+        fi
+    done
+
+    echo "Setting up Keycloak OIDC authentication for Grafana..."
+
+    # Create Keycloak client for Grafana
+    just keycloak::delete-client "${KEYCLOAK_REALM}" "grafana" || true
+    oidc_client_secret=$(just utils::random-password)
+    redirect_urls="https://${GRAFANA_HOST}/login/generic_oauth"
+    just keycloak::create-client \
+        realm="${KEYCLOAK_REALM}" \
+        client_id="grafana" \
+        redirect_url="${redirect_urls}" \
+        client_secret="${oidc_client_secret}"
+    just keycloak::add-groups-mapper "grafana"
+    echo "✓ Keycloak client 'grafana' created"
+
+    # Create admin group if it doesn't exist
+    just keycloak::create-group "grafana-admins" "" "Grafana administrators group" || true
+
+    # Update Helm values with OIDC configuration
+    export GRAFANA_OIDC_CLIENT_SECRET="${oidc_client_secret}"
+    export GRAFANA_OIDC_ENABLED="true"
+    gomplate -f values.gomplate.yaml -o values.yaml
+
+    # Upgrade Helm release with new values
+    helm upgrade kube-prometheus-stack \
+        prometheus-community/kube-prometheus-stack \
+        --version ${PROMETHEUS_CHART_VERSION} \
+        -n ${PROMETHEUS_NAMESPACE} \
+        --wait \
+        -f values.yaml
+
+    echo ""
+    echo "=== OIDC Setup Complete ==="
+    echo "Grafana is now configured to use Keycloak for authentication"
+    echo ""
+    echo "To grant admin access to a user:"
+    echo "  just keycloak::add-user-to-group <username> grafana-admins"
+    echo ""
+    echo "Grafana URL: https://${GRAFANA_HOST}"
+
+# Disable OIDC authentication
+disable-oidc:
+    #!/bin/bash
+    set -euo pipefail
+    echo "Disabling Keycloak OIDC authentication for Grafana..."
+
+    # Update Helm values to disable OIDC
+    export GRAFANA_OIDC_ENABLED="false"
+    export GRAFANA_OIDC_CLIENT_SECRET=""
+    gomplate -f values.gomplate.yaml -o values.yaml
+
+    # Upgrade Helm release with new values
+    helm upgrade kube-prometheus-stack \
+        prometheus-community/kube-prometheus-stack \
+        --version ${PROMETHEUS_CHART_VERSION} \
+        -n ${PROMETHEUS_NAMESPACE} \
+        --wait \
+        -f values.yaml
+
+    echo "✓ OIDC authentication disabled"
+
+# Get Grafana admin password
+grafana-password:
+    @just admin-password
+
+# Standalone helper: prompts for GRAFANA_HOST and persists it with env::set.
+# The entered value is not visible to other recipes within the same run.
+# Check the environment
+[private]
+check-env:
+    #!/bin/bash
+    set -euo pipefail
+    if [ -z "${GRAFANA_HOST}" ]; then
+        while [ -z "${GRAFANA_HOST}" ]; do
+            GRAFANA_HOST=$(
+                gum input --prompt="Grafana host: " --width=100 --placeholder="grafana.example.com"
+            )
+        done
+        just env::set GRAFANA_HOST="${GRAFANA_HOST}"
+    fi
diff --git a/prometheus/values.gomplate.yaml b/prometheus/values.gomplate.yaml
new file mode 100644
index 0000000..53a1e3e
--- /dev/null
+++ b/prometheus/values.gomplate.yaml
@@ -0,0 +1,128 @@
+---
+# Grafana Configuration
+grafana:
+  enabled: true
+
+  admin:
+    existingSecret: grafana-admin-credentials
+    userKey: admin-user
+    passwordKey: admin-password
+
+  ingress:
+    enabled: true
+    ingressClassName: traefik
+    annotations:
+      traefik.ingress.kubernetes.io/router.entrypoints: websecure
+    hosts:
+      - {{ .Env.GRAFANA_HOST }}
+    tls:
+      - hosts:
+          - {{ .Env.GRAFANA_HOST }}
+
+  # Grafana configuration
+  grafana.ini:
+    server:
+      root_url: https://{{ .Env.GRAFANA_HOST }}
+{{- if eq (.Env.GRAFANA_OIDC_ENABLED | default "false") "true" }}
+    auth.generic_oauth:
+      enabled: true
+      name: Keycloak
+      allow_sign_up: true
+      client_id: grafana
+      # Quoted so a secret with YAML-significant characters stays a plain string
+      client_secret: {{ .Env.GRAFANA_OIDC_CLIENT_SECRET | strings.Quote }}
+      scopes: openid profile email groups
+      auth_url: https://{{ .Env.KEYCLOAK_HOST }}/realms/{{ .Env.KEYCLOAK_REALM }}/protocol/openid-connect/auth
+      token_url: https://{{ .Env.KEYCLOAK_HOST }}/realms/{{ .Env.KEYCLOAK_REALM }}/protocol/openid-connect/token
+      api_url: https://{{ .Env.KEYCLOAK_HOST }}/realms/{{ .Env.KEYCLOAK_REALM }}/protocol/openid-connect/userinfo
+      use_refresh_token: true
+      role_attribute_path: "contains(groups[*], 'grafana-admins') && 'Admin' || contains(groups[*], 'grafana-editors') && 'Editor' || 'Viewer'"
+      signout_redirect_url: https://{{ .Env.KEYCLOAK_HOST }}/realms/{{ .Env.KEYCLOAK_REALM }}/protocol/openid-connect/logout?post_logout_redirect_uri=https://{{ .Env.GRAFANA_HOST }}/login
+{{- end }}
+
+  # Persist Grafana data
+  persistence:
+    enabled: true
+    size: 10Gi
+
+# Prometheus Configuration
+prometheus:
+  prometheusSpec:
+    # Retention settings
+    retention: 30d
+    retentionSize: "50GB"
+
+    # Storage
+    storageSpec:
+      volumeClaimTemplate:
+        spec:
+          accessModes: ["ReadWriteOnce"]
+          resources:
+            requests:
+              storage: 50Gi
+
+{{- if .Env.PROMETHEUS_HOST }}
+  ingress:
+    enabled: true
+    ingressClassName: traefik
+    annotations:
+      traefik.ingress.kubernetes.io/router.entrypoints: websecure
+    hosts:
+      - {{ .Env.PROMETHEUS_HOST }}
+    tls:
+      - hosts:
+          - {{ .Env.PROMETHEUS_HOST }}
+{{- end }}
+
+# Alertmanager Configuration
+alertmanager:
+  alertmanagerSpec:
+    # Storage
+    storage:
+      volumeClaimTemplate:
+        spec:
+          accessModes: ["ReadWriteOnce"]
+          resources:
+            requests:
+              storage: 10Gi
+
+{{- if .Env.ALERTMANAGER_HOST }}
+  ingress:
+    enabled: true
+    ingressClassName: traefik
+    annotations:
+      traefik.ingress.kubernetes.io/router.entrypoints: websecure
+    hosts:
+      - {{ .Env.ALERTMANAGER_HOST }}
+    tls:
+      - hosts:
+          - {{ .Env.ALERTMANAGER_HOST }}
+{{- end }}
+
+# Enable default monitoring targets
+kubeApiServer:
+  enabled: true
+
+kubelet:
+  enabled: true
+
+kubeControllerManager:
+  enabled: true
+
+coreDns:
+  enabled: true
+
+kubeEtcd:
+  enabled: true
+
+kubeScheduler:
+  enabled: true
+
+kubeProxy:
+  enabled: true
+
+kubeStateMetrics:
+  enabled: true
+
+nodeExporter:
+  enabled: true