diff --git a/querybook/README.md b/querybook/README.md index 52a83ef..0fc6fad 100644 --- a/querybook/README.md +++ b/querybook/README.md @@ -350,6 +350,62 @@ Trino (HTTPS via external hostname) - **Trino Connection**: Must use external HTTPS hostname (not internal service name) - **User Impersonation**: Admin credentials with X-Trino-User header for query attribution +## Pod Security Standards + +**Current Configuration**: `privileged` (enforce) / `baseline` (warn, audit) + +Querybook namespace is configured with the following Pod Security Standards: + +```yaml +pod-security.kubernetes.io/enforce: privileged +pod-security.kubernetes.io/warn: baseline +pod-security.kubernetes.io/audit: baseline +``` + +### Why Not Restricted or Baseline? + +Querybook's embedded **Elasticsearch** component requires privileged containers and special Linux capabilities that violate both `restricted` and `baseline` Pod Security Standards: + +**Elasticsearch Requirements**: + +- `privileged: true` - Container must run in privileged mode +- `capabilities.add: [IPC_LOCK, SYS_RESOURCE]` - Requires Linux capabilities for memory locking +- `sysctl -w vm.max_map_count=262144` - Init container needs privileged mode to configure kernel parameters + +These requirements are necessary for Elasticsearch to: + +1. Lock memory to prevent swapping (performance) +2. Set virtual memory map count (stability) +3. Configure ulimit for unlimited locked memory + +**Security Implications**: + +- Elasticsearch containers run with elevated privileges +- Init containers can modify kernel parameters +- Other components (web, worker, scheduler, redis) run without special privileges + +**Mitigation**: + +- `warn` and `audit` at `baseline` level to track violations +- Web init container (copy-keycloak-auth) uses `restricted`-level security context +- Future: Consider external Elasticsearch service to enable stricter Pod Security Standards + +**Component Security Status**: + +| Component | Privileges Required | Security Level | +|----------------|---------------------|----------------| +| Elasticsearch | privileged=true, IPC_LOCK, SYS_RESOURCE | Violates baseline | +| Web | None (container), runAsNonRoot (initContainer) | Baseline-ready | +| Worker | None | Baseline-ready | +| Scheduler | None | Baseline-ready | +| Redis | None | Baseline-ready | + +To check current Pod Security Standards configuration: + +```bash +kubectl get namespace querybook -o jsonpath='{.metadata.labels}' | jq +``` + ## Authentication ### User Login (OAuth2) diff --git a/querybook/justfile b/querybook/justfile index bb9483e..e1ff572 100644 --- a/querybook/justfile +++ b/querybook/justfile @@ -16,10 +16,20 @@ export KEYCLOAK_HOST := env("KEYCLOAK_HOST", "") default: @just --list --unsorted --list-submodules -# Create Querybook namespace +# Create Querybook namespace with Pod Security Standards +# Note: Elasticsearch requires privileged containers, so enforce=privileged +# but warn/audit at baseline level to encourage security improvements create-namespace: - @kubectl get namespace ${QUERYBOOK_NAMESPACE} &>/dev/null || \ + #!/bin/bash + set -euo pipefail + if ! kubectl get namespace ${QUERYBOOK_NAMESPACE} &>/dev/null; then kubectl create namespace ${QUERYBOOK_NAMESPACE} + fi + kubectl label namespace ${QUERYBOOK_NAMESPACE} \ + pod-security.kubernetes.io/enforce=privileged \ + pod-security.kubernetes.io/warn=baseline \ + pod-security.kubernetes.io/audit=baseline \ + --overwrite # Delete Querybook namespace delete-namespace: @@ -254,22 +264,6 @@ install: kubectl wait --for=condition=Available deployment/web \ -n ${QUERYBOOK_NAMESPACE} --timeout=300s - echo "Waiting for service to be accessible at https://${QUERYBOOK_HOST} ..." - for i in {1..60}; do - http_code=$(curl -k -s -o /dev/null -w "%{http_code}" https://${QUERYBOOK_HOST} 2>/dev/null || echo "000") - if echo "${http_code}" | grep -q -E "200|302|401|403"; then - echo "Service is now accessible (HTTP ${http_code})" - break - fi - if [ $i -eq 60 ]; then - echo "Warning: Service may not be fully accessible yet (last status: ${http_code})" - echo "Please wait a few more minutes and try accessing the URL" - else - echo "Waiting for service to respond... ($i/60, current status: ${http_code})" - sleep 5 - fi - done - echo "" echo "Querybook installed successfully!" echo "Access URL: https://${QUERYBOOK_HOST}" diff --git a/querybook/querybook-values.gomplate.yaml b/querybook/querybook-values.gomplate.yaml index 4e10265..a525d4f 100644 --- a/querybook/querybook-values.gomplate.yaml +++ b/querybook/querybook-values.gomplate.yaml @@ -15,13 +15,15 @@ worker: pullPolicy: IfNotPresent tag: latest {{- end }} + # Resource limits (based on Goldilocks/VPA recommendations, rounded to clean values) + # VPA recommendations: CPU 15m, Memory 2.8Gi resources: requests: - memory: 1Gi - cpu: 700m + cpu: 25m + memory: 3Gi limits: - memory: 2Gi - cpu: 1 + cpu: 500m + memory: 6Gi # Scheduler configuration scheduler: @@ -37,13 +39,15 @@ scheduler: pullPolicy: IfNotPresent tag: latest {{- end }} + # Resource limits (based on Goldilocks/VPA recommendations, rounded to clean values) + # VPA recommendations: CPU 15m, Memory 194Mi resources: requests: - memory: 200Mi - cpu: 100m + cpu: 25m + memory: 256Mi limits: - memory: 300Mi - cpu: 200m + cpu: 100m + memory: 512Mi # Web server configuration web: @@ -63,13 +67,15 @@ web: serviceType: ClusterIP servicePort: 80 containerPort: 10001 + # Resource limits (based on Goldilocks/VPA recommendations, rounded to clean values) + # VPA recommendations: CPU 224m, Memory 215Mi resources: requests: - memory: 1Gi - cpu: 500m + cpu: 250m + memory: 256Mi limits: - memory: 2Gi - cpu: 1 + cpu: 500m + memory: 512Mi # Custom initContainer to inject Keycloak auth backend initContainers: @@ -84,6 +90,17 @@ web: mountPath: /config - name: auth-volume mountPath: /auth + securityContext: + allowPrivilegeEscalation: false + readOnlyRootFilesystem: false + runAsNonRoot: true + runAsUser: 1000 + runAsGroup: 1000 + seccompProfile: + type: RuntimeDefault + capabilities: + drop: + - ALL # Volume mounts for main container volumeMounts: @@ -115,13 +132,15 @@ redis: service: serviceType: ClusterIP servicePort: 6379 + # Resource limits (based on Goldilocks/VPA recommendations, rounded to clean values) + # VPA recommendations: CPU 15m, Memory 100Mi resources: requests: - memory: 512Mi - cpu: 200m + cpu: 25m + memory: 128Mi limits: - memory: 1Gi - cpu: 500m + cpu: 100m + memory: 256Mi # Elasticsearch configuration (use Helm chart's embedded Elasticsearch) elasticsearch: @@ -144,13 +163,15 @@ elasticsearch: service: serviceType: ClusterIP servicePort: 9200 + # Resource limits (based on Goldilocks/VPA recommendations, rounded to clean values) + # VPA recommendations: CPU 78m, Memory 1.7Gi resources: requests: + cpu: 100m memory: 2Gi - cpu: 500m limits: - memory: 3Gi - cpu: 1 + cpu: 500m + memory: 4Gi # Ingress configuration ingress: