diff --git a/superset/README.md b/superset/README.md
index 110a7cc..4b8b811 100644
--- a/superset/README.md
+++ b/superset/README.md
@@ -332,6 +332,35 @@ Expected packages:
 - Test query in SQL Lab first
 - Check Superset logs for errors
 
+### "Unable to migrate query editor state to backend" Error
+
+**Symptom**: Repeated error message in SQL Lab:
+
+```plain
+Unable to migrate query editor state to backend. Superset will retry later.
+Please contact your administrator if this problem persists.
+```
+
+**Root Cause**: Known Apache Superset bug ([#30351](https://github.com/apache/superset/issues/30351), [#33423](https://github.com/apache/superset/issues/33423)) where `/tabstateview/` endpoint returns HTTP 400 errors. Multiple underlying causes:
+
+- Missing `dbId` in query editor state (KeyError)
+- Foreign key constraint violations in `tab_state` table
+- Missing PostgreSQL development tools in container images
+
+**Solution**: Disable SQL Lab backend persistence in `configOverrides`:
+
+```python
+# Disable SQL Lab backend persistence to avoid tab state migration errors
+SQLLAB_BACKEND_PERSISTENCE = False
+```
+
+**Impact**:
+
+- Query editor state stored in browser local storage only (not in database)
+- Browser cache clear may lose unsaved queries
+- Use "Saved Queries" feature for important queries
+- This configuration is already applied in this deployment
+
 ## References
 
 - [Apache Superset Documentation](https://superset.apache.org/docs/)
diff --git a/superset/justfile b/superset/justfile
index 5a46bb1..5e5cfcd 100644
--- a/superset/justfile
+++ b/superset/justfile
@@ -257,3 +257,58 @@ uninstall delete-db='true':
         just vault::delete superset/config || true
         just vault::delete superset/oauth || true
     fi
+
+# Restore Superset datasets, charts, and dashboards from backup
+restore backup_file charts_only='false':
+    #!/bin/bash
+    set -euo pipefail
+
+    # backup_file: dump to upload; a relative path is resolved against the
+    #              directory just was invoked from.
+    # charts_only: 'true' passes --charts-only to the restore script.
+    BACKUP_FILE="{{ backup_file }}"
+    CHARTS_ONLY="{{ charts_only }}"
+    # Where the dump is placed inside the PostgreSQL pod.
+    REMOTE_BACKUP_FILE="/var/lib/postgresql/data/superset-restore.sql"
+
+    # Resolve a relative path against the caller's directory rather than
+    # whatever directory the recipe happens to execute in.
+    if [[ "${BACKUP_FILE}" != /* ]]; then
+        BACKUP_FILE="{{ invocation_directory() }}/${BACKUP_FILE}"
+    fi
+
+    if [[ ! -f "${BACKUP_FILE}" ]]; then
+        echo "Error: Backup file '${BACKUP_FILE}' not found" >&2
+        exit 1
+    fi
+
+    POD_NAME=$(kubectl get pods -n postgres -l cnpg.io/cluster=postgres-cluster \
+        -o jsonpath='{.items[0].metadata.name}')
+
+    if [[ -z "${POD_NAME}" ]]; then
+        echo "Error: PostgreSQL pod not found" >&2
+        exit 1
+    fi
+
+    echo "Uploading backup file to PostgreSQL pod..."
+    kubectl cp "${BACKUP_FILE}" "postgres/${POD_NAME}:${REMOTE_BACKUP_FILE}"
+
+    echo "Running restore script..."
+    # Pass the remote path explicitly so a BACKUP_FILE already exported in the
+    # caller's environment cannot point the restore at a different file.
+    if [[ "${CHARTS_ONLY}" == "true" ]]; then
+        BACKUP_FILE="${REMOTE_BACKUP_FILE}" bash restore-datasets-charts.sh --charts-only
+    else
+        BACKUP_FILE="${REMOTE_BACKUP_FILE}" bash restore-datasets-charts.sh
+    fi
+
+    echo ""
+    echo "Restarting Superset pods to clear cache..."
+    # Best effort: a failed pod deletion is ignored and does not change the
+    # restore result.
+    kubectl delete pod -n "${SUPERSET_NAMESPACE}" -l app=superset --wait=false || true
+    kubectl delete pod -n "${SUPERSET_NAMESPACE}" -l app.kubernetes.io/component=worker --wait=false || true
+
+    echo ""
+    echo "Restore completed successfully!"
+    echo "Please wait for Superset pods to restart."
diff --git a/superset/restore-datasets-charts.sh b/superset/restore-datasets-charts.sh
new file mode 100755
index 0000000..b1333f9
--- /dev/null
+++ b/superset/restore-datasets-charts.sh
@@ -0,0 +1,131 @@
+#!/bin/bash
+# Restore only Superset datasets, charts, and dashboards from backup
+#
+# Usage:
+#   ./restore-datasets-charts.sh [--charts-only]
+#
+# Options:
+#   --charts-only    Restore only charts and datasets (skip dashboards)
+#
+# Environment:
+#   BACKUP_FILE      Path of the dump inside the PostgreSQL pod
+#                    (default: /var/lib/postgresql/data/superset-restore.sql)
+
+set -euo pipefail
+
+POSTGRES_NAMESPACE="postgres"
+BACKUP_FILE="${BACKUP_FILE:-/var/lib/postgresql/data/superset-restore.sql}"
+DB_NAME="superset"
+DB_USER="postgres" # Use superuser for restore
+
+# Core tables for datasets and charts
+CORE_TABLES=(
+  "tables"        # Dataset metadata
+  "table_columns" # Dataset columns
+  "sql_metrics"   # Dataset metrics
+  "slices"        # Chart definitions
+)
+
+# Dashboard tables (restored by default)
+DASHBOARD_TABLES=(
+  "dashboards"          # Dashboard metadata
+  "dashboard_slices"    # Chart-Dashboard relationships
+  "dashboard_user"      # Dashboard-User relationships
+  "dashboard_roles"     # Dashboard-Role relationships
+  "embedded_dashboards" # Embedded dashboard configurations
+)
+
+# Parse command line arguments before touching the cluster, so a typo fails
+# fast. No `shift` here: the for loop iterates over the already-expanded "$@".
+RESTORE_DASHBOARDS=true # Default: restore dashboards
+for arg in "$@"; do
+  case "$arg" in
+    --charts-only)
+      RESTORE_DASHBOARDS=false
+      ;;
+    *)
+      echo "Unknown option: $arg" >&2
+      echo "Usage: $0 [--charts-only]" >&2
+      exit 1
+      ;;
+  esac
+done
+
+# Build table list
+TABLES=("${CORE_TABLES[@]}")
+if [[ "$RESTORE_DASHBOARDS" == true ]]; then
+  TABLES+=("${DASHBOARD_TABLES[@]}")
+fi
+
+# Get PostgreSQL pod name
+# NOTE(review): items[0] is not guaranteed to be the primary instance when the
+# cluster runs several pods - confirm the selector for multi-instance setups.
+POD_NAME=$(kubectl get pods -n "$POSTGRES_NAMESPACE" -l cnpg.io/cluster=postgres-cluster \
+  -o jsonpath='{.items[0].metadata.name}')
+if [[ -z "$POD_NAME" ]]; then
+  echo "Error: PostgreSQL pod not found" >&2
+  exit 1
+fi
+
+# Get database password from secret
+DB_PASSWORD=$(kubectl get secret -n "$POSTGRES_NAMESPACE" postgres-cluster-superuser \
+  -o jsonpath='{.data.password}' | base64 -d)
+
+# Run one SQL statement against $DB_NAME inside the PostgreSQL pod.
+# The password is handed over through env(1) instead of being spliced into a
+# `bash -c` string, so quotes or other shell metacharacters in it are harmless.
+#   $1 - SQL statement to execute
+run_sql() {
+  kubectl exec -n "$POSTGRES_NAMESPACE" "$POD_NAME" -- \
+    env "PGPASSWORD=$DB_PASSWORD" \
+    psql -h localhost -U "$DB_USER" -d "$DB_NAME" -c "$1"
+}
+
+echo "Restoring the following tables in database '$DB_NAME':"
+for table in "${TABLES[@]}"; do
+  echo " - $table"
+done
+echo ""
+
+# Restore each table
+for table in "${TABLES[@]}"; do
+  echo "Restoring table: $table"
+
+  # First, truncate the existing table (with CASCADE to handle foreign keys)
+  run_sql "TRUNCATE TABLE $table CASCADE;" ||
+    echo "Warning: Failed to truncate $table (table might not exist yet)" >&2
+
+  # Disable foreign key constraints temporarily
+  run_sql "ALTER TABLE $table DISABLE TRIGGER ALL;" ||
+    echo "Warning: Failed to disable triggers on $table" >&2
+
+  # Restore the table data (without --disable-triggers as we're managing it
+  # manually). Record the status instead of exiting immediately so the
+  # triggers are switched back on even when the restore fails.
+  restore_status=0
+  kubectl exec -n "$POSTGRES_NAMESPACE" "$POD_NAME" -- \
+    env "PGPASSWORD=$DB_PASSWORD" \
+    pg_restore -h localhost -U "$DB_USER" -d "$DB_NAME" \
+    --table="$table" --data-only "$BACKUP_FILE" || restore_status=$?
+
+  # Re-enable foreign key constraints
+  run_sql "ALTER TABLE $table ENABLE TRIGGER ALL;" ||
+    echo "Warning: Failed to enable triggers on $table" >&2
+
+  if ((restore_status != 0)); then
+    echo "Error: Failed to restore $table" >&2
+    exit 1
+  fi
+
+  echo " ✓ Successfully restored $table"
+done
+
+echo ""
+echo "Restoration completed successfully!"
+echo "" +echo "Restored tables:" +for table in "${TABLES[@]}"; do + count=$(kubectl exec -n "$POSTGRES_NAMESPACE" "$POD_NAME" -- \ + bash -c "PGPASSWORD='$DB_PASSWORD' psql -h localhost -U $DB_USER -d $DB_NAME -tAc 'SELECT COUNT(*) FROM $table;'") + echo " - $table: $count rows" +done diff --git a/superset/superset-values.gomplate.yaml b/superset/superset-values.gomplate.yaml index cd5ab35..ee89944 100644 --- a/superset/superset-values.gomplate.yaml +++ b/superset/superset-values.gomplate.yaml @@ -3,166 +3,170 @@ # Service configuration service: - type: ClusterIP - port: 8088 + type: ClusterIP + port: 8088 # Ingress configuration ingress: - enabled: true - ingressClassName: traefik - annotations: - kubernetes.io/ingress.class: traefik - traefik.ingress.kubernetes.io/router.entrypoints: websecure - hosts: + enabled: true + ingressClassName: traefik + annotations: + kubernetes.io/ingress.class: traefik + traefik.ingress.kubernetes.io/router.entrypoints: websecure + hosts: + - {{ env.Getenv "SUPERSET_HOST" }} + tls: + - secretName: superset-tls + hosts: - {{ env.Getenv "SUPERSET_HOST" }} - tls: - - secretName: superset-tls - hosts: - - {{ env.Getenv "SUPERSET_HOST" }} # Init job settings (disable to use external database initialization) init: - enabled: true - loadExamples: false + enabled: true + createAdmin: false + loadExamples: false # Superset node configuration supersetNode: - replicaCount: 1 - connections: - # Redis configuration - redis_host: superset-redis-headless - redis_port: "6379" - redis_cache_db: "1" - redis_celery_db: "0" - # PostgreSQL configuration for initContainer (wait-for-postgres) - # The actual database connection uses SQLALCHEMY_DATABASE_URI from extraEnvRaw - db_host: postgres-cluster-rw.postgres - db_port: "5432" - db_user: superset - db_pass: {{ env.Getenv "SUPERSET_DB_PASSWORD" }} - db_name: superset + replicaCount: 1 + connections: + # Redis configuration + redis_host: superset-redis-headless + redis_port: "6379" + redis_cache_db: "1" + 
redis_celery_db: "0" + # PostgreSQL configuration for initContainer (wait-for-postgres) + # The actual database connection uses SQLALCHEMY_DATABASE_URI from extraEnvRaw + db_host: postgres-cluster-rw.postgres + db_port: "5432" + db_user: superset + db_pass: {{ env.Getenv "SUPERSET_DB_PASSWORD" }} + db_name: superset # Superset worker (Celery) configuration supersetWorker: - replicaCount: 1 + replicaCount: 1 # Database configuration (use existing PostgreSQL) postgresql: - enabled: false + enabled: false # Redis configuration (embedded) redis: - enabled: true - image: - registry: docker.io - repository: bitnami/redis - # Since August 2025, Bitnami changed its strategy: - # - Community users can only use 'latest' tag (no version pinning) - # - Versioned tags moved to 'bitnamilegacy' repository (deprecated, no updates) - # - For production with version pinning, consider using official redis image separately - tag: latest - master: - persistence: - enabled: false + enabled: true + image: + registry: docker.io + repository: bitnami/redis + # Since August 2025, Bitnami changed its strategy: + # - Community users can only use 'latest' tag (no version pinning) + # - Versioned tags moved to 'bitnamilegacy' repository (deprecated, no updates) + # - For production with version pinning, consider using official redis image separately + tag: latest + master: + persistence: + enabled: false # Extra environment variables extraEnv: - KEYCLOAK_HOST: {{ env.Getenv "KEYCLOAK_HOST" }} - KEYCLOAK_REALM: {{ env.Getenv "KEYCLOAK_REALM" }} + KEYCLOAK_HOST: {{ env.Getenv "KEYCLOAK_HOST" }} + KEYCLOAK_REALM: {{ env.Getenv "KEYCLOAK_REALM" }} # Extra environment variables from existing secrets extraEnvRaw: - - name: SUPERSET_SECRET_KEY - valueFrom: - secretKeyRef: - name: superset-secret - key: SECRET_KEY - - name: SQLALCHEMY_DATABASE_URI - valueFrom: - secretKeyRef: - name: superset-secret - key: SQLALCHEMY_DATABASE_URI - - name: OAUTH_CLIENT_SECRET - valueFrom: - secretKeyRef: - name: 
superset-secret - key: OAUTH_CLIENT_SECRET + - name: SUPERSET_SECRET_KEY + valueFrom: + secretKeyRef: + name: superset-secret + key: SECRET_KEY + - name: SQLALCHEMY_DATABASE_URI + valueFrom: + secretKeyRef: + name: superset-secret + key: SQLALCHEMY_DATABASE_URI + - name: OAUTH_CLIENT_SECRET + valueFrom: + secretKeyRef: + name: superset-secret + key: OAUTH_CLIENT_SECRET # Configuration overrides for superset_config.py configOverrides: - keycloak_oauth: | - import os - from flask_appbuilder.security.manager import AUTH_OAUTH - from superset.security import SupersetSecurityManager + keycloak_oauth: | + import os + from flask_appbuilder.security.manager import AUTH_OAUTH + from superset.security import SupersetSecurityManager - class CustomSsoSecurityManager(SupersetSecurityManager): - def oauth_user_info(self, provider, response=None): - """Get user information from OAuth provider.""" - if provider == "keycloak": - me = self.appbuilder.sm.oauth_remotes[provider].get( - "protocol/openid-connect/userinfo" - ) - data = me.json() - return { - "username": data.get("preferred_username"), - "name": data.get("name"), - "email": data.get("email"), - "first_name": data.get("given_name", ""), - "last_name": data.get("family_name", ""), - "role_keys": data.get("groups", []), - } - return {} - - - # Authentication type - AUTH_TYPE = AUTH_OAUTH - - # Auto-registration for new users - AUTH_USER_REGISTRATION = True - AUTH_USER_REGISTRATION_ROLE = "Gamma" - - # Custom security manager - CUSTOM_SECURITY_MANAGER = CustomSsoSecurityManager - - # OAuth configuration - OAUTH_PROVIDERS = [ - { - "name": "keycloak", - "icon": "fa-key", - "token_key": "access_token", - "remote_app": { - "client_id": "superset", - "client_secret": os.environ.get("OAUTH_CLIENT_SECRET"), - "server_metadata_url": f"https://{os.environ.get('KEYCLOAK_HOST')}/realms/{os.environ.get('KEYCLOAK_REALM')}/.well-known/openid-configuration", - "api_base_url": 
f"https://{os.environ.get('KEYCLOAK_HOST')}/realms/{os.environ.get('KEYCLOAK_REALM')}/", - "client_kwargs": { - "scope": "openid email profile" - }, + class CustomSsoSecurityManager(SupersetSecurityManager): + def oauth_user_info(self, provider, response=None): + """Get user information from OAuth provider.""" + if provider == "keycloak": + me = self.appbuilder.sm.oauth_remotes[provider].get( + "protocol/openid-connect/userinfo" + ) + data = me.json() + return { + "username": data.get("preferred_username"), + "name": data.get("name"), + "email": data.get("email"), + "first_name": data.get("given_name", ""), + "last_name": data.get("family_name", ""), + "role_keys": data.get("groups", []), } + return {} + + + # Authentication type + AUTH_TYPE = AUTH_OAUTH + + # Auto-registration for new users + AUTH_USER_REGISTRATION = True + AUTH_USER_REGISTRATION_ROLE = "Gamma" + + # Custom security manager + CUSTOM_SECURITY_MANAGER = CustomSsoSecurityManager + + # OAuth configuration + OAUTH_PROVIDERS = [ + { + "name": "keycloak", + "icon": "fa-key", + "token_key": "access_token", + "remote_app": { + "client_id": "superset", + "client_secret": os.environ.get("OAUTH_CLIENT_SECRET"), + "server_metadata_url": f"https://{os.environ.get('KEYCLOAK_HOST')}/realms/{os.environ.get('KEYCLOAK_REALM')}/.well-known/openid-configuration", + "api_base_url": f"https://{os.environ.get('KEYCLOAK_HOST')}/realms/{os.environ.get('KEYCLOAK_REALM')}/", + "client_kwargs": { + "scope": "openid email profile" + }, } - ] - - # Role mapping - AUTH_ROLES_MAPPING = { - "superset-admin": ["Admin"], - "Alpha": ["Alpha"], - "Gamma": ["Gamma"], } + ] - # Sync roles at each login - AUTH_ROLES_SYNC_AT_LOGIN = True + # Role mapping + AUTH_ROLES_MAPPING = { + "superset-admin": ["Admin"], + "Alpha": ["Alpha"], + "Gamma": ["Gamma"], + } - # Enable Trino database support - PREVENT_UNSAFE_DB_CONNECTIONS = False + # Sync roles at each login + AUTH_ROLES_SYNC_AT_LOGIN = True - # Proxy configuration (for HTTPS behind 
Traefik) - ENABLE_PROXY_FIX = True - PREFERRED_URL_SCHEME = "https" + # Enable Trino database support + PREVENT_UNSAFE_DB_CONNECTIONS = False + + # Proxy configuration (for HTTPS behind Traefik) + ENABLE_PROXY_FIX = True + PREFERRED_URL_SCHEME = "https" + + # Disable SQL Lab backend persistence to avoid tab state migration errors + SQLLAB_BACKEND_PERSISTENCE = False # Bootstrap script for initial setup # Note: Superset 5.0+ uses 'uv' instead of 'pip' for package management bootstrapScript: | - #!/bin/bash - uv pip install psycopg2-binary sqlalchemy-trino authlib - if [ ! -f ~/bootstrap ]; then echo "Bootstrap complete" > ~/bootstrap; fi + #!/bin/bash + uv pip install psycopg2-binary sqlalchemy-trino authlib + if [ ! -f ~/bootstrap ]; then echo "Bootstrap complete" > ~/bootstrap; fi