fix(superset): fix SQL Lab migration error

Masaki Yatsu
2025-11-19 11:00:20 +09:00
parent 2e5c93ee01
commit aa80c2a3ad
4 changed files with 314 additions and 123 deletions

View File

@@ -332,6 +332,35 @@ Expected packages:
- Test query in SQL Lab first
- Check Superset logs for errors
### "Unable to migrate query editor state to backend" Error
**Symptom**: Repeated error message in SQL Lab:
```plain
Unable to migrate query editor state to backend. Superset will retry later.
Please contact your administrator if this problem persists.
```
**Root Cause**: Known Apache Superset bug ([#30351](https://github.com/apache/superset/issues/30351), [#33423](https://github.com/apache/superset/issues/33423)) where `/tabstateview/` endpoint returns HTTP 400 errors. Multiple underlying causes:
- Missing `dbId` in query editor state (KeyError)
- Foreign key constraint violations in `tab_state` table
- Missing PostgreSQL development tools in container images
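
To confirm this is the failure mode, look for failing `/tabstateview/` requests in the Superset web pod logs. A minimal sketch, assuming the `superset` namespace and `app=superset` label used elsewhere in this deployment:

```bash
# Surface tab-state migration failures in the Superset web pod logs
# (namespace and label assume this deployment's defaults; adjust as needed)
kubectl logs -n superset -l app=superset --tail=500 | grep -i tabstateview
```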
**Solution**: Disable SQL Lab backend persistence in `configOverrides`:

```python
# Disable SQL Lab backend persistence to avoid tab state migration errors
SQLLAB_BACKEND_PERSISTENCE = False
```
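
After redeploying, one way to verify the override actually landed is to grep the rendered config in the chart's configmap. A hedged sketch; the configmap name assumes the chart default for a Helm release named `superset`:

```bash
# Check that the rendered superset_config.py contains the override
# (configmap name assumes the chart default for a release named "superset")
kubectl get configmap -n superset superset-config -o yaml | grep SQLLAB_BACKEND_PERSISTENCE
```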
**Impact**:

- Query editor state is stored only in browser local storage (not in the database)
- Clearing the browser cache may lose unsaved queries
- Use the "Saved Queries" feature for important queries
- This configuration is already applied in this deployment

## References

- [Apache Superset Documentation](https://superset.apache.org/docs/)

View File

@@ -257,3 +257,48 @@ uninstall delete-db='true':
    just vault::delete superset/config || true
    just vault::delete superset/oauth || true
  fi

# Restore Superset datasets, charts, and dashboards from backup
restore backup_file charts_only='false':
  #!/bin/bash
  set -euo pipefail
  BACKUP_FILE="{{ backup_file }}"
  CHARTS_ONLY="{{ charts_only }}"

  # Non-absolute paths are resolved against the parent directory
  # (this recipe runs inside the module directory)
  if [[ ! "${BACKUP_FILE}" = /* ]]; then
    BACKUP_FILE="../${BACKUP_FILE}"
  fi
  if [ ! -f "${BACKUP_FILE}" ]; then
    echo "Error: Backup file '${BACKUP_FILE}' not found"
    exit 1
  fi

  POD_NAME=$(kubectl get pods -n postgres -l cnpg.io/cluster=postgres-cluster \
    -o jsonpath='{.items[0].metadata.name}')
  if [ -z "${POD_NAME}" ]; then
    echo "Error: PostgreSQL pod not found"
    exit 1
  fi

  echo "Uploading backup file to PostgreSQL pod..."
  kubectl cp "${BACKUP_FILE}" postgres/${POD_NAME}:/var/lib/postgresql/data/superset-restore.sql

  echo "Running restore script..."
  if [ "${CHARTS_ONLY}" = "true" ]; then
    bash restore-datasets-charts.sh --charts-only
  else
    bash restore-datasets-charts.sh
  fi

  echo ""
  echo "Restarting Superset pods to clear cache..."
  kubectl delete pod -n ${SUPERSET_NAMESPACE} -l app=superset --wait=false || true
  kubectl delete pod -n ${SUPERSET_NAMESPACE} -l app.kubernetes.io/component=worker --wait=false || true

  echo ""
  echo "Restore completed successfully!"
  echo "Please wait for Superset pods to restart."

View File

@@ -0,0 +1,113 @@
#!/bin/bash
# Restore only Superset datasets, charts, and dashboards from backup
#
# Usage:
#   ./restore-datasets-charts.sh [--charts-only]
#
# Options:
#   --charts-only   Restore only charts and datasets (skip dashboards)

set -euo pipefail

NAMESPACE="superset"
POSTGRES_NAMESPACE="postgres"
BACKUP_FILE="${BACKUP_FILE:-/var/lib/postgresql/data/superset-restore.sql}"
DB_NAME="superset"
DB_USER="postgres"  # Use superuser for restore

# Get PostgreSQL pod name
POD_NAME=$(kubectl get pods -n "$POSTGRES_NAMESPACE" -l cnpg.io/cluster=postgres-cluster \
  -o jsonpath='{.items[0].metadata.name}')

# Get database password from secret
DB_PASSWORD=$(kubectl get secret -n "$POSTGRES_NAMESPACE" postgres-cluster-superuser -o jsonpath='{.data.password}' | base64 -d)

# Core tables for datasets and charts
CORE_TABLES=(
  "tables"        # Dataset metadata
  "table_columns" # Dataset columns
  "sql_metrics"   # Dataset metrics
  "slices"        # Chart definitions
)

# Dashboard tables (restored by default)
DASHBOARD_TABLES=(
  "dashboards"          # Dashboard metadata
  "dashboard_slices"    # Chart-Dashboard relationships
  "dashboard_user"      # Dashboard-User relationships
  "dashboard_roles"     # Dashboard-Role relationships
  "embedded_dashboards" # Embedded dashboard configurations
)

# Parse command line arguments
RESTORE_DASHBOARDS=true # Default: restore dashboards
for arg in "$@"; do
  case $arg in
    --charts-only)
      RESTORE_DASHBOARDS=false
      ;;
    *)
      echo "Unknown option: $arg"
      echo "Usage: $0 [--charts-only]"
      exit 1
      ;;
  esac
done

# Build table list
TABLES=("${CORE_TABLES[@]}")
if [ "$RESTORE_DASHBOARDS" = true ]; then
  TABLES+=("${DASHBOARD_TABLES[@]}")
fi

echo "Restoring the following tables in database '$DB_NAME':"
for table in "${TABLES[@]}"; do
  echo "  - $table"
done
echo ""

# Restore each table
for table in "${TABLES[@]}"; do
  echo "Restoring table: $table"

  # First, truncate the existing table. Note that CASCADE also empties any
  # tables that reference $table via foreign keys, so dependent rows are
  # wiped as well (they are restored in later iterations).
  kubectl exec -n "$POSTGRES_NAMESPACE" "$POD_NAME" -- \
    bash -c "PGPASSWORD='$DB_PASSWORD' psql -h localhost -U $DB_USER -d $DB_NAME -c 'TRUNCATE TABLE $table CASCADE;'" || {
    echo "Warning: Failed to truncate $table (table might not exist yet)"
  }

  # Disable foreign key constraints temporarily
  kubectl exec -n "$POSTGRES_NAMESPACE" "$POD_NAME" -- \
    bash -c "PGPASSWORD='$DB_PASSWORD' psql -h localhost -U $DB_USER -d $DB_NAME -c 'ALTER TABLE $table DISABLE TRIGGER ALL;'" || {
    echo "Warning: Failed to disable triggers on $table"
  }

  # Restore the table data (without --disable-triggers, as we're managing it manually)
  kubectl exec -n "$POSTGRES_NAMESPACE" "$POD_NAME" -- \
    bash -c "PGPASSWORD='$DB_PASSWORD' pg_restore -h localhost -U $DB_USER -d $DB_NAME \
      --table=$table \
      --data-only \
      $BACKUP_FILE" || {
    echo "Error: Failed to restore $table"
    exit 1
  }

  # Re-enable foreign key constraints
  kubectl exec -n "$POSTGRES_NAMESPACE" "$POD_NAME" -- \
    bash -c "PGPASSWORD='$DB_PASSWORD' psql -h localhost -U $DB_USER -d $DB_NAME -c 'ALTER TABLE $table ENABLE TRIGGER ALL;'" || {
    echo "Warning: Failed to enable triggers on $table"
  }

  echo "  ✓ Successfully restored $table"
done

echo ""
echo "Restoration completed successfully!"
echo ""
echo "Restored tables:"
for table in "${TABLES[@]}"; do
  count=$(kubectl exec -n "$POSTGRES_NAMESPACE" "$POD_NAME" -- \
    bash -c "PGPASSWORD='$DB_PASSWORD' psql -h localhost -U $DB_USER -d $DB_NAME -tAc 'SELECT COUNT(*) FROM $table;'")
  echo "  - $table: $count rows"
done
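
The script reads `BACKUP_FILE` from the environment, defaulting to the path the `restore` recipe uploads to, so it can also target another dump already present in the PostgreSQL pod. A sketch with an illustrative path:

```bash
# Restore charts and datasets from a different dump inside the pod
BACKUP_FILE=/var/lib/postgresql/data/other-backup.dump ./restore-datasets-charts.sh --charts-only
```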

View File

@@ -3,166 +3,170 @@
# Service configuration
service:
  type: ClusterIP
  port: 8088

# Ingress configuration
ingress:
  enabled: true
  ingressClassName: traefik
  annotations:
    kubernetes.io/ingress.class: traefik
    traefik.ingress.kubernetes.io/router.entrypoints: websecure
  hosts:
    - {{ env.Getenv "SUPERSET_HOST" }}
  tls:
    - secretName: superset-tls
      hosts:
        - {{ env.Getenv "SUPERSET_HOST" }}

# Init job settings (disable to use external database initialization)
init:
  enabled: true
  createAdmin: false
  loadExamples: false

# Superset node configuration
supersetNode:
  replicaCount: 1
  connections:
    # Redis configuration
    redis_host: superset-redis-headless
    redis_port: "6379"
    redis_cache_db: "1"
    redis_celery_db: "0"
    # PostgreSQL configuration for initContainer (wait-for-postgres)
    # The actual database connection uses SQLALCHEMY_DATABASE_URI from extraEnvRaw
    db_host: postgres-cluster-rw.postgres
    db_port: "5432"
    db_user: superset
    db_pass: {{ env.Getenv "SUPERSET_DB_PASSWORD" }}
    db_name: superset

# Superset worker (Celery) configuration
supersetWorker:
  replicaCount: 1

# Database configuration (use existing PostgreSQL)
postgresql:
  enabled: false

# Redis configuration (embedded)
redis:
  enabled: true
  image:
    registry: docker.io
    repository: bitnami/redis
    # Since August 2025, Bitnami changed its strategy:
    # - Community users can only use 'latest' tag (no version pinning)
    # - Versioned tags moved to 'bitnamilegacy' repository (deprecated, no updates)
    # - For production with version pinning, consider using official redis image separately
    tag: latest
  master:
    persistence:
      enabled: false

# Extra environment variables
extraEnv:
  KEYCLOAK_HOST: {{ env.Getenv "KEYCLOAK_HOST" }}
  KEYCLOAK_REALM: {{ env.Getenv "KEYCLOAK_REALM" }}

# Extra environment variables from existing secrets
extraEnvRaw:
  - name: SUPERSET_SECRET_KEY
    valueFrom:
      secretKeyRef:
        name: superset-secret
        key: SECRET_KEY
  - name: SQLALCHEMY_DATABASE_URI
    valueFrom:
      secretKeyRef:
        name: superset-secret
        key: SQLALCHEMY_DATABASE_URI
  - name: OAUTH_CLIENT_SECRET
    valueFrom:
      secretKeyRef:
        name: superset-secret
        key: OAUTH_CLIENT_SECRET

# Configuration overrides for superset_config.py
configOverrides:
  keycloak_oauth: |
    import os
    from flask_appbuilder.security.manager import AUTH_OAUTH
    from superset.security import SupersetSecurityManager

    class CustomSsoSecurityManager(SupersetSecurityManager):
        def oauth_user_info(self, provider, response=None):
            """Get user information from OAuth provider."""
            if provider == "keycloak":
                me = self.appbuilder.sm.oauth_remotes[provider].get(
                    "protocol/openid-connect/userinfo"
                )
                data = me.json()
                return {
                    "username": data.get("preferred_username"),
                    "name": data.get("name"),
                    "email": data.get("email"),
                    "first_name": data.get("given_name", ""),
                    "last_name": data.get("family_name", ""),
                    "role_keys": data.get("groups", []),
                }
            return {}

    # Authentication type
    AUTH_TYPE = AUTH_OAUTH

    # Auto-registration for new users
    AUTH_USER_REGISTRATION = True
    AUTH_USER_REGISTRATION_ROLE = "Gamma"

    # Custom security manager
    CUSTOM_SECURITY_MANAGER = CustomSsoSecurityManager

    # OAuth configuration
    OAUTH_PROVIDERS = [
        {
            "name": "keycloak",
            "icon": "fa-key",
            "token_key": "access_token",
            "remote_app": {
                "client_id": "superset",
                "client_secret": os.environ.get("OAUTH_CLIENT_SECRET"),
                "server_metadata_url": f"https://{os.environ.get('KEYCLOAK_HOST')}/realms/{os.environ.get('KEYCLOAK_REALM')}/.well-known/openid-configuration",
                "api_base_url": f"https://{os.environ.get('KEYCLOAK_HOST')}/realms/{os.environ.get('KEYCLOAK_REALM')}/",
                "client_kwargs": {
                    "scope": "openid email profile"
                },
            },
        }
    ]

    # Role mapping
    AUTH_ROLES_MAPPING = {
        "superset-admin": ["Admin"],
        "Alpha": ["Alpha"],
        "Gamma": ["Gamma"],
    }

    # Sync roles at each login
    AUTH_ROLES_SYNC_AT_LOGIN = True

    # Enable Trino database support
    PREVENT_UNSAFE_DB_CONNECTIONS = False

    # Proxy configuration (for HTTPS behind Traefik)
    ENABLE_PROXY_FIX = True
    PREFERRED_URL_SCHEME = "https"

    # Disable SQL Lab backend persistence to avoid tab state migration errors
    SQLLAB_BACKEND_PERSISTENCE = False

# Bootstrap script for initial setup
# Note: Superset 5.0+ uses 'uv' instead of 'pip' for package management
bootstrapScript: |
  #!/bin/bash
  uv pip install psycopg2-binary sqlalchemy-trino authlib
  if [ ! -f ~/bootstrap ]; then echo "Bootstrap complete" > ~/bootstrap; fi
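
The OAuth block builds its discovery and API URLs from `KEYCLOAK_HOST` and `KEYCLOAK_REALM`, so a quick sanity check is to fetch the metadata document the config points at. A minimal sketch, assuming the same environment variables are set in the local shell:

```bash
# Fetch the OIDC discovery document Superset's OAuth provider will use
curl -fsS "https://${KEYCLOAK_HOST}/realms/${KEYCLOAK_REALM}/.well-known/openid-configuration" | head
```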