From f9bb6148ccc768e34ee0061e08792a281e4d5516 Mon Sep 17 00:00:00 2001
From: Masaki Yatsu
Date: Fri, 19 Sep 2025 03:13:18 +0900
Subject: [PATCH] feat(lakekeeper): add Lakekeeper

---
 justfile                                      |   1 +
 lakekeeper/.gitignore                         |   3 +
 lakekeeper/justfile                           | 218 ++++++++++++++++++
 ...per-database-external-secret.gomplate.yaml |  42 ++++
 ...r-encryption-external-secret.gomplate.yaml |  22 ++
 lakekeeper/lakekeeper-values.gomplate.yaml    |  93 ++++++++
 6 files changed, 379 insertions(+)
 create mode 100644 lakekeeper/.gitignore
 create mode 100644 lakekeeper/justfile
 create mode 100644 lakekeeper/lakekeeper-database-external-secret.gomplate.yaml
 create mode 100644 lakekeeper/lakekeeper-encryption-external-secret.gomplate.yaml
 create mode 100644 lakekeeper/lakekeeper-values.gomplate.yaml

diff --git a/justfile b/justfile
index 880c2c9..5d8c70a 100644
--- a/justfile
+++ b/justfile
@@ -16,6 +16,7 @@ mod external-secrets
 mod keycloak
 mod jupyterhub
 mod k8s
+mod lakekeeper
 mod longhorn
 mod metabase
 mod minio
diff --git a/lakekeeper/.gitignore b/lakekeeper/.gitignore
new file mode 100644
index 0000000..07f9f01
--- /dev/null
+++ b/lakekeeper/.gitignore
@@ -0,0 +1,3 @@
+lakekeeper-database-external-secret.yaml
+lakekeeper-encryption-external-secret.yaml
+lakekeeper-values.yaml
diff --git a/lakekeeper/justfile b/lakekeeper/justfile
new file mode 100644
index 0000000..a95c0e4
--- /dev/null
+++ b/lakekeeper/justfile
@@ -0,0 +1,218 @@
+set fallback := true
+
+export LAKEKEEPER_NAMESPACE := env("LAKEKEEPER_NAMESPACE", "lakekeeper")
+export LAKEKEEPER_CHART_VERSION := env("LAKEKEEPER_CHART_VERSION", "0.7.1")
+export EXTERNAL_SECRETS_NAMESPACE := env("EXTERNAL_SECRETS_NAMESPACE", "external-secrets")
+export KEYCLOAK_REALM := env("KEYCLOAK_REALM", "buunstack")
+
+[private]
+default:
+    @just --list --unsorted --list-submodules
+
+# Add Helm repository
+add-helm-repo:
+    helm repo add lakekeeper https://lakekeeper.github.io/lakekeeper-charts/
+    helm repo update
+
+# Remove Helm repository
+remove-helm-repo:
+    helm repo remove lakekeeper
+
+# Create namespace
+create-namespace:
+    @kubectl get namespace "${LAKEKEEPER_NAMESPACE}" >/dev/null 2>&1 || \
+        kubectl create namespace "${LAKEKEEPER_NAMESPACE}"
+
+# Setup database for Lakekeeper
+setup-database:
+    #!/bin/bash
+    set -euo pipefail
+    echo "Setting up Lakekeeper database..."
+
+    if just postgres::db-exists lakekeeper &>/dev/null; then
+        echo "Database 'lakekeeper' already exists."
+    else
+        echo "Creating new database 'lakekeeper'..."
+        just postgres::create-db lakekeeper
+    fi
+
+    # Generate password for user creation/update
+    if just postgres::user-exists lakekeeper &>/dev/null; then
+        echo "User 'lakekeeper' already exists."
+        # Check if we can get existing password from Vault/Secret
+        if helm status external-secrets -n "${EXTERNAL_SECRETS_NAMESPACE}" &>/dev/null; then
+            # Try to get existing password from Vault
+            if DB_PASSWORD=$(just vault::get lakekeeper/database password 2>/dev/null); then
+                echo "Using existing password from Vault."
+            else
+                echo "Generating new password and updating Vault..."
+                DB_PASSWORD=$(just utils::random-password)
+                just postgres::change-password lakekeeper "$DB_PASSWORD"
+            fi
+        else
+            # For direct Secret approach, generate new password
+            echo "Generating new password for existing user..."
+            DB_PASSWORD=$(just utils::random-password)
+            just postgres::change-password lakekeeper "$DB_PASSWORD"
+        fi
+    else
+        echo "Creating new user 'lakekeeper'..."
+        DB_PASSWORD=$(just utils::random-password)
+        just postgres::create-user lakekeeper "$DB_PASSWORD"
+    fi
+
+    echo "Ensuring database permissions..."
+    just postgres::grant lakekeeper lakekeeper
+
+    if helm status external-secrets -n "${EXTERNAL_SECRETS_NAMESPACE}" &>/dev/null; then
+        echo "External Secrets available. Storing credentials in Vault and creating ExternalSecret..."
+        just vault::put lakekeeper/database \
+            username=lakekeeper \
+            password="$DB_PASSWORD" \
+            host=postgres-cluster-rw.postgres \
+            port=5432 \
+            database=lakekeeper
+        gomplate -f lakekeeper-database-external-secret.gomplate.yaml -o lakekeeper-database-external-secret.yaml
+        kubectl apply -f lakekeeper-database-external-secret.yaml
+        echo "Waiting for database secret to be ready..."
+        kubectl wait --for=condition=Ready externalsecret/lakekeeper-database-external-secret \
+            -n "${LAKEKEEPER_NAMESPACE}" --timeout=60s
+    else
+        echo "External Secrets not available. Creating Kubernetes Secret directly..."
+        kubectl delete secret lakekeeper-database-secret -n "${LAKEKEEPER_NAMESPACE}" --ignore-not-found
+        kubectl create secret generic lakekeeper-database-secret -n "${LAKEKEEPER_NAMESPACE}" \
+            --from-literal=username=lakekeeper \
+            --from-literal=password="$DB_PASSWORD" \
+            --from-literal=host=postgres-cluster-rw.postgres \
+            --from-literal=port=5432 \
+            --from-literal=database=lakekeeper
+        echo "Database secret created directly in Kubernetes"
+    fi
+    echo "Database setup completed."
+
+# Delete database secret
+delete-database-secret:
+    @kubectl delete secret lakekeeper-database-secret -n "${LAKEKEEPER_NAMESPACE}" --ignore-not-found
+    @kubectl delete externalsecret lakekeeper-database-external-secret -n "${LAKEKEEPER_NAMESPACE}" --ignore-not-found
+
+# Create OIDC client in Keycloak for Lakekeeper authentication
+create-oidc-client:
+    #!/bin/bash
+    set -euo pipefail
+    if [ -z "${LAKEKEEPER_HOST:-}" ]; then
+        echo "Error: LAKEKEEPER_HOST environment variable is required"
+        exit 1
+    fi
+    echo "Creating Lakekeeper OAuth client in Keycloak..."
+
+    # Check if client already exists
+    if just keycloak::client-exists "${KEYCLOAK_REALM}" lakekeeper &>/dev/null; then
+        echo "Client 'lakekeeper' already exists, skipping creation..."
+        echo "Existing client will preserve roles and mappers"
+    else
+        echo "Creating new public client for PKCE flow..."
+        # Create public client (no client secret) for PKCE flow
+        just keycloak::create-client \
+            "${KEYCLOAK_REALM}" \
+            lakekeeper \
+            "https://${LAKEKEEPER_HOST}/ui/callback"
+    fi
+
+    # Add audience mapper to include 'lakekeeper' in JWT audience
+    echo "Adding audience mapper for JWT token..."
+    just keycloak::add-audience-mapper lakekeeper lakekeeper
+
+    echo "OAuth client configured successfully for PKCE authentication"
+
+# Delete OIDC client (for cleanup purposes)
+delete-oidc-client:
+    @just keycloak::delete-client "${KEYCLOAK_REALM}" lakekeeper
+
+# Install Lakekeeper
+install:
+    #!/bin/bash
+    set -euo pipefail
+    # lakekeeper-values.gomplate.yaml reads .Env.KEYCLOAK_HOST, so fail here,
+    # before the database and Keycloak client are created, if it is missing.
+    : "${KEYCLOAK_HOST:?environment variable is required}"
+    export LAKEKEEPER_HOST=${LAKEKEEPER_HOST:-}
+    while [ -z "${LAKEKEEPER_HOST}" ]; do
+        LAKEKEEPER_HOST=$(
+            gum input --prompt="Lakekeeper host (FQDN): " --width=100 \
+                --placeholder="e.g., lakekeeper.example.com"
+        )
+    done
+    echo "Installing Lakekeeper..."
+    just create-namespace
+    just setup-database
+    just create-oidc-client
+    just add-helm-repo
+
+    # Helm chart will automatically create the encryption key secret
+
+    gomplate -f lakekeeper-values.gomplate.yaml -o lakekeeper-values.yaml
+
+    # Use --wait=false to avoid circular dependency:
+    # Helm waits for post-install hooks (migration job) to complete,
+    # but migration job can't start until Helm deployment finishes
+    helm upgrade --install lakekeeper lakekeeper/lakekeeper \
+        --version "${LAKEKEEPER_CHART_VERSION}" -n "${LAKEKEEPER_NAMESPACE}" \
+        --timeout=10m --wait=false \
+        -f lakekeeper-values.yaml
+
+    echo "Waiting for database migration to complete..."
+    # NOTE(review): the "-1" suffix looks like the Helm release revision; if so,
+    # re-running install (revision 2+) waits on the wrong job. Confirm against
+    # the chart's migration Job name before relying on this for upgrades.
+    kubectl wait --for=condition=complete job/lakekeeper-db-migration-1 \
+        -n "${LAKEKEEPER_NAMESPACE}" --timeout=300s
+
+    echo "Waiting for Lakekeeper deployment to be ready..."
+    kubectl wait --for=condition=available deployment/lakekeeper \
+        -n "${LAKEKEEPER_NAMESPACE}" --timeout=300s
+
+    echo "Lakekeeper installation completed"
+    echo "Access Lakekeeper at: https://${LAKEKEEPER_HOST}"
+
+# Uninstall Lakekeeper
+uninstall delete-db='true':
+    #!/bin/bash
+    set -euo pipefail
+    echo "Uninstalling Lakekeeper..."
+    helm uninstall lakekeeper -n "${LAKEKEEPER_NAMESPACE}" --ignore-not-found
+
+    # Force delete stuck resources
+    echo "Checking for stuck resources..."
+    kubectl delete all --all -n "${LAKEKEEPER_NAMESPACE}" --force --grace-period=0 2>/dev/null || true
+
+    # Delete PVCs
+    PVCS=$(kubectl get pvc -n "${LAKEKEEPER_NAMESPACE}" -o name 2>/dev/null || true)
+    if [ -n "$PVCS" ]; then
+        echo "Deleting PersistentVolumeClaims..."
+        kubectl delete pvc --all -n "${LAKEKEEPER_NAMESPACE}" --force --grace-period=0 2>/dev/null || true
+    fi
+
+    just delete-database-secret
+    # Best-effort and only once: the client may already be gone, and a failure
+    # here must not abort the uninstall before the database is dropped.
+    just delete-oidc-client || echo "Warning: failed to delete Keycloak client 'lakekeeper'" >&2
+    if [ "{{ delete-db }}" = "true" ]; then
+        just postgres::delete-db lakekeeper
+    fi
+    echo "Lakekeeper uninstalled"
+
+# Clean up database and secrets
+cleanup:
+    #!/bin/bash
+    set -euo pipefail
+    echo "This will delete the Lakekeeper database and all secrets."
+    if gum confirm "Are you sure you want to proceed?"; then
+        echo "Cleaning up Lakekeeper resources..."
+        just postgres::delete-db lakekeeper || true
+        just vault::delete lakekeeper/database || true
+        just vault::delete lakekeeper/oauth || true
+        just keycloak::delete-client "${KEYCLOAK_REALM}" lakekeeper || true
+        echo "Cleanup completed"
+    else
+        echo "Cleanup cancelled"
+    fi
diff --git a/lakekeeper/lakekeeper-database-external-secret.gomplate.yaml b/lakekeeper/lakekeeper-database-external-secret.gomplate.yaml
new file mode 100644
index 0000000..7306e71
--- /dev/null
+++ b/lakekeeper/lakekeeper-database-external-secret.gomplate.yaml
@@ -0,0 +1,42 @@
+apiVersion: external-secrets.io/v1
+kind: ExternalSecret
+metadata:
+  name: lakekeeper-database-external-secret
+  namespace: {{ .Env.LAKEKEEPER_NAMESPACE }}
+spec:
+  refreshInterval: 1h
+  secretStoreRef:
+    name: vault-secret-store
+    kind: ClusterSecretStore
+  target:
+    name: lakekeeper-database-secret
+    creationPolicy: Owner
+    template:
+      type: Opaque
+      data:
+        username: "{{ `{{ .username }}` }}"
+        password: "{{ `{{ .password }}` }}"
+        host: "{{ `{{ .host }}` }}"
+        port: "{{ `{{ .port }}` }}"
+        database: "{{ `{{ .database }}` }}"
+  data:
+    - secretKey: username
+      remoteRef:
+        key: lakekeeper/database
+        property: username
+    - secretKey: password
+      remoteRef:
+        key: lakekeeper/database
+        property: password
+    - secretKey: host
+      remoteRef:
+        key: lakekeeper/database
+        property: host
+    - secretKey: port
+      remoteRef:
+        key: lakekeeper/database
+        property: port
+    - secretKey: database
+      remoteRef:
+        key: lakekeeper/database
+        property: database
\ No newline at end of file
diff --git a/lakekeeper/lakekeeper-encryption-external-secret.gomplate.yaml b/lakekeeper/lakekeeper-encryption-external-secret.gomplate.yaml
new file mode 100644
index 0000000..d392948
--- /dev/null
+++ b/lakekeeper/lakekeeper-encryption-external-secret.gomplate.yaml
@@ -0,0 +1,22 @@
+apiVersion: external-secrets.io/v1
+kind: ExternalSecret
+metadata:
+  name: lakekeeper-encryption-external-secret
+  namespace: {{ .Env.LAKEKEEPER_NAMESPACE }}
+spec:
+  refreshInterval: 1h
+  secretStoreRef:
+    name: vault-secret-store
+    kind: ClusterSecretStore
+  target:
+    name: lakekeeper-encryption-key
+    creationPolicy: Owner
+    template:
+      type: Opaque
+      data:
+        encryption-key: "{{ `{{ .encryption_key }}` }}"
+  data:
+    - secretKey: encryption_key
+      remoteRef:
+        key: lakekeeper/encryption
+        property: encryption-key
\ No newline at end of file
diff --git a/lakekeeper/lakekeeper-values.gomplate.yaml b/lakekeeper/lakekeeper-values.gomplate.yaml
new file mode 100644
index 0000000..353a6ff
--- /dev/null
+++ b/lakekeeper/lakekeeper-values.gomplate.yaml
@@ -0,0 +1,93 @@
+catalog:
+  image:
+    repository: quay.io/lakekeeper/catalog
+    tag: v0.9.3
+    pullPolicy: IfNotPresent
+
+  replicaCount: 1
+
+  # Configuration using the correct structure
+  config:
+    LAKEKEEPER__OPENID_PROVIDER_URI: "https://{{ .Env.KEYCLOAK_HOST }}/realms/{{ .Env.KEYCLOAK_REALM }}"
+    LAKEKEEPER__OPENID_AUDIENCE: "lakekeeper"
+    LAKEKEEPER__UI__OPENID_CLIENT_ID: "lakekeeper"
+    LAKEKEEPER__UI__OPENID_SCOPE: "openid profile email"
+
+  # Secret management configuration
+  secrets:
+    backend: "postgres"
+
+  # Service configuration
+  service:
+    type: ClusterIP
+    port: 8080
+
+  # Health probes
+  probes:
+    liveness:
+      enabled: true
+      httpGet:
+        path: /health
+        port: http
+      initialDelaySeconds: 30
+      periodSeconds: 10
+    readiness:
+      enabled: true
+      httpGet:
+        path: /health
+        port: http
+      initialDelaySeconds: 5
+      periodSeconds: 5
+
+  # Resource limits
+  resources:
+    limits:
+      cpu: 500m
+      memory: 512Mi
+    requests:
+      cpu: 100m
+      memory: 128Mi
+
+
+  # Database migration configuration
+  dbMigrations:
+    enabled: true
+
+  # Ingress configuration
+  ingress:
+    enabled: true
+    annotations:
+      kubernetes.io/ingress.class: traefik
+      traefik.ingress.kubernetes.io/router.entrypoints: websecure
+    ingressClassName: traefik
+    host: {{ .Env.LAKEKEEPER_HOST }}
+    path: ""
+    tls:
+      enabled: true
+      secretName: lakekeeper-tls
+
+# Authentication configuration
+auth:
+  oauth2:
+    providerUri: "https://{{ .Env.KEYCLOAK_HOST }}/realms/{{ .Env.KEYCLOAK_REALM }}"
+    audience: "lakekeeper"
+  authz:
+    backend: "allowall"
+
+# PostgreSQL subchart (disabled since we use external)
+postgresql:
+  enabled: false
+
+
+# External database configuration
+externalDatabase:
+  type: postgres
+  host_read: postgres-cluster-rw.postgres
+  host_write: postgres-cluster-rw.postgres
+  port: 5432
+  database: lakekeeper
+  user: lakekeeper
+  userSecret: lakekeeper-database-secret
+  userSecretKey: username
+  passwordSecret: lakekeeper-database-secret
+  passwordSecretKey: password