feat(lakekeeper): add Lakekeeper

This commit is contained in:
Masaki Yatsu
2025-09-19 03:13:18 +09:00
parent b559fe67a2
commit f9bb6148cc
6 changed files with 373 additions and 0 deletions

View File

@@ -16,6 +16,7 @@ mod external-secrets
mod keycloak
mod jupyterhub
mod k8s
mod lakekeeper
mod longhorn
mod metabase
mod minio

3
lakekeeper/.gitignore vendored Normal file
View File

@@ -0,0 +1,3 @@
# Generated files that should not be committed. The database ExternalSecret and
# the values file are rendered by gomplate from their *.gomplate.yaml templates
# in lakekeeper/justfile; the encryption one is presumably rendered the same
# way — TODO confirm, no recipe in the justfile produces it.
lakekeeper-database-external-secret.yaml
lakekeeper-encryption-external-secret.yaml
lakekeeper-values.yaml

212
lakekeeper/justfile Normal file
View File

@@ -0,0 +1,212 @@
# Let `just` look in the parent directory's justfile when a recipe is not
# found in this one.
set fallback

# Deployment settings. Each one takes its value from the environment variable of
# the same name when it is set, falls back to the default shown here otherwise,
# and is exported to every recipe.
export EXTERNAL_SECRETS_NAMESPACE := env('EXTERNAL_SECRETS_NAMESPACE', 'external-secrets')
export KEYCLOAK_REALM := env('KEYCLOAK_REALM', 'buunstack')
export LAKEKEEPER_CHART_VERSION := env('LAKEKEEPER_CHART_VERSION', '0.7.1')
export LAKEKEEPER_NAMESPACE := env('LAKEKEEPER_NAMESPACE', 'lakekeeper')
# Default recipe: print the recipe list in declaration order, including the
# recipes of submodules. Private, so it does not show up in that list itself.
[private]
default:
    @just --list --list-submodules --unsorted
# Registers the Lakekeeper chart repository under the name `lakekeeper` and then
# refreshes the local Helm repository index.
#
# Add Helm repository
add-helm-repo:
    helm repo add lakekeeper https://lakekeeper.github.io/lakekeeper-charts/
    helm repo update
# Unregisters the `lakekeeper` chart repository added by `add-helm-repo`.
#
# Remove Helm repository
remove-helm-repo:
    helm repo remove lakekeeper
# Creates ${LAKEKEEPER_NAMESPACE} only when `kubectl get namespace` cannot find
# it, so the recipe is safe to run repeatedly.
#
# NOTE: output is silenced with the POSIX form `>/dev/null 2>&1`, not bash's
# `&>/dev/null`. This is a linewise recipe, which just runs with `sh` unless a
# `shell` setting says otherwise; in a strictly POSIX shell such as dash,
# `cmd &>/dev/null` backgrounds `cmd` and the empty redirection succeeds, so the
# `||` fallback would never create the namespace.
#
# Create namespace
create-namespace:
    @kubectl get namespace ${LAKEKEEPER_NAMESPACE} >/dev/null 2>&1 || \
        kubectl create namespace ${LAKEKEEPER_NAMESPACE}
# Ensures the `lakekeeper` PostgreSQL database and user exist, grants the user
# access, and publishes the connection details as the Kubernetes Secret
# `lakekeeper-database-secret`: through Vault + an ExternalSecret when the
# external-secrets Helm release is present, or as a plain Secret otherwise.
#
# Setup database for Lakekeeper
setup-database:
    #!/bin/bash
    set -euo pipefail

    db_name=lakekeeper
    db_user=lakekeeper
    vault_path=lakekeeper/database
    secret_name=lakekeeper-database-secret

    # Succeeds when the external-secrets Helm release can be found.
    external_secrets_installed() {
        helm status external-secrets -n ${EXTERNAL_SECRETS_NAMESPACE} &>/dev/null
    }

    # Generates a fresh password and applies it to the already existing user.
    rotate_password() {
        DB_PASSWORD=$(just utils::random-password)
        just postgres::change-password "${db_user}" "$DB_PASSWORD"
    }

    echo "Setting up Lakekeeper database..."

    # --- Database -----------------------------------------------------------
    if ! just postgres::db-exists "${db_name}" &>/dev/null; then
        echo "Creating new database 'lakekeeper'..."
        just postgres::create-db "${db_name}"
    else
        echo "Database 'lakekeeper' already exists."
    fi

    # --- User and password --------------------------------------------------
    # DB_PASSWORD is set on every path below; it is needed later to publish
    # the credentials.
    if ! just postgres::user-exists "${db_user}" &>/dev/null; then
        echo "Creating new user 'lakekeeper'..."
        DB_PASSWORD=$(just utils::random-password)
        just postgres::create-user "${db_user}" "$DB_PASSWORD"
    else
        echo "User 'lakekeeper' already exists."
        if ! external_secrets_installed; then
            # Direct Secret approach: nothing to read the old password from,
            # so a new one is generated.
            echo "Generating new password for existing user..."
            rotate_password
        elif DB_PASSWORD=$(just vault::get "${vault_path}" password 2>/dev/null); then
            echo "Using existing password from Vault."
        else
            echo "Generating new password and updating Vault..."
            rotate_password
        fi
    fi

    # --- Permissions --------------------------------------------------------
    echo "Ensuring database permissions..."
    just postgres::grant "${db_name}" "${db_user}"

    # --- Publish credentials ------------------------------------------------
    if external_secrets_installed; then
        echo "External Secrets available. Storing credentials in Vault and creating ExternalSecret..."
        just vault::put "${vault_path}" \
            username="${db_user}" \
            password="$DB_PASSWORD" \
            host=postgres-cluster-rw.postgres \
            port=5432 \
            database="${db_name}"

        # Render the ExternalSecret manifest from its template and apply it.
        gomplate -f lakekeeper-database-external-secret.gomplate.yaml \
            -o lakekeeper-database-external-secret.yaml
        kubectl apply -f lakekeeper-database-external-secret.yaml

        echo "Waiting for database secret to be ready..."
        kubectl wait --for=condition=Ready \
            externalsecret/lakekeeper-database-external-secret \
            -n ${LAKEKEEPER_NAMESPACE} --timeout=60s
    else
        echo "External Secrets not available. Creating Kubernetes Secret directly..."
        # Delete first so that `create` does not fail on an existing Secret.
        kubectl delete secret "${secret_name}" -n ${LAKEKEEPER_NAMESPACE} --ignore-not-found
        kubectl create secret generic "${secret_name}" -n ${LAKEKEEPER_NAMESPACE} \
            --from-literal=username="${db_user}" \
            --from-literal=password="$DB_PASSWORD" \
            --from-literal=host=postgres-cluster-rw.postgres \
            --from-literal=port=5432 \
            --from-literal=database="${db_name}"
        echo "Database secret created directly in Kubernetes"
    fi

    echo "Database setup completed."
# Removes the `lakekeeper-database-secret` Secret and, when the cluster knows
# the ExternalSecret resource type, the ExternalSecret that generates it.
#
# NOTE: `--ignore-not-found` only covers a missing object. When the
# ExternalSecret CRD is not installed at all (the "Kubernetes Secret directly"
# path of `setup-database`), `kubectl delete externalsecret` fails with an
# unknown-resource-type error, which would abort `uninstall`. The deletion is
# therefore guarded by a CRD check. Redirections use the POSIX form because
# linewise recipes run under `sh` by default.
#
# Delete database secret
delete-database-secret:
    @kubectl delete secret lakekeeper-database-secret -n ${LAKEKEEPER_NAMESPACE} --ignore-not-found
    @if kubectl get crd externalsecrets.external-secrets.io >/dev/null 2>&1; then \
        kubectl delete externalsecret lakekeeper-database-external-secret \
            -n ${LAKEKEEPER_NAMESPACE} --ignore-not-found; \
    fi
# Registers a `lakekeeper` client in the ${KEYCLOAK_REALM} realm (unless it
# already exists) and adds an audience mapper for it. Requires LAKEKEEPER_HOST,
# which is used to build the redirect URI.
#
# Create OIDC client in Keycloak for Lakekeeper authentication
create-oidc-client:
    #!/bin/bash
    set -euo pipefail

    # The redirect URI is derived from the host, so refuse to run without it.
    [ -n "${LAKEKEEPER_HOST:-}" ] || {
        echo "Error: LAKEKEEPER_HOST environment variable is required"
        exit 1
    }

    client_id=lakekeeper
    redirect_uri="https://${LAKEKEEPER_HOST}/ui/callback"

    echo "Creating Lakekeeper OAuth client in Keycloak..."
    if ! just keycloak::client-exists ${KEYCLOAK_REALM} "${client_id}" &>/dev/null; then
        echo "Creating new public client for PKCE flow..."
        # No client secret is passed: the client is meant to be public (PKCE).
        just keycloak::create-client ${KEYCLOAK_REALM} "${client_id}" "${redirect_uri}"
    else
        # Leave an existing client untouched.
        echo "Client 'lakekeeper' already exists, skipping creation..."
        echo "Existing client will preserve roles and mappers"
    fi

    # Make 'lakekeeper' appear in the audience of issued JWTs.
    # NOTE(review): unlike the calls above, no realm is passed here — confirm
    # against the signature of keycloak::add-audience-mapper.
    echo "Adding audience mapper for JWT token..."
    just keycloak::add-audience-mapper "${client_id}" "${client_id}"

    echo "OAuth client configured successfully for PKCE authentication"
# Removes the `lakekeeper` client from the ${KEYCLOAK_REALM} realm by delegating
# to the keycloak module's delete-client recipe.
#
# Delete OIDC client (for cleanup purposes)
delete-oidc-client:
    @just keycloak::delete-client ${KEYCLOAK_REALM} lakekeeper
# Installs (or upgrades) Lakekeeper end to end: namespace, database, Keycloak
# client, Helm repository, rendered values and the Helm release itself, then
# waits for the migration job and the deployment.
#
# Environment:
#   LAKEKEEPER_HOST  public FQDN; prompted for interactively when unset
#   KEYCLOAK_HOST    required; read by lakekeeper-values.gomplate.yaml
#
# Install Lakekeeper
install:
    #!/bin/bash
    set -euo pipefail

    # Export first so the value (including one entered at the prompt below) is
    # visible to the child `just` recipes and to gomplate.
    export LAKEKEEPER_HOST=${LAKEKEEPER_HOST:-}
    while [ -z "${LAKEKEEPER_HOST}" ]; do
        LAKEKEEPER_HOST=$(
            gum input --prompt="Lakekeeper host (FQDN): " --width=100 \
                --placeholder="e.g., lakekeeper.example.com"
        )
    done

    # The values template references .Env.KEYCLOAK_HOST. Check it up front so
    # a missing value is reported before the database and Keycloak are touched
    # rather than when the template is rendered.
    if [ -z "${KEYCLOAK_HOST:-}" ]; then
        echo "Error: KEYCLOAK_HOST environment variable is required"
        exit 1
    fi

    echo "Installing Lakekeeper..."
    just create-namespace
    just setup-database
    just create-oidc-client
    just add-helm-repo

    # Helm chart will automatically create the encryption key secret
    gomplate -f lakekeeper-values.gomplate.yaml -o lakekeeper-values.yaml

    # Use --wait=false to avoid circular dependency:
    # Helm waits for post-install hooks (migration job) to complete,
    # but migration job can't start until Helm deployment finishes
    helm upgrade --install lakekeeper lakekeeper/lakekeeper \
        --version ${LAKEKEEPER_CHART_VERSION} -n ${LAKEKEEPER_NAMESPACE} \
        --timeout=10m --wait=false \
        -f lakekeeper-values.yaml

    # The numeric suffix of the migration job looks like the Helm release
    # revision, so a hard-coded "-1" is only right for the very first install.
    # Read the current revision from `helm status` instead.
    # NOTE(review): confirm the job naming against the chart's templates.
    # awk consumes all input (no early exit) so `pipefail` cannot trip on SIGPIPE.
    revision=$(
        helm status lakekeeper -n ${LAKEKEEPER_NAMESPACE} \
            | awk '$1 == "REVISION:" && !seen++ { print $2 }'
    )
    if [ -z "${revision}" ]; then
        echo "Error: could not determine the Helm release revision of 'lakekeeper'"
        exit 1
    fi

    echo "Waiting for database migration to complete..."
    kubectl wait --for=condition=complete "job/lakekeeper-db-migration-${revision}" \
        -n ${LAKEKEEPER_NAMESPACE} --timeout=300s

    echo "Waiting for Lakekeeper deployment to be ready..."
    kubectl wait --for=condition=available deployment/lakekeeper \
        -n ${LAKEKEEPER_NAMESPACE} --timeout=300s

    echo "Lakekeeper installation completed"
    echo "Access Lakekeeper at: https://${LAKEKEEPER_HOST}"
# Removes the Helm release, any leftover workloads and PVCs in the namespace,
# the database secret and the Keycloak client.
#
# Parameters:
#   delete-db  when "true" (the default) the `lakekeeper` PostgreSQL database
#              is dropped as well; pass any other value to keep it.
#
# Uninstall Lakekeeper
uninstall delete-db='true':
    #!/bin/bash
    set -euo pipefail
    echo "Uninstalling Lakekeeper..."
    helm uninstall lakekeeper -n ${LAKEKEEPER_NAMESPACE} --ignore-not-found

    # Force delete stuck resources
    echo "Checking for stuck resources..."
    kubectl delete all --all -n ${LAKEKEEPER_NAMESPACE} --force --grace-period=0 2>/dev/null || true

    # Delete PVCs
    PVCS=$(kubectl get pvc -n ${LAKEKEEPER_NAMESPACE} -o name 2>/dev/null || true)
    if [ -n "$PVCS" ]; then
        echo "Deleting PersistentVolumeClaims..."
        kubectl delete pvc --all -n ${LAKEKEEPER_NAMESPACE} --force --grace-period=0 2>/dev/null || true
    fi

    just delete-database-secret

    # Clean up the Keycloak client exactly once, and best-effort: the client may
    # already be gone (e.g. on a repeated uninstall), which must not stop the
    # remaining cleanup under `set -e`.
    just delete-oidc-client \
        || echo "Warning: could not delete Keycloak client 'lakekeeper' (it may not exist)" >&2

    if [ "{{ delete-db }}" = "true" ]; then
        just postgres::delete-db lakekeeper
    fi

    echo "Lakekeeper uninstalled"
# After an interactive confirmation, deletes the `lakekeeper` database, the
# Vault entries lakekeeper/database and lakekeeper/oauth, and the Keycloak
# client. Every deletion is best-effort; failures are ignored.
#
# Clean up database and secrets
cleanup:
    #!/bin/bash
    set -euo pipefail
    echo "This will delete the Lakekeeper database and all secrets."

    # Bail out unless the operator explicitly confirms.
    if ! gum confirm "Are you sure you want to proceed?"; then
        echo "Cleanup cancelled"
        exit 0
    fi

    echo "Cleaning up Lakekeeper resources..."
    just postgres::delete-db lakekeeper || true
    for vault_path in lakekeeper/database lakekeeper/oauth; do
        just vault::delete "${vault_path}" || true
    done
    just keycloak::delete-client ${KEYCLOAK_REALM} lakekeeper || true
    echo "Cleanup completed"

View File

@@ -0,0 +1,42 @@
# ExternalSecret producing the `lakekeeper-database-secret` Secret from the
# Vault entry `lakekeeper/database`.
#
# This file is a gomplate template: `{{ .Env.* }}` is resolved at render time,
# while the backtick-quoted `{{ `...` }}` expressions render to literal
# `{{ .field }}` placeholders for the External Secrets target template.
apiVersion: external-secrets.io/v1
kind: ExternalSecret
metadata:
  name: lakekeeper-database-external-secret
  namespace: {{ .Env.LAKEKEEPER_NAMESPACE }}
spec:
  refreshInterval: 1h
  secretStoreRef:
    name: vault-secret-store
    kind: ClusterSecretStore

  # Shape of the generated Kubernetes Secret.
  target:
    name: lakekeeper-database-secret
    creationPolicy: Owner
    template:
      type: Opaque
      data:
        username: "{{ `{{ .username }}` }}"
        password: "{{ `{{ .password }}` }}"
        host: "{{ `{{ .host }}` }}"
        port: "{{ `{{ .port }}` }}"
        database: "{{ `{{ .database }}` }}"

  # Values fetched from the store; each `secretKey` is the name used by the
  # template above.
  data:
    - secretKey: username
      remoteRef:
        key: lakekeeper/database
        property: username
    - secretKey: password
      remoteRef:
        key: lakekeeper/database
        property: password
    - secretKey: host
      remoteRef:
        key: lakekeeper/database
        property: host
    - secretKey: port
      remoteRef:
        key: lakekeeper/database
        property: port
    - secretKey: database
      remoteRef:
        key: lakekeeper/database
        property: database

View File

@@ -0,0 +1,22 @@
# ExternalSecret producing the `lakekeeper-encryption-key` Secret from the
# Vault entry `lakekeeper/encryption`.
#
# This file is a gomplate template: `{{ .Env.* }}` is resolved at render time,
# while the backtick-quoted `{{ `...` }}` expression renders to a literal
# `{{ .encryption_key }}` placeholder for the External Secrets target template.
apiVersion: external-secrets.io/v1
kind: ExternalSecret
metadata:
  name: lakekeeper-encryption-external-secret
  namespace: {{ .Env.LAKEKEEPER_NAMESPACE }}
spec:
  refreshInterval: 1h
  secretStoreRef:
    name: vault-secret-store
    kind: ClusterSecretStore

  # Shape of the generated Kubernetes Secret.
  target:
    name: lakekeeper-encryption-key
    creationPolicy: Owner
    template:
      type: Opaque
      data:
        encryption-key: "{{ `{{ .encryption_key }}` }}"

  # The remote property is spelled with a hyphen, the local key with an
  # underscore (the name the template above refers to).
  data:
    - secretKey: encryption_key
      remoteRef:
        key: lakekeeper/encryption
        property: encryption-key

View File

@@ -0,0 +1,93 @@
# Helm values for the Lakekeeper release, written as a gomplate template.
# Rendering reads KEYCLOAK_HOST, KEYCLOAK_REALM and LAKEKEEPER_HOST from the
# environment.
# NOTE(review): indentation is not visible in this view, so which keys nest
# under `catalog:` cannot be told from here — confirm against the chart's
# values schema before editing.
catalog:
# Container image of the catalog service
image:
repository: quay.io/lakekeeper/catalog
tag: v0.9.3
pullPolicy: IfNotPresent
replicaCount: 1
# LAKEKEEPER__* settings: the OpenID provider is the Keycloak realm URL, the
# expected audience and the UI client id are both "lakekeeper".
config:
LAKEKEEPER__OPENID_PROVIDER_URI: "https://{{ .Env.KEYCLOAK_HOST }}/realms/{{ .Env.KEYCLOAK_REALM }}"
LAKEKEEPER__OPENID_AUDIENCE: "lakekeeper"
LAKEKEEPER__UI__OPENID_CLIENT_ID: "lakekeeper"
LAKEKEEPER__UI__OPENID_SCOPE: "openid profile email"
# Secret management configuration
secrets:
backend: "postgres"
# Service configuration
service:
type: ClusterIP
port: 8080
# Health probes: both request /health on the port named `http`; liveness
# starts after 30s and repeats every 10s, readiness after 5s every 5s.
probes:
liveness:
enabled: true
httpGet:
path: /health
port: http
initialDelaySeconds: 30
periodSeconds: 10
readiness:
enabled: true
httpGet:
path: /health
port: http
initialDelaySeconds: 5
periodSeconds: 5
# Resource requests and limits
resources:
limits:
cpu: 500m
memory: 512Mi
requests:
cpu: 100m
memory: 128Mi
# Database migration configuration
dbMigrations:
enabled: true
# Ingress configuration: Traefik on the `websecure` entrypoint, serving
# LAKEKEEPER_HOST with TLS from the `lakekeeper-tls` secret. The ingress class
# is given both as an annotation and as ingressClassName.
ingress:
enabled: true
annotations:
kubernetes.io/ingress.class: traefik
traefik.ingress.kubernetes.io/router.entrypoints: websecure
ingressClassName: traefik
host: {{ .Env.LAKEKEEPER_HOST }}
path: ""
tls:
enabled: true
secretName: lakekeeper-tls
# Authentication configuration: same provider URL and audience as the
# LAKEKEEPER__OPENID_* settings above.
auth:
oauth2:
providerUri: "https://{{ .Env.KEYCLOAK_HOST }}/realms/{{ .Env.KEYCLOAK_REALM }}"
audience: "lakekeeper"
# NOTE(review): "allowall" presumably disables authorization checks — confirm
# this is intended outside of development setups.
authz:
backend: "allowall"
# PostgreSQL subchart (disabled since we use external)
postgresql:
enabled: false
# External database configuration: reads and writes both go to
# postgres-cluster-rw.postgres; user name and password are taken from the
# `username` / `password` keys of the `lakekeeper-database-secret` Secret.
externalDatabase:
type: postgres
host_read: postgres-cluster-rw.postgres
host_write: postgres-cluster-rw.postgres
port: 5432
database: lakekeeper
user: lakekeeper
userSecret: lakekeeper-database-secret
userSecretKey: username
passwordSecret: lakekeeper-database-secret
passwordSecretKey: password