feat(trino): use vended credentials

Author: Masaki Yatsu
Date: 2025-11-03 11:04:49 +09:00
Parent: 17d450c17a
Commit: cf2d94dfdd
3 changed files with 46 additions and 50 deletions


@@ -93,7 +93,7 @@ Trino has a three-layer memory architecture that must be properly configured:
 #### Memory Relationship
-```
+```plain
 Kubernetes Memory (e.g., 1500Mi)
 └─ JVM Heap (e.g., 1500M, ~100%)
    └─ Query Memory (1GB) + Heap Headroom (~365MB)
@@ -144,7 +144,7 @@ TRINO_WORKER_JVM_HEAP=4G
 **Error: Invalid memory configuration**
-```
+```plain
 IllegalArgumentException: Invalid memory configuration.
 The sum of max query memory per node (1073741824) and heap headroom (382520525)
 cannot be larger than the available heap memory (1275068416)
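These numbers follow from Trino's default heap headroom of 30% of the JVM heap: a 1216 MiB heap is 1275068416 bytes, 30% of that is about 382520525 bytes, and adding the configured 1 GiB (1073741824 bytes) of query memory per node gives roughly 1.36 GiB, which no longer fits in the heap. Raising `TRINO_WORKER_JVM_HEAP` (or lowering the query memory per node) satisfies the constraint.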
@@ -159,7 +159,7 @@ cannot be larger than the available heap memory (1275068416)
 **Error: Pod stuck in Pending state**
-```
+```plain
 Warning FailedScheduling 0/1 nodes are available: 1 Insufficient memory.
 ```
@@ -367,13 +367,15 @@ Queries Iceberg tables via Lakekeeper REST Catalog:
 - **Storage**: MinIO S3-compatible object storage
 - **REST Catalog**: Lakekeeper (Apache Iceberg REST Catalog implementation)
 - **Authentication**: OAuth2 client credentials flow with Keycloak
+- **S3 Credentials**: Always uses vended credentials (STS)

 #### How It Works

 1. Trino authenticates to Lakekeeper using OAuth2 (client credentials flow)
 2. Lakekeeper provides Iceberg table metadata from its catalog
-3. Trino reads actual data files directly from MinIO using static S3 credentials
-4. Vended credentials are disabled; Trino uses pre-configured MinIO access keys
+3. Lakekeeper also provides temporary S3 credentials (STS tokens) with each request
+4. Trino uses these temporary credentials to read data files directly from MinIO
+5. Credentials automatically expire and are refreshed as needed
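The credential hand-off in steps 3–4 happens on the Iceberg REST `loadTable` call: the client asks for credential delegation and receives short-lived S3 settings alongside the table metadata. The sketch below is illustrative only and is not part of this commit; `TOKEN`, the `jq` calls, and the `<prefix>`/`<ns>`/`<table>` placeholders are assumptions, and the exact response keys depend on the Lakekeeper version.

```bash
# Illustrative only: observe vended credentials by hand with curl.
# TOKEN is an OAuth2 access token obtained via the client credentials flow.
LAKEKEEPER=http://lakekeeper.lakekeeper.svc.cluster.local:8181/catalog

# The warehouse prefix used in table URLs is advertised by the config endpoint.
curl -s -H "Authorization: Bearer ${TOKEN}" \
  "${LAKEKEEPER}/v1/config?warehouse=default" | jq '.overrides'

# Loading a table with credential delegation returns temporary S3 settings
# (e.g. s3.access-key-id / s3.secret-access-key / s3.session-token) that Trino
# uses in place of static MinIO keys.
curl -s -H "Authorization: Bearer ${TOKEN}" \
  -H "X-Iceberg-Access-Delegation: vended-credentials" \
  "${LAKEKEEPER}/v1/<prefix>/namespaces/<ns>/tables/<table>" | jq '.config'
```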
 #### Configuration
@@ -384,7 +386,19 @@ The following settings are automatically configured when enabling the Iceberg ca
 - `lakekeeper` scope added to Trino client as default scope
 - Audience mapper in `lakekeeper` scope adds `aud: lakekeeper` to JWT tokens
 - S3 file system factory enabled (`fs.native-s3.enabled=true`)
-- Static MinIO credentials provided via Kubernetes secrets
+- Vended credentials enabled (`iceberg.rest-catalog.vended-credentials-enabled=true`)
+
+**Environment Variables:**
+
+- `TRINO_ICEBERG_WAREHOUSE`: Warehouse name (default: `default`)
+
+**Benefits of Vended Credentials:**
+
+- No need to distribute or manage static S3 credentials for Trino
+- Automatic credential expiration and rotation via MinIO STS
+- Better security through temporary, short-lived tokens
+- Centralized credential management through Lakekeeper

 #### OAuth2 Scope and Audience
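A quick way to sanity-check the scope and audience wiring is to request a token with the Trino client's credentials and inspect its claims. A minimal sketch, assuming `KEYCLOAK_HOST`, `KEYCLOAK_REALM`, and the client secret are available in the shell and that `jq` is installed:

```bash
# Request a token via the OAuth2 client credentials flow, asking for the
# lakekeeper scope that is now attached to the trino client.
TOKEN=$(curl -s -X POST \
  "https://${KEYCLOAK_HOST}/realms/${KEYCLOAK_REALM}/protocol/openid-connect/token" \
  -d grant_type=client_credentials \
  -d client_id=trino \
  -d client_secret="${OAUTH2_CLIENT_SECRET}" \
  --data-urlencode "scope=openid profile lakekeeper" | jq -r '.access_token')

# Decode the JWT payload (base64url, padded to a multiple of 4) and confirm
# that aud contains "lakekeeper" and the scope includes "lakekeeper".
PAYLOAD=$(echo "${TOKEN}" | cut -d'.' -f2 | tr '_-' '/+')
while [ $(( ${#PAYLOAD} % 4 )) -ne 0 ]; do PAYLOAD="${PAYLOAD}="; done
echo "${PAYLOAD}" | base64 -d | jq '{aud, scope}'
```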
@@ -666,7 +680,7 @@ Data Sources:
 ├─ Metadata: Lakekeeper (REST Catalog)
 │   └─ OAuth2 → Keycloak (client credentials)
 └─ Data: MinIO (S3)
-    └─ Static credentials
+    └─ Vended credentials (STS)
 ```

 ### Key Components


@@ -17,6 +17,7 @@ export TRINO_WORKER_JVM_HEAP := env("TRINO_WORKER_JVM_HEAP", "4G")
 export TRINO_WORKER_COUNT := env("TRINO_WORKER_COUNT", "2")
 export TRINO_POSTGRES_ENABLED := env("TRINO_POSTGRES_ENABLED", "true")
 export TRINO_ICEBERG_ENABLED := env("TRINO_ICEBERG_ENABLED", "")
+export TRINO_ICEBERG_WAREHOUSE := env("TRINO_ICEBERG_WAREHOUSE", "default")
 export POSTGRES_NAMESPACE := env("POSTGRES_NAMESPACE", "postgres")
 export MINIO_NAMESPACE := env("MINIO_NAMESPACE", "minio")
 export LAKEKEEPER_NAMESPACE := env("LAKEKEEPER_NAMESPACE", "lakekeeper")
@@ -194,6 +195,7 @@ enable-iceberg-catalog:
 #!/bin/bash
 set -euo pipefail
 echo "Enabling Iceberg catalog with Lakekeeper integration..."
+echo "Vended credentials: enabled (always)"
 if ! kubectl get service lakekeeper -n ${LAKEKEEPER_NAMESPACE} &>/dev/null; then
   echo "Error: Lakekeeper is not installed. Please install Lakekeeper first with 'just lakekeeper::install'"
@@ -205,8 +207,6 @@ enable-iceberg-catalog:
   exit 1
 fi
-just minio::create-user trino "trino-data"
 echo "Configuring Keycloak client for Lakekeeper integration..."
 echo "Enabling service account for Trino client..."
@@ -217,29 +217,8 @@ enable-iceberg-catalog:
 just keycloak::add-scope-to-client ${KEYCLOAK_REALM} trino lakekeeper
 echo "Keycloak configuration completed"
-if helm status external-secrets -n ${EXTERNAL_SECRETS_NAMESPACE} &>/dev/null; then
-  echo "Creating ExternalSecret for MinIO credentials..."
-  gomplate -f trino-minio-external-secret.gomplate.yaml -o trino-minio-external-secret.yaml
-  kubectl apply -f trino-minio-external-secret.yaml
-  echo "Waiting for MinIO secret to be ready..."
-  kubectl wait --for=condition=Ready externalsecret/trino-minio-external-secret \
-    -n ${TRINO_NAMESPACE} --timeout=60s
-else
-  echo "External Secrets not available. Creating Kubernetes Secret directly..."
-  ACCESS_KEY=trino
-  SECRET_KEY=$(just vault::get trino/minio secret_key 2>/dev/null || echo "")
-  if [ -z "$SECRET_KEY" ]; then
-    echo "Error: Could not retrieve MinIO credentials. Please check Vault."
-    exit 1
-  fi
-  kubectl delete secret trino-minio-secret -n ${TRINO_NAMESPACE} --ignore-not-found
-  kubectl create secret generic trino-minio-secret -n ${TRINO_NAMESPACE} \
-    --from-literal=access_key="$ACCESS_KEY" \
-    --from-literal=secret_key="$SECRET_KEY" \
-    --from-literal=endpoint="http://minio.${MINIO_NAMESPACE}.svc.cluster.local:9000"
-  echo "MinIO secret created directly in Kubernetes"
-fi
+echo "Vended credentials enabled. Skipping static MinIO credentials setup."
+echo "Lakekeeper will provide temporary S3 credentials via STS."
 echo "Iceberg catalog setup completed"

 # Delete MinIO secret
@@ -267,8 +246,12 @@ install:
 if [ -z "${TRINO_ICEBERG_ENABLED}" ]; then
   if gum confirm "Enable Iceberg catalog with Lakekeeper and MinIO?"; then
-    just enable-iceberg-catalog
     TRINO_ICEBERG_ENABLED="true"
+    WAREHOUSE_NAME=$(gum input --prompt="Warehouse name: " --width=100 \
+      --placeholder="e.g., default" --value="default")
+    TRINO_ICEBERG_WAREHOUSE="${WAREHOUSE_NAME}"
+    just enable-iceberg-catalog
   else
     TRINO_ICEBERG_ENABLED="false"
   fi
@@ -278,6 +261,7 @@ install:
 shared_secret=$(just utils::random-password)
 export TRINO_SHARED_SECRET="${shared_secret}"
+export TRINO_ICEBERG_WAREHOUSE="${TRINO_ICEBERG_WAREHOUSE}"
 gomplate -f trino-values.gomplate.yaml -o trino-values.yaml
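The prompts above come from `gum`; in scripted installs they can be skipped by pre-setting the variables the recipe reads through `env()`. A minimal sketch, under the assumption that this justfile's recipes are invoked directly (the `analytics` warehouse name is hypothetical and must already exist in Lakekeeper):

```bash
# Pre-set the variables so the gum prompts are skipped, then run the catalog
# setup explicitly, since install only calls it from the interactive branch.
export TRINO_ICEBERG_ENABLED=true
export TRINO_ICEBERG_WAREHOUSE=analytics
just enable-iceberg-catalog
just install   # or the module-qualified invocation used elsewhere in this repo
```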
@@ -305,7 +289,7 @@ upgrade:
echo "Upgrading Trino..." echo "Upgrading Trino..."
if [ -z "${TRINO_ICEBERG_ENABLED}" ]; then if [ -z "${TRINO_ICEBERG_ENABLED}" ]; then
if kubectl get secret trino-minio-secret -n ${TRINO_NAMESPACE} &>/dev/null; then if kubectl get configmap trino-catalogs -n ${TRINO_NAMESPACE} -o jsonpath='{.data.iceberg\.properties}' &>/dev/null; then
TRINO_ICEBERG_ENABLED="true" TRINO_ICEBERG_ENABLED="true"
echo "Iceberg catalog: enabled" echo "Iceberg catalog: enabled"
else else
@@ -314,6 +298,17 @@ upgrade:
   fi
 fi
+if [ "${TRINO_ICEBERG_ENABLED}" = "true" ]; then
+  # Extract warehouse name from existing catalog config
+  if [ -z "${TRINO_ICEBERG_WAREHOUSE}" ]; then
+    WAREHOUSE=$(kubectl get configmap trino-catalogs -n ${TRINO_NAMESPACE} \
+      -o jsonpath='{.data.iceberg\.properties}' 2>/dev/null | \
+      grep "iceberg.rest-catalog.warehouse=" | cut -d'=' -f2 || echo "default")
+    TRINO_ICEBERG_WAREHOUSE="${WAREHOUSE}"
+    echo "Warehouse: ${TRINO_ICEBERG_WAREHOUSE}"
+  fi
+fi
 shared_secret=$(
   kubectl get configmap trino-coordinator -n ${TRINO_NAMESPACE} \
     -o jsonpath='{.data.config\.properties}' 2>/dev/null |
@@ -324,6 +319,7 @@ upgrade:
   exit 1
 fi
 export TRINO_SHARED_SECRET="${shared_secret}"
+export TRINO_ICEBERG_WAREHOUSE="${TRINO_ICEBERG_WAREHOUSE}"
 gomplate -f trino-values.gomplate.yaml -o trino-values.yaml
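After the upgrade renders and applies the new values, the live catalog configuration can be checked from the same ConfigMap the recipe parses. A small sketch, assuming `TRINO_NAMESPACE` is set as above:

```bash
# Confirm the deployed iceberg catalog now uses vended credentials and the
# expected warehouse; this reads the ConfigMap the upgrade recipe inspects.
kubectl get configmap trino-catalogs -n "${TRINO_NAMESPACE}" \
  -o jsonpath='{.data.iceberg\.properties}' | \
  grep -E 'vended-credentials|warehouse'
```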


@@ -97,20 +97,18 @@ catalogs:
 connector.name=iceberg
 iceberg.catalog.type=rest
 iceberg.rest-catalog.uri=http://lakekeeper.lakekeeper.svc.cluster.local:8181/catalog
-iceberg.rest-catalog.warehouse=default
+iceberg.rest-catalog.warehouse={{ .Env.TRINO_ICEBERG_WAREHOUSE }}
 iceberg.rest-catalog.security=OAUTH2
 iceberg.rest-catalog.oauth2.credential=${ENV:OAUTH2_CLIENT_ID}:${ENV:OAUTH2_CLIENT_SECRET}
 iceberg.rest-catalog.oauth2.server-uri=https://{{ .Env.KEYCLOAK_HOST }}/realms/{{ .Env.KEYCLOAK_REALM }}/protocol/openid-connect/token
 iceberg.rest-catalog.oauth2.scope=openid profile lakekeeper
 iceberg.rest-catalog.nested-namespace-enabled=true
-iceberg.rest-catalog.vended-credentials-enabled=false
+iceberg.rest-catalog.vended-credentials-enabled=true
 iceberg.unique-table-location=true
 fs.native-s3.enabled=true
 s3.region=us-east-1
 s3.endpoint=http://minio.{{ .Env.MINIO_NAMESPACE }}.svc.cluster.local:9000
 s3.path-style-access=true
-s3.aws-access-key=${ENV:MINIO_ACCESS_KEY}
-s3.aws-secret-key=${ENV:MINIO_SECRET_KEY}
 {{- end }}
 {{- else }}
 catalogs:
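With the static `s3.aws-access-key`/`s3.aws-secret-key` entries gone, an end-to-end check is to run a write-and-read query against the catalog, which forces Trino to fetch vended credentials before touching MinIO. A hedged sketch: the `trino-coordinator` deployment name and the presence of the bundled `trino` CLI in the image are assumptions, and the `smoke` schema is hypothetical.

```bash
# Hypothetical smoke test: each statement makes Trino obtain table metadata
# plus short-lived S3 credentials from Lakekeeper, then read/write MinIO.
kubectl exec -n "${TRINO_NAMESPACE:-trino}" deploy/trino-coordinator -- \
  trino --execute "
    CREATE SCHEMA IF NOT EXISTS iceberg.smoke;
    CREATE TABLE IF NOT EXISTS iceberg.smoke.demo (id integer);
    INSERT INTO iceberg.smoke.demo VALUES (1);
    SELECT * FROM iceberg.smoke.demo;
  "
```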
@@ -312,17 +310,5 @@ env:
       name: trino-postgres-secret
       key: password
 {{- end }}
-{{- if .Env.TRINO_ICEBERG_ENABLED }}
-- name: MINIO_ACCESS_KEY
-  valueFrom:
-    secretKeyRef:
-      name: trino-minio-secret
-      key: access_key
-- name: MINIO_SECRET_KEY
-  valueFrom:
-    secretKeyRef:
-      name: trino-minio-secret
-      key: secret_key
-{{- end }}
 envFrom: []
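Because nothing in the rendered values references `trino-minio-secret` any more, the old static credentials only linger for cleanup; the justfile's "Delete MinIO secret" recipe is the scripted path, and a direct equivalent is sketched below (namespace default assumed).

```bash
# Optional cleanup after migrating to vended credentials: the static
# credentials secret is no longer consumed by the chart values.
kubectl delete secret trino-minio-secret -n "${TRINO_NAMESPACE:-trino}" --ignore-not-found
```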