From cf2d94dfdd59d9de41efde92d211673ce0f666cb Mon Sep 17 00:00:00 2001 From: Masaki Yatsu Date: Mon, 3 Nov 2025 11:04:49 +0900 Subject: [PATCH] feat(trino): use vended credentials --- trino/README.md | 28 +++++++++++++----- trino/justfile | 50 +++++++++++++++----------------- trino/trino-values.gomplate.yaml | 18 ++---------- 3 files changed, 46 insertions(+), 50 deletions(-) diff --git a/trino/README.md b/trino/README.md index c21b878..00e4672 100644 --- a/trino/README.md +++ b/trino/README.md @@ -93,7 +93,7 @@ Trino has a three-layer memory architecture that must be properly configured: #### Memory Relationship -``` +```plain Kubernetes Memory (e.g., 1500Mi) └─ JVM Heap (e.g., 1500M, ~100%) └─ Query Memory (1GB) + Heap Headroom (~365MB) @@ -144,7 +144,7 @@ TRINO_WORKER_JVM_HEAP=4G **Error: Invalid memory configuration** -``` +```plain IllegalArgumentException: Invalid memory configuration. The sum of max query memory per node (1073741824) and heap headroom (382520525) cannot be larger than the available heap memory (1275068416) @@ -159,7 +159,7 @@ cannot be larger than the available heap memory (1275068416) **Error: Pod stuck in Pending state** -``` +```plain Warning FailedScheduling 0/1 nodes are available: 1 Insufficient memory. ``` @@ -367,13 +367,15 @@ Queries Iceberg tables via Lakekeeper REST Catalog: - **Storage**: MinIO S3-compatible object storage - **REST Catalog**: Lakekeeper (Apache Iceberg REST Catalog implementation) - **Authentication**: OAuth2 client credentials flow with Keycloak +- **S3 Credentials**: Always uses vended credentials (STS) #### How It Works 1. Trino authenticates to Lakekeeper using OAuth2 (client credentials flow) 2. Lakekeeper provides Iceberg table metadata from its catalog -3. Trino reads actual data files directly from MinIO using static S3 credentials -4. Vended credentials are disabled; Trino uses pre-configured MinIO access keys +3. Lakekeeper also provides temporary S3 credentials (STS tokens) with each request +4. Trino uses these temporary credentials to read data files directly from MinIO +5. Credentials automatically expire and are refreshed as needed #### Configuration @@ -384,7 +386,19 @@ The following settings are automatically configured when enabling the Iceberg ca - `lakekeeper` scope added to Trino client as default scope - Audience mapper in `lakekeeper` scope adds `aud: lakekeeper` to JWT tokens - S3 file system factory enabled (`fs.native-s3.enabled=true`) -- Static MinIO credentials provided via Kubernetes secrets +- Vended credentials enabled (`iceberg.rest-catalog.vended-credentials-enabled=true`) + +**Environment Variables:** + +- `TRINO_ICEBERG_WAREHOUSE`: Warehouse name (default: `default`) + +**Benefits of Vended Credentials:** + +- No need to distribute static S3 credentials to Trino +- Automatic credential expiration and rotation via MinIO STS +- Better security through temporary tokens +- Centralized credential management through Lakekeeper +- No static credential management required #### OAuth2 Scope and Audience @@ -666,7 +680,7 @@ Data Sources: ├─ Metadata: Lakekeeper (REST Catalog) │ └─ OAuth2 → Keycloak (client credentials) └─ Data: MinIO (S3) - └─ Static credentials + └─ Vended credentials (STS) ``` ### Key Components diff --git a/trino/justfile b/trino/justfile index a262b51..b016382 100644 --- a/trino/justfile +++ b/trino/justfile @@ -17,6 +17,7 @@ export TRINO_WORKER_JVM_HEAP := env("TRINO_WORKER_JVM_HEAP", "4G") export TRINO_WORKER_COUNT := env("TRINO_WORKER_COUNT", "2") export TRINO_POSTGRES_ENABLED := env("TRINO_POSTGRES_ENABLED", "true") export TRINO_ICEBERG_ENABLED := env("TRINO_ICEBERG_ENABLED", "") +export TRINO_ICEBERG_WAREHOUSE := env("TRINO_ICEBERG_WAREHOUSE", "default") export POSTGRES_NAMESPACE := env("POSTGRES_NAMESPACE", "postgres") export MINIO_NAMESPACE := env("MINIO_NAMESPACE", "minio") export LAKEKEEPER_NAMESPACE := env("LAKEKEEPER_NAMESPACE", "lakekeeper") @@ -194,6 +195,7 @@ enable-iceberg-catalog: #!/bin/bash set -euo pipefail echo "Enabling Iceberg catalog with Lakekeeper integration..." + echo "Vended credentials: enabled (always)" if ! kubectl get service lakekeeper -n ${LAKEKEEPER_NAMESPACE} &>/dev/null; then echo "Error: Lakekeeper is not installed. Please install Lakekeeper first with 'just lakekeeper::install'" @@ -205,8 +207,6 @@ enable-iceberg-catalog: exit 1 fi - just minio::create-user trino "trino-data" - echo "Configuring Keycloak client for Lakekeeper integration..." echo "Enabling service account for Trino client..." @@ -217,29 +217,8 @@ enable-iceberg-catalog: just keycloak::add-scope-to-client ${KEYCLOAK_REALM} trino lakekeeper echo "Keycloak configuration completed" - - if helm status external-secrets -n ${EXTERNAL_SECRETS_NAMESPACE} &>/dev/null; then - echo "Creating ExternalSecret for MinIO credentials..." - gomplate -f trino-minio-external-secret.gomplate.yaml -o trino-minio-external-secret.yaml - kubectl apply -f trino-minio-external-secret.yaml - echo "Waiting for MinIO secret to be ready..." - kubectl wait --for=condition=Ready externalsecret/trino-minio-external-secret \ - -n ${TRINO_NAMESPACE} --timeout=60s - else - echo "External Secrets not available. Creating Kubernetes Secret directly..." - ACCESS_KEY=trino - SECRET_KEY=$(just vault::get trino/minio secret_key 2>/dev/null || echo "") - if [ -z "$SECRET_KEY" ]; then - echo "Error: Could not retrieve MinIO credentials. Please check Vault." - exit 1 - fi - kubectl delete secret trino-minio-secret -n ${TRINO_NAMESPACE} --ignore-not-found - kubectl create secret generic trino-minio-secret -n ${TRINO_NAMESPACE} \ - --from-literal=access_key="$ACCESS_KEY" \ - --from-literal=secret_key="$SECRET_KEY" \ - --from-literal=endpoint="http://minio.${MINIO_NAMESPACE}.svc.cluster.local:9000" - echo "MinIO secret created directly in Kubernetes" - fi + echo "Vended credentials enabled. Skipping static MinIO credentials setup." + echo "Lakekeeper will provide temporary S3 credentials via STS." echo "Iceberg catalog setup completed" # Delete MinIO secret @@ -267,8 +246,12 @@ install: if [ -z "${TRINO_ICEBERG_ENABLED}" ]; then if gum confirm "Enable Iceberg catalog with Lakekeeper and MinIO?"; then - just enable-iceberg-catalog TRINO_ICEBERG_ENABLED="true" + WAREHOUSE_NAME=$(gum input --prompt="Warehouse name: " --width=100 \ + --placeholder="e.g., default" --value="default") + TRINO_ICEBERG_WAREHOUSE="${WAREHOUSE_NAME}" + + just enable-iceberg-catalog else TRINO_ICEBERG_ENABLED="false" fi @@ -278,6 +261,7 @@ install: shared_secret=$(just utils::random-password) export TRINO_SHARED_SECRET="${shared_secret}" + export TRINO_ICEBERG_WAREHOUSE="${TRINO_ICEBERG_WAREHOUSE}" gomplate -f trino-values.gomplate.yaml -o trino-values.yaml @@ -305,7 +289,7 @@ upgrade: echo "Upgrading Trino..." if [ -z "${TRINO_ICEBERG_ENABLED}" ]; then - if kubectl get secret trino-minio-secret -n ${TRINO_NAMESPACE} &>/dev/null; then + if kubectl get configmap trino-catalogs -n ${TRINO_NAMESPACE} -o jsonpath='{.data.iceberg\.properties}' &>/dev/null; then TRINO_ICEBERG_ENABLED="true" echo "Iceberg catalog: enabled" else @@ -314,6 +298,17 @@ upgrade: fi fi + if [ "${TRINO_ICEBERG_ENABLED}" = "true" ]; then + # Extract warehouse name from existing catalog config + if [ -z "${TRINO_ICEBERG_WAREHOUSE}" ]; then + WAREHOUSE=$(kubectl get configmap trino-catalogs -n ${TRINO_NAMESPACE} \ + -o jsonpath='{.data.iceberg\.properties}' 2>/dev/null | \ + grep "iceberg.rest-catalog.warehouse=" | cut -d'=' -f2 || echo "default") + TRINO_ICEBERG_WAREHOUSE="${WAREHOUSE}" + echo "Warehouse: ${TRINO_ICEBERG_WAREHOUSE}" + fi + fi + shared_secret=$( kubectl get configmap trino-coordinator -n ${TRINO_NAMESPACE} \ -o jsonpath='{.data.config\.properties}' 2>/dev/null | @@ -324,6 +319,7 @@ upgrade: exit 1 fi export TRINO_SHARED_SECRET="${shared_secret}" + export TRINO_ICEBERG_WAREHOUSE="${TRINO_ICEBERG_WAREHOUSE}" gomplate -f trino-values.gomplate.yaml -o trino-values.yaml diff --git a/trino/trino-values.gomplate.yaml b/trino/trino-values.gomplate.yaml index 8702913..0455036 100644 --- a/trino/trino-values.gomplate.yaml +++ b/trino/trino-values.gomplate.yaml @@ -97,20 +97,18 @@ catalogs: connector.name=iceberg iceberg.catalog.type=rest iceberg.rest-catalog.uri=http://lakekeeper.lakekeeper.svc.cluster.local:8181/catalog - iceberg.rest-catalog.warehouse=default + iceberg.rest-catalog.warehouse={{ .Env.TRINO_ICEBERG_WAREHOUSE }} iceberg.rest-catalog.security=OAUTH2 iceberg.rest-catalog.oauth2.credential=${ENV:OAUTH2_CLIENT_ID}:${ENV:OAUTH2_CLIENT_SECRET} iceberg.rest-catalog.oauth2.server-uri=https://{{ .Env.KEYCLOAK_HOST }}/realms/{{ .Env.KEYCLOAK_REALM }}/protocol/openid-connect/token iceberg.rest-catalog.oauth2.scope=openid profile lakekeeper iceberg.rest-catalog.nested-namespace-enabled=true - iceberg.rest-catalog.vended-credentials-enabled=false + iceberg.rest-catalog.vended-credentials-enabled=true iceberg.unique-table-location=true fs.native-s3.enabled=true s3.region=us-east-1 s3.endpoint=http://minio.{{ .Env.MINIO_NAMESPACE }}.svc.cluster.local:9000 s3.path-style-access=true - s3.aws-access-key=${ENV:MINIO_ACCESS_KEY} - s3.aws-secret-key=${ENV:MINIO_SECRET_KEY} {{- end }} {{- else }} catalogs: @@ -312,17 +310,5 @@ env: name: trino-postgres-secret key: password {{- end }} -{{- if .Env.TRINO_ICEBERG_ENABLED }} - - name: MINIO_ACCESS_KEY - valueFrom: - secretKeyRef: - name: trino-minio-secret - key: access_key - - name: MINIO_SECRET_KEY - valueFrom: - secretKeyRef: - name: trino-minio-secret - key: secret_key -{{- end }} envFrom: []