feat(trino): Lakekeeper integration
This commit is contained in:
@@ -161,6 +161,14 @@ just vault::get secret/postgres/superuser password
|
|||||||
|
|
||||||
5. **Cleanup Operations**: Most modules provide cleanup recipes (e.g., `just keycloak::delete-user`) with confirmation prompts.
|
5. **Cleanup Operations**: Most modules provide cleanup recipes (e.g., `just keycloak::delete-user`) with confirmation prompts.
|
||||||
|
|
||||||
|
6. **Trino and Lakekeeper Integration**: When setting up Trino with Lakekeeper (Iceberg REST Catalog):
|
||||||
|
- The Trino Keycloak client MUST have service accounts enabled for the OAuth2 client credentials flow
|
||||||
|
- The `lakekeeper` client scope MUST be added to the Trino client
|
||||||
|
- An audience mapper MUST be configured to set `aud: lakekeeper` in JWT tokens
|
||||||
|
- Trino REQUIRES `fs.native-s3.enabled=true` to handle `s3://` URIs, regardless of vended credentials
|
||||||
|
- When `iceberg.rest-catalog.vended-credentials-enabled=false`, static S3 credentials must be provided via environment variables
|
||||||
|
- All of these settings are applied automatically by `just trino::install` when MinIO storage is enabled
|
||||||
|
|
||||||
## Testing and Validation
|
## Testing and Validation
|
||||||
|
|
||||||
After setup, validate the stack:
|
After setup, validate the stack:
|
||||||
|
|||||||
@@ -42,6 +42,9 @@ You will be prompted for:
|
|||||||
- Password authentication for JDBC access
|
- Password authentication for JDBC access
|
||||||
- PostgreSQL catalog (if selected)
|
- PostgreSQL catalog (if selected)
|
||||||
- Iceberg catalog with Lakekeeper (if MinIO selected)
|
- Iceberg catalog with Lakekeeper (if MinIO selected)
|
||||||
|
- Keycloak service account enabled for OAuth2 client credentials flow
|
||||||
|
- `lakekeeper` client scope added
|
||||||
|
- `lakekeeper` audience mapper configured
|
||||||
- TPCH catalog with sample data
|
- TPCH catalog with sample data
|
||||||
|
|
||||||
## Configuration
|
## Configuration
|
||||||
@@ -90,10 +93,16 @@ Returns the password for username `admin`.
|
|||||||
Port: 443
|
Port: 443
|
||||||
Username: admin
|
Username: admin
|
||||||
Password: [from just trino::admin-password]
|
Password: [from just trino::admin-password]
|
||||||
Catalog: postgresql
|
Catalog: postgresql (or iceberg for Iceberg tables)
|
||||||
SSL: Yes
|
SSL: Yes
|
||||||
```
|
```
|
||||||
|
|
||||||
|
**Catalog Selection**:
|
||||||
|
|
||||||
|
- Use `postgresql` to query PostgreSQL database tables
|
||||||
|
- Use `iceberg` to query Iceberg tables via Lakekeeper
|
||||||
|
- You can create multiple Metabase connections, one for each catalog
|
||||||
|
|
||||||
**Note**: Do NOT use internal Kubernetes hostnames like `trino.trino.svc.cluster.local` as they do not have valid TLS certificates for password authentication.
|
**Note**: Do NOT use internal Kubernetes hostnames like `trino.trino.svc.cluster.local` as they do not have valid TLS certificates for password authentication.
|
||||||
|
|
||||||
### Example Queries
|
### Example Queries
|
||||||
@@ -110,6 +119,19 @@ SELECT * FROM tpch.tiny.customer LIMIT 10;
|
|||||||
SELECT * FROM postgresql.public.pg_tables;
|
SELECT * FROM postgresql.public.pg_tables;
|
||||||
```
|
```
|
||||||
|
|
||||||
|
**Query Iceberg tables:**
|
||||||
|
|
||||||
|
```sql
|
||||||
|
-- Show schemas in Iceberg catalog
|
||||||
|
SHOW SCHEMAS FROM iceberg;
|
||||||
|
|
||||||
|
-- Show tables in a namespace
|
||||||
|
SHOW TABLES FROM iceberg.ecommerce;
|
||||||
|
|
||||||
|
-- Query Iceberg table
|
||||||
|
SELECT * FROM iceberg.ecommerce.products LIMIT 10;
|
||||||
|
```
|
||||||
|
|
||||||
**Show all catalogs:**
|
**Show all catalogs:**
|
||||||
|
|
||||||
```sql
|
```sql
|
||||||
@@ -120,6 +142,7 @@ SHOW CATALOGS;
|
|||||||
|
|
||||||
```sql
|
```sql
|
||||||
SHOW SCHEMAS FROM postgresql;
|
SHOW SCHEMAS FROM postgresql;
|
||||||
|
SHOW SCHEMAS FROM iceberg;
|
||||||
```
|
```
|
||||||
|
|
||||||
## Catalogs
|
## Catalogs
|
||||||
@@ -143,10 +166,52 @@ Queries your CloudNativePG cluster:
|
|||||||
|
|
||||||
### Iceberg (Optional)
|
### Iceberg (Optional)
|
||||||
|
|
||||||
Queries Iceberg tables via Lakekeeper:
|
Queries Iceberg tables via Lakekeeper REST Catalog:
|
||||||
|
|
||||||
- Catalog: `iceberg`
|
- **Catalog**: `iceberg`
|
||||||
- Storage: MinIO S3-compatible
|
- **Storage**: MinIO S3-compatible object storage
|
||||||
|
- **REST Catalog**: Lakekeeper (Apache Iceberg REST Catalog implementation)
|
||||||
|
- **Authentication**: OAuth2 client credentials flow with Keycloak
|
||||||
|
|
||||||
|
**How It Works**:
|
||||||
|
|
||||||
|
1. Trino authenticates to Lakekeeper using OAuth2 (client credentials flow)
|
||||||
|
2. Lakekeeper provides Iceberg table metadata from its catalog
|
||||||
|
3. Trino reads actual data files directly from MinIO using static S3 credentials
|
||||||
|
4. Vended credentials are disabled; Trino uses pre-configured MinIO access keys
|
||||||
|
|
||||||
|
**Configuration**:
|
||||||
|
|
||||||
|
The following settings are automatically configured during installation when MinIO storage is enabled:
|
||||||
|
|
||||||
|
- Service account enabled on Trino Keycloak client
|
||||||
|
- `lakekeeper` client scope added to Trino client
|
||||||
|
- Audience mapper configured to include `aud: lakekeeper` in JWT tokens
|
||||||
|
- S3 file system factory enabled (`fs.native-s3.enabled=true`)
|
||||||
|
- Static MinIO credentials provided via Kubernetes secrets
|
||||||
|
|
||||||
|
**Example Usage**:
|
||||||
|
|
||||||
|
```sql
|
||||||
|
-- List all namespaces (schemas)
|
||||||
|
SHOW SCHEMAS FROM iceberg;
|
||||||
|
|
||||||
|
-- Create a namespace
|
||||||
|
CREATE SCHEMA iceberg.analytics;
|
||||||
|
|
||||||
|
-- List tables in a namespace
|
||||||
|
SHOW TABLES FROM iceberg.ecommerce;
|
||||||
|
|
||||||
|
-- Query table
|
||||||
|
SELECT * FROM iceberg.ecommerce.products LIMIT 10;
|
||||||
|
|
||||||
|
-- Create table
|
||||||
|
CREATE TABLE iceberg.analytics.sales (
|
||||||
|
date DATE,
|
||||||
|
product VARCHAR,
|
||||||
|
amount DECIMAL(10,2)
|
||||||
|
);
|
||||||
|
```
|
||||||
|
|
||||||
## Management
|
## Management
|
||||||
|
|
||||||
@@ -204,13 +269,20 @@ Cloudflare Tunnel (HTTPS)
|
|||||||
Traefik Ingress
|
Traefik Ingress
|
||||||
↓
|
↓
|
||||||
Trino Coordinator (HTTP:8080)
|
Trino Coordinator (HTTP:8080)
|
||||||
|
├─ OAuth2 → Keycloak (Web UI auth)
|
||||||
|
└─ Password file (JDBC auth)
|
||||||
↓
|
↓
|
||||||
Trino Workers (HTTP:8080)
|
Trino Workers (HTTP:8080)
|
||||||
↓
|
↓
|
||||||
Data Sources:
|
Data Sources:
|
||||||
- PostgreSQL (CloudNativePG)
|
- PostgreSQL (CloudNativePG)
|
||||||
- MinIO (S3)
|
└─ Direct SQL connection
|
||||||
- Iceberg (Lakekeeper)
|
|
||||||
|
- Iceberg Tables
|
||||||
|
├─ Metadata: Lakekeeper (REST Catalog)
|
||||||
|
│ └─ OAuth2 → Keycloak (client credentials)
|
||||||
|
└─ Data: MinIO (S3)
|
||||||
|
└─ Static credentials
|
||||||
```
|
```
|
||||||
|
|
||||||
## Troubleshooting
|
## Troubleshooting
|
||||||
@@ -245,9 +317,11 @@ kubectl exec -n trino deployment/trino-coordinator -- \
|
|||||||
|
|
||||||
#### Metabase Sync Fails
|
#### Metabase Sync Fails
|
||||||
|
|
||||||
- Ensure catalog is specified in connection settings (e.g., `postgresql`)
|
- Ensure a catalog is specified in the connection settings (e.g., `postgresql` or `iceberg`)
|
||||||
|
- For the Iceberg catalog, verify that Lakekeeper is running: `kubectl get pods -n lakekeeper`
|
||||||
- Check Trino coordinator logs for errors
|
- Check Trino coordinator logs for errors
|
||||||
- Verify PostgreSQL/Iceberg connectivity
|
- Verify PostgreSQL/Iceberg connectivity
|
||||||
|
- For Iceberg issues, verify that Trino can obtain an OAuth2 token: the service account must be enabled on the Trino Keycloak client
|
||||||
|
|
||||||
#### OAuth2 Login Fails
|
#### OAuth2 Login Fails
|
||||||
|
|
||||||
@@ -269,3 +343,5 @@ kubectl exec -n trino deployment/trino-coordinator -- \
|
|||||||
- [Password Authentication](https://trino.io/docs/current/security/password-file.html)
|
- [Password Authentication](https://trino.io/docs/current/security/password-file.html)
|
||||||
- [PostgreSQL Connector](https://trino.io/docs/current/connector/postgresql.html)
|
- [PostgreSQL Connector](https://trino.io/docs/current/connector/postgresql.html)
|
||||||
- [Iceberg Connector](https://trino.io/docs/current/connector/iceberg.html)
|
- [Iceberg Connector](https://trino.io/docs/current/connector/iceberg.html)
|
||||||
|
- [Lakekeeper (Iceberg REST Catalog)](https://lakekeeper.io/)
|
||||||
|
- [Apache Iceberg](https://iceberg.apache.org/)
|
||||||
|
|||||||
@@ -200,6 +200,19 @@ setup-minio-storage:
|
|||||||
|
|
||||||
just minio::create-user trino "trino-data"
|
just minio::create-user trino "trino-data"
|
||||||
|
|
||||||
|
echo "Configuring Keycloak client for Lakekeeper integration..."
|
||||||
|
|
||||||
|
echo "Enabling service account for Trino client..."
|
||||||
|
just keycloak::enable-service-account ${KEYCLOAK_REALM} trino
|
||||||
|
|
||||||
|
echo "Adding lakekeeper scope to Trino client..."
|
||||||
|
just keycloak::add-scope-to-client ${KEYCLOAK_REALM} trino lakekeeper
|
||||||
|
|
||||||
|
echo "Adding lakekeeper audience mapper to Trino client..."
|
||||||
|
just keycloak::add-audience-mapper trino lakekeeper
|
||||||
|
|
||||||
|
echo "Keycloak configuration completed"
|
||||||
|
|
||||||
if helm status external-secrets -n ${EXTERNAL_SECRETS_NAMESPACE} &>/dev/null; then
|
if helm status external-secrets -n ${EXTERNAL_SECRETS_NAMESPACE} &>/dev/null; then
|
||||||
echo "Creating ExternalSecret for MinIO credentials..."
|
echo "Creating ExternalSecret for MinIO credentials..."
|
||||||
gomplate -f trino-minio-external-secret.gomplate.yaml -o trino-minio-external-secret.yaml
|
gomplate -f trino-minio-external-secret.gomplate.yaml -o trino-minio-external-secret.yaml
|
||||||
|
|||||||
@@ -75,15 +75,21 @@ catalogs:
|
|||||||
iceberg: |
|
iceberg: |
|
||||||
connector.name=iceberg
|
connector.name=iceberg
|
||||||
iceberg.catalog.type=rest
|
iceberg.catalog.type=rest
|
||||||
iceberg.rest-catalog.uri=http://lakekeeper.lakekeeper.svc.cluster.local:8181
|
iceberg.rest-catalog.uri=http://lakekeeper.lakekeeper.svc.cluster.local:8181/catalog
|
||||||
iceberg.rest-catalog.warehouse=default
|
iceberg.rest-catalog.warehouse=default
|
||||||
|
iceberg.rest-catalog.security=OAUTH2
|
||||||
|
iceberg.rest-catalog.oauth2.credential=${ENV:OAUTH2_CLIENT_ID}:${ENV:OAUTH2_CLIENT_SECRET}
|
||||||
|
iceberg.rest-catalog.oauth2.server-uri=https://{{ .Env.KEYCLOAK_HOST }}/realms/{{ .Env.KEYCLOAK_REALM }}/protocol/openid-connect/token
|
||||||
|
iceberg.rest-catalog.oauth2.scope=openid profile lakekeeper
|
||||||
iceberg.rest-catalog.nested-namespace-enabled=true
|
iceberg.rest-catalog.nested-namespace-enabled=true
|
||||||
iceberg.rest-catalog.vended-credentials-enabled=true
|
iceberg.rest-catalog.vended-credentials-enabled=false
|
||||||
iceberg.unique-table-location=true
|
iceberg.unique-table-location=true
|
||||||
fs.native-s3.enabled=true
|
fs.native-s3.enabled=true
|
||||||
s3.region=us-east-1
|
s3.region=us-east-1
|
||||||
s3.endpoint=http://minio.{{ .Env.MINIO_NAMESPACE }}.svc.cluster.local:9000
|
s3.endpoint=http://minio.{{ .Env.MINIO_NAMESPACE }}.svc.cluster.local:9000
|
||||||
s3.path-style-access=true
|
s3.path-style-access=true
|
||||||
|
s3.aws-access-key=${ENV:MINIO_ACCESS_KEY}
|
||||||
|
s3.aws-secret-key=${ENV:MINIO_SECRET_KEY}
|
||||||
{{- end }}
|
{{- end }}
|
||||||
{{- else }}
|
{{- else }}
|
||||||
catalogs:
|
catalogs:
|
||||||
@@ -283,5 +289,17 @@ env:
|
|||||||
name: trino-postgres-secret
|
name: trino-postgres-secret
|
||||||
key: password
|
key: password
|
||||||
{{- end }}
|
{{- end }}
|
||||||
|
{{- if .Env.TRINO_MINIO_ENABLED }}
|
||||||
|
- name: MINIO_ACCESS_KEY
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
name: trino-minio-secret
|
||||||
|
key: access_key
|
||||||
|
- name: MINIO_SECRET_KEY
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
name: trino-minio-secret
|
||||||
|
key: secret_key
|
||||||
|
{{- end }}
|
||||||
|
|
||||||
envFrom: []
|
envFrom: []
|
||||||
|
|||||||
Reference in New Issue
Block a user