fix(trino): fix auth and ingress
This commit is contained in:
1
trino/.gitignore
vendored
1
trino/.gitignore
vendored
@@ -2,3 +2,4 @@ trino-minio-external-secret.yaml
|
|||||||
trino-oauth-external-secret.yaml
|
trino-oauth-external-secret.yaml
|
||||||
trino-postgres-external-secret.yaml
|
trino-postgres-external-secret.yaml
|
||||||
trino-values.yaml
|
trino-values.yaml
|
||||||
|
trino-middleware.yaml
|
||||||
|
|||||||
129
trino/README.md
129
trino/README.md
@@ -7,10 +7,12 @@ Fast distributed SQL query engine for big data analytics with Keycloak authentic
|
|||||||
This module deploys Trino using the official Helm chart with:
|
This module deploys Trino using the official Helm chart with:
|
||||||
|
|
||||||
- **Keycloak OAuth2 authentication** for Web UI access
|
- **Keycloak OAuth2 authentication** for Web UI access
|
||||||
- **Password authentication** for JDBC clients (Metabase, etc.)
|
- **Password authentication** for JDBC clients (Metabase, Querybook, etc.)
|
||||||
|
- **Access control with user impersonation** for multi-user query attribution
|
||||||
- **PostgreSQL catalog** for querying PostgreSQL databases
|
- **PostgreSQL catalog** for querying PostgreSQL databases
|
||||||
- **Iceberg catalog** with Lakekeeper (optional)
|
- **Iceberg catalog** with Lakekeeper (optional)
|
||||||
- **TPCH catalog** with sample data for testing
|
- **TPCH catalog** with sample data for testing
|
||||||
|
- **Traefik integration** with proper header forwarding
|
||||||
|
|
||||||
## Prerequisites
|
## Prerequisites
|
||||||
|
|
||||||
@@ -39,7 +41,9 @@ You will be prompted for:
|
|||||||
- Trino coordinator (1 instance)
|
- Trino coordinator (1 instance)
|
||||||
- Trino workers (2 instances by default)
|
- Trino workers (2 instances by default)
|
||||||
- OAuth2 client in Keycloak
|
- OAuth2 client in Keycloak
|
||||||
- Password authentication for JDBC access
|
- Password authentication for JDBC/Querybook access
|
||||||
|
- Traefik Middleware for X-Forwarded-* header injection
|
||||||
|
- Access control with impersonation rules (admin can impersonate any user)
|
||||||
- PostgreSQL catalog (if selected)
|
- PostgreSQL catalog (if selected)
|
||||||
- Iceberg catalog with Lakekeeper (if MinIO selected)
|
- Iceberg catalog with Lakekeeper (if MinIO selected)
|
||||||
- Keycloak service account enabled for OAuth2 client credentials flow
|
- Keycloak service account enabled for OAuth2 client credentials flow
|
||||||
@@ -47,6 +51,8 @@ You will be prompted for:
|
|||||||
- `lakekeeper` audience mapper configured
|
- `lakekeeper` audience mapper configured
|
||||||
- TPCH catalog with sample data
|
- TPCH catalog with sample data
|
||||||
|
|
||||||
|
**Note**: Trino runs HTTP-only internally. HTTPS is provided by Traefik Ingress, which handles TLS termination.
|
||||||
|
|
||||||
## Configuration
|
## Configuration
|
||||||
|
|
||||||
Environment variables (set in `.env.local` or override):
|
Environment variables (set in `.env.local` or override):
|
||||||
@@ -70,6 +76,8 @@ TRINO_WORKER_COUNT=2 # Number of workers
|
|||||||
2. Click "Sign in" to authenticate with Keycloak
|
2. Click "Sign in" to authenticate with Keycloak
|
||||||
3. Execute queries in the Web UI
|
3. Execute queries in the Web UI
|
||||||
|
|
||||||
|
**Note**: OAuth2 authentication works over HTTP internally. Traefik Ingress provides HTTPS to external users, and Trino processes X-Forwarded headers to generate correct HTTPS redirect URLs for OAuth2 flow.
|
||||||
|
|
||||||
### Get Admin Password
|
### Get Admin Password
|
||||||
|
|
||||||
For JDBC/Metabase connections:
|
For JDBC/Metabase connections:
|
||||||
@@ -82,7 +90,7 @@ Returns the password for username `admin`.
|
|||||||
|
|
||||||
### Metabase Integration
|
### Metabase Integration
|
||||||
|
|
||||||
**Important**: Trino requires TLS/SSL for password authentication. You must use the external hostname (not the internal Kubernetes service name).
|
**Important**: The Python Trino client (used by Metabase) requires HTTPS when using authentication. You must use the external hostname which has TLS provided by Traefik Ingress.
|
||||||
|
|
||||||
1. In Metabase, go to Admin → Databases → Add database
|
1. In Metabase, go to Admin → Databases → Add database
|
||||||
2. Select **Database type**: Starburst
|
2. Select **Database type**: Starburst
|
||||||
@@ -103,7 +111,52 @@ Returns the password for username `admin`.
|
|||||||
- Use `iceberg` to query Iceberg tables via Lakekeeper
|
- Use `iceberg` to query Iceberg tables via Lakekeeper
|
||||||
- You can create multiple Metabase connections, one for each catalog
|
- You can create multiple Metabase connections, one for each catalog
|
||||||
|
|
||||||
**Note**: Do NOT use internal Kubernetes hostnames like `trino.trino.svc.cluster.local` as they do not have valid TLS certificates for password authentication.
|
**Note**: Do NOT use internal Kubernetes hostnames like `trino.trino.svc.cluster.local:8080`. Internal services do not have TLS, and the Python Trino client enforces HTTPS when authentication is used. Always use the external hostname with port 443.
|
||||||
|
|
||||||
|
### Querybook Integration
|
||||||
|
|
||||||
|
**Connection Configuration**:
|
||||||
|
|
||||||
|
1. In Querybook, create a new Environment and Query Engine
|
||||||
|
2. Configure the Trino connection:
|
||||||
|
|
||||||
|
```plain
|
||||||
|
Connection String: trino://your-trino-host:443?SSL=true
|
||||||
|
Username: admin
|
||||||
|
Password: [from just trino::admin-password]
|
||||||
|
Catalog: postgresql (or iceberg for Iceberg tables)
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Optional: Configure `Proxy_user_id` to enable user impersonation
|
||||||
|
|
||||||
|
**User Impersonation**:
|
||||||
|
|
||||||
|
Trino is configured with file-based access control that allows the `admin` user to impersonate any user. This enables:
|
||||||
|
|
||||||
|
- Querybook to connect as `admin` but execute queries as the logged-in Querybook user
|
||||||
|
- Proper query attribution and audit logging
|
||||||
|
- User-specific access control (when configured)
|
||||||
|
|
||||||
|
The impersonation rules are defined in `trino-values.gomplate.yaml`:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"catalogs": [{"allow": "all"}],
|
||||||
|
"impersonation": [
|
||||||
|
{
|
||||||
|
"original_user": "admin",
|
||||||
|
"new_user": ".*"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**Why External Hostname is Required**:
|
||||||
|
|
||||||
|
- The Python Trino client enforces HTTPS when authentication is used (client-side requirement)
|
||||||
|
- Trino runs HTTP-only internally; TLS is provided by Traefik Ingress
|
||||||
|
- Internal service names (e.g., `trino.trino.svc.cluster.local:8080`) do not have TLS termination
|
||||||
|
- Therefore, you must use the external hostname (e.g., `trino.example.com:443`) which has TLS from Traefik
|
||||||
|
|
||||||
### Example Queries
|
### Example Queries
|
||||||
|
|
||||||
@@ -253,24 +306,45 @@ Removes:
|
|||||||
- Requires valid user in the configured realm
|
- Requires valid user in the configured realm
|
||||||
- Automatic redirect to Keycloak login
|
- Automatic redirect to Keycloak login
|
||||||
|
|
||||||
### JDBC/Metabase (Password)
|
### JDBC/Metabase/Querybook (Password)
|
||||||
|
|
||||||
- Username: `admin`
|
- Username: `admin`
|
||||||
- Password: Retrieved via `just trino::admin-password`
|
- Password: Retrieved via `just trino::admin-password`
|
||||||
- Stored in Vault at `trino/password`
|
- Stored in Vault at `trino/password`
|
||||||
|
- Requires external hostname with SSL/TLS
|
||||||
|
|
||||||
|
### Access Control
|
||||||
|
|
||||||
|
Trino uses file-based system access control with the following configuration:
|
||||||
|
|
||||||
|
**Catalogs**: All users can access all catalogs
|
||||||
|
|
||||||
|
**Impersonation**: The `admin` user can impersonate any user
|
||||||
|
|
||||||
|
This configuration enables:
|
||||||
|
|
||||||
|
- **Querybook Integration**: Admin user connects and executes queries as logged-in users
|
||||||
|
- **Audit Logging**: Queries are attributed to the actual user, not the admin account
|
||||||
|
- **Future Access Control**: Can be extended to add user-specific catalog/schema restrictions
|
||||||
|
|
||||||
|
The access control rules are defined in `/etc/trino/access-control/rules.json` (automatically generated from Helm values).
|
||||||
|
|
||||||
## Architecture
|
## Architecture
|
||||||
|
|
||||||
```
|
```
|
||||||
External Users
|
External Users / Querybook
|
||||||
↓
|
↓
|
||||||
Cloudflare Tunnel (HTTPS)
|
Cloudflare Tunnel (HTTPS)
|
||||||
↓
|
↓
|
||||||
Traefik Ingress
|
Traefik Ingress (HTTPS → HTTP)
|
||||||
|
├─ TLS Termination
|
||||||
|
├─ Traefik Middleware (X-Forwarded-* headers)
|
||||||
|
└─ Backend: HTTP (port 8080)
|
||||||
↓
|
↓
|
||||||
Trino Coordinator (HTTP:8080)
|
Trino Coordinator (HTTP:8080)
|
||||||
├─ OAuth2 → Keycloak (Web UI auth)
|
├─ OAuth2 → Keycloak (Web UI auth)
|
||||||
└─ Password file (JDBC auth)
|
├─ Password file (JDBC/Querybook auth)
|
||||||
|
└─ Access Control (file-based, impersonation rules)
|
||||||
↓
|
↓
|
||||||
Trino Workers (HTTP:8080)
|
Trino Workers (HTTP:8080)
|
||||||
↓
|
↓
|
||||||
@@ -285,6 +359,14 @@ Data Sources:
|
|||||||
└─ Static credentials
|
└─ Static credentials
|
||||||
```
|
```
|
||||||
|
|
||||||
|
**Key Components**:
|
||||||
|
|
||||||
|
- **TLS Termination**: Traefik Ingress handles HTTPS, Trino runs HTTP-only internally
|
||||||
|
- **Traefik Middleware**: Injects X-Forwarded-Proto, X-Forwarded-Host, X-Forwarded-Port headers for correct URL generation
|
||||||
|
- **Single Port Configuration**: HTTP (8080) for all communication (Ingress backend, internal clients, Querybook)
|
||||||
|
- **Access Control**: File-based system with impersonation rules allowing `admin` to impersonate any user
|
||||||
|
- **Querybook Integration**: Connects via external HTTPS hostname (Traefik provides TLS), uses admin credentials with user impersonation
|
||||||
|
|
||||||
## Troubleshooting
|
## Troubleshooting
|
||||||
|
|
||||||
### Check Pod Status
|
### Check Pod Status
|
||||||
@@ -315,6 +397,26 @@ kubectl exec -n trino deployment/trino-coordinator -- \
|
|||||||
|
|
||||||
### Common Issues
|
### Common Issues
|
||||||
|
|
||||||
|
#### Querybook Connection Fails
|
||||||
|
|
||||||
|
- **Error: "cannot use authentication with HTTP"**
|
||||||
|
- Python Trino client requires HTTPS when using authentication (client-side enforcement)
|
||||||
|
- Trino runs HTTP-only internally; TLS is provided by Traefik Ingress at the external hostname
|
||||||
|
- Solution: Use external hostname with SSL: `trino://your-trino-host:443?SSL=true`
|
||||||
|
- Do NOT use internal service names (e.g., `trino.trino.svc.cluster.local:8080`) as they lack TLS
|
||||||
|
|
||||||
|
- **Error: "Access Denied: User admin cannot impersonate user X"**
|
||||||
|
- Access control rules may not be properly configured
|
||||||
|
- Verify rules exist: `kubectl exec -n trino deployment/trino-coordinator -- cat /etc/trino/access-control/rules.json`
|
||||||
|
- Check for impersonation section in rules
|
||||||
|
|
||||||
|
- **Error: "500 Internal Server Error"**
|
||||||
|
- Check Traefik middleware exists: `kubectl get middleware trino-headers -n trino`
|
||||||
|
- Verify Ingress annotation references correct middleware: `trino-trino-headers@kubernetescrd`
|
||||||
|
- Check Traefik logs: `kubectl logs -n kube-system -l app.kubernetes.io/name=traefik`
|
||||||
|
- Verify Ingress backend port is 8080: `kubectl get ingress trino-coordinator -n trino -o yaml | grep "number:"`
|
||||||
|
- If Ingress shows port 8443, ensure `server.config.https.enabled: false` in values (Helm chart v1.41.0 auto-selects HTTPS port when enabled)
|
||||||
|
|
||||||
#### Metabase Sync Fails
|
#### Metabase Sync Fails
|
||||||
|
|
||||||
- Ensure catalog is specified in connection settings (e.g., `postgresql` or `iceberg`)
|
- Ensure catalog is specified in connection settings (e.g., `postgresql` or `iceberg`)
|
||||||
@@ -332,8 +434,15 @@ kubectl exec -n trino deployment/trino-coordinator -- \
|
|||||||
#### Password Authentication Fails
|
#### Password Authentication Fails
|
||||||
|
|
||||||
- Retrieve current password: `just trino::admin-password`
|
- Retrieve current password: `just trino::admin-password`
|
||||||
- Ensure SSL/TLS is enabled in JDBC URL
|
- Ensure SSL/TLS is enabled in JDBC URL (use external hostname with port 443)
|
||||||
- For internal testing, HTTP is supported via `http-server.authentication.allow-insecure-over-http=true`
|
- Python Trino client requires HTTPS for authentication (client-side enforcement)
|
||||||
|
- Trino coordinator allows password auth over HTTP via `http-server.authentication.allow-insecure-over-http=true`, but clients may refuse to connect without TLS
|
||||||
|
|
||||||
|
#### URLs Contain "localhost" Instead of Actual Hostname
|
||||||
|
|
||||||
|
- Traefik middleware is not injecting X-Forwarded headers
|
||||||
|
- Verify middleware exists and is referenced in Ingress annotations
|
||||||
|
- Check `http-server.process-forwarded=true` is set in Trino configuration
|
||||||
|
|
||||||
## References
|
## References
|
||||||
|
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ export TRINO_CHART_VERSION := env("TRINO_CHART_VERSION", "1.41.0")
|
|||||||
export TRINO_IMAGE_TAG := env("TRINO_IMAGE_TAG", "477")
|
export TRINO_IMAGE_TAG := env("TRINO_IMAGE_TAG", "477")
|
||||||
export TRINO_HOST := env("TRINO_HOST", "")
|
export TRINO_HOST := env("TRINO_HOST", "")
|
||||||
export EXTERNAL_SECRETS_NAMESPACE := env("EXTERNAL_SECRETS_NAMESPACE", "external-secrets")
|
export EXTERNAL_SECRETS_NAMESPACE := env("EXTERNAL_SECRETS_NAMESPACE", "external-secrets")
|
||||||
|
export K8S_VAULT_NAMESPACE := env("K8S_VAULT_NAMESPACE", "vault")
|
||||||
export KEYCLOAK_HOST := env("KEYCLOAK_HOST", "")
|
export KEYCLOAK_HOST := env("KEYCLOAK_HOST", "")
|
||||||
export KEYCLOAK_REALM := env("KEYCLOAK_REALM", "buunstack")
|
export KEYCLOAK_REALM := env("KEYCLOAK_REALM", "buunstack")
|
||||||
export TRINO_COORDINATOR_MEMORY := env("TRINO_COORDINATOR_MEMORY", "4Gi")
|
export TRINO_COORDINATOR_MEMORY := env("TRINO_COORDINATOR_MEMORY", "4Gi")
|
||||||
@@ -276,6 +277,10 @@ install:
|
|||||||
|
|
||||||
gomplate -f trino-values.gomplate.yaml -o trino-values.yaml
|
gomplate -f trino-values.gomplate.yaml -o trino-values.yaml
|
||||||
|
|
||||||
|
echo "Creating Traefik Middleware..."
|
||||||
|
gomplate -f trino-middleware.gomplate.yaml -o trino-middleware.yaml
|
||||||
|
kubectl apply -f trino-middleware.yaml
|
||||||
|
|
||||||
helm upgrade --install trino trino/trino --namespace ${TRINO_NAMESPACE} \
|
helm upgrade --install trino trino/trino --namespace ${TRINO_NAMESPACE} \
|
||||||
--version ${TRINO_CHART_VERSION} -f trino-values.yaml --wait --timeout=10m
|
--version ${TRINO_CHART_VERSION} -f trino-values.yaml --wait --timeout=10m
|
||||||
|
|
||||||
@@ -318,6 +323,10 @@ upgrade:
|
|||||||
|
|
||||||
gomplate -f trino-values.gomplate.yaml -o trino-values.yaml
|
gomplate -f trino-values.gomplate.yaml -o trino-values.yaml
|
||||||
|
|
||||||
|
echo "Creating Traefik Middleware..."
|
||||||
|
gomplate -f trino-middleware.gomplate.yaml -o trino-middleware.yaml
|
||||||
|
kubectl apply -f trino-middleware.yaml
|
||||||
|
|
||||||
helm upgrade trino trino/trino --namespace ${TRINO_NAMESPACE} --version ${TRINO_CHART_VERSION} \
|
helm upgrade trino trino/trino --namespace ${TRINO_NAMESPACE} --version ${TRINO_CHART_VERSION} \
|
||||||
-f trino-values.yaml --wait --timeout=10m
|
-f trino-values.yaml --wait --timeout=10m
|
||||||
|
|
||||||
@@ -353,6 +362,7 @@ cleanup:
|
|||||||
just vault::delete trino/postgres || true
|
just vault::delete trino/postgres || true
|
||||||
just vault::delete trino/minio || true
|
just vault::delete trino/minio || true
|
||||||
just vault::delete trino/password || true
|
just vault::delete trino/password || true
|
||||||
|
just vault::delete trino/tls || true
|
||||||
just keycloak::delete-client ${KEYCLOAK_REALM} trino || true
|
just keycloak::delete-client ${KEYCLOAK_REALM} trino || true
|
||||||
echo "Cleanup completed"
|
echo "Cleanup completed"
|
||||||
else
|
else
|
||||||
|
|||||||
11
trino/trino-middleware.gomplate.yaml
Normal file
11
trino/trino-middleware.gomplate.yaml
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
apiVersion: traefik.io/v1alpha1
|
||||||
|
kind: Middleware
|
||||||
|
metadata:
|
||||||
|
name: trino-headers
|
||||||
|
namespace: {{ .Env.TRINO_NAMESPACE }}
|
||||||
|
spec:
|
||||||
|
headers:
|
||||||
|
customRequestHeaders:
|
||||||
|
X-Forwarded-Proto: "https"
|
||||||
|
X-Forwarded-Host: "{{ .Env.TRINO_HOST }}"
|
||||||
|
X-Forwarded-Port: "443"
|
||||||
@@ -29,6 +29,8 @@ server:
|
|||||||
|
|
||||||
workerExtraConfig: ""
|
workerExtraConfig: ""
|
||||||
coordinatorExtraConfig: |
|
coordinatorExtraConfig: |
|
||||||
|
discovery.uri=http://localhost:8080
|
||||||
|
http-server.authentication.allow-insecure-over-http=true
|
||||||
http-server.authentication.oauth2.issuer=https://{{ .Env.KEYCLOAK_HOST }}/realms/{{ .Env.KEYCLOAK_REALM }}
|
http-server.authentication.oauth2.issuer=https://{{ .Env.KEYCLOAK_HOST }}/realms/{{ .Env.KEYCLOAK_REALM }}
|
||||||
http-server.authentication.oauth2.auth-url=https://{{ .Env.KEYCLOAK_HOST }}/realms/{{ .Env.KEYCLOAK_REALM }}/protocol/openid-connect/auth
|
http-server.authentication.oauth2.auth-url=https://{{ .Env.KEYCLOAK_HOST }}/realms/{{ .Env.KEYCLOAK_REALM }}/protocol/openid-connect/auth
|
||||||
http-server.authentication.oauth2.token-url=https://{{ .Env.KEYCLOAK_HOST }}/realms/{{ .Env.KEYCLOAK_REALM }}/protocol/openid-connect/token
|
http-server.authentication.oauth2.token-url=https://{{ .Env.KEYCLOAK_HOST }}/realms/{{ .Env.KEYCLOAK_REALM }}/protocol/openid-connect/token
|
||||||
@@ -55,7 +57,25 @@ additionalExchangeManagerProperties: {}
|
|||||||
|
|
||||||
eventListenerProperties: {}
|
eventListenerProperties: {}
|
||||||
|
|
||||||
accessControl: {}
|
accessControl:
|
||||||
|
type: configmap
|
||||||
|
refreshPeriod: 60s
|
||||||
|
configFile: "rules.json"
|
||||||
|
rules:
|
||||||
|
rules.json: |-
|
||||||
|
{
|
||||||
|
"catalogs": [
|
||||||
|
{
|
||||||
|
"allow": "all"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"impersonation": [
|
||||||
|
{
|
||||||
|
"original_user": "admin",
|
||||||
|
"new_user": ".*"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
resourceGroups: {}
|
resourceGroups: {}
|
||||||
|
|
||||||
@@ -107,6 +127,8 @@ ingress:
|
|||||||
className: traefik
|
className: traefik
|
||||||
annotations:
|
annotations:
|
||||||
traefik.ingress.kubernetes.io/router.entrypoints: websecure
|
traefik.ingress.kubernetes.io/router.entrypoints: websecure
|
||||||
|
# Pass X-Forwarded-* headers to Trino so it returns correct URLs
|
||||||
|
traefik.ingress.kubernetes.io/router.middlewares: trino-trino-headers@kubernetescrd
|
||||||
hosts:
|
hosts:
|
||||||
- host: {{ .Env.TRINO_HOST }}
|
- host: {{ .Env.TRINO_HOST }}
|
||||||
paths:
|
paths:
|
||||||
|
|||||||
Reference in New Issue
Block a user