fix(trino): fix auth and ingress
This commit is contained in:
1
trino/.gitignore
vendored
1
trino/.gitignore
vendored
@@ -2,3 +2,4 @@ trino-minio-external-secret.yaml
|
||||
trino-oauth-external-secret.yaml
|
||||
trino-postgres-external-secret.yaml
|
||||
trino-values.yaml
|
||||
trino-middleware.yaml
|
||||
|
||||
129
trino/README.md
129
trino/README.md
@@ -7,10 +7,12 @@ Fast distributed SQL query engine for big data analytics with Keycloak authentic
|
||||
This module deploys Trino using the official Helm chart with:
|
||||
|
||||
- **Keycloak OAuth2 authentication** for Web UI access
|
||||
- **Password authentication** for JDBC clients (Metabase, etc.)
|
||||
- **Password authentication** for JDBC clients (Metabase, Querybook, etc.)
|
||||
- **Access control with user impersonation** for multi-user query attribution
|
||||
- **PostgreSQL catalog** for querying PostgreSQL databases
|
||||
- **Iceberg catalog** with Lakekeeper (optional)
|
||||
- **TPCH catalog** with sample data for testing
|
||||
- **Traefik integration** with proper header forwarding
|
||||
|
||||
## Prerequisites
|
||||
|
||||
@@ -39,7 +41,9 @@ You will be prompted for:
|
||||
- Trino coordinator (1 instance)
|
||||
- Trino workers (2 instances by default)
|
||||
- OAuth2 client in Keycloak
|
||||
- Password authentication for JDBC access
|
||||
- Password authentication for JDBC/Querybook access
|
||||
- Traefik Middleware for X-Forwarded-* header injection
|
||||
- Access control with impersonation rules (admin can impersonate any user)
|
||||
- PostgreSQL catalog (if selected)
|
||||
- Iceberg catalog with Lakekeeper (if MinIO selected)
|
||||
- Keycloak service account enabled for OAuth2 client credentials flow
|
||||
@@ -47,6 +51,8 @@ You will be prompted for:
|
||||
- `lakekeeper` audience mapper configured
|
||||
- TPCH catalog with sample data
|
||||
|
||||
**Note**: Trino runs HTTP-only internally. HTTPS is provided by Traefik Ingress, which handles TLS termination.
|
||||
|
||||
## Configuration
|
||||
|
||||
Environment variables (set in `.env.local` or override):
|
||||
@@ -70,6 +76,8 @@ TRINO_WORKER_COUNT=2 # Number of workers
|
||||
2. Click "Sign in" to authenticate with Keycloak
|
||||
3. Execute queries in the Web UI
|
||||
|
||||
**Note**: OAuth2 authentication works over HTTP internally. Traefik Ingress provides HTTPS to external users, and Trino processes X-Forwarded headers to generate correct HTTPS redirect URLs for OAuth2 flow.
|
||||
|
||||
### Get Admin Password
|
||||
|
||||
For JDBC/Metabase connections:
|
||||
@@ -82,7 +90,7 @@ Returns the password for username `admin`.
|
||||
|
||||
### Metabase Integration
|
||||
|
||||
**Important**: Trino requires TLS/SSL for password authentication. You must use the external hostname (not the internal Kubernetes service name).
|
||||
**Important**: The Python Trino client (used by Metabase) requires HTTPS when using authentication. You must use the external hostname which has TLS provided by Traefik Ingress.
|
||||
|
||||
1. In Metabase, go to Admin → Databases → Add database
|
||||
2. Select **Database type**: Starburst
|
||||
@@ -103,7 +111,52 @@ Returns the password for username `admin`.
|
||||
- Use `iceberg` to query Iceberg tables via Lakekeeper
|
||||
- You can create multiple Metabase connections, one for each catalog
|
||||
|
||||
**Note**: Do NOT use internal Kubernetes hostnames like `trino.trino.svc.cluster.local` as they do not have valid TLS certificates for password authentication.
|
||||
**Note**: Do NOT use internal Kubernetes hostnames like `trino.trino.svc.cluster.local:8080`. Internal services do not have TLS, and the Python Trino client enforces HTTPS when authentication is used. Always use the external hostname with port 443.
|
||||
|
||||
### Querybook Integration
|
||||
|
||||
**Connection Configuration**:
|
||||
|
||||
1. In Querybook, create a new Environment and Query Engine
|
||||
2. Configure the Trino connection:
|
||||
|
||||
```plain
|
||||
Connection String: trino://your-trino-host:443?SSL=true
|
||||
Username: admin
|
||||
Password: [from just trino::admin-password]
|
||||
Catalog: postgresql (or iceberg for Iceberg tables)
|
||||
```
|
||||
|
||||
3. Optional: Configure `Proxy_user_id` to enable user impersonation
|
||||
|
||||
**User Impersonation**:
|
||||
|
||||
Trino is configured with file-based access control that allows the `admin` user to impersonate any user. This enables:
|
||||
|
||||
- Querybook to connect as `admin` but execute queries as the logged-in Querybook user
|
||||
- Proper query attribution and audit logging
|
||||
- User-specific access control (when configured)
|
||||
|
||||
The impersonation rules are defined in `trino-values.gomplate.yaml`:
|
||||
|
||||
```json
|
||||
{
|
||||
"catalogs": [{"allow": "all"}],
|
||||
"impersonation": [
|
||||
{
|
||||
"original_user": "admin",
|
||||
"new_user": ".*"
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
**Why External Hostname is Required**:
|
||||
|
||||
- The Python Trino client enforces HTTPS when authentication is used (client-side requirement)
|
||||
- Trino runs HTTP-only internally; TLS is provided by Traefik Ingress
|
||||
- Internal service names (e.g., `trino.trino.svc.cluster.local:8080`) do not have TLS termination
|
||||
- Therefore, you must use the external hostname (e.g., `trino.example.com:443`) which has TLS from Traefik
|
||||
|
||||
### Example Queries
|
||||
|
||||
@@ -253,24 +306,45 @@ Removes:
|
||||
- Requires valid user in the configured realm
|
||||
- Automatic redirect to Keycloak login
|
||||
|
||||
### JDBC/Metabase (Password)
|
||||
### JDBC/Metabase/Querybook (Password)
|
||||
|
||||
- Username: `admin`
|
||||
- Password: Retrieved via `just trino::admin-password`
|
||||
- Stored in Vault at `trino/password`
|
||||
- Requires external hostname with SSL/TLS
|
||||
|
||||
### Access Control
|
||||
|
||||
Trino uses file-based system access control with the following configuration:
|
||||
|
||||
**Catalogs**: All users can access all catalogs
|
||||
|
||||
**Impersonation**: The `admin` user can impersonate any user
|
||||
|
||||
This configuration enables:
|
||||
|
||||
- **Querybook Integration**: Admin user connects and executes queries as logged-in users
|
||||
- **Audit Logging**: Queries are attributed to the actual user, not the admin account
|
||||
- **Future Access Control**: Can be extended to add user-specific catalog/schema restrictions
|
||||
|
||||
The access control rules are defined in `/etc/trino/access-control/rules.json` (automatically generated from Helm values).
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
External Users
|
||||
External Users / Querybook
|
||||
↓
|
||||
Cloudflare Tunnel (HTTPS)
|
||||
↓
|
||||
Traefik Ingress
|
||||
Traefik Ingress (HTTPS → HTTP)
|
||||
├─ TLS Termination
|
||||
├─ Traefik Middleware (X-Forwarded-* headers)
|
||||
└─ Backend: HTTP (port 8080)
|
||||
↓
|
||||
Trino Coordinator (HTTP:8080)
|
||||
├─ OAuth2 → Keycloak (Web UI auth)
|
||||
└─ Password file (JDBC auth)
|
||||
├─ Password file (JDBC/Querybook auth)
|
||||
└─ Access Control (file-based, impersonation rules)
|
||||
↓
|
||||
Trino Workers (HTTP:8080)
|
||||
↓
|
||||
@@ -285,6 +359,14 @@ Data Sources:
|
||||
└─ Static credentials
|
||||
```
|
||||
|
||||
**Key Components**:
|
||||
|
||||
- **TLS Termination**: Traefik Ingress handles HTTPS, Trino runs HTTP-only internally
|
||||
- **Traefik Middleware**: Injects X-Forwarded-Proto, X-Forwarded-Host, X-Forwarded-Port headers for correct URL generation
|
||||
- **Single Port Configuration**: HTTP (8080) for all communication (Ingress backend, internal clients, Querybook)
|
||||
- **Access Control**: File-based system with impersonation rules allowing `admin` to impersonate any user
|
||||
- **Querybook Integration**: Connects via external HTTPS hostname (Traefik provides TLS), uses admin credentials with user impersonation
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Check Pod Status
|
||||
@@ -315,6 +397,26 @@ kubectl exec -n trino deployment/trino-coordinator -- \
|
||||
|
||||
### Common Issues
|
||||
|
||||
#### Querybook Connection Fails
|
||||
|
||||
- **Error: "cannot use authentication with HTTP"**
|
||||
- Python Trino client requires HTTPS when using authentication (client-side enforcement)
|
||||
- Trino runs HTTP-only internally; TLS is provided by Traefik Ingress at the external hostname
|
||||
- Solution: Use external hostname with SSL: `trino://your-trino-host:443?SSL=true`
|
||||
- Do NOT use internal service names (e.g., `trino.trino.svc.cluster.local:8080`) as they lack TLS
|
||||
|
||||
- **Error: "Access Denied: User admin cannot impersonate user X"**
|
||||
- Access control rules may not be properly configured
|
||||
- Verify rules exist: `kubectl exec -n trino deployment/trino-coordinator -- cat /etc/trino/access-control/rules.json`
|
||||
- Check for impersonation section in rules
|
||||
|
||||
- **Error: "500 Internal Server Error"**
|
||||
- Check Traefik middleware exists: `kubectl get middleware trino-headers -n trino`
|
||||
- Verify Ingress annotation references correct middleware: `trino-trino-headers@kubernetescrd`
|
||||
- Check Traefik logs: `kubectl logs -n kube-system -l app.kubernetes.io/name=traefik`
|
||||
- Verify Ingress backend port is 8080: `kubectl get ingress trino-coordinator -n trino -o yaml | grep "number:"`
|
||||
- If Ingress shows port 8443, ensure `server.config.https.enabled: false` in values (Helm chart v1.41.0 auto-selects HTTPS port when enabled)
|
||||
|
||||
#### Metabase Sync Fails
|
||||
|
||||
- Ensure catalog is specified in connection settings (e.g., `postgresql` or `iceberg`)
|
||||
@@ -332,8 +434,15 @@ kubectl exec -n trino deployment/trino-coordinator -- \
|
||||
#### Password Authentication Fails
|
||||
|
||||
- Retrieve current password: `just trino::admin-password`
|
||||
- Ensure SSL/TLS is enabled in JDBC URL
|
||||
- For internal testing, HTTP is supported via `http-server.authentication.allow-insecure-over-http=true`
|
||||
- Ensure SSL/TLS is enabled in JDBC URL (use external hostname with port 443)
|
||||
- Python Trino client requires HTTPS for authentication (client-side enforcement)
|
||||
- Trino coordinator allows password auth over HTTP via `http-server.authentication.allow-insecure-over-http=true`, but clients may refuse to connect without TLS
|
||||
|
||||
#### URLs Contain "localhost" Instead of Actual Hostname
|
||||
|
||||
- Traefik middleware is not injecting X-Forwarded headers
|
||||
- Verify middleware exists and is referenced in Ingress annotations
|
||||
- Check `http-server.process-forwarded=true` is set in Trino configuration
|
||||
|
||||
## References
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@ export TRINO_CHART_VERSION := env("TRINO_CHART_VERSION", "1.41.0")
|
||||
export TRINO_IMAGE_TAG := env("TRINO_IMAGE_TAG", "477")
|
||||
export TRINO_HOST := env("TRINO_HOST", "")
|
||||
export EXTERNAL_SECRETS_NAMESPACE := env("EXTERNAL_SECRETS_NAMESPACE", "external-secrets")
|
||||
export K8S_VAULT_NAMESPACE := env("K8S_VAULT_NAMESPACE", "vault")
|
||||
export KEYCLOAK_HOST := env("KEYCLOAK_HOST", "")
|
||||
export KEYCLOAK_REALM := env("KEYCLOAK_REALM", "buunstack")
|
||||
export TRINO_COORDINATOR_MEMORY := env("TRINO_COORDINATOR_MEMORY", "4Gi")
|
||||
@@ -276,6 +277,10 @@ install:
|
||||
|
||||
gomplate -f trino-values.gomplate.yaml -o trino-values.yaml
|
||||
|
||||
echo "Creating Traefik Middleware..."
|
||||
gomplate -f trino-middleware.gomplate.yaml -o trino-middleware.yaml
|
||||
kubectl apply -f trino-middleware.yaml
|
||||
|
||||
helm upgrade --install trino trino/trino --namespace ${TRINO_NAMESPACE} \
|
||||
--version ${TRINO_CHART_VERSION} -f trino-values.yaml --wait --timeout=10m
|
||||
|
||||
@@ -318,6 +323,10 @@ upgrade:
|
||||
|
||||
gomplate -f trino-values.gomplate.yaml -o trino-values.yaml
|
||||
|
||||
echo "Creating Traefik Middleware..."
|
||||
gomplate -f trino-middleware.gomplate.yaml -o trino-middleware.yaml
|
||||
kubectl apply -f trino-middleware.yaml
|
||||
|
||||
helm upgrade trino trino/trino --namespace ${TRINO_NAMESPACE} --version ${TRINO_CHART_VERSION} \
|
||||
-f trino-values.yaml --wait --timeout=10m
|
||||
|
||||
@@ -353,6 +362,7 @@ cleanup:
|
||||
just vault::delete trino/postgres || true
|
||||
just vault::delete trino/minio || true
|
||||
just vault::delete trino/password || true
|
||||
just vault::delete trino/tls || true
|
||||
just keycloak::delete-client ${KEYCLOAK_REALM} trino || true
|
||||
echo "Cleanup completed"
|
||||
else
|
||||
|
||||
11
trino/trino-middleware.gomplate.yaml
Normal file
11
trino/trino-middleware.gomplate.yaml
Normal file
@@ -0,0 +1,11 @@
|
||||
apiVersion: traefik.io/v1alpha1
|
||||
kind: Middleware
|
||||
metadata:
|
||||
name: trino-headers
|
||||
namespace: {{ .Env.TRINO_NAMESPACE }}
|
||||
spec:
|
||||
headers:
|
||||
customRequestHeaders:
|
||||
X-Forwarded-Proto: "https"
|
||||
X-Forwarded-Host: "{{ .Env.TRINO_HOST }}"
|
||||
X-Forwarded-Port: "443"
|
||||
@@ -29,6 +29,8 @@ server:
|
||||
|
||||
workerExtraConfig: ""
|
||||
coordinatorExtraConfig: |
|
||||
discovery.uri=http://localhost:8080
|
||||
http-server.authentication.allow-insecure-over-http=true
|
||||
http-server.authentication.oauth2.issuer=https://{{ .Env.KEYCLOAK_HOST }}/realms/{{ .Env.KEYCLOAK_REALM }}
|
||||
http-server.authentication.oauth2.auth-url=https://{{ .Env.KEYCLOAK_HOST }}/realms/{{ .Env.KEYCLOAK_REALM }}/protocol/openid-connect/auth
|
||||
http-server.authentication.oauth2.token-url=https://{{ .Env.KEYCLOAK_HOST }}/realms/{{ .Env.KEYCLOAK_REALM }}/protocol/openid-connect/token
|
||||
@@ -55,7 +57,25 @@ additionalExchangeManagerProperties: {}
|
||||
|
||||
eventListenerProperties: {}
|
||||
|
||||
accessControl: {}
|
||||
accessControl:
|
||||
type: configmap
|
||||
refreshPeriod: 60s
|
||||
configFile: "rules.json"
|
||||
rules:
|
||||
rules.json: |-
|
||||
{
|
||||
"catalogs": [
|
||||
{
|
||||
"allow": "all"
|
||||
}
|
||||
],
|
||||
"impersonation": [
|
||||
{
|
||||
"original_user": "admin",
|
||||
"new_user": ".*"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
resourceGroups: {}
|
||||
|
||||
@@ -107,6 +127,8 @@ ingress:
|
||||
className: traefik
|
||||
annotations:
|
||||
traefik.ingress.kubernetes.io/router.entrypoints: websecure
|
||||
# Pass X-Forwarded-* headers to Trino so it returns correct URLs
|
||||
traefik.ingress.kubernetes.io/router.middlewares: trino-trino-headers@kubernetescrd
|
||||
hosts:
|
||||
- host: {{ .Env.TRINO_HOST }}
|
||||
paths:
|
||||
|
||||
Reference in New Issue
Block a user