Compare commits
10 Commits
7234840eba
...
fb1e4c20fa
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
fb1e4c20fa | ||
|
|
593da33d64 | ||
|
|
f3bc41e9eb | ||
|
|
98b03704d7 | ||
|
|
6fa0d27f7d | ||
|
|
d9ee90c32c | ||
|
|
7dc732268e | ||
|
|
2955d7d783 | ||
|
|
5055a36d87 | ||
|
|
46fdff720f |
@@ -429,6 +429,7 @@ When writing Markdown documentation:
|
||||
```
|
||||
|
||||
2. **Always validate with markdownlint-cli2**:
|
||||
- Run `markdownlint-cli2 <file>` before committing any Markdown files
|
||||
- Run from the project root directory to use `.markdownlint.yaml` config:
|
||||
`cd <top-dir> && markdownlint-cli2 <relative-path>`
|
||||
- Fix all linting errors to ensure consistent formatting
|
||||
- Pay attention to code block language specifications (MD040) and list formatting (MD029)
|
||||
|
||||
14
README.md
14
README.md
@@ -63,6 +63,7 @@ A remotely accessible Kubernetes home lab with OIDC authentication. Build a mode
|
||||
### LLM & AI Applications (Optional)
|
||||
|
||||
- **[Ollama](https://ollama.com/)**: Local LLM inference server with GPU acceleration
|
||||
- **[LiteLLM](https://litellm.ai/)**: Unified LLM gateway for accessing multiple providers through OpenAI-compatible API
|
||||
- **[LibreChat](https://www.librechat.ai/)**: Web-based chat interface with multi-model support and MCP integration
|
||||
- **[Langfuse](https://langfuse.com/)**: LLM observability and analytics platform for tracking and debugging AI applications
|
||||
|
||||
@@ -346,6 +347,18 @@ LLM observability and analytics platform:
|
||||
|
||||
[📖 See Langfuse Documentation](./langfuse/README.md)
|
||||
|
||||
### LiteLLM
|
||||
|
||||
Unified LLM gateway and proxy:
|
||||
|
||||
- **Multi-Provider Support**: Anthropic, OpenAI, Ollama, Mistral, Groq, and more through single API
|
||||
- **OpenAI-Compatible**: Drop-in replacement for OpenAI SDK
|
||||
- **Virtual Keys**: Generate scoped API keys for users with usage tracking
|
||||
- **Cost Tracking**: Monitor spending across all LLM providers
|
||||
- **Keycloak Authentication**: OAuth2 for Admin UI with role-based access
|
||||
|
||||
[📖 See LiteLLM Documentation](./litellm/README.md)
|
||||
|
||||
### Dagster
|
||||
|
||||
Modern data orchestration platform:
|
||||
@@ -485,6 +498,7 @@ kubectl --context yourpc-oidc get nodes
|
||||
# JupyterHub: https://jupyter.yourdomain.com
|
||||
# MLflow: https://mlflow.yourdomain.com
|
||||
# Langfuse: https://langfuse.yourdomain.com
|
||||
# LiteLLM: https://litellm.yourdomain.com
|
||||
# LibreChat: https://chat.yourdomain.com
|
||||
```
|
||||
|
||||
|
||||
1
clickhouse/.gitignore
vendored
1
clickhouse/.gitignore
vendored
@@ -2,4 +2,5 @@ clickhouse-credentials-external-secret.yaml
|
||||
clickhouse-ingress.yaml
|
||||
clickhouse-installation-template.yaml
|
||||
clickhouse-operator-values.yaml
|
||||
clickhouse-servicemonitor.yaml
|
||||
clickhouse.yaml
|
||||
|
||||
@@ -44,3 +44,31 @@ ClickHouse can use the following Linux capabilities for enhanced performance, bu
|
||||
| `SYS_NICE` | Thread priority control via `os_thread_priority` | Setting has no effect |
|
||||
|
||||
These capabilities are disabled by default to comply with baseline Pod Security Standards. To enable them, the namespace must allow privileged pods, and you need to uncomment the `add` line in `clickhouse-installation-template.yaml`.
|
||||
|
||||
## Monitoring
|
||||
|
||||
ClickHouse exposes Prometheus metrics on port 9363. When Prometheus (kube-prometheus-stack) is installed, monitoring can be enabled during installation or manually.
|
||||
|
||||
### Enable Monitoring
|
||||
|
||||
```bash
|
||||
just clickhouse::setup-monitoring
|
||||
```
|
||||
|
||||
This creates a ServiceMonitor and a metrics Service for Prometheus to scrape.
|
||||
|
||||
### Grafana Dashboard
|
||||
|
||||
Import the ClickHouse dashboard from Grafana.com:
|
||||
|
||||
1. Open Grafana → **Dashboards** → **New** → **Import**
|
||||
2. Enter Dashboard ID: `14192`
|
||||
3. Click **Load**, select **Prometheus** data source, then **Import**
|
||||
|
||||
The dashboard includes panels for memory, connections, queries, I/O, replication, merge operations, cache, and ZooKeeper metrics.
|
||||
|
||||
### Remove Monitoring
|
||||
|
||||
```bash
|
||||
just clickhouse::remove-monitoring
|
||||
```
|
||||
|
||||
@@ -20,6 +20,10 @@ spec:
|
||||
containers:
|
||||
- name: clickhouse
|
||||
image: {{ .Env.CLICKHOUSE_IMAGE }}
|
||||
ports:
|
||||
- name: prometheus
|
||||
containerPort: 9363
|
||||
protocol: TCP
|
||||
resources:
|
||||
requests:
|
||||
cpu: {{ .Env.CLICKHOUSE_CPU_REQUEST }}
|
||||
|
||||
67
clickhouse/clickhouse-servicemonitor.gomplate.yaml
Normal file
67
clickhouse/clickhouse-servicemonitor.gomplate.yaml
Normal file
@@ -0,0 +1,67 @@
|
||||
{{- if .Env.MONITORING_ENABLED }}
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: clickhouse-metrics
|
||||
namespace: {{ .Env.CLICKHOUSE_NAMESPACE }}
|
||||
labels:
|
||||
app: clickhouse
|
||||
clickhouse.altinity.com/chi: clickhouse
|
||||
spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- name: prometheus
|
||||
port: 9363
|
||||
targetPort: 9363
|
||||
protocol: TCP
|
||||
selector:
|
||||
clickhouse.altinity.com/chi: clickhouse
|
||||
---
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: ServiceMonitor
|
||||
metadata:
|
||||
name: clickhouse
|
||||
namespace: {{ .Env.CLICKHOUSE_NAMESPACE }}
|
||||
labels:
|
||||
app: clickhouse
|
||||
release: kube-prometheus-stack
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app: clickhouse
|
||||
clickhouse.altinity.com/chi: clickhouse
|
||||
namespaceSelector:
|
||||
matchNames:
|
||||
- {{ .Env.CLICKHOUSE_NAMESPACE }}
|
||||
endpoints:
|
||||
- port: prometheus
|
||||
path: /metrics
|
||||
interval: 30s
|
||||
scrapeTimeout: 10s
|
||||
---
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: ServiceMonitor
|
||||
metadata:
|
||||
name: clickhouse-operator
|
||||
namespace: {{ .Env.CLICKHOUSE_NAMESPACE }}
|
||||
labels:
|
||||
app: clickhouse-operator
|
||||
release: kube-prometheus-stack
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: altinity-clickhouse-operator
|
||||
namespaceSelector:
|
||||
matchNames:
|
||||
- {{ .Env.CLICKHOUSE_NAMESPACE }}
|
||||
endpoints:
|
||||
- port: ch-metrics
|
||||
path: /metrics
|
||||
interval: 30s
|
||||
scrapeTimeout: 10s
|
||||
- port: op-metrics
|
||||
path: /metrics
|
||||
interval: 30s
|
||||
scrapeTimeout: 10s
|
||||
{{- end }}
|
||||
@@ -29,6 +29,17 @@ spec:
|
||||
<schema_type>transposed</schema_type>
|
||||
</asynchronous_metric_log>
|
||||
</clickhouse>
|
||||
# Enable Prometheus metrics endpoint
|
||||
prometheus.xml: |
|
||||
<clickhouse>
|
||||
<prometheus>
|
||||
<endpoint>/metrics</endpoint>
|
||||
<port>9363</port>
|
||||
<metrics>true</metrics>
|
||||
<events>true</events>
|
||||
<asynchronous_metrics>true</asynchronous_metrics>
|
||||
</prometheus>
|
||||
</clickhouse>
|
||||
users:
|
||||
admin/k8s_secret_password: clickhouse-credentials/admin
|
||||
admin/networks/ip: "::/0"
|
||||
|
||||
@@ -5,6 +5,8 @@ export CLICKHOUSE_HOST := env("CLICKHOUSE_HOST", "")
|
||||
export CLICKHOUSE_CHART_VERSION := env("CLICKHOUSE_CHART_VERSION", "0.25.5")
|
||||
export CLICKHOUSE_IMAGE := env("CLICKHOUSE_IMAGE", "clickhouse/clickhouse-server:25.10")
|
||||
export EXTERNAL_SECRETS_NAMESPACE := env("EXTERNAL_SECRETS_NAMESPACE", "external-secrets")
|
||||
export PROMETHEUS_NAMESPACE := env("PROMETHEUS_NAMESPACE", "monitoring")
|
||||
export MONITORING_ENABLED := env("MONITORING_ENABLED", "")
|
||||
|
||||
# ClickHouse resource settings
|
||||
export CLICKHOUSE_MEMORY_REQUEST := env("CLICKHOUSE_MEMORY_REQUEST", "1Gi")
|
||||
@@ -107,6 +109,16 @@ install:
|
||||
--placeholder="e.g., clickhouse.example.com"
|
||||
)
|
||||
done
|
||||
# Check if Prometheus is available and ask about monitoring
|
||||
if helm status kube-prometheus-stack -n ${PROMETHEUS_NAMESPACE} &>/dev/null; then
|
||||
if [ -z "${MONITORING_ENABLED}" ]; then
|
||||
if gum confirm "Enable Prometheus monitoring?"; then
|
||||
MONITORING_ENABLED="true"
|
||||
else
|
||||
MONITORING_ENABLED="false"
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
echo "Installing ClickHouse..."
|
||||
just create-namespace
|
||||
just install-zookeeper
|
||||
@@ -124,6 +136,10 @@ install:
|
||||
kubectl wait --for=jsonpath='{.status.status}'=Completed \
|
||||
clickhouseinstallation/clickhouse -n ${CLICKHOUSE_NAMESPACE} --timeout=600s
|
||||
just setup-ingress ${CLICKHOUSE_HOST}
|
||||
# Setup monitoring if enabled
|
||||
if [ "${MONITORING_ENABLED}" = "true" ]; then
|
||||
just setup-monitoring
|
||||
fi
|
||||
echo "ClickHouse installation completed successfully"
|
||||
echo "ClickHouse API at: https://${CLICKHOUSE_HOST}"
|
||||
|
||||
@@ -137,6 +153,27 @@ setup-ingress host:
|
||||
kubectl apply -n ${CLICKHOUSE_NAMESPACE} -f clickhouse-ingress.yaml
|
||||
echo "ClickHouse Ingress configured successfully"
|
||||
|
||||
# Setup Prometheus monitoring for ClickHouse
|
||||
setup-monitoring:
|
||||
#!/bin/bash
|
||||
set -euo pipefail
|
||||
echo "Setting up Prometheus monitoring for ClickHouse..."
|
||||
kubectl label namespace ${CLICKHOUSE_NAMESPACE} buun.channel/enable-monitoring=true --overwrite
|
||||
MONITORING_ENABLED="true" gomplate -f clickhouse-servicemonitor.gomplate.yaml \
|
||||
-o clickhouse-servicemonitor.yaml
|
||||
kubectl apply -f clickhouse-servicemonitor.yaml
|
||||
echo "Prometheus monitoring configured successfully"
|
||||
|
||||
# Remove Prometheus monitoring for ClickHouse
|
||||
remove-monitoring:
|
||||
#!/bin/bash
|
||||
set -euo pipefail
|
||||
echo "Removing Prometheus monitoring for ClickHouse..."
|
||||
kubectl delete servicemonitor clickhouse clickhouse-operator -n ${CLICKHOUSE_NAMESPACE} --ignore-not-found
|
||||
kubectl delete service clickhouse-metrics -n ${CLICKHOUSE_NAMESPACE} --ignore-not-found
|
||||
kubectl label namespace ${CLICKHOUSE_NAMESPACE} buun.channel/enable-monitoring- --ignore-not-found
|
||||
echo "Prometheus monitoring removed"
|
||||
|
||||
# Uninstall ClickHouse (delete_volumes='false' to preserve PVCs and namespace)
|
||||
uninstall delete-volumes='true':
|
||||
#!/bin/bash
|
||||
|
||||
@@ -445,6 +445,10 @@ ingress:
|
||||
annotations:
|
||||
kubernetes.io/ingress.class: traefik
|
||||
traefik.ingress.kubernetes.io/router.entrypoints: websecure
|
||||
# Enable sticky sessions for WebSocket connections (required for Jupyter RTC/MCP)
|
||||
traefik.ingress.kubernetes.io/service.sticky.cookie: "true"
|
||||
traefik.ingress.kubernetes.io/service.sticky.cookie.name: jupyter-session
|
||||
traefik.ingress.kubernetes.io/service.sticky.cookie.secure: "true"
|
||||
ingressClassName: traefik
|
||||
hosts:
|
||||
- {{ .Env.JUPYTERHUB_HOST }}
|
||||
|
||||
2
justfile
2
justfile
@@ -23,10 +23,12 @@ mod kserve
|
||||
mod langfuse
|
||||
mod lakekeeper
|
||||
mod librechat
|
||||
mod litellm
|
||||
mod longhorn
|
||||
mod metabase
|
||||
mod mlflow
|
||||
mod minio
|
||||
mod nats
|
||||
mod nvidia-device-plugin
|
||||
mod fairwinds-polaris
|
||||
mod oauth2-proxy
|
||||
|
||||
@@ -98,8 +98,8 @@ install:
|
||||
--version ${LANGFUSE_CHART_VERSION} -n ${LANGFUSE_NAMESPACE} --wait \
|
||||
-f langfuse-values.yaml
|
||||
|
||||
# Uninstall Langfuse
|
||||
uninstall:
|
||||
# Uninstall Langfuse (delete-data: true to delete database and storage)
|
||||
uninstall delete-data='false':
|
||||
#!/bin/bash
|
||||
set -euo pipefail
|
||||
helm uninstall langfuse -n ${LANGFUSE_NAMESPACE} --wait --ignore-not-found
|
||||
@@ -108,6 +108,17 @@ uninstall:
|
||||
# Clean up Keycloak client and Vault secrets to avoid stale credentials
|
||||
just delete-keycloak-client || true
|
||||
|
||||
if [ "{{ delete-data }}" = "true" ]; then
|
||||
echo "Deleting database and storage..."
|
||||
just delete-postgres-user-and-db || true
|
||||
just delete-clickhouse-user || true
|
||||
just delete-minio-user || true
|
||||
just delete-keycloak-user || true
|
||||
just delete-salt || true
|
||||
just delete-nextauth-secret || true
|
||||
just delete-redis-password || true
|
||||
echo "Langfuse uninstalled with all data deleted."
|
||||
else
|
||||
echo "Langfuse uninstalled successfully"
|
||||
echo ""
|
||||
echo "Note: The following resources were NOT deleted:"
|
||||
@@ -115,12 +126,11 @@ uninstall:
|
||||
echo " - ClickHouse user and database (langfuse)"
|
||||
echo " - MinIO user and bucket (langfuse)"
|
||||
echo " - Keycloak user (langfuse)"
|
||||
echo " - Vault secrets (langfuse/*)"
|
||||
echo ""
|
||||
echo "To delete these resources, run:"
|
||||
echo " just langfuse::delete-postgres-user-and-db"
|
||||
echo " just langfuse::delete-clickhouse-user"
|
||||
echo " just langfuse::delete-minio-user"
|
||||
echo " just langfuse::delete-keycloak-user"
|
||||
echo "To delete all data, run:"
|
||||
echo " just langfuse::uninstall true"
|
||||
fi
|
||||
|
||||
# Create all secrets (PostgreSQL, Keycloak, MinIO, Redis)
|
||||
create-secrets:
|
||||
|
||||
@@ -69,13 +69,13 @@ langfuse:
|
||||
tls:
|
||||
enabled: true
|
||||
|
||||
# Resource configuration based on Goldilocks/VPA recommendations
|
||||
# CPU limits increased to handle startup spikes
|
||||
# Resource recommendations from Goldilocks VPA
|
||||
# web target: cpu=15m, memory=717Mi
|
||||
web:
|
||||
resources:
|
||||
requests:
|
||||
cpu: 15m
|
||||
memory: 704Mi
|
||||
cpu: 25m
|
||||
memory: 768Mi
|
||||
limits:
|
||||
cpu: 100m
|
||||
memory: 1.5Gi
|
||||
@@ -89,10 +89,12 @@ langfuse:
|
||||
timeoutSeconds: 30
|
||||
failureThreshold: 5
|
||||
|
||||
# Resource recommendations from Goldilocks VPA
|
||||
# worker target: cpu=15m, memory=380Mi
|
||||
worker:
|
||||
resources:
|
||||
requests:
|
||||
cpu: 15m
|
||||
cpu: 25m
|
||||
memory: 512Mi
|
||||
limits:
|
||||
cpu: 100m
|
||||
@@ -113,6 +115,16 @@ redis:
|
||||
username: "default"
|
||||
existingSecret: redis-auth
|
||||
existingSecretPasswordKey: secret
|
||||
# Resource recommendations from Goldilocks VPA
|
||||
# valkey target: cpu=15m, memory=100Mi
|
||||
master:
|
||||
resources:
|
||||
requests:
|
||||
cpu: 25m
|
||||
memory: 128Mi
|
||||
limits:
|
||||
cpu: 100m
|
||||
memory: 256Mi
|
||||
|
||||
clickhouse:
|
||||
deploy: false
|
||||
|
||||
@@ -94,10 +94,10 @@ ingress:
|
||||
|
||||
resources:
|
||||
requests:
|
||||
cpu: 100m
|
||||
cpu: 25m
|
||||
memory: 512Mi
|
||||
limits:
|
||||
cpu: 1000m
|
||||
cpu: 100m
|
||||
memory: 1Gi
|
||||
|
||||
mongodb:
|
||||
@@ -110,6 +110,13 @@ mongodb:
|
||||
tag: "latest"
|
||||
persistence:
|
||||
size: 8Gi
|
||||
resources:
|
||||
requests:
|
||||
cpu: 75m
|
||||
memory: 512Mi
|
||||
limits:
|
||||
cpu: 500m
|
||||
memory: 2Gi
|
||||
podSecurityContext:
|
||||
fsGroup: 1001
|
||||
seccompProfile:
|
||||
@@ -132,6 +139,13 @@ meilisearch:
|
||||
tag: "v1.7.3"
|
||||
auth:
|
||||
existingMasterKeySecret: "librechat-credentials-env"
|
||||
resources:
|
||||
requests:
|
||||
cpu: 25m
|
||||
memory: 256Mi
|
||||
limits:
|
||||
cpu: 100m
|
||||
memory: 1Gi
|
||||
podSecurityContext:
|
||||
fsGroup: 1000
|
||||
seccompProfile:
|
||||
|
||||
3
litellm/.gitignore
vendored
Normal file
3
litellm/.gitignore
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
litellm-values.yaml
|
||||
apikey-external-secret.yaml
|
||||
models.yaml
|
||||
547
litellm/README.md
Normal file
547
litellm/README.md
Normal file
@@ -0,0 +1,547 @@
|
||||
# LiteLLM
|
||||
|
||||
Unified LLM gateway and proxy for accessing multiple LLM providers through a single OpenAI-compatible API:
|
||||
|
||||
- **Multi-Provider Support**: Anthropic, OpenAI, Ollama, Mistral, Groq, Cohere, Azure, Bedrock, Vertex AI
|
||||
- **OpenAI-Compatible API**: Drop-in replacement for OpenAI SDK
|
||||
- **Load Balancing & Fallback**: Automatic failover between providers
|
||||
- **Virtual Keys**: Generate API keys for users with usage tracking
|
||||
- **Cost Tracking**: Monitor spending across providers
|
||||
- **Rate Limiting**: Control usage per key/user
|
||||
|
||||
## Prerequisites
|
||||
|
||||
- Kubernetes cluster (k3s)
|
||||
- External Secrets Operator (required)
|
||||
- PostgreSQL cluster (CloudNativePG)
|
||||
- Vault for secrets management
|
||||
|
||||
## Configuration Overview
|
||||
|
||||
LiteLLM requires two types of configuration:
|
||||
|
||||
1. **Environment variables** (`.env.local`): Host, namespace, chart version
|
||||
2. **Model definitions** (`models.yaml`): LLM providers and models to expose
|
||||
|
||||
This separation allows flexible model configuration without modifying environment files.
|
||||
|
||||
## Installation
|
||||
|
||||
### Step 1: Create Model Configuration
|
||||
|
||||
Copy the example configuration and customize:
|
||||
|
||||
```bash
|
||||
cp litellm/models.example.yaml litellm/models.yaml
|
||||
```
|
||||
|
||||
Edit `litellm/models.yaml` to configure your models:
|
||||
|
||||
```yaml
|
||||
# Anthropic Claude
|
||||
- model_name: claude-sonnet
|
||||
litellm_params:
|
||||
model: anthropic/claude-3-7-sonnet-latest
|
||||
api_key: os.environ/ANTHROPIC_API_KEY
|
||||
|
||||
# OpenAI
|
||||
- model_name: gpt-4o
|
||||
litellm_params:
|
||||
model: openai/gpt-4o
|
||||
api_key: os.environ/OPENAI_API_KEY
|
||||
|
||||
# Ollama (local models - no API key required)
|
||||
- model_name: llama3
|
||||
litellm_params:
|
||||
model: ollama/llama3.2
|
||||
api_base: http://ollama.ollama:11434
|
||||
```
|
||||
|
||||
### Step 2: Set API Keys
|
||||
|
||||
For each provider that requires an API key:
|
||||
|
||||
```bash
|
||||
just litellm::set-api-key anthropic
|
||||
just litellm::set-api-key openai
|
||||
```
|
||||
|
||||
Or interactively select the provider:
|
||||
|
||||
```bash
|
||||
just litellm::set-api-key
|
||||
```
|
||||
|
||||
API keys are stored in Vault and synced to Kubernetes via External Secrets Operator.
|
||||
|
||||
### Step 3: Install LiteLLM
|
||||
|
||||
```bash
|
||||
just litellm::install
|
||||
```
|
||||
|
||||
You will be prompted for:
|
||||
|
||||
- **LiteLLM host (FQDN)**: e.g., `litellm.example.com`
|
||||
- **Enable Prometheus monitoring**: If kube-prometheus-stack is installed
|
||||
|
||||
## Model Management
|
||||
|
||||
### Add a Model Interactively
|
||||
|
||||
```bash
|
||||
just litellm::add-model
|
||||
```
|
||||
|
||||
This guides you through:
|
||||
|
||||
1. Selecting a provider
|
||||
2. Choosing a model
|
||||
3. Setting a model alias
|
||||
|
||||
### Remove a Model
|
||||
|
||||
```bash
|
||||
just litellm::remove-model
|
||||
```
|
||||
|
||||
### List Configured Models
|
||||
|
||||
```bash
|
||||
just litellm::list-models
|
||||
```
|
||||
|
||||
### Example Output
|
||||
|
||||
```text
|
||||
Configured models:
|
||||
- claude-sonnet: anthropic/claude-3-7-sonnet-latest
|
||||
- claude-haiku: anthropic/claude-3-5-haiku-latest
|
||||
- llama3: ollama/llama3.2
|
||||
```
|
||||
|
||||
## API Key Management
|
||||
|
||||
### Set API Key for a Provider
|
||||
|
||||
```bash
|
||||
just litellm::set-api-key anthropic
|
||||
```
|
||||
|
||||
### Get API Key (from Vault)
|
||||
|
||||
```bash
|
||||
just litellm::get-api-key anthropic
|
||||
```
|
||||
|
||||
### Verify All Required Keys
|
||||
|
||||
```bash
|
||||
just litellm::verify-api-keys
|
||||
```
|
||||
|
||||
## Environment Variables
|
||||
|
||||
| Variable | Default | Description |
|
||||
| -------- | ------- | ----------- |
|
||||
| `LITELLM_NAMESPACE` | `litellm` | Kubernetes namespace |
|
||||
| `LITELLM_CHART_VERSION` | `0.1.825` | Helm chart version |
|
||||
| `LITELLM_HOST` | (prompt) | External hostname (FQDN) |
|
||||
| `OLLAMA_NAMESPACE` | `ollama` | Ollama namespace for local models |
|
||||
| `MONITORING_ENABLED` | (prompt) | Enable Prometheus ServiceMonitor |
|
||||
|
||||
## Authentication
|
||||
|
||||
LiteLLM has two types of authentication:
|
||||
|
||||
1. **API Access**: Uses Master Key or Virtual Keys for programmatic access
|
||||
2. **Admin UI**: Uses Keycloak SSO for browser-based access
|
||||
|
||||
### Enable SSO for Admin UI
|
||||
|
||||
After installing LiteLLM, enable Keycloak authentication for the Admin UI:
|
||||
|
||||
```bash
|
||||
just litellm::setup-oidc
|
||||
```
|
||||
|
||||
This will:
|
||||
|
||||
- Create a Keycloak client for LiteLLM
|
||||
- Store the client secret in Vault
|
||||
- Configure LiteLLM with OIDC environment variables
|
||||
- Upgrade the deployment with SSO enabled
|
||||
|
||||
### Disable SSO
|
||||
|
||||
To disable SSO and return to unauthenticated Admin UI access:
|
||||
|
||||
```bash
|
||||
just litellm::disable-oidc
|
||||
```
|
||||
|
||||
### SSO Configuration Details
|
||||
|
||||
| Setting | Value |
|
||||
| ------- | ----- |
|
||||
| Callback URL | `https://<litellm-host>/sso/callback` |
|
||||
| Authorization Endpoint | `https://<keycloak-host>/realms/<realm>/protocol/openid-connect/auth` |
|
||||
| Token Endpoint | `https://<keycloak-host>/realms/<realm>/protocol/openid-connect/token` |
|
||||
| Userinfo Endpoint | `https://<keycloak-host>/realms/<realm>/protocol/openid-connect/userinfo` |
|
||||
| Scope | `openid email profile` |
|
||||
|
||||
## User Management
|
||||
|
||||
SSO users are automatically created in LiteLLM when they first log in. By default, new users are assigned the `internal_user_viewer` role (read-only access).
|
||||
|
||||
### List Users
|
||||
|
||||
```bash
|
||||
just litellm::list-users
|
||||
```
|
||||
|
||||
### Assign Role to User
|
||||
|
||||
Interactively select user and role:
|
||||
|
||||
```bash
|
||||
just litellm::assign-role
|
||||
```
|
||||
|
||||
Or specify directly:
|
||||
|
||||
```bash
|
||||
just litellm::assign-role buun proxy_admin
|
||||
```
|
||||
|
||||
### User Roles
|
||||
|
||||
| Role | Description |
|
||||
| ---- | ----------- |
|
||||
| `proxy_admin` | Full admin access (manage keys, users, models, settings) |
|
||||
| `proxy_admin_viewer` | Admin read-only access |
|
||||
| `internal_user` | Can create and manage own API keys |
|
||||
| `internal_user_viewer` | Read-only access (default for SSO users) |
|
||||
|
||||
**Note**: To manage API keys in the Admin UI, users need at least `internal_user` or `proxy_admin` role.
|
||||
|
||||
## API Usage
|
||||
|
||||
LiteLLM exposes an OpenAI-compatible API at `https://your-litellm-host/`.
|
||||
|
||||
### Get Master Key
|
||||
|
||||
```bash
|
||||
just litellm::master-key
|
||||
```
|
||||
|
||||
### Generate Virtual Key for a User
|
||||
|
||||
```bash
|
||||
just litellm::generate-virtual-key buun
|
||||
```
|
||||
|
||||
This will prompt for a model selection and generate an API key for the specified user. Select `all` to grant access to all models.
|
||||
|
||||
### OpenAI SDK Example
|
||||
|
||||
```python
|
||||
from openai import OpenAI
|
||||
|
||||
client = OpenAI(
|
||||
base_url="https://litellm.example.com",
|
||||
api_key="sk-..." # Virtual key or master key
|
||||
)
|
||||
|
||||
response = client.chat.completions.create(
|
||||
model="claude-sonnet", # Use your model alias
|
||||
messages=[{"role": "user", "content": "Hello!"}]
|
||||
)
|
||||
print(response.choices[0].message.content)
|
||||
```
|
||||
|
||||
### curl Example
|
||||
|
||||
```bash
|
||||
curl https://litellm.example.com/v1/chat/completions \
|
||||
-H "Authorization: Bearer sk-..." \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"model": "claude-sonnet",
|
||||
"messages": [{"role": "user", "content": "Hello!"}]
|
||||
}'
|
||||
```
|
||||
|
||||
## Team Management
|
||||
|
||||
Teams allow you to group users and configure team-specific settings such as Langfuse projects for observability.
|
||||
|
||||
### Create a Team
|
||||
|
||||
```bash
|
||||
just litellm::create-team
|
||||
```
|
||||
|
||||
Or with a name directly:
|
||||
|
||||
```bash
|
||||
just litellm::create-team name="project-alpha"
|
||||
```
|
||||
|
||||
### List Teams
|
||||
|
||||
```bash
|
||||
just litellm::list-teams
|
||||
```
|
||||
|
||||
### Get Team Info
|
||||
|
||||
```bash
|
||||
just litellm::get-team team_id=<team-id>
|
||||
```
|
||||
|
||||
### Delete a Team
|
||||
|
||||
```bash
|
||||
just litellm::delete-team team_id=<team-id>
|
||||
```
|
||||
|
||||
### Generate Virtual Key for a Team
|
||||
|
||||
```bash
|
||||
just litellm::generate-team-key
|
||||
```
|
||||
|
||||
This will prompt for team selection and username. The generated key inherits the team's settings (including Langfuse project configuration).
|
||||
|
||||
## Langfuse Integration
|
||||
|
||||
[Langfuse](https://langfuse.com/) provides LLM observability with tracing, monitoring, and analytics. LiteLLM can send traces to Langfuse for every API call.
|
||||
|
||||
### Enable Langfuse Integration
|
||||
|
||||
During installation (`just litellm::install`) or upgrade (`just litellm::upgrade`), you will be prompted to enable Langfuse integration. Alternatively:
|
||||
|
||||
```bash
|
||||
just litellm::setup-langfuse
|
||||
```
|
||||
|
||||
You will need Langfuse API keys (Public Key and Secret Key) from the Langfuse UI: **Settings > API Keys**.
|
||||
|
||||
### Set Langfuse API Keys
|
||||
|
||||
```bash
|
||||
just litellm::set-langfuse-keys
|
||||
```
|
||||
|
||||
### Disable Langfuse Integration
|
||||
|
||||
```bash
|
||||
just litellm::disable-langfuse
|
||||
```
|
||||
|
||||
### Per-Team Langfuse Projects
|
||||
|
||||
Each team can have its own Langfuse project for isolated observability. This is useful when different projects or departments need separate trace data.
|
||||
|
||||
#### Setup Flow
|
||||
|
||||
1. Create a team:
|
||||
|
||||
```bash
|
||||
just litellm::create-team name="project-alpha"
|
||||
```
|
||||
|
||||
2. Create a Langfuse project for the team and get API keys from Langfuse UI
|
||||
|
||||
3. Configure the team's Langfuse project:
|
||||
|
||||
```bash
|
||||
just litellm::set-team-langfuse-project
|
||||
```
|
||||
|
||||
This will prompt for team selection and Langfuse API keys.
|
||||
|
||||
4. Generate a key for the team:
|
||||
|
||||
```bash
|
||||
just litellm::generate-team-key
|
||||
```
|
||||
|
||||
5. Use the team key for API calls - traces will be sent to the team's Langfuse project
|
||||
|
||||
#### Architecture
|
||||
|
||||
```plain
|
||||
LiteLLM Proxy
|
||||
|
|
||||
+-- Default Langfuse Project (for keys without team)
|
||||
|
|
||||
+-- Team A --> Langfuse Project A
|
||||
|
|
||||
+-- Team B --> Langfuse Project B
|
||||
```
|
||||
|
||||
### Environment Variables
|
||||
|
||||
| Variable | Default | Description |
|
||||
| -------- | ------- | ----------- |
|
||||
| `LITELLM_LANGFUSE_INTEGRATION_ENABLED` | (prompt) | Enable Langfuse integration |
|
||||
| `LANGFUSE_HOST` | (prompt) | Langfuse instance hostname |
|
||||
|
||||
## Supported Providers
|
||||
|
||||
| Provider | Model Prefix | API Key Required |
|
||||
| -------- | ------------ | ---------------- |
|
||||
| Anthropic | `anthropic/` | Yes |
|
||||
| OpenAI | `openai/` | Yes |
|
||||
| Ollama | `ollama/` | No (uses `api_base`) |
|
||||
| Mistral | `mistral/` | Yes |
|
||||
| Groq | `groq/` | Yes |
|
||||
| Cohere | `cohere/` | Yes |
|
||||
| Azure OpenAI | `azure/` | Yes |
|
||||
| AWS Bedrock | `bedrock/` | Yes |
|
||||
| Google Vertex AI | `vertexai/` | Yes |
|
||||
|
||||
## Architecture
|
||||
|
||||
```plain
|
||||
External Users/Applications
|
||||
|
|
||||
Cloudflare Tunnel (HTTPS)
|
||||
|
|
||||
Traefik Ingress (HTTPS)
|
||||
|
|
||||
LiteLLM Proxy (HTTP inside cluster)
|
||||
|-- PostgreSQL (usage tracking, virtual keys)
|
||||
|-- Redis (caching, rate limiting)
|
||||
|-- External Secrets (API keys from Vault)
|
||||
|
|
||||
+-- Anthropic API
|
||||
+-- OpenAI API
|
||||
+-- Ollama (local)
|
||||
+-- Other providers...
|
||||
```
|
||||
|
||||
## Upgrade
|
||||
|
||||
After modifying `models.yaml` or updating API keys:
|
||||
|
||||
```bash
|
||||
just litellm::upgrade
|
||||
```
|
||||
|
||||
## Uninstall
|
||||
|
||||
```bash
|
||||
just litellm::uninstall
|
||||
```
|
||||
|
||||
This removes:
|
||||
|
||||
- Helm release and all Kubernetes resources
|
||||
- Namespace
|
||||
- External Secrets
|
||||
|
||||
**Note**: The following resources are NOT deleted:
|
||||
|
||||
- PostgreSQL database (use `just postgres::delete-db litellm`)
|
||||
- API keys in Vault
|
||||
|
||||
### Full Cleanup
|
||||
|
||||
To remove everything including database and Vault secrets:
|
||||
|
||||
```bash
|
||||
just litellm::cleanup
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Check Pod Status
|
||||
|
||||
```bash
|
||||
kubectl get pods -n litellm
|
||||
```
|
||||
|
||||
Expected pods:
|
||||
|
||||
- `litellm-*` - LiteLLM proxy
|
||||
- `litellm-redis-master-0` - Redis instance
|
||||
|
||||
### View Logs
|
||||
|
||||
```bash
|
||||
kubectl logs -n litellm deployment/litellm --tail=100
|
||||
```
|
||||
|
||||
### API Key Not Working
|
||||
|
||||
Verify the ExternalSecret is synced:
|
||||
|
||||
```bash
|
||||
kubectl get externalsecret -n litellm
|
||||
kubectl get secret apikey -n litellm -o yaml
|
||||
```
|
||||
|
||||
### Model Not Found
|
||||
|
||||
Ensure the model is configured in `models.yaml` and the deployment is updated:
|
||||
|
||||
```bash
|
||||
just litellm::list-models
|
||||
just litellm::upgrade
|
||||
```
|
||||
|
||||
### Provider API Errors
|
||||
|
||||
Check if the API key is set correctly:
|
||||
|
||||
```bash
|
||||
just litellm::get-api-key anthropic
|
||||
```
|
||||
|
||||
If empty, set the API key:
|
||||
|
||||
```bash
|
||||
just litellm::set-api-key anthropic
|
||||
```
|
||||
|
||||
### Database Connection Issues
|
||||
|
||||
Check PostgreSQL connectivity:
|
||||
|
||||
```bash
|
||||
kubectl exec -n litellm deployment/litellm -- \
|
||||
psql -h postgres-cluster-rw.postgres -U litellm -d litellm -c "SELECT 1"
|
||||
```
|
||||
|
||||
## Configuration Files
|
||||
|
||||
| File | Description |
|
||||
| ---- | ----------- |
|
||||
| `models.yaml` | Model definitions (user-created, gitignored) |
|
||||
| `models.example.yaml` | Example model configuration |
|
||||
| `litellm-values.gomplate.yaml` | Helm values template |
|
||||
| `apikey-external-secret.gomplate.yaml` | ExternalSecret for API keys |
|
||||
| `keycloak-auth-external-secret.gomplate.yaml` | ExternalSecret for Keycloak OIDC |
|
||||
| `langfuse-auth-external-secret.gomplate.yaml` | ExternalSecret for Langfuse API keys |
|
||||
|
||||
## Security Considerations
|
||||
|
||||
- **Pod Security Standards**: Namespace configured with **baseline** enforcement
|
||||
(LiteLLM's Prisma requires write access to `/.cache`, which prevents `restricted` level)
|
||||
- **Secrets Management**: API keys stored in Vault, synced via External Secrets Operator
|
||||
- **Virtual Keys**: Generate scoped API keys for users instead of sharing master key
|
||||
- **TLS/HTTPS**: All external traffic encrypted via Traefik Ingress
|
||||
- **Database Credentials**: Unique PostgreSQL user with minimal privileges
|
||||
|
||||
## References
|
||||
|
||||
- [LiteLLM Documentation](https://docs.litellm.ai/)
|
||||
- [LiteLLM GitHub](https://github.com/BerriAI/litellm)
|
||||
- [LiteLLM Helm Chart](https://github.com/BerriAI/litellm/tree/main/deploy/charts/litellm-helm)
|
||||
- [Supported Models](https://docs.litellm.ai/docs/providers)
|
||||
- [Virtual Keys](https://docs.litellm.ai/docs/proxy/virtual_keys)
|
||||
- [Langfuse Integration](https://docs.litellm.ai/docs/proxy/logging#langfuse)
|
||||
- [Team-based Logging](https://docs.litellm.ai/docs/proxy/team_logging)
|
||||
29
litellm/apikey-external-secret.gomplate.yaml
Normal file
29
litellm/apikey-external-secret.gomplate.yaml
Normal file
@@ -0,0 +1,29 @@
|
||||
{{- /*
  ExternalSecret aggregating provider API keys for LiteLLM.

  Reads the "models" datasource (models.yaml). For every model entry that
  sets litellm_params.api_key, the provider is derived from the prefix of
  "provider/model" in litellm_params.model. The provider names are
  deduplicated via $providerMap (keys are the set; values are just `true`).
  Each unique provider produces one <PROVIDER>_API_KEY entry pulled from
  Vault path litellm/<provider>, property "apikey".
*/ -}}
{{- $models := (datasource "models") -}}
{{- $providerMap := dict -}}
{{- range $models -}}
{{- if has .litellm_params "api_key" -}}
{{- $parts := strings.Split "/" .litellm_params.model -}}
{{- $provider := index $parts 0 -}}
{{- $providerMap = merge $providerMap (dict $provider true) -}}
{{- end -}}
{{- end -}}
apiVersion: external-secrets.io/v1
kind: ExternalSecret
metadata:
  name: apikey-external-secret
  namespace: {{ .Env.LITELLM_NAMESPACE }}
spec:
  refreshInterval: 1h
  secretStoreRef:
    name: vault-secret-store
    kind: ClusterSecretStore
  target:
    # Secret name matches the entry in environmentSecrets of the Helm values.
    name: apikey
    creationPolicy: Owner
  data:
{{- range $provider, $_ := $providerMap }}
    # e.g. provider "anthropic" -> ANTHROPIC_API_KEY from Vault litellm/anthropic
    - secretKey: {{ $provider | strings.ToUpper }}_API_KEY
      remoteRef:
        key: litellm/{{ $provider }}
        property: apikey
{{- end }}
|
||||
1074
litellm/justfile
Normal file
1074
litellm/justfile
Normal file
File diff suppressed because it is too large
Load Diff
22
litellm/keycloak-auth-external-secret.gomplate.yaml
Normal file
22
litellm/keycloak-auth-external-secret.gomplate.yaml
Normal file
@@ -0,0 +1,22 @@
|
||||
# ExternalSecret syncing the LiteLLM OIDC client credentials from Vault
# (path keycloak/client/litellm) into the "keycloak-auth" Kubernetes Secret.
# The GENERIC_CLIENT_ID / GENERIC_CLIENT_SECRET key names match the env vars
# consumed by LiteLLM's generic SSO provider (see litellm-values.gomplate.yaml).
apiVersion: external-secrets.io/v1
kind: ExternalSecret
metadata:
  name: keycloak-auth-external-secret
  namespace: {{ .Env.LITELLM_NAMESPACE }}
spec:
  refreshInterval: 1h
  secretStoreRef:
    name: vault-secret-store
    kind: ClusterSecretStore
  target:
    name: keycloak-auth
    creationPolicy: Owner
  data:
    - secretKey: GENERIC_CLIENT_ID
      remoteRef:
        key: keycloak/client/litellm
        property: client_id
    - secretKey: GENERIC_CLIENT_SECRET
      remoteRef:
        key: keycloak/client/litellm
        property: client_secret
|
||||
22
litellm/langfuse-auth-external-secret.gomplate.yaml
Normal file
22
litellm/langfuse-auth-external-secret.gomplate.yaml
Normal file
@@ -0,0 +1,22 @@
|
||||
# ExternalSecret syncing the Langfuse API key pair from Vault
# (path litellm/langfuse) into the "langfuse-auth" Kubernetes Secret.
# LANGFUSE_PUBLIC_KEY / LANGFUSE_SECRET_KEY are the env var names LiteLLM's
# Langfuse callback reads; LANGFUSE_HOST is set separately in the Helm values.
apiVersion: external-secrets.io/v1
kind: ExternalSecret
metadata:
  name: langfuse-auth-external-secret
  namespace: {{ .Env.LITELLM_NAMESPACE }}
spec:
  refreshInterval: 1h
  secretStoreRef:
    name: vault-secret-store
    kind: ClusterSecretStore
  target:
    name: langfuse-auth
    creationPolicy: Owner
  data:
    - secretKey: LANGFUSE_PUBLIC_KEY
      remoteRef:
        key: litellm/langfuse
        property: public_key
    - secretKey: LANGFUSE_SECRET_KEY
      remoteRef:
        key: litellm/langfuse
        property: secret_key
|
||||
114
litellm/litellm-values.gomplate.yaml
Normal file
114
litellm/litellm-values.gomplate.yaml
Normal file
@@ -0,0 +1,114 @@
|
||||
# Helm values template for the LiteLLM proxy chart (rendered with gomplate).
# https://github.com/BerriAI/litellm/tree/main/deploy/charts/litellm-helm
# https://github.com/BerriAI/litellm/tree/main/litellm/proxy/example_config_yaml
#
# Feature flags (LITELLM_OIDC_ENABLED, LITELLM_LANGFUSE_INTEGRATION_ENABLED,
# MONITORING_ENABLED) carry literal "true"/"false" strings. A bare
# `{{ if .Env.X }}` is true for ANY non-empty string in Go templates — so
# "false" would still enable the block. conv.ToBool parses the string
# ("" and "false" -> false) to make the flags behave as intended.

# Master key secret names are left empty; credentials are injected via
# the environmentSecrets list below instead.
masterkeySecretName: ""
masterkeySecretKey: ""

# Note: LiteLLM image requires write access to /.cache for Prisma
# Pod Security Standards must be set to "baseline" for this namespace
podSecurityContext: {}

securityContext: {}

# Resource recommendations from Goldilocks VPA
# litellm target: cpu=11m, memory=549Mi
resources:
  requests:
    cpu: 25m
    memory: 512Mi
  limits:
    cpu: 100m
    memory: 1Gi

migrationJob:
  resources:
    requests:
      cpu: 100m
      memory: 512Mi
    limits:
      memory: 1Gi

# Kubernetes Secrets exposed to the pod as environment variables.
# Each name matches a target Secret created by an ExternalSecret manifest.
environmentSecrets:
  - apikey
{{- if conv.ToBool .Env.LITELLM_OIDC_ENABLED }}
  - keycloak-auth
{{- end }}
{{- if conv.ToBool .Env.LITELLM_LANGFUSE_INTEGRATION_ENABLED }}
  - langfuse-auth
{{- end }}

extraEnvVars:
{{- if conv.ToBool .Env.LITELLM_OIDC_ENABLED }}
  # Generic SSO provider endpoints pointing at the Keycloak realm.
  - name: PROXY_BASE_URL
    value: "https://{{ .Env.LITELLM_HOST }}"
  - name: GENERIC_AUTHORIZATION_ENDPOINT
    value: "https://{{ .Env.KEYCLOAK_HOST }}/realms/{{ .Env.KEYCLOAK_REALM }}/protocol/openid-connect/auth"
  - name: GENERIC_TOKEN_ENDPOINT
    value: "https://{{ .Env.KEYCLOAK_HOST }}/realms/{{ .Env.KEYCLOAK_REALM }}/protocol/openid-connect/token"
  - name: GENERIC_USERINFO_ENDPOINT
    value: "https://{{ .Env.KEYCLOAK_HOST }}/realms/{{ .Env.KEYCLOAK_REALM }}/protocol/openid-connect/userinfo"
  - name: GENERIC_SCOPE
    value: "openid email profile"
{{- end }}
{{- if conv.ToBool .Env.LITELLM_LANGFUSE_INTEGRATION_ENABLED }}
  - name: LANGFUSE_HOST
    value: "https://{{ .Env.LANGFUSE_HOST }}"
{{- end }}

proxy_config:
  # models.yaml is user-maintained (see models.example.yaml); its entries are
  # spliced in verbatim, indented to sit under model_list.
  model_list:
{{ file.Read "models.yaml" | indent 4 }}
{{- if conv.ToBool .Env.LITELLM_LANGFUSE_INTEGRATION_ENABLED }}
  litellm_settings:
    success_callback: ["langfuse"]
    failure_callback: ["langfuse"]
{{- end }}

# Reuse the shared CloudNativePG cluster instead of deploying a bundled DB.
db:
  useExisting: true

  endpoint: postgres-cluster-rw.postgres
  database: litellm
  secret:
    name: postgres-auth
    usernameKey: username
    passwordKey: password

  deployStandalone: false

redis:
  enabled: true
  # Resource recommendations from Goldilocks VPA
  # redis target: cpu=15m, memory=100Mi
  master:
    resources:
      requests:
        cpu: 25m
        memory: 128Mi
      limits:
        cpu: 100m
        memory: 256Mi

ingress:
  enabled: true
  className: traefik
  annotations:
    kubernetes.io/ingress.class: traefik
    traefik.ingress.kubernetes.io/router.entrypoints: websecure
  hosts:
    # Templated scalars are quoted so an empty expansion renders as ""
    # rather than YAML null.
    - host: "{{ .Env.LITELLM_HOST }}"
      paths:
        - path: /
          pathType: ImplementationSpecific
  tls:
    - hosts:
        - "{{ .Env.LITELLM_HOST }}"

{{- if conv.ToBool .Env.MONITORING_ENABLED }}
serviceMonitor:
  enabled: true
  labels:
    release: kube-prometheus-stack
  interval: 30s
  scrapeTimeout: 10s
{{- end }}
|
||||
106
litellm/models.example.yaml
Normal file
106
litellm/models.example.yaml
Normal file
@@ -0,0 +1,106 @@
|
||||
# LiteLLM Model Configuration
# Copy this file to models.yaml and customize for your environment.
#
# Usage:
#   cp litellm/models.example.yaml litellm/models.yaml
#   # Edit models.yaml to add/remove models
#   just litellm::install
#
# API keys are stored in Vault and injected as environment variables.
# Use: just litellm::set-api-key provider=<provider>
#
# The provider prefix of `model` (the part before "/") also determines which
# <PROVIDER>_API_KEY env var the apikey ExternalSecret generates, for every
# entry that declares an `api_key`.
#
# Supported providers:
#   - anthropic: Claude models (Opus, Sonnet, Haiku)
#   - openai: GPT and o-series models
#   - ollama: Local models (no API key required)
#   - azure: Azure OpenAI
#   - bedrock: AWS Bedrock
#   - vertexai: Google Vertex AI
#   - mistral: Mistral AI
#   - groq: Groq (fast inference)
#   - cohere: Cohere

# Anthropic Claude (https://docs.anthropic.com/en/docs/about-claude/models/overview)
- model_name: claude-sonnet
  litellm_params:
    model: anthropic/claude-sonnet-4-20250514
    api_key: os.environ/ANTHROPIC_API_KEY

- model_name: claude-haiku
  litellm_params:
    model: anthropic/claude-haiku-4-20251015
    api_key: os.environ/ANTHROPIC_API_KEY

# - model_name: claude-opus
#   litellm_params:
#     model: anthropic/claude-opus-4-20250514
#     api_key: os.environ/ANTHROPIC_API_KEY

# OpenAI (https://platform.openai.com/docs/models)
# - model_name: gpt-4o
#   litellm_params:
#     model: openai/gpt-4o
#     api_key: os.environ/OPENAI_API_KEY

# - model_name: gpt-4o-mini
#   litellm_params:
#     model: openai/gpt-4o-mini
#     api_key: os.environ/OPENAI_API_KEY

# - model_name: o3
#   litellm_params:
#     model: openai/o3
#     api_key: os.environ/OPENAI_API_KEY

# - model_name: o4-mini
#   litellm_params:
#     model: openai/o4-mini
#     api_key: os.environ/OPENAI_API_KEY

# Ollama (local models - no API key required)
# - model_name: llama4-scout
#   litellm_params:
#     model: ollama/llama4:scout
#     api_base: http://ollama.ollama:11434

# - model_name: qwen3
#   litellm_params:
#     model: ollama/qwen3:8b
#     api_base: http://ollama.ollama:11434

# - model_name: deepseek-r1
#   litellm_params:
#     model: ollama/deepseek-r1:8b
#     api_base: http://ollama.ollama:11434

# Mistral AI (https://docs.mistral.ai/getting-started/models/models_overview/)
# - model_name: mistral-large
#   litellm_params:
#     model: mistral/mistral-large-latest
#     api_key: os.environ/MISTRAL_API_KEY

# - model_name: ministral-8b
#   litellm_params:
#     model: mistral/ministral-8b-latest
#     api_key: os.environ/MISTRAL_API_KEY

# - model_name: codestral
#   litellm_params:
#     model: mistral/codestral-latest
#     api_key: os.environ/MISTRAL_API_KEY

# Groq (fast inference - https://console.groq.com/docs/models)
# - model_name: groq-llama4-scout
#   litellm_params:
#     model: groq/meta-llama/llama-4-scout-17b-16e-instruct
#     api_key: os.environ/GROQ_API_KEY

# - model_name: groq-llama3.3
#   litellm_params:
#     model: groq/llama-3.3-70b-versatile
#     api_key: os.environ/GROQ_API_KEY

# - model_name: groq-llama3.1
#   litellm_params:
#     model: groq/llama-3.1-8b-instant
#     api_key: os.environ/GROQ_API_KEY
|
||||
113
nats/justfile
Normal file
113
nats/justfile
Normal file
@@ -0,0 +1,113 @@
|
||||
# justfile for installing and operating NATS (with optional JetStream)
# via the official Helm chart. All settings are overridable through the
# environment variables below.

set fallback := true

export NATS_NAMESPACE := env("NATS_NAMESPACE", "nats")
export NATS_CHART_VERSION := env("NATS_CHART_VERSION", "2.12.2")
export NATS_REPLICAS := env("NATS_REPLICAS", "1")
export NATS_JETSTREAM_ENABLED := env("NATS_JETSTREAM_ENABLED", "true")
export NATS_JETSTREAM_STORAGE_SIZE := env("NATS_JETSTREAM_STORAGE_SIZE", "10Gi")
export NATS_JETSTREAM_MEMORY_SIZE := env("NATS_JETSTREAM_MEMORY_SIZE", "256Mi")

[private]
default:
    @just --list --unsorted --list-submodules

# Add Helm repository
add-helm-repo:
    helm repo add nats https://nats-io.github.io/k8s/helm/charts/
    helm repo update nats

# Remove Helm repository
remove-helm-repo:
    helm repo remove nats

# Create NATS namespace
create-namespace:
    #!/bin/bash
    set -euo pipefail
    if ! kubectl get namespace ${NATS_NAMESPACE} &>/dev/null; then
        kubectl create namespace ${NATS_NAMESPACE}
    fi
    # Enforce the "restricted" Pod Security Standard; the values template
    # sets matching securityContexts so the pods are admitted.
    kubectl label namespace ${NATS_NAMESPACE} \
        pod-security.kubernetes.io/enforce=restricted \
        pod-security.kubernetes.io/enforce-version=latest \
        pod-security.kubernetes.io/warn=restricted \
        pod-security.kubernetes.io/warn-version=latest \
        --overwrite

# Delete NATS namespace
delete-namespace:
    kubectl delete namespace ${NATS_NAMESPACE} --ignore-not-found

# Install NATS
install:
    #!/bin/bash
    set -euo pipefail

    just create-namespace
    just add-helm-repo

    # Render values.yaml from the gomplate template, then install.
    gomplate -f values.gomplate.yaml -o values.yaml
    helm upgrade --install nats nats/nats \
        --version ${NATS_CHART_VERSION} \
        -n ${NATS_NAMESPACE} \
        -f values.yaml \
        --wait

    echo ""
    echo "NATS installed successfully"
    echo "Namespace: ${NATS_NAMESPACE}"
    echo "Replicas: ${NATS_REPLICAS}"
    echo "JetStream enabled: ${NATS_JETSTREAM_ENABLED}"
    echo ""
    echo "Internal URL: nats://nats.${NATS_NAMESPACE}.svc:4222"

# Upgrade NATS
upgrade:
    #!/bin/bash
    set -euo pipefail

    gomplate -f values.gomplate.yaml -o values.yaml
    helm upgrade nats nats/nats \
        --version ${NATS_CHART_VERSION} \
        -n ${NATS_NAMESPACE} \
        -f values.yaml \
        --wait

    echo "NATS upgraded successfully"

# Uninstall NATS
uninstall:
    #!/bin/bash
    set -euo pipefail
    if ! gum confirm "Are you sure you want to uninstall NATS?"; then
        echo "Aborted"
        exit 0
    fi
    # NOTE(review): `helm uninstall --ignore-not-found` requires a recent
    # Helm release — confirm the toolchain's minimum Helm version.
    helm uninstall nats -n ${NATS_NAMESPACE} --wait --ignore-not-found
    just delete-namespace
    echo "NATS uninstalled"

# Show NATS status
status:
    kubectl get pods -n ${NATS_NAMESPACE}
    kubectl get svc -n ${NATS_NAMESPACE}

# Show NATS logs
logs:
    kubectl logs -n ${NATS_NAMESPACE} -l app.kubernetes.io/name=nats -f

# Show server info via monitoring endpoint
server-info:
    kubectl exec -n ${NATS_NAMESPACE} nats-0 -c nats -- \
        wget -qO- http://localhost:8222/varz | head -50

# Show JetStream info via monitoring endpoint
js-info:
    kubectl exec -n ${NATS_NAMESPACE} nats-0 -c nats -- \
        wget -qO- http://localhost:8222/jsz

# Port forward for local testing
port-forward:
    @echo "NATS available at localhost:4222"
    @echo "Monitor available at http://localhost:8222"
    kubectl port-forward -n ${NATS_NAMESPACE} svc/nats 4222:4222 8222:8222
|
||||
64
nats/values.gomplate.yaml
Normal file
64
nats/values.gomplate.yaml
Normal file
@@ -0,0 +1,64 @@
|
||||
# gomplate template for the NATS Helm chart values.
# Environment variables are exported by nats/justfile; rendered to
# values.yaml at install/upgrade time.
config:
  cluster:
    # Clustering is only enabled when running more than one replica.
    enabled: {{ if gt (conv.ToInt .Env.NATS_REPLICAS) 1 }}true{{ else }}false{{ end }}
    replicas: {{ .Env.NATS_REPLICAS }}

  jetstream:
    enabled: {{ .Env.NATS_JETSTREAM_ENABLED }}
    fileStore:
      enabled: true
      dir: /data
      pvc:
        enabled: true
        size: {{ .Env.NATS_JETSTREAM_STORAGE_SIZE }}
    memoryStore:
      enabled: true
      maxSize: {{ .Env.NATS_JETSTREAM_MEMORY_SIZE }}

  # HTTP monitoring endpoint (used by the server-info / js-info recipes).
  monitor:
    enabled: true
    port: 8222

container:
  resources:
    requests:
      cpu: 50m
      memory: 64Mi
    limits:
      cpu: 500m
      memory: 512Mi
  # Hardened securityContext so pods satisfy the "restricted"
  # Pod Security Standard enforced on the namespace (see nats/justfile).
  merge:
    securityContext:
      runAsUser: 1000
      runAsNonRoot: true
      allowPrivilegeEscalation: false
      seccompProfile:
        type: RuntimeDefault
      capabilities:
        drop:
          - ALL

reloader:
  enabled: true
  merge:
    securityContext:
      runAsUser: 1000
      runAsNonRoot: true
      allowPrivilegeEscalation: false
      seccompProfile:
        type: RuntimeDefault
      capabilities:
        drop:
          - ALL

podTemplate:
  merge:
    spec:
      securityContext:
        # fsGroup lets the non-root user write the JetStream PVC at /data.
        fsGroup: 1000
        seccompProfile:
          type: RuntimeDefault

# Disable the nats-box debug/utility container.
natsBox:
  enabled: false
|
||||
64
nats/values.yaml
Normal file
64
nats/values.yaml
Normal file
@@ -0,0 +1,64 @@
|
||||
# Rendered NATS Helm values.
# NOTE(review): this file appears to be generated from values.gomplate.yaml
# by `just nats::install` (gomplate -o values.yaml) — presumably it should be
# gitignored rather than committed; verify against the repo's conventions.
config:
  cluster:
    enabled: false
    replicas: 1

  jetstream:
    enabled: true
    fileStore:
      enabled: true
      dir: /data
      pvc:
        enabled: true
        size: 10Gi
    memoryStore:
      enabled: true
      maxSize: 256Mi

  monitor:
    enabled: true
    port: 8222

container:
  resources:
    requests:
      cpu: 50m
      memory: 64Mi
    limits:
      cpu: 500m
      memory: 512Mi

  merge:
    securityContext:
      runAsUser: 1000
      runAsNonRoot: true
      allowPrivilegeEscalation: false
      seccompProfile:
        type: RuntimeDefault
      capabilities:
        drop:
          - ALL

reloader:
  enabled: true
  merge:
    securityContext:
      runAsUser: 1000
      runAsNonRoot: true
      allowPrivilegeEscalation: false
      seccompProfile:
        type: RuntimeDefault
      capabilities:
        drop:
          - ALL

podTemplate:
  merge:
    spec:
      securityContext:
        fsGroup: 1000
        seccompProfile:
          type: RuntimeDefault

natsBox:
  enabled: false
|
||||
@@ -8,6 +8,7 @@ export OLLAMA_GPU_TYPE := env("OLLAMA_GPU_TYPE", "nvidia")
|
||||
export OLLAMA_GPU_COUNT := env("OLLAMA_GPU_COUNT", "1")
|
||||
export OLLAMA_MODELS := env("OLLAMA_MODELS", "")
|
||||
export OLLAMA_STORAGE_SIZE := env("OLLAMA_STORAGE_SIZE", "30Gi")
|
||||
export OLLAMA_HELM_TIMEOUT := env("OLLAMA_HELM_TIMEOUT", "10m")
|
||||
|
||||
[private]
|
||||
default:
|
||||
@@ -66,7 +67,8 @@ install:
|
||||
|
||||
gomplate -f values.gomplate.yaml -o values.yaml
|
||||
helm upgrade --install ollama ollama/ollama \
|
||||
--version ${OLLAMA_CHART_VERSION} -n ${OLLAMA_NAMESPACE} --wait \
|
||||
--version ${OLLAMA_CHART_VERSION} -n ${OLLAMA_NAMESPACE} \
|
||||
--wait --timeout ${OLLAMA_HELM_TIMEOUT} \
|
||||
-f values.yaml
|
||||
|
||||
echo ""
|
||||
@@ -97,7 +99,8 @@ upgrade:
|
||||
|
||||
gomplate -f values.gomplate.yaml -o values.yaml
|
||||
helm upgrade ollama ollama/ollama \
|
||||
--version ${OLLAMA_CHART_VERSION} -n ${OLLAMA_NAMESPACE} --wait \
|
||||
--version ${OLLAMA_CHART_VERSION} -n ${OLLAMA_NAMESPACE} \
|
||||
--wait --timeout ${OLLAMA_HELM_TIMEOUT} \
|
||||
-f values.yaml
|
||||
|
||||
echo "Ollama upgraded successfully"
|
||||
|
||||
Reference in New Issue
Block a user