feat(clickhouse): enable Prometheus monitoring

This commit is contained in:
Masaki Yatsu
2025-12-04 11:34:22 +09:00
parent 7dc732268e
commit d9ee90c32c
6 changed files with 148 additions and 0 deletions

View File

@@ -2,4 +2,5 @@ clickhouse-credentials-external-secret.yaml
clickhouse-ingress.yaml clickhouse-ingress.yaml
clickhouse-installation-template.yaml clickhouse-installation-template.yaml
clickhouse-operator-values.yaml clickhouse-operator-values.yaml
clickhouse-servicemonitor.yaml
clickhouse.yaml clickhouse.yaml

View File

@@ -44,3 +44,31 @@ ClickHouse can use the following Linux capabilities for enhanced performance, bu
| `SYS_NICE` | Thread priority control via `os_thread_priority` | Setting has no effect | | `SYS_NICE` | Thread priority control via `os_thread_priority` | Setting has no effect |
These capabilities are disabled by default to comply with baseline Pod Security Standards. To enable them, the namespace must allow privileged pods, and you need to uncomment the `add` line in `clickhouse-installation-template.yaml`. These capabilities are disabled by default to comply with baseline Pod Security Standards. To enable them, the namespace must allow privileged pods, and you need to uncomment the `add` line in `clickhouse-installation-template.yaml`.
## Monitoring
ClickHouse exposes Prometheus metrics on port 9363. When Prometheus (kube-prometheus-stack) is installed, monitoring can be enabled during installation or manually.
### Enable Monitoring
```bash
just clickhouse::setup-monitoring
```
This creates a ServiceMonitor and a metrics Service for Prometheus to scrape.
### Grafana Dashboard
Import the ClickHouse dashboard from Grafana.com:
1. Open Grafana → **Dashboards****New****Import**
2. Enter Dashboard ID: `14192`
3. Click **Load**, select **Prometheus** data source, then **Import**
The dashboard includes panels for memory, connections, queries, I/O, replication, merge operations, cache, and ZooKeeper metrics.
### Remove Monitoring
```bash
just clickhouse::remove-monitoring
```

View File

@@ -20,6 +20,10 @@ spec:
containers: containers:
- name: clickhouse - name: clickhouse
image: {{ .Env.CLICKHOUSE_IMAGE }} image: {{ .Env.CLICKHOUSE_IMAGE }}
ports:
- name: prometheus
containerPort: 9363
protocol: TCP
resources: resources:
requests: requests:
cpu: {{ .Env.CLICKHOUSE_CPU_REQUEST }} cpu: {{ .Env.CLICKHOUSE_CPU_REQUEST }}

View File

@@ -0,0 +1,67 @@
{{- if .Env.MONITORING_ENABLED }}
---
apiVersion: v1
kind: Service
metadata:
name: clickhouse-metrics
namespace: {{ .Env.CLICKHOUSE_NAMESPACE }}
labels:
app: clickhouse
clickhouse.altinity.com/chi: clickhouse
spec:
type: ClusterIP
ports:
- name: prometheus
port: 9363
targetPort: 9363
protocol: TCP
selector:
clickhouse.altinity.com/chi: clickhouse
---
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
name: clickhouse
namespace: {{ .Env.CLICKHOUSE_NAMESPACE }}
labels:
app: clickhouse
release: kube-prometheus-stack
spec:
selector:
matchLabels:
app: clickhouse
clickhouse.altinity.com/chi: clickhouse
namespaceSelector:
matchNames:
- {{ .Env.CLICKHOUSE_NAMESPACE }}
endpoints:
- port: prometheus
path: /metrics
interval: 30s
scrapeTimeout: 10s
---
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
name: clickhouse-operator
namespace: {{ .Env.CLICKHOUSE_NAMESPACE }}
labels:
app: clickhouse-operator
release: kube-prometheus-stack
spec:
selector:
matchLabels:
app.kubernetes.io/name: altinity-clickhouse-operator
namespaceSelector:
matchNames:
- {{ .Env.CLICKHOUSE_NAMESPACE }}
endpoints:
- port: ch-metrics
path: /metrics
interval: 30s
scrapeTimeout: 10s
- port: op-metrics
path: /metrics
interval: 30s
scrapeTimeout: 10s
{{- end }}

View File

@@ -29,6 +29,17 @@ spec:
<schema_type>transposed</schema_type> <schema_type>transposed</schema_type>
</asynchronous_metric_log> </asynchronous_metric_log>
</clickhouse> </clickhouse>
# Enable Prometheus metrics endpoint
prometheus.xml: |
<clickhouse>
<prometheus>
<endpoint>/metrics</endpoint>
<port>9363</port>
<metrics>true</metrics>
<events>true</events>
<asynchronous_metrics>true</asynchronous_metrics>
</prometheus>
</clickhouse>
users: users:
admin/k8s_secret_password: clickhouse-credentials/admin admin/k8s_secret_password: clickhouse-credentials/admin
admin/networks/ip: "::/0" admin/networks/ip: "::/0"

View File

@@ -5,6 +5,8 @@ export CLICKHOUSE_HOST := env("CLICKHOUSE_HOST", "")
export CLICKHOUSE_CHART_VERSION := env("CLICKHOUSE_CHART_VERSION", "0.25.5") export CLICKHOUSE_CHART_VERSION := env("CLICKHOUSE_CHART_VERSION", "0.25.5")
export CLICKHOUSE_IMAGE := env("CLICKHOUSE_IMAGE", "clickhouse/clickhouse-server:25.10") export CLICKHOUSE_IMAGE := env("CLICKHOUSE_IMAGE", "clickhouse/clickhouse-server:25.10")
export EXTERNAL_SECRETS_NAMESPACE := env("EXTERNAL_SECRETS_NAMESPACE", "external-secrets") export EXTERNAL_SECRETS_NAMESPACE := env("EXTERNAL_SECRETS_NAMESPACE", "external-secrets")
export PROMETHEUS_NAMESPACE := env("PROMETHEUS_NAMESPACE", "monitoring")
export MONITORING_ENABLED := env("MONITORING_ENABLED", "")
# ClickHouse resource settings # ClickHouse resource settings
export CLICKHOUSE_MEMORY_REQUEST := env("CLICKHOUSE_MEMORY_REQUEST", "1Gi") export CLICKHOUSE_MEMORY_REQUEST := env("CLICKHOUSE_MEMORY_REQUEST", "1Gi")
@@ -107,6 +109,16 @@ install:
--placeholder="e.g., clickhouse.example.com" --placeholder="e.g., clickhouse.example.com"
) )
done done
# Check if Prometheus is available and ask about monitoring
if helm status kube-prometheus-stack -n ${PROMETHEUS_NAMESPACE} &>/dev/null; then
if [ -z "${MONITORING_ENABLED}" ]; then
if gum confirm "Enable Prometheus monitoring?"; then
MONITORING_ENABLED="true"
else
MONITORING_ENABLED="false"
fi
fi
fi
echo "Installing ClickHouse..." echo "Installing ClickHouse..."
just create-namespace just create-namespace
just install-zookeeper just install-zookeeper
@@ -124,6 +136,10 @@ install:
kubectl wait --for=jsonpath='{.status.status}'=Completed \ kubectl wait --for=jsonpath='{.status.status}'=Completed \
clickhouseinstallation/clickhouse -n ${CLICKHOUSE_NAMESPACE} --timeout=600s clickhouseinstallation/clickhouse -n ${CLICKHOUSE_NAMESPACE} --timeout=600s
just setup-ingress ${CLICKHOUSE_HOST} just setup-ingress ${CLICKHOUSE_HOST}
# Setup monitoring if enabled
if [ "${MONITORING_ENABLED}" = "true" ]; then
just setup-monitoring
fi
echo "ClickHouse installation completed successfully" echo "ClickHouse installation completed successfully"
echo "ClickHouse API at: https://${CLICKHOUSE_HOST}" echo "ClickHouse API at: https://${CLICKHOUSE_HOST}"
@@ -137,6 +153,27 @@ setup-ingress host:
kubectl apply -n ${CLICKHOUSE_NAMESPACE} -f clickhouse-ingress.yaml kubectl apply -n ${CLICKHOUSE_NAMESPACE} -f clickhouse-ingress.yaml
echo "ClickHouse Ingress configured successfully" echo "ClickHouse Ingress configured successfully"
# Setup Prometheus monitoring for ClickHouse
setup-monitoring:
#!/bin/bash
set -euo pipefail
echo "Setting up Prometheus monitoring for ClickHouse..."
kubectl label namespace ${CLICKHOUSE_NAMESPACE} buun.channel/enable-monitoring=true --overwrite
MONITORING_ENABLED="true" gomplate -f clickhouse-servicemonitor.gomplate.yaml \
-o clickhouse-servicemonitor.yaml
kubectl apply -f clickhouse-servicemonitor.yaml
echo "Prometheus monitoring configured successfully"
# Remove Prometheus monitoring for ClickHouse
remove-monitoring:
#!/bin/bash
set -euo pipefail
echo "Removing Prometheus monitoring for ClickHouse..."
kubectl delete servicemonitor clickhouse clickhouse-operator -n ${CLICKHOUSE_NAMESPACE} --ignore-not-found
kubectl delete service clickhouse-metrics -n ${CLICKHOUSE_NAMESPACE} --ignore-not-found
kubectl label namespace ${CLICKHOUSE_NAMESPACE} buun.channel/enable-monitoring- --ignore-not-found
echo "Prometheus monitoring removed"
# Uninstall ClickHouse (delete_volumes='false' to preserve PVCs and namespace) # Uninstall ClickHouse (delete_volumes='false' to preserve PVCs and namespace)
uninstall delete-volumes='true': uninstall delete-volumes='true':
#!/bin/bash #!/bin/bash