# https://raw.githubusercontent.com/Altinity/clickhouse-operator/refs/heads/master/deploy/zookeeper/zookeeper-manually/quick-start-persistent-volume/zookeeper-1-node.yaml
---
# Setup Service to provide access to Zookeeper for clients.
# Exposes the client port (2181) and the Prometheus metrics port (7000) of
# every pod labelled app=zookeeper, what=node.
apiVersion: v1
kind: Service
metadata:
  # DNS would be like zookeeper.zoons
  name: zookeeper
  labels:
    app: zookeeper
spec:
  ports:
    - port: 2181
      name: client
    - port: 7000
      name: prometheus
  selector:
    app: zookeeper
    what: node
---
# Setup Headless Service for StatefulSet.
# clusterIP is None, so this Service only provides per-pod DNS names; the
# StatefulSet below references it through serviceName. It carries the
# quorum (2888) and leader-election (3888) ports.
apiVersion: v1
kind: Service
metadata:
  # DNS would be like zookeeper-0.zookeepers.etc
  name: zookeepers
  labels:
    app: zookeeper
spec:
  ports:
    - port: 2888
      name: server
    - port: 3888
      name: leader-election
  clusterIP: None
  selector:
    app: zookeeper
    what: node
---
# Setup max number of unavailable pods in StatefulSet.
# NOTE(review): the StatefulSet below runs replicas: 1, so maxUnavailable: 1
# permits eviction of the only Zookeeper pod.
apiVersion: policy/v1
kind: PodDisruptionBudget
metadata:
  name: zookeeper-pod-disruption-budget
spec:
  selector:
    matchLabels:
      app: zookeeper
  maxUnavailable: 1
---
# Setup Zookeeper StatefulSet
# Possible params:
# 1. replicas
# 2. memory
# 3. cpu
# 4. storage
# 5. storageClassName
# 6. user to run app
apiVersion: apps/v1
kind: StatefulSet
metadata:
  # nodes would be named as zookeeper-0, zookeeper-1, zookeeper-2
  name: zookeeper
  labels:
    app: zookeeper
spec:
  selector:
    matchLabels:
      app: zookeeper
  serviceName: zookeepers
  # Keep in sync with the SERVERS env variable of the container below: the
  # startup script generates one server.N entry per SERVERS, not per replica.
  replicas: 1
  updateStrategy:
    type: RollingUpdate
  podManagementPolicy: OrderedReady
  template:
    metadata:
      labels:
        app: zookeeper
        what: node
      annotations:
        prometheus.io/port: '7000'
        prometheus.io/scrape: 'true'
    spec:
      affinity:
        # Never schedule two app=zookeeper pods on the same node.
        podAntiAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            - labelSelector:
                matchExpressions:
                  - key: "app"
                    operator: In
                    values:
                      - zookeeper
              # TODO think about multi-AZ EKS
              # topologyKey: topology.kubernetes.io/zone
              topologyKey: "kubernetes.io/hostname"
      containers:
        - name: kubernetes-zookeeper
          imagePullPolicy: IfNotPresent
          image: "docker.io/zookeeper:3.8.4"
          # NOTE(review): the startup script writes JVMFLAGS with -Xmx4G and
          # -XX:ActiveProcessorCount=8. The heap ceiling equals the 4Gi memory
          # limit and the processor count exceeds the cpu limit of 2; confirm
          # these are intended, since JVM non-heap memory comes on top of -Xmx.
          resources:
            requests:
              memory: "512M"
              cpu: "1"
            limits:
              memory: "4Gi"
              cpu: "2"
          ports:
            - containerPort: 2181
              name: client
            - containerPort: 2888
              name: server
            - containerPort: 3888
              name: leader-election
            - containerPort: 7000
              name: prometheus
          env:
            # Ensemble size consumed by the startup script below.
            - name: SERVERS
              value: "1"
          securityContext:
            allowPrivilegeEscalation: false
            runAsNonRoot: true
            capabilities:
              drop: [ALL]
            seccompProfile:
              type: RuntimeDefault
          # See those links for proper startup settings:
          # https://github.com/kow3ns/kubernetes-zookeeper/blob/master/docker/scripts/start-zookeeper
          # https://clickhouse.yandex/docs/en/operations/tips/#zookeeper
          # https://github.com/ClickHouse/ClickHouse/issues/11781
          #
          # The script below, in order:
          #   1. writes /conf/zoo.cfg, /conf/log4j.properties and /conf/java.env;
          #   2. splits the pod hostname "<name>-<ordinal>" into NAME and ORD and
          #      stores ORD+1 as the server id in ${ZOO_DATA_DIR}/myid;
          #   3. appends one server.N line per SERVERS, pointing at
          #      <name>-<N-1>.<domain> on the server and election ports;
          #   4. appends a single group.1 line listing every server id 1..SERVERS
          #      (built in a loop so it stays consistent with the server.N and
          #      weight.N lines for any SERVERS value);
          #   5. appends weight.N lines: 10 for server 1, 1 for all others;
          #   6. runs zkServer.sh start-foreground.
          # Every step is chained with && so a failure aborts the startup.
          command:
            - bash
            - -x
            - -c
            - |
              HOST=`hostname -s` &&
              DOMAIN=`hostname -d` &&
              CLIENT_PORT=2181 &&
              SERVER_PORT=2888 &&
              ELECTION_PORT=3888 &&
              PROMETHEUS_PORT=7000 &&
              ZOO_DATA_DIR=/var/lib/zookeeper/data &&
              ZOO_DATA_LOG_DIR=/var/lib/zookeeper/datalog &&
              {
                echo "clientPort=${CLIENT_PORT}"
                echo 'tickTime=2000'
                echo 'initLimit=300'
                echo 'syncLimit=10'
                echo 'maxClientCnxns=2000'
                echo 'maxTimeToWaitForEpoch=2000'
                echo 'maxSessionTimeout=60000000'
                echo "dataDir=${ZOO_DATA_DIR}"
                echo "dataLogDir=${ZOO_DATA_LOG_DIR}"
                echo 'autopurge.snapRetainCount=10'
                echo 'autopurge.purgeInterval=1'
                echo 'preAllocSize=131072'
                echo 'snapCount=3000000'
                echo 'leaderServes=yes'
                echo 'standaloneEnabled=false'
                echo '4lw.commands.whitelist=*'
                echo 'metricsProvider.className=org.apache.zookeeper.metrics.prometheus.PrometheusMetricsProvider'
                echo "metricsProvider.httpPort=${PROMETHEUS_PORT}"
                echo "skipACL=true"
                echo "fastleader.maxNotificationInterval=10000"
              } > /conf/zoo.cfg &&
              {
                echo "zookeeper.root.logger=CONSOLE"
                echo "zookeeper.console.threshold=INFO"
                echo "log4j.rootLogger=\${zookeeper.root.logger}"
                echo "log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender"
                echo "log4j.appender.CONSOLE.Threshold=\${zookeeper.console.threshold}"
                echo "log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout"
                echo "log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} - %-5p [%t:%C{1}@%L] - %m%n"
              } > /conf/log4j.properties &&
              echo 'JVMFLAGS="-Xms128M -Xmx4G -XX:ActiveProcessorCount=8 -XX:+AlwaysPreTouch -Djute.maxbuffer=8388608 -XX:MaxGCPauseMillis=50"' > /conf/java.env &&
              if [[ $HOST =~ (.*)-([0-9]+)$ ]]; then
                NAME=${BASH_REMATCH[1]} &&
                ORD=${BASH_REMATCH[2]};
              else
                echo "Failed to parse name and ordinal of Pod" &&
                exit 1;
              fi &&
              mkdir -pv ${ZOO_DATA_DIR} &&
              mkdir -pv ${ZOO_DATA_LOG_DIR} &&
              export MY_ID=$((ORD+1)) &&
              echo $MY_ID > $ZOO_DATA_DIR/myid &&
              for (( i=1; i<=$SERVERS; i++ )); do
                echo "server.$i=$NAME-$((i-1)).$DOMAIN:$SERVER_PORT:$ELECTION_PORT" >> /conf/zoo.cfg;
              done &&
              GROUP_MEMBERS="" &&
              for (( i=1; i<=$SERVERS; i++ )); do
                GROUP_MEMBERS="${GROUP_MEMBERS:+$GROUP_MEMBERS:}$i";
              done &&
              echo "group.1=$GROUP_MEMBERS" >> /conf/zoo.cfg &&
              for (( i=1; i<=$SERVERS; i++ )); do
                WEIGHT=1
                if [[ $i == 1 ]]; then
                  WEIGHT=10
                fi
                echo "weight.$i=$WEIGHT" >> /conf/zoo.cfg;
              done &&
              zkServer.sh start-foreground
          # Readiness: sends the "mntr" four-letter command to 127.0.0.1:2181
          # and retries every second while the reply is the "not currently
          # serving requests" message. Then:
          #   - leader with synced followers other than "0": ready only if
          #     "zkCli.sh ls /" succeeds; leader with "0": ready immediately;
          #   - follower: ready only if its peer state matches
          #     "following - broadcast" and "zkCli.sh ls /" succeeds;
          #   - anything else: not ready.
          # The script is a folded single-quoted scalar, so it reaches bash as
          # one line; every statement must therefore end with ";".
          readinessProbe:
            exec:
              command:
                - bash
                - -c
                - '
                  IFS=;
                  MNTR=$(exec 3<>/dev/tcp/127.0.0.1/2181 ; printf "mntr" >&3 ; tee <&3; exec 3<&- ;);
                  while [[ "$MNTR" == "This ZooKeeper instance is not currently serving requests" ]];
                  do
                  echo "wait mntr works";
                  sleep 1;
                  MNTR=$(exec 3<>/dev/tcp/127.0.0.1/2181 ; printf "mntr" >&3 ; tee <&3; exec 3<&- ;);
                  done;
                  STATE=$(echo -e $MNTR | grep zk_server_state | cut -d " " -f 2);
                  if [[ "$STATE" =~ "leader" ]]; then
                  echo "check leader state";
                  SYNCED_FOLLOWERS=$(echo -e $MNTR | grep zk_synced_followers | awk -F"[[:space:]]+" "{print \$2}" | cut -d "." -f 1);
                  if [[ "$SYNCED_FOLLOWERS" != "0" ]]; then
                  ./bin/zkCli.sh ls /;
                  exit $?;
                  else
                  exit 0;
                  fi;
                  elif [[ "$STATE" =~ "follower" ]]; then
                  echo "check follower state";
                  PEER_STATE=$(echo -e $MNTR | grep zk_peer_state);
                  if [[ "$PEER_STATE" =~ "following - broadcast" ]]; then
                  ./bin/zkCli.sh ls /;
                  exit $?;
                  else
                  exit 1;
                  fi;
                  else
                  exit 1;
                  fi
                  '
            initialDelaySeconds: 15
            periodSeconds: 10
            timeoutSeconds: 60
          # Liveness: sends "ruok" to 127.0.0.1:2181 and succeeds only when the
          # reply is exactly "imok".
          livenessProbe:
            exec:
              command:
                - bash
                - -xc
                - 'date && OK=$(exec 3<>/dev/tcp/127.0.0.1/2181 ; printf "ruok" >&3 ; IFS=; tee <&3; exec 3<&- ;); if [[ "$OK" == "imok" ]]; then exit 0; else exit 1; fi'
            initialDelaySeconds: 10
            periodSeconds: 30
            timeoutSeconds: 5
          volumeMounts:
            # Holds both ZOO_DATA_DIR and ZOO_DATA_LOG_DIR used by the startup
            # script (/var/lib/zookeeper/data and /var/lib/zookeeper/datalog).
            - name: datadir-volume
              mountPath: /var/lib/zookeeper
      # Pod-level identity: run as uid/gid 1000 and apply fsGroup 1000.
      securityContext:
        runAsUser: 1000
        runAsGroup: 1000
        fsGroup: 1000
  volumeClaimTemplates:
    - metadata:
        name: datadir-volume
      spec:
        accessModes:
          - ReadWriteOnce
        resources:
          requests:
            storage: 25Gi