set fallback := true

export LOCAL_K8S_HOST := env("LOCAL_K8S_HOST", "")
export EXTERNAL_K8S_HOST := env("EXTERNAL_K8S_HOST", "")
export KEYCLOAK_HOST := env("KEYCLOAK_HOST", "")
export KEYCLOAK_REALM := env("KEYCLOAK_REALM", "buunstack")
export K8S_OIDC_CLIENT_ID := env("K8S_OIDC_CLIENT_ID", "k8s")
export K3S_ENABLE_REGISTRY := env("K3S_ENABLE_REGISTRY", "true")

[private]
default:
    @just --list --unsorted --list-submodules

# Install k3s cluster
install:
    #!/bin/bash
    set -euo pipefail
    just env::check
    username=$(gum input --prompt="SSH username: " --value="${USER}" --width=100)
    kubeconfig=""
    context=""
    if gum confirm "Update KUBECONFIG?"; then
        kubeconfig=$(
            gum input --prompt="KUBECONFIG file: " --value="${HOME}/.kube/config" --width=100
        )
        context=$(
            gum input --prompt="Context name: " --value="${LOCAL_K8S_HOST}" --width=100
        )
    fi
    args=(
        "install"
        "--host" "${LOCAL_K8S_HOST}"
        "--tls-san" "${EXTERNAL_K8S_HOST}"
        "--user" "${username}"
    )
    if [ -n "${context}" ]; then
        args+=("--context" "${context}")
    fi
    if [ -n "${kubeconfig}" ]; then
        mkdir -p "$(dirname "${kubeconfig}")"
        args+=("--local-path" "${kubeconfig}" "--merge")
    fi
    echo "Running: k3sup ${args[*]}"
    k3sup "${args[@]}"
    if [ -n "${context}" ]; then
        kubectl config use-context "${context}"
    fi
    if [ "${K3S_ENABLE_REGISTRY}" = "true" ]; then
        echo "Setting up local Docker registry..."
        # Deploy Docker registry to the cluster
        kubectl apply -f ./registry/registry.yaml
        # Set Pod Security Standard for the registry namespace
        kubectl label namespace registry pod-security.kubernetes.io/enforce=restricted --overwrite
        # Wait for the registry deployment
        echo "Waiting for registry to be ready..."
        kubectl wait --for=condition=available --timeout=60s deployment/registry -n registry
        # Configure registries.yaml for k3s
        just configure-registry
        echo "✓ Local Docker registry deployed and configured"
        echo ""
        echo "Registry accessible at:"
        echo "  localhost:30500"
        echo ""
        echo "Usage:"
        echo "  export DOCKER_HOST=ssh://${LOCAL_K8S_HOST}"
        echo "  docker build -t localhost:30500/myapp:latest ."
        echo "  docker push localhost:30500/myapp:latest"
        echo "  kubectl run myapp --image=localhost:30500/myapp:latest"
    fi
    echo "k3s cluster installed on ${LOCAL_K8S_HOST}."

# Uninstall k3s cluster
uninstall:
    #!/bin/bash
    set -euo pipefail
    if gum confirm "Uninstall k3s from ${LOCAL_K8S_HOST}?"; then
        # Check if Longhorn is installed and uninstall it first
        if helm status longhorn -n longhorn-system &>/dev/null; then
            echo "Detected Longhorn installation. Uninstalling Longhorn first to prevent CSI mount issues..."
            just longhorn::uninstall || echo "Warning: Longhorn uninstallation had errors, continuing..."
            # Wait a bit for CSI cleanup
            echo "Waiting for CSI cleanup..."
            sleep 5
        fi
        # Force cleanup of any remaining CSI mounts
        echo "Cleaning up CSI mounts..."
        ssh "${LOCAL_K8S_HOST}" "sudo pkill -9 umount || true"
        ssh "${LOCAL_K8S_HOST}" "sudo umount -f /var/lib/kubelet/plugins/kubernetes.io/csi/*/globalmount 2>/dev/null || true"
        ssh "${LOCAL_K8S_HOST}" "sudo umount -l /var/lib/kubelet/plugins/kubernetes.io/csi/*/globalmount 2>/dev/null || true"
        ssh "${LOCAL_K8S_HOST}" "sudo /usr/local/bin/k3s-uninstall.sh"
        echo "Cleaning up kubeconfig entries..."
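        # The context's backing cluster and user entries do not necessarily
        # share the context's name, so resolve them from the kubeconfig before
        # deleting. A minimal sketch of the same lookup (assuming a context
        # named "myhost"):
        #   kubectl config view -o json \
        #     | jq -r '.contexts[] | select(.name == "myhost") | .context.cluster // empty'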
        cluster_name=$(kubectl config view -o json | jq -r ".contexts[] | select(.name == \"${LOCAL_K8S_HOST}\") | .context.cluster // empty")
        user_name=$(kubectl config view -o json | jq -r ".contexts[] | select(.name == \"${LOCAL_K8S_HOST}\") | .context.user // empty")
        if kubectl config get-contexts "${LOCAL_K8S_HOST}" &>/dev/null; then
            kubectl config delete-context "${LOCAL_K8S_HOST}"
            echo "Deleted context: ${LOCAL_K8S_HOST}"
        fi
        if [ -n "${cluster_name}" ] && kubectl config get-clusters | grep -q "^${cluster_name}$"; then
            kubectl config delete-cluster "${cluster_name}"
            echo "Deleted cluster: ${cluster_name}"
        fi
        if [ -n "${user_name}" ] && kubectl config get-users | grep -q "^${user_name}$"; then
            kubectl config delete-user "${user_name}"
            echo "Deleted user: ${user_name}"
        fi
        echo "k3s cluster uninstalled from ${LOCAL_K8S_HOST}."
    else
        echo "Uninstallation cancelled." >&2
        exit 1
    fi

# Stop k3s cluster gracefully (with volume detachment and CSI cleanup)
stop:
    #!/bin/bash
    set -euo pipefail
    START_TIME=$(date +%s)
    elapsed() {
        echo "$(($(date +%s) - START_TIME))s"
    }
    echo "Starting graceful k3s shutdown..."
    # Get the node name
    NODE_NAME=$(kubectl get nodes -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || true)
    if [ -z "$NODE_NAME" ]; then
        echo "⚠ Could not get node name, k3s may already be stopped"
        echo "Running k3s-killall.sh for cleanup..."
        ssh "${LOCAL_K8S_HOST}" "sudo /usr/local/bin/k3s-killall.sh 2>/dev/null || true"
        echo "✓ Cleanup completed ($(elapsed))"
        exit 0
    fi
    echo "Node: $NODE_NAME"
    # Drain the node to gracefully evict all pods and detach Longhorn volumes.
    # This is the recommended way to shut down with Longhorn
    # (see: https://github.com/longhorn/longhorn/issues/7206)
    echo "[$(elapsed)] Draining node to gracefully detach Longhorn volumes..."
    kubectl drain "$NODE_NAME" \
        --ignore-daemonsets \
        --delete-emptydir-data \
        --force \
        --grace-period=30 \
        --timeout=90s 2>&1 || {
            echo "⚠ Drain had warnings (this is usually OK for single-node clusters)"
        }
    echo "[$(elapsed)] Drain completed"
    # Wait for Longhorn volumes to be fully detached
    if helm status longhorn -n longhorn-system &>/dev/null; then
        echo "[$(elapsed)] Waiting for Longhorn volumes to be detached..."
        TIMEOUT=30
        ELAPSED=0
        while [ $ELAPSED -lt $TIMEOUT ]; do
            ATTACHED=$(kubectl get volumes.longhorn.io -n longhorn-system -o json 2>/dev/null | \
                jq -r '.items[] | select(.status.state == "attached") | .metadata.name' 2>/dev/null || true)
            if [ -z "$ATTACHED" ]; then
                echo "[$(elapsed)] ✓ All Longhorn volumes detached successfully"
                break
            fi
            ATTACHED_COUNT=$(echo "$ATTACHED" | grep -c . || echo 0)
            echo "  Still waiting for $ATTACHED_COUNT volume(s) to detach..."
            sleep 2
            ELAPSED=$((ELAPSED + 2))
        done
        if [ $ELAPSED -ge $TIMEOUT ]; then
            echo "[$(elapsed)] ⚠ Warning: Timeout waiting for volumes to detach"
        fi
    fi
    # Stop and disable the k3s service to prevent auto-start on reboot
    echo "[$(elapsed)] Stopping and disabling k3s service..."
    ssh "${LOCAL_K8S_HOST}" "sudo systemctl stop k3s 2>/dev/null || true"
    ssh "${LOCAL_K8S_HOST}" "sudo systemctl disable k3s 2>/dev/null || true"
    # Run k3s-killall.sh to clean up all container processes
    echo "[$(elapsed)] Running k3s-killall.sh to stop all container processes..."
    ssh "${LOCAL_K8S_HOST}" 'bash -s' << 'EOF'
    set +e
    if [ -x /usr/local/bin/k3s-killall.sh ]; then
        echo "  Executing /usr/local/bin/k3s-killall.sh..."
        timeout 180 sudo /usr/local/bin/k3s-killall.sh || {
            echo "  k3s-killall.sh timed out, forcing cleanup..."
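            # GNU timeout exits non-zero (124) when the 180-second limit is
            # hit, which is what lands us in this fallback branch.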
            # Use pgrep/kill instead of pkill -f to avoid matching ourselves
            for pid in $(pgrep -x k3s 2>/dev/null); do
                sudo kill -9 "$pid" 2>/dev/null || true
            done
            sudo pkill -9 -x containerd-shim-runc-v2 2>/dev/null || true
            sudo pkill -9 -x containerd 2>/dev/null || true
        }
    else
        echo "  k3s-killall.sh not found, stopping manually..."
        sudo systemctl stop k3s 2>/dev/null || true
        for pid in $(pgrep -x k3s 2>/dev/null); do
            sudo kill -9 "$pid" 2>/dev/null || true
        done
        sudo pkill -9 -x containerd-shim-runc-v2 2>/dev/null || true
    fi
    exit 0
    EOF
    # Wait for containerd-shim processes to terminate
    echo "[$(elapsed)] Waiting for containerd-shim processes to terminate..."
    SHIM_TIMEOUT=15
    SHIM_ELAPSED=0
    while [ $SHIM_ELAPSED -lt $SHIM_TIMEOUT ]; do
        SHIM_COUNT=$(ssh "${LOCAL_K8S_HOST}" "pgrep containerd-shim 2>/dev/null | wc -l | tr -d ' '")
        SHIM_COUNT=${SHIM_COUNT:-0}
        if [ "$SHIM_COUNT" -eq 0 ] 2>/dev/null; then
            echo "[$(elapsed)] ✓ All containerd-shim processes terminated"
            break
        fi
        echo "  Still waiting for $SHIM_COUNT containerd-shim process(es)..."
        sleep 2
        SHIM_ELAPSED=$((SHIM_ELAPSED + 2))
    done
    if [ $SHIM_ELAPSED -ge $SHIM_TIMEOUT ]; then
        echo "[$(elapsed)] ⚠ Warning: containerd-shim processes did not terminate within timeout"
        echo "  Forcing termination..."
        ssh "${LOCAL_K8S_HOST}" "sudo pkill -9 containerd-shim 2>/dev/null || true"
        sleep 1
    fi
    echo ""
    echo "✓ k3s stopped gracefully on ${LOCAL_K8S_HOST}. (Total: $(elapsed))"
    echo "You can now safely shut down the machine."
    echo ""
    echo "IMPORTANT: k3s has been disabled and will NOT start automatically on reboot."
    echo "After reboot, you MUST manually run:"
    echo "  just k8s::start"
    echo "  just vault::unseal  # If Vault is installed"

# Start k3s cluster
start:
    #!/bin/bash
    set -euo pipefail
    echo "Enabling and starting k3s service..."
    ssh "${LOCAL_K8S_HOST}" "sudo systemctl enable k3s"
    ssh "${LOCAL_K8S_HOST}" "sudo systemctl start k3s"
    echo "Waiting for k3s to be ready..."
    sleep 5
    kubectl wait --for=condition=Ready nodes --all --timeout=60s
    # Uncordon the node if it was cordoned by 'just k8s::stop'
    NODE_NAME=$(kubectl get nodes -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || true)
    if [ -n "$NODE_NAME" ]; then
        NODE_SCHEDULABLE=$(kubectl get node "$NODE_NAME" -o jsonpath='{.spec.unschedulable}' 2>/dev/null || echo "false")
        if [ "$NODE_SCHEDULABLE" = "true" ]; then
            echo "Uncordoning node $NODE_NAME..."
            kubectl uncordon "$NODE_NAME"
        fi
    fi
    # Wait for the Longhorn CSI plugin to be ready before other pods start
    # using volumes (namespace matches the longhorn-system checks above)
    if helm status longhorn -n longhorn-system &>/dev/null; then
        echo "Waiting for Longhorn CSI plugin to be ready..."
        if ! kubectl wait --for=condition=Ready pod -l app=longhorn-csi-plugin -n longhorn-system --timeout=120s 2>/dev/null; then
            echo "⚠ Longhorn CSI plugin not ready, restarting pod..."
            kubectl delete pod -l app=longhorn-csi-plugin -n longhorn-system --ignore-not-found
            kubectl wait --for=condition=Ready pod -l app=longhorn-csi-plugin -n longhorn-system --timeout=120s
        fi
        echo "✓ Longhorn CSI plugin is ready"
    fi
    echo "k3s started on ${LOCAL_K8S_HOST}."

# Restart k3s cluster (with CSI cleanup)
restart:
    #!/bin/bash
    set -euo pipefail
    just k8s::stop
    just k8s::start

# Set up k8s OIDC authentication
setup-oidc-auth:
    #!/bin/bash
    set -euo pipefail
    just env::check
    # Clear the OIDC token cache if it exists
    if [ -d ~/.kube/cache/oidc-login ]; then
        if gum confirm "Clear OIDC token cache? (Recommended after Keycloak reinstall)"; then
            rm -rf ~/.kube/cache/oidc-login/
            echo "OIDC token cache cleared."
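            # kubelogin (kubectl oidc-login) caches ID/refresh tokens under
            # ~/.kube/cache/oidc-login; tokens issued by a previous Keycloak
            # instance would be rejected by the new issuer, so clearing the
            # cache forces a fresh browser login on the next kubectl call.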
        fi
    fi
    gomplate -f ./k3s/config.gomplate.yaml | \
        ssh "${LOCAL_K8S_HOST}" "sudo tee /etc/rancher/k3s/config.yaml > /dev/null"
    ssh "${LOCAL_K8S_HOST}" "sudo systemctl restart k3s"
    kubectl config set-credentials "${LOCAL_K8S_HOST}-oidc" \
        --exec-api-version=client.authentication.k8s.io/v1beta1 \
        --exec-command=kubectl \
        --exec-arg=oidc-login \
        --exec-arg=get-token \
        --exec-arg=--oidc-issuer-url="https://${KEYCLOAK_HOST}/realms/${KEYCLOAK_REALM}" \
        --exec-arg=--oidc-client-id="${K8S_OIDC_CLIENT_ID}"
    kubectl config set-cluster "${LOCAL_K8S_HOST}-oidc" \
        --server="https://${EXTERNAL_K8S_HOST}"
    kubectl config set-context "${LOCAL_K8S_HOST}-oidc" \
        --cluster="${LOCAL_K8S_HOST}-oidc" --user="${LOCAL_K8S_HOST}-oidc"
    kubectl config use-context "${LOCAL_K8S_HOST}-oidc"

# Create the container registry credentials
create-regcred namespace='default':
    #!/bin/bash
    set -euo pipefail
    while [ -z "${CONTAINER_REGISTRY_SERVER:-}" ]; do
        if ! CONTAINER_REGISTRY_SERVER=$(
            gum input --prompt="Container registry server: " --width=100 \
                --placeholder="e.g., https://index.docker.io/v1/ or ghcr.io"
        ); then
            echo "Setup cancelled." >&2
            exit 1
        fi
    done
    while [ -z "${CONTAINER_REGISTRY_USERNAME:-}" ]; do
        if ! CONTAINER_REGISTRY_USERNAME=$(
            gum input --prompt="Container registry username: " --width=100
        ); then
            echo "Setup cancelled." >&2
            exit 1
        fi
    done
    while [ -z "${CONTAINER_REGISTRY_PASSWORD:-}" ]; do
        if ! CONTAINER_REGISTRY_PASSWORD=$(
            gum input --prompt="Container registry password or token: " --password --width=100
        ); then
            echo "Setup cancelled." >&2
            exit 1
        fi
    done
    while [ -z "${CONTAINER_REGISTRY_EMAIL:-}" ]; do
        if ! CONTAINER_REGISTRY_EMAIL=$(
            gum input --prompt="Container registry email: " --width=100
        ); then
            echo "Setup cancelled." >&2
            exit 1
        fi
    done
    kubectl create -n {{ namespace }} secret docker-registry regcred \
        --docker-server="${CONTAINER_REGISTRY_SERVER}" \
        --docker-username="${CONTAINER_REGISTRY_USERNAME}" \
        --docker-password="${CONTAINER_REGISTRY_PASSWORD}" \
        --docker-email="${CONTAINER_REGISTRY_EMAIL}"

# Delete the container registry credentials
delete-regcred namespace='default':
    kubectl delete -n {{ namespace }} secret regcred --ignore-not-found

# Copy the container registry credentials from the default namespace
copy-regcred namespace:
    #!/bin/bash
    set -euo pipefail
    if ! kubectl get -n default secret regcred &>/dev/null; then
        just create-regcred default
    fi
    if kubectl get -n {{ namespace }} secret regcred &>/dev/null; then
        kubectl delete -n {{ namespace }} secret regcred
    fi
    kubectl get -n default secret regcred -o json | \
        sed "s/\"namespace\": \"default\"/\"namespace\": \"{{ namespace }}\"/g" | \
        kubectl apply -n {{ namespace }} -f -

# Check local Docker registry status
check-registry:
    #!/bin/bash
    set -euo pipefail
    echo "Checking local Docker registry status..."
    echo ""
    # Check if the registry deployment exists
    echo "1. Registry deployment status:"
    if kubectl get deployment registry -n registry &>/dev/null; then
        echo "✓ Registry deployment exists"
        kubectl get deployment registry -n registry
        echo ""
        echo "Registry pods:"
        kubectl get pods -n registry -l app=registry
    else
        echo "✗ Registry deployment not found"
    fi
    echo ""
    # Check the registry services
    echo "2. Registry services:"
    if kubectl get service registry -n registry &>/dev/null; then
        echo "✓ Registry service exists"
        kubectl get service registry registry-nodeport -n registry
    else
        echo "✗ Registry services not found"
    fi
    echo ""
    # Check the k3s registries configuration
    echo "3. K3s registries configuration:"
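    # registries.yaml tells k3s's embedded containerd how to reach image
    # registries. A minimal sketch of what the gomplate template is assumed
    # to render for the local NodePort registry:
    #   mirrors:
    #     "localhost:30500":
    #       endpoint:
    #         - "http://localhost:30500"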
    if ssh "${LOCAL_K8S_HOST}" "sudo test -f /etc/rancher/k3s/registries.yaml"; then
        echo "✓ Registries configuration exists"
        ssh "${LOCAL_K8S_HOST}" "sudo cat /etc/rancher/k3s/registries.yaml"
    else
        echo "✗ Registries configuration not found"
    fi
    echo ""
    # Test registry accessibility
    echo "4. Registry accessibility test:"
    echo "Testing from k3s host (localhost:30500):"
    if ssh "${LOCAL_K8S_HOST}" "curl -f -s http://localhost:30500/v2/" &>/dev/null; then
        echo "✓ Registry is accessible from k3s host"
        # Show the registry catalog
        echo "Registry catalog:"
        ssh "${LOCAL_K8S_HOST}" "curl -s http://localhost:30500/v2/_catalog" 2>/dev/null || \
            echo "Unable to retrieve catalog"
    else
        echo "✗ Registry is not accessible from k3s host"
    fi
    echo ""
    echo "Note: To push images, use:"
    echo "  export DOCKER_HOST=ssh://${LOCAL_K8S_HOST}"
    echo "  docker push localhost:30500/myimage:tag"

# Deploy Docker registry manually
deploy-registry:
    #!/bin/bash
    set -euo pipefail
    echo "Deploying local Docker registry..."
    kubectl apply -f ./registry/registry.yaml
    # Set Pod Security Standard for the registry namespace
    kubectl label namespace registry pod-security.kubernetes.io/enforce=restricted --overwrite
    echo "Waiting for registry to be ready..."
    kubectl wait --for=condition=available --timeout=60s deployment/registry -n registry
    echo "✓ Registry deployed and ready"

# Remove Docker registry
remove-registry:
    #!/bin/bash
    set -euo pipefail
    if gum confirm "Remove local Docker registry?"; then
        kubectl delete namespace registry --ignore-not-found
        echo "✓ Registry removed"
    else
        echo "Registry removal cancelled."
    fi

# Configure k3s to use the local registry
configure-registry:
    #!/bin/bash
    set -euo pipefail
    echo "Configuring k3s registries.yaml..."
    ssh "${LOCAL_K8S_HOST}" "sudo mkdir -p /etc/rancher/k3s"
    gomplate -f ./registry/registries.gomplate.yaml | \
        ssh "${LOCAL_K8S_HOST}" "sudo tee /etc/rancher/k3s/registries.yaml > /dev/null"
    echo "Restarting k3s to apply registry configuration..."
    ssh "${LOCAL_K8S_HOST}" "sudo systemctl restart k3s"
    echo "✓ Registry configuration applied"

# Wait until the given deployments in a namespace report all replicas ready
[positional-arguments]
wait-deployments-ready *args:
    #!/bin/bash
    set -euo pipefail
    namespace="$1"
    shift
    deployments=("$@")
    # Returns 0 (keep waiting) while at least one deployment is not fully ready
    check_ready() {
        for deployment in "${deployments[@]}"; do
            ready=$(kubectl get -n "${namespace}" deployment "${deployment}" \
                -o jsonpath="{.status.readyReplicas}" 2>/dev/null || true)
            replicas=$(kubectl get -n "${namespace}" deployment "${deployment}" \
                -o jsonpath="{.status.replicas}" 2>/dev/null || true)
            if [[ "${ready}" != "${replicas}" || -z "${ready}" ]]; then
                return 0
            fi
        done
        return 1
    }
    echo -n "Waiting for deployments $@ to be ready..."
    while check_ready; do
        echo -n "."
        sleep 2
    done
    echo "ok"

# Delete completed pods (all namespaces by default, or a single namespace)
delete-completed-pods namespace='':
    #!/bin/bash
    set -euo pipefail
    NAMESPACE="${NAMESPACE:-{{ namespace }}}"
    if [ -n "${NAMESPACE}" ]; then
        echo "Deleting completed pods in namespace '${NAMESPACE}'..."
        SELECTOR="--namespace=${NAMESPACE}"
    else
        echo "Deleting completed pods in all namespaces..."
        SELECTOR="--all-namespaces"
    fi
    # Find pods with status.phase=Succeeded or status.phase=Failed
    COMPLETED_PODS=$(kubectl get pods ${SELECTOR} -o json | \
        jq -r '.items[] | select(.status.phase == "Succeeded" or .status.phase == "Failed") | .metadata.namespace + "/" + .metadata.name')
    if [ -z "${COMPLETED_PODS}" ]; then
        echo "No completed pods found."
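        # COMPLETED_PODS holds one "namespace/pod" entry per line (built by
        # the jq filter above); an empty result means there is nothing to
        # clean up, so bail out before the confirmation prompt.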
        exit 0
    fi
    echo "Found completed pods:"
    echo "${COMPLETED_PODS}"
    echo ""
    if gum confirm "Delete these pods?"; then
        echo "${COMPLETED_PODS}" | while IFS='/' read -r ns pod; do
            echo "Deleting pod ${pod} in namespace ${ns}..."
            kubectl delete pod "${pod}" -n "${ns}" --ignore-not-found
        done
        echo "Completed pods deleted."
    else
        echo "Deletion cancelled."
    fi

# Delete completed jobs (all namespaces by default, or a single namespace)
delete-completed-jobs namespace='':
    #!/bin/bash
    set -euo pipefail
    NAMESPACE="${NAMESPACE:-{{ namespace }}}"
    if [ -n "${NAMESPACE}" ]; then
        echo "Deleting completed jobs in namespace '${NAMESPACE}'..."
        SELECTOR="--namespace=${NAMESPACE}"
    else
        echo "Deleting completed jobs in all namespaces..."
        SELECTOR="--all-namespaces"
    fi
    # Find jobs with status.succeeded > 0 or status.failed > 0
    COMPLETED_JOBS=$(kubectl get jobs ${SELECTOR} -o json | \
        jq -r '.items[] | select((.status.succeeded // 0) > 0 or (.status.failed // 0) > 0) | .metadata.namespace + "/" + .metadata.name')
    if [ -z "${COMPLETED_JOBS}" ]; then
        echo "No completed jobs found."
        exit 0
    fi
    echo "Found completed jobs:"
    echo "${COMPLETED_JOBS}"
    echo ""
    if gum confirm "Delete these jobs?"; then
        echo "${COMPLETED_JOBS}" | while IFS='/' read -r ns job; do
            echo "Deleting job ${job} in namespace ${ns}..."
            kubectl delete job "${job}" -n "${ns}" --ignore-not-found --cascade=foreground
        done
        echo "Completed jobs deleted."
    else
        echo "Deletion cancelled."
    fi
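
# Usage sketches (assuming this file is loaded as the `k8s` submodule, as the
# `just k8s::stop` calls above imply; `my-ns` is a hypothetical namespace):
#   just k8s::delete-completed-pods           # prompt across all namespaces
#   just k8s::delete-completed-jobs my-ns     # limit cleanup to one namespace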