diff --git a/jupyterhub/images/datastack-cuda-notebook/Dockerfile b/jupyterhub/images/datastack-cuda-notebook/Dockerfile index d7ddb35..217d09a 100644 --- a/jupyterhub/images/datastack-cuda-notebook/Dockerfile +++ b/jupyterhub/images/datastack-cuda-notebook/Dockerfile @@ -20,7 +20,16 @@ RUN apt-get update --yes && \ apt-get install --yes --no-install-recommends \ bash jq \ "openjdk-${openjdk_version}-jre-headless" \ - ca-certificates-java && \ + ca-certificates-java \ + gnupg + +# Install ClickHouse client +RUN curl -fsSL 'https://packages.clickhouse.com/rpm/lts/repodata/repomd.xml.key' | \ + gpg --dearmor -o /usr/share/keyrings/clickhouse-keyring.gpg && \ + echo "deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb stable main" | \ + tee /etc/apt/sources.list.d/clickhouse.list && \ + apt-get update --yes && \ + apt-get install --yes --no-install-recommends clickhouse-client && \ apt-get clean && rm -rf /var/lib/apt/lists/* # If spark_version is not set, latest Spark will be installed @@ -70,6 +79,7 @@ RUN mamba install --yes \ 'aif360' \ 'airflow' \ 'chromadb' \ + 'clickhouse-connect' \ 'csvkit' \ 'dalex' \ 'datafusion' \ diff --git a/jupyterhub/images/datastack-notebook/Dockerfile b/jupyterhub/images/datastack-notebook/Dockerfile index 0623166..72135bf 100644 --- a/jupyterhub/images/datastack-notebook/Dockerfile +++ b/jupyterhub/images/datastack-notebook/Dockerfile @@ -20,7 +20,16 @@ RUN apt-get update --yes && \ apt-get install --yes --no-install-recommends \ bash jq \ "openjdk-${openjdk_version}-jre-headless" \ - ca-certificates-java && \ + ca-certificates-java \ + gnupg + +# Install ClickHouse client +RUN curl -fsSL 'https://packages.clickhouse.com/rpm/lts/repodata/repomd.xml.key' | \ + gpg --dearmor -o /usr/share/keyrings/clickhouse-keyring.gpg && \ + echo "deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb stable main" | \ + tee /etc/apt/sources.list.d/clickhouse.list && \ + apt-get update --yes && \ + apt-get install --yes --no-install-recommends clickhouse-client && \ apt-get clean && rm -rf /var/lib/apt/lists/* # If spark_version is not set, latest Spark will be installed @@ -70,6 +79,7 @@ RUN mamba install --yes \ 'aif360' \ 'airflow' \ 'chromadb' \ + 'clickhouse-connect' \ 'csvkit' \ 'dalex' \ 'datafusion' \ diff --git a/jupyterhub/jupyterhub-values.gomplate.yaml b/jupyterhub/jupyterhub-values.gomplate.yaml index 87f9ae3..5762c31 100644 --- a/jupyterhub/jupyterhub-values.gomplate.yaml +++ b/jupyterhub/jupyterhub-values.gomplate.yaml @@ -193,6 +193,15 @@ singleuser: ports: - port: 8200 protocol: TCP + - to: + - namespaceSelector: + matchLabels: + kubernetes.io/metadata.name: clickhouse + ports: + - port: 8123 + protocol: TCP + - port: 9000 + protocol: TCP # Allow DNS resolution - to: - ipBlock: diff --git a/jupyterhub/justfile b/jupyterhub/justfile index bb125cb..5162b9d 100644 --- a/jupyterhub/justfile +++ b/jupyterhub/justfile @@ -8,7 +8,7 @@ export JUPYTERHUB_OIDC_CLIENT_SESSION_MAX := env("JUPYTERHUB_OIDC_CLIENT_SESSION export JUPYTERHUB_NFS_PV_ENABLED := env("JUPYTERHUB_NFS_PV_ENABLED", "") export JUPYTERHUB_STORAGE_CLASS := env("JUPYTERHUB_STORAGE_CLASS", "") export JUPYTERHUB_VAULT_INTEGRATION_ENABLED := env("JUPYTERHUB_VAULT_INTEGRATION_ENABLED", "") -export JUPYTER_PYTHON_KERNEL_TAG := env("JUPYTER_PYTHON_KERNEL_TAG", "python-3.12-31") +export JUPYTER_PYTHON_KERNEL_TAG := env("JUPYTER_PYTHON_KERNEL_TAG", "python-3.12-34") export KERNEL_IMAGE_BUUN_STACK_REPOSITORY := env("KERNEL_IMAGE_BUUN_STACK_REPOSITORY", "buun-stack-notebook") export KERNEL_IMAGE_BUUN_STACK_CUDA_REPOSITORY := env("KERNEL_IMAGE_BUUN_STACK_CUDA_REPOSITORY", "buun-stack-cuda-notebook") export JUPYTER_PROFILE_MINIMAL_ENABLED := env("JUPYTER_PROFILE_MINIMAL_ENABLED", "false") @@ -26,6 +26,8 @@ export JUPYTERHUB_CULL_MAX_AGE := env("JUPYTERHUB_CULL_MAX_AGE", "604800") export VAULT_AGENT_LOG_LEVEL := env("VAULT_AGENT_LOG_LEVEL", "info") export JUPYTER_BUUNSTACK_LOG_LEVEL := env("JUPYTER_BUUNSTACK_LOG_LEVEL", "warning") export IMAGE_REGISTRY := env("IMAGE_REGISTRY", "localhost:30500") +export SPARK_DOWNLOAD_URL := env("SPARK_DOWNLOAD_URL", "https://dlcdn.apache.org/spark/") +export SPARK_VERSION := env("SPARK_VERSION", "4.0.1") export LONGHORN_NAMESPACE := env("LONGHORN_NAMESPACE", "longhorn") export KEYCLOAK_REALM := env("KEYCLOAK_REALM", "buunstack") export VAULT_HOST := env("VAULT_HOST", "") @@ -195,8 +197,8 @@ build-kernel-images: cp ../../../python-package/dist/*.whl ./ docker build -t \ ${IMAGE_REGISTRY}/${KERNEL_IMAGE_BUUN_STACK_REPOSITORY}:${JUPYTER_PYTHON_KERNEL_TAG} \ - --build-arg spark_version="3.5.4" \ - --build-arg spark_download_url="https://archive.apache.org/dist/spark/" \ + --build-arg spark_version="${SPARK_VERSION}" \ + --build-arg spark_download_url="${SPARK_DOWNLOAD_URL}" \ . ) rm -f ./images/datastack-notebook/*.whl @@ -206,8 +208,8 @@ build-kernel-images: cp ../../../python-package/dist/*.whl ./ docker build -t \ ${IMAGE_REGISTRY}/${KERNEL_IMAGE_BUUN_STACK_CUDA_REPOSITORY}:${JUPYTER_PYTHON_KERNEL_TAG} \ - --build-arg spark_version="3.5.4" \ - --build-arg spark_download_url="https://archive.apache.org/dist/spark/" \ + --build-arg spark_version="${SPARK_VERSION}" \ + --build-arg spark_download_url="${SPARK_DOWNLOAD_URL}" \ . ) rm -f ./images/datastack-cuda-notebook/*.whl