feat(jupyterhub): connect to ClickHouse

Author: Masaki Yatsu
Date: 2025-09-10 11:58:50 +09:00
parent ab36360584
commit c93e01347f
4 changed files with 38 additions and 7 deletions

File 1 of 4: kernel image Dockerfile

@@ -20,7 +20,16 @@ RUN apt-get update --yes && \
     apt-get install --yes --no-install-recommends \
     bash jq \
     "openjdk-${openjdk_version}-jre-headless" \
-    ca-certificates-java && \
+    ca-certificates-java \
+    gnupg
+
+# Install ClickHouse client
+RUN curl -fsSL 'https://packages.clickhouse.com/rpm/lts/repodata/repomd.xml.key' | \
+    gpg --dearmor -o /usr/share/keyrings/clickhouse-keyring.gpg && \
+    echo "deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb stable main" | \
+    tee /etc/apt/sources.list.d/clickhouse.list && \
+    apt-get update --yes && \
+    apt-get install --yes --no-install-recommends clickhouse-client && \
     apt-get clean && rm -rf /var/lib/apt/lists/*
 
 # If spark_version is not set, latest Spark will be installed
@@ -70,6 +79,7 @@ RUN mamba install --yes \
     'aif360' \
     'airflow' \
     'chromadb' \
+    'clickhouse-connect' \
     'csvkit' \
     'dalex' \
     'datafusion' \
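Note: the first hunk splits what was a single apt layer. The base packages now pull in gnupg (needed for gpg --dearmor), and a new RUN registers the ClickHouse apt repository with a signed-by keyring and installs clickhouse-client; the unchanged apt-get clean line now closes that second layer. The second kernel image Dockerfile below receives the identical change, and a usage sketch follows that diff.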

File 2 of 4: second kernel image Dockerfile (same changes)

@@ -20,7 +20,16 @@ RUN apt-get update --yes && \
     apt-get install --yes --no-install-recommends \
     bash jq \
     "openjdk-${openjdk_version}-jre-headless" \
-    ca-certificates-java && \
+    ca-certificates-java \
+    gnupg
+
+# Install ClickHouse client
+RUN curl -fsSL 'https://packages.clickhouse.com/rpm/lts/repodata/repomd.xml.key' | \
+    gpg --dearmor -o /usr/share/keyrings/clickhouse-keyring.gpg && \
+    echo "deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb stable main" | \
+    tee /etc/apt/sources.list.d/clickhouse.list && \
+    apt-get update --yes && \
+    apt-get install --yes --no-install-recommends clickhouse-client && \
     apt-get clean && rm -rf /var/lib/apt/lists/*
 
 # If spark_version is not set, latest Spark will be installed
@@ -70,6 +79,7 @@ RUN mamba install --yes \
     'aif360' \
     'airflow' \
     'chromadb' \
+    'clickhouse-connect' \
     'csvkit' \
     'dalex' \
     'datafusion' \
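With clickhouse-client (the CLI) and clickhouse-connect (the Python driver) baked into both kernel images, notebooks can query ClickHouse directly. Below is a minimal sketch, assuming a ClickHouse service reachable as clickhouse.clickhouse.svc.cluster.local (matching the clickhouse namespace targeted by the network policy in the next file) and credentials passed via environment variables; none of these names are defined by this commit.

    # Minimal sketch: query ClickHouse from a notebook with clickhouse-connect.
    # Hostname, credentials, and environment variable names are assumptions.
    import os

    import clickhouse_connect

    client = clickhouse_connect.get_client(
        host=os.environ.get("CLICKHOUSE_HOST", "clickhouse.clickhouse.svc.cluster.local"),
        port=8123,  # HTTP interface; the native protocol uses 9000
        username=os.environ.get("CLICKHOUSE_USER", "default"),
        password=os.environ.get("CLICKHOUSE_PASSWORD", ""),
    )

    print(client.command("SELECT version()"))                    # scalar result
    df = client.query_df("SELECT name FROM system.databases")    # pandas DataFrame
    print(df)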

File 3 of 4: JupyterHub Helm values (singleuser network policy)

@@ -193,6 +193,15 @@ singleuser:
         ports:
           - port: 8200
             protocol: TCP
+      - to:
+          - namespaceSelector:
+              matchLabels:
+                kubernetes.io/metadata.name: clickhouse
+        ports:
+          - port: 8123
+            protocol: TCP
+          - port: 9000
+            protocol: TCP
       # Allow DNS resolution
       - to:
           - ipBlock:
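This egress rule lets singleuser pods reach the clickhouse namespace on 8123 (the HTTP interface used by clickhouse-connect) and 9000 (the native protocol used by the clickhouse-client CLI). A rough reachability check to run from a notebook once the policy is applied; the service DNS name is an assumption, not something this commit defines.

    # Rough connectivity check from a singleuser pod. The hostname assumes a
    # ClickHouse service named "clickhouse" in the "clickhouse" namespace.
    import socket

    HOST = "clickhouse.clickhouse.svc.cluster.local"

    for port in (8123, 9000):
        try:
            with socket.create_connection((HOST, port), timeout=5):
                print(f"{HOST}:{port} reachable")
        except OSError as exc:
            print(f"{HOST}:{port} blocked or unavailable: {exc}")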

File 4 of 4: justfile

@@ -8,7 +8,7 @@ export JUPYTERHUB_OIDC_CLIENT_SESSION_MAX := env("JUPYTERHUB_OIDC_CLIENT_SESSION
 export JUPYTERHUB_NFS_PV_ENABLED := env("JUPYTERHUB_NFS_PV_ENABLED", "")
 export JUPYTERHUB_STORAGE_CLASS := env("JUPYTERHUB_STORAGE_CLASS", "")
 export JUPYTERHUB_VAULT_INTEGRATION_ENABLED := env("JUPYTERHUB_VAULT_INTEGRATION_ENABLED", "")
-export JUPYTER_PYTHON_KERNEL_TAG := env("JUPYTER_PYTHON_KERNEL_TAG", "python-3.12-31")
+export JUPYTER_PYTHON_KERNEL_TAG := env("JUPYTER_PYTHON_KERNEL_TAG", "python-3.12-34")
 export KERNEL_IMAGE_BUUN_STACK_REPOSITORY := env("KERNEL_IMAGE_BUUN_STACK_REPOSITORY", "buun-stack-notebook")
 export KERNEL_IMAGE_BUUN_STACK_CUDA_REPOSITORY := env("KERNEL_IMAGE_BUUN_STACK_CUDA_REPOSITORY", "buun-stack-cuda-notebook")
 export JUPYTER_PROFILE_MINIMAL_ENABLED := env("JUPYTER_PROFILE_MINIMAL_ENABLED", "false")
@@ -26,6 +26,8 @@ export JUPYTERHUB_CULL_MAX_AGE := env("JUPYTERHUB_CULL_MAX_AGE", "604800")
 export VAULT_AGENT_LOG_LEVEL := env("VAULT_AGENT_LOG_LEVEL", "info")
 export JUPYTER_BUUNSTACK_LOG_LEVEL := env("JUPYTER_BUUNSTACK_LOG_LEVEL", "warning")
 export IMAGE_REGISTRY := env("IMAGE_REGISTRY", "localhost:30500")
+export SPARK_DOWNLOAD_URL := env("SPARK_DOWNLOAD_URL", "https://dlcdn.apache.org/spark/")
+export SPARK_VERSION := env("SPARK_VERSION", "4.0.1")
 export LONGHORN_NAMESPACE := env("LONGHORN_NAMESPACE", "longhorn")
 export KEYCLOAK_REALM := env("KEYCLOAK_REALM", "buunstack")
 export VAULT_HOST := env("VAULT_HOST", "")
@@ -195,8 +197,8 @@ build-kernel-images:
         cp ../../../python-package/dist/*.whl ./
         docker build -t \
             ${IMAGE_REGISTRY}/${KERNEL_IMAGE_BUUN_STACK_REPOSITORY}:${JUPYTER_PYTHON_KERNEL_TAG} \
-            --build-arg spark_version="3.5.4" \
-            --build-arg spark_download_url="https://archive.apache.org/dist/spark/" \
+            --build-arg spark_version="${SPARK_VERSION}" \
+            --build-arg spark_download_url="${SPARK_DOWNLOAD_URL}" \
             .
     )
     rm -f ./images/datastack-notebook/*.whl
@@ -206,8 +208,8 @@ build-kernel-images:
         cp ../../../python-package/dist/*.whl ./
         docker build -t \
             ${IMAGE_REGISTRY}/${KERNEL_IMAGE_BUUN_STACK_CUDA_REPOSITORY}:${JUPYTER_PYTHON_KERNEL_TAG} \
-            --build-arg spark_version="3.5.4" \
-            --build-arg spark_download_url="https://archive.apache.org/dist/spark/" \
+            --build-arg spark_version="${SPARK_VERSION}" \
+            --build-arg spark_download_url="${SPARK_DOWNLOAD_URL}" \
             .
     )
     rm -f ./images/datastack-cuda-notebook/*.whl
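Aside from the ClickHouse wiring, the justfile bumps the kernel image tag from python-3.12-31 to python-3.12-34 and replaces the hard-coded Spark 3.5.4 build arguments with SPARK_VERSION (default 4.0.1) and SPARK_DOWNLOAD_URL (default https://dlcdn.apache.org/spark/). Both kernel images can therefore be built against a different Spark release by overriding the environment, for example SPARK_VERSION=3.5.4 SPARK_DOWNLOAD_URL=https://archive.apache.org/dist/spark/ just build-kernel-images to reproduce the previous pinning; the dlcdn mirror typically hosts only current releases, while archive.apache.org keeps older ones.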