feat(jupyterhub): update container images

This commit is contained in:
Masaki Yatsu
2025-09-15 19:28:51 +09:00
parent 2a82597ed2
commit c6ba59ad2a
3 changed files with 36 additions and 41 deletions

View File

@@ -80,10 +80,10 @@ RUN mamba install --yes \
'chromadb' \ 'chromadb' \
'clickhouse-connect' \ 'clickhouse-connect' \
'csvkit' \ 'csvkit' \
'dagster' \
'dalex' \ 'dalex' \
'datafusion' \ 'datafusion' \
'dbt' \ 'dbt' \
'dlt' \
'duckdb' \ 'duckdb' \
'faiss' \ 'faiss' \
'gitpython' \ 'gitpython' \
@@ -105,7 +105,6 @@ RUN mamba install --yes \
'langchain-mistralai' \ 'langchain-mistralai' \
'langchain-mongodb' \ 'langchain-mongodb' \
'langchain-nomic' \ 'langchain-nomic' \
'langchain-openai' \
'langchain-prompty' \ 'langchain-prompty' \
'langchain-qdrant' \ 'langchain-qdrant' \
'langchain-robocorp' \ 'langchain-robocorp' \
@@ -139,17 +138,15 @@ RUN mamba install --yes \
# RUN pip install pyspark[connect,ml,mllib,pandas-on-spark,sql]==4.0.0.dev2 # RUN pip install pyspark[connect,ml,mllib,pandas-on-spark,sql]==4.0.0.dev2
# RUN pip install pyspark[connect,ml,mllib,pandas-on-spark,sql]==3.5.4 # RUN pip install pyspark[connect,ml,mllib,pandas-on-spark,sql]==3.5.4
RUN pip install \ # URL to use for pip downloads
ARG pip_repository_url="https://pypi.org/simple/"
RUN --mount=type=cache,target=/home/${NB_USER}/.cache/pip pip install -i "${pip_repository_url}" \
agno \ agno \
apache-airflow \ dagster-dlt \
apache-airflow-client \
fastembed \ fastembed \
feature-engine \ feature-engine \
jupyter-ai \
jupyter-ai-magics[all] \
kreuzberg \ kreuzberg \
langchain-huggingface \
langchain-perplexity \
langfuse \ langfuse \
pydantic-ai \ pydantic-ai \
ragas \ ragas \
@@ -157,23 +154,22 @@ RUN pip install \
tavily-python \ tavily-python \
tweet-preprocessor tweet-preprocessor
RUN --mount=type=cache,target=/home/${NB_USER}/.cache/pip pip install -i "${pip_repository_url}" \
'dlt[clickhouse,databricks,deltalake,dremio,duckdb,filesystem,parquet,postgres,pyiceberg,qdrant,redshift,s3,snowflake,sql-database,sqlalchemy,workspace]'
# Install PyTorch with pip (https://pytorch.org/get-started/locally/) # Install PyTorch with pip (https://pytorch.org/get-started/locally/)
# langchain-openai must be updated to avoid pydantic v2 error # langchain-openai must be updated to avoid pydantic v2 error
# https://github.com/run-llama/llama_index/issues/16540 # https://github.com/run-llama/llama_index/issues/16540
# hadolint ignore=DL3013 # hadolint ignore=DL3013
RUN pip install --no-cache-dir --extra-index-url=https://pypi.nvidia.com --index-url 'https://download.pytorch.org/whl/cu124' \ RUN pip install --no-cache-dir --index-url 'https://download.pytorch.org/whl/cpu' --upgrade \
'torch' \ langchain-openai \
'torchaudio' \ torch \
'torchvision' && \ torchaudio \
pip install --upgrade langchain-openai && \ torchvision
fix-permissions "${CONDA_DIR}" && \
fix-permissions "/home/${NB_USER}"
# Install buunstack package # Install buunstack package
COPY *.whl /opt/ COPY *.whl /opt/
RUN pip install /opt/*.whl && \ RUN --mount=type=cache,target=/home/${NB_USER}/.cache/pip pip install -i "${pip_repository_url}" /opt/*.whl
fix-permissions "${CONDA_DIR}" && \
fix-permissions "/home/${NB_USER}"
WORKDIR "${HOME}" WORKDIR "${HOME}"
EXPOSE 4040 EXPOSE 4040

View File

@@ -80,10 +80,10 @@ RUN mamba install --yes \
'chromadb' \ 'chromadb' \
'clickhouse-connect' \ 'clickhouse-connect' \
'csvkit' \ 'csvkit' \
'dagster' \
'dalex' \ 'dalex' \
'datafusion' \ 'datafusion' \
'dbt' \ 'dbt' \
'dlt' \
'duckdb' \ 'duckdb' \
'faiss' \ 'faiss' \
'gitpython' \ 'gitpython' \
@@ -105,7 +105,6 @@ RUN mamba install --yes \
'langchain-mistralai' \ 'langchain-mistralai' \
'langchain-mongodb' \ 'langchain-mongodb' \
'langchain-nomic' \ 'langchain-nomic' \
'langchain-openai' \
'langchain-prompty' \ 'langchain-prompty' \
'langchain-qdrant' \ 'langchain-qdrant' \
'langchain-robocorp' \ 'langchain-robocorp' \
@@ -139,41 +138,38 @@ RUN mamba install --yes \
# RUN pip install pyspark[connect,ml,mllib,pandas-on-spark,sql]==4.0.0.dev2 # RUN pip install pyspark[connect,ml,mllib,pandas-on-spark,sql]==4.0.0.dev2
# RUN pip install pyspark[connect,ml,mllib,pandas-on-spark,sql]==3.5.4 # RUN pip install pyspark[connect,ml,mllib,pandas-on-spark,sql]==3.5.4
RUN pip install \ # URL to use for pip downloads
ARG pip_repository_url="https://pypi.org/simple/"
RUN --mount=type=cache,target=/home/${NB_USER}/.cache/pip pip install -i "${pip_repository_url}" \
agno \ agno \
apache-airflow \ dagster-dlt \
apache-airflow-client \
fastembed \ fastembed \
feature-engine \ feature-engine \
jupyter-ai \
jupyter-ai-magics[all] \
kreuzberg \ kreuzberg \
langfuse \ langfuse \
langchain-huggingface \
langchain-perplexity \
pydantic-ai \ pydantic-ai \
ragas \ ragas \
smolagents \ smolagents \
tavily-python \ tavily-python \
tweet-preprocessor tweet-preprocessor
RUN --mount=type=cache,target=/home/${NB_USER}/.cache/pip pip install -i "${pip_repository_url}" \
'dlt[clickhouse,databricks,deltalake,dremio,duckdb,filesystem,parquet,postgres,pyiceberg,qdrant,redshift,s3,snowflake,sql-database,sqlalchemy,workspace]'
# Install PyTorch with pip (https://pytorch.org/get-started/locally/) # Install PyTorch with pip (https://pytorch.org/get-started/locally/)
# langchain-openai must be updated to avoid pydantic v2 error # langchain-openai must be updated to avoid pydantic v2 error
# https://github.com/run-llama/llama_index/issues/16540 # https://github.com/run-llama/llama_index/issues/16540
# hadolint ignore=DL3013 # hadolint ignore=DL3013
RUN pip install --no-cache-dir --index-url 'https://download.pytorch.org/whl/cpu' \ RUN pip install --no-cache-dir --index-url 'https://download.pytorch.org/whl/cpu' --upgrade \
'torch' \ langchain-openai \
'torchaudio' \ torch \
'torchvision' && \ torchaudio \
pip install --upgrade langchain-openai && \ torchvision
fix-permissions "${CONDA_DIR}" && \
fix-permissions "/home/${NB_USER}"
# Install buunstack package # Install buunstack package
COPY *.whl /opt/ COPY *.whl /opt/
RUN pip install /opt/*.whl && \ RUN --mount=type=cache,target=/home/${NB_USER}/.cache/pip pip install -i "${pip_repository_url}" /opt/*.whl
fix-permissions "${CONDA_DIR}" && \
fix-permissions "/home/${NB_USER}"
WORKDIR "${HOME}" WORKDIR "${HOME}"
EXPOSE 4040 EXPOSE 4040

View File

@@ -9,7 +9,7 @@ export JUPYTERHUB_NFS_PV_ENABLED := env("JUPYTERHUB_NFS_PV_ENABLED", "")
export JUPYTERHUB_STORAGE_CLASS := env("JUPYTERHUB_STORAGE_CLASS", "") export JUPYTERHUB_STORAGE_CLASS := env("JUPYTERHUB_STORAGE_CLASS", "")
export JUPYTERHUB_VAULT_INTEGRATION_ENABLED := env("JUPYTERHUB_VAULT_INTEGRATION_ENABLED", "") export JUPYTERHUB_VAULT_INTEGRATION_ENABLED := env("JUPYTERHUB_VAULT_INTEGRATION_ENABLED", "")
export JUPYTERHUB_AIRFLOW_DAGS_PERSISTENCE_ENABLED := env("JUPYTERHUB_AIRFLOW_DAGS_PERSISTENCE_ENABLED", "") export JUPYTERHUB_AIRFLOW_DAGS_PERSISTENCE_ENABLED := env("JUPYTERHUB_AIRFLOW_DAGS_PERSISTENCE_ENABLED", "")
export JUPYTER_PYTHON_KERNEL_TAG := env("JUPYTER_PYTHON_KERNEL_TAG", "python-3.12-37") export JUPYTER_PYTHON_KERNEL_TAG := env("JUPYTER_PYTHON_KERNEL_TAG", "python-3.12-40")
export KERNEL_IMAGE_BUUN_STACK_REPOSITORY := env("KERNEL_IMAGE_BUUN_STACK_REPOSITORY", "buun-stack-notebook") export KERNEL_IMAGE_BUUN_STACK_REPOSITORY := env("KERNEL_IMAGE_BUUN_STACK_REPOSITORY", "buun-stack-notebook")
export KERNEL_IMAGE_BUUN_STACK_CUDA_REPOSITORY := env("KERNEL_IMAGE_BUUN_STACK_CUDA_REPOSITORY", "buun-stack-cuda-notebook") export KERNEL_IMAGE_BUUN_STACK_CUDA_REPOSITORY := env("KERNEL_IMAGE_BUUN_STACK_CUDA_REPOSITORY", "buun-stack-cuda-notebook")
export JUPYTER_PROFILE_MINIMAL_ENABLED := env("JUPYTER_PROFILE_MINIMAL_ENABLED", "false") export JUPYTER_PROFILE_MINIMAL_ENABLED := env("JUPYTER_PROFILE_MINIMAL_ENABLED", "false")
@@ -29,6 +29,7 @@ export JUPYTER_BUUNSTACK_LOG_LEVEL := env("JUPYTER_BUUNSTACK_LOG_LEVEL", "warnin
export IMAGE_REGISTRY := env("IMAGE_REGISTRY", "localhost:30500") export IMAGE_REGISTRY := env("IMAGE_REGISTRY", "localhost:30500")
export SPARK_DOWNLOAD_URL := env("SPARK_DOWNLOAD_URL", "https://dlcdn.apache.org/spark/") export SPARK_DOWNLOAD_URL := env("SPARK_DOWNLOAD_URL", "https://dlcdn.apache.org/spark/")
export SPARK_VERSION := env("SPARK_VERSION", "4.0.1") export SPARK_VERSION := env("SPARK_VERSION", "4.0.1")
export PIP_REPOSITORY_URL := env("PIP_REPOSITORY_URL", "https://pypi.org/simple/")
export AIRFLOW_DAGS_STORAGE_SIZE := env("AIRFLOW_DAGS_STORAGE_SIZE", "10Gi") export AIRFLOW_DAGS_STORAGE_SIZE := env("AIRFLOW_DAGS_STORAGE_SIZE", "10Gi")
export LONGHORN_NAMESPACE := env("LONGHORN_NAMESPACE", "longhorn") export LONGHORN_NAMESPACE := env("LONGHORN_NAMESPACE", "longhorn")
export KEYCLOAK_REALM := env("KEYCLOAK_REALM", "buunstack") export KEYCLOAK_REALM := env("KEYCLOAK_REALM", "buunstack")
@@ -219,10 +220,11 @@ build-kernel-images:
( (
cd ./images/datastack-notebook cd ./images/datastack-notebook
cp ../../../python-package/dist/*.whl ./ cp ../../../python-package/dist/*.whl ./
docker build -t \ DOCKER_BUILDKIT=1 docker build -t \
${IMAGE_REGISTRY}/${KERNEL_IMAGE_BUUN_STACK_REPOSITORY}:${JUPYTER_PYTHON_KERNEL_TAG} \ ${IMAGE_REGISTRY}/${KERNEL_IMAGE_BUUN_STACK_REPOSITORY}:${JUPYTER_PYTHON_KERNEL_TAG} \
--build-arg spark_version="${SPARK_VERSION}" \ --build-arg spark_version="${SPARK_VERSION}" \
--build-arg spark_download_url="${SPARK_DOWNLOAD_URL}" \ --build-arg spark_download_url="${SPARK_DOWNLOAD_URL}" \
--build-arg pip_repository_url="${PIP_REPOSITORY_URL}" \
. .
) )
rm -f ./images/datastack-notebook/*.whl rm -f ./images/datastack-notebook/*.whl
@@ -230,10 +232,11 @@ build-kernel-images:
( (
cd ./images/datastack-cuda-notebook cd ./images/datastack-cuda-notebook
cp ../../../python-package/dist/*.whl ./ cp ../../../python-package/dist/*.whl ./
docker build -t \ DOCKER_BUILDKIT=1 docker build -t \
${IMAGE_REGISTRY}/${KERNEL_IMAGE_BUUN_STACK_CUDA_REPOSITORY}:${JUPYTER_PYTHON_KERNEL_TAG} \ ${IMAGE_REGISTRY}/${KERNEL_IMAGE_BUUN_STACK_CUDA_REPOSITORY}:${JUPYTER_PYTHON_KERNEL_TAG} \
--build-arg spark_version="${SPARK_VERSION}" \ --build-arg spark_version="${SPARK_VERSION}" \
--build-arg spark_download_url="${SPARK_DOWNLOAD_URL}" \ --build-arg spark_download_url="${SPARK_DOWNLOAD_URL}" \
--build-arg pip_repository_url="${PIP_REPOSITORY_URL}" \
. .
) )
rm -f ./images/datastack-cuda-notebook/*.whl rm -f ./images/datastack-cuda-notebook/*.whl