From c6ba59ad2a781a3c9fb76a6dc6eacb3de9014aa9 Mon Sep 17 00:00:00 2001
From: Masaki Yatsu
Date: Mon, 15 Sep 2025 19:28:51 +0900
Subject: [PATCH] feat(jupyterhub): update container images

Update the datastack-notebook and datastack-cuda-notebook kernel images:

* Add dagster to the mamba package list and dagster-dlt to the pip list;
  install dlt (with its destination extras) via pip instead of mamba.
* Remove apache-airflow, apache-airflow-client, jupyter-ai,
  jupyter-ai-magics, langchain-huggingface and langchain-perplexity from
  the pip package list.
* Add the pip_repository_url build arg (PIP_REPOSITORY_URL in the
  justfile) and use BuildKit cache mounts for the pip steps.
* Install PyTorch from the PyTorch wheel index with --upgrade, then
  upgrade langchain-openai from pip_repository_url in a separate pip call.
* Drop the fix-permissions calls that followed the pip steps.
* Bump the default JUPYTER_PYTHON_KERNEL_TAG to python-3.12-40.
---
Review notes (discarded by git am):

* The CUDA image keeps the cu124 wheel index (and pypi.nvidia.com) rather
  than switching to the CPU wheel index.
* langchain-openai is upgraded in its own pip call because --index-url
  replaces the default package index for the whole command.
* The blob ids on the Dockerfile "index" lines predate these two changes.
* NOTE(review): BuildKit cache mounts default to uid/gid 0. If these RUN
  steps execute as ${NB_USER}, confirm pip can write to the cache mount
  and add uid=/gid= options otherwise.

 .../images/datastack-cuda-notebook/Dockerfile | 34 ++++++++-----------
 .../images/datastack-notebook/Dockerfile      | 34 ++++++++-----------
 jupyterhub/justfile                           |  9 +++--
 3 files changed, 36 insertions(+), 41 deletions(-)

diff --git a/jupyterhub/images/datastack-cuda-notebook/Dockerfile b/jupyterhub/images/datastack-cuda-notebook/Dockerfile
index 1ac920c..27f669c 100644
--- a/jupyterhub/images/datastack-cuda-notebook/Dockerfile
+++ b/jupyterhub/images/datastack-cuda-notebook/Dockerfile
@@ -80,10 +80,10 @@ RUN mamba install --yes \
     'chromadb' \
     'clickhouse-connect' \
     'csvkit' \
+    'dagster' \
     'dalex' \
     'datafusion' \
     'dbt' \
-    'dlt' \
     'duckdb' \
     'faiss' \
     'gitpython' \
@@ -105,7 +105,6 @@ RUN mamba install --yes \
     'langchain-mistralai' \
     'langchain-mongodb' \
     'langchain-nomic' \
-    'langchain-openai' \
     'langchain-prompty' \
     'langchain-qdrant' \
     'langchain-robocorp' \
@@ -139,17 +138,15 @@ RUN mamba install --yes \
 # RUN pip install pyspark[connect,ml,mllib,pandas-on-spark,sql]==4.0.0.dev2
 # RUN pip install pyspark[connect,ml,mllib,pandas-on-spark,sql]==3.5.4
 
-RUN pip install \
+# URL to use for pip downloads
+ARG pip_repository_url="https://pypi.org/simple/"
+
+RUN --mount=type=cache,target=/home/${NB_USER}/.cache/pip pip install -i "${pip_repository_url}" \
     agno \
-    apache-airflow \
-    apache-airflow-client \
+    dagster-dlt \
     fastembed \
     feature-engine \
-    jupyter-ai \
-    jupyter-ai-magics[all] \
     kreuzberg \
-    langchain-huggingface \
-    langchain-perplexity \
     langfuse \
     pydantic-ai \
     ragas \
@@ -157,23 +154,22 @@ RUN pip install \
     tavily-python \
     tweet-preprocessor
 
+RUN --mount=type=cache,target=/home/${NB_USER}/.cache/pip pip install -i "${pip_repository_url}" \
+    'dlt[clickhouse,databricks,deltalake,dremio,duckdb,filesystem,parquet,postgres,pyiceberg,qdrant,redshift,s3,snowflake,sql-database,sqlalchemy,workspace]'
+
 # Install PyTorch with pip (https://pytorch.org/get-started/locally/)
 # langchain-openai must be updated to avoid pydantic v2 error
 # https://github.com/run-llama/llama_index/issues/16540https://github.com/run-llama/llama_index/issues/16540
 # hadolint ignore=DL3013
-RUN pip install --no-cache-dir --extra-index-url=https://pypi.nvidia.com --index-url 'https://download.pytorch.org/whl/cu124' \
-    'torch' \
-    'torchaudio' \
-    'torchvision' && \
-    pip install --upgrade langchain-openai && \
-    fix-permissions "${CONDA_DIR}" && \
-    fix-permissions "/home/${NB_USER}"
+RUN pip install --no-cache-dir --extra-index-url=https://pypi.nvidia.com --index-url 'https://download.pytorch.org/whl/cu124' --upgrade \
+    torch \
+    torchaudio \
+    torchvision && \
+    pip install --no-cache-dir -i "${pip_repository_url}" --upgrade langchain-openai
 
 # Install buunstack package
 COPY *.whl /opt/
-RUN pip install /opt/*.whl && \
-    fix-permissions "${CONDA_DIR}" && \
-    fix-permissions "/home/${NB_USER}"
+RUN --mount=type=cache,target=/home/${NB_USER}/.cache/pip pip install -i "${pip_repository_url}" /opt/*.whl
 
 WORKDIR "${HOME}"
 EXPOSE 4040
diff --git a/jupyterhub/images/datastack-notebook/Dockerfile b/jupyterhub/images/datastack-notebook/Dockerfile
index 5be7f9b..a077c7b 100644
--- a/jupyterhub/images/datastack-notebook/Dockerfile
+++ b/jupyterhub/images/datastack-notebook/Dockerfile
@@ -80,10 +80,10 @@ RUN mamba install --yes \
     'chromadb' \
     'clickhouse-connect' \
     'csvkit' \
+    'dagster' \
     'dalex' \
     'datafusion' \
     'dbt' \
-    'dlt' \
     'duckdb' \
     'faiss' \
     'gitpython' \
@@ -105,7 +105,6 @@ RUN mamba install --yes \
     'langchain-mistralai' \
     'langchain-mongodb' \
     'langchain-nomic' \
-    'langchain-openai' \
     'langchain-prompty' \
     'langchain-qdrant' \
     'langchain-robocorp' \
@@ -139,41 +138,38 @@ RUN mamba install --yes \
 # RUN pip install pyspark[connect,ml,mllib,pandas-on-spark,sql]==4.0.0.dev2
 # RUN pip install pyspark[connect,ml,mllib,pandas-on-spark,sql]==3.5.4
 
-RUN pip install \
+# URL to use for pip downloads
+ARG pip_repository_url="https://pypi.org/simple/"
+
+RUN --mount=type=cache,target=/home/${NB_USER}/.cache/pip pip install -i "${pip_repository_url}" \
     agno \
-    apache-airflow \
-    apache-airflow-client \
+    dagster-dlt \
     fastembed \
     feature-engine \
-    jupyter-ai \
-    jupyter-ai-magics[all] \
     kreuzberg \
     langfuse \
-    langchain-huggingface \
-    langchain-perplexity \
     pydantic-ai \
     ragas \
     smolagents \
     tavily-python \
     tweet-preprocessor
 
+RUN --mount=type=cache,target=/home/${NB_USER}/.cache/pip pip install -i "${pip_repository_url}" \
+    'dlt[clickhouse,databricks,deltalake,dremio,duckdb,filesystem,parquet,postgres,pyiceberg,qdrant,redshift,s3,snowflake,sql-database,sqlalchemy,workspace]'
+
 # Install PyTorch with pip (https://pytorch.org/get-started/locally/)
 # langchain-openai must be updated to avoid pydantic v2 error
 # https://github.com/run-llama/llama_index/issues/16540https://github.com/run-llama/llama_index/issues/16540
 # hadolint ignore=DL3013
-RUN pip install --no-cache-dir --index-url 'https://download.pytorch.org/whl/cpu' \
-    'torch' \
-    'torchaudio' \
-    'torchvision' && \
-    pip install --upgrade langchain-openai && \
-    fix-permissions "${CONDA_DIR}" && \
-    fix-permissions "/home/${NB_USER}"
+RUN pip install --no-cache-dir --index-url 'https://download.pytorch.org/whl/cpu' --upgrade \
+    torch \
+    torchaudio \
+    torchvision && \
+    pip install --no-cache-dir -i "${pip_repository_url}" --upgrade langchain-openai
 
 # Install buunstack package
 COPY *.whl /opt/
-RUN pip install /opt/*.whl && \
-    fix-permissions "${CONDA_DIR}" && \
-    fix-permissions "/home/${NB_USER}"
+RUN --mount=type=cache,target=/home/${NB_USER}/.cache/pip pip install -i "${pip_repository_url}" /opt/*.whl
 
 WORKDIR "${HOME}"
 EXPOSE 4040
diff --git a/jupyterhub/justfile b/jupyterhub/justfile
index f5c69fa..41bb485 100644
--- a/jupyterhub/justfile
+++ b/jupyterhub/justfile
@@ -9,7 +9,7 @@ export JUPYTERHUB_NFS_PV_ENABLED := env("JUPYTERHUB_NFS_PV_ENABLED", "")
 export JUPYTERHUB_STORAGE_CLASS := env("JUPYTERHUB_STORAGE_CLASS", "")
 export JUPYTERHUB_VAULT_INTEGRATION_ENABLED := env("JUPYTERHUB_VAULT_INTEGRATION_ENABLED", "")
 export JUPYTERHUB_AIRFLOW_DAGS_PERSISTENCE_ENABLED := env("JUPYTERHUB_AIRFLOW_DAGS_PERSISTENCE_ENABLED", "")
-export JUPYTER_PYTHON_KERNEL_TAG := env("JUPYTER_PYTHON_KERNEL_TAG", "python-3.12-37")
+export JUPYTER_PYTHON_KERNEL_TAG := env("JUPYTER_PYTHON_KERNEL_TAG", "python-3.12-40")
 export KERNEL_IMAGE_BUUN_STACK_REPOSITORY := env("KERNEL_IMAGE_BUUN_STACK_REPOSITORY", "buun-stack-notebook")
 export KERNEL_IMAGE_BUUN_STACK_CUDA_REPOSITORY := env("KERNEL_IMAGE_BUUN_STACK_CUDA_REPOSITORY", "buun-stack-cuda-notebook")
 export JUPYTER_PROFILE_MINIMAL_ENABLED := env("JUPYTER_PROFILE_MINIMAL_ENABLED", "false")
@@ -29,6 +29,7 @@ export JUPYTER_BUUNSTACK_LOG_LEVEL := env("JUPYTER_BUUNSTACK_LOG_LEVEL", "warnin
 export IMAGE_REGISTRY := env("IMAGE_REGISTRY", "localhost:30500")
 export SPARK_DOWNLOAD_URL := env("SPARK_DOWNLOAD_URL", "https://dlcdn.apache.org/spark/")
 export SPARK_VERSION := env("SPARK_VERSION", "4.0.1")
+export PIP_REPOSITORY_URL := env("PIP_REPOSITORY_URL", "https://pypi.org/simple/")
 export AIRFLOW_DAGS_STORAGE_SIZE := env("AIRFLOW_DAGS_STORAGE_SIZE", "10Gi")
 export LONGHORN_NAMESPACE := env("LONGHORN_NAMESPACE", "longhorn")
 export KEYCLOAK_REALM := env("KEYCLOAK_REALM", "buunstack")
@@ -219,10 +220,11 @@ build-kernel-images:
     (
         cd ./images/datastack-notebook
         cp ../../../python-package/dist/*.whl ./
-        docker build -t \
+        DOCKER_BUILDKIT=1 docker build -t \
             ${IMAGE_REGISTRY}/${KERNEL_IMAGE_BUUN_STACK_REPOSITORY}:${JUPYTER_PYTHON_KERNEL_TAG} \
             --build-arg spark_version="${SPARK_VERSION}" \
             --build-arg spark_download_url="${SPARK_DOWNLOAD_URL}" \
+            --build-arg pip_repository_url="${PIP_REPOSITORY_URL}" \
             .
     )
     rm -f ./images/datastack-notebook/*.whl
@@ -230,10 +232,11 @@ build-kernel-images:
     (
         cd ./images/datastack-cuda-notebook
         cp ../../../python-package/dist/*.whl ./
-        docker build -t \
+        DOCKER_BUILDKIT=1 docker build -t \
             ${IMAGE_REGISTRY}/${KERNEL_IMAGE_BUUN_STACK_CUDA_REPOSITORY}:${JUPYTER_PYTHON_KERNEL_TAG} \
             --build-arg spark_version="${SPARK_VERSION}" \
             --build-arg spark_download_url="${SPARK_DOWNLOAD_URL}" \
+            --build-arg pip_repository_url="${PIP_REPOSITORY_URL}" \
             .
     )
     rm -f ./images/datastack-cuda-notebook/*.whl