feat(dagster): install dependencies with initContainers

This commit is contained in:
Masaki Yatsu
2025-10-06 16:04:14 +09:00
parent 20664a170a
commit c4da02441b
2 changed files with 72 additions and 34 deletions

View File

@@ -28,22 +28,38 @@ dagsterWebserver:
memory: "1Gi" memory: "1Gi"
cpu: "1000m" cpu: "1000m"
extraPrependedInitContainers:
- name: install-packages
image: "{{ .Env.DAGSTER_CONTAINER_IMAGE }}:{{ .Env.DAGSTER_CONTAINER_TAG }}"
command:
- /bin/bash
- -c
- |
pip install --target /opt/dagster/site-packages {{ .Env.DAGSTER_EXTRA_PACKAGES }}
volumeMounts:
- name: extra-packages
mountPath: /opt/dagster/site-packages
env: env:
- name: DAGSTER_HOME - name: DAGSTER_HOME
value: /opt/dagster/dagster_home value: /opt/dagster/dagster_home
- name: PYTHONPATH - name: PYTHONPATH
value: /opt/dagster/user-code value: /opt/dagster/site-packages:/opt/dagster/user-code
- name: PIP_USER - name: PIP_USER
value: "true" value: "true"
volumeMounts: volumeMounts:
- name: user-code - name: user-code
mountPath: /opt/dagster/user-code mountPath: /opt/dagster/user-code
- name: extra-packages
mountPath: /opt/dagster/site-packages
volumes: volumes:
- name: user-code - name: user-code
persistentVolumeClaim: persistentVolumeClaim:
claimName: dagster-user-code-pvc claimName: dagster-user-code-pvc
- name: extra-packages
emptyDir: {}
{{- if eq (.Env.DAGSTER_ENV_SECRETS_EXIST | default "false") "true" }} {{- if eq (.Env.DAGSTER_ENV_SECRETS_EXIST | default "false") "true" }}
envSecrets: envSecrets:
@@ -70,14 +86,30 @@ dagsterDaemon:
memory: "1Gi" memory: "1Gi"
cpu: "1000m" cpu: "1000m"
extraPrependedInitContainers:
- name: install-packages
image: "{{ .Env.DAGSTER_CONTAINER_IMAGE }}:{{ .Env.DAGSTER_CONTAINER_TAG }}"
command:
- /bin/bash
- -c
- |
pip install --target /opt/dagster/site-packages {{ .Env.DAGSTER_EXTRA_PACKAGES }}
volumeMounts:
- name: extra-packages
mountPath: /opt/dagster/site-packages
volumeMounts: volumeMounts:
- name: user-code - name: user-code
mountPath: /opt/dagster/user-code mountPath: /opt/dagster/user-code
- name: extra-packages
mountPath: /opt/dagster/site-packages
volumes: volumes:
- name: user-code - name: user-code
persistentVolumeClaim: persistentVolumeClaim:
claimName: dagster-user-code-pvc claimName: dagster-user-code-pvc
- name: extra-packages
emptyDir: {}
{{- if eq (.Env.DAGSTER_ENV_SECRETS_EXIST | default "false") "true" }} {{- if eq (.Env.DAGSTER_ENV_SECRETS_EXIST | default "false") "true" }}
envSecrets: envSecrets:
@@ -88,7 +120,7 @@ dagsterDaemon:
- name: DAGSTER_HOME - name: DAGSTER_HOME
value: /opt/dagster/dagster_home value: /opt/dagster/dagster_home
- name: PYTHONPATH - name: PYTHONPATH
value: /opt/dagster/user-code value: /opt/dagster/site-packages:/opt/dagster/user-code
- name: PIP_USER - name: PIP_USER
value: "true" value: "true"
@@ -105,10 +137,16 @@ runLauncher:
volumeMounts: volumeMounts:
- name: user-code - name: user-code
mountPath: /opt/dagster/user-code mountPath: /opt/dagster/user-code
- name: extra-packages
mountPath: /opt/dagster/site-packages
volumes: volumes:
- name: user-code - name: user-code
persistentVolumeClaim: persistentVolumeClaim:
claimName: dagster-user-code-pvc claimName: dagster-user-code-pvc
- name: extra-packages
emptyDir: {}
envVars:
- "PYTHONPATH=/opt/dagster/site-packages:/opt/dagster/user-code"
envSecrets: envSecrets:
- name: dagster-database-secret - name: dagster-database-secret
{{- if eq (.Env.DAGSTER_STORAGE_TYPE | default "local") "minio" }} {{- if eq (.Env.DAGSTER_STORAGE_TYPE | default "local") "minio" }}
@@ -117,6 +155,19 @@ runLauncher:
{{- if eq (.Env.DAGSTER_ENV_SECRETS_EXIST | default "false") "true" }} {{- if eq (.Env.DAGSTER_ENV_SECRETS_EXIST | default "false") "true" }}
- name: dagster-env-secret - name: dagster-env-secret
{{- end }} {{- end }}
runK8sConfig:
podSpecConfig:
initContainers:
- name: install-packages
image: "{{ .Env.DAGSTER_CONTAINER_IMAGE }}:{{ .Env.DAGSTER_CONTAINER_TAG }}"
command:
- /bin/bash
- -c
- |
pip install --target /opt/dagster/site-packages {{ .Env.DAGSTER_EXTRA_PACKAGES }}
volumeMounts:
- name: extra-packages
mountPath: /opt/dagster/site-packages
postgresql: postgresql:
enabled: false enabled: false

View File

@@ -3,7 +3,7 @@ set fallback := true
export DAGSTER_NAMESPACE := env("DAGSTER_NAMESPACE", "dagster") export DAGSTER_NAMESPACE := env("DAGSTER_NAMESPACE", "dagster")
export DAGSTER_CHART_VERSION := env("DAGSTER_CHART_VERSION", "1.11.10") export DAGSTER_CHART_VERSION := env("DAGSTER_CHART_VERSION", "1.11.10")
export DAGSTER_CONTAINER_IMAGE := env("DAGSTER_CONTAINER_IMAGE", "docker.io/dagster/dagster-k8s") export DAGSTER_CONTAINER_IMAGE := env("DAGSTER_CONTAINER_IMAGE", "docker.io/dagster/dagster-k8s")
export DAGSTER_CONTAINER_TAG := env("DAGSTER_CONTAINER_TAG", "1.11.10") export DAGSTER_CONTAINER_TAG := env("DAGSTER_CONTAINER_TAG", "1.11.13")
export DAGSTER_CONTAINER_PULL_POLICY := env("DAGSTER_CONTAINER_PULL_POLICY", "IfNotPresent") export DAGSTER_CONTAINER_PULL_POLICY := env("DAGSTER_CONTAINER_PULL_POLICY", "IfNotPresent")
export DAGSTER_HOST := env("DAGSTER_HOST", "") export DAGSTER_HOST := env("DAGSTER_HOST", "")
export EXTERNAL_SECRETS_NAMESPACE := env("EXTERNAL_SECRETS_NAMESPACE", "external-secrets") export EXTERNAL_SECRETS_NAMESPACE := env("EXTERNAL_SECRETS_NAMESPACE", "external-secrets")
@@ -12,6 +12,7 @@ export DAGSTER_STORAGE_SIZE := env("DAGSTER_STORAGE_SIZE", "20Gi")
export DAGSTER_CODE_STORAGE_SIZE := env("DAGSTER_CODE_STORAGE_SIZE", "10Gi") export DAGSTER_CODE_STORAGE_SIZE := env("DAGSTER_CODE_STORAGE_SIZE", "10Gi")
export MINIO_NAMESPACE := env("MINIO_NAMESPACE", "minio") export MINIO_NAMESPACE := env("MINIO_NAMESPACE", "minio")
export DAGSTER_STORAGE_TYPE := env("DAGSTER_STORAGE_TYPE", "") export DAGSTER_STORAGE_TYPE := env("DAGSTER_STORAGE_TYPE", "")
export DAGSTER_EXTRA_PACKAGES := env("DAGSTER_EXTRA_PACKAGES", "dlt[duckdb] pyarrow pyiceberg s3fs simple-salesforce")
[private] [private]
default: default:
@@ -347,10 +348,10 @@ add-workspace-module module_name working_directory:
CURRENT_WORKSPACE=$(kubectl get configmap dagster-workspace-yaml -n ${DAGSTER_NAMESPACE} -o jsonpath='{.data.workspace\.yaml}') CURRENT_WORKSPACE=$(kubectl get configmap dagster-workspace-yaml -n ${DAGSTER_NAMESPACE} -o jsonpath='{.data.workspace\.yaml}')
# Create temporary file with current content # Create temporary file with current content
echo "$CURRENT_WORKSPACE" > /tmp/current_workspace.yaml echo "${CURRENT_WORKSPACE}" > /tmp/current_workspace.yaml
# Check if module already exists # Check if module already exists
if echo "$CURRENT_WORKSPACE" | grep -q "module_name: ${MODULE_NAME}"; then if echo "${CURRENT_WORKSPACE}" | grep -q "module_name: ${MODULE_NAME}"; then
echo "Module '${MODULE_NAME}' already exists in workspace - skipping workspace update" echo "Module '${MODULE_NAME}' already exists in workspace - skipping workspace update"
echo "✓ Project files updated successfully" echo "✓ Project files updated successfully"
exit 0 exit 0
@@ -364,9 +365,9 @@ add-workspace-module module_name working_directory:
EOF EOF
# Add to workspace # Add to workspace
if echo "$CURRENT_WORKSPACE" | grep -q "load_from: \[\]"; then if echo "${CURRENT_WORKSPACE}" | grep -q "load_from: \[\]"; then
# Replace empty array with new entry # Replace empty array with new entry
NEW_WORKSPACE=$(echo "$CURRENT_WORKSPACE" | sed 's/load_from: \[\]/load_from:/') NEW_WORKSPACE=$(echo "${CURRENT_WORKSPACE}" | sed 's/load_from: \[\]/load_from:/')
NEW_WORKSPACE="${NEW_WORKSPACE}"$'\n'"$(cat /tmp/new_entry.txt)" NEW_WORKSPACE="${NEW_WORKSPACE}"$'\n'"$(cat /tmp/new_entry.txt)"
else else
# Append to existing entries # Append to existing entries
@@ -374,7 +375,7 @@ add-workspace-module module_name working_directory:
fi fi
# Update ConfigMap using jq with proper key escaping # Update ConfigMap using jq with proper key escaping
PATCH_JSON=$(jq -n --arg workspace "$NEW_WORKSPACE" '{"data": {"workspace.yaml": $workspace}}') PATCH_JSON=$(jq -n --arg workspace "${NEW_WORKSPACE}" '{"data": {"workspace.yaml": $workspace}}')
kubectl patch configmap dagster-workspace-yaml -n ${DAGSTER_NAMESPACE} --patch "$PATCH_JSON" kubectl patch configmap dagster-workspace-yaml -n ${DAGSTER_NAMESPACE} --patch "$PATCH_JSON"
echo "✓ Module '${MODULE_NAME}' added to workspace" echo "✓ Module '${MODULE_NAME}' added to workspace"
@@ -424,34 +425,19 @@ deploy-project project_dir='':
echo "Project name: ${PROJECT_NAME}" echo "Project name: ${PROJECT_NAME}"
echo "Python module name: ${PYTHON_MODULE_NAME}" echo "Python module name: ${PYTHON_MODULE_NAME}"
# Check if user code PVC exists
if ! kubectl get pvc dagster-user-code-pvc -n ${DAGSTER_NAMESPACE} &>/dev/null; then
echo "Error: User code PVC not found. Run 'just dagster::setup-user-code-pvc' first."
exit 1
fi
# Check if Longhorn is available for ReadWriteMany support
if kubectl get storageclass longhorn &>/dev/null; then
echo "Longhorn detected - PVC supports ReadWriteMany for sharing with other services"
else
echo "Longhorn not detected - PVC will use ReadWriteOnce (Dagster-only access)"
fi
echo "Deploying project '${PROJECT_NAME}'..."
# Find running Dagster webserver pod # Find running Dagster webserver pod
DAGSTER_POD=$(kubectl get pods -n ${DAGSTER_NAMESPACE} -l component=dagster-webserver -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || echo "") DAGSTER_POD=$(kubectl get pods -n ${DAGSTER_NAMESPACE} -l component=dagster-webserver -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || echo "")
if [ -z "$DAGSTER_POD" ] || ! kubectl get pod "$DAGSTER_POD" -n ${DAGSTER_NAMESPACE} &>/dev/null; then if [ -z "${DAGSTER_POD}" ] || ! kubectl get pod "${DAGSTER_POD}" -n ${DAGSTER_NAMESPACE} &>/dev/null; then
echo "Error: No running Dagster webserver pod found" echo "Error: No running Dagster webserver pod found"
echo "Please ensure Dagster is installed and running first" echo "Please ensure Dagster is installed and running first"
exit 1 exit 1
fi fi
echo "Using Dagster webserver pod: $DAGSTER_POD" echo "Using Dagster webserver pod: ${DAGSTER_POD}"
# Create directory if it doesn't exist # Create directory if it doesn't exist
kubectl exec "$DAGSTER_POD" -n ${DAGSTER_NAMESPACE} -- mkdir -p "/opt/dagster/user-code/${PROJECT_NAME}" 2>/dev/null || true kubectl exec "${DAGSTER_POD}" -n ${DAGSTER_NAMESPACE} -- mkdir -p "/opt/dagster/user-code/${PROJECT_NAME}" 2>/dev/null || true
# Copy project files (excluding .venv, __pycache__, and other unnecessary files) # Copy project files (excluding .venv, __pycache__, and other unnecessary files)
echo "Copying project files to shared PVC (excluding .venv, __pycache__, etc.)..." echo "Copying project files to shared PVC (excluding .venv, __pycache__, etc.)..."
@@ -459,6 +445,7 @@ deploy-project project_dir='':
# Create a tar archive excluding unnecessary files and directories # Create a tar archive excluding unnecessary files and directories
tar -czf - \ tar -czf - \
-C "${PROJECT_DIR}" \ -C "${PROJECT_DIR}" \
--no-xattrs \
--exclude='.venv' \ --exclude='.venv' \
--exclude='__pycache__' \ --exclude='__pycache__' \
--exclude='*.pyc' \ --exclude='*.pyc' \
@@ -473,7 +460,7 @@ deploy-project project_dir='':
# Determine the correct working directory (check if src directory exists) # Determine the correct working directory (check if src directory exists)
WORKING_DIR="/opt/dagster/user-code/${PROJECT_NAME}" WORKING_DIR="/opt/dagster/user-code/${PROJECT_NAME}"
if kubectl exec "$DAGSTER_POD" -n ${DAGSTER_NAMESPACE} -- test -d "/opt/dagster/user-code/${PROJECT_NAME}/src" 2>/dev/null; then if kubectl exec "${DAGSTER_POD}" -n ${DAGSTER_NAMESPACE} -- test -d "/opt/dagster/user-code/${PROJECT_NAME}/src" 2>/dev/null; then
WORKING_DIR="/opt/dagster/user-code/${PROJECT_NAME}/src" WORKING_DIR="/opt/dagster/user-code/${PROJECT_NAME}/src"
echo "Found src directory, using: ${WORKING_DIR}" echo "Found src directory, using: ${WORKING_DIR}"
else else
@@ -520,7 +507,7 @@ remove-project project_name='':
# Find running Dagster webserver pod # Find running Dagster webserver pod
DAGSTER_POD=$(kubectl get pods -n ${DAGSTER_NAMESPACE} -l component=dagster-webserver -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || echo "") DAGSTER_POD=$(kubectl get pods -n ${DAGSTER_NAMESPACE} -l component=dagster-webserver -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || echo "")
if [ -z "$DAGSTER_POD" ] || ! kubectl get pod "$DAGSTER_POD" -n ${DAGSTER_NAMESPACE} &>/dev/null; then if [ -z "${DAGSTER_POD}" ] || ! kubectl get pod "${DAGSTER_POD}" -n ${DAGSTER_NAMESPACE} &>/dev/null; then
echo "Error: No running Dagster webserver pod found" echo "Error: No running Dagster webserver pod found"
echo "Please ensure Dagster is installed and running first" echo "Please ensure Dagster is installed and running first"
exit 1 exit 1
@@ -528,7 +515,7 @@ remove-project project_name='':
# Remove project files from PVC # Remove project files from PVC
echo "Removing project files from shared PVC..." echo "Removing project files from shared PVC..."
kubectl exec "$DAGSTER_POD" -n ${DAGSTER_NAMESPACE} -- rm -rf "/opt/dagster/user-code/${PROJECT_NAME}" 2>/dev/null || true kubectl exec "${DAGSTER_POD}" -n ${DAGSTER_NAMESPACE} -- rm -rf "/opt/dagster/user-code/${PROJECT_NAME}" 2>/dev/null || true
# Remove from workspace.yaml # Remove from workspace.yaml
echo "Removing module '${PYTHON_MODULE_NAME}' from workspace..." echo "Removing module '${PYTHON_MODULE_NAME}' from workspace..."
@@ -537,20 +524,20 @@ remove-project project_name='':
CURRENT_WORKSPACE=$(kubectl get configmap dagster-workspace-yaml -n ${DAGSTER_NAMESPACE} -o jsonpath='{.data.workspace\.yaml}') CURRENT_WORKSPACE=$(kubectl get configmap dagster-workspace-yaml -n ${DAGSTER_NAMESPACE} -o jsonpath='{.data.workspace\.yaml}')
# Check if module exists # Check if module exists
if ! echo "$CURRENT_WORKSPACE" | grep -q "module_name: ${PYTHON_MODULE_NAME}"; then if ! echo "${CURRENT_WORKSPACE}" | grep -q "module_name: ${PYTHON_MODULE_NAME}"; then
echo "Module '${PYTHON_MODULE_NAME}' not found in workspace - only removing files" echo "Module '${PYTHON_MODULE_NAME}' not found in workspace - only removing files"
else else
# Remove the module entry using sed (remove the python_module block) # Remove the module entry using sed (remove the python_module block)
NEW_WORKSPACE=$(echo "$CURRENT_WORKSPACE" | sed "/- python_module:/,/working_directory: .*/{/module_name: ${PYTHON_MODULE_NAME}/,/working_directory: .*/d;}") NEW_WORKSPACE=$(echo "${CURRENT_WORKSPACE}" | sed "/- python_module:/,/working_directory: .*/{/module_name: ${PYTHON_MODULE_NAME}/,/working_directory: .*/d;}")
# If no modules left, reset to empty array # If no modules left, reset to empty array
if ! echo "$NEW_WORKSPACE" | grep -q "module_name:"; then if ! echo "${NEW_WORKSPACE}" | grep -q "module_name:"; then
NEW_WORKSPACE="load_from: []"$'\n' NEW_WORKSPACE="load_from: []"$'\n'
fi fi
# Update ConfigMap using jq # Update ConfigMap using jq
PATCH_JSON=$(jq -n --arg workspace "$NEW_WORKSPACE" '{"data": {"workspace.yaml": $workspace}}') PATCH_JSON=$(jq -n --arg workspace "${NEW_WORKSPACE}" '{"data": {"workspace.yaml": $workspace}}')
kubectl patch configmap dagster-workspace-yaml -n ${DAGSTER_NAMESPACE} --patch "$PATCH_JSON" kubectl patch configmap dagster-workspace-yaml -n ${DAGSTER_NAMESPACE} --patch "${PATCH_JSON}"
echo "✓ Module '${PYTHON_MODULE_NAME}' removed from workspace" echo "✓ Module '${PYTHON_MODULE_NAME}' removed from workspace"
fi fi