[add] optimized github actions and dockerfile

sagar-sarkale-yral · sagar-sarkale-yral · commit 3a452125d01c · 2025-03-27T17:59:27.000+05:30
diff --git a/.github/workflows/fly-deploy-gpu.yml b/.github/workflows/fly-deploy-gpu.yml
@@ -20,32 +20,31 @@ jobs:
       - uses: actions/checkout@v3
         with:
           submodules: recursive
-      - uses: superfly/flyctl-actions/setup-flyctl@master
 
-      # Print system info
+      # Set up uv; no packages are installed on the runner, so an empty uv cache must not fail the job
+      - name: Set up uv
+        id: setup-uv
+        uses: astral-sh/setup-uv@v5
+        with:
+          python-version: "3.10"
+          enable-cache: true
+          ignore-nothing-to-cache: true  # post-job cache save errors on a missing cache dir otherwise
+          cache-dependency-glob: |
+            **/requirements*.txt
+            **/pyproject.toml
+            src_deploy/setup.sh
+      # Print system info and uv version
       - name: Print system info
         run: |
           echo "System information:"
           uname -a
           free -h
           df -h
           cat /etc/os-release
+          echo "UV version: ${{ steps.setup-uv.outputs.uv-version }}"
+          uv --version
 
-      # Set up Docker Buildx
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-        with:
-          driver-opts: |
-            image=moby/buildkit:latest
-
-      # Cache Docker layers - Updated approach
-      - name: Cache Docker layers
-        uses: actions/cache@v3
-        with:
-          path: /tmp/.buildx-cache
-          key: ${{ runner.os }}-buildx-${{ hashFiles('./src_deploy/gpu.Dockerfile') }}
-          restore-keys: |
-            ${{ runner.os }}-buildx-
+      - uses: superfly/flyctl-actions/setup-flyctl@master
 
       # Set Fly secrets
       - name: Set Fly secrets
@@ -87,29 +86,8 @@ jobs:
           ls -la ./src_deploy/
           cat ./src_deploy/gpu.Dockerfile
 
-      # Deploy to Fly with build caching - Updated approach
+      # Deploy to Fly via a remote builder using the GPU Dockerfile
       - name: Deploy a docker container to fly.io
-        uses: docker/build-push-action@v5
-        with:
-          context: .
-          file: ./src_deploy/gpu.Dockerfile
-          push: false
-          load: true
-          tags: flyio-deploy:latest
-          cache-from: type=local,src=/tmp/.buildx-cache
-          cache-to: type=local,dest=/tmp/.buildx-cache-new,mode=max
-
-      # Move cache to prevent cache growth
-      - name: Move cache
-        run: |
-          rm -rf /tmp/.buildx-cache
-          mv /tmp/.buildx-cache-new /tmp/.buildx-cache
-
-      # Deploy using built image
-      - name: Deploy to Fly.io
-        run: |
-          flyctl deploy --remote-only \
-            --config src_deploy/gpu.fly.toml \
-            --image flyio-deploy:latest
+        run: flyctl deploy --remote-only --config src_deploy/gpu.fly.toml --dockerfile ./src_deploy/gpu.Dockerfile
         env:
           FLY_API_TOKEN: ${{ secrets.FLY_IO_DEPLOY_TOKEN }}
diff --git a/src_deploy/gpu.Dockerfile b/src_deploy/gpu.Dockerfile
@@ -65,11 +65,12 @@ WORKDIR /home/$NB_USER
 # Expose sglang server port
 EXPOSE 8080
 
-# Install uv
-COPY --from=ghcr.io/astral-sh/uv:0.6.9 /uv /uvx /bin/
+# Install a pinned uv from the official image. Copying the binaries into /bin keeps
+# uv on PATH for every user, independent of which user runs this build step.
+COPY --from=ghcr.io/astral-sh/uv:0.6.9 /uv /uvx /bin/
 
 # Test that uv works
-# RUN uv --version
+RUN uv --version
 
 # Copy the entire src_deploy directory structure
 COPY --chown=$NB_USER:users ./src_deploy/ /home/$NB_USER/
@@ -81,9 +82,14 @@ RUN chmod +x /home/$NB_USER/setup.sh /home/$NB_USER/entrypoint.py /home/$NB_USER
 
 USER $NB_USER
 
-# Run GPU setup script and create logs directory
-RUN /home/$NB_USER/setup.sh || echo "Setup script had issues but we're continuing the build" \
+# Create virtual environment using uv and run setup script
+RUN uv venv $HOME/.venv \
+    && . $HOME/.venv/bin/activate \
+    && /home/$NB_USER/setup.sh || echo "Setup script had issues but we're continuing the build" \
     && mkdir -p /home/$NB_USER/logs
 
+# Add venv to PATH
+ENV PATH="/home/$NB_USER/.venv/bin:$PATH"
+
 # Set entrypoint to our startup script
 CMD ["/home/ubuntu/startup.sh"]
diff --git a/src_deploy/setup.sh b/src_deploy/setup.sh
@@ -13,60 +13,46 @@ echo "Can't initialize NVML. OR No CUDA runtime is found, using CUDA_HOME='/usr/
 echo "Please ignore these warnings. They are expected."
 echo "########################################################"
 
-# Block 2: Use uv instead of pip
-echo "Setting up Python environment..."
-
-# Create virtual environment using uv
-uv venv $HOME/.venv
-echo "✓ Virtual environment created"
-
-# Make sure we use the virtual environment's Python and uv
-PYTHON="$HOME/.venv/bin/python"
-UV="uv"
-
-# Add venv activation to .bashrc
-echo 'source $HOME/.venv/bin/activate' >> ~/.bashrc
+# Block 2: Python environment is already created by Dockerfile
+echo "Using uv for package installation..."
+
+# Make sure we're using the venv from Dockerfile; create it only as a fallback.
+# Paths are quoted so a HOME containing spaces cannot be word-split.
+if [ ! -d "$HOME/.venv" ]; then
+    echo "Warning: Virtual environment not found, creating one now"
+    uv venv "$HOME/.venv"
+else
+    echo "Using existing virtual environment"
+fi
+# Activate once for both cases so "python" below resolves inside the venv
+source "$HOME/.venv/bin/activate"
+PYTHON="python"
 
 # Block 3: Python Dependencies using uv
 echo "Installing Python packages..."
-$UV pip install --upgrade pip
-echo "✓ Pip upgraded"
 
-# Install setuptools first (required by triton)
-echo "Installing setuptools..."
-$UV pip install setuptools wheel
-echo "✓ Setuptools and wheel installed"
+# Install core packages in one uv invocation so their dependencies are resolved together
+echo "Installing core packages..."
+uv pip install -U "transformers==4.48.3" triton "sglang[all]>=0.4.2.post4" --find-links https://flashinfer.ai/whl/cu124/torch2.5/flashinfer/
 
-# Install transformers
-echo "Installing transformers..."
-$UV pip install "transformers==4.48.3"
-$PYTHON -c "import transformers" && echo "✓ Transformers installed" || echo "Warning: Could not import transformers, but continuing"
+echo "Installing additional packages..."
+uv pip install -U accelerate bitsandbytes huggingface_hub
 
-# Install triton first as it's a dependency for sglang
-echo "Installing triton..."
-$UV pip install triton
+# Validate installations
+$PYTHON -c "import transformers" && echo "✓ Transformers installed" || echo "Warning: Could not import transformers, but continuing"
 $PYTHON -c "import triton" && echo "✓ Triton installed" || echo "Warning: Could not import triton"
-
-# Install sglang and dependencies
-echo "Installing sglang and dependencies..."
-$UV pip install "sglang[all]>=0.4.2.post4" --find-links https://flashinfer.ai/whl/cu124/torch2.5/flashinfer/
 $PYTHON -c "import sglang" && echo "✓ SGLang installed" || { echo "ERROR: Could not import sglang, installation failed"; exit 1; }
-
-# Install additional required packages
-echo "Installing additional packages..."
-$UV pip install accelerate bitsandbytes
 $PYTHON -c "import accelerate" && echo "✓ Accelerate installed" || echo "Warning: Could not import accelerate"
-# $PYTHON -c "import bitsandbytes" && echo "✓ BitsAndBytes installed" || echo "Warning: Could not import bitsandbytes"
-
-# Install huggingface_hub
-echo "Installing huggingface_hub..."
-$UV pip install huggingface_hub
 $PYTHON -c "import huggingface_hub" && echo "✓ Huggingface_hub installed" || echo "Warning: Could not import huggingface_hub"
 
-# Install requirements.txt packages
-echo "Installing requirements.txt packages..."
-$UV pip install -r ~/requirements.txt
-echo "✓ Application dependencies installed"
+# Install the application's requirements.txt packages when the file is present
+if [ ! -f ~/requirements.txt ]; then
+    echo "No requirements.txt found, skipping"
+else
+    echo "Installing requirements.txt packages..."
+    uv pip install -r ~/requirements.txt
+    echo "✓ Application dependencies installed"
+fi
 
 # Block 4: Skip CUDA Check during build
 echo "Note: Skipping CUDA check during build phase. Will check when container runs."
diff --git a/src_deploy/setup_dev.sh b/src_deploy/setup_dev.sh
@@ -0,0 +1,78 @@
+#!/bin/bash
+
+# Don't exit immediately on errors during build phase
+set +e
+
+echo "Starting setup for sglang server on A10 GPU..."
+
+# Block 1: Skip GPU Check during build
+echo "########################################################"
+echo "Note: Skipping GPU checks during build phase. Will check when container runs."
+echo "You might see a warning about GPU like: "
+echo "Can't initialize NVML. OR No CUDA runtime is found, using CUDA_HOME='/usr/local/cuda' "
+echo "Please ignore these warnings. They are expected."
+echo "########################################################"
+
+# Block 2: Use uv instead of pip
+echo "Setting up Python environment..."
+
+# Create virtual environment using uv
+uv venv $HOME/.venv
+echo "✓ Virtual environment created"
+
+# Use the virtual environment's Python; uv itself is resolved from PATH
+PYTHON="$HOME/.venv/bin/python"
+UV="uv"
+
+# Add venv activation to .bashrc (only if that exact line is not already there, so re-runs don't duplicate it)
+grep -qxF 'source $HOME/.venv/bin/activate' ~/.bashrc 2>/dev/null || echo 'source $HOME/.venv/bin/activate' >> ~/.bashrc
+
+# Block 3: Python Dependencies using uv
+echo "Installing Python packages..."
+$UV pip install --upgrade pip
+echo "✓ Pip upgraded"
+
+# Install setuptools first (required by triton)
+echo "Installing setuptools..."
+$UV pip install setuptools wheel
+echo "✓ Setuptools and wheel installed"
+
+# Install transformers
+echo "Installing transformers..."
+$UV pip install "transformers==4.48.3"
+$PYTHON -c "import transformers" && echo "✓ Transformers installed" || echo "Warning: Could not import transformers, but continuing"
+
+# Install triton first as it's a dependency for sglang
+echo "Installing triton..."
+$UV pip install triton
+$PYTHON -c "import triton" && echo "✓ Triton installed" || echo "Warning: Could not import triton"
+
+# Install sglang and dependencies
+echo "Installing sglang and dependencies..."
+$UV pip install "sglang[all]>=0.4.2.post4" --find-links https://flashinfer.ai/whl/cu124/torch2.5/flashinfer/
+$PYTHON -c "import sglang" && echo "✓ SGLang installed" || { echo "ERROR: Could not import sglang, installation failed"; exit 1; }
+
+# Install additional required packages
+echo "Installing additional packages..."
+$UV pip install accelerate bitsandbytes
+$PYTHON -c "import accelerate" && echo "✓ Accelerate installed" || echo "Warning: Could not import accelerate"
+# $PYTHON -c "import bitsandbytes" && echo "✓ BitsAndBytes installed" || echo "Warning: Could not import bitsandbytes"
+
+# Install huggingface_hub
+echo "Installing huggingface_hub..."
+$UV pip install huggingface_hub
+$PYTHON -c "import huggingface_hub" && echo "✓ Huggingface_hub installed" || echo "Warning: Could not import huggingface_hub"
+
+# Install requirements.txt packages
+echo "Installing requirements.txt packages..."
+$UV pip install -r ~/requirements.txt
+echo "✓ Application dependencies installed"
+
+# Block 4: Skip CUDA Check during build
+echo "Note: Skipping CUDA check during build phase. Will check when container runs."
+
+echo "Setup completed!"
+echo "The next step will download the model when the server starts."
+
+# Return success despite any warnings above (a failed sglang import has already exited with status 1)
+exit 0