
Commit 80278a8

[update] workflow deploy issues
1 parent be59564 commit 80278a8

File tree

2 files changed (+16 -10 lines)


src_deploy/gpu.Dockerfile (+15 -9)
@@ -1,4 +1,4 @@
-FROM pytorch/pytorch:2.1.0-cuda12.1-cudnn8-devel
+FROM pytorch/pytorch:2.1.0-cuda12.1-cudnn8-runtime
 
 # Set environment variables
 ENV DEBIAN_FRONTEND=noninteractive
@@ -17,22 +17,18 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
     ninja-build \
     && rm -rf /var/lib/apt/lists/*
 
-# Install NVIDIA driver libraries needed for libcuda.so and libnvidia-ml.so
-RUN apt-get update && apt-get install -y --no-install-recommends \
-    nvidia-driver-525 \
-    && rm -rf /var/lib/apt/lists/*
+# IMPORTANT: Don't install NVIDIA drivers - fly.io provides these
+# The A10 GPUs on fly.io already have the proper drivers
 
 # Set up working directory
 WORKDIR /app
 RUN mkdir -p ./data
 
-# Install Python packages - use the pre-installed PyTorch
+# Install Python packages
 RUN pip install --no-cache-dir --upgrade pip && \
     pip install --no-cache-dir "transformers==4.48.3" && \
     pip install --no-cache-dir "sglang[all]>=0.4.2.post4" && \
     pip install --no-cache-dir jupyter pandas tqdm nvitop scikit-learn seaborn matplotlib faiss-gpu faiss-cpu bitsandbytes && \
-    # pip install --no-cache-dir flash-attn --no-build-isolation && \
-    # pip install --no-cache-dir autoawq --no-build-isolation && \
     pip install --no-cache-dir accelerate
 
 # Copy source code
@@ -41,8 +37,18 @@ COPY . .
 # Add GPU check script
 COPY ./src_deploy/check_gpu.py /app/check_gpu.py
 
+# Setup proper NVIDIA configurations
+ENV NVIDIA_VISIBLE_DEVICES=all
+ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
+
 # Run ldconfig to update library cache
 RUN ldconfig
 
-CMD ["sh", "-c", "python /app/check_gpu.py && python -m sglang.launch_server --model-path microsoft/Phi-3.5-mini-instruct --host 0.0.0.0 --port 8899 --api-key None --mem-fraction-static 0.9 --max-running-requests 1024 --attention-backend triton --disable-cuda-graph --dtype float16 --chunked-prefill-size 512 --enable-metrics --show-time-cost --enable-cache-report --log-level info --watchdog-timeout 120 --schedule-policy lpm --schedule-conservativeness 0.8"]
+# Use a healthcheck to ensure GPU is available
+HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
+    CMD python -c "import torch; assert torch.cuda.is_available(), 'CUDA not available'"
+
+# Expose port for the service
+EXPOSE 8899
 
+CMD ["sh", "-c", "python /app/check_gpu.py && python -m sglang.launch_server --model-path microsoft/Phi-3.5-mini-instruct --host 0.0.0.0 --port 8899 --api-key None --mem-fraction-static 0.9 --max-running-requests 1024 --attention-backend triton --disable-cuda-graph --dtype float16 --chunked-prefill-size 512 --enable-metrics --show-time-cost --enable-cache-report --log-level info --watchdog-timeout 120 --schedule-policy lpm --schedule-conservativeness 0.8"]

src_deploy/gpu.fly.toml (+1 -1)
@@ -10,7 +10,7 @@ primary_region = 'ord'
 dockerfile = 'gpu.Dockerfile'
 
 [http_service]
-internal_port = 8080
+internal_port = 8899
 force_https = true
 auto_stop_machines = 'stop'
 auto_start_machines = true
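With internal_port now matching the EXPOSE 8899 and --port 8899 in the Dockerfile, fly.io's HTTPS proxy can reach the sglang server. A quick smoke test against the deployed app might look like the following (hypothetical hostname; assumes sglang's OpenAI-compatible /v1/chat/completions route and that no real API key is enforced):

# Hypothetical smoke test; replace "your-app.fly.dev" with the actual
# fly.io app hostname. If the server enforces an API key, add an
# Authorization header.
import requests

BASE_URL = "https://your-app.fly.dev"  # fly.io proxies HTTPS to internal_port 8899

resp = requests.post(
    f"{BASE_URL}/v1/chat/completions",
    json={
        "model": "microsoft/Phi-3.5-mini-instruct",
        "messages": [{"role": "user", "content": "Say hello in one sentence."}],
        "max_tokens": 32,
    },
    timeout=60,
)
resp.raise_for_status()
print(resp.json()["choices"][0]["message"]["content"])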
