# Last change: remove flaky remote function failure (#244) #1070
# Viewer notice: this file contains hidden or bidirectional Unicode text that may be
# interpreted or compiled differently than what appears below. To review, open the file
# in an editor that reveals hidden Unicode characters.
# CI workflow: builds monarch on a GPU runner and runs the Python test suite.
name: Build monarch

on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main
      - gh/**

concurrency:
  # On main the group is keyed by run number, so every push gets its own group
  # and is never cancelled by a later one. On any other ref the group is keyed
  # by the ref, so a newer run for the same ref cancels the one in progress.
  group: ${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_number || github.ref }}
  cancel-in-progress: true

jobs:
  test:
    name: cuda12.6-py3.10-4xlarge
    strategy:
      fail-fast: true
      matrix:
        include:
          - name: 4xlarge
            runs-on: linux.g5.4xlarge.nvidia.gpu
            # NOTE(review): torch-spec is not referenced anywhere else in this
            # workflow; confirm whether it is still needed.
            torch-spec: '--pre torch --index-url https://download.pytorch.org/whl/nightly/cu126'
            gpu-arch-type: "cuda"
            gpu-arch-version: "12.6"
    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
    with:
      # Presumably minutes; confirm against the reusable workflow's inputs.
      timeout: 60
      runner: ${{ matrix.runs-on }}
      gpu-arch-type: ${{ matrix.gpu-arch-type }}
      gpu-arch-version: ${{ matrix.gpu-arch-version }}
      submodules: recursive
      script: |
        # Create and enter an isolated Python 3.10 environment
        conda create -n venv python=3.10 -y
        conda activate venv
        # Put the devtoolset-10 binaries ahead of everything else on PATH
        export PATH="/opt/rh/devtoolset-10/root/usr/bin/:$PATH"
        python -m pip install --upgrade pip

        # Install native dependencies
        dnf update -y
        dnf install clang-devel libunwind libunwind-devel -y

        # Install rust and setup nightly toolchain
        curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
        source "$HOME/.cargo/env"
        rustup toolchain install nightly
        rustup default nightly

        # Install build dependencies
        pip install -r build-requirements.txt

        # Install test dependencies
        pip install -r python/tests/requirements.txt

        # Install remote process_allocator binary (some tests use it)
        cargo install --path monarch_hyperactor

        # Build and install monarch
        # NB: monarch currently can't be built in an isolated build environment
        # (i.e. it is not PEP 517 compatible) because 'torch-sys' needs to be
        # compiled against the 'torch' installed in the main python environment
        # so that libtorch is linked correctly at runtime.
        pip install --no-build-isolation .

        # Run tests, excluding those marked 'oss_skip'
        LC_ALL=C pytest python/tests/ -s -v -m "not oss_skip" -n auto
        # test_mock_cuda.py is additionally executed directly as a script
        python python/tests/test_mock_cuda.py