|
62 | 62 | - name: Check out code |
63 | 63 | uses: actions/checkout@v5  # checkout action referenced by its v5 tag |
64 | 64 |
|
| 65 | + - name: Install system dependencies  # only for matrix entries whose alias contains 'distributed' |
| 66 | + if: contains(matrix.alias, 'distributed') |
| 67 | + run: | |
| 68 | + set -euxo pipefail  # same strict-mode flags as the Install NVSHMEM step |
| 69 | + apt-get update |
| 70 | + DEBIAN_FRONTEND=noninteractive apt-get install -y curl wget git pkg-config zlib1g-dev build-essential  # noninteractive: never block CI on a package configuration prompt |
| 71 | +
|
| 72 | + - name: Install NVSHMEM  # only for matrix entries whose alias contains 'distributed' |
| 73 | + if: contains(matrix.alias, 'distributed') |
| 74 | + run: | |
| 75 | + set -euxo pipefail |
| 76 | + curl -fsSL --retry 3 https://raw.githubusercontent.com/pytorch/pytorch/main/.ci/docker/common/install_cuda.sh -o install_cuda.sh  # -f: fail on HTTP errors instead of saving an error page that would then be sourced; NOTE(review): URL tracks the moving 'main' branch - consider pinning to a commit SHA |
| 77 | + chmod +x install_cuda.sh |
| 78 | + source install_cuda.sh  # sourced so that install_nvshmem is available to the next line |
| 79 | + install_nvshmem 13 3.4.5  # NOTE(review): arguments look like CUDA major version and NVSHMEM version - confirm against the helper's definition |
| 80 | +
|
65 | 81 | - name: Install uv |
66 | 82 | uses: astral-sh/setup-uv@v7 |
67 | 83 | with: |
|
97 | 113 | fi |
98 | 114 |
|
99 | 115 | - name: Install Triton |
100 | | - if: contains(matrix.alias, 'cpu') || (steps.cache.outputs.cache-hit != 'true' && matrix.pytorch-version != 'pytorch-2.9') |
| 116 | + if: contains(matrix.alias, 'cpu') || (steps.cache.outputs.cache-hit != 'true' && matrix.pytorch-version != 'pytorch-2.9') || contains(matrix.alias, 'distributed') |
101 | 117 | run: | |
102 | 118 | set -x |
103 | 119 | source .venv/bin/activate |
@@ -139,7 +155,11 @@ jobs: |
139 | 155 | if [[ "${{ contains(matrix.alias, 'cpu') }}" == "true" ]]; then export TRITON_CPU_BACKEND=1; fi |
140 | 156 | # -rf: print failed tests |
141 | 157 | # --timeout: max allowed time for each test |
142 | | - pytest -rf --timeout=60 |
| 158 | + if [[ "${{ matrix.alias }}" == *distributed* ]]; then |
| 159 | + pytest -rf --timeout=120 test/test_distributed.py |
| 160 | + else |
| 161 | + pytest -rf --timeout=60 |
| 162 | + fi |
143 | 163 |
|
144 | 164 | test-notebooks: |
145 | 165 | name: test-notebooks-cu128-py3.12-pytorch-2.9-a10g |
|
0 commit comments