Skip to content

Commit adaf8b7

Browse files
committed
debug skipped tests
1 parent 6427d2f commit adaf8b7

File tree

2 files changed

+6
-3
lines changed

2 files changed

+6
-3
lines changed

.github/workflows/test.yml

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -72,6 +72,11 @@ jobs:
7272
if: contains(matrix.alias, 'distributed')
7373
run: |
7474
set -euxo pipefail
75+
GPU_COUNT=$(nvidia-smi -L | wc -l)
76+
if [ "$GPU_COUNT" -ne 4 ]; then
77+
echo "Error: Expected 4 GPUs but found $GPU_COUNT"
78+
exit 1
79+
fi
7580
curl -L https://raw.githubusercontent.com/pytorch/pytorch/main/.ci/docker/common/install_cuda.sh -o install_cuda.sh
7681
chmod +x install_cuda.sh
7782
source install_cuda.sh
@@ -155,7 +160,7 @@ jobs:
155160
# -rf: print failed tests
156161
# --timeout: max allowed time for each test
157162
TEST_PATH=$([[ "${{ contains(matrix.alias, 'distributed') }}" == "true" ]] && echo "test/test_examples_dist.py" || echo ".")
158-
EXTRA_FLAGS=$([[ "${{ contains(matrix.alias, 'distributed') }}" == "true" ]] && echo "" || echo "--ignore=test/test_examples_dist.py")
163+
EXTRA_FLAGS=$([[ "${{ contains(matrix.alias, 'distributed') }}" == "true" ]] && echo "-vs" || echo "--ignore=test/test_examples_dist.py")
159164
pytest -rf --timeout=60 $EXTRA_FLAGS $TEST_PATH
160165
161166
test-notebooks:

test/test_examples_dist.py

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -43,7 +43,6 @@ def _init_process(self):
4343
)
4444
torch.manual_seed(42 + self.rank)
4545

46-
@skip_if_lt_x_gpu(4)
4746
def test_all_gather_matmul(self):
4847
self._init_process()
4948

@@ -100,7 +99,6 @@ def test_all_gather_matmul(self):
10099
torch.cuda.current_stream().wait_stream(backend_stream)
101100
dist.destroy_process_group()
102101

103-
@skip_if_lt_x_gpu(4)
104102
def test_all_reduce(self):
105103
self._init_process()
106104

0 commit comments

Comments
 (0)