Skip to content

Commit a255b4f

Browse files
author
Fazeel Usmani
committed
Fix session fixture teardown exceptions being reported as duplicate XFAILs
When a session-scoped autouse fixture raises an exception during teardown, and the last test in the suite is marked @pytest.mark.xfail, pytest incorrectly showed a duplicate XFAIL line instead of reporting the teardown failure as an ERROR. The root cause was that the xfail handling in pytest_runtest_makereport was applied to all phases (setup, call, teardown), converting any exception into an xfail result whenever the test carried an xfail marker. As a result, session fixture teardown exceptions were misreported as expected failures. The fix restricts xfail handling to the "call" phase only: setup and teardown failures are now reported as errors regardless of any xfail marker on the test. This aligns with the principle that xfail applies to test execution itself, not to fixture setup/teardown failures. Fixes #8375
1 parent 5ae9e47 commit a255b4f

File tree

3 files changed

+77
-21
lines changed

3 files changed

+77
-21
lines changed

src/_pytest/skipping.py

Lines changed: 21 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -287,21 +287,27 @@ def pytest_runtest_makereport(
287287
rep.outcome = "skipped"
288288
elif not rep.skipped and xfailed:
289289
if call.excinfo:
290-
raises = xfailed.raises
291-
if raises is None or (
292-
(
293-
isinstance(raises, type | tuple)
294-
and isinstance(call.excinfo.value, raises)
295-
)
296-
or (
297-
isinstance(raises, AbstractRaises)
298-
and raises.matches(call.excinfo.value)
299-
)
300-
):
301-
rep.outcome = "skipped"
302-
rep.wasxfail = xfailed.reason
303-
else:
304-
rep.outcome = "failed"
290+
# Only apply xfail handling to the "call" phase.
291+
# Setup and teardown failures should be reported as errors,
292+
# not as expected failures, even if the test is marked xfail.
293+
# This ensures that fixture teardown exceptions (e.g., from
294+
# session-scoped fixtures) are properly reported as errors.
295+
if call.when == "call":
296+
raises = xfailed.raises
297+
if raises is None or (
298+
(
299+
isinstance(raises, type | tuple)
300+
and isinstance(call.excinfo.value, raises)
301+
)
302+
or (
303+
isinstance(raises, AbstractRaises)
304+
and raises.matches(call.excinfo.value)
305+
)
306+
):
307+
rep.outcome = "skipped"
308+
rep.wasxfail = xfailed.reason
309+
else:
310+
rep.outcome = "failed"
305311
elif call.when == "call":
306312
if xfailed.strict:
307313
rep.outcome = "failed"

testing/python/fixtures.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4914,7 +4914,8 @@ def test_crash_expected_setup_and_teardown() -> None:
49144914
"""
49154915
)
49164916
result = pytester.runpytest()
4917-
assert result.ret == 0
4917+
# Fixture setup failures are reported as errors, not xfails
4918+
assert result.ret == 1 # Errors from fixture setup failures
49184919

49194920

49204921
def test_scoped_fixture_teardown_order(pytester: Pytester) -> None:

testing/test_skipping.py

Lines changed: 54 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -737,6 +737,11 @@ def test_2():
737737

738738
class TestXFailwithSetupTeardown:
739739
def test_failing_setup_issue9(self, pytester: Pytester) -> None:
740+
"""Setup failures should be reported as errors, not xfails.
741+
742+
Even if a test is marked xfail, if the setup fails, that's an
743+
infrastructure error, not an expected test failure.
744+
"""
740745
pytester.makepyfile(
741746
"""
742747
import pytest
@@ -749,9 +754,14 @@ def test_func():
749754
"""
750755
)
751756
result = pytester.runpytest()
752-
result.stdout.fnmatch_lines(["*1 xfail*"])
757+
result.stdout.fnmatch_lines(["*1 error*"])
753758

754759
def test_failing_teardown_issue9(self, pytester: Pytester) -> None:
760+
"""Teardown failures should be reported as errors, not xfails.
761+
762+
Even if a test is marked xfail, if the teardown fails, that's an
763+
infrastructure error, not an expected test failure.
764+
"""
755765
pytester.makepyfile(
756766
"""
757767
import pytest
@@ -764,7 +774,7 @@ def test_func():
764774
"""
765775
)
766776
result = pytester.runpytest()
767-
result.stdout.fnmatch_lines(["*1 xfail*"])
777+
result.stdout.fnmatch_lines(["*1 error*"])
768778

769779

770780
class TestSkip:
@@ -1185,6 +1195,11 @@ def test_default_markers(pytester: Pytester) -> None:
11851195

11861196

11871197
def test_xfail_test_setup_exception(pytester: Pytester) -> None:
1198+
"""Setup exceptions should be reported as errors, not xfails.
1199+
1200+
Even if a test is marked xfail, if setup fails (via pytest_runtest_setup hook),
1201+
that's an infrastructure error, not an expected test failure.
1202+
"""
11881203
pytester.makeconftest(
11891204
"""
11901205
def pytest_runtest_setup():
@@ -1200,9 +1215,9 @@ def test_func():
12001215
"""
12011216
)
12021217
result = pytester.runpytest(p)
1203-
assert result.ret == 0
1204-
assert "xfailed" in result.stdout.str()
1205-
result.stdout.no_fnmatch_line("*xpassed*")
1218+
assert result.ret == 1 # Should fail due to error
1219+
assert "error" in result.stdout.str()
1220+
result.stdout.no_fnmatch_line("*xfailed*")
12061221

12071222

12081223
def test_imperativeskip_on_xfail_test(pytester: Pytester) -> None:
@@ -1489,3 +1504,37 @@ def test_exit_reason_only():
14891504
)
14901505
result = pytester.runpytest(p)
14911506
result.stdout.fnmatch_lines("*_pytest.outcomes.Exit: foo*")
1507+
1508+
1509+
def test_session_fixture_teardown_exception_with_xfail(pytester: Pytester) -> None:
1510+
"""Test that session fixture teardown exceptions are reported as errors,
1511+
not as duplicate xfails, even when the last test is marked xfail.
1512+
1513+
Regression test for issue #8375.
1514+
"""
1515+
pytester.makepyfile(
1516+
"""
1517+
import pytest
1518+
1519+
@pytest.fixture(autouse=True, scope='session')
1520+
def failme():
1521+
yield
1522+
raise RuntimeError('cleanup fails for some reason')
1523+
1524+
def test_ok():
1525+
assert True
1526+
1527+
@pytest.mark.xfail()
1528+
def test_expected_failure():
1529+
assert False
1530+
"""
1531+
)
1532+
result = pytester.runpytest("-q")
1533+
result.stdout.fnmatch_lines(
1534+
[
1535+
"*1 passed, 1 xfailed, 1 error*",
1536+
]
1537+
)
1538+
# Make sure we don't have duplicate xfails (would be "2 xfailed" before the fix)
1539+
assert "2 xfailed" not in result.stdout.str()
1540+
assert "1 xfailed" in result.stdout.str()

0 commit comments

Comments (0)