From 8e8dabb7cc7dac83f52e74e0dc5e2d08d5faa2dd Mon Sep 17 00:00:00 2001
From: Sam Clegg
Date: Sat, 8 Nov 2025 15:08:43 -0800
Subject: [PATCH] Improve the SingleLineTestResult and integrate with
 parallel_testsuite

The parallel test suite no longer needs its own reporting logic; it can
just use the existing Runner/Result, which now supports the color and
single-line output formats.

By default we now buffer test output and use the single-line test runner.
Adding `-v` will disable this and give multi-line, non-buffered output.
Adding another `-v` will enable logging in the test framework (same as
setting EMTEST_VERBOSE).
---
 test/color_runner.py       | 35 ++++++++++++--
 test/parallel_testsuite.py | 96 ++++++++++++--------------------------
 test/runner.py             | 19 +++++---
 test/single_line_runner.py | 76 ++++++++++++------------------
 4 files changed, 101 insertions(+), 125 deletions(-)

diff --git a/test/color_runner.py b/test/color_runner.py
index a13f9d98f2ad8..740a6cc8e328f 100644
--- a/test/color_runner.py
+++ b/test/color_runner.py
@@ -35,13 +35,37 @@ def _restoreStdout(self):
 class ProgressMixin:
   test_count = 0
   progress_counter = 0
+  fail_count = 0
+
+  def addError(self, test, err):
+    self.fail_count += 1
+    super().addError(test, err)
+
+  def addFailure(self, test, err):
+    self.fail_count += 1
+    super().addFailure(test, err)
+
+  def addUnexpectedSuccess(self, test):
+    self.fail_count += 1
+    super().addUnexpectedSuccess(test)
+
+  def writeProgressPrefix(self):
+    if self.showAll:
+      base_progress = f'[{self.progress_counter}/{self.test_count}'
+      rtn = len(base_progress)
+      progress = with_color(CYAN, base_progress)
+      if self.fail_count:
+        fail_str = f'-{self.fail_count}'
+        progress += with_color(RED, fail_str)
+        rtn += len(fail_str)
+      progress += with_color(CYAN, '] ')
+      self.stream.write(progress)
+      return rtn + 2
+    return 0

   def startTest(self, test):
-    assert self.test_count > 0
     self.progress_counter += 1
-    if self.showAll:
-      progress = f'[{self.progress_counter}/{self.test_count}] '
-      self.stream.write(with_color(CYAN, progress))
+    self.writeProgressPrefix()
     super().startTest(test)

@@ -62,6 +86,9 @@ class ColorTextRunner(unittest.TextTestRunner):
   """Subclass of TextTestRunner that uses ColorTextResult"""
   resultclass = ColorTextResult  # type: ignore

+  def __init__(self, *args, **kwargs):
+    super().__init__(*args, verbosity=2, **kwargs)
+
   def _makeResult(self):
     result = super()._makeResult()
     result.test_count = self.test_count
diff --git a/test/parallel_testsuite.py b/test/parallel_testsuite.py
index 8a5652ed78cfe..fea891d192203 100644
--- a/test/parallel_testsuite.py
+++ b/test/parallel_testsuite.py
@@ -13,10 +13,10 @@
 import browser_common
 import common
+from color_runner import BufferingMixin
 from common import errlog
 from tools import emprofile, utils
-from tools.colored_logger import CYAN, GREEN, RED, with_color
 from tools.utils import WINDOWS

 EMTEST_VISUALIZE = os.getenv('EMTEST_VISUALIZE')
@@ -66,6 +66,10 @@ def run_test(args):
     test.set_temp_dir(temp_dir)
   try:
     test(result)
+  except KeyboardInterrupt:
+    # In case of KeyboardInterrupt, do not emit buffered stderr/stdout
+    # as we unwind.
+    result._mirrorOutput = False
   finally:
     result.elapsed = time.perf_counter() - start_time
@@ -106,43 +110,11 @@ def __init__(self, max_cores, options):
     self.max_cores = max_cores
     self.max_failures = options.max_failures
     self.failing_and_slow_first = options.failing_and_slow_first
-    self.progress_counter = 0

   def addTest(self, test):
     super().addTest(test)
     test.is_parallel = True

-  def printOneResult(self, res):
-    self.progress_counter += 1
-    progress = f'[{self.progress_counter}/{self.num_tests}] '
-
-    if res.test_result == 'success':
-      msg = 'ok'
-      color = GREEN
-    elif res.test_result == 'errored':
-      msg = 'ERROR'
-      color = RED
-    elif res.test_result == 'failed':
-      msg = 'FAIL'
-      color = RED
-    elif res.test_result == 'skipped':
-      reason = res.skipped[0][1]
-      msg = f"skipped '{reason}'"
-      color = CYAN
-    elif res.test_result == 'unexpected success':
-      msg = 'unexpected success'
-      color = RED
-    elif res.test_result == 'expected failure':
-      color = RED
-      msg = 'expected failure'
-    else:
-      assert False, f'unhandled test result {res.test_result}'
-
-    if res.test_result != 'skipped':
-      msg += f' ({res.elapsed:.2f}s)'
-
-    errlog(f'{with_color(CYAN, progress)}{res.test} ... {with_color(color, msg)}')
-
   def run(self, result):
     # The 'spawn' method is used on windows and it can be useful to set this on
     # all platforms when debugging multiprocessing issues. Without this we
@@ -151,6 +123,12 @@ def run(self, result):
     # issues.
     # multiprocessing.set_start_method('spawn')
+    # No need to worry about stdout/stderr buffering since we are not
+    # actually running the test here, only setting the results.
+    buffer = result.buffer
+    result.buffer = False
+
+    result.core_time = 0
     tests = self.get_sorted_tests()
     self.num_tests = self.countTestCases()
     contains_browser_test = any(test.is_browser_test() for test in tests)
@@ -176,7 +154,7 @@ def run(self, result):
     allowed_failures_counter = manager.Value('i', self.max_failures)

     results = []
-    args = ((t, allowed_failures_counter, result.buffer) for t in tests)
+    args = ((t, allowed_failures_counter, buffer) for t in tests)
     for res in pool.imap_unordered(run_test, args, chunksize=1):
       # results may be be None if # of allowed errors was exceeded
       # and the harness aborted.
       if res.test_result not in ['success', 'skipped'] and allowed_failures_counter is not None:
         # Signal existing multiprocess pool runners so that they can exit early if needed.
         allowed_failures_counter.value -= 1
-      self.printOneResult(res)
+      res.integrate_result(result)
       results.append(res)

     # Send a task to each worker to tear down the browser and server. This
     # are cycled through once.
     num_tear_downs = sum([pool.apply(tear_down, ()) for i in range(use_cores)])
     # Assert the assumed behavior above hasn't changed.
-    if num_tear_downs != use_cores:
+    if num_tear_downs != use_cores and not buffer:
       errlog(f'Expected {use_cores} teardowns, got {num_tear_downs}')

     if self.failing_and_slow_first:
@@ -218,7 +196,9 @@ def update_test_results_to(test_name):
       json.dump(previous_test_run_results, open(common.PREVIOUS_TEST_RUN_RESULTS_FILE, 'w'), indent=2)

-    return self.combine_results(result, results)
+    if EMTEST_VISUALIZE:
+      self.visualize_results(results)
+    return result

   def get_sorted_tests(self):
     """A list of this suite's tests, sorted with the @is_slow_test tests first.
@@ -237,14 +217,11 @@ def test_key(test):

     return sorted(self, key=test_key, reverse=True)

-  def combine_results(self, result, buffered_results):
-    errlog('')
-    errlog('DONE: combining results on main thread')
-    errlog('')
+  def visualize_results(self, results):
+    assert EMTEST_VISUALIZE
     # Sort the results back into alphabetical order. Running the tests in
     # parallel causes mis-orderings, this makes the results more readable.
-    results = sorted(buffered_results, key=lambda res: str(res.test))
-    result.core_time = 0
+    results = sorted(results, key=lambda res: str(res.test))

     # shared data structures are hard in the python multi-processing world, so
     # use a file to share the flaky test information across test processes.
@@ -252,30 +229,17 @@ def combine_results(self, result, buffered_results):
     # Extract only the test short names
     flaky_tests = [x.split('.')[-1] for x in flaky_tests]

-    # The next integrateResult loop will print a *lot* of lines really fast. This
-    # will cause a Python exception being thrown when attempting to print to
-    # stderr, if stderr is in nonblocking mode, like it is on Buildbot CI:
-    # See https://github.com/buildbot/buildbot/issues/8659
-    # To work around that problem, set stderr to blocking mode before printing.
-    if not WINDOWS:
-      os.set_blocking(sys.stderr.fileno(), True)
-
     for r in results:
-      # Integrate the test result to the global test result object
-      r.integrateResult(result)
       r.log_test_run_for_visualization(flaky_tests)

     # Generate the parallel test run visualization
-    if EMTEST_VISUALIZE:
-      emprofile.create_profiling_graph(utils.path_from_root('out/graph'))
-      # Cleanup temp files that were used for the visualization
-      emprofile.delete_profiler_logs()
-      utils.delete_file(common.flaky_tests_log_filename)
-
-    return result
+    emprofile.create_profiling_graph(utils.path_from_root('out/graph'))
+    # Cleanup temp files that were used for the visualization
+    emprofile.delete_profiler_logs()
+    utils.delete_file(common.flaky_tests_log_filename)


-class BufferedParallelTestResult(unittest.TestResult):
+class BufferedParallelTestResult(BufferingMixin, unittest.TestResult):
   """A picklable struct used to communicate test results across processes
   """
   def __init__(self):
@@ -293,15 +257,12 @@ def test_short_name(self):

   def addDuration(self, test, elapsed):
     self.test_duration = elapsed

-  def integrateResult(self, overall_results):
+  def integrate_result(self, overall_results):
     """This method get called on the main thread once the buffered result
-    is received. It add the buffered result to the overall result."""
+    is received. It adds the buffered result to the overall result."""
     # The exception info objects that we are adding here have already
     # been turned into strings so make _exc_info_to_string into a no-op.
     overall_results._exc_info_to_string = lambda x, _y: x
-    # No need to worry about stdout/stderr buffering since are a not
-    # actually running the test here, only setting the results.
-    overall_results.buffer = False
     overall_results.startTest(self.test)
     if self.test_result == 'success':
       overall_results.addSuccess(self.test)
@@ -321,7 +282,8 @@ def integrateResult(self, overall_results):
     overall_results.core_time += self.test_duration

   def log_test_run_for_visualization(self, flaky_tests):
-    if EMTEST_VISUALIZE and (self.test_result != 'skipped' or self.test_duration > 0.2):
+    assert EMTEST_VISUALIZE
+    if self.test_result != 'skipped' or self.test_duration > 0.2:
       test_result = self.test_result
       if test_result == 'success' and self.test_short_name() in flaky_tests:
         test_result = 'warnings'
diff --git a/test/runner.py b/test/runner.py
index 19f1800a510e2..127c25a7e4d02 100755
--- a/test/runner.py
+++ b/test/runner.py
@@ -429,12 +429,16 @@ def run_tests(options, suites):
     testRunner = xmlrunner.XMLTestRunner(output=output, verbosity=2, failfast=options.failfast)
     print('Writing XML test output to ' + os.path.abspath(output.name))
-  elif options.buffer and options.ansi and not options.verbose:
-    # When buffering is enabled and ansi color output is available use our nice single-line
+  elif options.ansi and not options.verbose:
+    # When not in verbose mode and ansi color output is available, use our nice single-line
     # result display.
-    testRunner = SingleLineTestRunner(verbosity=2, failfast=options.failfast)
+    testRunner = SingleLineTestRunner(failfast=options.failfast)
   else:
+    if not options.ansi:
+      print('using verbose test runner (ANSI not available)')
+    else:
+      print('using verbose test runner (verbose output requested)')
-    testRunner = ColorTextRunner(verbosity=2, failfast=options.failfast)
+    testRunner = ColorTextRunner(failfast=options.failfast)

   total_core_time = 0
   run_start_time = time.perf_counter()
@@ -472,7 +476,9 @@ def parse_args():
                       'test. Implies --cores=1. Defaults to true when running a single test')
   parser.add_argument('--no-clean', action='store_true',
                       help='Do not clean the temporary directory before each test run')
-  parser.add_argument('--verbose', '-v', action='store_true')
+  parser.add_argument('--verbose', '-v', action='count', default=0,
+                      help="Show test stdout and stderr, and don't use the single-line test reporting. "
+                           'Specifying `-v` twice will enable test framework logging (i.e. EMTEST_VERBOSE)')
   # TODO: Replace with BooleanOptionalAction once we can depend on python3.9
   parser.add_argument('--ansi', action='store_true', default=None)
   parser.add_argument('--no-ansi', action='store_false', dest='ansi', default=None)
@@ -492,7 +498,6 @@ def parse_args():
                       help='Use the default CI browser configuration.')
   parser.add_argument('tests', nargs='*')
   parser.add_argument('--failfast', action='store_true', help='If true, test run will abort on first failed test.')
-  parser.add_argument('-b', '--buffer', action='store_true', help='Buffer stdout and stderr during tests')
   parser.add_argument('--max-failures', type=int, default=2**31 - 1, help='If specified, test run will abort after N failed tests.')
   parser.add_argument('--failing-and-slow-first', action='store_true', help='Run failing tests first, then sorted by slowest first. Combine with --failfast for fast fail-early CI runs.')
   parser.add_argument('--start-at', metavar='NAME', help='Skip all tests up until ')
@@ -587,7 +592,7 @@ def set_env(name, option_value):
   set_env('EMTEST_SKIP_SLOW', options.skip_slow)
   set_env('EMTEST_ALL_ENGINES', options.all_engines)
   set_env('EMTEST_REBASELINE', options.rebaseline)
-  set_env('EMTEST_VERBOSE', options.verbose)
+  set_env('EMTEST_VERBOSE', options.verbose > 1)
   set_env('EMTEST_CORES', options.cores)
   set_env('EMTEST_FORCE64', options.force64)
diff --git a/test/single_line_runner.py b/test/single_line_runner.py
index 524cc43da0593..e5d2ab986c2c1 100644
--- a/test/single_line_runner.py
+++ b/test/single_line_runner.py
@@ -6,7 +6,7 @@
 import shutil
 import unittest

-from color_runner import BufferingMixin, ColorTextRunner
+from color_runner import ColorTextResult, ColorTextRunner
 from tools.colored_logger import CYAN, GREEN, RED, with_color

@@ -20,65 +20,47 @@ def term_width():
   return shutil.get_terminal_size()[0]


-class SingleLineTestResult(BufferingMixin, unittest.TextTestResult):
+class SingleLineTestResult(ColorTextResult):
   """Similar to the standard TextTestResult but uses ANSI escape codes for color
   output and reusing a single line on the terminal.
   """
-  def __init__(self, *args, **kwargs):
-    super().__init__(*args, **kwargs)
-    self.progress_counter = 0
-
-  def writeStatusLine(self, line):
-    clearline(self._original_stderr)
-    self._original_stderr.write(line)
-    self._original_stderr.flush()
-
-  def updateStatus(self, test, msg, color):
-    progress = f'[{self.progress_counter}/{self.test_count}] '
-    # Format the line so that it fix within the terminal width, unless it's less then min_len
+  def writeStatus(self, test, msg, color, line_pos):
+    # Because the message can include the skip reason (which can be very long sometimes), truncate
+    # it to a reasonable length to avoid exceeding the line length.
+    if len(msg) > 30:
+      msg = msg[:30]
+    # Format the line so that it fits within the terminal width, unless it's less than min_len,
     # in which case there is not much we can do, and we just overflow the line.
-    min_len = len(progress) + len(msg) + 5
+    min_len = line_pos + len(msg) + 5
     test_name = str(test)
     if term_width() > min_len:
       max_name = term_width() - min_len
       test_name = test_name[:max_name]
-    line = f'{with_color(CYAN, progress)}{test_name} ... {with_color(color, msg)}'
-    self.writeStatusLine(line)
+    line = f'{test_name} ... {with_color(color, msg)}'
+    self._original_stderr.write(line)
+
+  def _write_status(self, test, status):
+    clearline(self._original_stderr)
+    pos = self.writeProgressPrefix()
+    # Add some color to the status message
+    if status == 'ok':
+      color = GREEN
+    elif status.isupper():
+      color = RED
+      # Use a newline when a test fails, so you can see a list of failures while
+      # the other tests are still running
+      status += '\n'
+    else:
+      color = CYAN
+    self.writeStatus(test, status, color, pos)
+    self._original_stderr.flush()

   def startTest(self, test):
     self.progress_counter += 1
-    assert self.test_count > 0
-    # Note: We explicitly do not use `super()` here but instead call `unittest.TestResult`. i.e.
-    # we skip the superclass (since we don't want its specific behaviour) and instead call its
-    # superclass.
+    # We explicitly don't call TextTestResult.startTest here since we delay all printing
+    # of results until `_write_status`.
     unittest.TestResult.startTest(self, test)
-    if self.progress_counter == 1:
-      self.updateStatus(test, '', GREEN)
-
-  def addSuccess(self, test):
-    unittest.TestResult.addSuccess(self, test)
-    self.updateStatus(test, 'ok', GREEN)
-
-  def addFailure(self, test, err):
-    unittest.TestResult.addFailure(self, test, err)
-    self.updateStatus(test, 'FAIL', RED)
-
-  def addError(self, test, err):
-    unittest.TestResult.addError(self, test, err)
-    self.updateStatus(test, 'ERROR', RED)
-
-  def addExpectedFailure(self, test, err):
-    unittest.TestResult.addExpectedFailure(self, test, err)
-    self.updateStatus(test, 'expected failure', RED)
-
-  def addUnexpectedSuccess(self, test, err):
-    unittest.TestResult.addUnexpectedSuccess(self, test, err)
-    self.updateStatus(test, 'UNEXPECTED SUCCESS', RED)
-
-  def addSkip(self, test, reason):
-    unittest.TestResult.addSkip(self, test, reason)
-    self.updateStatus(test, f"skipped '{reason}'", CYAN)

   def printErrors(self):
     # All tests have been run at this point so print a final newline
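
For reference, a minimal sketch of how the new `-v` counting is expected to behave,
illustrating the option handling added to test/runner.py above. This is not part of the
patch itself; the variable names below are illustrative only:

    import argparse

    parser = argparse.ArgumentParser()
    # Mirrors the patch: each `-v` increments the count; the default (0) keeps
    # buffered output and the single-line display.
    parser.add_argument('--verbose', '-v', action='count', default=0)
    parser.add_argument('--ansi', action='store_true', default=None)

    options = parser.parse_args(['-v', '-v'])

    # One `-v` switches to the multi-line, non-buffered ColorTextRunner output;
    # a second `-v` additionally enables test framework logging, which the patch
    # maps to EMTEST_VERBOSE via `options.verbose > 1`.
    use_single_line = bool(options.ansi) and not options.verbose
    emtest_verbose = options.verbose > 1
    print(use_single_line, emtest_verbose)  # with ['-v', '-v'] this prints: False True

A counted `-v` flag like this replaces the removed `-b`/`--buffer` option: buffering is
now the default, and verbosity is opted into incrementally rather than toggled.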