[libc++] Use pandas.DataFrame in compare-benchmarks

ldionne · ldionne · commit 2810a489d233 · 2025-09-25T20:43:57.000-04:00
This opens the door to performing more advanced computations on the
data we're comparing.
diff --git a/libcxx/utils/compare-benchmarks b/libcxx/utils/compare-benchmarks
@@ -7,28 +7,33 @@ import statistics
 import sys
 import tempfile
 
-import plotly
+import numpy
+import pandas
+import plotly.express
 import tabulate
 
-def parse_lnt(lines):
+def parse_lnt(lines, aggregate=statistics.median):
     """
-    Parse lines in LNT format and return a dictionnary of the form:
+    Parse lines in LNT format and return a list of dictionaries of the form:
 
-        {
-            'benchmark1': {
-                'metric1': [float],
-                'metric2': [float],
+        [
+            {
+                'benchmark': <benchmark1>,
+                <metric1>: float,
+                <metric2>: float,
                 ...
             },
-            'benchmark2': {
-                'metric1': [float],
-                'metric2': [float],
+            {
+                'benchmark': <benchmark2>,
+                <metric1>: float,
+                <metric2>: float,
                 ...
             },
             ...
-        }
+        ]
 
-    Each metric may have multiple values.
+    If a metric has multiple values associated with it, they are aggregated into a single
+    value using the provided aggregation function.
     """
     results = {}
     for line in lines:
@@ -37,61 +42,51 @@ def parse_lnt(lines):
             continue
 
         (identifier, value) = line.split(' ')
-        (name, metric) = identifier.split('.')
-        if name not in results:
-            results[name] = {}
-        if metric not in results[name]:
-            results[name][metric] = []
-        results[name][metric].append(float(value))
-    return results
-
-def plain_text_comparison(benchmarks, baseline, candidate, baseline_name=None, candidate_name=None):
+        (benchmark, metric) = identifier.split('.')
+        if benchmark not in results:
+            results[benchmark] = {'benchmark': benchmark}
+
+        entry = results[benchmark]
+        if metric not in entry:
+            entry[metric] = []
+        entry[metric].append(float(value))
+
+    for (bm, entry) in results.items():
+        for metric in entry:
+            if isinstance(entry[metric], list):
+                entry[metric] = aggregate(entry[metric])
+
+    return list(results.values())
+
+def plain_text_comparison(data, metric, baseline_name=None, candidate_name=None):
     """
-    Create a tabulated comparison of the baseline and the candidate.
+    Create a tabulated comparison of the baseline and the candidate for the given metric.
     """
+    data = data.replace(numpy.nan, None).sort_values(by='benchmark') # avoid NaNs in tabulate output
     headers = ['Benchmark', baseline_name, candidate_name, 'Difference', '% Difference']
     fmt = (None, '.2f', '.2f', '.2f', '.2f')
-    table = []
-    for (bm, base, cand) in zip(benchmarks, baseline, candidate):
-        diff = (cand - base) if base and cand else None
-        percent = 100 * (diff / base) if base and cand else None
-        row = [bm, base, cand, diff, percent]
-        table.append(row)
+    table = data[['benchmark', f'{metric}_baseline', f'{metric}_candidate', 'difference', 'percent']].set_index('benchmark')
     return tabulate.tabulate(table, headers=headers, floatfmt=fmt, numalign='right')
 
-def create_chart(benchmarks, baseline, candidate, subtitle=None, baseline_name=None, candidate_name=None):
+def create_chart(data, metric, subtitle=None, baseline_name=None, candidate_name=None):
     """
-    Create a bar chart comparing 'baseline' and 'candidate'.
+    Create a bar chart comparing the given metric between the baseline and the candidate.
     """
-    figure = plotly.graph_objects.Figure(layout={
-        'title': {
-            'text': f'{baseline_name} vs {candidate_name}',
-            'subtitle': {'text': subtitle}
-        }
+    data = data.sort_values(by='benchmark').rename(columns={
+        f'{metric}_baseline': baseline_name,
+        f'{metric}_candidate': candidate_name
     })
-    figure.add_trace(plotly.graph_objects.Bar(x=benchmarks, y=baseline, name=baseline_name))
-    figure.add_trace(plotly.graph_objects.Bar(x=benchmarks, y=candidate, name=candidate_name))
+    figure = plotly.express.bar(data, title=f'{baseline_name} vs {candidate_name}',
+                                      subtitle=subtitle,
+                                      x='benchmark', y=[baseline_name, candidate_name], barmode='group')
+    figure.update_layout(xaxis_title='', yaxis_title='', legend_title='')
     return figure
 
-def prepare_series(baseline, candidate, metric, aggregate=statistics.median):
-    """
-    Prepare the data for being formatted or displayed as a chart.
-
-    Metrics that have more than one value are aggregated using the given aggregation function.
-    """
-    all_benchmarks = sorted(list(set(baseline.keys()) | set(candidate.keys())))
-    baseline_series = []
-    candidate_series = []
-    for bm in all_benchmarks:
-        baseline_series.append(aggregate(baseline[bm][metric]) if bm in baseline and metric in baseline[bm] else None)
-        candidate_series.append(aggregate(candidate[bm][metric]) if bm in candidate and metric in candidate[bm] else None)
-    return (all_benchmarks, baseline_series, candidate_series)
-
 def main(argv):
     parser = argparse.ArgumentParser(
         prog='compare-benchmarks',
         description='Compare the results of two sets of benchmarks in LNT format.',
-        epilog='This script requires the `tabulate` and the `plotly` Python modules.')
+        epilog='This script depends on the modules listed in `libcxx/utils/requirements.txt`.')
     parser.add_argument('baseline', type=argparse.FileType('r'),
         help='Path to a LNT format file containing the benchmark results for the baseline.')
     parser.add_argument('candidate', type=argparse.FileType('r'),
@@ -127,26 +122,28 @@ def main(argv):
     if args.format == 'text' and args.open:
         parser.error('Passing --open makes no sense with --format=text')
 
-    baseline = parse_lnt(args.baseline.readlines())
-    candidate = parse_lnt(args.candidate.readlines())
+    baseline = pandas.DataFrame(parse_lnt(args.baseline.readlines()))
+    candidate = pandas.DataFrame(parse_lnt(args.candidate.readlines()))
 
-    if args.filter is not None:
-        regex = re.compile(args.filter)
-        baseline = {k: v for (k, v) in baseline.items() if regex.search(k)}
-        candidate = {k: v for (k, v) in candidate.items() if regex.search(k)}
+    # Join the baseline and the candidate into a single dataframe and add some new columns
+    data = baseline.merge(candidate, how='outer', on='benchmark', suffixes=('_baseline', '_candidate'))
+    data['difference'] = data[f'{args.metric}_candidate'] - data[f'{args.metric}_baseline']
+    data['percent'] = 100 * (data['difference'] / data[f'{args.metric}_baseline'])
 
-    (benchmarks, baseline_series, candidate_series) = prepare_series(baseline, candidate, args.metric)
+    if args.filter is not None:
+        keeplist = [b for b in data['benchmark'] if re.search(args.filter, b) is not None]
+        data = data[data['benchmark'].isin(keeplist)]
 
     if args.format == 'chart':
-        figure = create_chart(benchmarks, baseline_series, candidate_series, subtitle=args.subtitle,
-                                                                             baseline_name=args.baseline_name,
-                                                                             candidate_name=args.candidate_name)
+        figure = create_chart(data, args.metric, subtitle=args.subtitle,
+                                                 baseline_name=args.baseline_name,
+                                                 candidate_name=args.candidate_name)
         do_open = args.output is None or args.open
         output = args.output or tempfile.NamedTemporaryFile(suffix='.html').name
         plotly.io.write_html(figure, file=output, auto_open=do_open)
     else:
-        diff = plain_text_comparison(benchmarks, baseline_series, candidate_series, baseline_name=args.baseline_name,
-                                                                                    candidate_name=args.candidate_name)
+        diff = plain_text_comparison(data, args.metric, baseline_name=args.baseline_name,
+                                                        candidate_name=args.candidate_name)
         diff += '\n'
         if args.output is not None:
             with open(args.output, 'w') as out:
diff --git a/libcxx/utils/visualize-historical b/libcxx/utils/visualize-historical
@@ -158,28 +158,28 @@ def parse_lnt(lines, aggregate=statistics.median):
     If a metric has multiple values associated to it, they are aggregated into a single
     value using the provided aggregation function.
     """
-    results = []
+    results = {}
     for line in lines:
         line = line.strip()
         if not line:
             continue
 
         (identifier, value) = line.split(' ')
         (benchmark, metric) = identifier.split('.')
-        if not any(x['benchmark'] == benchmark for x in results):
-            results.append({'benchmark': benchmark})
+        if benchmark not in results:
+            results[benchmark] = {'benchmark': benchmark}
 
-        entry = next(x for x in results if x['benchmark'] == benchmark)
+        entry = results[benchmark]
         if metric not in entry:
             entry[metric] = []
         entry[metric].append(float(value))
 
-    for entry in results:
+    for (bm, entry) in results.items():
         for metric in entry:
             if isinstance(entry[metric], list):
                 entry[metric] = aggregate(entry[metric])
 
-    return results
+    return list(results.values())
 
 def sorted_revlist(git_repo, commits):
     """