cat comparison.md
```

## Version Comparison Workflow

For comparing performance across multiple commits (e.g., to find when a regression was introduced), use the `compile_benchmark_versions.sh` script.

### `compile_benchmark_versions.sh`

This script compiles the benchmark tool for every commit in a range, making it easy to run performance comparisons across different versions.

**Features:**

- **Idempotent**: Only compiles versions whose binaries don't already exist
- **Safe**: Uses git worktrees in temporary directories, so your working directory is untouched
- **Convenient**: Names each binary after its commit SHA for easy identification
- **Non-intrusive**: Works even with uncommitted changes in your main working directory
- **Storage**: Binaries live in `$XDG_DATA_HOME/string_pipeline/benchmarks/` (typically `~/.local/share/string_pipeline/benchmarks/`)
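
The idempotency check comes down to a file-existence test on the SHA-named binary in the storage directory. A minimal sketch of that logic (the `needs_compile` helper is illustrative, not part of the actual script):

```shell
#!/usr/bin/env sh
# Illustrative sketch: binaries are keyed by short commit SHA, and a version
# is recompiled only when its binary is missing from the storage directory.
BENCH_DIR="${XDG_DATA_HOME:-$HOME/.local/share}/string_pipeline/benchmarks"

needs_compile() {
    # $1 = short commit SHA; true (exit 0) when no binary exists for it yet
    [ ! -x "$BENCH_DIR/bench_throughput_$1" ]
}
```

Because the check is per-binary, re-running the script after adding new commits only compiles the new ones.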

**Usage:**

```bash
# Compile all versions from 78594af (stabilized benchmark tool v1.0.0) to HEAD
./scripts/compile_benchmark_versions.sh

# Compile a specific range
./scripts/compile_benchmark_versions.sh --start abc1234 --end def5678

# See what would be compiled (dry run)
./scripts/compile_benchmark_versions.sh --dry-run

# List already compiled versions
./scripts/compile_benchmark_versions.sh --list

# Remove all compiled versions
./scripts/compile_benchmark_versions.sh --clean

# Verbose output for debugging
./scripts/compile_benchmark_versions.sh --verbose
```

**Example Workflow - Finding a Performance Regression:**

```bash
# 1. Compile all versions
./scripts/compile_benchmark_versions.sh

# 2. Set up the benchmark directory path
BENCH_DIR="${XDG_DATA_HOME:-$HOME/.local/share}/string_pipeline/benchmarks"

# 3. Run benchmarks on two versions
"$BENCH_DIR/bench_throughput_abc1234" \
    --sizes 10000 \
    --iterations 100 \
    --output before.json

"$BENCH_DIR/bench_throughput_def5678" \
    --sizes 10000 \
    --iterations 100 \
    --output after.json

# 4. Compare results
python3 scripts/compare_benchmarks.py before.json after.json

# 5. If a regression is found, bisect by testing commits in between
"$BENCH_DIR/bench_throughput_xyz9999" --sizes 10000 --iterations 100 --output middle.json
python3 scripts/compare_benchmarks.py before.json middle.json
```
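
At its core, the regression check in step 4 is the relative change between the two runs' timings. A tiny illustrative sketch of that arithmetic (integer percentages only; the real statistical comparison is done by `compare_benchmarks.py`):

```shell
#!/usr/bin/env sh
# Illustrative only: percent change between two mean timings in milliseconds.
# A positive result means the "after" version is slower (a regression).
percent_change() {
    before_ms=$1
    after_ms=$2
    echo $(( (after_ms - before_ms) * 100 / before_ms ))
}

percent_change 200 210   # prints 5 (5% slower)
```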

### `compare_benchmark_versions.sh`

After compiling benchmark binaries, use this script to quickly compare performance between two versions using hyperfine.

**Features:**

- **Fast comparison**: Uses hyperfine for accurate benchmark timing
- **Automatic validation**: Checks that both binaries exist before running
- **Flexible parameters**: Customize warmup, runs, and sizes
- **Clear output**: Shows which version is faster, with statistical confidence

**Requirements:**

- hyperfine must be installed (`apt install hyperfine` or `brew install hyperfine`)

**Usage:**

```bash
# Basic comparison with defaults
./scripts/compare_benchmark_versions.sh 78594af c5a8a11

# Custom warmup and runs for better accuracy
./scripts/compare_benchmark_versions.sh 78594af c5a8a11 --warmup 5 --runs 20

# Compare with specific benchmark parameters
./scripts/compare_benchmark_versions.sh abc1234 def5678 --sizes 10000
```
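
Conceptually, such a comparison reduces to a single hyperfine invocation over the two SHA-named binaries. A hedged sketch of that idea (the `compare_two` wrapper is illustrative; the real script's flags and validation may differ):

```shell
#!/usr/bin/env sh
# Illustrative wrapper, not the real script: time two SHA-named binaries
# with hyperfine, which reports mean, stddev, and a relative-speed summary.
BENCH_DIR="${XDG_DATA_HOME:-$HOME/.local/share}/string_pipeline/benchmarks"

compare_two() {
    a=$1
    b=$2
    # Fail early if either binary is missing, mirroring the script's validation
    [ -x "$BENCH_DIR/bench_throughput_$a" ] || { echo "missing binary for $a" >&2; return 1; }
    [ -x "$BENCH_DIR/bench_throughput_$b" ] || { echo "missing binary for $b" >&2; return 1; }
    hyperfine --warmup 3 --runs 10 \
        "$BENCH_DIR/bench_throughput_$a --sizes 10000" \
        "$BENCH_DIR/bench_throughput_$b --sizes 10000"
}
```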

**Example Workflow - Performance Comparison:**

```bash
# 1. Compile the versions you want to compare
./scripts/compile_benchmark_versions.sh --start 78594af --end c5a8a11

# 2. Run the hyperfine comparison
./scripts/compare_benchmark_versions.sh 78594af c5a8a11

# Output shows:
# - Mean execution time for each version
# - Standard deviation
# - Min/max range
# - Relative speed comparison (e.g., "1.05x faster")
```

**Important Notes:**

- This compares the **execution time** of the entire benchmark run, not the benchmark's throughput metrics
- Both versions run with identical parameters for a fair comparison
- Hyperfine handles warmup runs and statistical analysis automatically
- For more detailed performance analysis, use the benchmark JSON output with `compare_benchmarks.py`

## Configuration

### Benchmark Parameters