diff --git a/.github/workflows/bench-command.yml b/.github/workflows/bench-command.yml new file mode 100644 index 0000000..6123465 --- /dev/null +++ b/.github/workflows/bench-command.yml @@ -0,0 +1,620 @@ +name: Benchmark Command +# Trigger on-demand benchmarks via PR comments +# Usage: /bench <ref1> <ref2> [size] [warmup] [runs] +# Examples: +# /bench main v0.13.0 +# /bench abc12345 def56789 50000 +# /bench main HEAD 50000 5 20 +# Only repository owner can trigger this command + +on: + issue_comment: + types: [created] + +# Prevent concurrent benchmark runs on the same PR +concurrency: + group: bench-${{ github.event.issue.number }} + cancel-in-progress: true + +jobs: + check-permission: + name: Check Command Permission + # Only run on PR comments (not regular issues) + if: | + github.event.issue.pull_request && + startsWith(github.event.comment.body, '/bench ') + runs-on: ubuntu-latest + permissions: + issues: write + pull-requests: write + outputs: + authorized: ${{ steps.check.outputs.authorized }} + ref1: ${{ steps.parse.outputs.ref1 }} + ref2: ${{ steps.parse.outputs.ref2 }} + size: ${{ steps.parse.outputs.size }} + warmup: ${{ steps.parse.outputs.warmup }} + runs: ${{ steps.parse.outputs.runs }} + steps: + - name: Check if commenter is repo owner + id: check + uses: actions/github-script@v7 + with: + script: | + const commenter = context.payload.comment.user.login; + const owner = context.payload.repository.owner.login; + const isOwner = commenter === owner; + + console.log(`Commenter: ${commenter}`); + console.log(`Repository owner: ${owner}`); + console.log(`Is owner: ${isOwner}`); + + if (!isOwner) { + await github.rest.reactions.createForIssueComment({ + owner: context.repo.owner, + repo: context.repo.repo, + comment_id: context.payload.comment.id, + content: '-1' + }); + + await github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.issue.number, + body: `❌ **Permission denied**: Only @${owner} can trigger benchmark comparisons.` + }); + } else { + await github.rest.reactions.createForIssueComment({ + owner: context.repo.owner, + repo: context.repo.repo, + comment_id: context.payload.comment.id, + content: 'eyes' + }); + } + + core.setOutput('authorized', isOwner); + + - name: Parse benchmark command + id: parse + if: steps.check.outputs.authorized == 'true' + continue-on-error: true + run: | + set -euo pipefail + COMMENT="${{ github.event.comment.body }}" + + # Parse command: /bench ref1 ref2 [size] [warmup] [runs] + # Remove /bench prefix and extract parameters + PARAMS=$(echo "$COMMENT" | sed 's|^/bench[[:space:]]*||') + + # Extract parameters + REF1=$(echo "$PARAMS" | awk '{print $1}') + REF2=$(echo "$PARAMS" | awk '{print $2}') + SIZE=$(echo "$PARAMS" | awk '{print $3}') + WARMUP=$(echo "$PARAMS" | awk '{print $4}') + RUNS=$(echo "$PARAMS" | awk '{print $5}') + + # Validate required parameters + if [ -z "$REF1" ] || [ -z "$REF2" ]; then + echo "error=Invalid format. Missing required parameters." >> $GITHUB_OUTPUT + echo "parse_failed=true" >> $GITHUB_OUTPUT + exit 1 + fi + + # Set defaults for optional parameters + if [ -z "$SIZE" ] || ! [[ "$SIZE" =~ ^[0-9]+$ ]]; then + SIZE=10000 + fi + + if [ -z "$WARMUP" ] || ! [[ "$WARMUP" =~ ^[0-9]+$ ]]; then + WARMUP=5 + fi + + if [ -z "$RUNS" ] || !
[[ "$RUNS" =~ ^[0-9]+$ ]]; then + RUNS=50 + fi + + echo "ref1=$REF1" >> $GITHUB_OUTPUT + echo "ref2=$REF2" >> $GITHUB_OUTPUT + echo "size=$SIZE" >> $GITHUB_OUTPUT + echo "warmup=$WARMUP" >> $GITHUB_OUTPUT + echo "runs=$RUNS" >> $GITHUB_OUTPUT + echo "parse_failed=false" >> $GITHUB_OUTPUT + + echo "Parsed parameters:" + echo " ref1: $REF1" + echo " ref2: $REF2" + echo " size: $SIZE" + echo " warmup: $WARMUP" + echo " runs: $RUNS" + + - name: Post parse error + if: steps.check.outputs.authorized == 'true' && steps.parse.outcome == 'failure' + uses: actions/github-script@v7 + with: + script: | + await github.rest.reactions.createForIssueComment({ + owner: context.repo.owner, + repo: context.repo.repo, + comment_id: context.payload.comment.id, + content: 'confused' + }); + + const errorMessage = [ + '❌ **Invalid command format**', + '', + '**Usage:** `/bench <ref1> <ref2> [size] [warmup] [runs]`', + '', + '**Examples:**', + '```', + '/bench main v0.13.0', + '/bench abc12345 def56789 50000', + '/bench main HEAD 50000 5', + '/bench main HEAD 50000 5 20', + '```', + '', + '**Parameters:**', + '- `ref1` (required): Baseline git reference', + '- `ref2` (required): Current git reference', + '- `size` (optional): Input size (default: 10000)', + '- `warmup` (optional): Warmup runs (default: 5)', + '- `runs` (optional): Benchmark runs (default: 50)', + '', + '**Note:** This runs all 28 predefined templates with a single input size.', + 'For detailed per-template analysis with hyperfine, use the local tools.' + ].join('\n'); + + await github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.issue.number, + body: errorMessage + }); + + - name: Post acknowledgment + if: steps.check.outputs.authorized == 'true' && steps.parse.outcome == 'success' + uses: actions/github-script@v7 + with: + script: | + const ref1 = '${{ steps.parse.outputs.ref1 }}'; + const ref2 = '${{ steps.parse.outputs.ref2 }}'; + const size = '${{ steps.parse.outputs.size }}'; + const warmup = '${{ steps.parse.outputs.warmup }}'; + const runs = '${{ steps.parse.outputs.runs }}'; + + const message = [ + 'πŸš€ **Benchmark comparison started**', + '', + '**Comparing:**', + `- **Baseline**: \`${ref1}\``, + `- **Current**: \`${ref2}\``, + '', + '**Parameters:**', + `- **Size**: ${size} paths`, + `- **Warmup**: ${warmup} runs`, + `- **Runs**: ${runs} measurements`, + `- **Templates**: All 28 predefined templates`, + '', + 'Results will be posted here when complete...'
+ ].join('\n'); + + await github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.issue.number, + body: message + }); + + run-benchmarks: + name: Run Benchmark Comparison + needs: check-permission + if: needs.check-permission.outputs.authorized == 'true' && needs.check-permission.outputs.ref1 != '' + runs-on: ubuntu-latest + permissions: + contents: read + pull-requests: write + issues: write + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + fetch-depth: 0 # Need full history to access all refs + + - name: Install Rust toolchain + uses: dtolnay/rust-toolchain@stable + + - name: Cache Rust dependencies + uses: Swatinem/rust-cache@v2 + + - name: Install hyperfine + run: | + wget https://github.com/sharkdp/hyperfine/releases/download/v1.18.0/hyperfine_1.18.0_amd64.deb + sudo dpkg -i hyperfine_1.18.0_amd64.deb + hyperfine --version + + - name: Fetch refs from remote + run: | + set -euo pipefail + REF1="${{ needs.check-permission.outputs.ref1 }}" + REF2="${{ needs.check-permission.outputs.ref2 }}" + + echo "Fetching ref1: $REF1" + git fetch origin "$REF1" || git fetch origin "refs/tags/$REF1" || git fetch origin "refs/heads/$REF1" || true + + echo "Fetching ref2: $REF2" + git fetch origin "$REF2" || git fetch origin "refs/tags/$REF2" || git fetch origin "refs/heads/$REF2" || true + + # Update remote refs + git fetch origin --tags + + - name: Validate and order refs + id: validate + run: | + set -euo pipefail + REF1="${{ needs.check-permission.outputs.ref1 }}" + REF2="${{ needs.check-permission.outputs.ref2 }}" + + # Validate both refs exist + if ! git rev-parse --verify "$REF1" >/dev/null 2>&1; then + echo "error=Ref '$REF1' not found" >> $GITHUB_OUTPUT + exit 1 + fi + + if ! 
git rev-parse --verify "$REF2" >/dev/null 2>&1; then + echo "error=Ref '$REF2' not found" >> $GITHUB_OUTPUT + exit 1 + fi + + # Resolve to full SHAs + SHA1=$(git rev-parse "$REF1") + SHA2=$(git rev-parse "$REF2") + + # Check if both refs resolve to the same commit + if [ "$SHA1" = "$SHA2" ]; then + echo "same_commit=true" >> $GITHUB_OUTPUT + echo "ref1_sha=$(git rev-parse --short=8 $REF1)" >> $GITHUB_OUTPUT + echo "ref2_sha=$(git rev-parse --short=8 $REF2)" >> $GITHUB_OUTPUT + exit 0 + fi + + echo "same_commit=false" >> $GITHUB_OUTPUT + + # Determine which is older (baseline) and newer (current) + # Get commit timestamps + TIMESTAMP1=$(git log -1 --format=%ct "$SHA1") + TIMESTAMP2=$(git log -1 --format=%ct "$SHA2") + + if [ "$TIMESTAMP1" -le "$TIMESTAMP2" ]; then + # REF1 is older or same age -> baseline + BASELINE_REF="$REF1" + BASELINE_SHA="$SHA1" + CURRENT_REF="$REF2" + CURRENT_SHA="$SHA2" + else + # REF2 is older -> baseline + BASELINE_REF="$REF2" + BASELINE_SHA="$SHA2" + CURRENT_REF="$REF1" + CURRENT_SHA="$SHA1" + fi + + # Output the determined ordering + echo "baseline_ref=$BASELINE_REF" >> $GITHUB_OUTPUT + echo "baseline_sha=$(git rev-parse --short=8 $BASELINE_SHA)" >> $GITHUB_OUTPUT + echo "current_ref=$CURRENT_REF" >> $GITHUB_OUTPUT + echo "current_sha=$(git rev-parse --short=8 $CURRENT_SHA)" >> $GITHUB_OUTPUT + + # Keep original refs for display + echo "ref1_sha=$(git rev-parse --short=8 $REF1)" >> $GITHUB_OUTPUT + echo "ref2_sha=$(git rev-parse --short=8 $REF2)" >> $GITHUB_OUTPUT + + echo "Determined ordering:" + echo " Baseline (older): $BASELINE_REF ($BASELINE_SHA)" + echo " Current (newer): $CURRENT_REF ($CURRENT_SHA)" + + - name: Handle same commit case + if: steps.validate.outputs.same_commit == 'true' + uses: actions/github-script@v7 + with: + script: | + const ref1 = '${{ needs.check-permission.outputs.ref1 }}'; + const ref2 = '${{ needs.check-permission.outputs.ref2 }}'; + const sha = '${{ steps.validate.outputs.ref1_sha }}'; + + const message = [ + '⚠️ **Same commit detected**', + '', + `Both \`${ref1}\` and \`${ref2}\` resolve to the same commit: \`${sha}\``, + '', + 'No benchmark comparison needed - the refs are identical.', + '', + '**Tip:** To compare different versions, use refs that point to different commits.' + ].join('\n'); + + await github.rest.reactions.createForIssueComment({ + owner: context.repo.owner, + repo: context.repo.repo, + comment_id: ${{ github.event.comment.id }}, + content: 'eyes' + }); + + await github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.issue.number, + body: message + }); + + - name: Check benchmark tool exists in baseline + if: steps.validate.outputs.same_commit == 'false' + id: check_baseline_tool + run: | + set -euo pipefail + BASELINE_REF="${{ steps.validate.outputs.baseline_ref }}" + echo "Checking out $BASELINE_REF..." + git checkout "$BASELINE_REF" + + # Check if bench-throughput binary is defined in Cargo.toml + if ! grep -q 'name = "bench-throughput"' Cargo.toml 2>/dev/null; then + echo "exists=false" >> $GITHUB_OUTPUT + echo "❌ Benchmark tool 'bench-throughput' not found in $BASELINE_REF" + exit 0 + fi + + # Check if the source file exists + if ! 
grep -A 2 'name = "bench-throughput"' Cargo.toml | grep -q 'path.*='; then + echo "exists=false" >> $GITHUB_OUTPUT + echo "❌ Benchmark tool source file not found in $BASELINE_REF" + exit 0 + fi + + echo "exists=true" >> $GITHUB_OUTPUT + echo "βœ“ Benchmark tool found in $BASELINE_REF" + + - name: Check benchmark tool exists in current + if: steps.validate.outputs.same_commit == 'false' + id: check_current_tool + run: | + set -euo pipefail + CURRENT_REF="${{ steps.validate.outputs.current_ref }}" + echo "Checking out $CURRENT_REF..." + git checkout "$CURRENT_REF" + + # Check if bench-throughput binary is defined in Cargo.toml + if ! grep -q 'name = "bench-throughput"' Cargo.toml 2>/dev/null; then + echo "exists=false" >> $GITHUB_OUTPUT + echo "❌ Benchmark tool 'bench-throughput' not found in $CURRENT_REF" + exit 0 + fi + + # Check if the source file exists + if ! grep -A 2 'name = "bench-throughput"' Cargo.toml | grep -q 'path.*='; then + echo "exists=false" >> $GITHUB_OUTPUT + echo "❌ Benchmark tool source file not found in $CURRENT_REF" + exit 0 + fi + + echo "exists=true" >> $GITHUB_OUTPUT + echo "βœ“ Benchmark tool found in $CURRENT_REF" + + - name: Post missing tool error + if: steps.validate.outputs.same_commit == 'false' && (steps.check_baseline_tool.outputs.exists == 'false' || steps.check_current_tool.outputs.exists == 'false') + uses: actions/github-script@v7 + with: + script: | + const baseline_ref = '${{ steps.validate.outputs.baseline_ref }}'; + const current_ref = '${{ steps.validate.outputs.current_ref }}'; + const baseline_exists = '${{ steps.check_baseline_tool.outputs.exists }}' === 'true'; + const current_exists = '${{ steps.check_current_tool.outputs.exists }}' === 'true'; + + let message = '❌ **Benchmark comparison failed**\n\n'; + message += '**Reason**: The benchmark tool (`bench-throughput`) does not exist in '; + + if (!baseline_exists && !current_exists) { + message += `both refs:\n- \`${baseline_ref}\` (baseline/older)\n- \`${current_ref}\` (current/newer)`; + } else if (!baseline_exists) { + message += `baseline ref: \`${baseline_ref}\` (older commit)`; + } else { + message += `current ref: \`${current_ref}\` (newer commit)`; + } + + message += '\n\n**Solution**: Ensure both refs contain the benchmark tool.'; + message += '\n\n**Example**: `/bench main HEAD`'; + + await github.rest.reactions.createForIssueComment({ + owner: context.repo.owner, + repo: context.repo.repo, + comment_id: ${{ github.event.comment.id }}, + content: 'confused' + }); + + await github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.issue.number, + body: message + }); + + - name: Build baseline benchmark tool + if: steps.validate.outputs.same_commit == 'false' && steps.check_baseline_tool.outputs.exists == 'true' && steps.check_current_tool.outputs.exists == 'true' + run: | + set -euo pipefail + BASELINE_REF="${{ steps.validate.outputs.baseline_ref }}" + + echo "Checking out baseline: $BASELINE_REF..." + git checkout "$BASELINE_REF" + + echo "Building benchmark tool..." + if ! 
cargo build --release --bin bench-throughput 2>&1 | tee build_baseline.log; then + echo "❌ Failed to build benchmark tool for $BASELINE_REF" + exit 1 + fi + + # Save binary with unique name + cp target/release/bench-throughput bench-throughput-baseline + echo "βœ“ Built baseline benchmark tool" + + - name: Build current benchmark tool + if: steps.validate.outputs.same_commit == 'false' && steps.check_baseline_tool.outputs.exists == 'true' && steps.check_current_tool.outputs.exists == 'true' + run: | + set -euo pipefail + CURRENT_REF="${{ steps.validate.outputs.current_ref }}" + + echo "Checking out current: $CURRENT_REF..." + git checkout "$CURRENT_REF" + + # Rebuild in case dependencies changed + echo "Building benchmark tool..." + if ! cargo build --release --bin bench-throughput 2>&1 | tee build_current.log; then + echo "❌ Failed to build benchmark tool for $CURRENT_REF" + exit 1 + fi + + # Save binary with unique name + cp target/release/bench-throughput bench-throughput-current + echo "βœ“ Built current benchmark tool" + + - name: Run benchmarks + if: steps.validate.outputs.same_commit == 'false' && steps.check_baseline_tool.outputs.exists == 'true' && steps.check_current_tool.outputs.exists == 'true' + run: | + set -euo pipefail + SIZE="${{ needs.check-permission.outputs.size }}" + WARMUP="${{ needs.check-permission.outputs.warmup }}" + RUNS="${{ needs.check-permission.outputs.runs }}" + BASELINE_SHA="${{ steps.validate.outputs.baseline_sha }}" + CURRENT_SHA="${{ steps.validate.outputs.current_sha }}" + + echo "Running benchmark comparison..." + echo " Baseline: $BASELINE_SHA" + echo " Current: $CURRENT_SHA" + echo " Size: $SIZE paths" + echo " Warmup: $WARMUP runs" + echo " Runs: $RUNS measurements" + echo "" + + # Run hyperfine with markdown export + hyperfine \ + --warmup "$WARMUP" \ + --runs "$RUNS" \ + --export-markdown comparison_results.md \ + --command-name "baseline ($BASELINE_SHA)" \ + "./bench-throughput-baseline --template all --size $SIZE --output /dev/null" \ + --command-name "current ($CURRENT_SHA)" \ + "./bench-throughput-current --template all --size $SIZE --output /dev/null" + + echo "βœ“ Benchmark comparison complete" + + - name: Post results to PR + if: steps.validate.outputs.same_commit == 'false' && steps.check_baseline_tool.outputs.exists == 'true' && steps.check_current_tool.outputs.exists == 'true' + uses: actions/github-script@v7 + with: + script: | + const fs = require('fs'); + const comparison_results = fs.readFileSync('comparison_results.md', 'utf8'); + const baseline_sha = '${{ steps.validate.outputs.baseline_sha }}'; + const current_sha = '${{ steps.validate.outputs.current_sha }}'; + const size = '${{ needs.check-permission.outputs.size }}'; + const warmup = '${{ needs.check-permission.outputs.warmup }}'; + const runs = '${{ needs.check-permission.outputs.runs }}'; + + const body = [ + '## πŸ”¬ Benchmark Comparison Report', + '', + '**Requested by:** @${{ github.event.comment.user.login }}', + '', + '**Configuration:**', + `- **Baseline (older):** \`${baseline_sha}\``, + `- **Current (newer):** \`${current_sha}\``, + `- **Test:** All 28 predefined templates`, + `- **Input size:** ${size} paths per run`, + `- **Warmup:** ${warmup} runs`, + `- **Measurements:** ${runs} runs`, + '', + '**Results:**', + '', + comparison_results.trim(), + '', + '> **Interpretation:**', + '> - **Mean**: Average execution time across all runs', + '> - **Min/Max**: Fastest and slowest runs observed', + '> - **Relative**: Speed comparison (1.00 = baseline, <1.00 = faster, >1.00 
= slower)', + '> - Each run processes all 28 templates on ${size} generated paths', + '', + '---', + '', + 'Triggered by [/bench command](${{ github.event.comment.html_url }})', + '', + '**Note:** Build logs are available in the [workflow artifacts](https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}).' + ].join('\n'); + + await github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.issue.number, + body: body + }); + + - name: Upload benchmark artifacts + if: steps.validate.outputs.same_commit == 'false' && steps.check_baseline_tool.outputs.exists == 'true' && steps.check_current_tool.outputs.exists == 'true' + uses: actions/upload-artifact@v4 + with: + name: benchmark-comparison-${{ github.event.comment.id }} + path: | + comparison_results.md + build_baseline.log + build_current.log + retention-days: 30 + + - name: Add success reaction + if: steps.validate.outputs.same_commit == 'false' && steps.check_baseline_tool.outputs.exists == 'true' && steps.check_current_tool.outputs.exists == 'true' + uses: actions/github-script@v7 + with: + script: | + await github.rest.reactions.createForIssueComment({ + owner: context.repo.owner, + repo: context.repo.repo, + comment_id: ${{ github.event.comment.id }}, + content: 'rocket' + }); + + handle-error: + name: Handle Errors + needs: [check-permission, run-benchmarks] + if: failure() && needs.check-permission.outputs.authorized == 'true' + runs-on: ubuntu-latest + permissions: + issues: write + steps: + - name: Post error message + uses: actions/github-script@v7 + with: + script: | + const ref1 = '${{ needs.check-permission.outputs.ref1 }}'; + const ref2 = '${{ needs.check-permission.outputs.ref2 }}'; + + await github.rest.reactions.createForIssueComment({ + owner: context.repo.owner, + repo: context.repo.repo, + comment_id: ${{ github.event.comment.id }}, + content: 'confused' + }); + + const errorBody = [ + '❌ **Benchmark comparison failed**', + '', + `Failed to compare \`${ref1}\` and \`${ref2}\`.`, + '', + '**Please check:**', + '- Both refs exist and are valid git references (branches, tags, or commits)', + '- The benchmark tool exists in both refs', + '- The code at those refs compiles successfully', + '- Parameters are in correct format: `/bench [size] [warmup] [runs]`', + '', + '**See the [workflow run](https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}) for details.**' + ].join('\n'); + + await github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.issue.number, + body: errorBody + }); diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml deleted file mode 100644 index 459499d..0000000 --- a/.github/workflows/benchmark.yml +++ /dev/null @@ -1,19 +0,0 @@ -name: Performance Benchmarks -on: [push, pull_request] - -jobs: - benchmark: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - uses: dtolnay/rust-toolchain@stable - - name: Build benchmark tool - run: cargo build --release --bin string-pipeline-bench - - name: Run benchmarks - run: | - ./target/release/string-pipeline-bench --iterations 5000 > benchmark_results.txt - - name: Upload results - uses: actions/upload-artifact@v4 - with: - name: benchmark-results - path: benchmark_results.json diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml index 570e3ca..943063c 100644 --- a/.github/workflows/cd.yml +++ b/.github/workflows/cd.yml @@ -217,9 +217,8 @@ jobs: zsh xz-utils 
liblz4-tool musl-tools brotli zstd - name: Install Rust - uses: dtolnay/rust-toolchain@master + uses: dtolnay/rust-toolchain@stable with: - toolchain: nightly target: ${{ matrix.target }} # for some reason, the above action doesn't seem to set the target correctly diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0c0ce61..67746bc 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -16,7 +16,7 @@ jobs: - name: Checkout repository uses: actions/checkout@v4 - name: Install Rust toolchain - uses: dtolnay/rust-toolchain@nightly + uses: dtolnay/rust-toolchain@stable - uses: Swatinem/rust-cache@v2 - name: Run tests run: cargo test --locked --all-features --workspace -- --nocapture @@ -28,7 +28,7 @@ jobs: - name: Checkout repository uses: actions/checkout@v4 - name: Install Rust toolchain - uses: dtolnay/rust-toolchain@nightly + uses: dtolnay/rust-toolchain@stable with: components: rustfmt - uses: Swatinem/rust-cache@v2 diff --git a/.gitignore b/.gitignore index ea8c4bf..7a9e023 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,7 @@ /target + +# Benchmark results +bench_results.json +benchmark_results.json +benchmark_results.txt +comparison.md diff --git a/Cargo.lock b/Cargo.lock index e3af432..211cbad 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -53,7 +53,7 @@ version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "79947af37f4177cfead1110013d678905c37501914fba0efea834c3fe9a8d60c" dependencies = [ - "windows-sys", + "windows-sys 0.59.0", ] [[package]] @@ -64,7 +64,7 @@ checksum = "6680de5231bd6ee4c6191b8a1325daa282b415391ec9d3a37bd34f2060dc73fa" dependencies = [ "anstyle", "once_cell_polyfill", - "windows-sys", + "windows-sys 0.59.0", ] [[package]] @@ -189,6 +189,17 @@ version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990" +[[package]] +name = "comfy-table" +version = "7.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b03b7db8e0b4b2fdad6c551e634134e99ec000e5c8c3b6856c65e8bbaded7a3b" +dependencies = [ + "crossterm 0.29.0", + "unicode-segmentation", + "unicode-width", +] + [[package]] name = "cpufeatures" version = "0.2.17" @@ -256,6 +267,45 @@ version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" +[[package]] +name = "crossterm" +version = "0.28.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "829d955a0bb380ef178a640b91779e3987da38c9aea133b20614cfed8cdea9c6" +dependencies = [ + "bitflags", + "crossterm_winapi", + "mio", + "parking_lot", + "rustix 0.38.44", + "signal-hook", + "signal-hook-mio", + "winapi", +] + +[[package]] +name = "crossterm" +version = "0.29.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8b9f2e4c67f833b660cdb0a3523065869fb35570177239812ed4c905aeff87b" +dependencies = [ + "bitflags", + "crossterm_winapi", + "document-features", + "parking_lot", + "rustix 1.0.7", + "winapi", +] + +[[package]] +name = "crossterm_winapi" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acdd7c62a3665c7f6830a51635d9ac9b23ed385797f70a83bb8bafe9c572ab2b" +dependencies = [ + "winapi", +] + [[package]] name = "crunchy" version = "0.2.3" @@ -296,6 +346,15 @@ dependencies = [ "crypto-common", ] +[[package]] +name = "document-features" +version = "0.2.12" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4b8a88685455ed29a21542a33abd9cb6510b6b129abadabdcef0f4c55bc8f61" +dependencies = [ + "litrs", +] + [[package]] name = "either" version = "1.15.0" @@ -309,7 +368,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cea14ef9355e3beab063703aa9dab15afd25f0667c341310c1e5274bb1d0da18" dependencies = [ "libc", - "windows-sys", + "windows-sys 0.59.0", ] [[package]] @@ -346,7 +405,7 @@ dependencies = [ "cfg-if", "libc", "r-efi", - "wasi", + "wasi 0.14.2+wasi-0.2.4", ] [[package]] @@ -423,12 +482,24 @@ version = "0.2.172" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa" +[[package]] +name = "linux-raw-sys" +version = "0.4.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab" + [[package]] name = "linux-raw-sys" version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12" +[[package]] +name = "litrs" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "11d3d7f243d5c5a8b9bb5d6dd2b1602c0cb0b9db1621bafc7ed66e35ff9fe092" + [[package]] name = "lock_api" version = "0.4.13" @@ -451,6 +522,18 @@ version = "2.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0" +[[package]] +name = "mio" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69d83b0086dc8ecf3ce9ae2874b2d1290252e2a30720bea58a5c6639b0092873" +dependencies = [ + "libc", + "log", + "wasi 0.11.1+wasi-snapshot-preview1", + "windows-sys 0.61.2", +] + [[package]] name = "num-traits" version = "0.2.19" @@ -662,6 +745,19 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "88f8660c1ff60292143c98d08fc6e2f654d722db50410e3f3797d40baaf9d8f3" +[[package]] +name = "rustix" +version = "0.38.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" +dependencies = [ + "bitflags", + "errno", + "libc", + "linux-raw-sys 0.4.15", + "windows-sys 0.59.0", +] + [[package]] name = "rustix" version = "1.0.7" @@ -671,8 +767,8 @@ dependencies = [ "bitflags", "errno", "libc", - "linux-raw-sys", - "windows-sys", + "linux-raw-sys 0.9.4", + "windows-sys 0.59.0", ] [[package]] @@ -745,6 +841,36 @@ dependencies = [ "digest", ] +[[package]] +name = "signal-hook" +version = "0.3.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d881a16cf4426aa584979d30bd82cb33429027e42122b169753d6ef1085ed6e2" +dependencies = [ + "libc", + "signal-hook-registry", +] + +[[package]] +name = "signal-hook-mio" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b75a19a7a740b25bc7944bdee6172368f988763b744e3d4dfe753f6b4ece40cc" +dependencies = [ + "libc", + "mio", + "signal-hook", +] + +[[package]] +name = "signal-hook-registry" +version = "1.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2a4719bff48cee6b39d12c020eeb490953ad2443b7055bd0b21fca26bd8c28b" +dependencies = [ + "libc", +] + [[package]] name = "smallvec" version = "1.15.1" @@ -757,7 +883,9 @@ version = "0.13.0" dependencies = [ 
"clap", "clap_mangen", + "comfy-table", "criterion", + "crossterm 0.28.1", "dashmap", "fast-strip-ansi", "memchr", @@ -766,8 +894,11 @@ dependencies = [ "pest", "pest_derive", "regex", + "serde", + "serde_json", "smallvec", "tempfile", + "unicode-width", ] [[package]] @@ -796,8 +927,8 @@ dependencies = [ "fastrand", "getrandom", "once_cell", - "rustix", - "windows-sys", + "rustix 1.0.7", + "windows-sys 0.59.0", ] [[package]] @@ -848,6 +979,18 @@ version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" +[[package]] +name = "unicode-segmentation" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" + +[[package]] +name = "unicode-width" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" + [[package]] name = "utf8parse" version = "0.2.2" @@ -880,6 +1023,12 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "wasi" +version = "0.11.1+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + [[package]] name = "wasi" version = "0.14.2+wasi-0.2.4" @@ -957,15 +1106,43 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + [[package]] name = "winapi-util" version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" dependencies = [ - "windows-sys", + "windows-sys 0.59.0", ] +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + [[package]] name = "windows-sys" version = "0.59.0" @@ -975,6 +1152,15 @@ dependencies = [ "windows-targets", ] +[[package]] +name = "windows-sys" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link", +] + [[package]] name = "windows-targets" version = "0.52.6" diff --git a/Cargo.toml b/Cargo.toml index 772ee29..cb98f59 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,6 +24,11 @@ parking_lot = "0.12.3" dashmap = "6.1.0" smallvec = "1.15.0" memchr = "2.7.4" +crossterm = "0.28" +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +comfy-table = "7.1" +unicode-width = "0.2" [build-dependencies] clap = { version = "4.5.39", features = ["derive", "cargo"] } @@ -38,6 +43,11 @@ path = "src/main.rs" name = "string-pipeline-bench" path = 
"src/bin/bench.rs" +[[bin]] +bench = false +name = "bench-throughput" +path = "src/bin/bench-throughput.rs" + [profile.staging] inherits = "dev" opt-level = 3 diff --git a/README.md b/README.md index 2ded8a0..43142f0 100644 --- a/README.md +++ b/README.md @@ -225,7 +225,6 @@ cargo build --release --bin bench [πŸ“– Template System](docs/template-system.md) [βš™οΈ CLI Options & Usage](docs/command-line-options.md) [πŸ” Comprehensive Debug System Guide](docs/debug-system.md) -[πŸ“Š Performance Benchmarking Guide](docs/benchmarking.md) ## 🀝 Contributing diff --git a/docs/benchmarking.md b/docs/benchmarking.md deleted file mode 100644 index c8c41c8..0000000 --- a/docs/benchmarking.md +++ /dev/null @@ -1,783 +0,0 @@ -# πŸ† String Pipeline Benchmarking Tool - -_NOTE: what follows has mostly been assembled using AI as an experiment and as a basis for further improvements._ - -A simple benchmarking tool that helps measure performance of string pipeline operations and provides timing information in both text and JSON formats. - -## πŸ“‹ Table of Contents - -- [πŸš€ Quick Start](#-quick-start) -- [✨ Features Overview](#-features-overview) -- [πŸ“– Usage Guide](#-usage-guide) - - [Basic Usage](#basic-usage) - - [Command Line Options](#command-line-options) - - [Output Formats](#output-formats) -- [πŸ§ͺ Benchmark Categories](#-benchmark-categories) - - [Single Operations](#1--single-operations) - - [Multiple Simple Operations](#2--multiple-simple-operations) - - [Map Operations](#3-️-map-operations) - - [Complex Operations](#4--complex-operations) -- [πŸ“Š Test Data & Methodology](#-test-data--methodology) -- [πŸ“ˆ Performance Analysis](#-performance-analysis) - - [Basic Methods](#basic-methods) - - [Timing Precision](#timing-precision) - - [Metrics Explanation](#metrics-explanation) -- [πŸ’Ό Automated Usage](#-automated-usage) - - [Script Integration](#script-integration) - - [Performance Comparison](#performance-comparison) -- [πŸ”§ Development Guide](#-development-guide) - - [Adding New Benchmarks](#adding-new-benchmarks) - - [Performance Considerations](#performance-considerations) - - [Best Practices](#best-practices) -- [πŸ“‹ Example Results](#-example-results) -- [⚠️ Troubleshooting](#️-troubleshooting) - -## πŸš€ Quick Start - -```bash -# Run with default settings (1000 iterations, text output) -cargo run --bin bench - -# Run in release mode for better performance -cargo run --release --bin bench - -# Quick test with fewer iterations -cargo run --bin bench -- --iterations 100 -``` - -## ✨ Features Overview - -- πŸ§ͺ **Test Coverage**: Tests single operations, multiple operations, map operations, and complex nested operations -- πŸ“Š **Basic Statistics**: Runs configurable iterations (default 1000) and calculates averages with outlier removal -- πŸ‹οΈ **Warmup Phase**: Runs warmup iterations (10% of measurements) to help get consistent timing -- 🎯 **Outlier Removal**: Removes top and bottom 5% of measurements to reduce noise -- πŸ“„ **Multiple Output Formats**: Supports both human-readable text and machine-readable JSON output -- πŸ—οΈ **Performance Categories**: Groups results by operation type for easier analysis -- πŸ“ˆ **Basic Metrics**: Provides average, minimum, maximum times from the filtered measurements -- ⚑ **Automation Support**: Works well in CI/CD and automated scripts -- πŸ” **Debug Integration**: Works with the existing debug system's timing capabilities - -## πŸ“– Usage Guide - -### Basic Usage - -| Command | Description | Use Case | -|---------|-------------|----------| 
-| `cargo run --bin bench` | Default run (1000 iterations, text) | Development testing | -| `cargo run --release --bin bench` | Optimized build | Better performance measurements | -| `./target/release/bench.exe` | Direct binary execution | Scripts and automation | - -```bash -# πŸš€ Development workflow -cargo run --bin bench -- --iterations 100 # Quick test - -# πŸ”„ More thorough testing -cargo build --release --bin bench -./target/release/bench --iterations 5000 --format json > results.json -``` - -### Command Line Options - -| Option | Short | Default | Description | -|--------|-------|---------|-------------| -| `--iterations` | `-n` | `1000` | Number of iterations per benchmark | -| `--format` | `-f` | `text` | Output format: `text` or `json` | -| `--help` | `-h` | - | Show help information | -| `--version` | `-V` | - | Show version information | - -**Examples:** - -```bash -# πŸ“Š Better accuracy (more iterations) -cargo run --bin bench -- --iterations 2000 - -# πŸ€– Machine processing (JSON output) -cargo run --bin bench -- --format json - -# πŸš€ Quick development test -cargo run --bin bench -- --iterations 50 --format text - -# πŸ” Help and version info -cargo run --bin bench -- --help -cargo run --bin bench -- --version -``` - -### Output Formats - -#### πŸ“„ Text Output (Default) - -Good for **reading results** and **development workflows**: - -- βœ… **Progress indicators** during execution with real-time feedback -- βœ… **Formatted tables** with aligned columns and readable timing units -- βœ… **Performance summary** by category with fastest/slowest identification -- βœ… **Basic statistics** including total execution time and outlier counts -- βœ… **Color-coded** output (when terminal supports it) - -```text -πŸ”Έ Running single operation benchmarks... - Single: upper ... βœ“ avg: 295ns - Single: lower ... βœ“ avg: 149ns - -πŸ“Š Summary: -β€’ Total benchmarks run: 33 -β€’ Total execution time: 392.17ms -``` - -#### πŸ€– JSON Output - -Good for **automation**, **scripts**, and **data processing**: - -- βœ… **Machine-readable** structured data -- βœ… **Timestamps** and version information for tracking -- βœ… **Timing metrics** for each benchmark -- βœ… **Categorized results** for easier filtering -- βœ… **Works well** with tools like `jq`, `python`, etc. - -```json -{ - "summary": { - "total_benchmarks": 33, - "total_execution_time_ns": 392170000, - "iterations_per_benchmark": 1000 - }, - "categories": { - "single_operations": [...], - "map_operations": [...] - }, - "timestamp": "2024-01-15T10:30:45Z", - "version": "0.13.0" -} -``` - -## πŸ§ͺ Benchmark Categories - -The benchmark suite is organized into **four distinct categories** that test different aspects of the pipeline system, from basic operations to complex nested transformations. - -### 1. 
πŸ”§ Single Operations - -Tests **individual pipeline operations** to establish baseline performance: - -| Operation | Template | Purpose | Expected Performance | -|-----------|----------|---------|---------------------| -| `split` | `{split:,:..\|join:,}` | Text splitting capability | ~3-4ΞΌs | -| `upper` | `{upper}` | Case conversion | ~200-300ns | -| `lower` | `{lower}` | Case conversion | ~150-200ns | -| `trim` | `{trim}` | Whitespace removal | ~100-150ns | -| `reverse` | `{reverse}` | String/list reversal | ~600-700ns | -| `sort` | `{split:,:..\|sort\|join:,}` | Alphabetical sorting | ~3-4ΞΌs | -| `unique` | `{split:,:..\|unique\|join:,}` | Duplicate removal | ~5-6ΞΌs | -| `replace` | `{replace:s/a/A/g}` | Pattern replacement | ~2-3ΞΌs | -| `filter` | `{split:,:..\|filter:^[a-m]\|join:,}` | Pattern filtering | ~14-16ΞΌs | - -> πŸ’‘ **Baseline Importance:** These measurements establish the **fundamental performance characteristics** of each operation and serve as building blocks for understanding more complex pipeline performance. - -### 2. πŸ”— Multiple Simple Operations - -Tests **chains of basic operations** to measure composition overhead: - -| Pipeline | Template | Purpose | Performance Range | -|----------|----------|---------|------------------| -| Split + Join | `{split:,:..\|join: }` | Basic transformation | ~3ΞΌs | -| Split + Sort + Join | `{split:,:..\|sort\|join:;}` | Sorting pipeline | ~3-4ΞΌs | -| Split + Unique + Join | `{split:,:..\|unique\|join:,}` | Deduplication | ~5-6ΞΌs | -| Split + Reverse + Join | `{split:,:..\|reverse\|join:-}` | Reversal pipeline | ~3ΞΌs | -| Split + Filter + Join | `{split:,:..\|filter:^[a-m]\|join:,}` | Filtering pipeline | ~16-17ΞΌs | -| Split + Slice + Join | `{split:,:..\|slice:0..5\|join:&}` | Range extraction | ~4ΞΌs | -| Upper + Trim + Replace | `{upper\|trim\|replace:s/,/ /g}` | String transformations | ~3-4ΞΌs | -| Split + Sort + Unique + Join | `{split:,:..\|sort\|unique\|join:+}` | Multi-step processing | ~5-6ΞΌs | - -> 🎯 **Composition Analysis:** These tests reveal how **operation chaining affects performance** and whether there are significant overhead costs in pipeline composition. - -### 3. πŸ—ΊοΈ Map Operations - -Tests **operations applied to each list item** via the map function: - -| Operation Type | Template | Purpose | Performance Range | -|----------------|----------|---------|------------------| -| Map(Upper) | `{split:,:..\|map:{upper}\|join:,}` | Case conversion mapping | ~8-9ΞΌs | -| Map(Trim+Upper) | `{split:,:..\|map:{trim\|upper}\|join: }` | Chained operations in map | ~9-10ΞΌs | -| Map(Prepend) | `{split:,:..\|map:{prepend:item}\|join:,}` | Text prefix addition | ~9-10ΞΌs | -| Map(Append) | `{split:,:..\|map:{append:-fruit}\|join:;}` | Text suffix addition | ~10-11ΞΌs | -| Map(Reverse) | `{split:,:..\|map:{reverse}\|join:,}` | String reversal per item | ~8-9ΞΌs | -| Map(Substring) | `{split:,:..\|map:{substring:0..3}\|join: }` | Text extraction per item | ~8-9ΞΌs | -| Map(Pad) | `{split:,:..\|map:{pad:10:_}\|join:,}` | Text padding per item | ~10-11ΞΌs | -| Map(Replace) | `{split:,:..\|map:{replace:s/e/E/g}\|join:,}` | Pattern replacement per item | ~49-60ΞΌs | - -> πŸ” **Map Performance:** Map operations show **scaling behavior** based on list size and the complexity of the inner operation. Replace operations are notably slower due to regex processing. - -### 4. 
πŸš€ Complex Operations - -Tests **sophisticated nested operations** and real-world transformation scenarios: - -| Complexity Level | Template | Purpose | Performance Range | -|------------------|----------|---------|------------------| -| Nested Split+Join | `{split:,:..\|map:{split:_:..\|join:-}\|join: }` | Multi-level parsing | ~15-16ΞΌs | -| Combined Transform | `{split:,:..\|map:{upper\|substring:0..5}\|join:,}` | Chained transformations | ~10ΞΌs | -| Filter+Map Chain | `{split:,:..\|filter:^[a-m]\|map:{reverse}\|join:&}` | Conditional processing | ~16-17ΞΌs | -| Replace+Transform | `{split:,:..\|map:{upper\|replace:s/A/a/g}\|join:;}` | Pattern + transformation | ~50-60ΞΌs | -| Unique+Map | `{split:,:..\|unique\|map:{upper}\|join:,}` | Dedup + transformation | ~10-11ΞΌs | -| Multi-Replace | `{split:,:..\|map:{replace:s/a/A/g\|upper}\|join:,}` | Complex pattern work | ~51-60ΞΌs | -| Substring+Pad | `{split:,:..\|map:{substring:0..3\|pad:5:_}\|join:+}` | Text formatting pipeline | ~10-11ΞΌs | -| Multi-Level Filter | `{split:,:..\|filter:^[a-z]\|map:{upper}\|sort\|join: }` | Comprehensive processing | ~17-18ΞΌs | - -> πŸ† **Real-World Scenarios:** Complex operations represent **typical production use cases** and help identify performance bottlenecks in sophisticated data transformation pipelines. - -## πŸ“Š Test Data & Methodology - -### 🍎 Test Dataset - -The benchmark uses a **carefully designed test dataset** that provides realistic performance characteristics: - -| Property | Value | Purpose | -|----------|-------|---------| -| **Content** | Comma-separated fruit names | Real-world data structure | -| **Length** | 208 characters | Moderate size for consistent timing | -| **Items** | 26 distinct fruits | Good sample size | -| **Unicode** | ASCII + Unicode safe | Comprehensive character handling | -| **Separators** | Commas, underscores, pipes | Multiple parsing scenarios | - -**Actual Test Data:** - -```text -"apple,banana,cherry,date,elderberry,fig,grape,honeydew,ice_fruit,jackfruit,kiwi,lemon,mango,nectarine,orange,papaya,quince,raspberry,strawberry,tomato,ugli_fruit,vanilla,watermelon,xigua,yellow_apple,zucchini" -``` - -> 🎯 **Why This Dataset?** This data provides **realistic performance characteristics** without being too large to cause timing inconsistencies or too small to provide meaningful measurements. - -## πŸ“ˆ Performance Analysis - -### Basic Methods - -#### πŸ‹οΈ Warmup Phase - -The benchmark includes a **warmup phase** to help get more consistent measurements by reducing cold-start effects: - -| Step | Process | Rationale | -|------|---------|-----------| -| 1. **Warmup Calculation** | Calculate 10% of measurement iterations | Proportional to test size | -| 2. **Cache Warming** | Run operations without timing measurement | Prime CPU caches and memory | -| 3. **System Stabilization** | Allow CPU frequency scaling to settle | More consistent conditions | -| 4. **Memory Allocation** | Pre-allocate common data structures | Reduce allocation overhead | - -```rust -// Warmup phase implementation -fn benchmark_template(&self, name: &str, template_str: &str) -> BenchmarkResult { - let template = Template::parse(template_str)?; - - // Warmup phase - run operations without timing - for _ in 0..self.warmup_iterations { - let _ = template.format(&self.test_data)?; - } - - // Actual measurement phase begins here... -} -``` - -> 🎯 **Warmup Benefits:** Helps reduce timing variations by reducing cold cache effects and system instability. 
- -#### 🎯 Outlier Removal - -The benchmark uses a **simple approach** to reduce measurement noise: - -| Step | Process | Rationale | -|------|---------|-----------| -| 1. **Data Collection** | Collect all timing measurements | Raw performance data | -| 2. **Sorting** | Sort measurements by duration | Prepare for filtering | -| 3. **Filtering** | Remove top & bottom 5% | Remove timing outliers | -| 4. **Average Calculation** | Calculate mean of remaining 90% | More stable average | -| 5. **Reporting** | Report outliers removed count | Show what was filtered | - -```rust -// Simplified outlier removal algorithm -fn remove_outliers(mut times: Vec) -> (Vec, usize) { - times.sort(); - let len = times.len(); - let outlier_count = (len as f64 * 0.05).ceil() as usize; - - let start_idx = outlier_count; - let end_idx = len - outlier_count; - - let filtered = times[start_idx..end_idx].to_vec(); - let outliers_removed = times.len() - filtered.len(); - - (filtered, outliers_removed) -} -``` - -> πŸ“Š **Simple Approach:** This basic filtering helps reduce noise in timing measurements, similar to what other benchmarking tools do. - -### Timing Precision - -#### ⚑ Timing Details - -| Feature | Implementation | Benefit | -|---------|----------------|---------| -| **Resolution** | Nanosecond precision via `std::time::Instant` | Good for fast operations | -| **Overhead** | Small timing overhead (~10-20ns) | Minimal impact on results | -| **Platform** | Cross-platform timing support | Works across systems | -| **Formatting** | Automatic unit selection (ns/ΞΌs/ms/s) | Easy to read output | - -#### πŸ“ Unit Formatting Algorithm - -```rust -fn format_duration(duration: Duration) -> String { - let nanos = duration.as_nanos(); - if nanos < 1_000 { - format!("{}ns", nanos) - } else if nanos < 1_000_000 { - format!("{:.2}ΞΌs", nanos as f64 / 1_000.0) - } else if nanos < 1_000_000_000 { - format!("{:.2}ms", nanos as f64 / 1_000_000.0) - } else { - format!("{:.2}s", duration.as_secs_f64()) - } -} -``` - -### Metrics Explanation - -#### πŸ“Š Core Metrics - -| Metric | Description | Interpretation | -|--------|-------------|----------------| -| **Average** | Mean time after outlier removal | Main performance indicator | -| **Min** | Fastest measurement after outlier removal | Best-case timing | -| **Max** | Slowest measurement after outlier removal | Worst-case timing | -| **Iterations** | Number of measurement runs performed | How many times we measured | -| **Warmup** | Number of pre-measurement runs | System preparation cycles | - -#### 🎯 Performance Ranges - -| Performance Level | Time Range | Operations | -|------------------|------------|------------| -| **Ultra Fast** | < 1ΞΌs | `upper`, `lower`, `trim` | -| **Fast** | 1-10ΞΌs | `split`, `join`, `sort`, basic chains | -| **Moderate** | 10-50ΞΌs | `map` operations, complex chains | -| **Intensive** | > 50ΞΌs | `replace` operations, regex processing | - -> πŸ’‘ **Iteration Guidelines:** -> -> - **Development**: 50-100 iterations for quick feedback -> - **Automation**: 500-1000 iterations for better reliability -> - **Thorough testing**: 2000-5000 iterations for more stable results - -## πŸ“‹ Example Results - -### πŸ“Š Text Output Sample - -```text -πŸ”Έ Running single operation benchmarks... - Single: split ... βœ“ avg: 3.53ΞΌs - Single: upper ... βœ“ avg: 295ns - Single: lower ... βœ“ avg: 149ns - -πŸ”Έ Running multiple simple operations benchmarks... - Multi: split + join ... βœ“ avg: 3.12ΞΌs - Multi: split + sort + join ... 
βœ“ avg: 3.47ΞΌs - -================================================================================ - BENCHMARK RESULTS -================================================================================ - -πŸ“Š Summary: -β€’ Total benchmarks run: 33 -β€’ Total execution time: 392.17ms -β€’ Measurement iterations per benchmark: 1000 -β€’ Warmup iterations per benchmark: 100 (10% of measurements) - -πŸ“ˆ Detailed Results: -Benchmark Average Min Max ----------------------------------------------------------------------------------------- -Single: upper 295ns 200ns 380ns -Single: lower 149ns 120ns 180ns -Map: split + map(replace) + join 49.16ΞΌs 42.90ΞΌs 55.80ΞΌs - -πŸ“‹ Performance by Category: -πŸ”Ή Single Operations (9 tests) - Average: 3.31ΞΌs | Fastest: 136ns (trim) | Slowest: 14.03ΞΌs (filter) - -πŸ”Ή Map Operations (8 tests) - Average: 14.22ΞΌs | Fastest: 8.35ΞΌs (map(upper)) | Slowest: 49.16ΞΌs (map(replace)) -``` - -### πŸ€– JSON Output Sample - -```json -{ - "summary": { - "total_benchmarks": 33, - "total_execution_time_ns": 392170000, - "total_execution_time_formatted": "392.17ms", - "iterations_per_benchmark": 1000, - "outlier_removal_method": "Top and bottom 5% removed", - "warmup_iterations_per_benchmark": 100 - }, - "categories": { - "single_operations": [ - { - "name": "Single: upper", - "iterations": 1000, - "average_time_ns": 295000, - "average_time_formatted": "295ns", - "min_time_ns": 200000, - "min_time_formatted": "200ns", - "max_time_ns": 9100000, - "max_time_formatted": "9.10ΞΌs", - "outliers_removed": 100, - "total_raw_measurements": 1000 - } - ] - }, - "timestamp": "2024-01-15T10:30:45Z", - "version": "0.13.0" -} -``` - -## πŸ’Ό Automated Usage - -### Script Integration - -#### πŸš€ GitHub Actions Example - -```yaml -name: Performance Benchmarks -on: [push, pull_request] - -jobs: - benchmark: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - uses: dtolnay/rust-toolchain@stable - - name: Build benchmark tool - run: cargo build --release --bin bench - - name: Run benchmarks - run: | - ./target/release/bench --iterations 5000 --format json > benchmark_results.json - - name: Upload results - uses: actions/upload-artifact@v4 - with: - name: benchmark-results - path: benchmark_results.json -``` - -#### πŸ” Processing Results with jq - -```bash -# Extract summary information -cat benchmark_results.json | jq '.summary' - -# Get average times for single operations -cat benchmark_results.json | jq '.categories.single_operations[].average_time_formatted' - -# Find slowest operations -cat benchmark_results.json | jq -r '.categories[] | .[] | "\(.name): \(.average_time_formatted)"' | sort -V - -# Performance alerts (fail if any operation > 100ΞΌs) -SLOW_OPS=$(cat benchmark_results.json | jq '.categories[][] | select(.average_time_ns > 100000000)') -if [ ! -z "$SLOW_OPS" ]; then - echo "Performance regression detected!" - exit 1 -fi -``` - -### Performance Comparison - -#### πŸ“Š Simple Comparison Script - -```bash -#!/bin/bash -# compare_benchmarks.sh - -BASELINE="baseline.json" -CURRENT="current.json" -THRESHOLD=1.1 # 10% regression threshold - -# Run current benchmark -./target/release/bench --format json > "$CURRENT" - -# Compare with baseline (if exists) -if [ -f "$BASELINE" ]; then - echo "πŸ” Checking for performance changes..." 
- - # Extract and compare key metrics - jq -r '.categories[][] | "\(.name) \(.average_time_ns)"' "$BASELINE" > baseline_times.txt - jq -r '.categories[][] | "\(.name) \(.average_time_ns)"' "$CURRENT" > current_times.txt - - # Performance regression analysis - python3 << 'EOF' -import json -import sys - -with open('baseline.json') as f: - baseline = json.load(f) -with open('current.json') as f: - current = json.load(f) - -threshold = 1.1 -regressions = [] - -for category in baseline['categories']: - for i, bench in enumerate(baseline['categories'][category]): - current_bench = current['categories'][category][i] - ratio = current_bench['average_time_ns'] / bench['average_time_ns'] - - if ratio > threshold: - regressions.append({ - 'name': bench['name'], - 'baseline': bench['average_time_formatted'], - 'current': current_bench['average_time_formatted'], - 'ratio': f"{ratio:.2f}x" - }) - -if regressions: - print("⚠️ Performance changes detected:") - for reg in regressions: - print(f" {reg['name']}: {reg['baseline']} β†’ {reg['current']} ({reg['ratio']})") - sys.exit(1) -else: - print("βœ… No significant performance changes") -EOF -else - echo "πŸ“ No baseline found, creating baseline from current run..." - cp "$CURRENT" "$BASELINE" -fi -``` - -## πŸ”§ Development Guide - -### Adding New Benchmarks - -#### πŸ“ Step-by-Step Process - -1. **🎯 Identify the Operation Category** - - ```rust - // Choose the appropriate method in src/bin/bench.rs - fn run_single_operation_benchmarks() // Individual operations - fn run_multiple_simple_benchmarks() // Operation chains - fn run_multiple_map_benchmarks() // Map operations - fn run_complex_benchmarks() // Complex scenarios - ``` - -2. **✍️ Follow the Naming Convention** - - ```rust - // Pattern: "Category: descriptive_name" - ("Single: operation_name", "{template}") - ("Multi: operation1 + operation2", "{template}") - ("Map: split + map(operation)", "{template}") - ("Complex: detailed_description", "{template}") - ``` - -3. **πŸ§ͺ Create Valid Templates** - - ```rust - // βœ… Good examples - ("Single: upper", "{upper}"), - ("Multi: split + sort + join", "{split:,:..|sort|join:,}"), - ("Map: split + map(trim)", "{split:,:..|map:{trim}|join:,}"), - - // ❌ Avoid these patterns - ("Single: split", "{split:,}"), // Missing range/join - ("Map: nested", "{split:,:..|map:{map:{upper}}}"), // Nested maps not supported - ``` - -4. **πŸ” Test with Small Iterations** - - ```bash - # Test new benchmarks first - cargo run --bin bench -- --iterations 10 - ``` - -### Performance Considerations - -#### ⚑ Basic Guidelines - -| Consideration | Impact | Recommendation | -|---------------|--------|----------------| -| **Build Mode** | 3-10x performance difference | Use `--release` for better measurements | -| **Iteration Count** | Result stability | 1000+ for automation, 2000+ for comparison | -| **Data Size** | Timing consistency | Current 208-char dataset works well | -| **System Load** | Measurement variance | Run on quiet systems when possible | -| **Memory** | Allocation overhead | Consider memory usage for intensive operations | - -#### πŸ—οΈ Architecture Insights - -```rust -// Performance-critical path in benchmark execution -fn benchmark_template(&self, name: &str, template_str: &str) -> BenchmarkResult { - // 1. Template compilation (one-time cost) - let template = Template::parse(template_str, None).unwrap(); - - // 2. 
Hot loop (measured operations) - for _ in 0..self.iterations { - let start = Instant::now(); - let _ = template.format(&self.test_data).unwrap(); // Core measurement - let duration = start.elapsed(); - times.push(duration); - } - - // 3. Basic analysis (post-processing) - BenchmarkResult::new(name.to_string(), times) -} -``` - -### Best Practices - -#### βœ… Do's - -1. **🏭 Use Release Builds for Better Measurements** - - ```bash - # Development/testing - cargo run --bin bench -- --iterations 100 - - # More accurate benchmarks - cargo build --release --bin bench - ./target/release/bench --iterations 2000 - ``` - -2. **πŸ“Š Choose Appropriate Iteration Counts** - - ```bash - # Quick development feedback (30-60 seconds) - --iterations 50 - - # Automated scripts (2-5 minutes) - --iterations 1000 - - # Thorough analysis (5-15 minutes) - --iterations 5000 - ``` - -3. **πŸ” Validate Templates Before Adding** - - ```bash - # Test individual templates - cargo run --bin string-pipeline -- "{new_template}" "test_data" - ``` - -4. **πŸ“ˆ Monitor Trends, Not Just Absolutes** - - ```bash - # Track performance over time - git log --oneline | head -10 | while read commit; do - git checkout $commit - ./target/release/bench --format json >> performance_history.jsonl - done - ``` - -#### ❌ Don'ts - -1. **🚫 Don't Mix Debug and Release Results** - - ```bash - # Wrong: Comparing different build modes - cargo run --bin bench > debug_results.txt - cargo run --release --bin bench > release_results.txt - # These results are not comparable! - ``` - -2. **🚫 Don't Ignore System Conditions** - - ```bash - # Wrong: Running during high system load - # Make sure system is idle before benchmarking - - # Right: Check system load - top -bn1 | grep "load average" - ``` - -3. **🚫 Don't Skip Outlier Analysis** - - ```bash - # Wrong: Assuming outliers are always noise - # High outlier counts may indicate: - # - System interference - # - Memory allocation issues - # - Template complexity problems - ``` - -## ⚠️ Troubleshooting - -### Common Issues - -#### πŸ› Build Problems - -**Problem:** `error: failed to remove file benchmark.exe` - -```bash -# Solution: Process is still running -taskkill /F /IM bench.exe # Windows -killall bench # Linux/macOS - -# Wait a moment, then rebuild -cargo build --release --bin bench -``` - -**Problem:** `Parse error: Expected operation` - -```bash -# Check template syntax -cargo run --bin string-pipeline -- "{your_template}" "test" - -# Common fixes: -"{split:,}" β†’ "{split:,:..|join:,}" -"{map:{map:{upper}}}" β†’ "{split:,:..|map:{upper}}" -``` - -#### ⚑ Performance Issues - -**Problem:** Benchmarks taking too long - -```bash -# Reduce iterations for development -cargo run --bin bench -- --iterations 100 - -# Check system resources -htop # Linux/macOS -taskmgr # Windows -``` - -**Problem:** Inconsistent results - -```bash -# Possible causes and solutions: -# 1. System load β†’ Run on idle system -# 2. Debug build β†’ Use --release -# 3. Too few iterations β†’ Increase --iterations -# 4. Background processes β†’ Close unnecessary applications -``` - -#### πŸ“Š Data Analysis Issues - -**Problem:** JSON parsing errors - -```bash -# Validate JSON output -./target/release/bench --format json | jq '.' - -# Check for truncated output -./target/release/bench --format json > results.json -jq '.' 
results.json # Should not error -``` - -**Problem:** Unexpected performance patterns - -```bash -# Debug with template analysis -cargo run --bin string-pipeline -- "{!your_template}" "test_data" - -# Profile memory usage -valgrind --tool=massif ./target/release/bench --iterations 100 -``` - -> πŸ’‘ **Need More Help?** -> -> πŸ” **Template Issues**: Check the [Template System Documentation](template-system.md) for syntax help -> -> πŸ› **Debug Mode**: Use `{!template}` syntax to see step-by-step execution -> -> πŸ“Š **Performance Analysis**: Consider using `cargo flamegraph` for detailed profiling diff --git a/docs/command-line-options.md b/docs/command-line-options.md index 61fa99a..fe78591 100644 --- a/docs/command-line-options.md +++ b/docs/command-line-options.md @@ -730,8 +730,6 @@ DEBUG: Total execution time: 18.7456ms '{split:,:..|map:{trim|upper|append:!}}' ``` -> πŸ“Š **Comprehensive Guide:** For detailed benchmarking methodology, performance analysis, automation scripts, and optimization strategies, see the [πŸ† Performance Benchmarking Guide](benchmarking.md). - ## πŸ”§ Troubleshooting ### πŸ› Common Issues and Solutions diff --git a/docs/template-system.md b/docs/template-system.md index 63d115c..228b1d1 100644 --- a/docs/template-system.md +++ b/docs/template-system.md @@ -653,7 +653,7 @@ Converts text to uppercase. | Parameter | Type | Required | Description | |-----------|------|----------|-------------| -| *(none)* | - | - | No parameters required | +| _(none)_ | - | - | No parameters required | **Examples:** @@ -671,7 +671,7 @@ Converts text to lowercase. | Parameter | Type | Required | Description | |-----------|------|----------|-------------| -| *(none)* | - | - | No parameters required | +| _(none)_ | - | - | No parameters required | **Examples:** @@ -784,7 +784,7 @@ Reverses the order of list items or characters in a string. | Parameter | Type | Required | Description | |-----------|------|----------|-------------| -| *(none)* | - | - | No parameters required | +| _(none)_ | - | - | No parameters required | **Behavior on Different Input Types:** @@ -807,7 +807,7 @@ Removes duplicate items from a list, preserving order. | Parameter | Type | Required | Description | |-----------|------|----------|-------------| -| *(none)* | - | - | No parameters required | +| _(none)_ | - | - | No parameters required | **Order Preservation:** The first occurrence of each item is kept, maintaining the original order. @@ -873,7 +873,7 @@ Removes ANSI escape sequences (colors, formatting) from text. | Parameter | Type | Required | Description | |-----------|------|----------|-------------| -| *(none)* | - | - | No parameters required | +| _(none)_ | - | - | No parameters required | **Sequence Types Removed:** Color codes, cursor movement, text formatting, and other ANSI escape sequences. @@ -942,9 +942,11 @@ The range system includes robust edge case handling: ### When is Escaping Required? -Different argument types have different escaping requirements: +The template parser uses a unified argument parsing system where all operations follow the same escaping rules for consistency and maintainability: -### Simple Arguments (append, prepend, join, etc.) +### Operation Arguments + +All operations use the same argument parsing rules. 
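+For example, the same `\:` escape works in every operation's argument (an illustrative template; output shown for a small input):
+
+```bash
+# Split on a literal ":" and rejoin with "-"
+string-pipeline '{split:\::..|join:-}' 'a:b:c'
+# Output: a-b-c
+```
+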
The following characters require escaping: | Character | Escape | Reason | |-----------|--------|----------------------| @@ -954,18 +956,6 @@ Different argument types have different escaping requirements: | `{` | `\{` | Starts template | | `\` | `\\` | Escape character | -### Regex Arguments (filter, regex_extract) - -Regex patterns can contain most characters naturally. - -### Split Arguments - -Split separators can contain most characters. Only escape: - -| Character | Escape | Reason | -|-----------|--------|--------| -| `:` | `\:` | Visual helper | - ### Special Sequences | Sequence | Result | Description | @@ -1431,5 +1421,4 @@ string-pipeline '{split:,:..|map:{prepend:β€’ |append: βœ“}}' 'First item,Second πŸ“š **Essential Resources:** - πŸ› **[Debug System Guide](debug-system.md)** - Master debugging techniques and error diagnosis -- πŸ† **[Performance Benchmarking Guide](benchmarking.md)** - Optimize templates for production use - πŸ”— **[Command Line Interface Guide](command-line-options.md)** - CLI features and automation tips diff --git a/scripts/README.md b/scripts/README.md new file mode 100644 index 0000000..bb3f069 --- /dev/null +++ b/scripts/README.md @@ -0,0 +1,346 @@ +# Benchmark CI/CD Scripts + +This directory contains scripts used by the GitHub Actions CI/CD pipeline to track and compare performance benchmarks. + +## Overview + +The benchmark system uses an **on-demand approach** triggered via PR comments. There are no automatic benchmark runs, +all comparisons are triggered manually by the repository owner using the `/bench` command. + +## The `/bench` Command + +### Command Syntax + +```bash +/bench [size] [warmup] [runs] +``` + +**Parameters:** + +- `ref1` (required): First git reference (commit, branch, or tag) +- `ref2` (required): Second git reference to compare +- `size` (optional): Number of paths to process per run (default: 10000) +- `warmup` (optional): Number of warmup runs (default: 5) +- `runs` (optional): Number of measurement runs (default: 50) + +**Auto-Ordering:** The workflow automatically determines which ref is older (baseline) and which is newer (current) +based on commit timestamps. You don't need to worry about parameter order - `/bench main feature` and +`/bench feature main` produce the same comparison with correct labeling. + +### Examples + +```bash +# Basic comparison with all defaults (size=10000, warmup=5, runs=50) +/bench main v0.13.0 + +# Compare two commits with custom size +/bench abc12345 def56789 50000 + +# Custom size and warmup +/bench main HEAD 50000 10 + +# Full custom parameters: size=50000, warmup=10, runs=100 +/bench main HEAD 50000 10 100 + +# Compare feature branch vs main (order doesn't matter) +/bench feature-branch main +``` + +### Workflow + +1. **Post command** in a PR comment: `/bench main HEAD` +2. **Bot acknowledges** with πŸ‘€ reaction and status message +3. **Validation** checks: + - User is repository owner + - Both refs exist + - Benchmark tool exists in both refs + - Parameters are valid +4. **Install hyperfine** in CI environment +5. **Build** benchmark binaries for both refs +6. **Run with hyperfine**: + - 5 warmup runs + - 50 measurement runs + - Tests all templates in each run +7. **Results posted** as PR comment with hyperfine comparison + - Mean execution time for each version + - Standard deviation, min/max ranges + - Relative speed comparison (e.g., "1.05x faster") +8. **Success reaction** πŸš€ (or πŸ˜• on failure) +9. 
**Artifacts uploaded** for 30 days + +## Files + +### `analyze_all_templates.sh` + +Benchmarks all templates by running hyperfine twice (once per version). + +**Usage:** + +```bash +./scripts/analyze_all_templates.sh [options] + +Options: + --size Input size in paths (default: 10000) + --warmup Warmup runs (default: 5) + --runs Benchmark runs (default: 50) + --export-dir Output directory (default: ./template_analysis) +``` + +**Output:** + +- Hyperfine JSON files +- Markdown report with per-template comparison +- Highlights regressions and improvements + +**Workflow integration:** + +```bash +# 1. Compile versions +./scripts/compile_benchmark_versions.sh abc12345 def56789 + +# 2. Run comprehensive analysis +./scripts/analyze_all_templates.sh abc12345 def56789 --runs 100 + +# 3. View results +cat template_analysis/comparison_report.md +``` + +### `compare_template_results.py` + +Parses hyperfine JSON outputs and generates per-template comparison reports. + +Called automatically by `analyze_all_templates.sh`. + +## GitHub Actions Workflow + +### Benchmark Command (`.github/workflows/bench-command.yml`) + +The single workflow that handles all benchmark comparisons. + +**Triggers:** + +- PR comments starting with `/bench` + +**What it does:** + +1. **Validates** user permissions and parameters +2. **Installs** hyperfine +3. **Checks** both refs for benchmark tool existence +4. **Builds** the benchmark tool for each ref +5. **Runs** benchmarks with hyperfine directly + - 5 warmup runs + 50 measurement runs + - All templates mode (single execution time per run) + - Results exported as markdown table +6. **Posts** detailed report to PR with markdown table +7. **Uploads** artifacts (markdown results + build logs) + +**Artifacts:** + +- **benchmark-comparison-** + - Hyperfine comparison results (markdown table) + - Build logs for both refs (baseline and current) + - Retained for 30 days + +## Running Benchmarks Locally + +### Quick Single-Template Test + +```bash +cargo build --release --bin bench-throughput + +# Single template, single run (quick smoke test) +./target/release/bench-throughput --template "{split:/:-1}" --size 10000 + +# With JSON output for inspection +./target/release/bench-throughput --template all --size 10000 --output my_benchmark.json +``` + +### Analysis with Hyperfine + +```bash +# Quick overall check (all templates in one run) +hyperfine --warmup 5 --runs 50 \ + './target/release/bench-throughput --template all --size 10000 --output /dev/null' + +# Detailed analysis of specific template +hyperfine --warmup 10 --runs 100 \ + './target/release/bench-throughput --template "{split:/:-1}" --size 10000 --output /dev/null' +``` + +### Per-Template Detailed Analysis + +Analyze all templates using a single command: + +```bash +# First, compile the versions you want to compare +./scripts/compile_benchmark_versions.sh abc1234 def5678 + +# Run comprehensive per-template analysis +./scripts/analyze_all_templates.sh abc1234 def5678 + +# With custom parameters +./scripts/analyze_all_templates.sh abc1234 def5678 \ + --size 50000 \ + --runs 100 \ + --export-dir ./my_analysis +``` + +**What it does:** + +1. Runs hyperfine with `--parameter-list` on all templates (baseline version) +2. Runs hyperfine with `--parameter-list` on all templates (current version) +3. 
Generates report comparing each template + +**Output:** + +- `template_analysis/baseline_results.json` +- `template_analysis/current_results.json` +- `template_analysis/comparison_report.md` + +## Version Comparison Workflow + +For comparing performance across multiple commits (e.g., to find when a regression was introduced), use the +`compile_benchmark_versions.sh` script. + +### `compile_benchmark_versions.sh` + +This script compiles the benchmark tool for every commit in a range, making it easy to run performance comparisons +across different versions. + +**Features:** + +- **Idempotent**: Only compiles versions that don't already exist +- **Safe**: Uses git worktrees in temporary directories (doesn't affect your working directory) +- **Convenient**: Stores binaries with commit SHA for easy identification +- **Non-intrusive**: Works even with uncommitted changes in your main working directory +- **Storage**: Uses `$XDG_DATA_HOME/string_pipeline/benchmarks/` (typically `~/.local/share/string_pipeline/benchmarks/`) + +**Usage:** + +```bash +# Compile all versions since the introduction of the benchmark tool +./scripts/compile_benchmark_versions.sh + +# Compile specific range +./scripts/compile_benchmark_versions.sh --start abc1234 --end def5678 + +# See what would be compiled (dry run) +./scripts/compile_benchmark_versions.sh --dry-run + +# List already compiled versions +./scripts/compile_benchmark_versions.sh --list + +# Remove all compiled versions +./scripts/compile_benchmark_versions.sh --clean + +# Verbose output for debugging +./scripts/compile_benchmark_versions.sh --verbose +``` + +**Example Workflow - Finding a Performance Regression:** + +```bash +# 1. Compile all versions +./scripts/compile_benchmark_versions.sh + +# 2. Set up benchmark directory path +BENCH_DIR="${XDG_DATA_HOME:-$HOME/.local/share}/string_pipeline/benchmarks" + +# 3. Quick overall comparison with hyperfine +./scripts/compare_benchmark_versions.sh abc12345 def56789 --all + +# 4. If regression detected, run detailed per-template analysis +./scripts/analyze_all_templates.sh abc12345 def56789 --runs 100 + +# 5. Or analyze a specific template +./scripts/compare_benchmark_versions.sh abc12345 def56789 \ + --template "{split:/:-1}" --runs 100 +``` + +### `compare_benchmark_versions.sh` + +After compiling benchmark binaries, use this script to quickly compare performance between two versions using hyperfine. + +**Requirements:** + +- hyperfine must be installed (`apt install hyperfine` or `brew install hyperfine`) + +**Usage:** + +```bash +# Specific template mode (default) +./scripts/compare_benchmark_versions.sh abc12345 def56789 + +# Custom template +./scripts/compare_benchmark_versions.sh abc12345 def56789 --template "{upper}" + +# All templates mode +./scripts/compare_benchmark_versions.sh abc12345 def56789 --all + +# Custom parameters +./scripts/compare_benchmark_versions.sh abc12345 def56789 \ + --template "{split:/:-1}" \ + --warmup 10 --runs 100 --size 50000 +``` + +**Example Workflow - Performance Comparison:** + +```bash +# 1. Compile the versions you want to compare +./scripts/compile_benchmark_versions.sh --start abc12345 --end def56789 + +# 2. Run hyperfine comparison on specific template +./scripts/compare_benchmark_versions.sh abc12345 def56789 \ + --template "{split:/:-1}" \ + --warmup 10 --runs 100 + +# 3. 
For comprehensive check, use all-templates mode +./scripts/compare_benchmark_versions.sh abc12345 def56789 --all --runs 20 +``` + +## Configuration + +### Benchmark Parameters + +Default parameters: + +- **Input size:** 10,000 paths +- **Templates:** All predefined templates +- **Hyperfine warmup:** 5 runs (CI only) +- **Hyperfine runs:** 50 runs (CI only) + +These can be overridden: + +```bash +# Custom size +/bench main HEAD 50000 + +# Local: Custom hyperfine parameters +hyperfine --warmup 20 --runs 200 \ + './bench-throughput --template "{upper}" --size 100000' +``` + +## Offline vs CI Benchmarking + +**CI/CD (Quick check):** + +- Uses hyperfine with 5 warmup + 50 runs +- Tests all templates at once +- Provides overall execution time + per-template breakdown +- Good for regression detection +- Fast feedback (~3-5 minutes) + +**Offline (Comprehensive analysis):** + +- Use `compare_benchmark_versions.sh` locally +- Full control over hyperfine parameters (warmup, runs) +- Focus on specific templates +- Export results in multiple formats +- Ideal for performance investigation + +**Recommended workflow:** + +1. CI detects potential regression via `/bench` +2. Investigate offline with hyperfine + specific templates +3. Narrow down the problematic operation +4. Fix and verify with both CI and offline tools diff --git a/scripts/analyze_all_templates.sh b/scripts/analyze_all_templates.sh new file mode 100755 index 0000000..6c4d274 --- /dev/null +++ b/scripts/analyze_all_templates.sh @@ -0,0 +1,205 @@ +#!/bin/bash +set -euo pipefail + +# Analyze all predefined templates +# Uses hyperfine's --parameter-list to run efficiently + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +BENCH_DIR="${XDG_DATA_HOME:-$HOME/.local/share}/string_pipeline/benchmarks" + +usage() { + cat < [options] + +Analyze all predefined templates. + +Arguments: + baseline-sha Git SHA/ref for baseline version + current-sha Git SHA/ref for current version + +Options: + --size Input size in paths (default: 10000) + --warmup Number of warmup runs (default: 5) + --runs Number of benchmark runs (default: 50) + --export-dir Directory for output files (default: ./template_analysis) + +Examples: + $(basename "$0") abc12345 def56789 + $(basename "$0") main HEAD --size 50000 --runs 100 + $(basename "$0") main feature-branch --export-dir ./results + +Output: + - Hyperfine JSON for baseline and current versions + - Markdown comparison report with per-template analysis +EOF + exit 1 +} + +# Default values +SIZE="10000" +WARMUP=5 +RUNS=50 +EXPORT_DIR="./template_analysis" + +# Parse arguments +if [ $# -lt 2 ]; then + usage +fi + +BASELINE_SHA="$1" +CURRENT_SHA="$2" +shift 2 + +while [ $# -gt 0 ]; do + case "$1" in + --size) + SIZE="$2" + shift 2 + ;; + --warmup) + WARMUP="$2" + shift 2 + ;; + --runs) + RUNS="$2" + shift 2 + ;; + --export-dir) + EXPORT_DIR="$2" + shift 2 + ;; + -h | --help) + usage + ;; + *) + echo "Error: Unknown option $1" + usage + ;; + esac +done + +# Check binaries exist +BASELINE_BIN="$BENCH_DIR/bench_throughput_$BASELINE_SHA" +CURRENT_BIN="$BENCH_DIR/bench_throughput_$CURRENT_SHA" + +if [ ! -f "$BASELINE_BIN" ]; then + echo "Error: Baseline binary not found: $BASELINE_BIN" + echo "Run compile_benchmark_versions.sh first" + exit 1 +fi + +if [ ! 
-f "$CURRENT_BIN" ]; then + echo "Error: Current binary not found: $CURRENT_BIN" + echo "Run compile_benchmark_versions.sh first" + exit 1 +fi + +# Create export directory +mkdir -p "$EXPORT_DIR" + +# Comprehensive template set covering all operation types +TEMPLATES=( + # String Operations (direct, no split needed) + "{upper}" + "{lower}" + "{reverse}" + "{trim}" + "{trim:left}" + "{trim:right}" + "{substring:0..10}" + "{substring:-5..}" + "{append:.bak}" + "{prepend:backup_}" + "{surround:\"}" + "{pad:80: :right}" + "{pad:80:0:left}" + "{replace:s/\\.txt$/.md/}" + "{replace:s/\\/\\/+/\\//g}" + "{regex_extract:[^/]+$}" + "{strip_ansi}" + # Split Operations + "{split:/:..}" + "{split:/:-1}" + "{split:/:0..-1}" + "{split:/:0..3}" + # List Operations (with split) + "{split:/:..|join:/}" + "{split:/:..|filter:^[a-z]+$}" + "{split:/:..|filter_not:^\\.}" + "{split:/:..|sort}" + "{split:/:..|sort:desc}" + "{split:/:..|reverse}" + "{split:/:..|unique}" + "{split:/:..|slice:2..5}" + "{split:/:..|slice:-3..}" + "{split:/:..|map:{upper}}" + "{split:/:..|map:{trim}}" + # Complex Chains + "{trim|upper|pad:20}" + "{split:/:..|filter:^[a-z]+$|sort|join:-}" + "{split:/:-1|split:.:0}" + "{split:/:..|map:{upper}|join:/}" +) + +# Convert array to comma-separated list for hyperfine +TEMPLATE_LIST=$( + IFS=, + echo "${TEMPLATES[*]}" +) + +echo "=========================================" +echo "Per-Template Benchmark Analysis" +echo "=========================================" +echo "Baseline: $BASELINE_SHA" +echo "Current: $CURRENT_SHA" +echo "Templates: All predefined templates" +echo "Input size: $SIZE paths" +echo "Warmup: $WARMUP runs" +echo "Runs: $RUNS measurements" +echo "Output dir: $EXPORT_DIR" +echo "=========================================" +echo "" + +# Run hyperfine for baseline version (all templates) +echo "Phase 1/3: Benchmarking baseline version ($BASELINE_SHA)..." +hyperfine \ + --warmup "$WARMUP" \ + --runs "$RUNS" \ + --parameter-list template "$TEMPLATE_LIST" \ + --export-json "$EXPORT_DIR/baseline_results.json" \ + --style basic \ + "$BASELINE_BIN --template {template} --size $SIZE --output /dev/null" + +echo "" +echo "Phase 2/3: Benchmarking current version ($CURRENT_SHA)..." +# Run hyperfine for current version (all templates) +hyperfine \ + --warmup "$WARMUP" \ + --runs "$RUNS" \ + --parameter-list template "$TEMPLATE_LIST" \ + --export-json "$EXPORT_DIR/current_results.json" \ + --style basic \ + "$CURRENT_BIN --template {template} --size $SIZE --output /dev/null" + +echo "" +echo "Phase 3/3: Generating comparison report..." + +# Generate comparison report using Python +python3 "$SCRIPT_DIR/compare_template_results.py" \ + "$EXPORT_DIR/baseline_results.json" \ + "$EXPORT_DIR/current_results.json" \ + --baseline-name "$BASELINE_SHA" \ + --current-name "$CURRENT_SHA" \ + --size "$SIZE" \ + >"$EXPORT_DIR/comparison_report.md" + +echo "" +echo "βœ“ Analysis complete!" 
+echo "" +echo "Results:" +echo " - Baseline data: $EXPORT_DIR/baseline_results.json" +echo " - Current data: $EXPORT_DIR/current_results.json" +echo " - Report: $EXPORT_DIR/comparison_report.md" +echo "" +echo "View report:" +echo " cat $EXPORT_DIR/comparison_report.md" diff --git a/scripts/compare_benchmark_versions.sh b/scripts/compare_benchmark_versions.sh new file mode 100755 index 0000000..57fa5d9 --- /dev/null +++ b/scripts/compare_benchmark_versions.sh @@ -0,0 +1,230 @@ +#!/usr/bin/env bash + +set -euo pipefail + +# Script to compare two compiled benchmark binaries using hyperfine +# Supports both "all templates" mode and specific template mode + +BENCH_DIR="${XDG_DATA_HOME:-$HOME/.local/share}/string_pipeline/benchmarks" + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' # No Color + +# Default values +WARMUP=5 +RUNS=50 +SIZE="10000" +TEMPLATE="{split:/:-1}" +ALL_MODE=false +STYLE="" + +# Usage information +usage() { + cat < [OPTIONS] + +Compare performance of two compiled benchmark binaries using hyperfine. + +ARGUMENTS: + Short SHA of first benchmark version (baseline) + Short SHA of second benchmark version (current) + +OPTIONS: + --warmup N Number of warmup runs (default: $WARMUP) + --runs N Number of benchmark runs (default: $RUNS) + --size SIZE Input size (default: $SIZE) + --template TPL Template to benchmark (default: "$TEMPLATE") + --all Compare using all templates mode + --style STYLE Hyperfine output style (basic|full|nocolor|color|none) + -h, --help Show this help message + +EXAMPLES: + # Compare specific template with hyperfine (default) + $(basename "$0") abc12345 def56789 + + # Compare with custom template + $(basename "$0") abc12345 def56789 --template "{split:/:..|join:/}" + + # Compare all templates mode (single run each, summary output) + $(basename "$0") abc12345 def56789 --all + + # Custom settings for specific template + $(basename "$0") abc12345 def56789 --template "{upper}" --warmup 10 --runs 100 --size 50000 + +MODES: + Specific template mode (default): + - Uses hyperfine to benchmark a single template + - Multiple runs with statistical analysis from hyperfine + - Best for detailed performance comparison of one template + + All templates mode (--all): + - Runs all predefined templates once + - Hyperfine measures total execution time + - Best for overall performance regression testing + +NOTES: + - Binaries must be compiled first using compile_benchmark_versions.sh + - hyperfine must be installed (https://github.com/sharkdp/hyperfine) +EOF +} + +# Print colored message +log_info() { + echo -e "${BLUE}β„Ή${NC} $*" +} + +log_success() { + echo -e "${GREEN}βœ“${NC} $*" +} + +log_error() { + echo -e "${RED}βœ—${NC} $*" >&2 +} + +# Check if hyperfine is installed +check_hyperfine() { + if ! command -v hyperfine &>/dev/null; then + log_error "hyperfine is not installed" + echo "" + echo "Install hyperfine:" + echo " - Debian/Ubuntu: apt install hyperfine" + echo " - macOS: brew install hyperfine" + echo " - Cargo: cargo install hyperfine" + echo " - GitHub: https://github.com/sharkdp/hyperfine" + echo "" + exit 1 + fi +} + +# Check if binary exists +check_binary() { + local sha=$1 + local binary_path="$BENCH_DIR/bench_throughput_$sha" + + if [ ! -f "$binary_path" ]; then + log_error "Benchmark binary not found: bench_throughput_$sha" + echo "" + echo "The binary for commit $sha has not been compiled yet." 
+ echo "" + echo "Compile it first using:" + echo -e " ${YELLOW}./scripts/compile_benchmark_versions.sh --start $sha --end $sha${NC}" + echo "" + echo "Or compile a range of versions:" + echo -e " ${YELLOW}./scripts/compile_benchmark_versions.sh${NC}" + echo "" + exit 1 + fi +} + +# Parse command line arguments +if [ $# -lt 2 ]; then + usage + exit 1 +fi + +SHA1=$1 +SHA2=$2 +shift 2 + +while [ $# -gt 0 ]; do + case $1 in + --warmup) + WARMUP="$2" + shift 2 + ;; + --runs) + RUNS="$2" + shift 2 + ;; + --size) + SIZE="$2" + shift 2 + ;; + --template) + TEMPLATE="$2" + shift 2 + ;; + --all) + ALL_MODE=true + shift + ;; + --style) + STYLE="$2" + shift 2 + ;; + -h | --help) + usage + exit 0 + ;; + *) + log_error "Unknown option: $1" + echo "" + usage + exit 1 + ;; + esac +done + +# Validate inputs +check_hyperfine +check_binary "$SHA1" +check_binary "$SHA2" + +BINARY1="$BENCH_DIR/bench_throughput_$SHA1" +BINARY2="$BENCH_DIR/bench_throughput_$SHA2" + +# Print comparison info +echo "" +log_info "Comparing benchmark versions using hyperfine" +echo "" +echo " Baseline: $SHA1" +echo " Current: $SHA2" +echo "" + +if [ "$ALL_MODE" = true ]; then + echo "Mode: All templates" + echo " Size: $SIZE" + echo "" + echo "Hyperfine parameters:" + echo " Warmup runs: $WARMUP" + echo " Benchmark runs: $RUNS" + echo "" + + # All templates mode - benchmark complete tool execution + HYPERFINE_ARGS=(--warmup "$WARMUP" --runs "$RUNS") + [ -n "$STYLE" ] && HYPERFINE_ARGS+=(--style "$STYLE") + + hyperfine \ + "${HYPERFINE_ARGS[@]}" \ + --command-name "$SHA1" \ + "$BINARY1 --template all --size $SIZE --output /dev/null" \ + --command-name "$SHA2" \ + "$BINARY2 --template all --size $SIZE --output /dev/null" +else + echo "Mode: Specific template" + echo " Template: $TEMPLATE" + echo " Size: $SIZE" + echo "" + echo "Hyperfine parameters:" + echo " Warmup runs: $WARMUP" + echo " Benchmark runs: $RUNS" + echo "" + + # Specific template mode - hyperfine orchestrates multiple runs + HYPERFINE_ARGS=(--warmup "$WARMUP" --runs "$RUNS") + [ -n "$STYLE" ] && HYPERFINE_ARGS+=(--style "$STYLE") + + hyperfine \ + "${HYPERFINE_ARGS[@]}" \ + --command-name "$SHA1" \ + "$BINARY1 --template '$TEMPLATE' --size $SIZE" \ + --command-name "$SHA2" \ + "$BINARY2 --template '$TEMPLATE' --size $SIZE" +fi + +echo "" +log_success "Comparison complete!" diff --git a/scripts/compare_template_results.py b/scripts/compare_template_results.py new file mode 100755 index 0000000..45c5cfc --- /dev/null +++ b/scripts/compare_template_results.py @@ -0,0 +1,274 @@ +#!/usr/bin/env python3 +""" +Compare hyperfine JSON results for per-template analysis. + +This script parses two hyperfine JSON files (baseline and current) where each +file contains results from running all templates. It generates a markdown +comparison report. 
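+
+Example invocation (mirrors how analyze_all_templates.sh drives this script; file
+names and SHAs are illustrative):
+
+    compare_template_results.py baseline_results.json current_results.json \
+        --baseline-name abc12345 --current-name def56789 --size 10000 \
+        > comparison_report.md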
+""" + +import json +import sys +import argparse +from typing import Dict, Tuple + + +def parse_args(): + parser = argparse.ArgumentParser( + description="Compare hyperfine per-template results" + ) + parser.add_argument("baseline_json", help="Baseline hyperfine JSON results") + parser.add_argument("current_json", help="Current hyperfine JSON results") + parser.add_argument( + "--baseline-name", default="baseline", help="Name for baseline version" + ) + parser.add_argument( + "--current-name", default="current", help="Name for current version" + ) + parser.add_argument("--size", type=int, help="Input size used") + return parser.parse_args() + + +def load_hyperfine_json(filepath: str) -> Dict: + """Load hyperfine JSON results.""" + with open(filepath, "r") as f: + return json.load(f) + + +def extract_template_from_command(command: str) -> str: + """Extract template string from hyperfine command. + + Command format: 'binary --template {template} --size N --output /dev/null' + """ + parts = command.split("--template ") + if len(parts) < 2: + return "unknown" + + template_part = parts[1].split(" ")[0] + return template_part + + +def format_time_ms(seconds: float) -> str: + """Format time in seconds to human-readable string.""" + ms = seconds * 1000 + if ms < 1: + return f"{ms * 1000:.2f}ΞΌs" + elif ms < 1000: + return f"{ms:.2f}ms" + else: + return f"{ms / 1000:.2f}s" + + +def calculate_change(baseline: float, current: float) -> Tuple[float, str]: + """Calculate percentage change and return emoji indicator. + + For timing metrics, lower is better: + - Negative change = improvement (faster) + - Positive change = regression (slower) + """ + if baseline == 0: + return 0.0, "βž–" + + change_pct = ((current - baseline) / baseline) * 100 + + if abs(change_pct) < 2: # Less than 2% change is noise + emoji = "βž–" + elif change_pct < -5: # >5% faster is significant improvement + emoji = "🟒" + elif change_pct < -2: # 2-5% faster is improvement + emoji = "βœ…" + elif change_pct > 10: # >10% slower is regression + emoji = "πŸ”΄" + elif change_pct > 5: # 5-10% slower is warning + emoji = "⚠️" + else: # 2-5% slower is caution + emoji = "🟑" + + return change_pct, emoji + + +def generate_comparison_report( + baseline_data: Dict, + current_data: Dict, + baseline_name: str, + current_name: str, + input_size: int | None = None, +) -> str: + """Generate markdown comparison report from hyperfine JSON data.""" + + # Build lookup by template + baseline_by_template = {} + for result in baseline_data["results"]: + template = extract_template_from_command(result["command"]) + baseline_by_template[template] = result + + current_by_template = {} + for result in current_data["results"]: + template = extract_template_from_command(result["command"]) + current_by_template[template] = result + + # Find common templates + common_templates = sorted( + set(baseline_by_template.keys()) & set(current_by_template.keys()) + ) + + if not common_templates: + return "Error: No common templates found between baseline and current results." 
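+    # Pairing is keyed on the template string recovered from each hyperfine command
+    # line, so differing binary paths between runs do not affect matching. Note that
+    # extract_template_from_command stops at the first space, so a template containing
+    # a space is keyed by its prefix only.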
+ + # Generate report + lines = [] + lines.append("# πŸ“Š Per-Template Benchmark Analysis\n") + lines.append(f"**Baseline:** `{baseline_name}`") + lines.append(f"**Current:** `{current_name}`") + if input_size: + lines.append(f"**Input size:** {input_size:,} paths per run") + lines.append(f"**Templates analyzed:** {len(common_templates)}\n") + + # Summary statistics + regressions = [] + improvements = [] + neutral = [] + + # Build comparison table + lines.append("## Performance Comparison\n") + lines.append( + "| Template | Baseline Mean | Current Mean | Change | Min | Max | StdDev | Notes |" + ) + lines.append( + "|----------|---------------|--------------|--------|-----|-----|--------|-------|" + ) + + for template in common_templates: + baseline = baseline_by_template[template] + current = current_by_template[template] + + # Extract timing statistics (all in seconds from hyperfine) + baseline_mean = baseline["mean"] + current_mean = current["mean"] + current_min = current["min"] + current_max = current["max"] + current_stddev = current["stddev"] + + # Calculate change + change_pct, emoji = calculate_change(baseline_mean, current_mean) + + # Track significant changes + if change_pct > 10: + regressions.append((template, change_pct)) + elif change_pct < -5: + improvements.append((template, change_pct)) + else: + neutral.append(template) + + # Build notes (check if variation is high) + notes = [] + cv = (current_stddev / current_mean * 100) if current_mean > 0 else 0 + if cv > 10: + notes.append("high variance") + + # Format timing data + baseline_str = format_time_ms(baseline_mean) + current_str = format_time_ms(current_mean) + min_str = format_time_ms(current_min) + max_str = format_time_ms(current_max) + stddev_str = format_time_ms(current_stddev) + + notes_str = ", ".join(notes) if notes else "β€”" + + lines.append( + f"| `{template}` " + f"| {baseline_str} " + f"| {current_str} " + f"| {emoji} {change_pct:+.1f}% " + f"| {min_str} " + f"| {max_str} " + f"| Β±{stddev_str} " + f"| {notes_str} |" + ) + + lines.append("") + + # Summary section + lines.append("## Summary\n") + lines.append(f"- **Total templates:** {len(common_templates)}") + lines.append(f"- **Improvements:** {len(improvements)} 🟒") + lines.append(f"- **Regressions:** {len(regressions)} πŸ”΄") + lines.append(f"- **Neutral:** {len(neutral)} βž–\n") + + # Highlight significant changes + if regressions: + lines.append("### ⚠️ Performance Regressions\n") + for template, change in sorted(regressions, key=lambda x: x[1], reverse=True): + baseline = baseline_by_template[template] + current = current_by_template[template] + lines.append( + f"- **`{template}`**: {change:+.1f}% slower " + f"({format_time_ms(baseline['mean'])} β†’ {format_time_ms(current['mean'])})" + ) + lines.append("") + + if improvements: + lines.append("### ✨ Performance Improvements\n") + for template, change in sorted(improvements, key=lambda x: x[1]): + baseline = baseline_by_template[template] + current = current_by_template[template] + lines.append( + f"- **`{template}`**: {abs(change):.1f}% faster " + f"({format_time_ms(baseline['mean'])} β†’ {format_time_ms(current['mean'])})" + ) + lines.append("") + + # Measurement details + lines.append("## Measurement Details\n") + lines.append("Hyperfine metrics:") + lines.append("- **Mean**: Average execution time across all runs") + lines.append("- **Min/Max**: Fastest and slowest runs observed") + lines.append("- **StdDev**: Standard deviation (measure of consistency)") + lines.append("- **High variance**: 
Templates with coefficient of variation >10%\n") + + # Legend + lines.append("---\n") + lines.append("### Legend") + lines.append("- 🟒 Significant improvement (>5% faster)") + lines.append("- βœ… Improvement (2-5% faster)") + lines.append("- βž– Neutral (<2% change)") + lines.append("- 🟑 Caution (2-5% slower)") + lines.append("- ⚠️ Warning (5-10% slower)") + lines.append("- πŸ”΄ Regression (>10% slower)") + + return "\n".join(lines) + + +def main(): + args = parse_args() + + try: + baseline_data = load_hyperfine_json(args.baseline_json) + current_data = load_hyperfine_json(args.current_json) + + report = generate_comparison_report( + baseline_data, + current_data, + args.baseline_name, + args.current_name, + args.size, + ) + + print(report) + + except FileNotFoundError as e: + print(f"Error: File not found: {e}", file=sys.stderr) + sys.exit(1) + except json.JSONDecodeError as e: + print(f"Error: Invalid JSON: {e}", file=sys.stderr) + sys.exit(1) + except Exception as e: + print(f"Error: {e}", file=sys.stderr) + import traceback + + traceback.print_exc() + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/scripts/compile_benchmark_versions.sh b/scripts/compile_benchmark_versions.sh new file mode 100755 index 0000000..e2f8904 --- /dev/null +++ b/scripts/compile_benchmark_versions.sh @@ -0,0 +1,348 @@ +#!/usr/bin/env bash + +set -euo pipefail + +# Script to compile benchmark binaries for multiple git commits +# This makes it easy to compare performance across different versions + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" +BENCH_DIR="${XDG_DATA_HOME:-$HOME/.local/share}/string_pipeline/benchmarks" +DEFAULT_START_COMMIT="5e028194" +VERBOSE=0 +DRY_RUN=0 + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' # No Color + +# Usage information +usage() { + cat < +Location: \$XDG_DATA_HOME/string_pipeline/benchmarks/ + +OPTIONS: + --start COMMIT Starting commit (default: $DEFAULT_START_COMMIT) + --end COMMIT Ending commit (default: HEAD) + --list List already compiled versions and exit + --dry-run Show what would be compiled without doing it + --clean Remove all compiled benchmarks and exit + --verbose Show detailed output + -h, --help Show this help message + +EXAMPLES: + # Compile all versions from $DEFAULT_START_COMMIT to HEAD + $(basename "$0") + + # Compile specific range + $(basename "$0") --start abc12345 --end def56789 + + # List available compiled versions + $(basename "$0") --list + + # See what would be compiled + $(basename "$0") --dry-run + + # Clean up old compiled versions + $(basename "$0") --clean + +USAGE AFTER COMPILATION: + # Quick overall comparison with hyperfine + ./scripts/compare_benchmark_versions.sh abc12345 def56789 --all + + # Detailed per-template analysis + ./scripts/analyze_all_templates.sh abc12345 def56789 --runs 100 + + # Analyze specific template + ./scripts/compare_benchmark_versions.sh abc12345 def56789 \\ + --template "{split:/:-1}" --runs 100 +EOF +} + +# Print colored message +log_info() { + echo -e "${BLUE}β„Ή${NC} $*" +} + +log_success() { + echo -e "${GREEN}βœ“${NC} $*" +} + +log_warning() { + echo -e "${YELLOW}⚠${NC} $*" +} + +log_error() { + echo -e "${RED}βœ—${NC} $*" >&2 +} + +log_verbose() { + if [ "$VERBOSE" -eq 1 ]; then + echo -e "${BLUE}[verbose]${NC} $*" + fi +} + +# List compiled versions +list_versions() { + if [ ! 
-d "$BENCH_DIR" ]; then + log_warning "No benchmark directory found at: $BENCH_DIR" + return + fi + + local count=0 + log_info "Compiled benchmark versions in: $BENCH_DIR" + echo "" + + while IFS= read -r -d '' binary; do + local filename + filename=$(basename "$binary") + local sha="${filename#bench_throughput_}" + local size + size=$(du -h "$binary" | cut -f1) + local date + date=$(stat -c '%y' "$binary" 2>/dev/null || stat -f '%Sm' "$binary" 2>/dev/null || echo "unknown") + + echo " $sha ($size, compiled: ${date%.*})" + count=$((count + 1)) + done < <(find "$BENCH_DIR" -type f -name "bench_throughput_*" -print0 2>/dev/null | sort -z) + + if [ "$count" -eq 0 ]; then + log_warning "No compiled benchmarks found" + else + echo "" + log_success "Found $count compiled version(s)" + fi +} + +# Clean compiled versions +clean_versions() { + if [ ! -d "$BENCH_DIR" ]; then + log_warning "No benchmark directory found at: $BENCH_DIR" + return + fi + + local count=0 + while IFS= read -r -d '' binary; do + log_verbose "Removing: $binary" + rm -f "$binary" + count=$((count + 1)) + done < <(find "$BENCH_DIR" -type f -name "bench_throughput_*" -print0 2>/dev/null) + + if [ "$count" -eq 0 ]; then + log_info "No compiled benchmarks to clean" + else + log_success "Removed $count compiled version(s)" + fi +} + +# Get short SHA for a commit +get_short_sha() { + local commit=$1 + git rev-parse --short=8 "$commit" 2>/dev/null +} + +# Check if binary exists for a commit +binary_exists() { + local short_sha=$1 + [ -f "$BENCH_DIR/bench_throughput_$short_sha" ] +} + +# Compile benchmark for a commit using git worktree +compile_for_commit() { + local commit=$1 + local short_sha=$2 + local binary_path="$BENCH_DIR/bench_throughput_$short_sha" + + if binary_exists "$short_sha"; then + log_verbose "Skipping $short_sha (already compiled)" + return 0 + fi + + log_info "Compiling $short_sha..." + + if [ "$DRY_RUN" -eq 1 ]; then + echo " [DRY RUN] Would create worktree for $commit and compile" + return 0 + fi + + # Create temporary directory for worktree + local worktree_dir + worktree_dir=$(mktemp -d -t "bench_compile_${short_sha}_XXXXXX") + + log_verbose "Created worktree directory: $worktree_dir" + + # Add worktree for this commit + if ! git worktree add -q --detach "$worktree_dir" "$commit" 2>/dev/null; then + log_error "Failed to create worktree for $commit" + rm -rf "$worktree_dir" + return 1 + fi + + # Try to compile in the worktree + local compile_success=0 + if (cd "$worktree_dir" && cargo build --release --bin bench-throughput >/dev/null 2>&1); then + # Copy binary to benchmark directory + if [ -f "$worktree_dir/target/release/bench-throughput" ]; then + cp "$worktree_dir/target/release/bench-throughput" "$binary_path" + log_success "Compiled $short_sha" + compile_success=1 + else + log_error "Binary not found after compilation for $short_sha" + fi + else + log_warning "Compilation failed for $short_sha" + fi + + # Cleanup worktree + log_verbose "Cleaning up worktree for $short_sha" + git worktree remove --force "$worktree_dir" 2>/dev/null || true + rm -rf "$worktree_dir" + + [ "$compile_success" -eq 1 ] +} + +# Main compilation logic +compile_versions() { + local start_commit=$1 + local end_commit=$2 + + # Verify commits exist + if ! git rev-parse "$start_commit" >/dev/null 2>&1; then + log_error "Invalid start commit: $start_commit" + exit 1 + fi + + if ! 
git rev-parse "$end_commit" >/dev/null 2>&1; then + log_error "Invalid end commit: $end_commit" + exit 1 + fi + + # Create benchmark directory + mkdir -p "$BENCH_DIR" + + # Get list of commits + log_info "Collecting commits from $start_commit to $end_commit..." + local commits + mapfile -t commits < <(git rev-list --reverse "$start_commit^..$end_commit") + + local total=${#commits[@]} + log_info "Found $total commit(s) to process" + echo "" + + # Counters + local compiled=0 + local skipped=0 + local failed=0 + + # Process each commit + for commit in "${commits[@]}"; do + local short_sha + short_sha=$(get_short_sha "$commit") + + if binary_exists "$short_sha"; then + log_success "[$((compiled + skipped + failed + 1))/$total] $short_sha (already exists)" + skipped=$((skipped + 1)) + else + echo -n "[$((compiled + skipped + failed + 1))/$total] " + if compile_for_commit "$commit" "$short_sha"; then + compiled=$((compiled + 1)) + else + failed=$((failed + 1)) + fi + fi + done + + # Print summary + echo "" + echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" + echo "Summary:" + echo " Total commits: $total" + echo " Newly compiled: $compiled" + echo " Already compiled: $skipped" + echo " Failed: $failed" + echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" + echo "" + log_info "Binaries location: $BENCH_DIR" + + if [ "$compiled" -gt 0 ] || [ "$skipped" -gt 0 ]; then + echo "" + log_success "Ready for version comparison!" + echo "" + echo "Example usage:" + echo " # Run benchmark with a specific version" + local example_sha + example_sha=$(get_short_sha "$end_commit") + echo " $BENCH_DIR/bench_throughput_$example_sha \\" + echo " --template all --size 10000 \\" + echo " --output results.json" + fi +} + +# Parse command line arguments +START_COMMIT="$DEFAULT_START_COMMIT" +END_COMMIT="HEAD" +ACTION="compile" + +while [ $# -gt 0 ]; do + case $1 in + --start) + START_COMMIT="$2" + shift 2 + ;; + --end) + END_COMMIT="$2" + shift 2 + ;; + --list) + ACTION="list" + shift + ;; + --clean) + ACTION="clean" + shift + ;; + --dry-run) + DRY_RUN=1 + shift + ;; + --verbose) + VERBOSE=1 + shift + ;; + -h | --help) + usage + exit 0 + ;; + *) + log_error "Unknown option: $1" + echo "" + usage + exit 1 + ;; + esac +done + +# Change to project root +cd "$PROJECT_ROOT" + +# Execute action +case $ACTION in +list) + list_versions + ;; +clean) + clean_versions + ;; +compile) + compile_versions "$START_COMMIT" "$END_COMMIT" + ;; +esac diff --git a/src/bin/bench-throughput.rs b/src/bin/bench-throughput.rs new file mode 100644 index 0000000..f9e7742 --- /dev/null +++ b/src/bin/bench-throughput.rs @@ -0,0 +1,694 @@ +use clap::{Arg, Command}; +use comfy_table::{ + Attribute as TableAttribute, Cell, Color as TableColor, ContentArrangement, Table, + presets::UTF8_FULL, +}; +use crossterm::{ + cursor, execute, queue, + style::{Attribute, Color, Print, ResetColor, SetAttribute, SetForegroundColor}, + terminal::{Clear, ClearType}, +}; +use serde::{Serialize, Serializer}; +use std::io::{self, Write}; +use std::time::{Duration, Instant}; +use string_pipeline::Template; +use unicode_width::UnicodeWidthStr; + +const TOOL_VERSION: &str = "2.0.0"; + +// Helper to serialize Duration as nanoseconds +fn serialize_duration(duration: &Duration, serializer: S) -> Result +where + S: Serializer, +{ + serializer.serialize_u128(duration.as_nanos()) +} + +/// Represents the results of a throughput benchmark for a specific input size +#[derive(Debug, Clone, Serialize)] +struct BenchmarkResult { + input_size: usize, + 
#[serde(serialize_with = "serialize_duration")] + parse_time: Duration, + #[serde(serialize_with = "serialize_duration")] + total_format_time: Duration, + #[serde(serialize_with = "serialize_duration")] + avg_time_per_path: Duration, + throughput_paths_per_sec: f64, +} + +impl BenchmarkResult { + fn new( + input_size: usize, + parse_time: Duration, + total_format_time: Duration, + ) -> Self { + let avg_time_per_path = total_format_time / input_size as u32; + let throughput_paths_per_sec = input_size as f64 / total_format_time.as_secs_f64(); + + BenchmarkResult { + input_size, + parse_time, + total_format_time, + avg_time_per_path, + throughput_paths_per_sec, + } + } +} + +/// Generates realistic absolute path strings for benchmarking +struct PathGenerator { + directories: Vec<&'static str>, + filenames: Vec<&'static str>, + extensions: Vec<&'static str>, +} + +impl PathGenerator { + fn new() -> Self { + PathGenerator { + directories: vec![ + "home", + "usr", + "var", + "opt", + "etc", + "lib", + "bin", + "sbin", + "tmp", + "dev", + "projects", + "workspace", + "repos", + "src", + "tests", + "docs", + "config", + "data", + "cache", + "logs", + "build", + "dist", + "target", + "node_modules", + "vendor", + "components", + "services", + "models", + "controllers", + "views", + "utils", + ], + filenames: vec![ + "main", + "lib", + "index", + "app", + "server", + "client", + "config", + "utils", + "helper", + "handler", + "service", + "model", + "controller", + "router", + "middleware", + "test", + "spec", + "readme", + "license", + "changelog", + "makefile", + "dockerfile", + "package", + "cargo", + "mod", + "types", + "constants", + "errors", + "validation", + ], + extensions: vec![ + "rs", "txt", "md", "json", "toml", "yaml", "yml", "js", "ts", "py", "go", "c", + "cpp", "h", "sh", + ], + } + } + + /// Generate a single path with specified seed and depth + fn generate_path(&self, seed: usize, depth: usize) -> String { + let mut parts = vec![]; + + // Generate directory components + for i in 0..depth { + let idx = (seed + i * 7) % self.directories.len(); + parts.push(self.directories[idx]); + } + + // Add filename with extension + let filename_idx = (seed * 13) % self.filenames.len(); + let ext_idx = (seed * 17) % self.extensions.len(); + let filename = format!( + "{}.{}", + self.filenames[filename_idx], self.extensions[ext_idx] + ); + parts.push(&filename); + + format!("/{}", parts.join("/")) + } + + /// Generate N unique paths with varying depths + fn generate_paths(&self, count: usize) -> Vec { + (0..count) + .map(|i| { + let depth = 2 + (i % 9); // Depths from 2 to 10 + self.generate_path(i, depth) + }) + .collect() + } +} + +/// Comprehensive template set with proper coverage for all operation types. 
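+/// The set mirrors the TEMPLATES array in scripts/analyze_all_templates.sh, so the
+/// `/bench` CI runs of this binary and local hyperfine analyses exercise the same operations.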
+/// Organizes templates into three categories: +/// - String operations (direct, no split needed) +/// - Split operations +/// - List operations (require split first, use map:{upper} for secondary ops) +struct TemplateSet; + +impl TemplateSet { + fn get_templates() -> Vec<(&'static str, &'static str)> { + vec![ + // ===== String Operations (direct, no split needed) ===== + ("Upper", "{upper}"), + ("Lower", "{lower}"), + ("Reverse", "{reverse}"), + ("Trim", "{trim}"), + ("Trim left", "{trim:left}"), + ("Trim right", "{trim:right}"), + ("Substring range", "{substring:0..10}"), + ("Substring negative", "{substring:-5..}"), + ("Append", "{append:.bak}"), + ("Prepend", "{prepend:backup_}"), + ("Surround", "{surround:\"}"), + ("Pad right", "{pad:80: :right}"), + ("Pad left", "{pad:80:0:left}"), + ("Replace simple", "{replace:s/\\.txt$/.md/}"), + ("Replace global", "{replace:s/\\/\\/+/\\//g}"), + ("Regex extract", "{regex_extract:[^/]+$}"), + ("Strip ANSI", "{strip_ansi}"), + + // ===== Split Operations ===== + ("Split all", "{split:/:..}"), + ("Split last", "{split:/:-1}"), + ("Split range", "{split:/:0..-1}"), + ("Split first 3", "{split:/:0..3}"), + + // ===== List Operations (with split) ===== + ("Join", "{split:/:..|join:/}"), + ("Filter", "{split:/:..|filter:^[a-z]+$}"), + ("Filter not", "{split:/:..|filter_not:^\\.}"), + ("Sort", "{split:/:..|sort}"), + ("Sort desc", "{split:/:..|sort:desc}"), + ("Reverse list", "{split:/:..|reverse}"), + ("Unique", "{split:/:..|unique}"), + ("Slice", "{split:/:..|slice:2..5}"), + ("Slice negative", "{split:/:..|slice:-3..}"), + ("Map upper", "{split:/:..|map:{upper}}"), + ("Map trim", "{split:/:..|map:{trim}}"), + + // ===== Complex Chains ===== + ("Chain string ops", "{trim|upper|pad:20}"), + ("Chain list ops", "{split:/:..|filter:^[a-z]+$|sort|join:-}"), + ("Nested split", "{split:/:-1|split:.:0}"), + ("Map + join", "{split:/:..|map:{upper}|join:/}"), + ] + } +} + +/// Runs a benchmark for a single template with a single input size +fn benchmark_template( + template_str: &str, + size: usize, +) -> Result> { + let generator = PathGenerator::new(); + + // Time template parsing + let parse_start = Instant::now(); + let template = Template::parse(template_str)?; + let parse_time = parse_start.elapsed(); + + // Generate paths + let paths = generator.generate_paths(size); + + // Time formatting + let format_start = Instant::now(); + for path in &paths { + let _ = template.format(path)?; + } + let total_format_time = format_start.elapsed(); + + Ok(BenchmarkResult::new(size, parse_time, total_format_time)) +} + +/// Execute a template without timing (for hyperfine integration) +fn execute_template( + template_str: &str, + size: usize, +) -> Result<(), Box> { + // Parse template + let template = Template::parse(template_str)?; + + // Generate paths + let generator = PathGenerator::new(); + let paths = generator.generate_paths(size); + + // Format all paths + for path in &paths { + let _ = template.format(path)?; + } + + Ok(()) +} + +fn format_duration(duration: Duration) -> String { + let nanos = duration.as_nanos(); + if nanos < 1_000 { + format!("{nanos}ns") + } else if nanos < 1_000_000 { + format!("{:.2}ΞΌs", nanos as f64 / 1_000.0) + } else if nanos < 1_000_000_000 { + format!("{:.2}ms", nanos as f64 / 1_000_000.0) + } else { + format!("{:.2}s", duration.as_secs_f64()) + } +} + +fn format_throughput(paths_per_sec: f64) -> String { + if paths_per_sec >= 1_000_000.0 { + format!("{:.2}M/s", paths_per_sec / 1_000_000.0) + } else if paths_per_sec >= 1_000.0 { + 
format!("{:.2}K/s", paths_per_sec / 1_000.0) + } else { + format!("{:.2}/s", paths_per_sec) + } +} + +fn format_size(size: usize) -> String { + if size >= 1_000_000 { + format!("{}M", size / 1_000_000) + } else if size >= 1_000 { + format!("{}K", size / 1_000) + } else { + size.to_string() + } +} + +// Styled output helpers +fn print_header(text: &str) { + let mut stdout = io::stdout(); + let text_width = text.width(); + let _ = execute!( + stdout, + SetForegroundColor(Color::Cyan), + SetAttribute(Attribute::Bold), + Print("β•”"), + Print("═".repeat(78)), + Print("β•—\nβ•‘ "), + Print(text), + Print(" ".repeat(77 - text_width)), + Print("β•‘\nβ•š"), + Print("═".repeat(78)), + Print("╝\n"), + ResetColor + ); +} + +fn print_section_header(text: &str) { + let mut stdout = io::stdout(); + let _ = execute!( + stdout, + Print("\n"), + SetForegroundColor(Color::Cyan), + SetAttribute(Attribute::Bold), + Print(text), + ResetColor, + Print("\n"), + SetForegroundColor(Color::DarkGrey), + Print("─".repeat(80)), + ResetColor + ); +} + +fn print_error(msg: &str) { + let mut stdout = io::stdout(); + let _ = execute!( + stdout, + SetForegroundColor(Color::Red), + Print("βœ— "), + ResetColor, + Print(msg), + Print("\n") + ); +} + +fn print_progress_bar(current: usize, total: usize, template_name: &str) { + let mut stdout = io::stdout(); + let progress = (current as f64 / total as f64) * 100.0; + let filled = ((progress / 100.0) * 40.0) as usize; + let _ = queue!( + stdout, + cursor::MoveToColumn(0), + Clear(ClearType::CurrentLine), + SetForegroundColor(Color::Cyan), + Print("["), + SetForegroundColor(Color::Green), + Print("β–ˆ".repeat(filled)), + SetForegroundColor(Color::DarkGrey), + Print("β–‘".repeat(40 - filled)), + SetForegroundColor(Color::Cyan), + Print("]"), + ResetColor, + Print(format!(" {:.0}% ({}/{}) - ", progress, current, total)), + SetAttribute(Attribute::Dim), + Print(template_name), + ResetColor + ); + stdout.flush().ok(); +} + +fn print_template_result(template_name: &str, result: &BenchmarkResult) { + print_section_header(&format!("Template: {}", template_name)); + + // Create results table with comfy-table + let mut table = Table::new(); + table + .load_preset(UTF8_FULL) + .set_content_arrangement(ContentArrangement::Dynamic) + .set_header(vec![ + Cell::new("Input Size") + .add_attribute(TableAttribute::Bold) + .fg(TableColor::Yellow), + Cell::new("Parse Time") + .add_attribute(TableAttribute::Bold) + .fg(TableColor::Yellow), + Cell::new("Total Time") + .add_attribute(TableAttribute::Bold) + .fg(TableColor::Yellow), + Cell::new("Avg/Path") + .add_attribute(TableAttribute::Bold) + .fg(TableColor::Yellow), + Cell::new("Throughput") + .add_attribute(TableAttribute::Bold) + .fg(TableColor::Yellow), + ]); + + table.add_row(vec![ + Cell::new(format_size(result.input_size)), + Cell::new(format_duration(result.parse_time)), + Cell::new(format_duration(result.total_format_time)), + Cell::new(format_duration(result.avg_time_per_path)), + Cell::new(format_throughput(result.throughput_paths_per_sec)), + ]); + + println!("\n{}\n", table); +} + +fn print_summary(all_results: &[(&str, BenchmarkResult)]) { + let size = all_results[0].1.input_size; + let header_text = format!("πŸ“Š SUMMARY - Performance at {}", format_size(size)); + print_header(&header_text); + + // Collect results for sorting + let mut summary_data: Vec<(&str, Duration, f64)> = all_results + .iter() + .map(|(name, result)| (*name, result.avg_time_per_path, result.throughput_paths_per_sec)) + .collect(); + + // Sort by throughput 
(highest first) + summary_data.sort_by(|a, b| b.2.partial_cmp(&a.2).unwrap()); + + // Create summary table with comfy-table + let mut table = Table::new(); + table + .load_preset(UTF8_FULL) + .set_content_arrangement(ContentArrangement::Dynamic) + .set_header(vec![ + Cell::new("Template") + .add_attribute(TableAttribute::Bold) + .fg(TableColor::Yellow), + Cell::new("Avg/Path") + .add_attribute(TableAttribute::Bold) + .fg(TableColor::Yellow), + Cell::new("Throughput") + .add_attribute(TableAttribute::Bold) + .fg(TableColor::Yellow), + ]); + + for (idx, (template_name, avg_time, throughput)) in summary_data.iter().enumerate() { + // Highlight fastest (green) and slowest (yellow) + let color = if idx == 0 { + TableColor::Green + } else if idx == summary_data.len() - 1 { + TableColor::Yellow + } else { + TableColor::Reset + }; + + table.add_row(vec![ + Cell::new(template_name).fg(color), + Cell::new(format_duration(*avg_time)).fg(color), + Cell::new(format_throughput(*throughput)).fg(color), + ]); + } + + println!("{}", table); +} + +/// Output results in JSON format for tracking over time +#[derive(Serialize)] +struct BenchmarkOutput<'a> { + version: String, + timestamp: u64, + benchmarks: Vec>, +} + +#[derive(Serialize)] +struct TemplateBenchmark<'a> { + template_name: &'a str, + result: &'a BenchmarkResult, +} + +fn output_json( + all_results: &[(&str, BenchmarkResult)], + output_path: Option<&str>, +) -> Result<(), Box> { + let timestamp = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH)? + .as_secs(); + + let benchmarks: Vec = all_results + .iter() + .map(|(name, result)| TemplateBenchmark { + template_name: name, + result, + }) + .collect(); + + let output = BenchmarkOutput { + version: TOOL_VERSION.to_string(), + timestamp, + benchmarks, + }; + + let json_string = serde_json::to_string_pretty(&output)?; + + if let Some(path) = output_path { + std::fs::write(path, json_string)?; + let mut stdout = io::stdout(); + let _ = execute!( + stdout, + Print("\n"), + SetForegroundColor(Color::Green), + Print("βœ“ JSON output written to: "), + ResetColor, + Print(format!("{}\n", path)) + ); + } else { + println!("\n{}", json_string); + } + + Ok(()) +} + +fn get_default_output_path() -> Result> { + let data_home = std::env::var("XDG_DATA_HOME").unwrap_or_else(|_| { + let home = std::env::var("HOME").expect("HOME environment variable not set"); + format!("{}/.local/share", home) + }); + + let benchmark_dir = format!("{}/string-pipeline/benchmarks", data_home); + std::fs::create_dir_all(&benchmark_dir)?; + + let timestamp = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH)? 
+ .as_secs(); + + Ok(format!("{}/bench-{}.json", benchmark_dir, timestamp)) +} + +fn list_templates() { + println!("Available predefined templates:\n"); + for (name, template) in TemplateSet::get_templates() { + println!(" {:<30} {}", name, template); + } +} + +fn execute_all_templates_mode(size: usize, output_path: Option<&str>, verbose: bool) { + print_header(&format!( + "String Pipeline Throughput Benchmark {}", + TOOL_VERSION + )); + + let mut stdout = io::stdout(); + let _ = execute!( + stdout, + Print("Measuring template processing performance\n\n"), + SetForegroundColor(Color::Cyan), + Print("Input size: "), + ResetColor, + Print(format!("{}\n", format_size(size))) + ); + + let templates = TemplateSet::get_templates(); + let mut all_results = Vec::new(); + let total_templates = templates.len(); + + for (idx, (template_name, template_str)) in templates.iter().enumerate() { + print_progress_bar(idx + 1, total_templates, template_name); + + match benchmark_template(template_str, size) { + Ok(result) => { + let mut stdout = io::stdout(); + let _ = execute!( + stdout, + cursor::MoveToColumn(0), + Clear(ClearType::CurrentLine) + ); + if verbose { + print_template_result(template_name, &result); + } + all_results.push((*template_name, result)); + } + Err(e) => { + let mut stdout = io::stdout(); + let _ = execute!( + stdout, + cursor::MoveToColumn(0), + Clear(ClearType::CurrentLine) + ); + print_error(&format!("Failed to benchmark '{}': {}", template_name, e)); + } + } + } + + print_summary(&all_results); + + if let Some(path) = output_path + && let Err(e) = output_json(&all_results, Some(path)) + { + eprintln!("Error writing JSON output: {}", e); + std::process::exit(1); + } + + let mut stdout = io::stdout(); + let _ = execute!( + stdout, + SetForegroundColor(Color::Green), + SetAttribute(Attribute::Bold), + Print("\nβœ“ Benchmark complete!\n"), + ResetColor + ); +} + +fn execute_specific_template_mode(template_str: &str, size: usize) { + match execute_template(template_str, size) { + Ok(_) => std::process::exit(0), + Err(e) => { + eprintln!("Error: {}", e); + std::process::exit(1); + } + } +} + +fn main() { + let matches = Command::new("String Pipeline Throughput Benchmark") + .version(TOOL_VERSION) + .about("Benchmarks template processing performance") + .arg( + Arg::new("template") + .short('t') + .long("template") + .value_name("TEMPLATE") + .help("Template to benchmark: 'all' for predefined set, or template string") + .default_value("all"), + ) + .arg( + Arg::new("size") + .short('s') + .long("size") + .value_name("COUNT") + .help("Number of paths to process") + .default_value("10000"), + ) + .arg( + Arg::new("output") + .short('o') + .long("output") + .value_name("FILE") + .help("JSON output file (only for --template all)"), + ) + .arg( + Arg::new("verbose") + .short('v') + .long("verbose") + .action(clap::ArgAction::SetTrue) + .help("Show detailed per-template results (only for --template all)"), + ) + .arg( + Arg::new("list") + .long("list-templates") + .action(clap::ArgAction::SetTrue) + .help("List available predefined templates and exit"), + ) + .get_matches(); + + // Parse arguments + let template_arg = matches.get_one::("template").unwrap(); + let size: usize = matches + .get_one::("size") + .unwrap() + .parse() + .expect("Invalid size value"); + let output_path = matches + .get_one::("output") + .map(|s| s.to_string()) + .or_else(|| get_default_output_path().ok()); + let verbose = matches.get_flag("verbose"); + let list = matches.get_flag("list"); + + // List templates + if 
+    let output_path = matches
+        .get_one::<String>("output")
+        .map(|s| s.to_string())
+        .or_else(|| get_default_output_path().ok());
+    let verbose = matches.get_flag("verbose");
+    let list = matches.get_flag("list");
+
+    // List templates
+    if list {
+        list_templates();
+        return;
+    }
+
+    // Mode 1: All templates
+    if template_arg == "all" {
+        execute_all_templates_mode(size, output_path.as_deref(), verbose);
+    } else {
+        // Mode 2: Specific template
+        execute_specific_template_mode(template_arg, size);
+    }
+}
diff --git a/src/pipeline/mod.rs b/src/pipeline/mod.rs
index 83e5cde..565112c 100644
--- a/src/pipeline/mod.rs
+++ b/src/pipeline/mod.rs
@@ -1354,21 +1354,69 @@ fn apply_single_operation(
             apply_list_operation(val, |list| apply_range(&list, range), "Slice")
         }
         StringOp::Filter { pattern } => {
-            let re = get_cached_regex(pattern)?;
+            // Fast path for literal string matching (no regex metacharacters)
+            let is_literal = !pattern.contains([
+                '\\', '.', '*', '+', '?', '^', '$', '|', '[', ']', '(', ')', '{', '}',
+            ]);
+
             match val {
-                Value::List(list) => Ok(Value::List(
-                    list.into_iter().filter(|s| re.is_match(s)).collect(),
-                )),
-                Value::Str(s) => Ok(Value::Str(if re.is_match(&s) { s } else { String::new() })),
+                Value::List(list) => {
+                    if is_literal {
+                        Ok(Value::List(
+                            list.into_iter().filter(|s| s.contains(pattern)).collect(),
+                        ))
+                    } else {
+                        let re = get_cached_regex(pattern)?;
+                        Ok(Value::List(
+                            list.into_iter().filter(|s| re.is_match(s)).collect(),
+                        ))
+                    }
+                }
+                Value::Str(s) => {
+                    if is_literal {
+                        Ok(Value::Str(if s.contains(pattern) {
+                            s
+                        } else {
+                            String::new()
+                        }))
+                    } else {
+                        let re = get_cached_regex(pattern)?;
+                        Ok(Value::Str(if re.is_match(&s) { s } else { String::new() }))
+                    }
+                }
             }
         }
         StringOp::FilterNot { pattern } => {
-            let re = get_cached_regex(pattern)?;
+            // Fast path for literal string matching (no regex metacharacters)
+            let is_literal = !pattern.contains([
+                '\\', '.', '*', '+', '?', '^', '$', '|', '[', ']', '(', ')', '{', '}',
+            ]);
+
             match val {
-                Value::List(list) => Ok(Value::List(
-                    list.into_iter().filter(|s| !re.is_match(s)).collect(),
-                )),
-                Value::Str(s) => Ok(Value::Str(if re.is_match(&s) { String::new() } else { s })),
+                Value::List(list) => {
+                    if is_literal {
+                        Ok(Value::List(
+                            list.into_iter().filter(|s| !s.contains(pattern)).collect(),
+                        ))
+                    } else {
+                        let re = get_cached_regex(pattern)?;
+                        Ok(Value::List(
+                            list.into_iter().filter(|s| !re.is_match(s)).collect(),
+                        ))
+                    }
+                }
+                Value::Str(s) => {
+                    if is_literal {
+                        Ok(Value::Str(if s.contains(pattern) {
+                            String::new()
+                        } else {
+                            s
+                        }))
+                    } else {
+                        let re = get_cached_regex(pattern)?;
+                        Ok(Value::Str(if re.is_match(&s) { String::new() } else { s }))
+                    }
+                }
             }
         }
         StringOp::Sort { direction } => {
@@ -1429,16 +1477,24 @@ fn apply_single_operation(
             flags,
         } => {
             if let Value::Str(s) = val {
-                // Early exit for simple string patterns (not regex)
-                if !flags.contains('g')
-                    && !pattern.contains([
-                        '\\', '.', '*', '+', '?', '^', '$', '|', '[', ']', '(', ')', '{', '}',
-                    ])
-                    && !s.contains(pattern)
-                {
-                    return Ok(Value::Str(s));
+                // Fast path for literal string replacement (no regex metacharacters or special flags)
+                let is_literal = !pattern.contains([
+                    '\\', '.', '*', '+', '?', '^', '$', '|', '[', ']', '(', ')', '{', '}',
+                ]);
+
+                // Only use fast path if no special regex flags (case-insensitive, multiline, etc.)
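+                // e.g. flags "g" keeps the fast path; "gi" or "m" falls through to the regex engine below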
+                let has_special_flags = flags.chars().any(|c| c != 'g');
+
+                if is_literal && !has_special_flags {
+                    let result = if flags.contains('g') {
+                        s.replace(pattern, replacement)
+                    } else {
+                        s.replacen(pattern, replacement, 1)
+                    };
+                    return Ok(Value::Str(result));
                 }
+
+                // Regex path for complex patterns
                 let pattern_to_use = if flags.is_empty() {
                     pattern.clone()
                 } else {
diff --git a/src/pipeline/parser.rs b/src/pipeline/parser.rs
index 0ac12f0..a2f179e 100644
--- a/src/pipeline/parser.rs
+++ b/src/pipeline/parser.rs
@@ -5,8 +5,7 @@
 //! Pest parser generator for robust syntax handling with comprehensive error reporting.
 //!
 //! The parser supports the full template syntax including operations, ranges,
-//! escape sequences, and debug flags, with intelligent handling of special
-//! characters in different contexts.
+//! escape sequences, and debug flags.
 //!
 
 use pest::Parser;
diff --git a/src/pipeline/template.pest b/src/pipeline/template.pest
index 0aee8ea..3d0d72a 100644
--- a/src/pipeline/template.pest
+++ b/src/pipeline/template.pest
@@ -4,30 +4,31 @@ debug_flag = @{ "!" }
 
 operation_list = { operation ~ ("|" ~ operation)* }
 
+// Ordered by frequency based on benchmark usage patterns
 operation = {
     shorthand_range
   | shorthand_index
   | split
+  | join
   | upper
  | lower
   | trim
-  | append
-  | prepend
-  | surround
-  | quote
-  | join
   | substring
+  | reverse
   | replace
-  | map
   | filter
   | filter_not
-  | slice
   | sort
-  | reverse
   | unique
+  | map
+  | slice
+  | append
+  | prepend
+  | surround
+  | quote
+  | pad
   | regex_extract
   | strip_ansi
-  | pad
 }
 
 shorthand_index = { number }
@@ -40,123 +41,77 @@ shorthand_range = {
   | range_full
 }
 
+// Argument parsing - requires escaping for special characters
+argument = { (escaped_char | normal_char)* }
+normal_char = { !("|" | "}" | "{" | ":" | "\\") ~ ANY }
+escaped_char = { "\\" ~ ANY }
+
 // Main operations - using specific arg types where needed
-regex_extract = { "regex_extract" ~ ":" ~ regex_arg ~ (":" ~ number)? }
-filter_not = { "filter_not" ~ ":" ~ regex_arg }
-filter = { "filter" ~ ":" ~ regex_arg }
+regex_extract = { "regex_extract" ~ ":" ~ argument ~ (":" ~ number)? }
+filter_not = { "filter_not" ~ ":" ~ argument }
+filter = { "filter" ~ ":" ~ argument }
 strip_ansi = @{ "strip_ansi" }
-map = { "map" ~ ":" ~ map_operation }
-split = { "split" ~ ":" ~ split_arg ~ ":" ~ range_spec? }
-substring = { "substring" ~ ":" ~ range_spec }
-replace = { "replace" ~ ":" ~ sed_string }
-append = { "append" ~ ":" ~ simple_arg }
-prepend = { "prepend" ~ ":" ~ simple_arg }
-surround = { "surround" ~ ":" ~ simple_arg }
-quote = { "quote" ~ ":" ~ simple_arg }
+map = { "map" ~ ":" ~ map_operation }
+split = { "split" ~ ":" ~ argument ~ ":" ~ range_spec? }
+substring = { "substring" ~ ":" ~ range_spec }
+replace = { "replace" ~ ":" ~ sed_string }
+append = { "append" ~ ":" ~ argument }
+prepend = { "prepend" ~ ":" ~ argument }
+surround = { "surround" ~ ":" ~ argument }
+quote = { "quote" ~ ":" ~ argument }
 upper = @{ "upper" }
 lower = @{ "lower" }
-trim = { "trim" ~ (":" ~ simple_arg)? ~ (":" ~ direction)? }
-join = { "join" ~ ":" ~ simple_arg }
-slice = { "slice" ~ ":" ~ range_spec }
-sort = { "sort" ~ (":" ~ sort_direction)? }
+trim = { "trim" ~ (":" ~ argument)? ~ (":" ~ direction)? }
+join = { "join" ~ ":" ~ argument }
+slice = { "slice" ~ ":" ~ range_spec }
+sort = { "sort" ~ (":" ~ sort_direction)? }
 reverse = @{ "reverse" }
 unique = @{ "unique" }
-pad = { "pad" ~ ":" ~ number ~ (":" ~ pad_char)? ~ (":" ~ direction)? }
+pad = { "pad" ~ ":" ~ number ~ (":" ~ pad_char)? ~ (":" ~ direction)? }
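+// Note: literal "|", ":", "{" or "}" inside an argument must now be backslash-escaped (e.g. split:\|:..), as the updated tests below show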
 
 // Direction specifiers
 direction = @{ "left" | "right" | "both" }
 sort_direction = @{ "asc" | "desc" }
-pad_char = @{ simple_arg_content+ }
+pad_char = @{ argument }
 
 // Map operation
 map_operation = { "{" ~ map_operation_list ~ "}" }
 map_operation_list = { map_inner_operation ~ ("|" ~ map_inner_operation)* }
 
+// Ordered by frequency for map operations
 map_inner_operation = {
-    strip_ansi
+    upper
+  | lower
+  | trim
   | substring
+  | reverse
   | replace
+  | map_split
+  | map_join
   | append
   | prepend
   | surround
   | quote
-  | upper
-  | lower
-  | trim
   | pad
-  | reverse
-  | map_split
-  | map_join
   | map_slice
   | map_sort
   | map_unique
   | map_filter
   | map_filter_not
   | map_regex_extract
+  | strip_ansi
 }
 
 // Map-specific operations that need special handling
-map_split = { "split" ~ ":" ~ split_arg ~ (":" ~ range_spec)? }
-map_join = { "join" ~ ":" ~ simple_arg }
-map_slice = { "slice" ~ ":" ~ range_spec }
-map_sort = { "sort" ~ (":" ~ sort_direction)? }
+map_split = { "split" ~ ":" ~ argument ~ (":" ~ range_spec)? }
+map_join = { "join" ~ ":" ~ argument }
+map_slice = { "slice" ~ ":" ~ range_spec }
+map_sort = { "sort" ~ (":" ~ sort_direction)? }
 map_unique = @{ "unique" }
-map_filter = { "filter" ~ ":" ~ map_regex_arg }
-map_filter_not = { "filter_not" ~ ":" ~ map_regex_arg }
+map_filter = { "filter" ~ ":" ~ argument }
+map_filter_not = { "filter_not" ~ ":" ~ argument }
 
 // Map-specific regex extract
-map_regex_extract = { "regex_extract" ~ ":" ~ map_regex_arg ~ (":" ~ number)? }
-
-// Simplified argument handling - three types to handle specific cases
-simple_arg = @{ simple_arg_content* }
-simple_arg_content = { escaped_char | simple_normal_char }
-simple_normal_char = { !(":" | "|" | "}" | "{" | "\\") ~ ANY }
-
-// Split args - need to handle pipes that aren't operations
-split_arg = @{ (split_escaped_char | split_content)* }
-split_content = { !(":" ~ (number | range_part)) ~ !("|" ~ operation_keyword) ~ !("}" ~ EOI) ~ ANY }
-split_escaped_char = { "\\" ~ ANY }
-
-// Regex args - need to handle pipes and braces in regex patterns
-regex_arg = @{ (regex_escaped_char | regex_content)* }
-regex_content = { !(":" ~ (number | range_part)) ~ !("|" ~ operation_keyword) ~ !("}" ~ EOI) ~ ANY }
-regex_escaped_char = { "\\" ~ ANY }
-
-// Map regex args - handle braces in regex patterns
-map_regex_arg = @{ (map_regex_escaped_char | map_regex_brace | map_regex_content)* }
-map_regex_brace = { "{" ~ (!"}" ~ ANY)* ~ "}" }
-map_regex_content = { !(":" ~ number) ~ !("|" ~ operation_keyword) ~ !("{" | ("}" ~ ("|" | "}" | EOI))) ~ ANY }
-map_regex_escaped_char = { "\\" ~ ANY }
-
-// Common escaped character handling
-escaped_char = { "\\" ~ ANY }
-
-// Operation keywords for lookahead (simplified list)
-operation_keyword = _{
-    "split"
-  | "upper"
-  | "lower"
-  | "trim"
-  | "append"
-  | "prepend"
-  | "surround"
-  | "quote"
-  | "join"
-  | "substring"
-  | "replace"
-  | "map"
-  | "filter"
-  | "filter_not"
-  | "slice"
-  | "sort"
-  | "reverse"
-  | "unique"
-  | "regex_extract"
-  | "strip_ansi"
-  | "pad"
-}
-
-// Range parts for lookahead
-range_part = _{ ".." | "..=" }
+map_regex_extract = { "regex_extract" ~ ":" ~ argument ~ (":" ~ number)? }
 
 // Sed strings
 sed_string = { "s/" ~ sed_pattern ~ "/" ~ sed_replacement ~ "/" ~ sed_flags? }
diff --git a/tests/multi_template_tests.rs b/tests/multi_template_tests.rs
index 786a1d7..f396d6e 100644
--- a/tests/multi_template_tests.rs
+++ b/tests/multi_template_tests.rs
@@ -55,7 +55,7 @@ fn test_multi_template_caching_optimization() {
 fn test_multi_template_different_separators() {
     // Test multiple template sections with different separators
     let template =
-        MultiTemplate::parse("Comma: {split:,:0} Space: {split: :1} Pipe: {split:|:0}").unwrap();
+        MultiTemplate::parse("Comma: {split:,:0} Space: {split: :1} Pipe: {split:\\|:0}").unwrap();
     let result = template.format("a,b c|d").unwrap();
     assert_eq!(result, "Comma: a Space: c|d Pipe: a,b c");
 }
diff --git a/tests/template/complex_pipeline.rs b/tests/template/complex_pipeline.rs
index 6e281bf..8853e83 100644
--- a/tests/template/complex_pipeline.rs
+++ b/tests/template/complex_pipeline.rs
@@ -395,7 +395,7 @@ fn test_special_chars_pipeline() {
 
 #[test]
 fn test_escaped_pipes_pipeline() {
-    let result = process("test", r"{replace:s/test/a|b/|split:|:..|join:-}");
+    let result = process("test", r"{replace:s/test/a|b/|split:\|:..|join:-}");
     assert_eq!(result.unwrap(), "a-b");
 }
diff --git a/tests/template/simple_pipeline.rs b/tests/template/simple_pipeline.rs
index c2a480d..f98e154 100644
--- a/tests/template/simple_pipeline.rs
+++ b/tests/template/simple_pipeline.rs
@@ -1290,7 +1290,7 @@ pub mod regex_extract_operations {
         assert_eq!(
             process(
                 "Version: 1.2.3-beta",
-                r"{regex_extract:Version: (\d+\.\d+\.\d+):1}"
+                r"{regex_extract:Version\: (\d+\.\d+\.\d+):1}"
             )
             .unwrap(),
             "1.2.3"