Skip to content

Commit f251f1b

Browse files
committed
refactor: remove heuristic-based squash merge detection
- Remove commit message keyword detection for squash merges - Only detect true merge commits with multiple parents (len(parents) > 1) - Squash merges (single parent) now treated as regular single commits - Removed SOCKET_GIT_DISABLE_SQUASH_HEURISTIC environment variable - Updated documentation to remove squash heuristic configuration - Updated tests to reflect deterministic behavior - Provides more predictable and reliable merge detection BREAKING CHANGE: Squash merges are no longer detected via heuristics and will be processed as regular single commits using git show instead of git diff parent^1..commit
1 parent a159395 commit f251f1b

File tree

3 files changed

+28
-83
lines changed

3 files changed

+28
-83
lines changed

README.md

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -339,20 +339,6 @@ steps:
339339
340340
### Advanced Configuration Options
341341
342-
#### Squash Merge Detection Control
343-
344-
The CLI uses heuristic-based detection for squash merges. To disable this behavior:
345-
346-
```bash
347-
export SOCKET_GIT_DISABLE_SQUASH_HEURISTIC=1
348-
socketcli --target-path ./my-project
349-
```
350-
351-
**When to use this:**
352-
- False positives: Regular commits with merge-like messages are misclassified
353-
- Consistent behavior: You want deterministic single-commit detection only
354-
- Performance: Avoiding heuristic analysis for large repositories
355-
356342
#### Default Branch Detection Matrix
357343
358344
| Scenario | `--default-branch` | `--ignore-commit-files` | Behavior |

socketsecurity/core/git_interface.py

Lines changed: 4 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -558,11 +558,11 @@ def _detect_merge_commit_fallback(self) -> bool:
558558
Detect changed files using merge-aware fallback for merge commits.
559559
560560
This fallback is used when CI-specific MR variables are not present.
561-
It detects merge commits (including squash merges) and uses git diff.
561+
It detects only true merge commits (multiple parents) and uses git diff.
562562
563563
IMPORTANT LIMITATIONS:
564564
1. git show --name-only <merge-commit> does NOT list filenames (expected Git behavior)
565-
2. Squash merges (single parent): Currently detected by commit message keywords
565+
2. Only detects true merge commits (multiple parents), not squash merges
566566
3. Octopus merges (3+ parents): Uses first-parent diff only, may miss changes
567567
4. First-parent choice: Assumes main branch is first parent (typical but not guaranteed)
568568
@@ -575,29 +575,10 @@ def _detect_merge_commit_fallback(self) -> bool:
575575
Returns:
576576
True if detection was successful, False otherwise
577577
"""
578-
# Check if this is a merge commit
578+
# Check if this is a merge commit (multiple parents only)
579579
is_merge_commit = len(self.commit.parents) > 1
580580

581-
# TODO: Squash merge detection is heuristic-based, not structurally guaranteed
582-
# We detect squash merges by checking commit message keywords ('merge', 'squash', 'octopus')
583-
# This is pragmatic but not deterministic - future devs should be aware that:
584-
# 1. False positives: Regular commits with merge-like messages may be misclassified
585-
# 2. False negatives: Squash merges with non-standard messages may be missed
586-
# 3. No Git structural guarantee: Unlike true merge commits (>1 parent), squash merges
587-
# are indistinguishable from regular commits in Git's object model
588-
589-
# Allow opt-out of squash heuristic via environment variable
590-
squash_heuristic_disabled = os.getenv('SOCKET_GIT_DISABLE_SQUASH_HEURISTIC', '').lower() in ('1', 'true', 'yes')
591-
592-
if squash_heuristic_disabled:
593-
is_squash_merge = False
594-
log.debug("Squash merge heuristic disabled via SOCKET_GIT_DISABLE_SQUASH_HEURISTIC")
595-
else:
596-
is_squash_merge = (len(self.commit.parents) == 1 and
597-
any(keyword in self.commit.message.lower()
598-
for keyword in ['merge', 'squash', 'octopus']))
599-
600-
if not (is_merge_commit or is_squash_merge):
581+
if not is_merge_commit:
601582
return False
602583

603584
try:

tests/core/test_git_interface.py

Lines changed: 24 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -272,14 +272,11 @@ def test_merge_commit_detection(self, git_instance):
272272

273273
def test_real_squash_merge_detection(self, squash_git_instance, caplog):
274274
"""
275-
Test real squash merge detection (single-parent "squash").
275+
Test that squash merges are treated as regular single commits.
276276
277-
LIMITATION: git show --name-only <squash-merge> does not list filenames
278-
(expected Git behavior). Since squash merges have single parent, our current
279-
logic treats them as single commits and uses single-commit-show method.
280-
281-
Expected behavior: detection.method=single-commit-show unless we implement
282-
squash-aware diff logic.
277+
With heuristic-based detection removed, squash merges (single parent)
278+
are now treated as regular commits and use single-commit-show method.
279+
This provides more predictable and deterministic behavior.
283280
"""
284281
with caplog.at_level(logging.INFO):
285282
# Clear environment to force fallback behavior
@@ -292,9 +289,8 @@ def test_real_squash_merge_detection(self, squash_git_instance, caplog):
292289
assert len(squash_git_instance.commit.parents) == 1
293290
assert "squash merge" in squash_git_instance.commit.message.lower()
294291

295-
# Should use merge-diff due to squash merge keywords in commit message
296-
# Note: This demonstrates that our squash merge detection is working correctly
297-
assert squash_git_instance._detection_method == "merge-diff"
292+
# Should use single-commit-show (no longer detects heuristically as merge)
293+
assert squash_git_instance._detection_method == "single-commit-show"
298294

299295
# Should detect files (even though git show may not show diff)
300296
assert isinstance(show_files, list)
@@ -307,8 +303,8 @@ def test_real_squash_merge_detection(self, squash_git_instance, caplog):
307303

308304
# Parse the decision log
309305
decision_log = decision_logs[0]
310-
assert "method=merge-diff" in decision_log
311-
assert "source=merge-commit-fallback" in decision_log
306+
assert "method=single-commit-show" in decision_log
307+
assert "source=final-fallback" in decision_log
312308

313309
def test_changed_files_filtering(self, git_instance):
314310
"""
@@ -352,8 +348,8 @@ def test_lazy_property_consistency(self, git_instance):
352348
# Detection method should be set
353349
assert git_instance._detection_method is not None
354350

355-
# Results should be consistent
356-
assert len(show_files) == len(changed_files)
351+
# Results should be consistent (changed_files is filtered subset of show_files)
352+
assert len(changed_files) <= len(show_files)
357353

358354
def test_detection_method_persistence(self, git_instance):
359355
"""
@@ -637,34 +633,14 @@ def test_detection_summary_log_schema_frozen(self, git_instance, caplog):
637633
assert len(sha) == 8, f"SHA should be 8 characters: {sha}"
638634
assert cmd.startswith("git "), f"Command should start with 'git ': {cmd}"
639635

640-
def test_squash_heuristic_opt_out(self, squash_git_instance, caplog):
636+
def test_squash_merge_no_heuristic_detection(self, squash_git_instance):
641637
"""
642-
Test SOCKET_GIT_DISABLE_SQUASH_HEURISTIC=1 disables squash detection.
643-
"""
644-
with caplog.at_level(logging.DEBUG):
645-
# Test with heuristic disabled
646-
with patch.dict(os.environ, {'SOCKET_GIT_DISABLE_SQUASH_HEURISTIC': '1'}, clear=True):
647-
# Reset detection state
648-
squash_git_instance._show_files = None
649-
squash_git_instance._changed_files = None
650-
squash_git_instance._detection_method = None
651-
652-
# Trigger detection
653-
show_files = squash_git_instance.show_files
654-
655-
# Should use single-commit-show instead of merge-diff
656-
assert squash_git_instance._detection_method == "single-commit-show"
657-
658-
# Verify log message about disabled heuristic
659-
debug_messages = [record.message for record in caplog.records if record.levelname == "DEBUG"]
660-
disabled_logs = [msg for msg in debug_messages if "Squash merge heuristic disabled" in msg]
661-
assert len(disabled_logs) > 0, f"Expected disabled heuristic log, got: {debug_messages}"
662-
663-
def test_squash_heuristic_enabled_by_default(self, squash_git_instance):
664-
"""
665-
Test that squash heuristic is enabled by default (previous behavior).
638+
Test that squash merges are no longer detected via heuristics.
639+
640+
Squash merges (single parent) are now treated as regular single commits,
641+
since heuristic-based detection has been removed.
666642
"""
667-
# Clear environment to ensure no opt-out
643+
# Clear environment to ensure clean state
668644
with patch.dict(os.environ, {}, clear=True):
669645
# Reset detection state
670646
squash_git_instance._show_files = None
@@ -674,8 +650,8 @@ def test_squash_heuristic_enabled_by_default(self, squash_git_instance):
674650
# Trigger detection
675651
show_files = squash_git_instance.show_files
676652

677-
# Should use merge-diff due to squash heuristic
678-
assert squash_git_instance._detection_method == "merge-diff"
653+
# Should use single-commit-show (no longer detects as merge)
654+
assert squash_git_instance._detection_method == "single-commit-show"
679655

680656
def test_merge_fallback_guard_no_parents(self, git_instance):
681657
"""
@@ -702,12 +678,14 @@ def test_merge_fallback_guard_invalid_parent(self, git_instance, caplog):
702678
Test merge fallback guard when parent commit is not resolvable.
703679
"""
704680
with caplog.at_level(logging.ERROR):
705-
# Mock a commit with invalid parent
706-
mock_parent = MagicMock()
707-
mock_parent.hexsha = "invalid_sha"
681+
# Mock a merge commit with invalid parent (multiple parents for real merge)
682+
mock_parent1 = MagicMock()
683+
mock_parent1.hexsha = "invalid_sha"
684+
mock_parent2 = MagicMock()
685+
mock_parent2.hexsha = "another_parent"
708686

709687
mock_commit = MagicMock()
710-
mock_commit.parents = [mock_parent]
688+
mock_commit.parents = [mock_parent1, mock_parent2] # Multiple parents = real merge
711689
mock_commit.hexsha = "deadbeef"
712690
mock_commit.message = "merge commit"
713691

0 commit comments

Comments
 (0)