Skip to content

Commit 616c862

Browse files
authored
<regex>: Process minimum number of reps in simple loops non-recursively (#5762)
1 parent 2251b37 commit 616c862

File tree

1 file changed

+62
-47
lines changed

1 file changed

+62
-47
lines changed

stl/inc/regex

Lines changed: 62 additions & 47 deletions
Original file line number | Diff line number | Diff line change
@@ -1679,6 +1679,7 @@ enum class _Rx_unwind_ops {
16791679
_After_neg_assert,
16801680
_Disjunction_eval_alt_on_failure,
16811681
_Disjunction_eval_alt_always,
1682+
_Do_nothing,
16821683
};
16831684

16841685
template <class _BidIt>
@@ -1811,10 +1812,11 @@ private:
18111812

18121813
void _Increase_stack_usage_count();
18131814
void _Decrease_stack_usage_count();
1815+
void _Increase_complexity_count();
18141816

18151817
bool _Do_rep0(_Node_rep*, bool);
18161818
bool _Do_rep(_Node_rep*, bool, int);
1817-
bool _Do_rep_first(_Node_rep*);
1819+
void _Prepare_rep(_Node_rep*);
18181820
bool _Find_first_inner_capture_group(_Node_base*, _Loop_vals_v2_t*);
18191821
_It _Do_class(_Node_base*, _It);
18201822
bool _Match_pat(_Node_base*);
@@ -3403,34 +3405,19 @@ void _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Decrease_stack_usage_cou
34033405
}
34043406
}
34053407

3408+
// Charges one step against the matcher's complexity counter.
// Despite the name, the visible code counts _Max_complexity_count DOWN: a positive value is decremented,
// and when the decrement reaches zero, regex_constants::error_complexity is reported through _Xregex_error.
// A value that is already non-positive on entry is left untouched and never reports an error here
// (presumably that means "no limit" -- confirm where _Max_complexity_count is initialized).
template <class _BidIt, class _Elem, class _RxTraits, class _It, class _Alloc>
3409+
void _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Increase_complexity_count() {
3410+
if (0 < _Max_complexity_count && --_Max_complexity_count <= 0) { // short-circuit: decrement only while positive
3411+
_Xregex_error(regex_constants::error_complexity);
3412+
}
3413+
}
3414+
34063415
template <class _BidIt, class _Elem, class _RxTraits, class _It, class _Alloc>
34073416
bool _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Do_rep0(_Node_rep* _Node, bool _Greedy) {
34083417
// apply repetition to loop with no nested if/do
3409-
int _Ix = 0;
3410-
const size_t _Frame_idx = _Push_frame();
3411-
3412-
if (0 < _Node->_Min) {
3413-
// GH-5365: We can avoid resetting capture groups for the first iteration
3414-
// because we know that a simple repetition of this loop was not encountered before.
3415-
if (!_Match_pat(_Node->_Next)) { // didn't match minimum number of reps, fail
3416-
_Pop_frame(_Frame_idx);
3417-
return false;
3418-
} else if (_Tgt_state._Cur == _Frames[_Frame_idx]._Match_state._Cur) { // matches empty string
3419-
// loop is branchless, so it will only ever match empty strings
3420-
// -> skip all other matches as they don't change state and immediately try tail
3421-
_Pop_frame(_Frame_idx);
3422-
return _Match_pat(_Node->_End_rep->_Next);
3423-
} else { // loop never matches the empty string
3424-
for (_Ix = 1; _Ix < _Node->_Min; ++_Ix) { // do minimum number of reps
3425-
// GH-5365: We have to reset the capture groups from the second iteration on.
3426-
_Tgt_state._Grp_valid = _Frames[_Frame_idx]._Match_state._Grp_valid;
3427-
if (!_Match_pat(_Node->_Next)) { // didn't match minimum number of reps, fail
3428-
_Pop_frame(_Frame_idx);
3429-
return false;
3430-
}
3431-
}
3432-
}
3433-
}
3418+
int _Ix = _Node->_Min;
3419+
const size_t _Frame_idx = _Loop_vals[_Node->_Loop_number]._Loop_frame_idx;
3420+
_Loop_vals[_Node->_Loop_number]._Loop_idx = _Ix + 1;
34343421

34353422
_Tgt_state_t<_It> _Final;
34363423
bool _Matched0 = false;
@@ -3439,7 +3426,6 @@ bool _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Do_rep0(_Node_rep* _Node
34393426

34403427
if (_Match_pat(_Node->_End_rep->_Next)) {
34413428
if (!_Greedy) {
3442-
_Pop_frame(_Frame_idx);
34433429
return true; // go with current match
34443430
}
34453431

@@ -3458,14 +3444,12 @@ bool _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Do_rep0(_Node_rep* _Node
34583444
_Done = true;
34593445
// we only potentially accept/try tail for POSIX
34603446
if ((_Sflags & regex_constants::_Any_posix) && _Match_pat(_Node->_End_rep->_Next)) {
3461-
_Pop_frame(_Frame_idx);
34623447
return true; // go with current match
34633448
}
34643449
} else {
34653450
_Saved_pos = _Tgt_state._Cur;
34663451
if (_Match_pat(_Node->_End_rep->_Next)) {
34673452
if (!_Greedy) {
3468-
_Pop_frame(_Frame_idx);
34693453
return true; // go with current match
34703454
}
34713455

@@ -3489,7 +3473,6 @@ bool _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Do_rep0(_Node_rep* _Node
34893473
_Saved_pos = _Tgt_state._Cur;
34903474
if (_Match_pat(_Node->_End_rep->_Next)) {
34913475
if (!_Greedy) {
3492-
_Pop_frame(_Frame_idx);
34933476
return true; // go with current match
34943477
}
34953478

@@ -3504,7 +3487,6 @@ bool _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Do_rep0(_Node_rep* _Node
35043487
_Tgt_state = _Final;
35053488
}
35063489

3507-
_Pop_frame(_Frame_idx);
35083490
return _Matched0;
35093491
}
35103492

@@ -3577,12 +3559,7 @@ bool _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Do_rep(_Node_rep* _Node,
35773559
}
35783560

35793561
template <class _BidIt, class _Elem, class _RxTraits, class _It, class _Alloc>
3580-
bool _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Do_rep_first(_Node_rep* _Node) {
3581-
bool _Greedy = (_Node->_Flags & _Fl_greedy) != 0;
3582-
// apply repetition
3583-
if (_Node->_Simple_loop == 1) {
3584-
return _Do_rep0(_Node, _Greedy);
3585-
}
3562+
void _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Prepare_rep(_Node_rep* _Node) {
35863563
_Loop_vals_v2_t* _Psav = &_Loop_vals[_Node->_Loop_number];
35873564

35883565
// Determine the first capture group in the repetition for later capture group reset, if not already done.
@@ -3593,8 +3570,6 @@ bool _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Do_rep_first(_Node_rep*
35933570
_Psav->_Group_first = static_cast<unsigned int>(_Tgt_state._Grp_valid.size());
35943571
}
35953572
}
3596-
3597-
return _Do_rep(_Node, _Greedy, 0);
35983573
}
35993574

36003575
template <class _BidIt, class _Elem, class _RxTraits, class _It, class _Alloc>
@@ -4153,22 +4128,58 @@ bool _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Match_pat(_Node_base* _N
41534128
break;
41544129

41554130
case _N_rep:
4156-
if (!_Do_rep_first(static_cast<_Node_rep*>(_Nx))) {
4157-
_Failed = true;
4131+
{
4132+
auto _Node = static_cast<_Node_rep*>(_Nx);
4133+
_Prepare_rep(_Node);
4134+
bool _Greedy = (_Node->_Flags & _Fl_greedy) != 0;
4135+
4136+
if (_Node->_Simple_loop == 1) {
4137+
auto& _Sav = _Loop_vals[_Node->_Loop_number];
4138+
_Sav._Loop_idx = 1;
4139+
_Sav._Loop_frame_idx = _Push_frame(_Rx_unwind_ops::_Do_nothing);
4140+
if (_Node->_Min == 0) {
4141+
_Failed = !_Do_rep0(_Node, _Greedy);
4142+
_Next = nullptr;
4143+
} else {
4144+
_Increase_complexity_count();
4145+
}
4146+
} else {
4147+
_Failed = !_Do_rep(_Node, _Greedy, 0);
4148+
_Next = nullptr;
4149+
}
41584150
}
41594151

4160-
_Next = nullptr;
41614152
break;
41624153

41634154
case _N_end_rep:
41644155
{
41654156
_Node_rep* _Nr = static_cast<_Node_end_rep*>(_Nx)->_Begin_rep;
4166-
if (_Nr->_Simple_loop == 0
4167-
&& !_Do_rep(_Nr, (_Nr->_Flags & _Fl_greedy) != 0, _Loop_vals[_Nr->_Loop_number]._Loop_idx)) {
4168-
_Failed = true; // recurse only if loop contains if/do
4157+
auto& _Sav = _Loop_vals[_Nr->_Loop_number];
4158+
if (_Nr->_Simple_loop != 0) {
4159+
if (_Sav._Loop_idx <= _Nr->_Min) {
4160+
if (_Sav._Loop_idx == 1
4161+
&& _Tgt_state._Cur == _Frames[_Sav._Loop_frame_idx]._Match_state._Cur) { // match empty
4162+
// loop is branchless, so it will only ever match empty strings
4163+
// -> skip all other matches as they don't change state and immediately try tail
4164+
_Increase_complexity_count();
4165+
// _Next is already assigned correctly for matching tail
4166+
} else if (_Sav._Loop_idx < _Nr->_Min) { // needs at least one more rep to reach minimum
4167+
_Increase_complexity_count();
4168+
// GH-5365: We have to reset the capture groups from the second iteration on.
4169+
_Tgt_state._Grp_valid = _Frames[_Sav._Loop_frame_idx]._Match_state._Grp_valid;
4170+
_Next = _Nr->_Next;
4171+
++_Sav._Loop_idx;
4172+
} else { // minimum number of reps reached
4173+
_Failed = !_Do_rep0(_Nr, (_Nr->_Flags & _Fl_greedy) != 0);
4174+
_Next = nullptr;
4175+
}
4176+
} else { // internal _Match_pat(_Node->_Next) call in _Do_rep0()
4177+
_Next = nullptr;
4178+
}
4179+
} else {
4180+
_Failed = !_Do_rep(_Nr, (_Nr->_Flags & _Fl_greedy) != 0, _Sav._Loop_idx);
4181+
_Next = nullptr;
41694182
}
4170-
4171-
_Next = nullptr;
41724183
break;
41734184
}
41744185

@@ -4243,6 +4254,7 @@ bool _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Match_pat(_Node_base* _N
42434254
_Nx = _Node->_Next;
42444255
_Tgt_state = _Frame._Match_state;
42454256
_Failed = false;
4257+
_Increase_complexity_count();
42464258
if (_Node->_Child) {
42474259
_Frame._Node = _Node->_Child;
42484260
++_Frames_count;
@@ -4252,6 +4264,9 @@ bool _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Match_pat(_Node_base* _N
42524264
break;
42534265
}
42544266

4267+
case _Rx_unwind_ops::_Do_nothing:
4268+
break;
4269+
42554270
default:
42564271
#if _ITERATOR_DEBUG_LEVEL != 0
42574272
_STL_REPORT_ERROR("internal stack of regex matcher corrupted");

0 commit comments

Comments
 (0)