@@ -1679,6 +1679,7 @@ enum class _Rx_unwind_ops {
16791679 _After_neg_assert,
16801680 _Disjunction_eval_alt_on_failure,
16811681 _Disjunction_eval_alt_always,
1682+ _Do_nothing,
16821683};
16831684
16841685template <class _BidIt>
@@ -1811,10 +1812,11 @@ private:
18111812
18121813 void _Increase_stack_usage_count();
18131814 void _Decrease_stack_usage_count();
1815+ void _Increase_complexity_count();
18141816
18151817 bool _Do_rep0(_Node_rep*, bool);
18161818 bool _Do_rep(_Node_rep*, bool, int);
1817- bool _Do_rep_first (_Node_rep*);
1819+ void _Prepare_rep (_Node_rep*);
18181820 bool _Find_first_inner_capture_group(_Node_base*, _Loop_vals_v2_t*);
18191821 _It _Do_class(_Node_base*, _It);
18201822 bool _Match_pat(_Node_base*);
@@ -3403,34 +3405,19 @@ void _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Decrease_stack_usage_cou
34033405 }
34043406}
34053407
// Charges one unit against the matcher's remaining complexity budget.
//
// Despite the "Increase" in its name, this function counts DOWN: it decrements
// _Max_complexity_count when that value is positive, and reports
// regex_constants::error_complexity through _Xregex_error() as soon as the
// decremented value is no longer positive.
//
// When _Max_complexity_count is already <= 0 on entry, the short-circuit `&&`
// skips the decrement and nothing is reported.
// NOTE(review): presumably a non-positive value means "no complexity limit" -
// confirm against where _Max_complexity_count is initialized.
3408+ template <class _BidIt, class _Elem, class _RxTraits, class _It, class _Alloc>
3409+ void _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Increase_complexity_count() {
// The decrement is a side effect inside the condition and runs only if the first operand is true.
3410+ if (0 < _Max_complexity_count && --_Max_complexity_count <= 0) {
3411+ _Xregex_error(regex_constants::error_complexity);
3412+ }
3413+ }
3414+
34063415template <class _BidIt, class _Elem, class _RxTraits, class _It, class _Alloc>
34073416bool _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Do_rep0(_Node_rep* _Node, bool _Greedy) {
34083417 // apply repetition to loop with no nested if/do
3409- int _Ix = 0;
3410- const size_t _Frame_idx = _Push_frame();
3411-
3412- if (0 < _Node->_Min) {
3413- // GH-5365: We can avoid resetting capture groups for the first iteration
3414- // because we know that a simple repetition of this loop was not encountered before.
3415- if (!_Match_pat(_Node->_Next)) { // didn't match minimum number of reps, fail
3416- _Pop_frame(_Frame_idx);
3417- return false;
3418- } else if (_Tgt_state._Cur == _Frames[_Frame_idx]._Match_state._Cur) { // matches empty string
3419- // loop is branchless, so it will only ever match empty strings
3420- // -> skip all other matches as they don't change state and immediately try tail
3421- _Pop_frame(_Frame_idx);
3422- return _Match_pat(_Node->_End_rep->_Next);
3423- } else { // loop never matches the empty string
3424- for (_Ix = 1; _Ix < _Node->_Min; ++_Ix) { // do minimum number of reps
3425- // GH-5365: We have to reset the capture groups from the second iteration on.
3426- _Tgt_state._Grp_valid = _Frames[_Frame_idx]._Match_state._Grp_valid;
3427- if (!_Match_pat(_Node->_Next)) { // didn't match minimum number of reps, fail
3428- _Pop_frame(_Frame_idx);
3429- return false;
3430- }
3431- }
3432- }
3433- }
3418+ int _Ix = _Node->_Min;
3419+ const size_t _Frame_idx = _Loop_vals[_Node->_Loop_number]._Loop_frame_idx;
3420+ _Loop_vals[_Node->_Loop_number]._Loop_idx = _Ix + 1;
34343421
34353422 _Tgt_state_t<_It> _Final;
34363423 bool _Matched0 = false;
@@ -3439,7 +3426,6 @@ bool _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Do_rep0(_Node_rep* _Node
34393426
34403427 if (_Match_pat(_Node->_End_rep->_Next)) {
34413428 if (!_Greedy) {
3442- _Pop_frame(_Frame_idx);
34433429 return true; // go with current match
34443430 }
34453431
@@ -3458,14 +3444,12 @@ bool _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Do_rep0(_Node_rep* _Node
34583444 _Done = true;
34593445 // we only potentially accept/try tail for POSIX
34603446 if ((_Sflags & regex_constants::_Any_posix) && _Match_pat(_Node->_End_rep->_Next)) {
3461- _Pop_frame(_Frame_idx);
34623447 return true; // go with current match
34633448 }
34643449 } else {
34653450 _Saved_pos = _Tgt_state._Cur;
34663451 if (_Match_pat(_Node->_End_rep->_Next)) {
34673452 if (!_Greedy) {
3468- _Pop_frame(_Frame_idx);
34693453 return true; // go with current match
34703454 }
34713455
@@ -3489,7 +3473,6 @@ bool _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Do_rep0(_Node_rep* _Node
34893473 _Saved_pos = _Tgt_state._Cur;
34903474 if (_Match_pat(_Node->_End_rep->_Next)) {
34913475 if (!_Greedy) {
3492- _Pop_frame(_Frame_idx);
34933476 return true; // go with current match
34943477 }
34953478
@@ -3504,7 +3487,6 @@ bool _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Do_rep0(_Node_rep* _Node
35043487 _Tgt_state = _Final;
35053488 }
35063489
3507- _Pop_frame(_Frame_idx);
35083490 return _Matched0;
35093491}
35103492
@@ -3577,12 +3559,7 @@ bool _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Do_rep(_Node_rep* _Node,
35773559}
35783560
35793561template <class _BidIt, class _Elem, class _RxTraits, class _It, class _Alloc>
3580- bool _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Do_rep_first(_Node_rep* _Node) {
3581- bool _Greedy = (_Node->_Flags & _Fl_greedy) != 0;
3582- // apply repetition
3583- if (_Node->_Simple_loop == 1) {
3584- return _Do_rep0(_Node, _Greedy);
3585- }
3562+ void _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Prepare_rep(_Node_rep* _Node) {
35863563 _Loop_vals_v2_t* _Psav = &_Loop_vals[_Node->_Loop_number];
35873564
35883565 // Determine first capture group in repetition for later capture group reset, if not done so previously.
@@ -3593,8 +3570,6 @@ bool _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Do_rep_first(_Node_rep*
35933570 _Psav->_Group_first = static_cast<unsigned int>(_Tgt_state._Grp_valid.size());
35943571 }
35953572 }
3596-
3597- return _Do_rep(_Node, _Greedy, 0);
35983573}
35993574
36003575template <class _BidIt, class _Elem, class _RxTraits, class _It, class _Alloc>
@@ -4153,22 +4128,58 @@ bool _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Match_pat(_Node_base* _N
41534128 break;
41544129
41554130 case _N_rep:
4156- if (!_Do_rep_first(static_cast<_Node_rep*>(_Nx))) {
4157- _Failed = true;
4131+ {
4132+ auto _Node = static_cast<_Node_rep*>(_Nx);
4133+ _Prepare_rep(_Node);
4134+ bool _Greedy = (_Node->_Flags & _Fl_greedy) != 0;
4135+
4136+ if (_Node->_Simple_loop == 1) {
4137+ auto& _Sav = _Loop_vals[_Node->_Loop_number];
4138+ _Sav._Loop_idx = 1;
4139+ _Sav._Loop_frame_idx = _Push_frame(_Rx_unwind_ops::_Do_nothing);
4140+ if (_Node->_Min == 0) {
4141+ _Failed = !_Do_rep0(_Node, _Greedy);
4142+ _Next = nullptr;
4143+ } else {
4144+ _Increase_complexity_count();
4145+ }
4146+ } else {
4147+ _Failed = !_Do_rep(_Node, _Greedy, 0);
4148+ _Next = nullptr;
4149+ }
41584150 }
41594151
4160- _Next = nullptr;
41614152 break;
41624153
41634154 case _N_end_rep:
41644155 {
41654156 _Node_rep* _Nr = static_cast<_Node_end_rep*>(_Nx)->_Begin_rep;
4166- if (_Nr->_Simple_loop == 0
4167- && !_Do_rep(_Nr, (_Nr->_Flags & _Fl_greedy) != 0, _Loop_vals[_Nr->_Loop_number]._Loop_idx)) {
4168- _Failed = true; // recurse only if loop contains if/do
4157+ auto& _Sav = _Loop_vals[_Nr->_Loop_number];
4158+ if (_Nr->_Simple_loop != 0) {
4159+ if (_Sav._Loop_idx <= _Nr->_Min) {
4160+ if (_Sav._Loop_idx == 1
4161+ && _Tgt_state._Cur == _Frames[_Sav._Loop_frame_idx]._Match_state._Cur) { // match empty
4162+ // loop is branchless, so it will only ever match empty strings
4163+ // -> skip all other matches as they don't change state and immediately try tail
4164+ _Increase_complexity_count();
4165+ // _Next is already assigned correctly for matching tail
4166+ } else if (_Sav._Loop_idx < _Nr->_Min) { // needs at least one more rep to reach minimum
4167+ _Increase_complexity_count();
4168+ // GH-5365: We have to reset the capture groups from the second iteration on.
4169+ _Tgt_state._Grp_valid = _Frames[_Sav._Loop_frame_idx]._Match_state._Grp_valid;
4170+ _Next = _Nr->_Next;
4171+ ++_Sav._Loop_idx;
4172+ } else { // minimum number of reps reached
4173+ _Failed = !_Do_rep0(_Nr, (_Nr->_Flags & _Fl_greedy) != 0);
4174+ _Next = nullptr;
4175+ }
4176+ } else { // internal _Match_pat(_Node->_Next) call in _Do_rep0()
4177+ _Next = nullptr;
4178+ }
4179+ } else {
4180+ _Failed = !_Do_rep(_Nr, (_Nr->_Flags & _Fl_greedy) != 0, _Sav._Loop_idx);
4181+ _Next = nullptr;
41694182 }
4170-
4171- _Next = nullptr;
41724183 break;
41734184 }
41744185
@@ -4243,6 +4254,7 @@ bool _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Match_pat(_Node_base* _N
42434254 _Nx = _Node->_Next;
42444255 _Tgt_state = _Frame._Match_state;
42454256 _Failed = false;
4257+ _Increase_complexity_count();
42464258 if (_Node->_Child) {
42474259 _Frame._Node = _Node->_Child;
42484260 ++_Frames_count;
@@ -4252,6 +4264,9 @@ bool _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Match_pat(_Node_base* _N
42524264 break;
42534265 }
42544266
4267+ case _Rx_unwind_ops::_Do_nothing:
4268+ break;
4269+
42554270 default:
42564271#if _ITERATOR_DEBUG_LEVEL != 0
42574272 _STL_REPORT_ERROR("internal stack of regex matcher corrupted");
0 commit comments