-
Notifications
You must be signed in to change notification settings - Fork 1.6k
<regex>: Implement small buffer optimization to store loop state
#6092
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1643,6 +1643,106 @@ public: | |
| _Builder2& operator=(const _Builder2&) = delete; | ||
| }; | ||
|
|
||
| template <class _Ty, class _Alloc = allocator<_Ty>> | ||
| class _Rx_fixed_size_buffer { | ||
| using _Al_size_type = _Alloc_size_t<_Alloc>; | ||
|
|
||
| struct _Val { | ||
| _Ty* _First; | ||
| _Ty* _Last; | ||
| bool _Allocated; | ||
| }; | ||
|
|
||
| struct _NODISCARD _Allocation_guard { | ||
| _Alloc& _Al; | ||
| _Ty* _Ptr; | ||
| _Al_size_type _Size; | ||
|
|
||
| _Allocation_guard& operator=(const _Allocation_guard&) = delete; | ||
| _Allocation_guard& operator=(_Allocation_guard&&) = delete; | ||
|
|
||
| ~_Allocation_guard() noexcept { | ||
| if (_Ptr) { | ||
| _Al.deallocate(_Ptr, _Size); | ||
| } | ||
| } | ||
| }; | ||
|
|
||
| public: | ||
| _Rx_fixed_size_buffer(const _Alloc& _Al = _Alloc{}) : _Mypair(_One_then_variadic_args_t{}, _Al) {} | ||
| _Rx_fixed_size_buffer(const _Rx_fixed_size_buffer&) = delete; | ||
| _Rx_fixed_size_buffer(_Rx_fixed_size_buffer&&) = delete; | ||
|
|
||
| _NODISCARD size_t _Initialize(unsigned char* _Stack_buf, size_t _Stack_size, size_t _Size) { | ||
| if (_Stack_size / sizeof(_Ty) >= _Size) { | ||
| _Use_external_buf(_Stack_buf, _Size); | ||
| return _Size * sizeof(_Ty); | ||
| } else { | ||
| _Allocate_buf(_Size); | ||
| return 0U; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The suffix is misleading. Just 0. (There's a pointer-size-dependent suffix I think, but we can't use that here due to C++14 at least) |
||
| } | ||
| } | ||
|
|
||
| ~_Rx_fixed_size_buffer() { | ||
| auto& _Data = _Mypair._Myval2; | ||
| if (_Data._Allocated) { | ||
| auto& _Al = _Mypair._Get_first(); | ||
| _STD _Destroy_range(_Data._First, _Data._Last, _Al); | ||
| _Al.deallocate(_Data._First, static_cast<_Al_size_type>(_Data._Last - _Data._First)); | ||
| } else { | ||
| _STD _Destroy_range(_Data._First, _Data._Last); | ||
| } | ||
| } | ||
|
|
||
| _NODISCARD _Ty& operator[](const size_t _Pos) { | ||
| auto& _Data = _Mypair._Myval2; | ||
| #if _ITERATOR_DEBUG_LEVEL != 0 | ||
| _STL_VERIFY( | ||
| _Pos < static_cast<size_t>(_Data._Last - _Data._First), "_Rx_fixed_size_buffer subscript out of range"); | ||
| #endif | ||
| return _Data._First[_Pos]; | ||
| } | ||
|
|
||
| _NODISCARD const _Ty& operator[](const size_t _Pos) const { | ||
| auto& _Data = _Mypair._Myval2; | ||
| #if _ITERATOR_DEBUG_LEVEL != 0 | ||
| _STL_VERIFY( | ||
| _Pos < static_cast<size_t>(_Data._Last - _Data._First), "_Rx_fixed_size_buffer subscript out of range"); | ||
| #endif | ||
| return _Data._First[_Pos]; | ||
| } | ||
|
|
||
| private: | ||
| void _Use_external_buf(unsigned char* _Buf, size_t _Ty_size) { | ||
| auto _Ty_first = reinterpret_cast<_Ty*>(_Buf); | ||
| auto _Ty_last = _Ty_first + _Ty_size; | ||
| auto& _Data = _Mypair._Myval2; | ||
| _STD uninitialized_fill(_Ty_first, _Ty_last, _Ty{}); | ||
| _Data._Allocated = false; | ||
| _Data._First = _Ty_first; | ||
| _Data._Last = _Ty_last; | ||
| } | ||
|
|
||
| void _Allocate_buf(size_t _Ty_size) { | ||
| auto& _Al = _Mypair._Get_first(); | ||
|
|
||
| if (_Ty_size > size_t{PTRDIFF_MAX} || _Ty_size > allocator_traits<_Alloc>::max_size(_Al)) { | ||
| _Xregex_error(regex_constants::error_stack); | ||
| } | ||
|
|
||
| auto _Al_size = static_cast<_Al_size_type>(_Ty_size); | ||
| auto _Ty_first = _Al.allocate(_Al_size); | ||
| _Allocation_guard _Guard{_Al, _Ty_first, _Al_size}; | ||
| auto& _Data = _Mypair._Myval2; | ||
| _Data._Last = _STD _Uninitialized_fill_n(_Ty_first, _Ty_size, _Ty{}, _Al); | ||
| _Data._Allocated = true; | ||
| _Data._First = _Ty_first; | ||
| _Guard._Ptr = nullptr; | ||
| } | ||
|
|
||
| _Compressed_pair<_Alloc, _Val> _Mypair; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It may be the time to use no unique address
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. no_unique_address is C++20. I think
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't know if it's supported downlevel by MSVC, Clang, and EDG. ( I think messing with it here is too much risk for too little value. I'd rather stick with |
||
| }; | ||
|
|
||
| template <class _BidIt> | ||
| class _Bt_state_t { // holds the state needed for backtracking | ||
| public: | ||
|
|
@@ -1700,10 +1800,10 @@ template <class _BidIt, class _Elem, class _RxTraits, class _It, class _Alloc> | |
| class _Matcher3 { // provides ways to match a regular expression to a text sequence | ||
| public: | ||
| _Matcher3(_It _Pfirst, _It _Plast, const _RxTraits& _Tr, _Root_node* _Re, unsigned int _Nx, | ||
| regex_constants::syntax_option_type _Sf, regex_constants::match_flag_type _Mf) | ||
| regex_constants::syntax_option_type _Sf, regex_constants::match_flag_type _Mf, unsigned char* _Stackbuf, | ||
| size_t _Stackbuf_size) | ||
| : _Begin(_Pfirst), _End(_Plast), _Sflags(_Sf), _Mflags(_Mf), _Ncap(_Nx), | ||
| _Longest((_Re->_Flags & _Fl_longest) && !(_Mf & regex_constants::match_any)), _Traits(_Tr) { | ||
| _Loop_vals.resize(_Re->_Loops); | ||
| _Adl_verify_range(_Pfirst, _Plast); | ||
| if (_Re->_Flags & _Fl_begin_needs_w) { | ||
| _Char_class_w = _Lookup_char_class(static_cast<_Elem>('W')); | ||
|
|
@@ -1727,6 +1827,8 @@ public: | |
| // before engaging the expensive NFA interpreter loop. | ||
| _Start = _Re->_Next->_Next; | ||
|
|
||
| (void) _Loop_vals._Initialize(_Stackbuf, _Stackbuf_size, _Re->_Loops); | ||
|
|
||
| // sanitize multiline mode setting | ||
| #if _REGEX_LEGACY_MULTILINE_MODE | ||
| _Sflags |= regex_constants::multiline; // old matcher applied multiline mode for all grammars | ||
|
|
@@ -1816,7 +1918,7 @@ public: | |
| private: | ||
| _Tgt_state_t<_It> _Tgt_state; | ||
| _Tgt_state_t<_It> _Res; | ||
| vector<_Loop_vals_v3_t<_Iter_diff_t<_It>>> _Loop_vals; | ||
| _Rx_fixed_size_buffer<_Loop_vals_v3_t<_Iter_diff_t<_It>>> _Loop_vals; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I assume we're not between releases and matcher doesn't need to be renamed again
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That`s what I took from STL's remarks on Discord that he would like to audit no_unique_address for the 14.51 release: The branch-off is imminent, but hasn't happened yet.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That's correct. As long as we land this soon-ish, we're still covered by the |
||
| vector<_Rx_state_frame_t<_It>> _Frames; | ||
| size_t _Frames_count; | ||
| size_t _Frames_limit; | ||
|
|
@@ -2330,8 +2432,9 @@ bool _Regex_match1(_It _First, _It _Last, match_results<_BidIt, _Alloc>* _Matche | |
| return false; | ||
| } | ||
|
|
||
| _Matcher3<_BidIt, _Elem, _RxTraits, _It, void> _Mx( | ||
| _First, _Last, _Re._Get_traits(), _Re._Get(), _Re.mark_count() + 1, _Re.flags(), _Flgs); | ||
| alignas(_Loop_vals_v3_t<_Iter_diff_t<_It>>) unsigned char _Stackbuf[4096]; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What is this alignment?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Usually it's the same as a pointer, except if some user passes an iterator where |
||
| _Matcher3<_BidIt, _Elem, _RxTraits, _It, void> _Mx(_First, _Last, _Re._Get_traits(), _Re._Get(), | ||
| _Re.mark_count() + 1, _Re.flags(), _Flgs, _Stackbuf, sizeof(_Stackbuf)); | ||
| return _Mx._Match(_Matches, _Full); | ||
| } | ||
|
|
||
|
|
@@ -2407,8 +2510,9 @@ bool _Regex_search2(_It _First, _It _Last, match_results<_BidIt, _Alloc>* _Match | |
| ++_First; | ||
| } | ||
|
|
||
| _Matcher3<_BidIt, _Elem, _RxTraits, _It, void> _Mx( | ||
| _First, _Last, _Re._Get_traits(), _Re._Get(), _Re.mark_count() + 1, _Re.flags(), _Flgs); | ||
| alignas(_Loop_vals_v3_t<_Iter_diff_t<_It>>) unsigned char _Stackbuf[4096]; | ||
| _Matcher3<_BidIt, _Elem, _RxTraits, _It, void> _Mx(_First, _Last, _Re._Get_traits(), _Re._Get(), | ||
| _Re.mark_count() + 1, _Re.flags(), _Flgs, _Stackbuf, sizeof(_Stackbuf)); | ||
|
|
||
| if (_Mx._Match(_Matches, false)) { | ||
| _Found = true; | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Isn't
noexceptby default here in this case?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think I just copied this from some guard in
<vector>.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Then that's right probably.