Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
118 changes: 111 additions & 7 deletions stl/inc/regex
Original file line number Diff line number Diff line change
Expand Up @@ -1643,6 +1643,106 @@ public:
_Builder2& operator=(const _Builder2&) = delete;
};

template <class _Ty, class _Alloc = allocator<_Ty>>
class _Rx_fixed_size_buffer {
using _Al_size_type = _Alloc_size_t<_Alloc>;

struct _Val {
_Ty* _First;
_Ty* _Last;
bool _Allocated;
};

struct _NODISCARD _Allocation_guard {
_Alloc& _Al;
_Ty* _Ptr;
_Al_size_type _Size;

_Allocation_guard& operator=(const _Allocation_guard&) = delete;
_Allocation_guard& operator=(_Allocation_guard&&) = delete;

~_Allocation_guard() noexcept {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't noexcept by default here in this case?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I just copied this from some guard in <vector>.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Then that's right probably.

if (_Ptr) {
_Al.deallocate(_Ptr, _Size);
}
}
};

public:
_Rx_fixed_size_buffer(const _Alloc& _Al = _Alloc{}) : _Mypair(_One_then_variadic_args_t{}, _Al) {}
_Rx_fixed_size_buffer(const _Rx_fixed_size_buffer&) = delete;
_Rx_fixed_size_buffer(_Rx_fixed_size_buffer&&) = delete;

_NODISCARD size_t _Initialize(unsigned char* _Stack_buf, size_t _Stack_size, size_t _Size) {
if (_Stack_size / sizeof(_Ty) >= _Size) {
_Use_external_buf(_Stack_buf, _Size);
return _Size * sizeof(_Ty);
} else {
_Allocate_buf(_Size);
return 0U;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The suffix is misleading. Just 0.

(There's a pointer-size-dependent suffix I think, but we can't use that here due to C++14 at least)

}
}

~_Rx_fixed_size_buffer() {
auto& _Data = _Mypair._Myval2;
if (_Data._Allocated) {
auto& _Al = _Mypair._Get_first();
_STD _Destroy_range(_Data._First, _Data._Last, _Al);
_Al.deallocate(_Data._First, static_cast<_Al_size_type>(_Data._Last - _Data._First));
} else {
_STD _Destroy_range(_Data._First, _Data._Last);
}
}

_NODISCARD _Ty& operator[](const size_t _Pos) {
auto& _Data = _Mypair._Myval2;
#if _ITERATOR_DEBUG_LEVEL != 0
_STL_VERIFY(
_Pos < static_cast<size_t>(_Data._Last - _Data._First), "_Rx_fixed_size_buffer subscript out of range");
#endif
return _Data._First[_Pos];
}

_NODISCARD const _Ty& operator[](const size_t _Pos) const {
auto& _Data = _Mypair._Myval2;
#if _ITERATOR_DEBUG_LEVEL != 0
_STL_VERIFY(
_Pos < static_cast<size_t>(_Data._Last - _Data._First), "_Rx_fixed_size_buffer subscript out of range");
#endif
return _Data._First[_Pos];
}

private:
void _Use_external_buf(unsigned char* _Buf, size_t _Ty_size) {
auto _Ty_first = reinterpret_cast<_Ty*>(_Buf);
auto _Ty_last = _Ty_first + _Ty_size;
auto& _Data = _Mypair._Myval2;
_STD uninitialized_fill(_Ty_first, _Ty_last, _Ty{});
_Data._Allocated = false;
_Data._First = _Ty_first;
_Data._Last = _Ty_last;
}

void _Allocate_buf(size_t _Ty_size) {
auto& _Al = _Mypair._Get_first();

if (_Ty_size > size_t{PTRDIFF_MAX} || _Ty_size > allocator_traits<_Alloc>::max_size(_Al)) {
_Xregex_error(regex_constants::error_stack);
}

auto _Al_size = static_cast<_Al_size_type>(_Ty_size);
auto _Ty_first = _Al.allocate(_Al_size);
_Allocation_guard _Guard{_Al, _Ty_first, _Al_size};
auto& _Data = _Mypair._Myval2;
_Data._Last = _STD _Uninitialized_fill_n(_Ty_first, _Ty_size, _Ty{}, _Al);
_Data._Allocated = true;
_Data._First = _Ty_first;
_Guard._Ptr = nullptr;
}

_Compressed_pair<_Alloc, _Val> _Mypair;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It may be the time to use no unique address

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no_unique_address is C++20. I think [[msvc::no_unique_address]] is supported in older C++ dialects as well as an extension, but then we need a decision that we will make use of this extension.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't know if it's supported downlevel by MSVC, Clang, and EDG. (_MSVC_NO_UNIQUE_ADDRESS is currently not set up to activate downlevel.) There's also a concern with CUDA.

I think messing with it here is too much risk for too little value. I'd rather stick with _Compressed_pair.

};

template <class _BidIt>
class _Bt_state_t { // holds the state needed for backtracking
public:
Expand Down Expand Up @@ -1700,10 +1800,10 @@ template <class _BidIt, class _Elem, class _RxTraits, class _It, class _Alloc>
class _Matcher3 { // provides ways to match a regular expression to a text sequence
public:
_Matcher3(_It _Pfirst, _It _Plast, const _RxTraits& _Tr, _Root_node* _Re, unsigned int _Nx,
regex_constants::syntax_option_type _Sf, regex_constants::match_flag_type _Mf)
regex_constants::syntax_option_type _Sf, regex_constants::match_flag_type _Mf, unsigned char* _Stackbuf,
size_t _Stackbuf_size)
: _Begin(_Pfirst), _End(_Plast), _Sflags(_Sf), _Mflags(_Mf), _Ncap(_Nx),
_Longest((_Re->_Flags & _Fl_longest) && !(_Mf & regex_constants::match_any)), _Traits(_Tr) {
_Loop_vals.resize(_Re->_Loops);
_Adl_verify_range(_Pfirst, _Plast);
if (_Re->_Flags & _Fl_begin_needs_w) {
_Char_class_w = _Lookup_char_class(static_cast<_Elem>('W'));
Expand All @@ -1727,6 +1827,8 @@ public:
// before engaging the expensive NFA interpreter loop.
_Start = _Re->_Next->_Next;

(void) _Loop_vals._Initialize(_Stackbuf, _Stackbuf_size, _Re->_Loops);

// sanitize multiline mode setting
#if _REGEX_LEGACY_MULTILINE_MODE
_Sflags |= regex_constants::multiline; // old matcher applied multiline mode for all grammars
Expand Down Expand Up @@ -1816,7 +1918,7 @@ public:
private:
_Tgt_state_t<_It> _Tgt_state;
_Tgt_state_t<_It> _Res;
vector<_Loop_vals_v3_t<_Iter_diff_t<_It>>> _Loop_vals;
_Rx_fixed_size_buffer<_Loop_vals_v3_t<_Iter_diff_t<_It>>> _Loop_vals;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I assume we're not between releases and matcher doesn't need to be renamed again

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That`s what I took from STL's remarks on Discord that he would like to audit no_unique_address for the 14.51 release: The branch-off is imminent, but hasn't happened yet.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's correct. As long as we land this soon-ish, we're still covered by the _Matcher3 rename.

vector<_Rx_state_frame_t<_It>> _Frames;
size_t _Frames_count;
size_t _Frames_limit;
Expand Down Expand Up @@ -2330,8 +2432,9 @@ bool _Regex_match1(_It _First, _It _Last, match_results<_BidIt, _Alloc>* _Matche
return false;
}

_Matcher3<_BidIt, _Elem, _RxTraits, _It, void> _Mx(
_First, _Last, _Re._Get_traits(), _Re._Get(), _Re.mark_count() + 1, _Re.flags(), _Flgs);
alignas(_Loop_vals_v3_t<_Iter_diff_t<_It>>) unsigned char _Stackbuf[4096];
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is this alignment?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Usually it's the same as a pointer, except if some user passes an iterator where _Iter_diff_t<_It> requires some stricter alignment (although I don't see why anyone would want to do that).

_Matcher3<_BidIt, _Elem, _RxTraits, _It, void> _Mx(_First, _Last, _Re._Get_traits(), _Re._Get(),
_Re.mark_count() + 1, _Re.flags(), _Flgs, _Stackbuf, sizeof(_Stackbuf));
return _Mx._Match(_Matches, _Full);
}

Expand Down Expand Up @@ -2407,8 +2510,9 @@ bool _Regex_search2(_It _First, _It _Last, match_results<_BidIt, _Alloc>* _Match
++_First;
}

_Matcher3<_BidIt, _Elem, _RxTraits, _It, void> _Mx(
_First, _Last, _Re._Get_traits(), _Re._Get(), _Re.mark_count() + 1, _Re.flags(), _Flgs);
alignas(_Loop_vals_v3_t<_Iter_diff_t<_It>>) unsigned char _Stackbuf[4096];
_Matcher3<_BidIt, _Elem, _RxTraits, _It, void> _Mx(_First, _Last, _Re._Get_traits(), _Re._Get(),
_Re.mark_count() + 1, _Re.flags(), _Flgs, _Stackbuf, sizeof(_Stackbuf));

if (_Mx._Match(_Matches, false)) {
_Found = true;
Expand Down