Skip to content

Commit 2aa588c

Browse files
jckingcopybara-github
authored andcommitted
Implement optimized string and bytes concatenation
PiperOrigin-RevId: 738849121
1 parent b1e0b7c commit 2aa588c

File tree

6 files changed

+88
-22
lines changed

6 files changed

+88
-22
lines changed

common/internal/byte_string.cc

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,51 @@ T ConsumeAndDestroy(T& object) {
5656

5757
} // namespace
5858

59+
ByteString ByteString::Concat(const ByteString& lhs, const ByteString& rhs,
60+
absl::Nonnull<google::protobuf::Arena*> arena) {
61+
ABSL_DCHECK(arena != nullptr);
62+
63+
if (lhs.empty()) {
64+
return rhs;
65+
}
66+
if (rhs.empty()) {
67+
return lhs;
68+
}
69+
70+
if (lhs.GetKind() == ByteStringKind::kLarge ||
71+
rhs.GetKind() == ByteStringKind::kLarge) {
72+
// If either the left or right are absl::Cord, use absl::Cord.
73+
absl::Cord result;
74+
result.Append(lhs.ToCord());
75+
result.Append(rhs.ToCord());
76+
return ByteString(std::move(result));
77+
}
78+
79+
const size_t lhs_size = lhs.size();
80+
const size_t rhs_size = rhs.size();
81+
const size_t result_size = lhs_size + rhs_size;
82+
ByteString result;
83+
if (result_size <= kSmallByteStringCapacity) {
84+
// If the resulting string fits in inline storage, do it.
85+
result.rep_.small.size = result_size;
86+
result.rep_.small.arena = arena;
87+
lhs.CopyToArray(result.rep_.small.data);
88+
rhs.CopyToArray(result.rep_.small.data + lhs_size);
89+
} else {
90+
// Otherwise allocate on the arena.
91+
char* result_data =
92+
reinterpret_cast<char*>(arena->AllocateAligned(result_size));
93+
lhs.CopyToArray(result_data);
94+
rhs.CopyToArray(result_data + lhs_size);
95+
result.rep_.medium.data = result_data;
96+
result.rep_.medium.size = result_size;
97+
result.rep_.medium.owner =
98+
reinterpret_cast<uintptr_t>(arena) | kMetadataOwnerArenaBit;
99+
result.rep_.medium.kind = ByteStringKind::kMedium;
100+
}
101+
return result;
102+
}
103+
59104
ByteString::ByteString(Allocator<> allocator, absl::string_view string) {
60105
ABSL_DCHECK_LE(string.size(), max_size());
61106
auto* arena = allocator.arena();
@@ -249,6 +294,25 @@ void ByteString::RemoveSuffix(size_t n) {
249294
}
250295
}
251296

297+
void ByteString::CopyToArray(absl::Nonnull<char*> out) const {
298+
ABSL_DCHECK(out != nullptr);
299+
300+
switch (GetKind()) {
301+
case ByteStringKind::kSmall: {
302+
absl::string_view small = GetSmall();
303+
std::memcpy(out, small.data(), small.size());
304+
} break;
305+
case ByteStringKind::kMedium: {
306+
absl::string_view medium = GetMedium();
307+
std::memcpy(out, medium.data(), medium.size());
308+
} break;
309+
case ByteStringKind::kLarge: {
310+
const absl::Cord& large = GetLarge();
311+
(CopyCordToArray)(large, out);
312+
} break;
313+
}
314+
}
315+
252316
std::string ByteString::ToString() const {
253317
switch (GetKind()) {
254318
case ByteStringKind::kSmall:

common/internal/byte_string.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ namespace cel {
4343

4444
class BytesValueInputStream;
4545
class BytesValueOutputStream;
46+
class StringValue;
4647

4748
namespace common_internal {
4849

@@ -171,6 +172,9 @@ absl::string_view LegacyByteString(const ByteString& string, bool stable,
171172
class CEL_COMMON_INTERNAL_BYTE_STRING_TRIVIAL_ABI [[nodiscard]]
172173
ByteString final {
173174
public:
175+
static ByteString Concat(const ByteString& lhs, const ByteString& rhs,
176+
absl::Nonnull<google::protobuf::Arena*> arena);
177+
174178
ByteString() : ByteString(NewDeleteAllocator()) {}
175179

176180
explicit ByteString(absl::Nullable<const char*> string)
@@ -333,6 +337,7 @@ ByteString final {
333337
friend struct ByteStringTestFriend;
334338
friend class cel::BytesValueInputStream;
335339
friend class cel::BytesValueOutputStream;
340+
friend class cel::StringValue;
336341
friend absl::string_view LegacyByteString(
337342
const ByteString& string, bool stable,
338343
absl::Nonnull<google::protobuf::Arena*> arena);
@@ -475,6 +480,8 @@ ByteString final {
475480

476481
static void DestroyLarge(LargeByteStringRep& rep) { GetLarge(rep).~Cord(); }
477482

483+
void CopyToArray(absl::Nonnull<char*> out) const;
484+
478485
ByteStringRep rep_;
479486
};
480487

common/values/bytes_value.cc

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#include "absl/strings/cord.h"
2424
#include "absl/strings/str_cat.h"
2525
#include "absl/strings/string_view.h"
26+
#include "common/internal/byte_string.h"
2627
#include "common/value.h"
2728
#include "internal/status_macros.h"
2829
#include "internal/strings.h"
@@ -53,6 +54,12 @@ std::string BytesDebugString(const Bytes& value) {
5354

5455
} // namespace
5556

57+
BytesValue BytesValue::Concat(const BytesValue& lhs, const BytesValue& rhs,
58+
absl::Nonnull<google::protobuf::Arena*> arena) {
59+
return BytesValue(
60+
common_internal::ByteString::Concat(lhs.value_, rhs.value_, arena));
61+
}
62+
5663
std::string BytesValue::DebugString() const { return BytesDebugString(*this); }
5764

5865
absl::Status BytesValue::SerializeTo(

common/values/bytes_value.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,10 @@ class BytesValue final : private common_internal::ValueMixin<BytesValue> {
8181
absl::Nullable<google::protobuf::Arena*> arena
8282
ABSL_ATTRIBUTE_LIFETIME_BOUND) = delete;
8383

84+
static BytesValue Concat(const BytesValue& lhs, const BytesValue& rhs,
85+
absl::Nonnull<google::protobuf::Arena*> arena
86+
ABSL_ATTRIBUTE_LIFETIME_BOUND);
87+
8488
ABSL_DEPRECATED("Use From")
8589
explicit BytesValue(absl::Nullable<const char*> value) : value_(value) {}
8690

common/values/string_value.cc

Lines changed: 4 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,8 @@
1313
// limitations under the License.
1414

1515
#include <cstddef>
16+
#include <cstring>
1617
#include <string>
17-
#include <utility>
1818

1919
#include "google/protobuf/wrappers.pb.h"
2020
#include "absl/base/nullability.h"
@@ -25,6 +25,7 @@
2525
#include "absl/strings/match.h"
2626
#include "absl/strings/str_cat.h"
2727
#include "absl/strings/string_view.h"
28+
#include "common/internal/byte_string.h"
2829
#include "common/value.h"
2930
#include "internal/status_macros.h"
3031
#include "internal/strings.h"
@@ -58,19 +59,8 @@ std::string StringDebugString(const Bytes& value) {
5859

5960
StringValue StringValue::Concat(const StringValue& lhs, const StringValue& rhs,
6061
absl::Nonnull<google::protobuf::Arena*> arena) {
61-
ABSL_DCHECK(arena != nullptr);
62-
63-
if (lhs.IsEmpty()) {
64-
return rhs;
65-
}
66-
if (rhs.IsEmpty()) {
67-
return lhs;
68-
}
69-
70-
absl::Cord result;
71-
result.Append(lhs.ToCord());
72-
result.Append(rhs.ToCord());
73-
return StringValue(std::move(result));
62+
return StringValue(
63+
common_internal::ByteString::Concat(lhs.value_, rhs.value_, arena));
7464
}
7565

7666
std::string StringValue::DebugString() const {

runtime/standard/string_functions.cc

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -40,10 +40,7 @@ absl::StatusOr<StringValue> ConcatString(
4040
absl::Nonnull<const google::protobuf::DescriptorPool*>,
4141
absl::Nonnull<google::protobuf::MessageFactory*>,
4242
absl::Nonnull<google::protobuf::Arena*> arena) {
43-
// TODO: use StringValue::Concat when remaining interop usages
44-
// removed. Modern concat implementation forces additional copies when
45-
// converting to legacy string values.
46-
return StringValue(arena, absl::StrCat(value1.ToString(), value2.ToString()));
43+
return StringValue::Concat(value1, value2, arena);
4744
}
4845

4946
// Concatenation for bytes type.
@@ -52,10 +49,7 @@ absl::StatusOr<BytesValue> ConcatBytes(
5249
absl::Nonnull<const google::protobuf::DescriptorPool*>,
5350
absl::Nonnull<google::protobuf::MessageFactory*>,
5451
absl::Nonnull<google::protobuf::Arena*> arena) {
55-
// TODO: use BytesValue::Concat when remaining interop usages
56-
// removed. Modern concat implementation forces additional copies when
57-
// converting to legacy string values.
58-
return BytesValue(arena, absl::StrCat(value1.ToString(), value2.ToString()));
52+
return BytesValue::Concat(value1, value2, arena);
5953
}
6054

6155
bool StringContains(const StringValue& value, const StringValue& substr) {

0 commit comments

Comments
 (0)