Skip to content

Commit 4200050

Browse files
committed
Optimize XML and Deserialization
1 parent 9ada20b commit 4200050

2 files changed

Lines changed: 53 additions & 42 deletions

File tree

src/s3cpp/s3.hpp

Lines changed: 24 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,6 @@ struct ListBucketResult {
4747
std::string StartAfter;
4848
};
4949

50-
5150
class S3Client {
5251
public:
5352
// TODO(cristian): We should accept and define the endpoint url here
@@ -66,35 +65,42 @@ class S3Client {
6665
HttpRequest req = Client.get(std::format("http://127.0.0.1:9000/{}?prefix={}&max-keys={}", bucket, prefix, maxKeys)).header("Host", "127.0.0.1");
6766
Signer.sign(req);
6867
HttpResponse res = req.execute();
69-
ListBucketResult response;
70-
response = deserializeListBucketResult(Parser.parse(res.body()));
71-
return response;
68+
ListBucketResult response = deserializeListBucketResult(Parser.parse(res.body()), maxKeys);
69+
return response;
7270
}
7371

74-
ListBucketResult deserializeListBucketResult(const std::vector<XMLNode>& nodes) {
72+
ListBucketResult deserializeListBucketResult(const std::vector<XMLNode>& nodes, const int maxKeys) {
7573
// TODO(cristian): Detect and parse errors
7674
ListBucketResult response;
77-
response.Contents.push_back(Contents_{});
75+
response.Contents.reserve(maxKeys);
76+
response.CommonPrefixes.reserve(maxKeys);
77+
78+
response.Contents.push_back(Contents_ {});
7879
response.CommonPrefixes.push_back(CommonPrefix {});
80+
7981
int contentsIdx = 0;
8082
int commonPrefixesIdx = 0;
8183

8284
// To keep track when we need to append an element
83-
std::unordered_set<std::string> contentsKeySet;
84-
std::unordered_set<std::string> commonPrefixKeySet;
85+
std::vector<std::string_view> seenContents;
86+
std::vector<std::string_view> seenCommonPrefix;
8587

8688
for (const auto& node : nodes) {
8789
/* Sigh... no reflection */
8890

8991
// Check if we've seen this tag before in the current object
90-
if (contentsKeySet.contains(node.tag)) {
91-
response.Contents.push_back(Contents_{});
92-
contentsKeySet.clear();
93-
contentsIdx++;
94-
} else if (commonPrefixKeySet.contains(node.tag)) {
95-
response.CommonPrefixes.push_back(CommonPrefix {});
96-
commonPrefixKeySet.clear();
97-
commonPrefixesIdx++;
92+
if (node.tag.contains("ListBucketResult.Contents")) {
93+
if (std::find(seenContents.begin(), seenContents.end(), node.tag) != seenContents.end()) {
94+
response.Contents.push_back(Contents_ {});
95+
seenContents.clear();
96+
contentsIdx++;
97+
}
98+
} else if (node.tag.contains("ListBucketResult.CommonPrefix")) {
99+
if (std::find(seenCommonPrefix.begin(), seenCommonPrefix.end(), node.tag) != seenCommonPrefix.end()) {
100+
response.CommonPrefixes.push_back(CommonPrefix {});
101+
seenCommonPrefix.clear();
102+
commonPrefixesIdx++;
103+
}
98104
}
99105

100106
if (node.tag == "ListBucketResult.IsTruncated") {
@@ -141,9 +147,9 @@ class S3Client {
141147

142148
// Add already seen fields
143149
if (node.tag.contains("ListBucketResult.Contents")) {
144-
contentsKeySet.insert(node.tag);
150+
seenContents.push_back(node.tag);
145151
} else if (node.tag.contains("ListBucketResult.CommonPrefix")) {
146-
commonPrefixKeySet.insert(node.tag);
152+
seenCommonPrefix.push_back(node.tag);
147153
}
148154
}
149155
return response;

src/s3cpp/xml.hpp

Lines changed: 29 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -26,12 +26,13 @@ class XMLParser {
2626
auto state = States::Start;
2727

2828
// Setup buffers we will use
29-
std::string currentTag = "";
30-
std::string currentTagClose = "";
31-
std::string currentBody = "";
32-
std::string currentPath = "";
33-
std::string currentEntity = "";
29+
std::string currentTag;
30+
std::string currentTagClose;
31+
std::string currentBody;
32+
std::string currentPath;
33+
std::string currentEntity;
3434
auto tagStack = std::stack<std::string> {};
35+
int tagCloseIdx = 0;
3536

3637
for (char ch : sv) {
3738
auto prevState = state;
@@ -47,11 +48,11 @@ class XMLParser {
4748
state = States::Start;
4849
else {
4950
state = States::TagName;
50-
currentTag.push_back(ch);
51+
currentTag += ch;
5152
if (currentPath.size() >= 2 && currentPath[currentPath.size() - 2] != '.') {
52-
currentPath.push_back('.');
53+
currentPath += '.';
5354
}
54-
currentPath.push_back(ch);
55+
currentPath += ch;
5556
}
5657
break;
5758
}
@@ -61,18 +62,18 @@ class XMLParser {
6162
else if (ch == '>') {
6263
state = States::Body;
6364
tagStack.push(currentTag);
64-
currentTag = "";
65+
currentTag.clear();
6566
} else {
66-
currentTag.push_back(ch);
67-
currentPath.push_back(ch);
67+
currentTag += ch;
68+
currentPath += ch;
6869
}
6970
break;
7071
}
7172
case States::TagAttr: {
7273
if (ch == '>') {
7374
state = States::Body;
7475
tagStack.push(currentTag);
75-
currentTag = "";
76+
currentTag.clear();
7677
}
7778
break;
7879
}
@@ -82,41 +83,44 @@ class XMLParser {
8283
} else if (ch == '&') {
8384
state = States::Entity;
8485
} else {
85-
currentBody.push_back(ch);
86+
currentBody += ch;
8687
}
8788
break;
8889
}
8990
case States::Entity: {
9091
if (ch == ';') {
9192
// Decode entity and append it to currentBody
9293
state = States::Body;
93-
currentBody.push_back(decodeXMLEntity(currentEntity));
94-
currentEntity = "";
94+
currentBody += decodeXMLEntity(currentEntity);
95+
currentEntity.clear();
9596
} else {
96-
currentEntity.push_back(ch);
97+
currentEntity += ch;
9798
}
9899
break;
99100
}
100101
case States::Tag: {
101102
if (ch == '/') {
102103
state = States::TagClose;
103-
if (currentTagClose.size() == 0)
104+
if (tagCloseIdx == 0)
104105
currentTagClose = tagStack.top();
105106
} else {
106-
currentTag.push_back(ch);
107-
currentPath.push_back('.');
108-
currentPath.push_back(ch);
107+
currentTag += ch;
108+
currentPath += '.';
109+
currentPath += ch;
109110
state = States::Processing;
110111
}
111112
break;
112113
}
113114
case States::TagClose: {
114-
if (ch != currentTagClose[0]) {
115+
if (ch != currentTagClose[tagCloseIdx]) {
115116
throw std::runtime_error(std::format("Invalid closing tag encountered: {} for char {}", currentTagClose, ch));
116117
} else {
117-
currentTagClose.erase(0, 1);
118-
if (currentTagClose.size() == 0)
118+
// currentTagClose.erase(0, 1);
119+
tagCloseIdx++;
120+
if (tagCloseIdx == currentTagClose.size()) {
119121
state = States::Emit;
122+
tagCloseIdx = 0;
123+
}
120124
}
121125
break;
122126
}
@@ -140,7 +144,8 @@ class XMLParser {
140144
if (auto pos = currentPath.find_last_of('.'); pos != std::string::npos) {
141145
currentPath.erase(pos, std::string::npos);
142146
}
143-
currentBody = "";
147+
currentBody.clear();
148+
currentTagClose.clear();
144149
break;
145150
}
146151
default:

0 commit comments

Comments
 (0)