Skip to content

Commit 5b5e71e

Browse files
committed
Deserialization of ListBucketResult done. We now return a struct with proper data
1 parent 3b510bb commit 5b5e71e

3 files changed

Lines changed: 228 additions & 25 deletions

File tree

src/s3cpp/s3.hpp

Lines changed: 86 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,13 @@
1+
#include <charconv>
2+
#include <print>
13
#include <s3cpp/auth.h>
24
#include <s3cpp/xml.hpp>
5+
#include <stdexcept>
6+
#include <unordered_set>
37

48
class S3Client {
59
public:
6-
// TODO(cristian): We should accept and define the endpoint url here
10+
// TODO(cristian): We should accept and define the endpoint url here
711
S3Client(const std::string& access, const std::string& secret)
812
: Client(HttpClient())
913
, Signer(AWSSigV4Signer(access, secret))
@@ -13,24 +17,91 @@ class S3Client {
1317
, Signer(AWSSigV4Signer(access, secret, region))
1418
, Parser(XMLParser()) { }
1519

16-
void list_objects(const std::string& bucket) {
17-
return list_objects(bucket, "/");
18-
}
19-
20-
void list_objects(const std::string& bucket, const std::string& prefix) {
21-
// TODO(cristian): Decide what to do with the Host header
22-
23-
HttpRequest req = Client.get(std::format("http://127.0.0.1:9000/{}?prefix={}", bucket, prefix))
24-
.header("Host", "127.0.0.1");
20+
// Lists `bucket` with the default prefix "/" and a limit of 1000 keys.
ListBucketResult list_objects(const std::string& bucket) {
    return list_objects(bucket, "/", 1000);
}

// Lists the keys of `bucket` under `prefix` with a limit of 1000 keys.
ListBucketResult list_objects(const std::string& bucket, const std::string& prefix) {
    return list_objects(bucket, prefix, 1000);
}

// Issues a signed GET against the (currently hard-coded) local endpoint, asking for at most
// `maxKeys` keys under `prefix`, and returns the deserialized XML response body.
ListBucketResult list_objects(const std::string& bucket, const std::string& prefix, int maxKeys) {
    HttpRequest req = Client
                          .get(std::format("http://127.0.0.1:9000/{}?prefix={}&max-keys={}", bucket, prefix, maxKeys))
                          .header("Host", "127.0.0.1");
    Signer.sign(req);

    HttpResponse res = req.execute();
    const std::vector<XMLNode> parsedNodes = Parser.parse(res.body());
    return deserializeListBucketResult(parsedNodes);
}
28+
29+
// Builds a ListBucketResult from the flat node list returned by XMLParser::parse.
//
// Every node carries a dotted tag path (e.g. "ListBucketResult.Contents.Key") plus its text
// value. Repeated elements (<Contents>, <CommonPrefixes>) are not delimited in that flat list,
// so a new entry is started the first time one of their tags appears and again whenever a tag
// repeats for the entry currently being filled.
//
// Throws std::runtime_error when a tag has no matching field; errors raised by
// parseBool / parseNumber propagate unchanged.
ListBucketResult deserializeListBucketResult(const std::vector<XMLNode>& nodes) {
    // TODO(cristian): Detect and parse errors

    // Value-initialize so scalar fields absent from the response read as false / 0.
    ListBucketResult response {};

    // Tags already stored in the entry being filled; a repeated tag means "next entry".
    std::unordered_set<std::string> contentsKeySet;
    std::unordered_set<std::string> commonPrefixKeySet;

    for (const auto& node : nodes) {
        /* Sigh... no reflection */

        // Entries are appended lazily, so a listing without objects or common prefixes
        // yields empty vectors instead of one blank placeholder element.
        if (node.tag.starts_with("ListBucketResult.Contents.")) {
            if (response.Contents.empty() || contentsKeySet.contains(node.tag)) {
                response.Contents.emplace_back();
                contentsKeySet.clear();
            }
            contentsKeySet.insert(node.tag);
        } else if (node.tag.starts_with("ListBucketResult.CommonPrefixes.")) {
            if (response.CommonPrefixes.empty() || commonPrefixKeySet.contains(node.tag)) {
                response.CommonPrefixes.emplace_back();
                commonPrefixKeySet.clear();
            }
            commonPrefixKeySet.insert(node.tag);
        }

        // NOTE: node.value is const, so values are copied (std::move would be a no-op).
        if (node.tag == "ListBucketResult.IsTruncated") {
            response.IsTruncated = Parser.parseBool(node.value);
        } else if (node.tag == "ListBucketResult.Marker") {
            response.Marker = node.value;
        } else if (node.tag == "ListBucketResult.NextMarker") {
            response.NextMarker = node.value;
        } else if (node.tag == "ListBucketResult.Name") {
            response.Name = node.value;
        } else if (node.tag == "ListBucketResult.Prefix") {
            response.Prefix = node.value;
        } else if (node.tag == "ListBucketResult.Delimiter") {
            response.Delimiter = node.value;
        } else if (node.tag == "ListBucketResult.MaxKeys") {
            response.MaxKeys = Parser.parseNumber<int>(node.value);
        } else if (node.tag == "ListBucketResult.EncodingType") {
            response.EncodingType = node.value;
        } else if (node.tag == "ListBucketResult.KeyCount") {
            response.KeyCount = Parser.parseNumber<int>(node.value);
        } else if (node.tag == "ListBucketResult.ContinuationToken") {
            response.ContinuationToken = node.value;
        } else if (node.tag == "ListBucketResult.NextContinuationToken") {
            response.NextContinuationToken = node.value;
        } else if (node.tag == "ListBucketResult.StartAfter") {
            response.StartAfter = node.value;
        } else if (node.tag == "ListBucketResult.CommonPrefixes.Prefix") {
            // NOTE(review): tag path assumed by analogy with the Contents.* paths — confirm
            // against a delimiter-based listing.
            response.CommonPrefixes.back().Prefix = node.value;
        } else if (node.tag == "ListBucketResult.Contents.ChecksumAlgorithm") {
            response.Contents.back().ChecksumAlgorithm = node.value;
        } else if (node.tag == "ListBucketResult.Contents.ChecksumType") {
            response.Contents.back().ChecksumType = node.value;
        } else if (node.tag == "ListBucketResult.Contents.ETag") {
            response.Contents.back().ETag = node.value;
        } else if (node.tag == "ListBucketResult.Contents.Key") {
            response.Contents.back().Key = node.value;
        } else if (node.tag == "ListBucketResult.Contents.LastModified") {
            response.Contents.back().LastModified = node.value;
        } else if (node.tag == "ListBucketResult.Contents.Owner.DisplayName") {
            response.Contents.back().Owner.DisplayName = node.value;
        } else if (node.tag == "ListBucketResult.Contents.Owner.ID") {
            response.Contents.back().Owner.ID = node.value;
        } else if (node.tag == "ListBucketResult.Contents.RestoreStatus.IsRestoreInProgress") {
            response.Contents.back().RestoreStatus.IsRestoreInProgress = Parser.parseBool(node.value);
        } else if (node.tag == "ListBucketResult.Contents.RestoreStatus.RestoreExpiryDate") {
            response.Contents.back().RestoreStatus.RestoreExpiryDate = node.value;
        } else if (node.tag == "ListBucketResult.Contents.Size") {
            // Parse with the field's own type so the two cannot drift apart.
            response.Contents.back().Size = Parser.parseNumber<decltype(Contents::Size)>(node.value);
        } else if (node.tag == "ListBucketResult.Contents.StorageClass") {
            response.Contents.back().StorageClass = node.value;
        } else {
            throw std::runtime_error(std::format("No case for ListBucketResult response found for: {}", node.tag));
        }
    }
    return response;
}
35106

36107
private:

src/s3cpp/xml.hpp

Lines changed: 73 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,50 @@
66
#include <utility>
77
#include <vector>
88

9+
// ListBucketResult
10+
// https://docs.aws.amazon.com/AmazonS3/latest/API/API_ListObjectsV2.html#API_ListObjectsV2_ResponseSyntax
11+
12+
// One object entry (<Contents> element) of a ListBucketResult response.
// String fields hold the raw XML text; scalar fields default to 0 / false so an entry
// whose response omitted them never exposes indeterminate values.
struct Contents {
    std::string ChecksumAlgorithm;
    std::string ChecksumType;
    std::string ETag;
    std::string Key;
    std::string LastModified;
    struct Owner_ {
        std::string DisplayName;
        std::string ID;
    } Owner;
    struct RestoreStatus_ {
        bool IsRestoreInProgress = false;
        std::string RestoreExpiryDate;
    } RestoreStatus;
    int64_t Size = 0;
    std::string StorageClass;
};

// One <CommonPrefixes> entry of a ListBucketResult response.
struct CommonPrefix {
    std::string Prefix;
};

// Deserialized ListBucketResult document. Fields missing from a response keep their
// defaults (empty string / empty vector / 0 / false).
struct ListBucketResult {
    bool IsTruncated = false;
    std::string Marker;
    std::string NextMarker;
    // `::Contents` is qualified on purpose: the member reuses the type's name, and an
    // unqualified use here would change the meaning of `Contents` inside the class
    // (ill-formed; rejected by GCC).
    std::vector<::Contents> Contents;
    std::string Name;
    std::string Prefix;
    std::string Delimiter;
    int MaxKeys = 0;
    std::vector<CommonPrefix> CommonPrefixes;
    std::string EncodingType;
    int KeyCount = 0;
    std::string ContinuationToken;
    std::string NextContinuationToken;
    std::string StartAfter;
};
50+
51+
// We will use a regular Key Value struct to represent the raw XML nodes
52+
// TODO(cristian): Make private
953
struct XMLNode {
1054
const std::string tag;
1155
const std::string value;
@@ -14,7 +58,7 @@ struct XMLNode {
1458
class XMLParser {
1559
public:
1660
// Finite State Machine (FSM) for parsing S3 valid XML
17-
// See: TODO(cristian)
61+
// See the automata on #10 whiteboard: https://ggcr.github.io/whiteboards/
1862
std::vector<XMLNode> parse(const std::string& xml) {
1963
auto xmlElements = std::vector<XMLNode>();
2064
auto sv = std::string_view { xml };
@@ -163,26 +207,45 @@ class XMLParser {
163207
else if (entity == "amp")
164208
return '&';
165209

166-
// XML numerical values (i.e. ETags using quotes)
167-
int code = 0;
168-
int base;
210+
return parseNumber<char>(entity);
211+
212+
throw std::runtime_error(std::format("Unknown XML entity: &{};", entity));
213+
}
214+
215+
// Parses an integer from `s` and returns it converted to `T`.
//
// Accepted spellings:
//   "#xhh" / "#Xhh"  hexadecimal body of an XML numeric entity (i.e. '&#x22;')
//   "#ddd"           decimal body of an XML numeric entity (i.e. '&#34;')
//   anything else    plain base-10 number
//
// The whole string must be consumed. For `T` at least as wide as `int` the value must
// also fit in `T`; narrower types (e.g. `char` for entities) are truncated by the cast,
// which is what the entity decoder relies on.
//
// Throws std::runtime_error when `s` is not a number in one of the forms above.
template <typename T>
T parseNumber(const std::string& s) {
    const auto failure = [&s] {
        return std::runtime_error("Unable to parse number from '" + s + "'");
    };

    const char* first = s.data();
    const char* const last = s.data() + s.size();
    int base = 10;

    // Skip the entity marker ("#" or "#x") and pick the matching base.
    if (s.size() > 1 && s.front() == '#') {
        if (s[1] == 'x' || s[1] == 'X') {
            base = 16;
            first += 2;
        } else {
            first += 1;
        }
    }

    // Parse into a wide type first: going through `int` made every value above INT_MAX
    // (e.g. the Size of a multi-GiB object) fail regardless of `T`.
    long long parsed = 0;
    const std::from_chars_result result = std::from_chars(first, last, parsed, base);
    if (result.ec != std::errc {} || result.ptr != last) {
        // Not a number, out of range, or trailing characters such as "12abc".
        throw failure();
    }

    const T value = static_cast<T>(parsed);
    if constexpr (sizeof(T) >= sizeof(int)) {
        // Reject values that do not survive the conversion to T.
        if (static_cast<long long>(value) != parsed) {
            throw failure();
        }
    }
    return value;
}
184241

185-
throw std::runtime_error(std::format("Unknown XML entity: &{};", entity));
242+
// Maps the textual booleans "true"/"True" and "false"/"False" to bool.
// Any other input raises std::runtime_error naming the offending string.
bool parseBool(const std::string& s) {
    const bool spelledTrue = (s == "True") || (s == "true");
    if (spelledTrue) {
        return true;
    }

    const bool spelledFalse = (s == "False") || (s == "false");
    if (spelledFalse) {
        return false;
    }

    throw std::runtime_error("Unable to parse boolean from string: '" + s + "'");
}
187250

188251
private:

test/s3_test.cpp

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,3 +39,72 @@ TEST(S3, ListObjectsDirPrefix) {
3939
throw;
4040
}
4141
}
42+
43+
// Smoke test: listing with an explicit max-keys value must not throw.
// Skipped (not failed) when the local server is unreachable.
TEST(S3, ListObjectsDirPrefixMaxKeys) {
    S3Client client("minio_access", "minio_secret");
    try {
        client.list_objects("my-bucket", "path/to/", 1);
    } catch (const std::exception& e) {
        const std::string emsg = e.what();
        if (emsg == "libcurl error: Could not connect to server" || emsg == "libcurl error: Couldn't connect to server") {
            // Public skip macro (GTEST_SKIP_ is gtest-internal); message names this test.
            GTEST_SKIP() << "Skipping ListObjectsDirPrefixMaxKeys: Server not up";
        }
        throw;
    }
}
55+
56+
// Verifies that a two-key listing is deserialized field by field.
// The expected values are those of the fixture objects this test was written against.
// Skipped (not failed) when the local server is unreachable.
TEST(S3, ListObjectsCheckFields) {
    S3Client client("minio_access", "minio_secret");
    try {
        ListBucketResult response = client.list_objects("my-bucket", "path/to/", 2);

        // Check top-level fields
        EXPECT_EQ(response.Name, "my-bucket");
        EXPECT_EQ(response.Prefix, "path/to/");
        EXPECT_EQ(response.MaxKeys, 2);
        EXPECT_TRUE(response.IsTruncated);
        EXPECT_FALSE(response.NextMarker.empty());

        // Should have exactly 2 contents. Fatal on mismatch: the element checks below
        // index Contents[0] and Contents[1] and must not run on a shorter vector.
        ASSERT_EQ(response.Contents.size(), 2u);

        // Check first object
        EXPECT_EQ(response.Contents[0].Key, "path/to/file_1.txt");
        EXPECT_EQ(response.Contents[0].LastModified, "2025-12-20T16:09:33.907Z");
        EXPECT_EQ(response.Contents[0].ETag, "\"12d56ae2b967f517787bd4ade69fc2b0\"");
        EXPECT_EQ(response.Contents[0].Size, 26);
        EXPECT_EQ(response.Contents[0].Owner.ID, "02d6176db174dc93cb1b899f7c6078f08654445fe8cf1b6ce98d8855f66bdbf4");
        EXPECT_EQ(response.Contents[0].Owner.DisplayName, "minio");
        EXPECT_EQ(response.Contents[0].StorageClass, "STANDARD");

        // Check second object
        EXPECT_EQ(response.Contents[1].Key, "path/to/file_10.txt");
        EXPECT_EQ(response.Contents[1].LastModified, "2025-12-20T16:09:33.936Z");
        EXPECT_EQ(response.Contents[1].ETag, "\"c4bab200495494cf824918ae6f2d117f\"");
        EXPECT_EQ(response.Contents[1].Size, 27);
        EXPECT_EQ(response.Contents[1].Owner.ID, "02d6176db174dc93cb1b899f7c6078f08654445fe8cf1b6ce98d8855f66bdbf4");
        EXPECT_EQ(response.Contents[1].Owner.DisplayName, "minio");
        EXPECT_EQ(response.Contents[1].StorageClass, "STANDARD");
    } catch (const std::exception& e) {
        const std::string emsg = e.what();
        if (emsg == "libcurl error: Could not connect to server" || emsg == "libcurl error: Couldn't connect to server") {
            // Public skip macro (GTEST_SKIP_ is gtest-internal); message names this test.
            GTEST_SKIP() << "Skipping ListObjectsCheckFields: Server not up";
        }
        throw;
    }
}
96+
97+
// Verifies that the two-argument overload caps a listing at 1000 keys.
// Skipped (not failed) when the local server is unreachable.
TEST(S3, ListObjectsCheckLenKeys) {
    S3Client client("minio_access", "minio_secret");
    try {
        // has 10K objects - limit is 1000 keys
        ListBucketResult response = client.list_objects("my-bucket", "path/to/");
        EXPECT_EQ(response.Contents.size(), 1000u);
    } catch (const std::exception& e) {
        const std::string emsg = e.what();
        if (emsg == "libcurl error: Could not connect to server" || emsg == "libcurl error: Couldn't connect to server") {
            // Public skip macro (GTEST_SKIP_ is gtest-internal); message names this test.
            GTEST_SKIP() << "Skipping ListObjectsCheckLenKeys: Server not up";
        }
        throw;
    }
}

0 commit comments

Comments
 (0)