@@ -220,6 +220,59 @@ TEST_P(ArrayRecordWriterTest, RandomDatasetTest) {
220220 ASSERT_TRUE (reader.Close ());
221221}
222222
223+ TEST_P (ArrayRecordWriterTest, CompressionRatioTest) {
224+ float expected_ratio = 1.0 ;
225+ auto options = GetOptions ();
226+ if (options.pad_to_block_boundary ()) {
227+ GTEST_SKIP ()
228+ << " Padded boundaries are known to have bad compression ratio." ;
229+ }
230+ // We use uniform int distribution for values that are within a byte.
231+ // Therefore regular compression algorithm should easily compress it
232+ // for at least 2x and near 4x. However, the snappy compression doesn't
233+ // work that well so we set a higher threshold.
234+ switch (options.compression_type ()) {
235+ case riegeli::CompressionType::kNone :
236+ GTEST_SKIP () << " No need to verify compression ratio for uncompressed." ;
237+ case riegeli::CompressionType::kBrotli :
238+ expected_ratio = 0.4 ;
239+ break ;
240+ case riegeli::CompressionType::kZstd :
241+ expected_ratio = 0.4 ;
242+ break ;
243+ case riegeli::CompressionType::kSnappy :
244+ expected_ratio = 0.6 ;
245+ break ;
246+ }
247+ options.set_group_size (128 );
248+ std::mt19937 bitgen;
249+ std::uniform_int_distribution<uint32_t > dist (0 , 128 );
250+ constexpr uint32_t num_records = 32768 ;
251+ constexpr uint32_t dim = 256 ;
252+ std::vector<uint32_t > records (num_records * dim);
253+
254+ for (auto i : Seq (num_records * dim)) {
255+ records[i] = dist (bitgen);
256+ }
257+ std::string encoded;
258+
259+ ARThreadPool* pool = nullptr ;
260+ if (std::get<3 >(GetParam ())) {
261+ pool = ArrayRecordGlobalPool ();
262+ }
263+
264+ auto writer = ArrayRecordWriter (
265+ riegeli::Maker<riegeli::StringWriter>(&encoded), options, pool);
266+
267+ for (auto i : Seq (num_records)) {
268+ EXPECT_TRUE (
269+ writer.WriteRecord (records.data () + dim * i, dim * sizeof (uint32_t )));
270+ }
271+ ASSERT_TRUE (writer.Close ());
272+ EXPECT_LE (encoded.size (),
273+ num_records * dim * sizeof (uint32_t ) * expected_ratio);
274+ }
275+
223276INSTANTIATE_TEST_SUITE_P (
224277 ParamTest, ArrayRecordWriterTest,
225278 testing::Combine (testing::Values(CompressionType::kUncompressed ,
@@ -253,7 +306,7 @@ TEST(ArrayRecordWriterOptionsTest, ParsingTest) {
253306 EXPECT_FALSE (option.pad_to_block_boundary ());
254307
255308 EXPECT_EQ (option.ToString (),
256- " group_size:65536 ,"
309+ " group_size:1 ,"
257310 " transpose:false,"
258311 " pad_to_block_boundary:false,"
259312 " zstd:3,"
@@ -274,7 +327,7 @@ TEST(ArrayRecordWriterOptionsTest, ParsingTest) {
274327 EXPECT_FALSE (option.pad_to_block_boundary ());
275328
276329 EXPECT_EQ (option.ToString (),
277- " group_size:65536 ,"
330+ " group_size:1 ,"
278331 " transpose:false,"
279332 " pad_to_block_boundary:false,"
280333 " zstd:3,"
@@ -362,7 +415,7 @@ TEST(ArrayRecordWriterOptionsTest, ParsingTest) {
362415 EXPECT_TRUE (option.pad_to_block_boundary ());
363416
364417 EXPECT_EQ (option.ToString (),
365- " group_size:65536 ,"
418+ " group_size:1 ,"
366419 " transpose:false,"
367420 " pad_to_block_boundary:true,"
368421 " uncompressed" );
@@ -382,7 +435,7 @@ TEST(ArrayRecordWriterOptionsTest, ParsingTest) {
382435 EXPECT_TRUE (option.pad_to_block_boundary ());
383436
384437 EXPECT_EQ (option.ToString (),
385- " group_size:65536 ,"
438+ " group_size:1 ,"
386439 " transpose:false,"
387440 " pad_to_block_boundary:true,"
388441 " snappy" );
0 commit comments