Skip to content

Commit cc6314a

Browse files
committed
Check whether metadata is kept intact in all pyarrow interactions
1 parent e2d6cf0 commit cc6314a

File tree

1 file changed

+16 -12 lines changed

arrow-pyarrow-integration-testing/tests/test_sql.py

Lines changed: 16 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -527,7 +527,7 @@ def test_empty_recordbatch_with_row_count():
527527
"""
528528

529529
# Create an empty schema with no fields
530-
batch = pa.RecordBatch.from_pydict({"a": [1, 2, 3, 4]}).select([])
530+
batch = pa.RecordBatch.from_pydict({"a": [1, 2, 3, 4]}, metadata={b'key1': b'value1'}).select([])
531531
num_rows = 4
532532
assert batch.num_rows == num_rows
533533
assert batch.num_columns == 0
@@ -545,7 +545,7 @@ def test_record_batch_reader():
545545
"""
546546
Python -> Rust -> Python
547547
"""
548-
schema = pa.schema([('ints', pa.list_(pa.int32()))], metadata={b'key1': b'value1'})
548+
schema = pa.schema([pa.field(name='ints', type=pa.list_(pa.int32()), metadata={b'key1': b'value1'})], metadata={b'key1': b'value1'})
549549
batches = [
550550
pa.record_batch([[[1], [2, 42]]], schema),
551551
pa.record_batch([[None, [], [5, 6]]], schema),
@@ -571,7 +571,7 @@ def test_record_batch_reader_pycapsule():
571571
"""
572572
Python -> Rust -> Python
573573
"""
574-
schema = pa.schema([('ints', pa.list_(pa.int32()))], metadata={b'key1': b'value1'})
574+
schema = pa.schema([pa.field(name='ints', type=pa.list_(pa.int32()), metadata={b'key1': b'value1'})], metadata={b'key1': b'value1'})
575575
batches = [
576576
pa.record_batch([[[1], [2, 42]]], schema),
577577
pa.record_batch([[None, [], [5, 6]]], schema),
@@ -621,7 +621,7 @@ def test_record_batch_pycapsule():
621621
"""
622622
Python -> Rust -> Python
623623
"""
624-
schema = pa.schema([('ints', pa.list_(pa.int32()))], metadata={b'key1': b'value1'})
624+
schema = pa.schema([pa.field(name='ints', type=pa.list_(pa.int32()), metadata={b'key1': b'value1'})], metadata={b'key1': b'value1'})
625625
batch = pa.record_batch([[[1], [2, 42]]], schema)
626626
wrapped = StreamWrapper(batch)
627627
b = rust.round_trip_record_batch_reader(wrapped)
@@ -640,7 +640,7 @@ def test_table_pycapsule():
640640
"""
641641
Python -> Rust -> Python
642642
"""
643-
schema = pa.schema([('ints', pa.list_(pa.int32()))], metadata={b'key1': b'value1'})
643+
schema = pa.schema([pa.field(name='ints', type=pa.list_(pa.int32()), metadata={b'key1': b'value1'})], metadata={b'key1': b'value1'})
644644
batches = [
645645
pa.record_batch([[[1], [2, 42]]], schema),
646646
pa.record_batch([[None, [], [5, 6]]], schema),
@@ -650,55 +650,59 @@ def test_table_pycapsule():
650650
b = rust.round_trip_record_batch_reader(wrapped)
651651
new_table = b.read_all()
652652

653-
assert table.schema == new_table.schema
654653
assert table == new_table
654+
assert table.schema == new_table.schema
655+
assert table.schema.metadata == new_table.schema.metadata
655656
assert len(table.to_batches()) == len(new_table.to_batches())
656657

657658

658659
def test_table_empty():
659660
"""
660661
Python -> Rust -> Python
661662
"""
662-
schema = pa.schema([('ints', pa.list_(pa.int32()))], metadata={b'key1': b'value1'})
663+
schema = pa.schema([pa.field(name='ints', type=pa.list_(pa.int32()), metadata={b'key1': b'value1'})], metadata={b'key1': b'value1'})
663664
table = pa.Table.from_batches([], schema=schema)
664665
new_table = rust.build_table([], schema=schema)
665666

666-
assert table.schema == new_table.schema
667667
assert table == new_table
668+
assert table.schema == new_table.schema
669+
assert table.schema.metadata == new_table.schema.metadata
668670
assert len(table.to_batches()) == len(new_table.to_batches())
669671

670672

671673
def test_table_roundtrip():
672674
"""
673675
Python -> Rust -> Python
674676
"""
675-
schema = pa.schema([('ints', pa.list_(pa.int32()))])
677+
schema = pa.schema([pa.field(name='ints', type=pa.list_(pa.int32()), metadata={b'key1': b'value1'})], metadata={b'key1': b'value1'})
676678
batches = [
677679
pa.record_batch([[[1], [2, 42]]], schema),
678680
pa.record_batch([[None, [], [5, 6]]], schema),
679681
]
680682
table = pa.Table.from_batches(batches, schema=schema)
681683
new_table = rust.round_trip_table(table)
682684

683-
assert table.schema == new_table.schema
684685
assert table == new_table
686+
assert table.schema == new_table.schema
687+
assert table.schema.metadata == new_table.schema.metadata
685688
assert len(table.to_batches()) == len(new_table.to_batches())
686689

687690

688691
def test_table_from_batches():
689692
"""
690693
Python -> Rust -> Python
691694
"""
692-
schema = pa.schema([('ints', pa.list_(pa.int32()))], metadata={b'key1': b'value1'})
695+
schema = pa.schema([pa.field(name='ints', type=pa.list_(pa.int32()), metadata={b'key1': b'value1'})], metadata={b'key1': b'value1'})
693696
batches = [
694697
pa.record_batch([[[1], [2, 42]]], schema),
695698
pa.record_batch([[None, [], [5, 6]]], schema),
696699
]
697700
table = pa.Table.from_batches(batches)
698701
new_table = rust.build_table(batches, schema)
699702

700-
assert table.schema == new_table.schema
701703
assert table == new_table
704+
assert table.schema == new_table.schema
705+
assert table.schema.metadata == new_table.schema.metadata
702706
assert len(table.to_batches()) == len(new_table.to_batches())
703707

704708

0 commit comments

Comments (0)