From 297b6a850618448ae03f207d900861284ad86b5c Mon Sep 17 00:00:00 2001 From: Hongyi Guo Date: Fri, 24 Dec 2021 17:29:30 +0800 Subject: [PATCH] return pyarrow.Table instead of returning the pandas-compatible NumPy array via to_pandas() to_pandas() is pretty expensive; if the returned data size is huge, e.g. 8GB, it might crash the process. Returning the pyarrow.Table gives the end user more flexibility --- dremio_client/flight/__init__.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/dremio_client/flight/__init__.py b/dremio_client/flight/__init__.py index 4d7a39b..b93b0e0 100644 --- a/dremio_client/flight/__init__.py +++ b/dremio_client/flight/__init__.py @@ -146,11 +146,9 @@ def query( batches.append(batch) except StopIteration: break - data = pa.Table.from_batches(batches) - if pandas: - return data.to_pandas() - else: - return data + table = pa.Table.from_batches(batches) + + return table except ImportError: