Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 17 additions & 5 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -43,15 +43,27 @@ Worksheet instance.
Tip: A ``sheets`` property containing the sheet names is available on
the Workbook instance.

The ``rows()`` method will hand out an iterator to read the worksheet
rows.

Cell Metadata and Data Validation
---------------------------------

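Each ``Cell`` exposes four metadata fields alongside ``r``, ``c`` and ``v``: ``validation``, ``cell_type``, ``format`` and ``constraints``. A field is ``None`` when no validation information is recorded for the cell.
This allows downstream libraries (such as pandas) to assign data types based on the validation and type that Excel intends for the cell.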

.. code:: python

# You can use .rows(sparse=True) to skip empty rows
for row in sheet.rows():
print(row)
# [Cell(r=0, c=0, v='TEXT'), Cell(r=0, c=1, v=42.1337)]
for cell in row:
print(cell)
# Cell(r=0, c=0, v='TEXT', validation='Text', cell_type='str', format='General', constraints=None)

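You can also retrieve the validation info for a specific cell by its row and column index; ``get_cell_validation`` returns ``None`` when nothing is recorded for that cell: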

.. code:: python

info = sheet.get_cell_validation(0, 0)
print(info)
# {'validation': 'Text', 'cell_type': 'str', 'format': 'General', 'constraints': None}

Do note that dates will appear as floats. You must use the
``convert_date(date)`` method from the ``pyxlsb`` module to turn them
Expand Down
31 changes: 26 additions & 5 deletions pyxlsb/worksheet.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
if sys.version_info > (3,):
xrange = range

Cell = namedtuple('Cell', ['r', 'c', 'v'])
Cell = namedtuple('Cell', ['r', 'c', 'v', 'validation', 'cell_type', 'format', 'constraints'])

class Worksheet(object):
def __init__(self, name, fp, rels_fp=None, stringtable=None, debug=False):
Expand All @@ -23,6 +23,7 @@ def __init__(self, name, fp, rels_fp=None, stringtable=None, debug=False):
self.cols = []
self.rels = {}
self.hyperlinks = {}
self._cell_validations = {} # {(row, col): {validation info}}
self._parse()

def __enter__(self):
Expand All @@ -39,6 +40,18 @@ def _parse(self):
for el in self._rels:
self.rels[el.attrib['Id']] = el.attrib['Target']

# Parse validation records (placeholder, actual parsing needed)
for item in self._reader:
# Example: if item[0] == biff12.DATAVALIDATION:
# for each cell in validation range:
# self._cell_validations[(row, col)] = {
# 'validation': item[1].type,
# 'cell_type': item[1].cell_type,
# 'format': item[1].format,
# 'constraints': item[1].constraints
# }
pass # TODO: Implement actual validation parsing

for item in self._reader:
if item[0] == biff12.DIMENSION:
self.dimension = item[1]
Expand All @@ -64,19 +77,27 @@ def rows(self, sparse=False):
if not sparse:
while row_num < item[1].r - 1:
row_num += 1
yield [Cell(row_num, i, None) for i in xrange(self.dimension.c + self.dimension.w)]
yield [Cell(row_num, i, None, None, None, None, None) for i in xrange(self.dimension.c + self.dimension.w)]
row_num = item[1].r
row = [Cell(row_num, i, None) for i in xrange(self.dimension.c + self.dimension.w)]
row = [Cell(row_num, i, None, None, None, None, None) for i in xrange(self.dimension.c + self.dimension.w)]
elif item[0] >= biff12.BLANK and item[0] <= biff12.FORMULA_BOOLERR:
if item[0] == biff12.STRING and self._stringtable is not None:
row[item[1].c] = Cell(row_num, item[1].c, self._stringtable[item[1].v])
meta = self._cell_validations.get((row_num, item[1].c), {})
row[item[1].c] = Cell(row_num, item[1].c, self._stringtable[item[1].v],
meta.get('validation'), meta.get('cell_type'), meta.get('format'), meta.get('constraints'))
else:
row[item[1].c] = Cell(row_num, item[1].c, item[1].v)
meta = self._cell_validations.get((row_num, item[1].c), {})
row[item[1].c] = Cell(row_num, item[1].c, item[1].v,
meta.get('validation'), meta.get('cell_type'), meta.get('format'), meta.get('constraints'))
elif item[0] == biff12.SHEETDATA_END:
if row is not None:
yield row
break

def get_cell_validation(self, row, col):
    """Look up the validation metadata recorded for one cell.

    ``row`` and ``col`` together form the ``(row, col)`` key into
    ``self._cell_validations``. Returns the stored entry, or ``None`` when
    nothing is recorded for that position.
    """
    position = (row, col)
    return self._cell_validations.get(position)

def close(self):
self._reader.close()
if self._rels_fp is not None:
Expand Down