Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .github/workflows/publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ jobs:
# - app-tag.zip
# - app-tag-info.whl
- name: Archive package
uses: actions/upload-artifact@v2
uses: actions/upload-artifact@v4
with:
name: dist
path: ./dist
Expand All @@ -79,7 +79,7 @@ jobs:

# download artifacts from job: build
- name: Download artifacts from build
uses: actions/download-artifact@v2
uses: actions/download-artifact@v4
with:
name: dist
path: dist
Expand Down Expand Up @@ -110,7 +110,7 @@ jobs:
uses: actions/checkout@v2

- name: Download artifacts from release
uses: actions/download-artifact@v2
uses: actions/download-artifact@v4
with:
name: dist
path: dist
Expand Down
10 changes: 6 additions & 4 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ jobs:
runs-on: ubuntu-latest

container:
image: python:3.8
image: python:3.10

steps:
- name: Check out code
Expand All @@ -36,6 +36,7 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install setuptools wheel
pip install -r requirements.txt
pip install pytest
python setup.py develop
Expand All @@ -53,7 +54,7 @@ jobs:

strategy:
matrix:
python-version: ["3.8", "3.9", "3.10"]
python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"]

steps:
- name: Check out code
Expand All @@ -67,6 +68,7 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install setuptools wheel
pip install -r requirements.txt
pip install pytest pytest-cov
python setup.py develop
Expand All @@ -82,7 +84,7 @@ jobs:

# upload docx for further job
- name: Archive package
uses: actions/upload-artifact@v2
uses: actions/upload-artifact@v4
with:
name: outputs
name: outputs-py${{ matrix.python-version }}
path: ./test/outputs
15 changes: 13 additions & 2 deletions pdf2docx/table/Cell.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,16 @@ def __init__(self, raw:dict=None):
self.border_width = raw.get('border_width', (0,0,0,0)) # type: tuple [float]
self.merged_cells = raw.get('merged_cells', (1,1)) # type: tuple [int]

def _block_text(self, block):
'''Get text from a block, always returning a str (for join).'''
if not hasattr(block, 'text'):
return '<NEST TABLE>'
t = block.text
if t is None:
return ''
if isinstance(t, list):
return '\n'.join(str(x) for x in t)
return str(t)

@property
def text(self):
Expand All @@ -28,8 +38,9 @@ def text(self):
# fixme: prev code did `if block.is_text_block`, but sometimes
# there is no `is_text_block` member; would be good to ensure
# this member is always present and avoid use of `hasattr()`.
return '\n'.join([block.text if hasattr(block, 'text') else '<NEST TABLE>'
for block in self.blocks])
return '\n'.join([self._block_text(block) for block in self.blocks])
# return '\n'.join([block.text if hasattr(block, 'text') else '<NEST TABLE>'
# for block in self.blocks])


@property
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
PyMuPDF>=1.19.0
PyMuPDF>=1.26.7
python-docx>=0.8.10
fonttools>=4.24.0
numpy>=1.17.2
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def load_requirements(fname):
include_package_data=True,
zip_safe=False,
install_requires=load_requirements("requirements.txt"),
python_requires=">=3.6",
python_requires=">=3.10",
entry_points={
"console_scripts": [
"pdf2docx=pdf2docx.main:main"
Expand Down
Binary file added test/samples/demo-table-empty-cell.pdf
Binary file not shown.
15 changes: 15 additions & 0 deletions test/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,21 @@ def test_extracting_table(self):
['Description F', '1.00', '0.86', '0.37', '0.78', '0.01']
]
assert table==sample

# ------------------------------------------
# extract tables with empty/none cells
# ------------------------------------------
def test_extract_tables_empty_cell(self):
    '''Test extracting tables from demo-table-empty-cell.pdf.

    Checks that `extract_tables()` handles a sample whose tables contain
    empty cells: the call must not raise, must return a list, and that
    list must hold at least one table.
    '''
    filename = 'demo-table-empty-cell'
    pdf_file = os.path.join(sample_path, f'{filename}.pdf')
    cv = Converter(pdf_file)
    try:
        tables = cv.extract_tables()
    finally:
        # Always close the converter, even when extraction raises, so a
        # failing run does not leave it open.
        cv.close()

    assert isinstance(tables, list), 'extract_tables() should return a list'
    # At least one table expected from this sample
    assert len(tables) >= 1, f'expected at least 1 table, got {len(tables)}'


# ------------------------------------------
Expand Down