Skip to content

[ChatGPT] 这个错误源于 shapely 库在尝试处理矩形时,遇到非法的几何形状(LinearRing 没有闭合)。可能是某些矩形数据存在问题,导致无法合并或解析。 #54

@LibraScorpion

Description

@LibraScorpion

环境:Colab
补充一下待处理的目标文件:
https://drive.google.com/file/d/1uQSPYKJu-J-_NerHZE-C524pR_IgT-_K/view?usp=sharing

GEOSException Traceback (most recent call last)
in <cell line: 2>()
1 # @title 🚀 Define vars & Kickoff
----> 2 result = parse_pdf(
3 pdf_path=pdf_file_path,
4 output_dir="./output",
5 api_key="<REDACTED>", # @param {type:"string"} - String Field

9 frames
in parse_pdf(pdf_path, output_dir, prompt, api_key, base_url, model, verbose, gpt_worker, **args)
260 os.makedirs(output_dir)
261
--> 262 image_infos = _parse_pdf_to_images(pdf_path, output_dir=output_dir)
263 content = _gpt_parse_images(
264 image_infos=image_infos,

in _parse_pdf_to_images(pdf_path, output_dir)
145 logging.info(f'parse page: {page_index}')
146 rect_images = []
--> 147 rects = _parse_rects(page)
148 for index, rect in enumerate(rects):
149 fitz_rect = fitz.Rect(rect)

in _parse_rects(page)
115 rect_list += image_rects
116
--> 117 merged_rects = _merge_rects(rect_list, distance=10, horizontal_distance=100)
118 merged_rects = [rect for rect in merged_rects if explain_validity(rect) == 'Valid Geometry']
119

in _merge_rects(rect_list, distance, horizontal_distance)
66 if _is_near(rect, other_rect, distance) or (
67 horizontal_distance and _is_horizontal_near(rect, other_rect, horizontal_distance)):
---> 68 rect = _union_rects(rect, other_rect)
69 rect_list.remove(other_rect)
70 merged = True

in _union_rects(rect1, rect2)
49 Union two rectangles.
50 """
---> 51 return sg.box(*(rect1.union(rect2).bounds))
52
53

/usr/local/lib/python3.10/dist-packages/shapely/geometry/geo.py in box(minx, miny, maxx, maxy, ccw)
51 if not ccw:
52 coords = coords[::-1]
---> 53 return Polygon(coords)
54
55

/usr/local/lib/python3.10/dist-packages/shapely/geometry/polygon.py in __new__(self, shell, holes)
228 return shell
229 else:
--> 230 shell = LinearRing(shell)
231
232 if holes is not None:

/usr/local/lib/python3.10/dist-packages/shapely/geometry/polygon.py in __new__(self, coordinates)
102 return shapely.from_wkt("LINEARRING EMPTY")
103
--> 104 geom = shapely.linearrings(coordinates)
105 if not isinstance(geom, LinearRing):
106 raise ValueError("Invalid values passed to LinearRing constructor")

/usr/local/lib/python3.10/dist-packages/shapely/decorators.py in wrapped(*args, **kwargs)
75 for arr in array_args:
76 arr.flags.writeable = False
---> 77 return func(*args, **kwargs)
78 finally:
79 for arr, old_flag in zip(array_args, old_flags):

/usr/local/lib/python3.10/dist-packages/shapely/creation.py in linearrings(coords, y, z, indices, out, **kwargs)
169 coords = _xyz_to_coords(coords, y, z)
170 if indices is None:
--> 171 return lib.linearrings(coords, out=out, **kwargs)
172 else:
173 return simple_geometries_1d(coords, indices, GeometryType.LINEARRING, out=out)

GEOSException: IllegalArgumentException: Points of LinearRing do not form a closed linestring

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions