From 3fdbbde6a72c80ac82642650eb07e9769dc6431d Mon Sep 17 00:00:00 2001 From: Balys Anikevicius Date: Wed, 26 Nov 2025 13:59:09 +0100 Subject: [PATCH 1/2] bugfix/gracefully-handle-large-thumbnails Summary: User report: mapillary_tools crashes when trying to upload the attached set of images. Also, there is no mention on which image is causing the crash, meaning the user has to hunt through potentially thousands of images to find the misbehaving one. Traceback (most recent call last): File "main.py", line 8, in File "mapillary_tools\commands\__main__.py", line 164, in main File "mapillary_tools\commands\process_and_upload.py", line 33, in run File "mapillary_tools\commands\upload.py", line 75, in run File "mapillary_tools\upload.py", line 117, in upload File "mapillary_tools\upload.py", line 108, in upload File "mapillary_tools\upload.py", line 631, in _continue_or_fail File "mapillary_tools\uploader.py", line 517, in upload_images File "mapillary_tools\uploader.py", line 543, in _upload_sequence File "concurrent\futures\_base.py", line 619, in result_iterator File "concurrent\futures\_base.py", line 317, in _result_or_cancel File "concurrent\futures\_base.py", line 449, in result File "concurrent\futures\_base.py", line 401, in __get_result File "concurrent\futures\thread.py", line 59, in run File "mapillary_tools\uploader.py", line 602, in upload File "mapillary_tools\uploader.py", line 643, in dump_image_bytes File "mapillary_tools\exif_write.py", line 201, in dump_image_bytes File "mapillary_tools\exif_write.py", line 186, in _safe_dump File "mapillary_tools\exif_write.py", line 153, in _safe_dump File "piexif\_dump.py", line 94, in dump ValueError: Given thumbnail is too large. max 64kB [PYI-9472:ERROR] Failed to execute script 'main' due to unhandled exception! "But I guess the actual problems are quite clear: (1) ValueError: Given thumbnail is too large. max 64kB, Mapillary needs to gracefully handle that situation and (2) on file errors, Mapillary needs to report the failing image name." Test Plan: Added unit tests, ran the existing ones as described in README. It looks like the tests get stuck on tests/unit/test_persistent_cache.py::test_multithread_shared_cache_comprehensive but I believe this is unrelated to the change. To reproduce test getting stuck run: ``` pytest -s -vv tests ``` and observe it getting stuck after 181 tests. --- mapillary_tools/exif_write.py | 12 +++++ mapillary_tools/uploader.py | 4 ++ pyproject.toml | 1 + tests/unit/test_exifedit.py | 90 +++++++++++++++++++++++++++++++++++ 4 files changed, 107 insertions(+) diff --git a/mapillary_tools/exif_write.py b/mapillary_tools/exif_write.py index 25d6c419..b7732f1f 100644 --- a/mapillary_tools/exif_write.py +++ b/mapillary_tools/exif_write.py @@ -214,6 +214,18 @@ def _safe_dump(self) -> bytes: else: del self._ef[ifd][tag] # retry later + elif "thumbnail is too large" in message.lower(): + # Handle oversized thumbnails (max 64kB per EXIF spec) + if thumbnail_removed: + raise exc + LOG.debug( + "Thumbnail too large (max 64kB) -- removing thumbnail and 1st: %s", + exc, + ) + del self._ef["thumbnail"] + del self._ef["1st"] + thumbnail_removed = True + # retry later else: raise exc except Exception as exc: diff --git a/mapillary_tools/uploader.py b/mapillary_tools/uploader.py index 2107d610..bc3c8190 100644 --- a/mapillary_tools/uploader.py +++ b/mapillary_tools/uploader.py @@ -797,6 +797,10 @@ def dump_image_bytes(cls, metadata: types.ImageMetadata) -> bytes: raise ExifError( f"Failed to dump EXIF bytes: {ex}", metadata.filename ) from ex + except ValueError as ex: + raise ExifError( + f"Failed to dump EXIF bytes: {ex}", metadata.filename + ) from ex # Thread-safe def _get_cached_file_handle(self, key: str) -> str | None: diff --git a/pyproject.toml b/pyproject.toml index 504f00f4..4888ac29 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -58,6 +58,7 @@ include = ["mapillary_tools*"] [dependency-groups] dev = [ "mypy", + "Pillow", "pyinstaller", "pyre-check", "pytest", diff --git a/tests/unit/test_exifedit.py b/tests/unit/test_exifedit.py index e5aa8fbe..94032b3a 100644 --- a/tests/unit/test_exifedit.py +++ b/tests/unit/test_exifedit.py @@ -1,13 +1,16 @@ import datetime +import io import os import shutil import unittest from pathlib import Path +import piexif import py.path from mapillary_tools.exif_read import ExifRead from mapillary_tools.exif_write import ExifEdit +from PIL import Image this_file = Path(__file__) this_file_dir = this_file.parent @@ -260,6 +263,93 @@ def test_add_repeatedly_time_original_corrupt_exif(self): def test_add_repeatedly_time_original_corrupt_exif_2(self): add_repeatedly_time_original_general(self, CORRUPT_EXIF_FILE_2) + def test_large_thumbnail_handling(self): + """Test that images with thumbnails larger than 64kB are handled gracefully.""" + # Create a test image with a large thumbnail (>64kB) + test_image_path = data_dir.joinpath("tmp", "large_thumbnail.jpg") + + # Create a simple test image + img = Image.new("RGB", (100, 100), color="red") + img.save(test_image_path, "JPEG") + + # Create a large thumbnail (>64kB) by creating a high-quality large thumbnail + # Use a larger size and add noise to make it incompressible + large_thumbnail = Image.new("RGB", (2048, 2048)) + # Fill with random-like data to prevent compression + pixels = large_thumbnail.load() + for i in range(2048): + for j in range(2048): + pixels[i, j] = ( + (i * 7 + j * 13) % 256, + (i * 11 + j * 17) % 256, + (i * 19 + j * 23) % 256, + ) + + thumbnail_bytes = io.BytesIO() + large_thumbnail.save(thumbnail_bytes, "JPEG", quality=100) + thumbnail_data = thumbnail_bytes.getvalue() + + # Verify thumbnail is larger than 64kB + self.assertGreater( + len(thumbnail_data), + 64 * 1024, + f"Test thumbnail should be larger than 64kB but got {len(thumbnail_data)} bytes", + ) + + # Load the image and add GPS data first + exif_edit = ExifEdit(test_image_path) + test_latitude = 50.5475894785 + test_longitude = 15.595866685 + exif_edit.add_lat_lon(test_latitude, test_longitude) + + # Manually insert the large thumbnail into the internal EXIF structure + # This simulates what would happen if an image came in with a large thumbnail + exif_edit._ef["thumbnail"] = thumbnail_data + exif_edit._ef["1st"] = { + piexif.ImageIFD.Compression: 6, + piexif.ImageIFD.XResolution: (72, 1), + piexif.ImageIFD.YResolution: (72, 1), + piexif.ImageIFD.ResolutionUnit: 2, + piexif.ImageIFD.JPEGInterchangeFormat: 0, + piexif.ImageIFD.JPEGInterchangeFormatLength: len(thumbnail_data), + } + + # Without the fix, this would raise: ValueError: Given thumbnail is too large. max 64kB + # With the fix, it should remove the thumbnail and succeed + image_bytes = exif_edit.dump_image_bytes() + + # Verify the output is valid + self.assertIsNotNone(image_bytes) + self.assertGreater(len(image_bytes), 0) + + # Verify the resulting image is a valid JPEG + result_image = Image.open(io.BytesIO(image_bytes)) + self.assertEqual(result_image.format, "JPEG") + self.assertEqual(result_image.size, (100, 100)) + + # Verify we can read the GPS data from the result + output_exif = piexif.load(image_bytes) + self.assertIn("GPS", output_exif) + self.assertIn(piexif.GPSIFD.GPSLatitude, output_exif["GPS"]) + self.assertIn(piexif.GPSIFD.GPSLongitude, output_exif["GPS"]) + + # CRITICAL: Verify the large thumbnail was actually removed + # The fix should have deleted both "thumbnail" and "1st" to handle the error + # piexif.load() may include "thumbnail": None after removal + thumbnail_value = output_exif.get("thumbnail") + self.assertTrue( + thumbnail_value is None or thumbnail_value == b"", + f"Large thumbnail should have been removed but got: {thumbnail_value[:100] if thumbnail_value else None}", + ) + # If 1st exists, it should be empty or not contain thumbnail reference data + if "1st" in output_exif: + # If 1st exists, it should be empty or minimal + self.assertNotIn( + piexif.ImageIFD.JPEGInterchangeFormat, + output_exif.get("1st", {}), + "Thumbnail metadata should have been removed", + ) + def test_exif_write(tmpdir: py.path.local): image_dir = tmpdir.mkdir("images") From 6b2a36d5631bb19c955a3ecf6bfda73c45d89cc9 Mon Sep 17 00:00:00 2001 From: Balys Anikevicius Date: Thu, 27 Nov 2025 14:26:44 +0100 Subject: [PATCH 2/2] Review comments and formatting --- mapillary_tools/authenticate.py | 1 - mapillary_tools/blackvue_parser.py | 1 - mapillary_tools/camm/camm_builder.py | 1 - mapillary_tools/commands/process_and_upload.py | 1 - .../commands/video_process_and_upload.py | 1 - mapillary_tools/http.py | 1 - mapillary_tools/serializer/gpx.py | 1 - mapillary_tools/utils.py | 1 - tests/cli/blackvue_parser.py | 1 - tests/cli/camm_parser.py | 1 - tests/cli/gpmf_parser.py | 1 - tests/cli/gps_filter.py | 1 - tests/cli/simple_mp4_builder.py | 1 - tests/cli/upload_api_v4.py | 1 - tests/integration/fixtures.py | 2 -- tests/integration/test_video_process.py | 1 - tests/unit/test_config.py | 1 - tests/unit/test_exifedit.py | 18 +++++++----------- tests/unit/test_exifread.py | 3 --- tests/unit/test_ffmpeg.py | 2 -- tests/unit/test_persistent_cache.py | 1 - tests/unit/test_sample_video.py | 1 - tests/unit/test_sequence_processing.py | 1 - tests/unit/test_store.py | 1 - tests/unit/test_upload_api_v4.py | 1 - tests/unit/test_uploader.py | 2 -- tests/unit/test_utils.py | 1 - 27 files changed, 7 insertions(+), 42 deletions(-) diff --git a/mapillary_tools/authenticate.py b/mapillary_tools/authenticate.py index f7ae4bfa..bdbfb602 100644 --- a/mapillary_tools/authenticate.py +++ b/mapillary_tools/authenticate.py @@ -7,7 +7,6 @@ import typing as T import jsonschema - import requests from . import api_v4, config, constants, exceptions, http diff --git a/mapillary_tools/blackvue_parser.py b/mapillary_tools/blackvue_parser.py index 56a696e3..0bafd860 100644 --- a/mapillary_tools/blackvue_parser.py +++ b/mapillary_tools/blackvue_parser.py @@ -1,7 +1,6 @@ from __future__ import annotations import dataclasses - import json import logging import re diff --git a/mapillary_tools/camm/camm_builder.py b/mapillary_tools/camm/camm_builder.py index 719d8428..af80900c 100644 --- a/mapillary_tools/camm/camm_builder.py +++ b/mapillary_tools/camm/camm_builder.py @@ -9,7 +9,6 @@ mp4_sample_parser as sample_parser, simple_mp4_builder as builder, ) - from . import camm_parser diff --git a/mapillary_tools/commands/process_and_upload.py b/mapillary_tools/commands/process_and_upload.py index 268986b8..3db511cd 100644 --- a/mapillary_tools/commands/process_and_upload.py +++ b/mapillary_tools/commands/process_and_upload.py @@ -1,7 +1,6 @@ import inspect from ..authenticate import fetch_user_items - from .process import Command as ProcessCommand from .upload import Command as UploadCommand diff --git a/mapillary_tools/commands/video_process_and_upload.py b/mapillary_tools/commands/video_process_and_upload.py index d1407a7d..dfe34e0d 100644 --- a/mapillary_tools/commands/video_process_and_upload.py +++ b/mapillary_tools/commands/video_process_and_upload.py @@ -1,7 +1,6 @@ import inspect from ..authenticate import fetch_user_items - from .upload import Command as UploadCommand from .video_process import Command as VideoProcessCommand diff --git a/mapillary_tools/http.py b/mapillary_tools/http.py index 2a384eb1..147cd2de 100644 --- a/mapillary_tools/http.py +++ b/mapillary_tools/http.py @@ -1,7 +1,6 @@ from __future__ import annotations import logging - import ssl import sys import typing as T diff --git a/mapillary_tools/serializer/gpx.py b/mapillary_tools/serializer/gpx.py index 2dbcaa84..0c6fedd1 100644 --- a/mapillary_tools/serializer/gpx.py +++ b/mapillary_tools/serializer/gpx.py @@ -12,7 +12,6 @@ import gpxpy.gpx from .. import geo, types - from ..telemetry import CAMMGPSPoint, GPSPoint from ..types import ( BaseSerializer, diff --git a/mapillary_tools/utils.py b/mapillary_tools/utils.py index 5db0839f..6dae2b97 100644 --- a/mapillary_tools/utils.py +++ b/mapillary_tools/utils.py @@ -1,7 +1,6 @@ from __future__ import annotations import concurrent.futures - import hashlib import logging import os diff --git a/tests/cli/blackvue_parser.py b/tests/cli/blackvue_parser.py index 1bb01a54..257d14f6 100644 --- a/tests/cli/blackvue_parser.py +++ b/tests/cli/blackvue_parser.py @@ -7,7 +7,6 @@ import gpxpy import gpxpy.gpx - from mapillary_tools import blackvue_parser, geo, utils diff --git a/tests/cli/camm_parser.py b/tests/cli/camm_parser.py index d44700f5..627972e3 100644 --- a/tests/cli/camm_parser.py +++ b/tests/cli/camm_parser.py @@ -7,7 +7,6 @@ import gpxpy import gpxpy.gpx - from mapillary_tools import geo, utils from mapillary_tools.camm import camm_parser diff --git a/tests/cli/gpmf_parser.py b/tests/cli/gpmf_parser.py index 2cadeb36..d9e61a10 100644 --- a/tests/cli/gpmf_parser.py +++ b/tests/cli/gpmf_parser.py @@ -11,7 +11,6 @@ import gpxpy import gpxpy.gpx import mapillary_tools.geo as geo - import mapillary_tools.gpmf.gpmf_parser as gpmf_parser import mapillary_tools.gpmf.gps_filter as gps_filter import mapillary_tools.telemetry as telemetry diff --git a/tests/cli/gps_filter.py b/tests/cli/gps_filter.py index 47c132e4..0b98fe45 100644 --- a/tests/cli/gps_filter.py +++ b/tests/cli/gps_filter.py @@ -5,7 +5,6 @@ import typing as T import gpxpy - from mapillary_tools import constants, geo, telemetry from mapillary_tools.gpmf import gps_filter diff --git a/tests/cli/simple_mp4_builder.py b/tests/cli/simple_mp4_builder.py index e239f9e4..72f3f7b0 100644 --- a/tests/cli/simple_mp4_builder.py +++ b/tests/cli/simple_mp4_builder.py @@ -2,7 +2,6 @@ from pathlib import Path from mapillary_tools.camm import camm_builder - from mapillary_tools.geotag import geotag_videos_from_video from mapillary_tools.mp4 import simple_mp4_builder as builder diff --git a/tests/cli/upload_api_v4.py b/tests/cli/upload_api_v4.py index e10b5a68..3b9e53ad 100644 --- a/tests/cli/upload_api_v4.py +++ b/tests/cli/upload_api_v4.py @@ -7,7 +7,6 @@ import requests import tqdm from mapillary_tools import api_v4, authenticate, http - from mapillary_tools.upload_api_v4 import FakeUploadService, UploadService diff --git a/tests/integration/fixtures.py b/tests/integration/fixtures.py index d1158602..88ee05b5 100644 --- a/tests/integration/fixtures.py +++ b/tests/integration/fixtures.py @@ -12,10 +12,8 @@ import exifread import jsonschema - import py.path import pytest - from mapillary_tools import upload_api_v4, utils EXECUTABLE = os.getenv( diff --git a/tests/integration/test_video_process.py b/tests/integration/test_video_process.py index f6c5437f..34f59762 100644 --- a/tests/integration/test_video_process.py +++ b/tests/integration/test_video_process.py @@ -4,7 +4,6 @@ from pathlib import Path import exifread - import py.path import pytest diff --git a/tests/unit/test_config.py b/tests/unit/test_config.py index 67af45ca..c23fbec1 100644 --- a/tests/unit/test_config.py +++ b/tests/unit/test_config.py @@ -1,7 +1,6 @@ import typing as T import py.path - from mapillary_tools import config diff --git a/tests/unit/test_exifedit.py b/tests/unit/test_exifedit.py index 94032b3a..7f7386da 100644 --- a/tests/unit/test_exifedit.py +++ b/tests/unit/test_exifedit.py @@ -7,7 +7,6 @@ import piexif import py.path - from mapillary_tools.exif_read import ExifRead from mapillary_tools.exif_write import ExifEdit from PIL import Image @@ -314,8 +313,7 @@ def test_large_thumbnail_handling(self): piexif.ImageIFD.JPEGInterchangeFormatLength: len(thumbnail_data), } - # Without the fix, this would raise: ValueError: Given thumbnail is too large. max 64kB - # With the fix, it should remove the thumbnail and succeed + # Given thumbnail is too large, max 64kB, thumbnail and 1st metadata should be removed. image_bytes = exif_edit.dump_image_bytes() # Verify the output is valid @@ -341,14 +339,12 @@ def test_large_thumbnail_handling(self): thumbnail_value is None or thumbnail_value == b"", f"Large thumbnail should have been removed but got: {thumbnail_value[:100] if thumbnail_value else None}", ) - # If 1st exists, it should be empty or not contain thumbnail reference data - if "1st" in output_exif: - # If 1st exists, it should be empty or minimal - self.assertNotIn( - piexif.ImageIFD.JPEGInterchangeFormat, - output_exif.get("1st", {}), - "Thumbnail metadata should have been removed", - ) + + first_value = output_exif.get("1st") + self.assertTrue( + first_value is None or first_value == {} or len(first_value) == 0, + f"1st metadata should have been removed but got: {first_value}", + ) def test_exif_write(tmpdir: py.path.local): diff --git a/tests/unit/test_exifread.py b/tests/unit/test_exifread.py index cf7e989d..a6d82d24 100644 --- a/tests/unit/test_exifread.py +++ b/tests/unit/test_exifread.py @@ -1,14 +1,11 @@ import datetime import os - import typing as T from pathlib import Path import py.path - import pytest from mapillary_tools import geo - from mapillary_tools.exif_read import ( _parse_coord, ExifRead, diff --git a/tests/unit/test_ffmpeg.py b/tests/unit/test_ffmpeg.py index 6c0b016e..44a25536 100644 --- a/tests/unit/test_ffmpeg.py +++ b/tests/unit/test_ffmpeg.py @@ -3,9 +3,7 @@ from pathlib import Path import py.path - import pytest - from mapillary_tools import ffmpeg from ..integration.fixtures import pytest_skip_if_not_ffmpeg_installed, setup_data diff --git a/tests/unit/test_persistent_cache.py b/tests/unit/test_persistent_cache.py index 8faaf76d..9875afa0 100644 --- a/tests/unit/test_persistent_cache.py +++ b/tests/unit/test_persistent_cache.py @@ -6,7 +6,6 @@ import traceback import pytest - from mapillary_tools.history import PersistentCache diff --git a/tests/unit/test_sample_video.py b/tests/unit/test_sample_video.py index c575c3ec..cc8118e9 100644 --- a/tests/unit/test_sample_video.py +++ b/tests/unit/test_sample_video.py @@ -9,7 +9,6 @@ import py.path import pytest - from mapillary_tools import exif_read, ffmpeg, sample_video from mapillary_tools.serializer import description diff --git a/tests/unit/test_sequence_processing.py b/tests/unit/test_sequence_processing.py index 0034fb90..df1a17fa 100644 --- a/tests/unit/test_sequence_processing.py +++ b/tests/unit/test_sequence_processing.py @@ -6,7 +6,6 @@ import py.path import pytest - from mapillary_tools import ( exceptions, geo, diff --git a/tests/unit/test_store.py b/tests/unit/test_store.py index a4a68643..5f0a95e2 100644 --- a/tests/unit/test_store.py +++ b/tests/unit/test_store.py @@ -3,7 +3,6 @@ import tempfile import pytest - from mapillary_tools.store import KeyValueStore diff --git a/tests/unit/test_upload_api_v4.py b/tests/unit/test_upload_api_v4.py index a1a71448..d282fb82 100644 --- a/tests/unit/test_upload_api_v4.py +++ b/tests/unit/test_upload_api_v4.py @@ -2,7 +2,6 @@ from pathlib import Path import py - from mapillary_tools import upload_api_v4 diff --git a/tests/unit/test_uploader.py b/tests/unit/test_uploader.py index fa2f3a72..fb4b2af3 100644 --- a/tests/unit/test_uploader.py +++ b/tests/unit/test_uploader.py @@ -5,7 +5,6 @@ import py.path import pytest - from mapillary_tools import api_v4, uploader from mapillary_tools.serializer import description @@ -619,7 +618,6 @@ def test_image_sequence_uploader_multiple_sequences( test_exif.copy(test_exif2) test_exif3 = setup_unittest_data.join("test_exif_3.jpg") test_exif.copy(test_exif3) - fixed_exif = setup_unittest_data.join("fixed_exif.jpg") # Create metadata for multiple sequences image_metadatas = [ diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index 0f735e40..3b750319 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -2,7 +2,6 @@ from pathlib import Path import py.path - from mapillary_tools import utils