diff --git a/piexif/_load.py b/piexif/_load.py index 6b0e28b..c56a050 100644 --- a/piexif/_load.py +++ b/piexif/_load.py @@ -1,4 +1,4 @@ -import sys +import collections from struct import unpack_from from ._common import * @@ -9,6 +9,9 @@ LITTLE_ENDIAN = b"\x49\x49" +Tag = collections.namedtuple('Tag', 'tag type value offset') + + def load(input_data, key_is_name=False): """ py:function:: piexif.load(filename) @@ -19,69 +22,135 @@ def load(input_data, key_is_name=False): :return: Exif data({"0th":dict, "Exif":dict, "GPS":dict, "Interop":dict, "1st":dict, "thumbnail":bytes}) :rtype: dict """ - exif_dict = {"0th":{}, - "Exif":{}, - "GPS":{}, - "Interop":{}, - "1st":{}, - "thumbnail":None} - exifReader = _ExifReader.from_image(input_data) - if exifReader is None: - return exif_dict - - exif_dict["0th"], first_ifd_pointer = exifReader.get_ifd_dict(exifReader.root_pointer, "0th") - CHILD_IFDS = [ - ("Exif", "0th", ImageIFD.ExifTag), - ("GPS", "0th", ImageIFD.GPSTag), - ("Interop", "Exif", ExifIFD.InteroperabilityTag), - ] - for name, parent, tag in CHILD_IFDS: - if tag in exif_dict[parent]: - pointer = exif_dict[parent][tag] - if isinstance(pointer, tuple): - # To catch cases, where there are zero or multiple values - continue - exif_dict[name] = exifReader.get_ifd_dict(pointer, name)[0] - if first_ifd_pointer: - exif_dict["1st"] = exifReader.get_ifd_dict(first_ifd_pointer, "1st")[0] - if (ImageIFD.JPEGInterchangeFormat in exif_dict["1st"] and - ImageIFD.JPEGInterchangeFormatLength in exif_dict["1st"]): - end = (exif_dict["1st"][ImageIFD.JPEGInterchangeFormat] + - exif_dict["1st"][ImageIFD.JPEGInterchangeFormatLength]) - thumb = exifReader.tiftag[exif_dict["1st"][ImageIFD.JPEGInterchangeFormat]:end] - exif_dict["thumbnail"] = thumb - - if key_is_name: - exif_dict = _get_key_name_dict(exif_dict) - return exif_dict - - -class _ExifReader(object): - @classmethod - def from_image(cls, data, treat_as_path=True): - if data[0:2] == b"\xff\xd8": # JPEG - segments = split_into_segments(data) - app1 = get_exif_seg(segments) - if app1: - tiff_data = app1[10:] - else: - tiff_data = None - elif data[0:2] in (b"\x49\x49", b"\x4d\x4d"): # TIFF - tiff_data = data - elif data[0:4] == b"RIFF" and data[8:12] == b"WEBP": - tiff_data = _webp.get_exif(data) - elif data[0:4] == b"Exif": # Exif - tiff_data = data[6:] + loader = ExifLoader(key_is_name=key_is_name) + tiff_data = extract_tiff_data(input_data) + if tiff_data: + loader.load(tiff_data) + return loader.exif + + +def extract_tiff_data(data, treat_as_path=True): + if data[0:2] == b"\xff\xd8": # JPEG + segments = split_into_segments(data) + app1 = get_exif_seg(segments) + if app1: + tiff_data = app1[10:] else: - if treat_as_path: - with open(data, 'rb') as f: - return cls.from_image(f.read(), False) - else: - raise InvalidImageDataError("Given image is neither JPEG, WEBP nor TIFF.") - if not tiff_data: - return None - return cls(tiff_data) - + tiff_data = None + elif data[0:2] in (b"\x49\x49", b"\x4d\x4d"): # TIFF + tiff_data = data + elif data[0:4] == b"RIFF" and data[8:12] == b"WEBP": + tiff_data = _webp.get_exif(data) + elif data[0:4] == b"Exif": # Exif + tiff_data = data[6:] + else: + if treat_as_path: + with open(data, 'rb') as f: + return extract_tiff_data(f.read(), False) + else: + raise InvalidImageDataError("Given image is neither JPEG, WEBP nor TIFF.") + if not tiff_data: + return None + return tiff_data + + + +class ExifLoader(object): + def __init__(self, read_unknown=False, key_is_name=False): + self._read_unknown = read_unknown + self._key_is_name = key_is_name + self._reader = None + self._tiff_data = None + self.exif = self._blank_exif() + + def _blank_exif(self): + return { + "0th": {}, + "Exif": {}, + "GPS": {}, + "Interop": {}, + "1st": {}, + "thumbnail": None + } + + def _get_ifd(self, pointer, ifd_name): + if not pointer or isinstance(pointer, tuple): + # Skip null pointers, and catch cases, where pointer has zero or + # multiple values + return {}, None + result = {} + if ifd_name in ["0th", "1st"]: + ifd_name = "Image" + ifd, next = self._reader.get_ifd(pointer) + for tag in ifd: + known_tag = tag.tag in TAGS[ifd_name] + if not (self._read_unknown or known_tag): + continue + values = tag.value + if known_tag: + expected_value_type = TAGS[ifd_name][tag.tag]['type'] + if tag.type != expected_value_type: + try: + values = coerce(values, tag.type, expected_value_type) + except ValueError: + # Skip if coercion failed + continue + if len(values) == 1: + values = values[0] + result[tag.tag] = values + return result, next + + def _load_ifd(self, name, pointer): + ifd, next = self._get_ifd(pointer, name) + self.exif[name] = ifd + return next + + def _get(self, ifd_name, tag_name): + return self.exif[ifd_name].get(tag_name, None) + + def _apply_tag_names(self): + result = {} + for name, tag_category in [ + ("0th", "Image"), + ("1st", "Image"), + ("Exif", "Exif"), + ("GPS", "GPS"), + ]: + result[name] = {} + for tag, value in self.exif[name].items(): + if tag not in TAGS[tag_category]: + continue + tag_name = TAGS[tag_category][tag]["name"] + result[name][tag_name] = value + result["thumbnail"] = self.exif["thumbnail"] + self.exif = result + + def load(self, tiff_data): + self.exif = self._blank_exif() + self._reader = TiffReader(tiff_data) + self._tiff_data = tiff_data + + first_ifd_pointer = self._load_ifd("0th", self._reader.root_pointer) + self._load_ifd("1st", first_ifd_pointer) + CHILD_IFDS = [ + ("Exif", "0th", ImageIFD.ExifTag), + ("GPS", "0th", ImageIFD.GPSTag), + ("Interop", "Exif", ExifIFD.InteroperabilityTag), + ] + for name, parent, tag in CHILD_IFDS: + pointer = self._get(parent, tag) + self._load_ifd(name, pointer) + + thumb_off = self._get("1st", ImageIFD.JPEGInterchangeFormat) + thumb_len = self._get("1st", ImageIFD.JPEGInterchangeFormatLength) + if isinstance(thumb_off, int) and isinstance(thumb_len, int): + thumb = self._tiff_data[thumb_off:thumb_off + thumb_len] + self.exif["thumbnail"] = thumb + + if self._key_is_name: + self._apply_tag_names() + +class TiffReader(object): def __init__(self, data): self.tiftag = data if len(data) < 8: @@ -126,43 +195,22 @@ def _read_tag(self, pointer): # Collate rationals if len(format) > 1: values = zip(*[iter(values)] * len(format)) - return tag, value_type, tuple(values) + return Tag(tag, value_type, tuple(values), pointer) - def get_ifd_dict(self, pointer, ifd_name, read_unknown=False): - ifd_dict = {} + def get_ifd(self, pointer): if pointer > len(self.tiftag) - 2: - return {}, None + return [], None + result = [] tag_count, = self._unpack_from("H", pointer) offset = pointer + 2 tag_count = min(tag_count, (len(self.tiftag) - offset) // 12) - if ifd_name in ["0th", "1st"]: - t = "Image" - else: - t = ifd_name for x in range(tag_count): pointer = offset + 12 * x - read_result = self._read_tag(pointer) - if not read_result: + tag = self._read_tag(pointer) + if not tag: # Skip broken tags continue - tag, value_type, values = read_result - if tag in TAGS[t]: - expected_value_type = TAGS[t][tag]['type'] - if value_type != expected_value_type: - try: - values = coerce(values, value_type, expected_value_type) - except ValueError: - # Skip if coercion failed - continue - if len(values) == 1: - values = values[0] - ifd_dict[tag] = values - elif read_unknown: - value_num, = self._unpack_from("L", pointer + 4) - pointer_or_value = self.tiftag[pointer + 8: pointer + 12] - ifd_dict[tag] = value_type, value_num, pointer_or_value, self.tiftag - else: - pass + result.append(tag) pointer = offset + 12 * tag_count if pointer + 4 < len(self.tiftag): @@ -172,19 +220,9 @@ def get_ifd_dict(self, pointer, ifd_name, read_unknown=False): else: next = None - return ifd_dict, next + return result, next -def _get_key_name_dict(exif_dict): - new_dict = { - "0th":{TAGS["Image"][n]["name"]:value for n, value in exif_dict["0th"].items()}, - "Exif":{TAGS["Exif"][n]["name"]:value for n, value in exif_dict["Exif"].items()}, - "1st":{TAGS["Image"][n]["name"]:value for n, value in exif_dict["1st"].items()}, - "GPS":{TAGS["GPS"][n]["name"]:value for n, value in exif_dict["GPS"].items()}, - "Interop":{TAGS["Interop"][n]["name"]:value for n, value in exif_dict["Interop"].items()}, - "thumbnail":exif_dict["thumbnail"], - } - return new_dict def coerce(value, type, target): if target == TYPES.Undefined: diff --git a/tests/s_test.py b/tests/s_test.py index c5467eb..cfc2000 100644 --- a/tests/s_test.py +++ b/tests/s_test.py @@ -642,25 +642,35 @@ class UTests(unittest.TestCase): def test_ExifReader_return_unknown(self): b1 = b"MM\x00\x2a\x00\x00\x00\x08" b2 = b"\x00\x01" + b"\xff\xff\x00\x01\x00\x00\x00\x01" + b"\x00\x00\x00\x00" - er = piexif._load._ExifReader(b1 + b2) - ifd = er.get_ifd_dict(8, "0th", True)[0] - self.assertEqual(ifd[65535][0], 1) - self.assertEqual(ifd[65535][1], 1) - self.assertEqual(ifd[65535][2], b"\x00\x00\x00\x00") + er = piexif._load.ExifLoader(read_unknown=True) + er.load(b1 + b2) + self.assertEqual(er.exif['0th'][65535], 0) def test_truncated_ifd(self): b1 = b"MM\x00\x2a\x00\x00\x00\x08" b2 = b"\xff\xff" + b"\x00\x0b\x00\x02\x00\x00\x00\x04" + b"FOO\x00" - er = piexif._load._ExifReader(b1 + b2) - ifd = er.get_ifd_dict(8, "0th", True)[0] - self.assertEqual(ifd[ImageIFD.ProcessingSoftware], b"FOO") + er = piexif._load.TiffReader(b1 + b2) + ifd = {t.tag: t.value for t in er.get_ifd(8)[0]} + self.assertEqual(ifd[ImageIFD.ProcessingSoftware], (b"FOO",)) def test_ascii_zero(self): b1 = b"MM\x00\x2a\x00\x00\x00\x08" b2 = b"\x00\x01" + b"\x00\x0b\x00\x02\x00\x00\x00\x04" + b"F\x00OO" - er = piexif._load._ExifReader(b1 + b2) - ifd = er.get_ifd_dict(8, "0th", True)[0] - self.assertEqual(ifd[ImageIFD.ProcessingSoftware], b"F") + er = piexif._load.TiffReader(b1 + b2) + ifd = {t.tag: t.value for t in er.get_ifd(8)[0]} + self.assertEqual(ifd[ImageIFD.ProcessingSoftware], (b"F",)) + + def test_bad_thumb(self): + b1 = b"MM\x00\x2a\x00\x00\x00\x08" + zero = b"\x00\x01\x87\x69\x00\x04\x00\x00\x00\x01\x00\x00\x00\x1a\x00\x00\x00\x1a" + first = ( + b"\x00\x02" + + b"\x02\x01\x00\x01\x00\x00\x00\x03\x01\x02\x03\x04" + + b"\x02\x02\x00\x01\x00\x00\x00\x03\x01\x02\x03\x04" + ) + result = piexif._load.load(b1 + zero + first) + # should not crash + assert result def test_no_first_ifd(self): input = b'Exif\x00\x00II*\x00\x08\x00\x00\x00\00'