Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
242 changes: 140 additions & 102 deletions piexif/_load.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import sys
import collections
from struct import unpack_from

from ._common import *
Expand All @@ -9,6 +9,9 @@
LITTLE_ENDIAN = b"\x49\x49"


Tag = collections.namedtuple('Tag', 'tag type value offset')


def load(input_data, key_is_name=False):
"""
py:function:: piexif.load(filename)
Expand All @@ -19,69 +22,135 @@ def load(input_data, key_is_name=False):
:return: Exif data({"0th":dict, "Exif":dict, "GPS":dict, "Interop":dict, "1st":dict, "thumbnail":bytes})
:rtype: dict
"""
exif_dict = {"0th":{},
"Exif":{},
"GPS":{},
"Interop":{},
"1st":{},
"thumbnail":None}
exifReader = _ExifReader.from_image(input_data)
if exifReader is None:
return exif_dict

exif_dict["0th"], first_ifd_pointer = exifReader.get_ifd_dict(exifReader.root_pointer, "0th")
CHILD_IFDS = [
("Exif", "0th", ImageIFD.ExifTag),
("GPS", "0th", ImageIFD.GPSTag),
("Interop", "Exif", ExifIFD.InteroperabilityTag),
]
for name, parent, tag in CHILD_IFDS:
if tag in exif_dict[parent]:
pointer = exif_dict[parent][tag]
if isinstance(pointer, tuple):
# To catch cases, where there are zero or multiple values
continue
exif_dict[name] = exifReader.get_ifd_dict(pointer, name)[0]
if first_ifd_pointer:
exif_dict["1st"] = exifReader.get_ifd_dict(first_ifd_pointer, "1st")[0]
if (ImageIFD.JPEGInterchangeFormat in exif_dict["1st"] and
ImageIFD.JPEGInterchangeFormatLength in exif_dict["1st"]):
end = (exif_dict["1st"][ImageIFD.JPEGInterchangeFormat] +
exif_dict["1st"][ImageIFD.JPEGInterchangeFormatLength])
thumb = exifReader.tiftag[exif_dict["1st"][ImageIFD.JPEGInterchangeFormat]:end]
exif_dict["thumbnail"] = thumb

if key_is_name:
exif_dict = _get_key_name_dict(exif_dict)
return exif_dict


class _ExifReader(object):
@classmethod
def from_image(cls, data, treat_as_path=True):
if data[0:2] == b"\xff\xd8": # JPEG
segments = split_into_segments(data)
app1 = get_exif_seg(segments)
if app1:
tiff_data = app1[10:]
else:
tiff_data = None
elif data[0:2] in (b"\x49\x49", b"\x4d\x4d"): # TIFF
tiff_data = data
elif data[0:4] == b"RIFF" and data[8:12] == b"WEBP":
tiff_data = _webp.get_exif(data)
elif data[0:4] == b"Exif": # Exif
tiff_data = data[6:]
loader = ExifLoader(key_is_name=key_is_name)
tiff_data = extract_tiff_data(input_data)
if tiff_data:
loader.load(tiff_data)
return loader.exif


def extract_tiff_data(data, treat_as_path=True):
    """Return the TIFF-structured Exif payload contained in *data*.

    :param data: image content as bytes (JPEG, TIFF, WebP or a bare
        ``Exif`` blob) or, when ``treat_as_path`` is true, a filename
        whose content is read and inspected instead
    :param bool treat_as_path: when *data* matches no known signature,
        open it as a file path and retry once on the file content
    :return: the TIFF data, or ``None`` when the image carries no Exif
    :raises InvalidImageDataError: if the content is not a recognised
        format and may not (or no longer) be treated as a path
    """
    if data[0:2] == b"\xff\xd8":  # JPEG
        app1 = get_exif_seg(split_into_segments(data))
        # Skip the first 10 bytes of the APP1 segment to reach the TIFF data.
        tiff_data = app1[10:] if app1 else None
    elif data[0:2] in (b"\x49\x49", b"\x4d\x4d"):  # TIFF
        tiff_data = data
    elif data[0:4] == b"RIFF" and data[8:12] == b"WEBP":
        tiff_data = _webp.get_exif(data)
    elif data[0:4] == b"Exif":  # Exif
        tiff_data = data[6:]
    elif treat_as_path:
        with open(data, 'rb') as f:
            # Retry on the file content; never treat that content as a path.
            return extract_tiff_data(f.read(), False)
    else:
        raise InvalidImageDataError("Given image is neither JPEG, WEBP nor TIFF.")

    if not tiff_data:
        return None
    return tiff_data



class ExifLoader(object):
    """Assemble a piexif-style Exif dictionary from raw TIFF data.

    After :meth:`load`, ``exif`` holds one dict per IFD under the keys
    ``"0th"``, ``"Exif"``, ``"GPS"``, ``"Interop"`` and ``"1st"``, plus
    ``"thumbnail"`` (bytes or ``None``).

    :param bool read_unknown: also keep tags that are not listed in ``TAGS``
    :param bool key_is_name: key the IFD dicts by tag name instead of by
        tag number
    """

    # (key in ``exif``, category in ``TAGS``) for every IFD dictionary.
    _TAG_CATEGORIES = (
        ("0th", "Image"),
        ("1st", "Image"),
        ("Exif", "Exif"),
        ("GPS", "GPS"),
        ("Interop", "Interop"),
    )

    def __init__(self, read_unknown=False, key_is_name=False):
        self._read_unknown = read_unknown
        self._key_is_name = key_is_name
        self._reader = None
        self._tiff_data = None
        self.exif = self._blank_exif()

    def _blank_exif(self):
        """Return a fresh, empty result dictionary."""
        return {
            "0th": {},
            "Exif": {},
            "GPS": {},
            "Interop": {},
            "1st": {},
            "thumbnail": None,
        }

    def _get_ifd(self, pointer, ifd_name):
        """Read the IFD at *pointer* into a ``{tag number: value}`` dict.

        :return: ``(ifd_dict, next_pointer)``; ``({}, None)`` when *pointer*
            is unusable
        """
        if not pointer or isinstance(pointer, tuple):
            # Skip null pointers, and catch cases where the pointer tag held
            # zero or multiple values.
            return {}, None
        result = {}
        if ifd_name in ["0th", "1st"]:
            ifd_name = "Image"
        ifd, next_pointer = self._reader.get_ifd(pointer)
        for tag in ifd:
            known_tag = tag.tag in TAGS[ifd_name]
            if not (self._read_unknown or known_tag):
                continue
            values = tag.value
            if known_tag:
                expected_value_type = TAGS[ifd_name][tag.tag]['type']
                if tag.type != expected_value_type:
                    try:
                        values = coerce(values, tag.type, expected_value_type)
                    except ValueError:
                        # Skip if coercion failed
                        continue
            # Single values are stored bare rather than as 1-tuples.
            if len(values) == 1:
                values = values[0]
            result[tag.tag] = values
        return result, next_pointer

    def _load_ifd(self, name, pointer):
        """Store the IFD at *pointer* under ``exif[name]``; return the next-IFD pointer."""
        ifd, next_pointer = self._get_ifd(pointer, name)
        self.exif[name] = ifd
        return next_pointer

    def _get(self, ifd_name, tag_name):
        """Return the value of *tag_name* in the loaded IFD, or ``None``."""
        return self.exif[ifd_name].get(tag_name, None)

    def _apply_tag_names(self):
        """Re-key every IFD dict in ``exif`` by tag name.

        Tags without an entry in ``TAGS`` have no name and are dropped.
        """
        result = {}
        for name, tag_category in self._TAG_CATEGORIES:
            result[name] = {}
            for tag, value in self.exif[name].items():
                if tag not in TAGS[tag_category]:
                    continue
                tag_name = TAGS[tag_category][tag]["name"]
                result[name][tag_name] = value
        result["thumbnail"] = self.exif["thumbnail"]
        self.exif = result

    def load(self, tiff_data):
        """Parse *tiff_data* and populate ``exif`` (replacing earlier results)."""
        self.exif = self._blank_exif()
        self._reader = TiffReader(tiff_data)
        self._tiff_data = tiff_data

        first_ifd_pointer = self._load_ifd("0th", self._reader.root_pointer)
        self._load_ifd("1st", first_ifd_pointer)
        # (IFD to load, IFD holding the pointer, pointer tag); order matters
        # because "Interop" is referenced from the already-loaded "Exif" IFD.
        CHILD_IFDS = [
            ("Exif", "0th", ImageIFD.ExifTag),
            ("GPS", "0th", ImageIFD.GPSTag),
            ("Interop", "Exif", ExifIFD.InteroperabilityTag),
        ]
        for name, parent, tag in CHILD_IFDS:
            pointer = self._get(parent, tag)
            self._load_ifd(name, pointer)

        thumb_off = self._get("1st", ImageIFD.JPEGInterchangeFormat)
        thumb_len = self._get("1st", ImageIFD.JPEGInterchangeFormatLength)
        # Only slice when both tags are single integers; a missing tag yields
        # None and a multi-valued one a tuple, in which case no thumbnail is set.
        if isinstance(thumb_off, int) and isinstance(thumb_len, int):
            thumb = self._tiff_data[thumb_off:thumb_off + thumb_len]
            self.exif["thumbnail"] = thumb

        if self._key_is_name:
            self._apply_tag_names()

class TiffReader(object):
def __init__(self, data):
self.tiftag = data
if len(data) < 8:
Expand Down Expand Up @@ -126,43 +195,22 @@ def _read_tag(self, pointer):
# Collate rationals
if len(format) > 1:
values = zip(*[iter(values)] * len(format))
return tag, value_type, tuple(values)
return Tag(tag, value_type, tuple(values), pointer)

def get_ifd_dict(self, pointer, ifd_name, read_unknown=False):
ifd_dict = {}
def get_ifd(self, pointer):
if pointer > len(self.tiftag) - 2:
return {}, None
return [], None
result = []
tag_count, = self._unpack_from("H", pointer)
offset = pointer + 2
tag_count = min(tag_count, (len(self.tiftag) - offset) // 12)
if ifd_name in ["0th", "1st"]:
t = "Image"
else:
t = ifd_name
for x in range(tag_count):
pointer = offset + 12 * x
read_result = self._read_tag(pointer)
if not read_result:
tag = self._read_tag(pointer)
if not tag:
# Skip broken tags
continue
tag, value_type, values = read_result
if tag in TAGS[t]:
expected_value_type = TAGS[t][tag]['type']
if value_type != expected_value_type:
try:
values = coerce(values, value_type, expected_value_type)
except ValueError:
# Skip if coercion failed
continue
if len(values) == 1:
values = values[0]
ifd_dict[tag] = values
elif read_unknown:
value_num, = self._unpack_from("L", pointer + 4)
pointer_or_value = self.tiftag[pointer + 8: pointer + 12]
ifd_dict[tag] = value_type, value_num, pointer_or_value, self.tiftag
else:
pass
result.append(tag)

pointer = offset + 12 * tag_count
if pointer + 4 < len(self.tiftag):
Expand All @@ -172,19 +220,9 @@ def get_ifd_dict(self, pointer, ifd_name, read_unknown=False):
else:
next = None

return ifd_dict, next
return result, next


def _get_key_name_dict(exif_dict):
new_dict = {
"0th":{TAGS["Image"][n]["name"]:value for n, value in exif_dict["0th"].items()},
"Exif":{TAGS["Exif"][n]["name"]:value for n, value in exif_dict["Exif"].items()},
"1st":{TAGS["Image"][n]["name"]:value for n, value in exif_dict["1st"].items()},
"GPS":{TAGS["GPS"][n]["name"]:value for n, value in exif_dict["GPS"].items()},
"Interop":{TAGS["Interop"][n]["name"]:value for n, value in exif_dict["Interop"].items()},
"thumbnail":exif_dict["thumbnail"],
}
return new_dict

def coerce(value, type, target):
if target == TYPES.Undefined:
Expand Down
32 changes: 21 additions & 11 deletions tests/s_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -642,25 +642,35 @@ class UTests(unittest.TestCase):
def test_ExifReader_return_unknown(self):
    """A tag missing from TAGS is kept when ``read_unknown=True``."""
    b1 = b"MM\x00\x2a\x00\x00\x00\x08"
    # One IFD entry: tag 0xffff, type 1, count 1, value bytes all zero.
    b2 = b"\x00\x01" + b"\xff\xff\x00\x01\x00\x00\x00\x01" + b"\x00\x00\x00\x00"
    er = piexif._load.ExifLoader(read_unknown=True)
    er.load(b1 + b2)
    self.assertEqual(er.exif['0th'][65535], 0)

def test_truncated_ifd(self):
    """An IFD declaring more entries than the data holds is still read."""
    b1 = b"MM\x00\x2a\x00\x00\x00\x08"
    # Entry count 0xffff, but only a single 12-byte entry follows.
    b2 = b"\xff\xff" + b"\x00\x0b\x00\x02\x00\x00\x00\x04" + b"FOO\x00"
    er = piexif._load.TiffReader(b1 + b2)
    ifd = {t.tag: t.value for t in er.get_ifd(8)[0]}
    self.assertEqual(ifd[ImageIFD.ProcessingSoftware], (b"FOO",))

def test_ascii_zero(self):
    """An ASCII value with an embedded NUL is read as ``b"F"``."""
    b1 = b"MM\x00\x2a\x00\x00\x00\x08"
    b2 = b"\x00\x01" + b"\x00\x0b\x00\x02\x00\x00\x00\x04" + b"F\x00OO"
    er = piexif._load.TiffReader(b1 + b2)
    ifd = {t.tag: t.value for t in er.get_ifd(8)[0]}
    self.assertEqual(ifd[ImageIFD.ProcessingSoftware], (b"F",))

def test_bad_thumb(self):
    """Loading data whose thumbnail tags hold three values each must not crash."""
    b1 = b"MM\x00\x2a\x00\x00\x00\x08"
    zero = b"\x00\x01\x87\x69\x00\x04\x00\x00\x00\x01\x00\x00\x00\x1a\x00\x00\x00\x1a"
    first = (
        b"\x00\x02" +
        b"\x02\x01\x00\x01\x00\x00\x00\x03\x01\x02\x03\x04" +
        b"\x02\x02\x00\x01\x00\x00\x00\x03\x01\x02\x03\x04"
    )
    result = piexif._load.load(b1 + zero + first)
    # should not crash; a unittest assertion is used because a bare
    # ``assert`` is removed when Python runs with -O
    self.assertTrue(result)

def test_no_first_ifd(self):
input = b'Exif\x00\x00II*\x00\x08\x00\x00\x00\00'
Expand Down