From cb208dcdc46ee1ab03fb174222a051f9fbe253d5 Mon Sep 17 00:00:00 2001 From: Keith Harrison Date: Thu, 1 Jun 2017 16:52:07 -0300 Subject: [PATCH 1/2] Adding file size checking, skipping files over the configured max files size --- f2flickr/uploadr.py | 10 +++++++++- uploadr.ini.sample | 4 ++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/f2flickr/uploadr.py b/f2flickr/uploadr.py index f9d9242..99a8b55 100644 --- a/f2flickr/uploadr.py +++ b/f2flickr/uploadr.py @@ -79,6 +79,9 @@ mpeg '''.split()) +# Max file size +MAX_FILE_SIZE = int(configdict.get('max_file_size', '1073741824')) + ## ## You shouldn't need to modify anything below here ## @@ -633,7 +636,12 @@ def grabNewImages(dirname): continue ext = f.lower().split(".")[-1] if ext in ALLOWED_EXT and not ignoreMatch(f, ignoreglobs): - images.append(os.path.normpath(os.path.join(dirpath, f))) + filepath = os.path.normpath(os.path.join(dirpath, f)) + filesize = os.path.getsize(filepath) + if filesize > MAX_FILE_SIZE: + logging.info('Skipping %s - %d bytes is over max_file_size' % (filepath, filesize)) + continue + images.append(filepath) images.sort() return images diff --git a/uploadr.ini.sample b/uploadr.ini.sample index ab73244..4c97464 100644 --- a/uploadr.ini.sample +++ b/uploadr.ini.sample @@ -52,3 +52,7 @@ override_dates = 0 # # secret = 13c314caee8b1f31 # api_key = 91dfde3ed605f6b8b9d9c38886547dcf + +# +# Max file size. Any file over max_file_size bytes will be logged and ignored. Defaults to 1GB +max_file_size = 1073741824 From 881635fab4dcadac9ad93c1998495a764fce3ade Mon Sep 17 00:00:00 2001 From: Keith Harrison Date: Tue, 27 Jun 2017 15:52:13 -0300 Subject: [PATCH 2/2] Adding some database management command line options --- f2flickr/flickr.py | 31 ++++---- f2flickr/flickr2history.py | 142 ++++++++++++++++++++++++++++++++++++- f2flickr/uploadr.py | 50 ++++++++++++- 3 files changed, 209 insertions(+), 14 deletions(-) diff --git a/f2flickr/flickr.py b/f2flickr/flickr.py index c9204e5..7108b0b 100644 --- a/f2flickr/flickr.py +++ b/f2flickr/flickr.py @@ -962,7 +962,7 @@ def getPhotos(self, per_page='', page='', **extras): #for details of each param #XXX: Could be Photo.search(cls) -def photos_search(user_id='', auth=False, tags='', tag_mode='', text='',\ +def photos_search_with_pages(user_id='', auth=False, tags='', tag_mode='', text='',\ min_upload_date='', max_upload_date='',\ min_taken_date='', max_taken_date='', \ license='', per_page='', page='', sort='',\ @@ -989,26 +989,33 @@ def photos_search(user_id='', auth=False, tags='', tag_mode='', text='',\ photos.append(_parse_photo(photo)) else: photos = [_parse_photo(data.rsp.photos.photo)] + return photos, int(data.rsp.photos.pages) + +def photos_search(user_id='', auth=False, tags='', tag_mode='', text='',\ + min_upload_date='', max_upload_date='',\ + min_taken_date='', max_taken_date='', \ + license='', per_page='', page='', sort='',\ + safe_search='', content_type='', **kwargs): + """Returns a list of Photo objects. + + If auth=True then will auth the user. Can see private etc + """ + + photos, pages = photos_search_with_pages(**locals()) + return photos def photos_search_pages(user_id='', auth=False, tags='', tag_mode='', text='',\ min_upload_date='', max_upload_date='',\ min_taken_date='', max_taken_date='', \ - license='', per_page='', page='', sort=''): + license='', per_page='', page='', sort='',\ + safe_search='', content_type='', **kwargs): """Returns the number of pages for the previous function (photos_search()) """ - method = 'flickr.photos.search' - - data = _doget(method, auth=auth, user_id=user_id, tags=tags, text=text,\ - min_upload_date=min_upload_date,\ - max_upload_date=max_upload_date, \ - min_taken_date=min_taken_date, \ - max_taken_date=max_taken_date, \ - license=license, per_page=per_page,\ - page=page, sort=sort) + photos, pages = photos_search_with_pages(**locals()) - return data.rsp.photos.pages + return pages def photos_get_recent(extras='', per_page='', page=''): """http://www.flickr.com/services/api/flickr.photos.getRecent.html diff --git a/f2flickr/flickr2history.py b/f2flickr/flickr2history.py index 4197182..e12bc7c 100644 --- a/f2flickr/flickr2history.py +++ b/f2flickr/flickr2history.py @@ -11,6 +11,7 @@ import sys import os import f2flickr.flickr as flickr +from pprint import pprint def getPhotoIDbyTag(tag, user): """ @@ -101,7 +102,102 @@ def convert_format(images, imageDir, historyFile): (num_images, num_ok, num_not_found, num_converted)) uploaded.close() -def reshelf(images, imageDir, historyFile): +def get_photos_from_flickr(): + """ + Get all photo ids from flickr + """ + logging.debug('flickr2history: get_photo_ids_from_flickr') + try: + user = flickr.test_login() + logging.debug(user.id) + except: + logging.error(sys.exc_info()[0]) + return None + + per_page = 500 + + logging.debug("Fetching page 1...") + photos, pages = flickr.photos_search_with_pages(user_id=user.id, auth=all, per_page=per_page); + photodict = {} + for photo in photos: + photodict[photo.id] = photo + + for page in range(2, pages + 1): + logging.debug("Fetching page {}...".format(page)) + photos, pages = flickr.photos_search_with_pages(user_id=user.id, auth=all, per_page=per_page, page=page); + for photo in photos: + photodict[photo.id] = photo + + return photodict + +def get_photo_ids_from_database_file(history_file): + """ + Get all photo ids from the history database file + """ + history = shelve.open(history_file) + return get_photo_ids_from_database(history) + +def get_photo_ids_from_database(history): + """ + Get all photo ids from the history database + """ + return list(filter(lambda x: not x.startswith('/'), history.keys())) + +def get_photo_paths_from_database(history): + """ + Get all photo paths from the history database + """ + return list(filter(lambda x: x.startswith('/'), history.keys())) + +def database_compare(images, image_dir, history_file, absolute_path): + history = shelve.open(history_file) + logging.info("Loading photo database from flickr...") + photos = get_photos_from_flickr() + basepath = image_dir if absolute_path else '' + + flickr_ids = set(photos.keys()) + database_ids = set(get_photo_ids_from_database(history)) + database_paths = set(get_photo_paths_from_database(history)) + filesystem_paths = set(map(lambda x: '/' + os.path.relpath(x, image_dir), images)) + + print("%s photos on flickr, %s photos on disk, %s photos in database" % (len(flickr_ids), len(images), len(database_ids))) + + notinfilesystem = list(database_paths - filesystem_paths) + notinfilesystem.sort() + print('####################################################') + print("%s photos in the database not on the filesystem..." % len(notinfilesystem)) + for path in notinfilesystem: + print("id=%s: path=%s" % (history[path][0], basepath + path)) + + notindatabase = list(filesystem_paths - database_paths) + notindatabase.sort() + print('####################################################') + print("%s photos on the filesystem not in the database..." % len(notindatabase)) + for path in notindatabase: + print("path=%s" % basepath + path) + + notinflickr = list(database_ids - flickr_ids) + notinflickr.sort() + print('####################################################') + print("%s photos in database not on flickr..." % len(notinflickr)) + for photoid in notinflickr: + print("id=%s: path=%s" % (photoid, basepath + history[photoid])) + + notindatabase = list(flickr_ids - database_ids) + notindatabase.sort() + print('####################################################') + print("%s photos on flickr not in database..." % len(notindatabase)) + for photoid in notindatabase: + tags = [tag.raw for tag in photos[photoid].tags] + hashtags = filter(lambda x: x.startswith('#'), tags) + if len(hashtags) > 0: + path = basepath + hashtags[0][1:].replace('#', ' ') + print('id=%s, path=%s, exists_local=%s' % (photoid, path, os.path.isfile(path))) + else: + print('id=%s, tags=%s' % (photoid, ', '. join(tags))) + + +def reshelf(images, imageDir, historyFile): """ Store image reference in the history file if its not there yet and if we actually can find it on Flickr. @@ -146,3 +242,47 @@ def reshelf(images, imageDir, historyFile): uploaded[ str(image)] = ( str(photo.id), file_mtime, file_size ) uploaded[ str(photo.id) ] =str(image) uploaded.close() + +def delete_photo(filename, image_dir, history_file): + logging.debug('flickr2history: Started delete_photo') + try: + user = flickr.test_login() + logging.debug(user.id) + except: + logging.error(sys.exc_info()[0]) + return None + + path = '/' + os.path.relpath(filename, image_dir) if filename.startswith(image_dir) else filename + tag = '#' + path.replace(' ', '#') + + history = shelve.open(history_file) + found_in_database = history.has_key(path) + photos = flickr.photos_search(user_id=user.id, auth=all, tags=tag, tag_mode='any') + + if history.has_key(path) or len(photos) > 0: + if history.has_key(path): + photoid, uploaded, filesize = history[path] + logging.info('Found in database: path=%s, id=%s, uploaded=%s, filesize=%s', path, photoid, uploaded, filesize) + if len(photos) > 0: + for photo in photos: + tags = [tag.raw for tag in photo.tags] + hashtags = filter(lambda x: x.startswith('#'), tags) + logging.info('Found photo on flickr: id=%s, tags=%s', photo.id, ', '.join(hashtags if len(hashtags) > 0 else tags)) + + delete_confirm = raw_input('Are you sure you want to delete these items (yes/no)? ') + if delete_confirm.lower() == 'yes' or delete_confirm.lower() == 'y': + if history.has_key(path): + photoid, uploaded, filesize = history[path] + logging.info('Deleting database entry: path=%s, id=%s, uploaded=%s, filesize=%s', path, photoid, uploaded, filesize) + del history[path] + if history.has_key(photoid): + logging.info('Deleting database entry: id=%s, path=%s', photoid, history[photoid]) + del history[photoid] + if len(photos) > 0: + for photo in photos: + logging.info('Deleting photo: %s', photo.id) + photo.delete() + else: + print('Aborted.') + else: + print('Could not find photo in database or on flickr matching: %s' % path) diff --git a/f2flickr/uploadr.py b/f2flickr/uploadr.py index 99a8b55..266cccb 100644 --- a/f2flickr/uploadr.py +++ b/f2flickr/uploadr.py @@ -36,11 +36,12 @@ from itertools import groupby from os.path import dirname import calendar +import argparse import f2flickr.flickr as flickr import f2flickr.tags2set as tags2set from f2flickr.configuration import configdict -from flickr2history import convert_format +from flickr2history import * from xml.dom import minidom # @@ -645,10 +646,27 @@ def grabNewImages(dirname): images.sort() return images +def list_history(absolutepath): + """ + Print the history database + """ + history = shelve.open(HISTORY_FILE) + keys = list(history.keys()) + keys.sort() + + for key in keys: + if key.startswith('/'): + if absolutepath: + print("{}{}".format(IMAGE_DIR, key)) + else: + print("{}".format(key)) + def main(): """ Initial entry point for the uploads """ + global HISTORY_FILE + logging.basicConfig(level=logging.DEBUG, format='%(asctime)s %(levelname)s %(filename)s:%(lineno)s - %(funcName)20s() %(message)s', filename='debug.log', @@ -663,6 +681,28 @@ def main(): console.setFormatter(logging.Formatter('%(asctime)s %(filename)s:%(lineno)s - %(funcName)20s() %(message)s')) logging.getLogger('').addHandler(console) + + parser = argparse.ArgumentParser() + parser.add_argument('-u', '--upload', action='store_true', help='Upload photos to flickr (default option)') + parser.add_argument('-l', '--list', action='store_true', help='List all the file entries in the history database') + parser.add_argument('-a', '--absolutepath', action='store_true', help='Show absolute file path when listing files') + parser.add_argument('-r', '--reshelf', action='store_true', help='Rebuild the history database') + parser.add_argument('-c', '--compare', action='store_true', help='Compare database entries with filesystem and flickr') + parser.add_argument('-f', '--historyfile', action='store', help='Use the specified history file for operations') + parser.add_argument('-d', '--delete', action='store', help='Delete the specified photo (by path) from the database and flickr') + args = parser.parse_args() + + if args.historyfile: + HISTORY_FILE = args.historyfile + + if args.list: + list_history(args.absolutepath) + sys.exit() + + if args.delete: + delete_photo(args.delete, IMAGE_DIR, HISTORY_FILE) + sys.exit() + uploadinstance = Uploadr() if not uploadinstance.checkToken(): uploadinstance.authenticate() @@ -671,6 +711,14 @@ def main(): images = grabNewImages(IMAGE_DIR) logging.info('Found %d images' % len(images)) + if args.compare: + database_compare(images, IMAGE_DIR, HISTORY_FILE, args.absolutepath) + sys.exit() + + if args.reshelf: + reshelf(images, IMAGE_DIR, HISTORY_FILE) + sys.exit() + # Convert history file to new format, if necessary. logging.info('Converting existing history file to new format, if needed') convert_format(images, IMAGE_DIR, HISTORY_FILE)