From 274309b671bbad1586b5a5f48bf8bc28a47f4ba4 Mon Sep 17 00:00:00 2001 From: Ustrici Date: Sat, 20 Jun 2015 13:33:21 +0200 Subject: [PATCH 01/12] Code formatting Code formatting --- PodGrab.py | 55 +++++++++++++++++++++++++++--------------------------- 1 file changed, 27 insertions(+), 28 deletions(-) diff --git a/PodGrab.py b/PodGrab.py index 59fa788..c8bb11e 100755 --- a/PodGrab.py +++ b/PodGrab.py @@ -24,7 +24,7 @@ import os import sys import argparse -import urllib.request as urllib2 +import urllib2 import xml.dom.minidom import datetime from time import gmtime, strftime, strptime, mktime @@ -77,9 +77,9 @@ def main(argv): mail = "" # Added 2011-10-06 Werner Avenant global current_directory - global m3u_file + global m3u_file now = datetime.datetime.now(); - m3u_file = str(now)[:10] + '.m3u' + m3u_file = str(now)[:10] + '.m3u' current_directory = os.path.realpath(os.path.dirname(sys.argv[0])) download_directory = DOWNLOAD_DIRECTORY @@ -120,7 +120,7 @@ def main(argv): data = open_datasource(feed_url) if not data: error_string = "Not a valid XML file or URL feed!" - has_error = 1 + has_error = 1 else: print("XML data source opened\n") mode = MODE_DOWNLOAD @@ -347,7 +347,7 @@ def iterate_feed(data, mode, download_dir, today, cur, conn, feed): if mode == MODE_DOWNLOAD: print("Bulk download. Processing...") # 2011-10-06 Replaced channel_directory with channel_title - needed for m3u file later - num_podcasts = iterate_channel(channel, today, mode, cur, conn, feed, channel_title) + num_podcasts = iterate_channel(channel, today, mode, cur, conn, feed, channel_title) print("\n", num_podcasts, "have been downloaded") elif mode == MODE_SUBSCRIBE: print("Feed to subscribe to: " + feed + ". 
Checking for database duplicate...") @@ -431,32 +431,31 @@ def write_podcast(item, channel_title, date, type): local_file = local_file + ".wma" # Check if file exists, but if the file size is zero (which happens when the user - # presses Crtl-C during a download) - the the code should go ahead and download + # presses Crtl-C during a download) - the the code should go ahead and download # as if the file didn't exist if os.path.exists(local_file) and os.path.getsize(local_file) != 0: return 'File Exists' else: print("\nDownloading " + item_file_name + " which was published on " + date) try: - req = urllib2.urlopen(item) - CHUNK = 16 * 1024 - with open(local_file, 'wb') as fp: - while True: - chunk = req.read(CHUNK) - if not chunk: break - fp.write(chunk) - - item_file_name = os.path.basename(fp.name) - - #item_file = urllib2.urlopen(item) - #output = open(local_file, 'wb') - # 2011-10-06 Werner Avenant - For some reason the file name changes when - # saved to disk - probably a python feature (sorry, only wrote my first line of python today) - #item_file_name = os.path.basename(output.name) - #output.write(item_file.read()) - #output.close() + req = urllib2.urlopen(item) + CHUNK = 16 * 1024 + with open(local_file, 'wb') as fp: + while True: + chunk = req.read(CHUNK) + if not chunk: break + fp.write(chunk) + + item_file_name = os.path.basename(fp.name) + + #item_file = urllib2.urlopen(item) + #output = open(local_file, 'wb') + # 2011-10-06 Werner Avenant - For some reason the file name changes when + # saved to disk - probably a python feature (sorry, only wrote my first line of python today) + #item_file_name = os.path.basename(output.name) + #output.write(item_file.read()) + #output.close() print("Podcast: ", item, " downloaded to: ", local_file) - # 2011-11-06 Append to m3u file output = open(current_directory + os.sep + m3u_file, 'a') output.write(DOWNLOAD_DIRECTORY + os.sep + channel_title + os.sep + item_file_name + "\n") @@ -568,9 +567,9 @@ def 
iterate_channel(chan, today, mode, cur, conn, feed, channel_title): last_ep = get_last_subscription_downloaded(cur, conn, feed) - ### NB NB - The logic here is that we get the "last_ep" before we enter the loop - ### The result is that it allows the code to "catch up" on missed episodes because - ### we never update the "last_ep" while inside the loop. + ### NB NB - The logic here is that we get the "last_ep" before we enter the loop + ### The result is that it allows the code to "catch up" on missed episodes because + ### we never update the "last_ep" while inside the loop. for item in chan.getElementsByTagName('item'): try: @@ -708,7 +707,7 @@ def get_last_subscription_downloaded(cur, conn, feed): row = (feed,) cur.execute('SELECT last_ep FROM subscriptions WHERE feed = ?', row) rec = cur.fetchone() - return rec[0] + return rec[0] if __name__ == "__main__": main(sys.argv[1:]) From bb86098cf3c452ef055aa1a0b9f2490f8dc99dd6 Mon Sep 17 00:00:00 2001 From: Ustrici Date: Sat, 20 Jun 2015 14:44:27 +0200 Subject: [PATCH 02/12] Code formatting --- PodGrab.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/PodGrab.py b/PodGrab.py index c8bb11e..a81d25e 100755 --- a/PodGrab.py +++ b/PodGrab.py @@ -159,7 +159,7 @@ def main(argv): print("Default encoding: " + sys.getdefaultencoding()) todays_date = strftime("%a, %d %b %Y %H:%M:%S", gmtime()) - print("Current Directory: ", current_directory) + print("Current Directory: " + current_directory) if does_database_exist(current_directory): connection = connect_database(current_directory) if not connection: @@ -336,27 +336,27 @@ def iterate_feed(data, mode, download_dir, today, cur, conn, feed): for channel in xml_data.getElementsByTagName('channel'): channel_title = channel.getElementsByTagName('title')[0].firstChild.data channel_link = channel.getElementsByTagName('link')[0].firstChild.data - print("Channel Title: ===" + channel_title + "===") + print("Channel Title: === " + 
channel_title + " ===") print("Channel Link: " + channel_link) channel_title = clean_string(channel_title) channel_directory = download_dir + os.sep + channel_title if not os.path.exists(channel_directory): os.makedirs(channel_directory) - print("Current Date: ", today) + print("Current Date: " + today) if mode == MODE_DOWNLOAD: print("Bulk download. Processing...") # 2011-10-06 Replaced channel_directory with channel_title - needed for m3u file later num_podcasts = iterate_channel(channel, today, mode, cur, conn, feed, channel_title) - print("\n", num_podcasts, "have been downloaded") + print("\n" + num_podcasts + "have been downloaded") elif mode == MODE_SUBSCRIBE: - print("Feed to subscribe to: " + feed + ". Checking for database duplicate...") + print("Feed to subscribe to: " + feed + ".\nChecking for database duplicate...") if not does_sub_exist(cur, conn, feed): - print("Subscribe. Processing...") + print("Subscribe.\nProcessing...") # 2011-10-06 Replaced channel_directory with channel_title - needed for m3u file later num_podcasts = iterate_channel(channel, today, mode, cur, conn, feed, channel_title) - print("\n", num_podcasts, "have been downloaded from your subscription") + print("\n" + num_podcasts + "have been downloaded from your subscription") else: print("Subscription already exists! Skipping...") elif mode == MODE_UPDATE: @@ -383,10 +383,10 @@ def clean_string(str): for c in new_string: if c.isalnum() or c == "-" or c == "." 
or c.isspace(): new_string_final = new_string_final + ''.join(c) - new_string_final = new_string_final.strip() new_string_final = new_string_final.replace(' ','-') new_string_final = new_string_final.replace('---','-') new_string_final = new_string_final.replace('--','-') + new_string_final = new_string_final.strip() return new_string_final @@ -451,18 +451,18 @@ def write_podcast(item, channel_title, date, type): #item_file = urllib2.urlopen(item) #output = open(local_file, 'wb') # 2011-10-06 Werner Avenant - For some reason the file name changes when - # saved to disk - probably a python feature (sorry, only wrote my first line of python today) + # saved to disk - probably a python feature (sorry, only wrote my first line of python today) #item_file_name = os.path.basename(output.name) #output.write(item_file.read()) #output.close() - print("Podcast: ", item, " downloaded to: ", local_file) + print("Podcast: " + item + " downloaded to: " + local_file) # 2011-11-06 Append to m3u file output = open(current_directory + os.sep + m3u_file, 'a') output.write(DOWNLOAD_DIRECTORY + os.sep + channel_title + os.sep + item_file_name + "\n") output.close() return 'Successful Write' except urllib2.URLError as e: - print("ERROR - Could not write item to file: ", e) + print("ERROR - Could not write item to file: " + e) return 'Write Error' @@ -632,7 +632,7 @@ def iterate_channel(chan, today, mode, cur, conn, feed, channel_title): #traceback.print_exc() print("This RSS item has no downloadable URL link for the podcast for '" + item_title + "'. 
Skipping...") - return str(num) + " podcasts totalling " + str(size) + " bytes" + return(str(num) + " podcast(s) totalling " + str(size) + " byte(s)") def fix_date(date): From e8831433c00c70b8f92644edee81f45855c750e8 Mon Sep 17 00:00:00 2001 From: icepic0 Date: Sat, 1 Aug 2015 14:21:20 -0400 Subject: [PATCH 03/12] Command line options and Plex file naming MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - added command line switches to control man hardcoded files, paths, and options. The defaults are the same except M3U files are off by default. I’m thinking about making some of these configurations in the database. - Added an option to save the file with the format Name.Season/Year Episode/Month+Day.Title of Episode format which looks better and allows Plex to read it as a TV series. --- PodGrab.py | 120 ++++++++++++++++++++++++++++++++++++++++++----------- README | 12 +++++- 2 files changed, 107 insertions(+), 25 deletions(-) diff --git a/PodGrab.py b/PodGrab.py index a81d25e..121de55 100755 --- a/PodGrab.py +++ b/PodGrab.py @@ -6,9 +6,16 @@ # Jonathan Baker # jon@the-node.org (http://the-node.org) -# Werner Avenant - added small changes to write M3U file of podcasts downloaded today +# Version: 1.1.3 - +# - added small changes to write M3U file of podcasts downloaded today +# Werner Avenant # werner.avenant@gmail.com (http://www.collectiveminds.co.za) +# Version: 1.1.4 - 07/31/2015 +# - added command line switches for db location, download location, plex configuration, M3U creation +# - changed mkdir to mkdirs +# David Smith + # Do with this code what you will, it's "open source". As a courtesy, # I would appreciate credit if you base your code on mine. 
If you find # a bug or think the code sucks balls, please let me know :-) @@ -35,7 +42,7 @@ import platform import traceback import unicodedata - +from subprocess import Popen, PIPE MODE_NONE = 70 @@ -51,6 +58,8 @@ MODE_IMPORT = 80 NUM_MAX_DOWNLOADS = 4 +PLEX_NAMING = 0 +CREATE_M3U = 0 DOWNLOAD_DIRECTORY = "podcasts" #DOWNLOAD_DIRECTORY = os.path.realpath("/home/hrehfeld/host/d/download/podcasts_podgrab") @@ -59,7 +68,6 @@ current_directory = '' m3u_file = '' - total_item = 0 total_size = 0 has_error = 0 @@ -81,14 +89,22 @@ def main(argv): now = datetime.datetime.now(); m3u_file = str(now)[:10] + '.m3u' current_directory = os.path.realpath(os.path.dirname(sys.argv[0])) - download_directory = DOWNLOAD_DIRECTORY + global db_name + global db_path + db_name = "PodGrab.db" + db_path=current_directory + global DOWNLOAD_DIRECTORY + global NUM_MAX_DOWNLOADS + global PLEX_NAMING + global CREATE_M3U global total_items global total_size total_items = 0 total_size = 0 data = "" + parser = argparse.ArgumentParser(description='A command line Podcast downloader for RSS XML feeds') parser.add_argument('-s', '--subscribe', action="store", dest="sub_feed_url", help='Subscribe to the following XML feed and download latest podcast') parser.add_argument('-d', '--download', action="store", dest="dl_feed_url", help='Bulk download all podcasts in the following XML feed or file') @@ -102,9 +118,38 @@ def main(argv): parser.add_argument('-io', '--import', action="store", dest="opml_import", help='Import subscriptions from OPML file') parser.add_argument('-eo', '--export', action="store_const", const="OPML_EXPORT", dest="opml_export", help='Export subscriptions to OPML file') + + parser.add_argument('-pn', '--plex-naming', action="store_true", dest="plex_naming", help='Name files with Season=Year and Epsiode=Month+Day') + parser.add_argument('-max', '--max-downloads', action="store", dest="max_downloads", help='Max number of podcasts to download') + parser.add_argument('-dir', 
'--download-directory', action="store", dest="download_directory", help='Directory to store podcasts in') + parser.add_argument('-db', '--db_path', action="store", dest="db_path", help='Location of the PodGrab.db file') + parser.add_argument('-m3u', '--create-m3u', action="store_true", dest="create_m3u", help='Create m3u files for playlists') + arguments = parser.parse_args() + if arguments.download_directory: + DOWNLOAD_DIRECTORY = arguments.download_directory + + if arguments.db_path: + db_path = arguments.db_path + + if arguments.max_downloads: + NUM_MAX_DOWNLOADS = arguments.max_downloads + print("Max items per podcast is " + str(NUM_MAX_DOWNLOADS)) + + if arguments.plex_naming: + print("PLEX naming is on") + PLEX_NAMING = 1 + else: + print("PLEX naming is off") + + if arguments.create_m3u: + print("M3U files will be created") + CREATE_M3U = 1 + else: + print("M3U files will not created") + if arguments.sub_feed_url: feed_url = arguments.sub_feed_url data = open_datasource(feed_url) @@ -160,6 +205,7 @@ def main(argv): print("Default encoding: " + sys.getdefaultencoding()) todays_date = strftime("%a, %d %b %Y %H:%M:%S", gmtime()) print("Current Directory: " + current_directory) + if does_database_exist(current_directory): connection = connect_database(current_directory) if not connection: @@ -179,16 +225,16 @@ def main(argv): setup_database(cursor, connection) print("Database setup complete") - if not os.path.exists(download_directory): + if not os.path.exists(DOWNLOAD_DIRECTORY): print("Podcast download directory is missing. Creating...") try: - os.mkdir(download_directory) - print("Download directory '" + download_directory + "' created") + os.makedirs(DOWNLOAD_DIRECTORY) + print("Download directory '" + DOWNLOAD_DIRECTORY + "' created") except OSError: error_string = "Could not create podcast download sub-directory!" 
has_error = 1 else: - print("Download directory exists: '" + download_directory + "'" ) + print("Download directory exists: '" + DOWNLOAD_DIRECTORY + "'" ) if not has_error: if mode == MODE_UNSUBSCRIBE: feed_name = get_name_from_feed(cursor, connection, feed_url) @@ -196,7 +242,7 @@ def main(argv): print("Feed does not exist in the database! Skipping...") else: feed_name = clean_string(feed_name) - channel_directory = download_directory + os.sep + feed_name + channel_directory = DOWNLOAD_DIRECTORY + os.sep + feed_name print("Deleting '" + channel_directory + "'...") delete_subscription(cursor, connection, feed_url) try : @@ -218,7 +264,7 @@ def main(argv): if not data: print("'" + feed_url + "' for '" + feed_name + "' is not a valid feed URL!") else: - message = iterate_feed(data, mode, download_directory, todays_date, cursor, connection, feed_url) + message = iterate_feed(data, mode, DOWNLOAD_DIRECTORY, todays_date, cursor, connection, feed_url) print(message) mail += message mail = mail + "\n\n" + str(total_items) + " podcasts totalling " + str(total_size) + " bytes have been downloaded." 
@@ -226,7 +272,7 @@ def main(argv): print("Have e-mail address(es) - attempting e-mail...") mail_updates(cursor, connection, mail, str(total_items)) elif mode == MODE_DOWNLOAD or mode == MODE_SUBSCRIBE: - print(iterate_feed(data, mode, download_directory, todays_date, cursor, connection, feed_url)) + print(iterate_feed(data, mode, DOWNLOAD_DIRECTORY, todays_date, cursor, connection, feed_url)) elif mode == MODE_MAIL_ADD: add_mail_user(cursor, connection, mail_address) print("E-Mail address: " + mail_address + " has been added") @@ -238,7 +284,7 @@ def main(argv): elif mode == MODE_EXPORT: export_opml_file(cursor, connection, current_directory) elif mode == MODE_IMPORT: - import_opml_file(cursor, connection, current_directory, download_directory, import_file_name) + import_opml_file(cursor, connection, current_directory, DOWNLOAD_DIRECTORY, import_file_name) else: print("Sorry, there was some sort of error: '" + error_string + "'\nExiting...\n") if connection: @@ -381,9 +427,9 @@ def clean_string(str): new_string = new_string.rstrip("-") new_string_final = '' for c in new_string: - if c.isalnum() or c == "-" or c == "." or c.isspace(): + if c.isalnum() or c == "-" or c == "_" or c == "." or c.isspace(): new_string_final = new_string_final + ''.join(c) - new_string_final = new_string_final.replace(' ','-') + new_string_final = new_string_final.replace(' ','_') new_string_final = new_string_final.replace('---','-') new_string_final = new_string_final.replace('--','-') new_string_final = new_string_final.strip() @@ -392,13 +438,21 @@ def clean_string(str): # Change 2011-10-06 - Changed chan_loc to channel_title to help with relative path names # in the m3u file -def write_podcast(item, channel_title, date, type): +def write_podcast(item, channel_title, date, type, title, desc): (item_path, item_file_name) = os.path.split(item) + plex_info = "" + item_save_name = item_file_name + + # Added name and season to the saved file name based on the date released. 
This is compatible with Plex TV inputs. + if PLEX_NAMING: + struct_time_item = datetime.datetime.strptime(fix_date(date), "%a, %d %b %Y %H:%M:%S") + plex_info = channel_title + "." + struct_time_item.strftime("S%YE%m%d") + "." + item_save_name = plex_info + title if len(item_file_name) > 50: item_file_name = item_file_name[:50] - local_file = DOWNLOAD_DIRECTORY + os.sep + channel_title + os.sep + clean_string(item_file_name) + local_file = DOWNLOAD_DIRECTORY + os.sep + channel_title + os.sep + clean_string(item_save_name) if type == "video/quicktime" or type == "audio/mp4" or type == "video/mp4": if not local_file.endswith(".mp4"): local_file = local_file + ".mp4" @@ -436,7 +490,7 @@ def write_podcast(item, channel_title, date, type): if os.path.exists(local_file) and os.path.getsize(local_file) != 0: return 'File Exists' else: - print("\nDownloading " + item_file_name + " which was published on " + date) + print("\nDownloading " + item_file_name + " as \"" + clean_string(item_save_name) + "\"" + " which was published on " + date) try: req = urllib2.urlopen(item) CHUNK = 16 * 1024 @@ -457,9 +511,11 @@ def write_podcast(item, channel_title, date, type): #output.close() print("Podcast: " + item + " downloaded to: " + local_file) # 2011-11-06 Append to m3u file - output = open(current_directory + os.sep + m3u_file, 'a') - output.write(DOWNLOAD_DIRECTORY + os.sep + channel_title + os.sep + item_file_name + "\n") - output.close() + if CREATE_M3U: + output = open(DOWNLOAD_DIRECTORY + os.sep + m3u_file, 'a') + output.write(DOWNLOAD_DIRECTORY + os.sep + channel_title + os.sep + item_file_name + "\n") + output.close() + return 'Successful Write' except urllib2.URLError as e: print("ERROR - Could not write item to file: " + e) @@ -467,8 +523,9 @@ def write_podcast(item, channel_title, date, type): def does_database_exist(curr_loc): - db_name = "PodGrab.db" - if os.path.exists(curr_loc + os.sep + db_name): + #db_name = "PodGrab.db" + #if os.path.exists(curr_loc + os.sep + 
db_name): + if os.path.exists(db_path + os.sep + db_name): return 1 else: return 0 @@ -532,9 +589,19 @@ def mail(server_url=None, sender='', to='', subject='', text=''): def connect_database(curr_loc): - conn = sqlite3.connect(curr_loc + os.sep + "PodGrab.db") + #conn = sqlite3.connect(curr_loc + os.sep + "PodGrab.db") + if not os.path.exists(db_path): + try: + print("Creating dir " + db_path) + os.makedirs(db_path) + except OSError: + error_string = "Could not create podcast database directory!" + return 0 + + conn = sqlite3.connect(db_path + os.sep + db_name) return conn + def setup_database(cur, conn): cur.execute("CREATE TABLE subscriptions (channel text, feed text, last_ep text)") cur.execute("CREATE TABLE email (address text)") @@ -574,12 +641,16 @@ def iterate_channel(chan, today, mode, cur, conn, feed, channel_title): for item in chan.getElementsByTagName('item'): try: item_title = item.getElementsByTagName('title')[0].firstChild.data + item_desc = item.getElementsByTagName('description')[0].firstChild.data item_date = item.getElementsByTagName('pubDate')[0].firstChild.data item_file = item.getElementsByTagName('enclosure')[0].getAttribute('url') item_size = item.getElementsByTagName('enclosure')[0].getAttribute('length') item_type = item.getElementsByTagName('enclosure')[0].getAttribute('type') struct_time_today = strptime(today, "%a, %d %b %Y %H:%M:%S") + #item_title = item_title.strip() + #item_desc = item_desc.strip() + has_error = 0 try: struct_time_item = strptime(fix_date(item_date), "%a, %d %b %Y %H:%M:%S") @@ -599,7 +670,7 @@ def iterate_channel(chan, today, mode, cur, conn, feed, channel_title): if not has_error: if mktime(struct_time_item) > mktime(struct_last_ep) or mode == MODE_DOWNLOAD: - saved = write_podcast(item_file, channel_title, item_date, item_type) + saved = write_podcast(item_file, channel_title, item_date, item_type, item_title, item_desc) if saved == 'File Exists': print("File Existed - updating local database's Last Episode") @@ 
-607,6 +678,7 @@ def iterate_channel(chan, today, mode, cur, conn, feed, channel_title): if saved == 'Successful Write': print("\nTitle: " + item_title) + print("Description: " + item_desc) print("Date: " + item_date) print("File: " + item_file) print("Size: " + item_size + " bytes") diff --git a/README b/README index 067d3af..4bb88bf 100644 --- a/README +++ b/README @@ -24,7 +24,7 @@ Author: Werner Avenant werner.avenant@gmail.com (http://www.collectiveminds.co.z Changes after fork: - Added support for M3U files listing all files downloaded that day - - Last Episode Detection wasn't always right. It wasn't noticable + - Last Episode Detection wasn't always right. It wasn't noticeable because if the file existed it wouldn't download the episode. Rewrote last_ep logic - Changed write_podcast to return if a file existed. This in turn @@ -33,3 +33,13 @@ Changes after fork: - Function update_subscription will check to see if the last_ep is older than the existing last_ep - Moved NUM_MAX_DOWNLOAD to the front of the file for easy configuration + +==== CHANGES MADE AFTER FORK ==== + +Author: David Smith + +Changes after fork: + + - Added option to output file names in an Season/Year Episode/Month+Day Title of Episode format + - Added command line switches for db location, download location, plex configuration, M3U creation + - Changed mkdir to mkdirs to deal with creating multiple levels of directories From 64b96a1aaa5811b655499c58ce4242ef12ddadef Mon Sep 17 00:00:00 2001 From: icepic0 Date: Sun, 2 Aug 2015 19:51:11 -0400 Subject: [PATCH 04/12] PodGrab, now with metadata MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The major update is to add Title and Description to the metadata fields in the podcasts if the do not exist. I used ffmpeg so if you don’t have it won’t work, but it shouldn’t break anything else. I also in included a new executable that just does metadata updates. 
--- .gitignore | 2 + PodGrab.py | 191 ++++++++++++++++++++++++++++++++++----------- update_metadata.py | 136 ++++++++++++++++++++++++++++++++ 3 files changed, 285 insertions(+), 44 deletions(-) create mode 100755 update_metadata.py diff --git a/.gitignore b/.gitignore index e65c467..85a53a6 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ PodGrab.db *.m3u + +test diff --git a/PodGrab.py b/PodGrab.py index 121de55..8f576af 100755 --- a/PodGrab.py +++ b/PodGrab.py @@ -42,7 +42,7 @@ import platform import traceback import unicodedata -from subprocess import Popen, PIPE +from subprocess import Popen, PIPE, call MODE_NONE = 70 @@ -60,6 +60,7 @@ NUM_MAX_DOWNLOADS = 4 PLEX_NAMING = 0 CREATE_M3U = 0 +UPDATE_METADATA = 0 DOWNLOAD_DIRECTORY = "podcasts" #DOWNLOAD_DIRECTORY = os.path.realpath("/home/hrehfeld/host/d/download/podcasts_podgrab") @@ -94,6 +95,7 @@ def main(argv): db_name = "PodGrab.db" db_path=current_directory + global UPDATE_METADATA global DOWNLOAD_DIRECTORY global NUM_MAX_DOWNLOADS global PLEX_NAMING @@ -124,10 +126,17 @@ def main(argv): parser.add_argument('-dir', '--download-directory', action="store", dest="download_directory", help='Directory to store podcasts in') parser.add_argument('-db', '--db_path', action="store", dest="db_path", help='Location of the PodGrab.db file') parser.add_argument('-m3u', '--create-m3u', action="store_true", dest="create_m3u", help='Create m3u files for playlists') + parser.add_argument('-um', '--update_metadata', action="store_true", dest="update_metadata", help='Use ffmpeg to update metadata with the title and description from the feed') arguments = parser.parse_args() + if arguments.update_metadata: + print("Metadata will be updated") + UPDATE_METADATA = 1 + else: + print("Metadata will be left alone") + if arguments.download_directory: DOWNLOAD_DIRECTORY = arguments.download_directory @@ -438,7 +447,7 @@ def clean_string(str): # Change 2011-10-06 - Changed chan_loc to channel_title to help with relative path 
names # in the m3u file -def write_podcast(item, channel_title, date, type, title, desc): +def write_podcast(item, channel_title, date, type, title, metadata_feed): (item_path, item_file_name) = os.path.split(item) plex_info = "" item_save_name = item_file_name @@ -449,40 +458,12 @@ def write_podcast(item, channel_title, date, type, title, desc): plex_info = channel_title + "." + struct_time_item.strftime("S%YE%m%d") + "." item_save_name = plex_info + title - if len(item_file_name) > 50: - item_file_name = item_file_name[:50] + if len(item_save_name) > 50: + item_save_name = item_save_name[:50] local_file = DOWNLOAD_DIRECTORY + os.sep + channel_title + os.sep + clean_string(item_save_name) - if type == "video/quicktime" or type == "audio/mp4" or type == "video/mp4": - if not local_file.endswith(".mp4"): - local_file = local_file + ".mp4" - - elif type == "video/mpeg": - if not local_file.endswith(".mpg"): - local_file = local_file + ".mpg" - elif type == "video/x-flv": - if not local_file.endswith(".flv"): - local_file = local_file + ".flv" - - elif type == "video/x-ms-wmv": - if not local_file.endswith(".wmv"): - local_file = local_file + ".wmv" - - elif type == "video/webm" or type == "audio/webm": - if not local_file.endswith(".webm"): - local_file = local_file + ".webm" - - elif type == "audio/mpeg": - if not local_file.endswith(".mp3"): - local_file = local_file + ".mp3" - - elif type == "audio/ogg" or type == "video/ogg" or type == "audio/vorbis": - if not local_file.endswith(".ogg"): - local_file = local_file + ".ogg" - elif type == "audio/x-ms-wma" or type == "audio/x-ms-wax": - if not local_file.endswith(".wma"): - local_file = local_file + ".wma" + local_file = fix_file_extention(type, local_file) # Check if file exists, but if the file size is zero (which happens when the user # presses Crtl-C during a download) - the the code should go ahead and download @@ -501,30 +482,145 @@ def write_podcast(item, channel_title, date, type, title, desc): 
fp.write(chunk) item_file_name = os.path.basename(fp.name) - - #item_file = urllib2.urlopen(item) - #output = open(local_file, 'wb') - # 2011-10-06 Werner Avenant - For some reason the file name changes when - # saved to disk - probably a python feature (sorry, only wrote my first line of python today) - #item_file_name = os.path.basename(output.name) - #output.write(item_file.read()) - #output.close() print("Podcast: " + item + " downloaded to: " + local_file) + # 2011-11-06 Append to m3u file if CREATE_M3U: + print("Creating M3U file in " + DOWNLOAD_DIRECTORY + os.sep + m3u_file) output = open(DOWNLOAD_DIRECTORY + os.sep + m3u_file, 'a') output.write(DOWNLOAD_DIRECTORY + os.sep + channel_title + os.sep + item_file_name + "\n") output.close() + # add missing metadata in the file to match metadata in the feed + if UPDATE_METADATA: + metadata_file = read_metadata(local_file) + if metadata_file: + for key in sorted(iter(metadata_file)): + print("Existing Metadata: " + key + "=" + metadata_file[key]) + metadata_write = write_metadata(local_file, metadata_feed, metadata_file) return 'Successful Write' except urllib2.URLError as e: print("ERROR - Could not write item to file: " + e) return 'Write Error' +# Fix any odd file endings +def fix_file_extention(type, local_file): + if type == "video/quicktime" or type == "audio/mp4" or type == "video/mp4": + if not local_file.endswith(".mp4"): + local_file = local_file + ".mp4" + elif type == "video/mpeg": + if not local_file.endswith(".mpg"): + local_file = local_file + ".mpg" + elif type == "video/x-flv": + if not local_file.endswith(".flv"): + local_file = local_file + ".flv" + elif type == "video/x-ms-wmv": + if not local_file.endswith(".wmv"): + local_file = local_file + ".wmv" + elif type == "video/webm" or type == "audio/webm": + if not local_file.endswith(".webm"): + local_file = local_file + ".webm" + elif type == "audio/mpeg": + if not local_file.endswith(".mp3"): + local_file = local_file + ".mp3" + elif type == 
"audio/ogg" or type == "video/ogg" or type == "audio/vorbis": + if not local_file.endswith(".ogg"): + local_file = local_file + ".ogg" + elif type == "audio/x-ms-wma" or type == "audio/x-ms-wax": + if not local_file.endswith(".wma"): + local_file = local_file + ".wma" + return(local_file) + + +# read metadata from an audio or video file. Assumes that it can call ffmpg in the path. This dependency should be fixed. +# I've only tested with mp4 video files and mp3 audio files. +def read_metadata(local_file): + metadata = metadata_feed = dict() + #print("\nReading file: " + local_file) + if not os.path.exists(local_file): + print("File not found for metadata update") + return 1 + + cmd_line = ['ffmpeg', '-loglevel', 'quiet', '-i', local_file, '-f', 'ffmetadata', '-'] + + try: + process = Popen(cmd_line, stdout=PIPE, stderr=PIPE) # I'm not sure if I want to do anything with stderr yet + stdout, stderr = process.communicate() + except OSError as e: + print >>sys.stderr, "FFMPEG Failed, aborting metadata updates:", e + return 0 + for line in stdout.splitlines(): + line.rstrip() + tokens = line.partition('=') + if tokens[2]: + #print("DATA: " + tokens[0] + " = " + tokens[2]) + if tokens[0] == 'title': + metadata['TITLE_MATCH'] = tokens[2] + elif tokens[0] == 'description' or tokens[0] == 'TDES': + metadata['DESCRIPTION_MATCH'] = tokens[2] + #elif tokens[0] == 'album': + # metadata['ALBUM_MATCH'] = tokens[2] + #elif tokens[0] == 'minor_version': + # metadata['EPISODE_MATCH'] = tokens[2] + + metadata[tokens[0]] = tokens[2] + #else: + # print("Not valid metadata: ", line) + + return(metadata) + + +# write metadata to an audio or video file. Assumes that it can call ffmpg in the path. This dependency should be fixed. 
# Write title/description metadata into an audio or video file by re-muxing
# it with ffmpeg (codec copy, so no re-encode).  Assumes ffmpeg is on the
# PATH (this dependency should eventually be made configurable).
#
# Arguments:
#   local_file    - path of the media file to update (rewritten in place)
#   metadata_feed - dict with 'title' and 'description' taken from the RSS feed
#   metadata_file - dict as returned by read_metadata(); the presence of
#                   'TITLE_MATCH' / 'DESCRIPTION_MATCH' means the file
#                   already carries that tag
#
# Returns 0 when the file was updated or already complete, non-zero on failure.
def write_metadata(local_file, metadata_feed, metadata_file):
    update_needed = 0
    cmd_line = ['ffmpeg', '-y', '-loglevel', 'quiet', '-i', local_file]
    (item_path, item_file_name) = os.path.split(local_file)
    # For ffmpeg the temp file must keep the same extension.  os.path.join
    # also fixes the old 'item_path + os.sep + ...' form, which produced a
    # root-anchored path whenever local_file had no directory component.
    tmp_file = os.path.join(item_path, "TMP_" + item_file_name)

    # Only add the tags that the file does not already carry.
    if not 'TITLE_MATCH' in metadata_file:
        #print("Adding Title: " + metadata_feed['title'])
        update_needed = 1
        cmd_line.extend(['-metadata', "title=" + metadata_feed['title']])

    if not 'DESCRIPTION_MATCH' in metadata_file:
        #print("Adding Description: " + metadata_feed['description'])
        update_needed = 1
        cmd_line.extend(['-metadata', "description=" + metadata_feed['description']])

    if update_needed:
        print("Updating Metadata on " + local_file)

        cmd_line_mapping = ['-map', '0', '-codec', 'copy']
        cmd_line_end = [tmp_file]

        try:
            rtn = call(cmd_line + cmd_line_mapping + cmd_line_end)
            if rtn == 0:
                os.rename(tmp_file, local_file)
            else:
                # Some podcasts carry extra streams (seen on the Apple Byte
                # feed, which has RTP hint streams) that '-map 0' cannot
                # copy; retry with ffmpeg's default stream selection.
                print("Unknown streams found, Trying to copy just one stream of audio and video for metadata")
                cmd_line_mapping = ['-codec', 'copy']
                rtn = call(cmd_line + cmd_line_mapping + cmd_line_end)
                if rtn != 0:
                    print("Copy Failed")
                    if os.path.exists(tmp_file):
                        os.remove(tmp_file)
                    return rtn
                else:
                    os.rename(tmp_file, local_file)
        except OSError as e:
            sys.stderr.write("Execution failed: %s\n" % e)
            return 1
        # Previously this path fell off the end and returned None; return 0
        # so success is reported consistently with the other paths.
        return 0
    else:
        print("File already has embedded title and description, no need to update the file")
        return 0


# True (1) when the PodGrab SQLite database already exists at the configured
# location.  db_path / db_name are module-level globals set up in main().
# The curr_loc parameter is no longer used but is kept so existing call
# sites continue to work.
def does_database_exist(curr_loc):
    if os.path.exists(db_path + os.sep + db_name):
        return 1
    else:
        return 0
# ---------------------------------------------------------------------------
# update_metadata.py -- stand-alone command line helper that prints the
# metadata embedded in an audio/video file and, unless -ro is given, writes
# the supplied title/description into it via ffmpeg.
# ---------------------------------------------------------------------------

def main(argv):
    # Build the command line interface.
    parser = argparse.ArgumentParser(description='A command line way to edit video and audio metadata.')
    parser.add_argument('-f', '--file', action="store", dest="file", help='File to process')
    parser.add_argument('-t', '--title', action="store", dest="title", help='Title to apply')
    parser.add_argument('-d', '--description', action="store", dest="description", help='Description to apply')
    parser.add_argument('-ro', '--read_only', action="store_true", dest="read_only", help='Only print metadata, do not write')

    # Parse the argv that was passed in; the old code called parse_args()
    # with no arguments, silently re-parsing sys.argv and ignoring the
    # argv parameter.
    arguments = parser.parse_args(argv)

    metadata_feed = dict()
    metadata_feed['title'] = arguments.title
    metadata_feed['description'] = arguments.description

    if arguments.file:
        local_file = arguments.file
    else:
        print("no file given")
        return 1

    # Read the file for any existing metadata.  read_metadata() returns an
    # int sentinel on error; bail out instead of iterating it (the old code
    # crashed with a TypeError here when the file was missing).
    metadata_file = read_metadata(local_file)
    if not isinstance(metadata_file, dict):
        return 1

    for key in sorted(iter(metadata_file)):
        print("KEY: " + key + "=" + metadata_file[key])

    if not arguments.read_only:
        print("Writing Metadata")
        write_metadata(local_file, metadata_feed, metadata_file)
    return 0


# Read metadata from an audio or video file via ffmpeg's "ffmetadata"
# export.  Assumes ffmpeg is on the PATH.  Only tested with mp4 video and
# mp3 audio files.
# Returns a dict of tag->value (with TITLE_MATCH / DESCRIPTION_MATCH flags
# added when those tags are present), 1 when the file is missing, or 0 when
# ffmpeg could not be executed.
def read_metadata(local_file):
    metadata = dict()
    print("\nReading file: " + local_file)
    if not os.path.exists(local_file):
        print("File not found for metadata update")
        return 1

    cmd_line = ['ffmpeg', '-loglevel', 'quiet', '-i', local_file, '-f', 'ffmetadata', '-']

    try:
        process = Popen(cmd_line, stdout=PIPE, stderr=PIPE)
        stdout, stderr = process.communicate()
    except OSError as e:
        sys.stderr.write("FFMPEG Failed, aborting metadata updates: %s\n" % e)
        return 0

    for line in stdout.splitlines():
        line = line.rstrip()  # was "line.rstrip()" alone - a no-op, rstrip() returns a new string
        tokens = line.partition('=')
        if tokens[2]:
            if tokens[0] == 'title':
                metadata['TITLE_MATCH'] = tokens[2]
            elif tokens[0] == 'description' or tokens[0] == 'TDES':
                metadata['DESCRIPTION_MATCH'] = tokens[2]

            metadata[tokens[0]] = tokens[2]

    return metadata


# Write title/description metadata into the file by re-muxing it with
# ffmpeg (codec copy, no re-encode).  Assumes ffmpeg is on the PATH.
# Returns 0 on success or when nothing needed updating, non-zero on failure
# (the old code's codes were inconsistent with the PodGrab.py twin of this
# function; nothing visible inspects the value, so they were normalized).
def write_metadata(local_file, metadata_feed, metadata_file):
    update_needed = 0
    cmd_line = ['ffmpeg', '-y', '-loglevel', 'quiet', '-i', local_file]
    # Keep the extension (ffmpeg infers the container from it) and keep the
    # temp file next to the original; the old '"TMP_" + local_file' form
    # produced an invalid path whenever local_file contained a directory.
    (item_path, item_file_name) = os.path.split(local_file)
    tmp_file = os.path.join(item_path, "TMP_" + item_file_name)

    # Which metadata do we have already?
    if not 'TITLE_MATCH' in metadata_file:
        print("Adding Title: " + metadata_feed['title'])
        update_needed = 1
        cmd_line.extend(['-metadata', "title=" + metadata_feed['title']])
    else:
        print("Title already exists")

    if not 'DESCRIPTION_MATCH' in metadata_file:
        print("Adding Description: " + metadata_feed['description'])
        update_needed = 1
        cmd_line.extend(['-metadata', "description=" + metadata_feed['description']])
    else:
        print("Description already exists")

    if update_needed:
        print("Updating Metadata on " + local_file)

        cmd_line_mapping = ['-map', '0', '-codec', 'copy']
        cmd_line_end = [tmp_file]

        print("Command line: " + ' '.join(cmd_line + cmd_line_mapping + cmd_line_end))
        try:
            rtn = call(cmd_line + cmd_line_mapping + cmd_line_end)
            if rtn == 0:
                os.rename(tmp_file, local_file)
            else:
                # Extra (e.g. RTP hint) streams defeat '-map 0'; retry with
                # ffmpeg's default stream selection.
                print("Trying to copy just one stream of audio and video")
                cmd_line_mapping = ['-codec', 'copy']
                rtn = call(cmd_line + cmd_line_mapping + cmd_line_end)
                if rtn != 0:
                    print("Copy Failed")
                    if os.path.exists(tmp_file):
                        os.remove(tmp_file)
                    return rtn
                else:
                    os.rename(tmp_file, local_file)
        except OSError as e:
            sys.stderr.write("Execution failed: %s\n" % e)
            # Old code returned 0 here, which looked like success.
            return 1
        return 0
    else:
        print("File already has title and description, no need to update the file")
        return 0


if __name__ == "__main__":
    main(sys.argv[1:])
switches for db location, download location, plex configuration, M3U creation # - changed mkdir to mkdirs +# Version 1.1.5 - 8/2/2015 +# - added option to populate missing metadata in the mp3/mp4 file from the information in the feed. # David Smith # Do with this code what you will, it's "open source". As a courtesy, diff --git a/README b/README index 4bb88bf..6591c6b 100644 --- a/README +++ b/README @@ -43,3 +43,5 @@ Changes after fork: - Added option to output file names in an Season/Year Episode/Month+Day Title of Episode format - Added command line switches for db location, download location, plex configuration, M3U creation - Changed mkdir to mkdirs to deal with creating multiple levels of directories + - added option to populate missing metadata in the mp3/mp4 file from the information in the feed. + From 0f357c4f2043767eac4c9299ac3963a6b14ccbf2 Mon Sep 17 00:00:00 2001 From: icepic0 Date: Mon, 3 Aug 2015 19:45:00 -0400 Subject: [PATCH 06/12] fixed max downloads MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit you have to cast to int or your input is a string…and so it is bigger than any int. This should fix max downloads not being followed if input on the command line. 
--- PodGrab.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PodGrab.py b/PodGrab.py index 4a331f2..36feb9e 100755 --- a/PodGrab.py +++ b/PodGrab.py @@ -146,7 +146,7 @@ def main(argv): db_path = arguments.db_path if arguments.max_downloads: - NUM_MAX_DOWNLOADS = arguments.max_downloads + NUM_MAX_DOWNLOADS = int(arguments.max_downloads) print("Max items per podcast is " + str(NUM_MAX_DOWNLOADS)) if arguments.plex_naming: From 3776759978a6550f3ef380a7f7446a008624a85f Mon Sep 17 00:00:00 2001 From: icepic0 Date: Fri, 7 Aug 2015 17:43:32 -0400 Subject: [PATCH 07/12] Minor: made printing subscriptions prettier --- .gitignore | 2 -- PodGrab.py | 6 +++--- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/.gitignore b/.gitignore index 85a53a6..e65c467 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,3 @@ PodGrab.db *.m3u - -test diff --git a/PodGrab.py b/PodGrab.py index 36feb9e..f867327 100755 --- a/PodGrab.py +++ b/PodGrab.py @@ -852,9 +852,9 @@ def list_subscriptions(cur, conn): try: result = cur.execute('SELECT * FROM subscriptions') for sub in result: - print("Name:\t\t", sub[0]) - print("Feed:\t\t", sub[1]) - print("Last Ep:\t", sub[2], "\n") + print("Name:\t\t" + sub[0]) + print("Feed:\t\t" + sub[1]) + print("Last Ep:\t" + sub[2] + "\n") count += 1 print(str(count) + " subscriptions present") except sqlite3.OperationalError: From 030d4695629e089b6d41331c06a6a278c2e4981a Mon Sep 17 00:00:00 2001 From: icepic0 Date: Sat, 8 Aug 2015 14:48:18 -0400 Subject: [PATCH 08/12] changed error to exit when the error is encountered --- PodGrab.py | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/PodGrab.py b/PodGrab.py index f867327..e8e33d8 100755 --- a/PodGrab.py +++ b/PodGrab.py @@ -167,6 +167,8 @@ def main(argv): if not data: error_string = "Not a valid XML file or URL feed!" 
has_error = 1 + exit_clean(error_string, 1) + else: print("XML data source opened\n") mode = MODE_SUBSCRIBE @@ -177,6 +179,7 @@ def main(argv): if not data: error_string = "Not a valid XML file or URL feed!" has_error = 1 + exit_clean(error_string, 1) else: print("XML data source opened\n") mode = MODE_DOWNLOAD @@ -212,16 +215,19 @@ def main(argv): else: error_string = "No Arguments supplied - for usage run 'PodGrab.py -h'" has_error = 1 + exit_clean(error_string, 1) print("Default encoding: " + sys.getdefaultencoding()) todays_date = strftime("%a, %d %b %Y %H:%M:%S", gmtime()) print("Current Directory: " + current_directory) +# Database Check/Create if does_database_exist(current_directory): connection = connect_database(current_directory) if not connection: error_string = "Could not connect to PodGrab database file!" has_error = 1 + exit_clean(error_string, 1) else: cursor = connection.cursor() else: @@ -230,12 +236,15 @@ def main(argv): if not connection: error_string = "Could not create PodGrab database file!" has_error = 1 + exit_clean(error_string, 1) else: print("PodGrab database created") cursor = connection.cursor() setup_database(cursor, connection) print("Database setup complete") + +# Download Directory if not os.path.exists(DOWNLOAD_DIRECTORY): print("Podcast download directory is missing. Creating...") try: @@ -244,6 +253,7 @@ def main(argv): except OSError: error_string = "Could not create podcast download sub-directory!" 
has_error = 1 + exit_clean(error_string, 1) else: print("Download directory exists: '" + DOWNLOAD_DIRECTORY + "'" ) if not has_error: @@ -297,10 +307,19 @@ def main(argv): elif mode == MODE_IMPORT: import_opml_file(cursor, connection, current_directory, DOWNLOAD_DIRECTORY, import_file_name) else: - print("Sorry, there was some sort of error: '" + error_string + "'\nExiting...\n") - if connection: - connection.close() - + #print("Sorry, there was some sort of error: '" + error_string + "'\nExiting...\n") + #if connection: + # connection.close() + exit_clean(error_string, 1) +# +# End of main() +# + +def exit_clean(error_string, error_code): + print("Sorry, there was some sort of error: '" + error_string + "'\nExiting...\n") + #if connection: + # connection.close() + sys.exit(error_code) def open_datasource(xml_url): try: From 386109a04b7773dc29ee70652bb8f6bd67961942 Mon Sep 17 00:00:00 2001 From: icepic0 Date: Sat, 8 Aug 2015 15:54:21 -0400 Subject: [PATCH 09/12] opml bug fix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit opml import could import a entry with a null feed. Fixed to make sure feed names start with “http” Added import re. 
--- PodGrab.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/PodGrab.py b/PodGrab.py index e8e33d8..9913f2d 100755 --- a/PodGrab.py +++ b/PodGrab.py @@ -45,6 +45,7 @@ import traceback import unicodedata from subprocess import Popen, PIPE, call +import re MODE_NONE = 70 @@ -256,6 +257,8 @@ def main(argv): exit_clean(error_string, 1) else: print("Download directory exists: '" + DOWNLOAD_DIRECTORY + "'" ) + +# Main execution if not has_error: if mode == MODE_UNSUBSCRIBE: feed_name = get_name_from_feed(cursor, connection, feed_url) @@ -386,10 +389,17 @@ def import_opml_file(cur, conn, cur_dir, download_dir, import_file): item_feed = item.getAttribute('xmlUrl').encode('utf-8') item_name = item.getAttribute('title').encode('utf-8') item_name = clean_string(item_name) - print("Subscription Title: " + item_name) - print("Subscription Feed: " + item_feed) + + print("Subscription Title: " + '"' + item_name + '"') + print("Subscription Feed: " + '"' + item_feed + '"') item_directory = download_dir + os.sep + item_name + # check in case the feed isn't real + # youtube opml files have a header that triggers this + if not item_feed or not re.match(r'^http', item_feed): + print("Feed not valid, ignoring") + continue + if not os.path.exists(item_directory): os.makedirs(item_directory) if not does_sub_exist(cur, conn, item_feed): From 6a3405abe26c5294a8643a11571638c43d03cfa2 Mon Sep 17 00:00:00 2001 From: icepic0 Date: Sun, 9 Aug 2015 11:57:18 -0400 Subject: [PATCH 10/12] Minor - quote to bracket updated quotes around URL to brackets to fix clickabality of the URLs in logs. 
--- PodGrab.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/PodGrab.py b/PodGrab.py index 9913f2d..9f0be95 100755 --- a/PodGrab.py +++ b/PodGrab.py @@ -283,10 +283,10 @@ def main(argv): for sub in subs: feed_name = sub[0] feed_url = sub[1] - print("Feed for subscription: '" + feed_name + "' from '" + feed_url + "' is updating...") + print("Feed for subscription: '" + feed_name + "' from <" + feed_url + "> is updating...") data = open_datasource(feed_url) if not data: - print("'" + feed_url + "' for '" + feed_name + "' is not a valid feed URL!") + print("<" + feed_url + "> for '" + feed_name + "' is not a valid feed URL!") else: message = iterate_feed(data, mode, DOWNLOAD_DIRECTORY, todays_date, cursor, connection, feed_url) print(message) From d93eba91c57358829a5f6d0f55ab77304d5d9f16 Mon Sep 17 00:00:00 2001 From: icepic0 Date: Wed, 2 Sep 2015 19:19:52 -0400 Subject: [PATCH 11/12] minor: added line breaks to XML error messages --- PodGrab.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/PodGrab.py b/PodGrab.py index 9f0be95..d42881f 100755 --- a/PodGrab.py +++ b/PodGrab.py @@ -451,11 +451,11 @@ def iterate_feed(data, mode, download_dir, today, cur, conn, feed): message += str(num_podcasts) + " have been downloaded from your subscription: '" + channel_title + "'\n" except xml.parsers.expat.ExpatError: print("ERROR - Malformed XML syntax in feed. Skipping...") - message += "0 podcasts have been downloaded from this feed due to RSS syntax problems. Please try again later" + message += "0 podcasts have been downloaded from " + channel_title + " due to RSS syntax problems. Please try again later\n" except UnicodeEncodeError as e: print(e) print("ERROR - Unicode encoding error in string. Cannot convert to ASCII. Skipping...") - message += "0 podcasts have been downloaded from this feed due to RSS syntax problems. 
Please try again later" + message += "0 podcasts have been downloaded from " + channel_title + " due to RSS syntax problems. Please try again later\n" return message From 77a6362fc2ddb90db7c03f9bf4771c2f4e5f9b91 Mon Sep 17 00:00:00 2001 From: icepic0 Date: Wed, 2 Sep 2015 19:21:09 -0400 Subject: [PATCH 12/12] feedparser version MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Feed parser understands atom and other RSS versions….but it isn’t very helpful as atom and RSS inclusions are presented differently. Needs some work. --- PodGrab-fp.py | 926 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 926 insertions(+) create mode 100755 PodGrab-fp.py diff --git a/PodGrab-fp.py b/PodGrab-fp.py new file mode 100755 index 0000000..9a5d10b --- /dev/null +++ b/PodGrab-fp.py @@ -0,0 +1,926 @@ +#!/usr/bin/env python + +# PodGrab - A Python command line audio/video podcast downloader for RSS XML feeds. +# Supported RSS item file types: MP3, M4V, OGG, FLV, MP4, MPG/MPEG, WMA, WMV, WEBM +# Version: 1.1.2 - 06/10/2011 +# Jonathan Baker +# jon@the-node.org (http://the-node.org) + +# Version: 1.1.3 - +# - added small changes to write M3U file of podcasts downloaded today +# Werner Avenant +# werner.avenant@gmail.com (http://www.collectiveminds.co.za) + +# Version: 1.1.4 - 07/31/2015 +# - added command line switches for db location, download location, plex configuration, M3U creation +# - changed mkdir to mkdirs +# Version 1.1.5 - 8/2/2015 +# - added option to populate missing metadata in the mp3/mp4 file from the information in the feed. +# David Smith + +# Version 1.1.5 - 8/14/2015 +# - fixed bug in OPML reader that would input a feed with no href, making an entry that stopped processing and could not be removed. +# - converted from xml.dom.minidom to feedparser +# David Smith + +# Do with this code what you will, it's "open source". As a courtesy, +# I would appreciate credit if you base your code on mine. 
If you find +# a bug or think the code sucks balls, please let me know :-) + +# Outstanding issues:- +# - Video podcasts which which are not direct URLs and are modified by PodGrab +# in order to be grabbed won't display their size as the filenames haven't +# been stripped of their garbage URL info yet. It'll say 0 bytes, but don't +# worry, they've downloaded. + + +from __future__ import unicode_literals +import os +import sys +import argparse +import urllib2 +import xml.dom.minidom +import feedparser # Feedparser is a non-standard library, see https://pypi.python.org/pypi/feedparser and https://pythonhosted.org/feedparser for information +import datetime +from time import gmtime, strftime, strptime, mktime +import sqlite3 +import shutil +import smtplib +from email.mime.text import MIMEText +import platform +import traceback +import unicodedata +from subprocess import Popen, PIPE, call +import re + + +MODE_NONE = 70 +MODE_SUBSCRIBE = 71 +MODE_DOWNLOAD = 72 +MODE_UNSUBSCRIBE = 73 +MODE_LIST = 74 +MODE_UPDATE = 75 +MODE_MAIL_ADD = 76 +MODE_MAIL_DELETE = 77 +MODE_MAIL_LIST = 78 +MODE_EXPORT = 79 +MODE_IMPORT = 80 + +NUM_MAX_DOWNLOADS = 4 +PLEX_NAMING = 0 +CREATE_M3U = 0 +UPDATE_METADATA = 0 + +DOWNLOAD_DIRECTORY = "podcasts" +#DOWNLOAD_DIRECTORY = os.path.realpath("/home/hrehfeld/host/d/download/podcasts_podgrab") + +# Added 2011-10-06 Werner Avenant - added current_dictory here so it can be global +current_directory = '' +m3u_file = '' + +total_item = 0 +total_size = 0 +has_error = 0 + + +def main(argv): + mode = MODE_NONE + has_error = 0 + num_podcasts = 0 + error_string = "" + feed_url = "" + feed_name = "" + mail_address = "" + message = "" + mail = "" + # Added 2011-10-06 Werner Avenant + global current_directory + global m3u_file + now = datetime.datetime.now(); + m3u_file = str(now)[:10] + '.m3u' + current_directory = os.path.realpath(os.path.dirname(sys.argv[0])) + global db_name + global db_path + db_name = "PodGrab.db" + db_path=current_directory + + global 
UPDATE_METADATA + global DOWNLOAD_DIRECTORY + global NUM_MAX_DOWNLOADS + global PLEX_NAMING + global CREATE_M3U + global total_items + global total_size + total_items = 0 + total_size = 0 + data = "" + + + parser = argparse.ArgumentParser(description='A command line Podcast downloader for RSS XML feeds') + parser.add_argument('-s', '--subscribe', action="store", dest="sub_feed_url", help='Subscribe to the following XML feed and download latest podcast') + parser.add_argument('-d', '--download', action="store", dest="dl_feed_url", help='Bulk download all podcasts in the following XML feed or file') + parser.add_argument('-un', '--unsubscribe', action="store", dest="unsub_url", help='Unsubscribe from the following Podcast feed') + parser.add_argument('-ma', '--mail-add', action="store", dest="mail_address_add", help='Add a mail address to mail subscription updates to') + parser.add_argument('-md', '--mail-delete', action="store", dest="mail_address_delete", help='Delete a mail address') + + parser.add_argument('-l', '--list', action="store_const", const="ALL", dest="list_subs", help='Lists current Podcast subscriptions') + parser.add_argument('-u', '--update', action="store_const", const="UPDATE", dest="update_subs", help='Updates all current Podcast subscriptions') + parser.add_argument('-ml', '--mail-list', action="store_const", const="MAIL", dest="list_mail", help='Lists all current mail addresses') + + parser.add_argument('-io', '--import', action="store", dest="opml_import", help='Import subscriptions from OPML file') + parser.add_argument('-eo', '--export', action="store_const", const="OPML_EXPORT", dest="opml_export", help='Export subscriptions to OPML file') + + parser.add_argument('-pn', '--plex-naming', action="store_true", dest="plex_naming", help='Name files with Season=Year and Epsiode=Month+Day') + parser.add_argument('-max', '--max-downloads', action="store", dest="max_downloads", help='Max number of podcasts to download') + parser.add_argument('-dir', 
'--download-directory', action="store", dest="download_directory", help='Directory to store podcasts in') + parser.add_argument('-db', '--db_path', action="store", dest="db_path", help='Location of the PodGrab.db file') + parser.add_argument('-m3u', '--create-m3u', action="store_true", dest="create_m3u", help='Create m3u files for playlists') + parser.add_argument('-um', '--update_metadata', action="store_true", dest="update_metadata", help='Use ffmpeg to update metadata with the title and description from the feed') + + + arguments = parser.parse_args() + + if arguments.update_metadata: + print("Metadata will be updated") + UPDATE_METADATA = 1 + else: + print("Metadata will be left alone") + + if arguments.download_directory: + DOWNLOAD_DIRECTORY = arguments.download_directory + + if arguments.db_path: + db_path = arguments.db_path + + if arguments.max_downloads: + NUM_MAX_DOWNLOADS = int(arguments.max_downloads) + print("Max items per podcast is " + str(NUM_MAX_DOWNLOADS)) + + if arguments.plex_naming: + print("PLEX naming is on") + PLEX_NAMING = 1 + else: + print("PLEX naming is off") + + if arguments.create_m3u: + print("M3U files will be created") + CREATE_M3U = 1 + else: + print("M3U files will not created") + + if arguments.sub_feed_url: + feed_url = arguments.sub_feed_url + data = open_datasource(feed_url) + if not data: + error_string = "Not a valid XML file or URL feed!" + has_error = 1 + exit_clean(error_string, 1) + else: + print("XML data source opened\n") + mode = MODE_SUBSCRIBE + + elif arguments.dl_feed_url: + feed_url = arguments.dl_feed_url + data = open_datasource(feed_url) + if not data: + error_string = "Not a valid XML file or URL feed!" 
+ has_error = 1 + exit_clean(error_string, 1) + else: + print("XML data source opened\n") + mode = MODE_DOWNLOAD + + elif arguments.unsub_url: + feed_url = arguments.unsub_url + mode = MODE_UNSUBSCRIBE + + elif arguments.list_subs: + mode = MODE_LIST + + elif arguments.update_subs: + mode = MODE_UPDATE + + elif arguments.mail_address_add: + mail_address = arguments.mail_address_add + mode = MODE_MAIL_ADD + + elif arguments.mail_address_delete: + mail_address = arguments.mail_address_delete + mode = MODE_MAIL_DELETE + + elif arguments.list_mail: + mode = MODE_MAIL_LIST + + elif arguments.opml_import: + import_file_name = arguments.opml_import + mode = MODE_IMPORT + + elif arguments.opml_export: + mode = MODE_EXPORT + + else: + error_string = "No Arguments supplied - for usage run 'PodGrab.py -h'" + has_error = 1 + exit_clean(error_string, 1) + + print("Default encoding: " + sys.getdefaultencoding()) + todays_date = strftime("%a, %d %b %Y %H:%M:%S", gmtime()) + print("Current Directory: " + current_directory) + +# Database Check/Create + if does_database_exist(current_directory): + db_connection = connect_database(current_directory) + if not db_connection: + error_string = "Could not connect to PodGrab database file!" + has_error = 1 + exit_clean(error_string, 1) + else: + db_cursor = db_connection.cursor() + else: + print("PodGrab database missing. Creating...") + db_connection = connect_database(current_directory) + if not db_connection: + error_string = "Could not create PodGrab database file!" + has_error = 1 + exit_clean(error_string, 1) + else: + print("PodGrab database created") + db_cursor = db_connection.cursor() + setup_database(db_cursor, db_connection) + print("Database setup complete") + +# Download Directory + if not os.path.exists(DOWNLOAD_DIRECTORY): + print("Podcast download directory is missing. 
Creating...") + try: + os.makedirs(DOWNLOAD_DIRECTORY) + print("Download directory '" + DOWNLOAD_DIRECTORY + "' created") + except OSError: + error_string = "Could not create podcast download sub-directory!" + has_error = 1 + exit_clean(error_string, 1) + else: + print("Download directory exists: '" + DOWNLOAD_DIRECTORY + "'" ) + +# Main execution + if not has_error: + if mode == MODE_UNSUBSCRIBE: + feed_name = get_name_from_feed(db_cursor, db_connection, feed_url) + if feed_name == "None": + print("Feed does not exist in the database! Skipping...") + else: + feed_name = clean_string(feed_name) + channel_directory = DOWNLOAD_DIRECTORY + os.sep + feed_name + print("Deleting '" + channel_directory + "'...") + delete_subscription(db_cursor, db_connection, feed_url) + try : + shutil.rmtree(channel_directory) + except OSError: + print("Subscription directory has not been found - it might have been manually deleted" ) + print("Subscription '" + feed_name + "' removed") + elif mode == MODE_LIST: + print("Listing current podcast subscriptions...\n") + list_subscriptions(db_cursor, db_connection) + elif mode == MODE_UPDATE: + print("Updating all podcast subscriptions...") + subs = get_subscriptions(db_cursor, db_connection) + for sub in subs: + feed_name = sub[0] + feed_url = sub[1] + print("Feed for subscription: '" + feed_name + "' from <" + feed_url + "> is updating...") + data = open_datasource(feed_url) + if not data: + print("<" + feed_url + "> for '" + feed_name + "' is not a valid feed URL!") + else: + message = iterate_feed(data, mode, DOWNLOAD_DIRECTORY, todays_date, db_cursor, db_connection, feed_url) + print(message) + mail += message + mail = mail + "\n\n" + str(total_items) + " podcasts totalling " + str(total_size) + " bytes have been downloaded." 
+ if has_mail_users(db_cursor, db_connection): + print("Have e-mail address(es) - attempting e-mail...") + mail_updates(db_cursor, db_connection, mail, str(total_items)) + elif mode == MODE_DOWNLOAD or mode == MODE_SUBSCRIBE: + print(iterate_feed(data, mode, DOWNLOAD_DIRECTORY, todays_date, db_cursor, db_connection, feed_url)) + elif mode == MODE_MAIL_ADD: + add_mail_user(db_cursor, db_connection, mail_address) + print("E-Mail address: " + mail_address + " has been added") + elif mode == MODE_MAIL_DELETE: + delete_mail_user(db_cursor, db_connection, mail_address) + print("E-Mail address: " + mailAddress + " has been deleted") + elif mode == MODE_MAIL_LIST: + list_mail_addresses(db_cursor, db_connection) + elif mode == MODE_EXPORT: + export_opml_file(db_cursor, db_connection, current_directory) + elif mode == MODE_IMPORT: + import_opml_file(db_cursor, db_connection, current_directory, DOWNLOAD_DIRECTORY, import_file_name) + else: + #print("Sorry, there was some sort of error: '" + error_string + "'\nExiting...\n") + #if db_connection: + # db_connection.close() + exit_clean(error_string, 1) +# +# End of main() +# + +def exit_clean(error_string, error_code): + print("Sorry, there was some sort of error: '" + error_string + "'\nExiting...\n") + #if db_connection: + # db_connection.close() + sys.exit(error_code) + + +def open_datasource(xml_url): + try: + response = urllib2.urlopen(xml_url) + except ValueError: + try: + response = open(xml_url,'r') + except ValueError: + print("ERROR - Invalid feed!") + response = False + except urllib2.URLError: + print("ERROR - Connection problems. Please try again later") + response = False + except httplib.IncompleteRead: + print("ERROR - Incomplete data read. 
# NOTE(review): the tail of open_datasource() was truncated by upstream
# line-mangling at this point.  Its visible remainder set ``response = False``
# on failure, returned ``response.read()`` when the URL/file opened, and
# returned ``response`` (i.e. False) otherwise.


def export_opml_file(cur, conn, cur_dir):
    """Export every RSS subscription in the database to a dated OPML file.

    The file is written to ``cur_dir`` as podgrab_subscriptions-YYYY-M-D.opml.
    """
    item_count = 0
    now = datetime.datetime.now()
    file_name = (cur_dir + os.sep + "podgrab_subscriptions-" + str(now.year)
                 + "-" + str(now.month) + "-" + str(now.day) + ".opml")
    subs = get_subscriptions(cur, conn)
    print("Exporting RSS subscriptions database to: '" + file_name + "' OPML file...please wait.\n")
    # NOTE(review): the header/outline literals below appear to have lost their
    # OPML/XML markup somewhere upstream -- confirm against a known-good export.
    with open(file_name, "w") as file_handle:
        file_handle.writelines("\n\n\tPodGrab Subscriptions\n\n\n")
        for sub in subs:
            feed_name = sub[0]
            feed_url = sub[1]   # presumably belongs in the outline's xmlUrl attribute -- verify
            last_ep = sub[2]    # presumably belongs in the outline too -- verify
            file_handle.writelines("\t\n")
            print("Exporting subscription '" + feed_name + "'...Done.\n")
            item_count += 1
        file_handle.writelines("\n")
    print(str(item_count) + " item(s) exported to: '" + file_name + "'. COMPLETE")


def import_opml_file(cur, conn, cur_dir, download_dir, import_file):
    """Import subscriptions from an OPML file into the database.

    ``import_file`` may be absolute (or ``..``-relative) or relative to
    ``cur_dir``.  Every outline with a valid http xmlUrl gets a download
    directory and a subscription row; duplicates are skipped.
    """
    count = 0
    print("Importing OPML file '" + import_file + "'...")
    if import_file.startswith("/") or import_file.startswith(".."):
        data = open_datasource(import_file)
        if not data:
            # BUG FIX: message used to read "ERROR =" -- wrong separator
            print("ERROR - Could not open OPML file '" + import_file + "'")
    else:
        data = open_datasource(cur_dir + os.sep + import_file)
        if not data:
            print("ERROR - Could not open OPML file '" + cur_dir + os.sep + import_file + "'")
    if data:
        print("File opened...please wait")
        try:
            xml_data = xml.dom.minidom.parseString(data)
            for item in xml_data.getElementsByTagName('outline'):
                # NOTE(review): .encode() yields bytes on Python 3, which would
                # break the concatenations below; this matches the file's
                # Python 2 usage (the patch reverted to urllib2).
                item_feed = item.getAttribute('xmlUrl').encode('utf-8')
                item_name = clean_string(item.getAttribute('title').encode('utf-8'))
                print("Subscription Title: " + '"' + item_name + '"')
                print("Subscription Feed: " + '"' + item_feed + '"')
                item_directory = download_dir + os.sep + item_name
                # Skip bogus entries -- e.g. YouTube OPML files carry a header
                # outline with no real feed URL.
                if not item_feed or not re.match(r'^http', item_feed):
                    print("Feed not valid, ignoring")
                    continue
                if not os.path.exists(item_directory):
                    os.makedirs(item_directory)
                if not does_sub_exist(cur, conn, item_feed):
                    insert_subscription(cur, conn, item_name, item_feed)
                    count = count + 1
                else:
                    print("This subscription is already present in the database. Skipping...")
                print("\n")
            print("\nA total of " + str(count) + " subscriptions have been added from OPML file: '" + import_file + "'")
            print("These will be updated on the next update run.\n")
        except xml.parsers.expat.ExpatError:
            print("ERROR - Malformed XML syntax in feed. Skipping...")


def iterate_feed(data, mode, download_dir, today, cur, conn, feed):
    """Parse one RSS feed and dispatch on mode (download/subscribe/update).

    Returns a human-readable status message; only MODE_UPDATE appends to it,
    the other modes just print their progress.
    """
    print("Iterating feed...")
    message = ""
    try:
        xml_data = feedparser.parse(data)
        channel_title = xml_data.feed.title
        channel_link = xml_data.feed.link
        print("Channel Title: === " + channel_title + " ===")
        print("Channel Link: " + channel_link)
        channel_title = clean_string(channel_title)
        channel_directory = download_dir + os.sep + channel_title
        if not os.path.exists(channel_directory):
            os.makedirs(channel_directory)
        print("Current Date: " + today)
        if mode == MODE_DOWNLOAD:
            print("Bulk download. Processing...")
            num_podcasts = iterate_channel(xml_data, today, mode, cur, conn, feed, channel_title)
            # BUG FIX: a space was missing before "have been downloaded"
            print("\n" + num_podcasts + " have been downloaded")
        elif mode == MODE_SUBSCRIBE:
            print("Feed to subscribe to: " + feed + ".\nChecking for database duplicate...")
            if not does_sub_exist(cur, conn, feed):
                print("Subscribe.\nProcessing...")
                num_podcasts = iterate_channel(xml_data, today, mode, cur, conn, feed, channel_title)
                print("\n" + num_podcasts + " have been downloaded from your subscription")
            else:
                print("Subscription already exists! Skipping...")
        elif mode == MODE_UPDATE:
            print("Updating RSS feeds. Processing...")
            num_podcasts = iterate_channel(xml_data, today, mode, cur, conn, feed, channel_title)
            message += str(num_podcasts) + " have been downloaded from your subscription: '" + channel_title + "'\n"
    except Exception as e:  # Py3-compatible syntax (was ``except Exception, e``)
        print("ERROR - Malformed XML syntax in feed. Skipping...")
        print("ERROR - " + str(e))
        message += "0 podcasts have been found from this feed due to RSS syntax problems. Please try again later"
    return message


def iterate_channel(chan, today, mode, cur, conn, feed, channel_title):
    """Walk a parsed feed's entries, downloading every episode newer than the
    subscription's recorded last episode (or everything in bulk mode).

    Returns a summary string "N podcast(s) totalling M byte(s)".
    """
    global total_items
    global total_size
    num = 0
    saved = 0
    size = 0
    last_ep = "NULL"
    print("Iterating channel...")
    if does_sub_exist(cur, conn, feed):
        print("Podcast subscription exists")
    else:
        print("Podcast subscription is new - getting previous podcast")
        insert_subscription(cur, conn, chan.feed.title, feed)
    last_ep = get_last_subscription_downloaded(cur, conn, feed)
    # NB: ``last_ep`` is read once, BEFORE the loop, and never refreshed inside
    # it -- that is what lets the loop "catch up" on every missed episode.
    for item in chan.entries:
        try:
            item_title = item.title
            item_desc = item.description
            item_date = item.published
            struct_time_item = item.published_parsed
            item_file = item.enclosures[0].href
            item_size = item.enclosures[0].length
            item_type = item.enclosures[0].type
            # Validates the caller-supplied date; a malformed ``today`` raises
            # ValueError, which is deliberately not caught here.
            struct_time_today = strptime(today, "%a, %d %b %Y %H:%M:%S")
            metadata_feed = {
                'title': item_title,
                'description': item_desc,
                'date': item_date,
                'file': item_file,
                'size': item_size,
                'type': item_type,
            }
            has_error = 0
            try:
                struct_last_ep = strptime(last_ep, "%a, %d %b %Y %H:%M:%S")
            except TypeError:
                has_error = 1
                print("This item has a badly formatted date. Cannot download!")
            except ValueError:
                has_error = 1
                print("This item has a badly formatted date. Cannot download!")
            if not has_error:
                if mktime(struct_time_item) > mktime(struct_last_ep) or mode == MODE_DOWNLOAD:
                    saved = write_podcast(item_file, channel_title, item_date, item_type, item_title, metadata_feed)
                    if saved == 'File Exists':
                        print("File Existed - updating local database's Last Episode")
                        update_subscription(cur, conn, feed, fix_date(item_date))
                    if saved == 'Successful Write':
                        print("\nTitle: " + item_title)
                        print("Description: " + item_desc)
                        print("Date: " + item_date)
                        print("File: " + item_file)
                        print("Size: " + item_size + " bytes")
                        print("Type: " + item_type)
                        update_subscription(cur, conn, feed, fix_date(item_date))
                        num += 1
                        if len(item_size):
                            size = size + int(item_size)
                            # BUG FIX: the running channel total used to be
                            # re-added to total_size on every episode,
                            # over-counting the session byte total.
                            total_size += int(item_size)
                        total_items += 1
                    if mode == MODE_SUBSCRIBE:
                        # Subscribe mode processes at most the newest episode
                        break
                    if num >= NUM_MAX_DOWNLOADS:
                        print("Maximum session download of " + str(NUM_MAX_DOWNLOADS) + " podcasts has been reached. Exiting.")
                        break
                else:
                    print("According to database we already have the episode dated " + item_date)
                    break
        except IndexError:
            print("This RSS item has no downloadable URL link for the podcast for '" + item_title + "'. Skipping...")
    return str(num) + " podcast(s) totalling " + str(size) + " byte(s)"


def clean_string(str):
    """Sanitise a string for use as a file/directory name: strip leading and
    trailing dashes, keep only alphanumerics plus ``-_.`` and whitespace, turn
    spaces into underscores and collapse dash runs.
    """
    # NOTE(review): the parameter shadows the builtin ``str``; kept unchanged
    # for interface compatibility, aliased locally to avoid confusion.
    cleaned = str
    if cleaned.startswith("-"):
        cleaned = cleaned.lstrip("-")
    if cleaned.endswith("-"):
        cleaned = cleaned.rstrip("-")
    result = "".join(c for c in cleaned if c.isalnum() or c in "-_." or c.isspace())
    result = result.replace(' ', '_').replace('---', '-').replace('--', '-')
    return result.strip()


# Change 2011-10-06 - Changed chan_loc to channel_title to help with relative
# path names in the m3u file
def write_podcast(item, channel_title, date, type, title, metadata_feed):
    """Download one enclosure URL into the channel's directory.

    Returns 'File Exists' when a non-empty copy is already present,
    'Successful Write' on success, or 'Write Error' on a URL error.
    """
    (item_path, item_file_name) = os.path.split(item)
    plex_info = ""
    item_save_name = item_file_name
    # Optional Plex-compatible naming: <channel>.S<year>E<mmdd>.<title>
    if PLEX_NAMING:
        struct_time_item = datetime.datetime.strptime(fix_date(date), "%a, %d %b %Y %H:%M:%S")
        plex_info = channel_title + "." + struct_time_item.strftime("S%YE%m%d") + "."
        item_save_name = plex_info + title
    if len(item_save_name) > 50:
        item_save_name = item_save_name[:50]
    local_file = DOWNLOAD_DIRECTORY + os.sep + channel_title + os.sep + clean_string(item_save_name)
    local_file = fix_file_extention(type, local_file)
    # A zero-byte file means a previous download was interrupted (Ctrl-C), so
    # treat it as absent and download again.
    if os.path.exists(local_file) and os.path.getsize(local_file) != 0:
        return 'File Exists'
    else:
        print("\nDownloading " + item_file_name + " as \"" + clean_string(item_save_name) + "\"" + " which was published on " + date)
        try:
            req = urllib2.urlopen(item)
            CHUNK = 16 * 1024
            with open(local_file, 'wb') as fp:
                while True:
                    chunk = req.read(CHUNK)
                    if not chunk:
                        break
                    fp.write(chunk)
            item_file_name = os.path.basename(fp.name)
            print("Podcast: " + item + " downloaded to: " + local_file)
            # 2011-11-06 Append to m3u file
            if CREATE_M3U:
                print("Creating M3U file in " + DOWNLOAD_DIRECTORY + os.sep + m3u_file)
                with open(DOWNLOAD_DIRECTORY + os.sep + m3u_file, 'a') as output:
                    output.write(DOWNLOAD_DIRECTORY + os.sep + channel_title + os.sep + item_file_name + "\n")
            # Add missing metadata in the file to match metadata in the feed
            if UPDATE_METADATA:
                metadata_file = read_metadata(local_file)
                if metadata_file:
                    for key in sorted(iter(metadata_file)):
                        print("Existing Metadata: " + key + "=" + metadata_file[key])
                    write_metadata(local_file, metadata_feed, metadata_file)
            return 'Successful Write'
        except urllib2.URLError as e:
            # BUG FIX: concatenating the exception object itself raised
            # TypeError; str(e) is required.
            print("ERROR - Could not write item to file: " + str(e))
            return 'Write Error'


def fix_file_extention(type, local_file):
    """Append the canonical extension for ``type`` (a MIME type) to
    ``local_file`` unless it is already present; unknown types pass through.
    """
    mime_to_ext = {
        "video/quicktime": ".mp4", "audio/mp4": ".mp4", "video/mp4": ".mp4",
        "video/mpeg": ".mpg",
        "video/x-flv": ".flv",
        "video/x-ms-wmv": ".wmv",
        "video/webm": ".webm", "audio/webm": ".webm",
        "audio/mpeg": ".mp3",
        "audio/ogg": ".ogg", "video/ogg": ".ogg", "audio/vorbis": ".ogg",
        "audio/x-ms-wma": ".wma", "audio/x-ms-wax": ".wma",
    }
    ext = mime_to_ext.get(type)
    if ext and not local_file.endswith(ext):
        local_file = local_file + ext
    return local_file


# read metadata from an audio or video file. Assumes that it can call ffmpeg in
# the path. This dependency should be fixed. I've only tested with mp4 video
# files and mp3 audio files.
def read_metadata(local_file):
    """Read title/description metadata from a media file via ``ffmpeg``.

    Returns a dict of raw ffmetadata key/value pairs, with marker keys
    TITLE_MATCH / DESCRIPTION_MATCH added when those tags are present.
    Returns a falsy value ({} or 0) when the file is missing or ffmpeg cannot
    be executed, so callers can simply truth-test the result.
    """
    metadata = {}
    if not os.path.exists(local_file):
        print("File not found for metadata update")
        # BUG FIX: used to return 1 (truthy), making write_podcast() try to
        # iterate an int; an empty dict keeps the "no metadata" result falsy.
        return metadata
    cmd_line = ['ffmpeg', '-loglevel', 'quiet', '-i', local_file, '-f', 'ffmetadata', '-']
    try:
        process = Popen(cmd_line, stdout=PIPE, stderr=PIPE)
        stdout, stderr = process.communicate()
    except OSError as e:
        # Py3-compatible replacement for ``print >>sys.stderr``
        sys.stderr.write("FFMPEG Failed, aborting metadata updates: " + str(e) + "\n")
        return 0
    # NOTE(review): on Python 3 ``stdout`` is bytes and partition would need
    # b'='; this matches the file's Python 2 usage.
    for line in stdout.splitlines():
        tokens = line.partition('=')
        if tokens[2]:
            if tokens[0] == 'title':
                metadata['TITLE_MATCH'] = tokens[2]
            elif tokens[0] == 'description' or tokens[0] == 'TDES':
                metadata['DESCRIPTION_MATCH'] = tokens[2]
            metadata[tokens[0]] = tokens[2]
    return metadata


# write metadata to an audio or video file. Assumes that it can call ffmpeg in
# the path. This dependency should be fixed.
def write_metadata(local_file, metadata_feed, metadata_file):
    """Embed missing title/description tags into a media file using ffmpeg.

    ``metadata_feed`` supplies values from the RSS feed; ``metadata_file`` is
    what read_metadata() found in the file.  Returns the ffmpeg exit status
    (0 also when nothing needed updating, 1 on execution failure).
    """
    update_needed = 0
    cmd_line = ['ffmpeg', '-y', '-loglevel', 'quiet', '-i', local_file]
    (item_path, item_file_name) = os.path.split(local_file)
    # ffmpeg infers the container from the extension, so the temp file must
    # keep the same extension as the target.
    tmp_file = item_path + os.sep + "TMP_" + item_file_name
    if 'TITLE_MATCH' not in metadata_file:
        update_needed = 1
        cmd_line.extend(['-metadata', "title=" + metadata_feed['title']])
    if 'DESCRIPTION_MATCH' not in metadata_file:
        update_needed = 1
        cmd_line.extend(['-metadata', "description=" + metadata_feed['description']])
    if update_needed:
        print("Updating Metadata on " + local_file)
        cmd_line_mapping = ['-map', '0', '-codec', 'copy']
        cmd_line_end = [tmp_file]
        try:
            rtn = call(cmd_line + cmd_line_mapping + cmd_line_end)
            if rtn == 0:
                os.rename(tmp_file, local_file)
            else:
                # Some podcasts (e.g. with RTP hint streams) defeat '-map 0';
                # retry letting ffmpeg pick one audio/video stream itself.
                print("Unknown streams found, Trying to copy just one stream of audio and video for metadata")
                cmd_line_mapping = ['-codec', 'copy']
                rtn = call(cmd_line + cmd_line_mapping + cmd_line_end)
                if rtn != 0:
                    print("Copy Failed")
                    if os.path.exists(tmp_file):
                        os.remove(tmp_file)
                    return rtn
                else:
                    os.rename(tmp_file, local_file)
        except OSError as e:
            # Py3-compatible replacement for ``print >>sys.stderr``
            sys.stderr.write("Execution failed: " + str(e) + "\n")
            return 1
    else:
        print("File already has embedded title and description, no need to update the file")
    return 0


def does_database_exist(curr_loc):
    """Return 1 if the PodGrab sqlite database file exists, else 0.

    NOTE(review): ``curr_loc`` is ignored; the module-level db_path/db_name
    globals decide the location -- confirm against callers before changing.
    """
    return 1 if os.path.exists(db_path + os.sep + db_name) else 0


def add_mail_user(cur, conn, address):
    """Insert an e-mail address into the notification list."""
    cur.execute('INSERT INTO email(address) VALUES (?)', (address,))
    conn.commit()


def delete_mail_user(cur, conn, address):
    """Remove an e-mail address from the notification list."""
    cur.execute('DELETE FROM email WHERE address = ?', (address,))
    conn.commit()


def get_mail_users(cur, conn):
    """Return all notification addresses as a list of 1-tuples."""
    cur.execute('SELECT address FROM email')
    return cur.fetchall()


def list_mail_addresses(cur, conn):
    """Print every address on the notification list."""
    cur.execute('SELECT * from email')
    result = cur.fetchall()
    print("Listing mail addresses...")
    for address in result:
        print("Address:\t" + address[0])


def has_mail_users(cur, conn):
    """Return 1 if at least one notification address exists, else 0.

    BUG FIX: the old code compared the fetched COUNT(*) row *tuple* against
    the string "0", which was never equal, so it always reported users
    present; read the integer count from the row instead.
    """
    cur.execute('SELECT COUNT(*) FROM email')
    return 0 if cur.fetchone()[0] == 0 else 1


def mail_updates(cur, conn, mess, num_updates):
    """E-mail the session summary ``mess`` to every registered address."""
    for address in get_mail_users(cur, conn):
        try:
            subject_line = "PodGrab Update"
            if int(num_updates) > 0:
                subject_line += " - NEW updates!"
            else:
                subject_line += " - nothing new..."
            mail('localhost', 'podgrab@' + platform.node(), address[0], subject_line, mess)
            print("Successfully sent podcast updates e-mail to: " + address[0])
        except smtplib.SMTPException:
            traceback.print_exc()
            print("Could not send podcast updates e-mail to: " + address[0])


def mail(server_url=None, sender='', to='', subject='', text=''):
    """Send a plain-text e-mail through the given SMTP server."""
    headers = "From: %s\r\nTo: %s\r\nSubject: %s\r\n\r\n" % (sender, to, subject)
    mail_server = smtplib.SMTP(server_url)
    try:
        mail_server.sendmail(sender, to, headers + text)
    finally:
        # Always close the SMTP session, even when sendmail raises
        mail_server.quit()


def connect_database(curr_loc):
    """Open (creating the directory if needed) the PodGrab sqlite database.

    Returns a sqlite3 connection, or 0 when the database directory could not
    be created.  NOTE(review): ``curr_loc`` is unused; the db_path/db_name
    globals decide the location.
    """
    if not os.path.exists(db_path):
        try:
            print("Creating dir " + db_path)
            os.makedirs(db_path)
        except OSError:
            # Previously the message was assigned to a local and silently
            # dropped; report the failure before bailing out.
            sys.stderr.write("Could not create podcast database directory!\n")
            return 0
    return sqlite3.connect(db_path + os.sep + db_name)


def setup_database(cur, conn):
    """Create the subscriptions and email tables (first run only)."""
    cur.execute("CREATE TABLE subscriptions (channel text, feed text, last_ep text)")
    cur.execute("CREATE TABLE email (address text)")
    conn.commit()


def insert_subscription(cur, conn, chan, feed):
    """Add a new subscription row with an epoch last_ep placeholder.

    BUG FIX: the channel-name replace() results were discarded (strings are
    immutable), so the intended space/dash normalisation never happened.
    """
    chan = chan.replace(' ', '-')
    chan = chan.replace('---', '-')
    # A real epoch date here avoids an ugly "if date == null" check in
    # update_subscription later.
    row = (chan, feed, "Thu, 01 Jan 1970 00:00:00")
    cur.execute('INSERT INTO subscriptions(channel, feed, last_ep) VALUES (?, ?, ?)', row)
    conn.commit()


def fix_date(date):
    """Trim an RFC-2822-style date to its first five tokens, dropping the
    timezone suffix.  (Also no longer crashes on dates with fewer tokens.)
    """
    return ' '.join(date.split(' ')[:5]).rstrip()


def does_sub_exist(cur, conn, feed):
    """Return 1 if a subscription with this feed URL exists, else 0.

    (Was ``str(cur.fetchone())[1]`` -- stringifying the row tuple and picking
    one character; reading the count directly is equivalent and robust.)
    """
    cur.execute('SELECT COUNT (*) FROM subscriptions WHERE feed = ?', (feed,))
    return 0 if cur.fetchone()[0] == 0 else 1


def delete_subscription(cur, conn, url):
    """Remove the subscription whose feed URL matches ``url``."""
    cur.execute('DELETE FROM subscriptions WHERE feed = ?', (url,))
    conn.commit()


def get_name_from_feed(cur, conn, url):
    """Return the channel name for a feed URL, or "None" when unknown."""
    cur.execute('SELECT channel from subscriptions WHERE feed = ?', (url,))
    return_string = cur.fetchone()
    try:
        return_string = ''.join(return_string)
    except TypeError:  # fetchone() gave None -> no such subscription
        return_string = "None"
    return str(return_string)


def list_subscriptions(cur, conn):
    """Print name/feed/last-episode for every subscription."""
    count = 0
    try:
        for sub in cur.execute('SELECT * FROM subscriptions'):
            print("Name:\t\t" + sub[0])
            print("Feed:\t\t" + sub[1])
            print("Last Ep:\t" + sub[2] + "\n")
            count += 1
        print(str(count) + " subscriptions present")
    except sqlite3.OperationalError:
        print("There are no current subscriptions or there was an error")


def get_subscriptions(cur, conn):
    """Return all subscription rows, or None when the table is missing.

    BUG FIX: the failure path returned the undefined name ``null`` (a
    NameError); it now returns None as intended.
    """
    try:
        cur.execute('SELECT * FROM subscriptions')
        return cur.fetchall()
    except sqlite3.OperationalError:
        print("There are no current subscriptions")
        return None


def update_subscription(cur, conn, feed, date):
    """Advance a subscription's last_ep to ``date``, never moving it
    backwards.  Presumes "null" dates were stored as the 1970-01-01 epoch
    placeholder written by insert_subscription().
    """
    existing_last_ep = get_last_subscription_downloaded(cur, conn, feed)
    if mktime(strptime(existing_last_ep, "%a, %d %b %Y %H:%M:%S")) <= mktime(strptime(date, "%a, %d %b %Y %H:%M:%S")):
        cur.execute('UPDATE subscriptions SET last_ep = ? where feed = ?', (date, feed))
        conn.commit()


def get_last_subscription_downloaded(cur, conn, feed):
    """Return the stored last_ep string for ``feed``.

    NOTE(review): raises TypeError when the feed is not subscribed
    (fetchone() returns None); callers ensure the row exists first.
    """
    cur.execute('SELECT last_ep FROM subscriptions WHERE feed = ?', (feed,))
    return cur.fetchone()[0]


if __name__ == "__main__":
    main(sys.argv[1:])