From b8371058e369c9e1803d17ec9e66436f288d28ca Mon Sep 17 00:00:00 2001 From: Huw Date: Sat, 26 Dec 2015 08:16:25 +0000 Subject: [PATCH 01/18] Removed line 580 as its suppressing errors --- PodGrab.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PodGrab.py b/PodGrab.py index 6146836..208a4cd 100755 --- a/PodGrab.py +++ b/PodGrab.py @@ -577,7 +577,7 @@ def iterate_channel(chan, today, mode, cur, conn, feed, channel_title): try: struct_last_ep = strptime(last_ep, "%a, %d %b %Y %H:%M:%S") - has_error = 0 + # has_error = 0 except TypeError: has_error = 1 print "This item has a badly formatted date. Cannot download!" From 3ff0d2afe8d4400f0e67ea5d0fb04b2b15513b15 Mon Sep 17 00:00:00 2001 From: Huw Date: Sun, 27 Dec 2015 21:42:29 +0000 Subject: [PATCH 02/18] urllib.urlretrieve instead of buffering to work on small memory --- PodGrab.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/PodGrab.py b/PodGrab.py index 208a4cd..a71fd5c 100755 --- a/PodGrab.py +++ b/PodGrab.py @@ -23,6 +23,7 @@ import os import sys import argparse +import urllib import urllib2 import xml.dom.minidom import datetime @@ -50,7 +51,7 @@ NUM_MAX_DOWNLOADS = 4 -DOWNLOAD_DIRECTORY = "podcasts" +DOWNLOAD_DIRECTORY = "downloads" # Added 2011-10-06 Werner Avenant - added current_dictory here so it can be global current_directory = '' @@ -434,18 +435,19 @@ def write_podcast(item, channel_title, date, type): else: print "\nDownloading " + item_file_name + " which was published on " + date try: - item_file = urllib2.urlopen(item) - output = open(local_file, 'wb') + urllib.urlretrieve(item, local_file) + # item_file = urllib2.urlopen(item) + # output = open(local_file, 'wb') # 2011-10-06 Werner Avenant - For some reason the file name changes when # saved to disk - probably a python feature (sorry, only wrote my first line of python today) - item_file_name = os.path.basename(output.name) - output.write(item_file.read()) - output.close() + # item_file_name = 
os.path.basename(output.name) + # output.write(item_file.read()) + # output.close() print "Podcast: ", item, " downloaded to: ", local_file # 2011-11-06 Append to m3u file output = open(current_directory + os.sep + m3u_file, 'a') - output.write(DOWNLOAD_DIRECTORY + os.sep + channel_title + os.sep + item_file_name + "\n") + output.write(DOWNLOAD_DIRECTORY + os.sep + channel_title + os.sep + local_file + "\n") output.close() return 'Successful Write' except urllib2.URLError as e: From 182475e1b5feb61072883cf7382d9b94428f702a Mon Sep 17 00:00:00 2001 From: Huw Date: Sun, 27 Dec 2015 22:12:31 +0000 Subject: [PATCH 03/18] Fix reverse order feeds & long dates --- PodGrab.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/PodGrab.py b/PodGrab.py index a71fd5c..253e2ad 100755 --- a/PodGrab.py +++ b/PodGrab.py @@ -545,22 +545,33 @@ def iterate_channel(chan, today, mode, cur, conn, feed, channel_title): saved = 0 size = 0 last_ep = "NULL" + items = chan.getElementsByTagName('item') print "Iterating channel..." 
if does_sub_exist(cur, conn, feed): print "Podcast subscription exists" - else: print "Podcast subscription is new - getting previous podcast" insert_subscription(cur, conn, chan.getElementsByTagName('title')[0].firstChild.data, feed) last_ep = get_last_subscription_downloaded(cur, conn, feed) + print "Determining feed order" + try: + firstdate = strptime(fix_date(items[0].getElementsByTagName('pubDate')[0].firstChild.data), "%a, %d %b %Y %H:%M:%S") + lastdate = strptime(fix_date(items[len(items)-1].getElementsByTagName('pubDate')[0].firstChild.data), "%a, %d %b %Y %H:%M:%S") + if mktime(lastdate) > mktime(firstdate): + items = reversed(items) + except TypeError: + error_reversing = 1 + except ValueError: + error_reversing = 1 + ### NB NB - The logic here is that we get the "last_ep" before we enter the loop ### The result is that it allows the code to "catch up" on missed episodes because ### we never update the "last_ep" while inside the loop. - for item in chan.getElementsByTagName('item'): + for item in items: try: item_title = item.getElementsByTagName('title')[0].firstChild.data item_date = item.getElementsByTagName('pubDate')[0].firstChild.data @@ -628,7 +639,11 @@ def fix_date(date): new_date = "" split_array = date.split(' ') for i in range(0,5): - new_date = new_date + split_array[i] + " " + if i == 2 + new_date = new_date + split_array[i] + " " + else + month = split_array[i] + new_date = new_date + month[:3] + " " return new_date.rstrip() From 2b626a7a797a7e49f1448a876fde0a10705edcf3 Mon Sep 17 00:00:00 2001 From: Huw Date: Sun, 27 Dec 2015 22:15:57 +0000 Subject: [PATCH 04/18] Missed colons --- PodGrab.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/PodGrab.py b/PodGrab.py index 253e2ad..a825d80 100755 --- a/PodGrab.py +++ b/PodGrab.py @@ -639,9 +639,9 @@ def fix_date(date): new_date = "" split_array = date.split(' ') for i in range(0,5): - if i == 2 + if i == 2: new_date = new_date + split_array[i] + " " - else + else: month = 
split_array[i] new_date = new_date + month[:3] + " " return new_date.rstrip() From f77a6e1657198ac798f327658a83737a61bcc988 Mon Sep 17 00:00:00 2001 From: Huw Date: Sun, 27 Dec 2015 22:18:07 +0000 Subject: [PATCH 05/18] More output --- PodGrab.py | 1 + 1 file changed, 1 insertion(+) diff --git a/PodGrab.py b/PodGrab.py index a825d80..6575f99 100755 --- a/PodGrab.py +++ b/PodGrab.py @@ -562,6 +562,7 @@ def iterate_channel(chan, today, mode, cur, conn, feed, channel_title): lastdate = strptime(fix_date(items[len(items)-1].getElementsByTagName('pubDate')[0].firstChild.data), "%a, %d %b %Y %H:%M:%S") if mktime(lastdate) > mktime(firstdate): items = reversed(items) + print "Is reverse order" except TypeError: error_reversing = 1 except ValueError: From 28fe87428146d09deed4238938e9c10688e49903 Mon Sep 17 00:00:00 2001 From: Huw Date: Sun, 27 Dec 2015 23:37:12 +0000 Subject: [PATCH 06/18] Wrong way around on date trim --- PodGrab.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/PodGrab.py b/PodGrab.py index 6575f99..4482023 100755 --- a/PodGrab.py +++ b/PodGrab.py @@ -641,10 +641,10 @@ def fix_date(date): split_array = date.split(' ') for i in range(0,5): if i == 2: - new_date = new_date + split_array[i] + " " - else: month = split_array[i] new_date = new_date + month[:3] + " " + else: + new_date = new_date + split_array[i] + " " return new_date.rstrip() From 6fc6943a90b692e6193ee6be76ebbf362c096103 Mon Sep 17 00:00:00 2001 From: Huw Date: Fri, 11 Mar 2016 06:32:44 +0000 Subject: [PATCH 07/18] Removed separate channel title directories --- PodGrab.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/PodGrab.py b/PodGrab.py index 4482023..3e50b60 100755 --- a/PodGrab.py +++ b/PodGrab.py @@ -9,6 +9,9 @@ # Werner Avenant - added small changes to write M3U file of podcasts downloaded today # werner.avenant@gmail.com (http://www.collectiveminds.co.za) +# HuwSy +# 2016/03/11 + # Do with this code what you will, it's "open 
source". As a courtesy, # I would appreciate credit if you base your code on mine. If you find # a bug or think the code sucks balls, please let me know :-) @@ -338,7 +341,8 @@ def iterate_feed(data, mode, download_dir, today, cur, conn, feed): print "Channel Link: " + channel_link channel_title = clean_string(channel_title) - channel_directory = download_dir + os.sep + channel_title + # + os.sep + channel_title + channel_directory = download_dir if not os.path.exists(channel_directory): os.makedirs(channel_directory) print "Current Date: ", today @@ -395,7 +399,8 @@ def write_podcast(item, channel_title, date, type): if len(item_file_name) > 50: item_file_name = item_file_name[:50] - local_file = current_directory + os.sep + DOWNLOAD_DIRECTORY + os.sep + channel_title + os.sep + clean_string(item_file_name) + # + os.sep + channel_title + local_file = current_directory + os.sep + DOWNLOAD_DIRECTORY + os.sep + clean_string(item_file_name) if type == "video/quicktime" or type == "audio/mp4" or type == "video/mp4": if not local_file.endswith(".mp4"): local_file = local_file + ".mp4" From b33d220cfa99f5cc6ac48f9b331ce5f675bee234 Mon Sep 17 00:00:00 2001 From: Huw Date: Fri, 11 Mar 2016 06:38:29 +0000 Subject: [PATCH 08/18] Updated to include my changes --- README | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/README b/README index 067d3af..43fa2e9 100644 --- a/README +++ b/README @@ -33,3 +33,14 @@ Changes after fork: - Function update_subscription will check to see if the last_ep is older than the existing last_ep - Moved NUM_MAX_DOWNLOAD to the front of the file for easy configuration + +==== CHANGES MADE AFTER NEXT FORK ==== + +Author: HuwSy + +Changes after fork: + +- Removed line 580 as it was suppressing valid errors +- Use of urllib.urlretrieve instead of buffering to work on small memory devices (i.e RPi) +- Fixes for feeds in reverse order and feeds with non standard date times (some not all date times parsed) +- Removed separate channel title 
directories when downloading From 4c6551b1830adfa65432df7789c10a9efdf65e3d Mon Sep 17 00:00:00 2001 From: Huw Date: Fri, 11 Mar 2016 06:39:49 +0000 Subject: [PATCH 09/18] Formatting --- README | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README b/README index 43fa2e9..3da16d2 100644 --- a/README +++ b/README @@ -40,7 +40,7 @@ Author: HuwSy Changes after fork: -- Removed line 580 as it was suppressing valid errors +- Removed line 580 as it was suppressing valid errors - Use of urllib.urlretrieve instead of buffering to work on small memory devices (i.e RPi) -- Fixes for feeds in reverse order and feeds with non standard date times (some not all date times parsed) +- Fixes for feeds in reverse order and feeds with (some) non standard date times - Removed separate channel title directories when downloading From 1733d5c1c0552989e46c5b3f76579b3507162d31 Mon Sep 17 00:00:00 2001 From: Huw Date: Thu, 28 Apr 2016 06:35:24 +0100 Subject: [PATCH 10/18] Pass user agent string --- PodGrab.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/PodGrab.py b/PodGrab.py index 3e50b60..0da983c 100755 --- a/PodGrab.py +++ b/PodGrab.py @@ -248,7 +248,9 @@ def main(argv): def open_datasource(xml_url): try: - response = urllib2.urlopen(xml_url) + opener = urllib2.build_opener() + opener.addheaders = [('User-agent', 'Mozilla/5.0')] + response = opener.open(xml_url) except ValueError: try: response = open(xml_url,'r') From 9252bc84cfa197211c5a75af68e22080aa2ba056 Mon Sep 17 00:00:00 2001 From: Huw Date: Thu, 28 Apr 2016 06:50:18 +0100 Subject: [PATCH 11/18] Default file type to mp3 on fail --- PodGrab.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/PodGrab.py b/PodGrab.py index 0da983c..5371d87 100755 --- a/PodGrab.py +++ b/PodGrab.py @@ -423,17 +423,18 @@ def write_podcast(item, channel_title, date, type): if not local_file.endswith(".webm"): local_file = local_file + ".webm" - elif type == "audio/mpeg": - if not 
local_file.endswith(".mp3"): - local_file = local_file + ".mp3" - elif type == "audio/ogg" or type == "video/ogg" or type == "audio/vorbis": if not local_file.endswith(".ogg"): local_file = local_file + ".ogg" + elif type == "audio/x-ms-wma" or type == "audio/x-ms-wax": if not local_file.endswith(".wma"): local_file = local_file + ".wma" - + + else: + if not local_file.endswith(".mp3"): + local_file = local_file + ".mp3" + # Check if file exists, but if the file size is zero (which happens when the user # presses Crtl-C during a download) - the the code should go ahead and download # as if the file didn't exist From 229cb508c9b689d1f0772198aae8e02bb1583be8 Mon Sep 17 00:00:00 2001 From: Huw Date: Sun, 1 May 2016 08:11:44 +0100 Subject: [PATCH 12/18] Dont create todays m3u by default --- PodGrab.py | 157 +++++++++++++++++++++++++++++++---------------------- 1 file changed, 92 insertions(+), 65 deletions(-) diff --git a/PodGrab.py b/PodGrab.py index 5371d87..4432dec 100755 --- a/PodGrab.py +++ b/PodGrab.py @@ -9,7 +9,7 @@ # Werner Avenant - added small changes to write M3U file of podcasts downloaded today # werner.avenant@gmail.com (http://www.collectiveminds.co.za) -# HuwSy +# HuwSy - Many changes see: https://github.com/HuwSy/PodGrab/commits/master/PodGrab.py # 2016/03/11 # Do with this code what you will, it's "open source". 
As a courtesy, @@ -56,11 +56,12 @@ DOWNLOAD_DIRECTORY = "downloads" +CREATE_M3U = False + # Added 2011-10-06 Werner Avenant - added current_dictory here so it can be global current_directory = '' m3u_file = '' - total_item = 0 total_size = 0 has_error = 0 @@ -161,6 +162,7 @@ def main(argv): print "Default encoding: " + sys.getdefaultencoding() todays_date = strftime("%a, %d %b %Y %H:%M:%S", gmtime()) print "Current Directory: ", current_directory + if does_database_exist(current_directory): connection = connect_database(current_directory) if not connection: @@ -190,6 +192,7 @@ def main(argv): has_error = 1 else: print "Download directory exists: '" + download_directory + "'" + if not has_error: if mode == MODE_UNSUBSCRIBE: feed_name = get_name_from_feed(cursor, connection, feed_url) @@ -205,9 +208,11 @@ def main(argv): except OSError: print "Subscription directory has not been found - it might have been manually deleted" print "Subscription '" + feed_name + "' removed" + elif mode == MODE_LIST: print "Listing current podcast subscriptions...\n" list_subscriptions(cursor, connection) + elif mode == MODE_UPDATE: print "Updating all podcast subscriptions..." subs = get_subscriptions(cursor, connection) @@ -226,47 +231,56 @@ def main(argv): if has_mail_users(cursor, connection): print "Have e-mail address(es) - attempting e-mail..." 
mail_updates(cursor, connection, mail, str(total_items)) + elif mode == MODE_DOWNLOAD or mode == MODE_SUBSCRIBE: print iterate_feed(data, mode, download_directory, todays_date, cursor, connection, feed_url) + elif mode == MODE_MAIL_ADD: add_mail_user(cursor, connection, mail_address) print "E-Mail address: " + mail_address + " has been added" + elif mode == MODE_MAIL_DELETE: delete_mail_user(cursor, connection, mail_address) print "E-Mail address: " + mailAddress + " has been deleted" + elif mode == MODE_MAIL_LIST: list_mail_addresses(cursor, connection) + elif mode == MODE_EXPORT: export_opml_file(cursor, connection, current_directory) + elif mode == MODE_IMPORT: import_opml_file(cursor, connection, current_directory, download_directory, import_file_name) + else: print "Sorry, there was some sort of error: '" + error_string + "'\nExiting...\n" if connection: connection.close() - + def open_datasource(xml_url): try: opener = urllib2.build_opener() opener.addheaders = [('User-agent', 'Mozilla/5.0')] response = opener.open(xml_url) - except ValueError: - try: - response = open(xml_url,'r') - except ValueError: + except ValueError: + try: + response = open(xml_url,'r') + except ValueError: print "ERROR - Invalid feed!" - response = False + response = False except urllib2.URLError: print "ERROR - Connection problems. Please try again later" response = False except httplib.IncompleteRead: print "ERROR - Incomplete data read. 
Please try again later" response = False + if response != False: return response.read() else: return response + def export_opml_file(cur, conn, cur_dir): item_count = 0 @@ -280,8 +294,8 @@ def export_opml_file(cur, conn, cur_dir): print "Exporting RSS subscriptions database to: '" + file_name + "' OPML file...please wait.\n" header = "\n\n\tPodGrab Subscriptions\n\n\n" file_handle.writelines(header) - for sub in subs: - feed_name = sub[0] + for sub in subs: + feed_name = sub[0] feed_url = sub[1] last_ep = sub[2] file_handle.writelines("\t\n") @@ -291,11 +305,12 @@ def export_opml_file(cur, conn, cur_dir): file_handle.writelines(footer) file_handle.close() print str(item_count) + " item(s) exported to: '" + file_name + "'. COMPLETE" - + def import_opml_file(cur, conn, cur_dir, download_dir, import_file): count = 0 print "Importing OPML file '" + import_file + "'..." + if import_file.startswith("/") or import_file.startswith(".."): data = open_datasource(import_file) if not data: @@ -304,6 +319,7 @@ def import_opml_file(cur, conn, cur_dir, download_dir, import_file): data = open_datasource(cur_dir + os.sep + import_file) if not data: print "ERROR - Could not open OPML file '" + cur_dir + os.sep + import_file + "'" + if data: print "File opened...please wait" try: @@ -315,10 +331,10 @@ def import_opml_file(cur, conn, cur_dir, download_dir, import_file): item_name = clean_string(item_name) print "Subscription Title: " + item_name print "Subscription Feed: " + item_feed - item_directory = download_dir + os.sep + item_name + item_directory = download_dir + os.sep + item_name if not os.path.exists(item_directory): - os.makedirs(item_directory) + os.makedirs(item_directory) if not does_sub_exist(cur, conn, item_feed): insert_subscription(cur, conn, item_name, item_feed) count = count + 1 @@ -329,44 +345,48 @@ def import_opml_file(cur, conn, cur_dir, download_dir, import_file): print "These will be updated on the next update run.\n" except xml.parsers.expat.ExpatError: 
print "ERROR - Malformed XML syntax in feed. Skipping..." - + def iterate_feed(data, mode, download_dir, today, cur, conn, feed): print "Iterating feed..." message = "" try: xml_data = xml.dom.minidom.parseString(data) - for channel in xml_data.getElementsByTagName('channel'): - channel_title = channel.getElementsByTagName('title')[0].firstChild.data - channel_link = channel.getElementsByTagName('link')[0].firstChild.data - print "Channel Title: ===" + channel_title + "===" - print "Channel Link: " + channel_link + for channel in xml_data.getElementsByTagName('channel'): + channel_title = channel.getElementsByTagName('title')[0].firstChild.data + channel_link = channel.getElementsByTagName('link')[0].firstChild.data + print "Channel Title: ===" + channel_title + "===" + print "Channel Link: " + channel_link channel_title = clean_string(channel_title) - + # + os.sep + channel_title - channel_directory = download_dir - if not os.path.exists(channel_directory): - os.makedirs(channel_directory) - print "Current Date: ", today - if mode == MODE_DOWNLOAD: - print "Bulk download. Processing..." - # 2011-10-06 Replaced channel_directory with channel_title - needed for m3u file later - num_podcasts = iterate_channel(channel, today, mode, cur, conn, feed, channel_title) - print "\n", num_podcasts, "have been downloaded" - elif mode == MODE_SUBSCRIBE: + channel_directory = download_dir + if not os.path.exists(channel_directory): + os.makedirs(channel_directory) + + print "Current Date: ", today + + if mode == MODE_DOWNLOAD: + print "Bulk download. Processing..." + # 2011-10-06 Replaced channel_directory with channel_title - needed for m3u file later + num_podcasts = iterate_channel(channel, today, mode, cur, conn, feed, channel_title) + print "\n", num_podcasts, "have been downloaded" + + elif mode == MODE_SUBSCRIBE: print "Feed to subscribe to: " + feed + ". Checking for database duplicate..." if not does_sub_exist(cur, conn, feed): - print "Subscribe. Processing..." 
- # 2011-10-06 Replaced channel_directory with channel_title - needed for m3u file later - num_podcasts = iterate_channel(channel, today, mode, cur, conn, feed, channel_title) - - print "\n", num_podcasts, "have been downloaded from your subscription" + print "Subscribe. Processing..." + # 2011-10-06 Replaced channel_directory with channel_title - needed for m3u file later + num_podcasts = iterate_channel(channel, today, mode, cur, conn, feed, channel_title) + print "\n", num_podcasts, "have been downloaded from your subscription" else: print "Subscription already exists! Skipping..." - elif mode == MODE_UPDATE: - print "Updating RSS feeds. Processing..." - num_podcasts = iterate_channel(channel, today, mode, cur, conn, feed, channel_title) - message += str(num_podcasts) + " have been downloaded from your subscription: '" + channel_title + "'\n" + + elif mode == MODE_UPDATE: + print "Updating RSS feeds. Processing..." + num_podcasts = iterate_channel(channel, today, mode, cur, conn, feed, channel_title) + message += str(num_podcasts) + " have been downloaded from your subscription: '" + channel_title + "'\n" + except xml.parsers.expat.ExpatError: print "ERROR - Malformed XML syntax in feed. Skipping..." message += "0 podcasts have been downloaded from this feed due to RSS syntax problems. Please try again later" @@ -374,7 +394,7 @@ def iterate_feed(data, mode, download_dir, today, cur, conn, feed): print "ERROR - Unicoce encoding error in string. Cannot convert to ASCII. Skipping..." message += "0 podcasts have been downloaded from this feed due to RSS syntax problems. 
Please try again later" return message - + def clean_string(str): new_string = str @@ -390,8 +410,9 @@ def clean_string(str): new_string_final = new_string_final.replace(' ','-') new_string_final = new_string_final.replace('---','-') new_string_final = new_string_final.replace('--','-') - + return new_string_final + # Change 2011-10-06 - Changed chan_loc to channel_title to help with relative path names # in the m3u file @@ -406,27 +427,27 @@ def write_podcast(item, channel_title, date, type): if type == "video/quicktime" or type == "audio/mp4" or type == "video/mp4": if not local_file.endswith(".mp4"): local_file = local_file + ".mp4" - + elif type == "video/mpeg": if not local_file.endswith(".mpg"): local_file = local_file + ".mpg" - + elif type == "video/x-flv": if not local_file.endswith(".flv"): local_file = local_file + ".flv" - + elif type == "video/x-ms-wmv": if not local_file.endswith(".wmv"): local_file = local_file + ".wmv" - + elif type == "video/webm" or type == "audio/webm": if not local_file.endswith(".webm"): local_file = local_file + ".webm" - + elif type == "audio/ogg" or type == "video/ogg" or type == "audio/vorbis": if not local_file.endswith(".ogg"): local_file = local_file + ".ogg" - + elif type == "audio/x-ms-wma" or type == "audio/x-ms-wax": if not local_file.endswith(".wma"): local_file = local_file + ".wma" @@ -453,15 +474,18 @@ def write_podcast(item, channel_title, date, type): # output.close() print "Podcast: ", item, " downloaded to: ", local_file - # 2011-11-06 Append to m3u file - output = open(current_directory + os.sep + m3u_file, 'a') - output.write(DOWNLOAD_DIRECTORY + os.sep + channel_title + os.sep + local_file + "\n") - output.close() + if CREATE_M3U: + # 2011-11-06 Append to m3u file + output = open(current_directory + os.sep + m3u_file, 'a') + output.write(DOWNLOAD_DIRECTORY + os.sep + channel_title + os.sep + local_file + "\n") + output.close() + return 'Successful Write' + except urllib2.URLError as e: print "ERROR - Could 
not write item to file: ", e return 'Write Error' - + def does_database_exist(curr_loc): db_name = "PodGrab.db" @@ -469,24 +493,24 @@ def does_database_exist(curr_loc): return 1 else: return 0 - + def add_mail_user(cur, conn, address): row = (address,) cur.execute('INSERT INTO email(address) VALUES (?)', row) conn.commit() - + def delete_mail_user(cur, conn, address): row = (address,) cur.execute('DELETE FROM email WHERE address = ?', row) conn.commit() - + def get_mail_users(cur, conn): cur.execute('SELECT address FROM email') return cur.fetchall() - + def list_mail_addresses(cur, conn): cur.execute('SELECT * from email') @@ -494,7 +518,7 @@ def list_mail_addresses(cur, conn): print "Listing mail addresses..." for address in result: print "Address:\t" + address[0] - + def has_mail_users(cur, conn): cur.execute('SELECT COUNT(*) FROM email') @@ -502,7 +526,7 @@ def has_mail_users(cur, conn): return 0 else: return 1 - + def mail_updates(cur, conn, mess, num_updates): addresses = get_mail_users(cur, conn) @@ -518,20 +542,21 @@ def mail_updates(cur, conn, mess, num_updates): except smtplib.SMTPException: traceback.print_exc() print "Could not send podcast updates e-mail to: " + address[0] - + def mail(server_url=None, sender='', to='', subject='', text=''): - headers = "From: %s\r\nTo: %s\r\nSubject: %s\r\n\r\n" % (sender, to, subject) - message = headers + text - mail_server = smtplib.SMTP(server_url) - mail_server.sendmail(sender, to, message) - mail_server.quit() - + headers = "From: %s\r\nTo: %s\r\nSubject: %s\r\n\r\n" % (sender, to, subject) + message = headers + text + mail_server = smtplib.SMTP(server_url) + mail_server.sendmail(sender, to, message) + mail_server.quit() + def connect_database(curr_loc): conn = sqlite3.connect(curr_loc + os.sep + "PodGrab.db") return conn + def setup_database(cur, conn): cur.execute("CREATE TABLE subscriptions (channel text, feed text, last_ep text)") cur.execute("CREATE TABLE email (address text)") @@ -571,6 +596,7 @@ def 
iterate_channel(chan, today, mode, cur, conn, feed, channel_title): if mktime(lastdate) > mktime(firstdate): items = reversed(items) print "Is reverse order" + except TypeError: error_reversing = 1 except ValueError: @@ -722,5 +748,6 @@ def get_last_subscription_downloaded(cur, conn, feed): rec = cur.fetchone() return rec[0] + if __name__ == "__main__": main(sys.argv[1:]) From 2f600246fbd081de5a7a6ab3ac2d99c8714d0162 Mon Sep 17 00:00:00 2001 From: Huw Date: Sun, 1 May 2016 08:23:15 +0100 Subject: [PATCH 13/18] Global option for channel directories --- PodGrab.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/PodGrab.py b/PodGrab.py index 4432dec..ac7688b 100755 --- a/PodGrab.py +++ b/PodGrab.py @@ -58,6 +58,8 @@ CREATE_M3U = False +CHANNEL_DIRS = False + # Added 2011-10-06 Werner Avenant - added current_dictory here so it can be global current_directory = '' m3u_file = '' @@ -358,9 +360,11 @@ def iterate_feed(data, mode, download_dir, today, cur, conn, feed): print "Channel Title: ===" + channel_title + "===" print "Channel Link: " + channel_link channel_title = clean_string(channel_title) - - # + os.sep + channel_title + channel_directory = download_dir + if CHANNEL_DIRS: + channel_directory = download_dir + os.sep + channel_title + if not os.path.exists(channel_directory): os.makedirs(channel_directory) @@ -422,8 +426,10 @@ def write_podcast(item, channel_title, date, type): if len(item_file_name) > 50: item_file_name = item_file_name[:50] - # + os.sep + channel_title local_file = current_directory + os.sep + DOWNLOAD_DIRECTORY + os.sep + clean_string(item_file_name) + if CHANNEL_DIRS: + channel_directory = current_directory + os.sep + DOWNLOAD_DIRECTORY + os.sep + channel_title + os.sep + clean_string(item_file_name) + if type == "video/quicktime" or type == "audio/mp4" or type == "video/mp4": if not local_file.endswith(".mp4"): local_file = local_file + ".mp4" From 7414763124998428541e7d34d74e28f08fb0bae6 Mon Sep 17 00:00:00 2001 
From: Huw Date: Sun, 1 May 2016 08:28:25 +0100 Subject: [PATCH 14/18] Comment fix --- PodGrab.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/PodGrab.py b/PodGrab.py index ac7688b..5b7b790 100755 --- a/PodGrab.py +++ b/PodGrab.py @@ -9,8 +9,8 @@ # Werner Avenant - added small changes to write M3U file of podcasts downloaded today # werner.avenant@gmail.com (http://www.collectiveminds.co.za) -# HuwSy - Many changes see: https://github.com/HuwSy/PodGrab/commits/master/PodGrab.py -# 2016/03/11 +# HuwSy - Bug fix, Low Memory (RPi) fixes, Reversed feed and poorly dated feed fixes, +# User agent in feed request for some feeds and optional channel dirs and todays m3u # Do with this code what you will, it's "open source". As a courtesy, # I would appreciate credit if you base your code on mine. If you find From 610b57a0332a6b3152b39fda4ea8e22c0494446c Mon Sep 17 00:00:00 2001 From: Huw Date: Sun, 1 May 2016 08:38:18 +0100 Subject: [PATCH 15/18] Comment fixes --- PodGrab.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/PodGrab.py b/PodGrab.py index 5b7b790..6e3f9be 100755 --- a/PodGrab.py +++ b/PodGrab.py @@ -9,8 +9,7 @@ # Werner Avenant - added small changes to write M3U file of podcasts downloaded today # werner.avenant@gmail.com (http://www.collectiveminds.co.za) -# HuwSy - Bug fix, Low Memory (RPi) fixes, Reversed feed and poorly dated feed fixes, -# User agent in feed request for some feeds and optional channel dirs and todays m3u +# HuwSy - Changes see readme # Do with this code what you will, it's "open source". As a courtesy, # I would appreciate credit if you base your code on mine. 
If you find From 83abda0a521637197acdd9eb3c1e77344a396004 Mon Sep 17 00:00:00 2001 From: Huw Date: Sun, 1 May 2016 08:39:32 +0100 Subject: [PATCH 16/18] Comment fix --- README | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README b/README index 3da16d2..c776b48 100644 --- a/README +++ b/README @@ -43,4 +43,6 @@ Changes after fork: - Removed line 580 as it was suppressing valid errors - Use of urllib.urlretrieve instead of buffering to work on small memory devices (i.e RPi) - Fixes for feeds in reverse order and feeds with (some) non standard date times -- Removed separate channel title directories when downloading +- User agent in feed request as some feeds need this +- Optional channel dirs creation +- Optional todays m3u creation From f8ecca02cd373c15ee915867c06aa3e14283d6c0 Mon Sep 17 00:00:00 2001 From: Huw Date: Sat, 14 May 2016 07:18:18 +0100 Subject: [PATCH 17/18] Invalid and long feed fixes --- PodGrab.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/PodGrab.py b/PodGrab.py index 6e3f9be..f286c0c 100755 --- a/PodGrab.py +++ b/PodGrab.py @@ -59,6 +59,8 @@ CHANNEL_DIRS = False +MAX_RSS_LENGTH = 5242880 + # Added 2011-10-06 Werner Avenant - added current_dictory here so it can be global current_directory = '' m3u_file = '' @@ -352,6 +354,13 @@ def iterate_feed(data, mode, download_dir, today, cur, conn, feed): print "Iterating feed..." 
message = "" try: + data = data.replace("& ", "& ") + if len(data) > MAX_RSS_LENGTH: + ending = data[MAX_RSS_LENGTH:].find("") + if ending > -1: + ending = ending + MAX_RSS_LENGTH + data = data[:ending] + "" + xml_data = xml.dom.minidom.parseString(data) for channel in xml_data.getElementsByTagName('channel'): channel_title = channel.getElementsByTagName('title')[0].firstChild.data From b7c62cddaf1c6030b637a8f156c35f30cdc015cd Mon Sep 17 00:00:00 2001 From: Huw Date: Sat, 16 Sep 2017 21:11:27 +0100 Subject: [PATCH 18/18] UA impersonation on download --- PodGrab.py | 1 + 1 file changed, 1 insertion(+) diff --git a/PodGrab.py b/PodGrab.py index f286c0c..559636b 100755 --- a/PodGrab.py +++ b/PodGrab.py @@ -478,6 +478,7 @@ def write_podcast(item, channel_title, date, type): else: print "\nDownloading " + item_file_name + " which was published on " + date try: + urllib.URLopener.version = 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.153 Safari/537.36 SE 2.X MetaSr 1.0' urllib.urlretrieve(item, local_file) # item_file = urllib2.urlopen(item) # output = open(local_file, 'wb')