Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 37 additions & 38 deletions PodGrab.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
import os
import sys
import argparse
import urllib.request as urllib2
import urllib2
Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you change it so it works in both py2/3?

e.g. similar to

try:
    from urllib.request import urlopen
    from urllib.parse import urlparse
except ImportError:
    from urlparse import urlparse
    from urllib import urlopen

import xml.dom.minidom
import datetime
from time import gmtime, strftime, strptime, mktime
Expand Down Expand Up @@ -77,9 +77,9 @@ def main(argv):
mail = ""
# Added 2011-10-06 Werner Avenant
global current_directory
global m3u_file
global m3u_file
now = datetime.datetime.now();
m3u_file = str(now)[:10] + '.m3u'
m3u_file = str(now)[:10] + '.m3u'
current_directory = os.path.realpath(os.path.dirname(sys.argv[0]))
download_directory = DOWNLOAD_DIRECTORY

Expand Down Expand Up @@ -120,7 +120,7 @@ def main(argv):
data = open_datasource(feed_url)
if not data:
error_string = "Not a valid XML file or URL feed!"
has_error = 1
has_error = 1
else:
print("XML data source opened\n")
mode = MODE_DOWNLOAD
Expand Down Expand Up @@ -159,7 +159,7 @@ def main(argv):

print("Default encoding: " + sys.getdefaultencoding())
todays_date = strftime("%a, %d %b %Y %H:%M:%S", gmtime())
print("Current Directory: ", current_directory)
print("Current Directory: " + current_directory)
if does_database_exist(current_directory):
connection = connect_database(current_directory)
if not connection:
Expand Down Expand Up @@ -336,27 +336,27 @@ def iterate_feed(data, mode, download_dir, today, cur, conn, feed):
for channel in xml_data.getElementsByTagName('channel'):
channel_title = channel.getElementsByTagName('title')[0].firstChild.data
channel_link = channel.getElementsByTagName('link')[0].firstChild.data
print("Channel Title: ===" + channel_title + "===")
print("Channel Title: === " + channel_title + " ===")
print("Channel Link: " + channel_link)
channel_title = clean_string(channel_title)

channel_directory = download_dir + os.sep + channel_title
if not os.path.exists(channel_directory):
os.makedirs(channel_directory)
print("Current Date: ", today)
print("Current Date: " + today)
if mode == MODE_DOWNLOAD:
print("Bulk download. Processing...")
# 2011-10-06 Replaced channel_directory with channel_title - needed for m3u file later
num_podcasts = iterate_channel(channel, today, mode, cur, conn, feed, channel_title)
print("\n", num_podcasts, "have been downloaded")
num_podcasts = iterate_channel(channel, today, mode, cur, conn, feed, channel_title)
print("\n" + num_podcasts + "have been downloaded")
elif mode == MODE_SUBSCRIBE:
print("Feed to subscribe to: " + feed + ". Checking for database duplicate...")
print("Feed to subscribe to: " + feed + ".\nChecking for database duplicate...")
if not does_sub_exist(cur, conn, feed):
print("Subscribe. Processing...")
print("Subscribe.\nProcessing...")
# 2011-10-06 Replaced channel_directory with channel_title - needed for m3u file later
num_podcasts = iterate_channel(channel, today, mode, cur, conn, feed, channel_title)

print("\n", num_podcasts, "have been downloaded from your subscription")
print("\n" + num_podcasts + "have been downloaded from your subscription")
else:
print("Subscription already exists! Skipping...")
elif mode == MODE_UPDATE:
Expand All @@ -383,10 +383,10 @@ def clean_string(str):
for c in new_string:
if c.isalnum() or c == "-" or c == "." or c.isspace():
new_string_final = new_string_final + ''.join(c)
new_string_final = new_string_final.strip()
new_string_final = new_string_final.replace(' ','-')
new_string_final = new_string_final.replace('---','-')
new_string_final = new_string_final.replace('--','-')
new_string_final = new_string_final.strip()

return new_string_final

Expand Down Expand Up @@ -431,39 +431,38 @@ def write_podcast(item, channel_title, date, type):
local_file = local_file + ".wma"

# Check if file exists, but if the file size is zero (which happens when the user
# presses Ctrl-C during a download) - then the code should go ahead and download
# presses Ctrl-C during a download) - then the code should go ahead and download
# as if the file didn't exist
if os.path.exists(local_file) and os.path.getsize(local_file) != 0:
return 'File Exists'
else:
print("\nDownloading " + item_file_name + " which was published on " + date)
try:
req = urllib2.urlopen(item)
CHUNK = 16 * 1024
with open(local_file, 'wb') as fp:
while True:
chunk = req.read(CHUNK)
if not chunk: break
fp.write(chunk)

item_file_name = os.path.basename(fp.name)

#item_file = urllib2.urlopen(item)
#output = open(local_file, 'wb')
# 2011-10-06 Werner Avenant - For some reason the file name changes when
req = urllib2.urlopen(item)
CHUNK = 16 * 1024
with open(local_file, 'wb') as fp:
while True:
chunk = req.read(CHUNK)
if not chunk: break
fp.write(chunk)

item_file_name = os.path.basename(fp.name)

#item_file = urllib2.urlopen(item)
#output = open(local_file, 'wb')
# 2011-10-06 Werner Avenant - For some reason the file name changes when
# saved to disk - probably a python feature (sorry, only wrote my first line of python today)
#item_file_name = os.path.basename(output.name)
#output.write(item_file.read())
#output.close()
print("Podcast: ", item, " downloaded to: ", local_file)

#item_file_name = os.path.basename(output.name)
#output.write(item_file.read())
#output.close()
print("Podcast: " + item + " downloaded to: " + local_file)
# 2011-11-06 Append to m3u file
output = open(current_directory + os.sep + m3u_file, 'a')
output.write(DOWNLOAD_DIRECTORY + os.sep + channel_title + os.sep + item_file_name + "\n")
output.close()
return 'Successful Write'
except urllib2.URLError as e:
print("ERROR - Could not write item to file: ", e)
print("ERROR - Could not write item to file: " + e)
return 'Write Error'


Expand Down Expand Up @@ -568,9 +567,9 @@ def iterate_channel(chan, today, mode, cur, conn, feed, channel_title):

last_ep = get_last_subscription_downloaded(cur, conn, feed)

### NB NB - The logic here is that we get the "last_ep" before we enter the loop
### The result is that it allows the code to "catch up" on missed episodes because
### we never update the "last_ep" while inside the loop.
### NB NB - The logic here is that we get the "last_ep" before we enter the loop
### The result is that it allows the code to "catch up" on missed episodes because
### we never update the "last_ep" while inside the loop.

for item in chan.getElementsByTagName('item'):
try:
Expand Down Expand Up @@ -633,7 +632,7 @@ def iterate_channel(chan, today, mode, cur, conn, feed, channel_title):
#traceback.print_exc()
print("This RSS item has no downloadable URL link for the podcast for '" + item_title + "'. Skipping...")

return str(num) + " podcasts totalling " + str(size) + " bytes"
return(str(num) + " podcast(s) totalling " + str(size) + " byte(s)")


def fix_date(date):
Expand Down Expand Up @@ -708,7 +707,7 @@ def get_last_subscription_downloaded(cur, conn, feed):
row = (feed,)
cur.execute('SELECT last_ep FROM subscriptions WHERE feed = ?', row)
rec = cur.fetchone()
return rec[0]
return rec[0]

if __name__ == "__main__":
main(sys.argv[1:])