-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcommon.py
More file actions
151 lines (142 loc) · 5.67 KB
/
common.py
File metadata and controls
151 lines (142 loc) · 5.67 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
"""
This class implements all common methods for downloading and handling the
audio files.
"""
import os
import datetime
import shutil
import ConfigParser
import urllib2
import logging
import sys
from mutagen.mp3 import MP3
from glob import iglob
class common_functions:
    """
    Helpers shared by the podcast scripts: fetch a web page, read the
    configuration file, download the audio parts and assemble them into a
    single mp3 file.
    """

    # Timestamp captured once, when the class body is executed (at import).
    # Used as the date stamp in the name of the assembled podcast file.
    now = datetime.datetime.now()

    # Browser-like User-Agent sent with page requests. Built from adjacent
    # string literals so the header holds a single clean value.
    USER_AGENT = ('Mozilla/5.0'
                  ' (Macintosh; Intel Mac OS X 10_8_2) AppleWebKit/537.17'
                  ' (KHTML, like Gecko) Chrome/24.0.1309.0 Safari/537.17')

    def get_html_and_split_lines(self, url):
        """
        Download the html code of a webpage and split it into lines.

        url -- address of the page to fetch.
        Returns the page body as a list of lines (split on newline).
        Errors raised by urllib2 while opening or reading are propagated.
        """
        logging.debug('Downloading page and split html to separate lines')
        req = urllib2.Request(url, headers={'User-Agent': self.USER_AGENT})
        logging.debug("Request header %s", req.header_items())
        logging.debug("Request url: %s", req.get_full_url())
        url_content = urllib2.urlopen(req)
        try:
            raw_html = url_content.read()
        finally:
            # Release the connection even when the read fails.
            url_content.close()
        return raw_html.split('\n')

    def config_section_map(self, section):
        """
        Return the options of one section of "pod.conf" as a dictionary.

        section -- name of the section to read.
        Returns a dict mapping option name to its string value; an option
        whose value cannot be read is logged and mapped to None.
        A missing section still raises from config.options().
        """
        config = ConfigParser.ConfigParser()
        config.read("pod.conf")
        section_values = {}
        for option in config.options(section):
            try:
                section_values[option] = config.get(section, option)
            except (ConfigParser.Error, ValueError, TypeError):
                # Lookup/interpolation failures degrade to None, but
                # KeyboardInterrupt and SystemExit are no longer swallowed.
                logging.exception("exception on %s!", option)
                section_values[option] = None
        return section_values

    def download_file(self, url, program, file_name):
        """
        Download one file into <cwd>/<program>/tmp/<file_name>.

        url       -- address of the file to download.
        program   -- program name; its tmp directory must already exist.
        file_name -- name of the local file to write.
        Progress is written to stdout. Returns True when the download ends.
        """
        # The file name is derived from page content, so keep only its last
        # path component to make sure the write stays inside the tmp folder.
        target = os.path.join(os.getcwd(), program, 'tmp',
                              os.path.basename(file_name))
        u = urllib2.urlopen(url)
        try:
            # Content-Length is optional; without a usable value only the
            # byte count is shown instead of a percentage.
            length_headers = u.info().getheaders("Content-Length")
            try:
                file_size = int(length_headers[0]) if length_headers else 0
            except ValueError:
                file_size = 0
            logging.info('Start downloading %s', file_name)
            file_size_dl = 0
            block_sz = 8192
            with open(target, 'wb') as f:
                while True:
                    bufferv = u.read(block_sz)
                    if not bufferv:
                        break
                    file_size_dl += len(bufferv)
                    f.write(bufferv)
                    if file_size > 0:
                        status = "%10d [%3.2f%%]" % (
                            file_size_dl, file_size_dl * 100. / file_size)
                    else:
                        status = "%10d" % file_size_dl
                    # Carriage return rewrites the same console line.
                    sys.stdout.write('\r' + status)
                    sys.stdout.flush()
        finally:
            u.close()
        logging.info('Download completed for %s', file_name)
        return True

    def ensure_directory_structure(self, name):
        """
        Make sure the directory <name>/tmp exists, creating it (and <name>)
        when missing.

        name -- program name, used as the directory name.
        """
        logging.info('Checking directory structure for %s', name)
        tmp_dir = os.path.join(name, 'tmp')
        if not os.path.exists(tmp_dir):
            os.makedirs(tmp_dir)
        logging.info('Directory structure checked for %s', name)

    def download_all_available_files(self, html_download_lines, program):
        """
        Extract the available mp3 files from the page lines and call
        download_file for each one of them.

        html_download_lines -- lines of the download page.
        program -- 'ellinofreneia' or 'chatzinikolaou'; any other value
                   terminates the process once a candidate line is found.
        """
        for line in html_download_lines:
            logging.debug("File for download: %s", line)
            if "mp3" not in line or "audiofile" not in line:
                continue
            url_proc = line.split("\"")
            # The indexes and slice offsets below are tied to the markup of
            # each program's page.
            # NOTE(review): the byte slices are decoded as UTF-8 afterwards;
            # confirm they cannot cut a multi-byte character in half.
            if program == 'ellinofreneia':
                url = url_proc[1]
                filename = url_proc[2][1:13].decode('utf-8')
                filename = filename.replace(' ', '') + ".mp3"
            elif program == 'chatzinikolaou':
                url = url_proc[3]
                filename = url_proc[0][:-11].decode('utf-8')
                filename = filename[11:].replace(' ', '') + ".mp3"
            else:
                logging.error("Not a known program. Exiting")
                # Non-zero status so the failure is visible to the caller.
                sys.exit(1)
            self.download_file(url, program, filename)

    def concat_files_and_move(self, name):
        """
        Concatenate the mp3 parts found in <cwd>/<name>/tmp, in sorted file
        name order, into <cwd>/<name>/<name>_<ddmmYYYY>.mp3 and then delete
        the parts.

        name -- program name, used as directory and file name prefix.
        """
        program_dir = os.path.join(os.getcwd(), name)
        complete_audio_file = name + '_' + self.now.strftime("%d%m%Y") + '.mp3'
        path_complete_audio_file = os.path.join(program_dir,
                                                complete_audio_file)
        logging.info('Scanning tmp folder for available files')
        parts = sorted(iglob(os.path.join(program_dir, 'tmp', '*.mp3')))
        if not parts:
            logging.warning('No tmp files found for %s', name)
        with open(path_complete_audio_file, 'wb') as destination:
            for part in parts:
                logging.info('Concatenating file %s', os.path.basename(part))
                # Close every part right after it has been copied.
                with open(part, 'rb') as source:
                    shutil.copyfileobj(source, destination)
        # Remove tmp files
        for part in parts:
            try:
                logging.info('Removing tmp file %s', part)
                # Octal mode: make the file writable before deleting it.
                os.chmod(part, 0o777)
                os.remove(part)
            except OSError as err:
                # Best effort: a leftover part must not abort the run.
                logging.warning('Could not remove tmp file %s: %s', part, err)

    @staticmethod
    def _format_duration(seconds):
        """
        Format a length in seconds as the str() of a timedelta with the
        seconds field dropped, e.g. 3725 -> '1:02'.
        """
        duration = datetime.timedelta(seconds=seconds)
        return ':'.join(str(duration).split(':')[:2])

    def get_podcast_duration(self, fileName):
        """
        Return the duration of an mp3 file as 'H:MM'.

        fileName -- path of the mp3 file to inspect.
        Terminates the process with status 1 when the reported length is
        not positive.
        """
        logging.debug("Calculating mp3 duration for %s", fileName)
        audio = MP3(fileName)
        seconds = audio.info.length
        # Test the numeric length: the formatted text is never empty, so it
        # cannot be used to detect a zero-length file.
        if seconds > 0:
            return self._format_duration(seconds)
        logging.error("Something went wrong. Podcast has duration 0")
        logging.info("Exiting")
        sys.exit(1)