diff --git a/gen_conf_downloader.py b/gen_conf_downloader.py
index 86ad416..eb6a1ec 100644
--- a/gen_conf_downloader.py
+++ b/gen_conf_downloader.py
@@ -32,7 +32,7 @@
from urllib.parse import quote_plus
import urllib.request
import zlib
-from mutagen.mp3 import MP3
+import mutagen
Conference = namedtuple('Conference', 'link title year month')
Session = namedtuple('Session', 'conference link title number')
@@ -57,6 +57,7 @@
MP3_MEDIAURL_REGEX = r'{"mediaUrl":"([^"]*)","variant":"audio"}'
MP3_MEDIAURL_FILENAME_REGEX = r'.*/(.*\.mp3)'
+M4A_MEDIAURL_FILENAME_REGEX = r'.*/(.*\.m4a)'
SESSIONS_REGEX = r']*href="([^"]*)"[^>]*>
'
SESSION_TALKS_REGEX = r']*href="([^"]*)"[^>]*>'
@@ -180,12 +181,14 @@ def clean_title(title):
def create_mp3_filename(args, talk):
- return "{}-{:02d}-{}-{}-{}.mp3".format(
+ ## Return the name with a trailing dot but no extension; get_audio appends mp3 or m4a once the media type is known
+ fname = "{}-{:02d}-{}-{}-{}.".format(
talk.session.conference.year,
talk.session.conference.month,
talk.session.number * 100 + talk.number,
talk.speaker.lower().replace(" ", "-"),
args.lang)
+ return fname
def create_playlists(args, all_talks):
@@ -224,6 +227,8 @@ def download_all_content(args):
for talk in all_talks:
progress_bar.set_description_str(talk.title, refresh=True)
audio = get_audio(args, f'{LDS_ORG_URL}{decode(talk.link)}', create_mp3_filename(args, talk))
+ if args.verbose and not audio:
+ sys.stderr.write(f'No audio file found at URL {LDS_ORG_URL}{decode(talk.link)}\n')
if audio and download_audio(progress_bar, args, get_relative_path(args, talk.session), audio):
if not args.noplaylists:
update_playlists(args, playlists, talk, audio)
@@ -243,6 +248,8 @@ def download_all_content(args):
def download_audio(progress_bar, args, relpath, audio):
# If audio file doesn't yet exist, attempt to retrieve it
file_path = f'{get_output_dir(args)}/{relpath}/{audio.file}'
+ if args.verbose:
+ print("Using file path: {}".format(file_path))
if not os.path.isfile(file_path):
try:
req = urllib.request.Request(audio.link)
@@ -380,11 +387,21 @@ def get_audio_2024(args, url):
driver.get(url)
try:
options = driver.find_element(By.XPATH, '//button[@title="Options"]')
+ print("Giving options click with URL: {}".format(url))
options.click()
+ ## Some pages don't have an audio file, eg
+ ## 1993-10 Combatting Spiritual Drift—Our Global Pandemic By Elder Russell M. Nelson
+ ## https://www.churchofjesuschrist.org/study/general-conference/1993/10/combatting-spiritual-drift-our-global-pandemic?lang=eng
+ ## 2014-10 Joseph Smith By Elder Neil L. Andersen
+ ## https://www.churchofjesuschrist.org/study/general-conference/2014/10/joseph-smith?lang=eng
try:
download = WebDriverWait(driver, 20).until(
EC.element_to_be_clickable((By.XPATH, '//button[@data-testid="download-menu-button"]'))
)
+ except Exception:  # a bare except would also swallow KeyboardInterrupt/SystemExit during the 20 s wait
+ pass  # best effort: no clickable download button, e.g. a page without audio
+ else:
+ print("Downloading with URL: {}".format(url))
download.click()
add_to_cache(args, driver.page_source, url)
return re.search(MP3_DOWNLOAD_REGEX, driver.page_source)
@@ -396,6 +413,7 @@ def get_audio_2024(args, url):
def get_audio(args, url, mp3_file_name):
link_html = get_html(args, url)
+ ext = "mp3"
mp3_link = re.search(MP3_DOWNLOAD_REGEX, link_html)
# In April 2022 the MP3 link became buried in base64 encoded script section
match = re.search(SCRIPT_BASE64_REGEX, link_html)
@@ -411,17 +429,27 @@ def get_audio(args, url, mp3_file_name):
# Search for JSON object containing mediaUrl key and value
mp3_link = re.search(MP3_MEDIAURL_REGEX, script_data)
if not mp3_link:
+ if args.verbose:
+ print("No regular link found, using post 2024 style")
mp3_link = get_audio_2024(args, url)
if not mp3_link:
- sys.stderr.write(f'Problem finding mp3 link ({url}')
+ sys.stderr.write(f'Problem finding mp3 link ({url})\n')
return
# Extract and reuse the filename from the MP3 URL
+ if args.verbose:
+ print("Defined mp3_link as: {}".format(mp3_link))
+
mp3_file = re.match(MP3_MEDIAURL_FILENAME_REGEX, mp3_link.group(1))
+ if not mp3_file:
+ ## Some files are occasionally m4a instead of mp3
+ mp3_file = re.match(M4A_MEDIAURL_FILENAME_REGEX, mp3_link.group(1))
+ ext = "m4a"
if not mp3_file:
return
# Create audio object with link and filename
+ mp3_file_name = str(mp3_file_name) + ext
return Audio(mp3_link.group(1), mp3_file_name)
@@ -636,7 +664,14 @@ def update_playlists(args, playlists, talk, audio):
month_path = get_month_path(args, talk.session.conference)
session_path = get_session_path(args, talk.session, nonumbers=True)
relative_path = get_relative_path(args, talk.session)
- duration = MP3(f'{get_output_dir(args)}/{relative_path}/{audio.file}').info.length
+ # Let mutagen detect the container from the file itself so MP3 and M4A
+ # talks share one code path; importing only the top-level package does
+ # not load mutagen.mp3/mutagen.mp4, and splitext() extensions include
+ # the leading dot, so per-extension dispatch is easy to get wrong here.
+ audio_path = f'{get_output_dir(args)}/{relative_path}/{audio.file}'
+ media = mutagen.File(audio_path)
+ # mutagen.File returns None when it cannot identify the file type
+ duration = media.info.length if media is not None else 0
# Add this talk to the year, conference, or session playlists
playlists[f'Conferences/{year_path}'].append({'duration' : duration, 'path' : f'../{relative_path}/{audio.file}', 'title' : talk.title})
@@ -700,7 +735,10 @@ def write_playlist_file(args, playlist_path, playlist_data):
f.write("#EXTM3U\n\n")
for audio_info in playlist_data:
f.write(f"#EXTINF:{get_duration_text(audio_info['duration'])}, {audio_info['title']}\n")
- f.write(audio_info['path'].replace("/","\\"))
+ if os.name == 'nt': ## For windows use backslashes in paths
+ f.write(audio_info['path'].replace("/","\\"))
+ else: ## MacOS, Unix, Linux, BSD, leave the slashes.
+ f.write(audio_info['path'])
f.write("\n\n")
@@ -717,6 +755,8 @@ def write_playlists(args, playlists):
-def write_mp3_to_disk(data, filename):
+def write_mp3_to_disk(data, filename, verbose=False):
os.makedirs(os.path.dirname(filename), exist_ok=True)
+ if verbose:  # explicit flag instead of reading an `args` name this function never receives
+ print("Saving file: {}".format(filename))
with open(filename, "wb") as f:
f.write(data)