From 0eccfd32e2c531acd11f55df8aaa3aae1e700baa Mon Sep 17 00:00:00 2001
From: bradlywilson <bradlywilson@gmail.com>
Date: Sun, 26 Oct 2025 18:23:17 -0600
Subject: [PATCH] Some files are m4a on the Church website and were getting
 ignored; those are now downloaded and also added to the playlists. Also, there
 were a few talks without an audio file that caused the script to break, so I
 added a try/except to prevent that.

---
 gen_conf_downloader.py | 48 ++++++++++++++++++++++++++++++++++++++----
 1 file changed, 44 insertions(+), 4 deletions(-)
diff --git a/gen_conf_downloader.py b/gen_conf_downloader.py
index 86ad416..eb6a1ec 100644
--- a/gen_conf_downloader.py
+++ b/gen_conf_downloader.py
@@ -32,7 +32,7 @@
 from urllib.parse import quote_plus
 import urllib.request
 import zlib
-from mutagen.mp3 import MP3
+import mutagen
 
 Conference = namedtuple('Conference', 'link title year month')
 Session = namedtuple('Session', 'conference link title number')
@@ -57,6 +57,7 @@
 
 MP3_MEDIAURL_REGEX = r'{"mediaUrl":"([^"]*)","variant":"audio"}'
 MP3_MEDIAURL_FILENAME_REGEX = r'.*/(.*\.mp3)'
+M4A_MEDIAURL_FILENAME_REGEX = r'.*/(.*\.m4a)'
 
 SESSIONS_REGEX = r'<a[^>]*href="([^"]*)"[^>]*><div[^>]*><p><span[^>]*>([^<]*)</span></p></div></a><ul[^>]*>(.*?)</ul>'
 SESSION_TALKS_REGEX = r'<a[^>]*href="([^"]*)"[^>]*><div[^>]*><p><span[^>]*>(.*?)</span></p><p[^>]*>([^<]*)</p></div></a>'
@@ -180,12 +181,14 @@ def clean_title(title):
 
 
 def create_mp3_filename(args, talk):
-    return "{}-{:02d}-{}-{}-{}.mp3".format(
+    ## The extension is omitted here; get_audio appends it after determining whether the file is mp3 or m4a.
+    fname = "{}-{:02d}-{}-{}-{}.".format(
         talk.session.conference.year,
         talk.session.conference.month,
         talk.session.number * 100 + talk.number,
         talk.speaker.lower().replace(" ", "-"),
         args.lang)
+    return fname
 
 
 def create_playlists(args, all_talks):
@@ -224,6 +227,8 @@ def download_all_content(args):
         for talk in all_talks:
             progress_bar.set_description_str(talk.title, refresh=True)
             audio = get_audio(args, f'{LDS_ORG_URL}{decode(talk.link)}', create_mp3_filename(args, talk))
+            if args.verbose and not audio:
+                sys.stderr.write(f'No audio file found at URL {LDS_ORG_URL}{decode(talk.link)}\n')
             if audio and download_audio(progress_bar, args, get_relative_path(args, talk.session), audio):
                 if not args.noplaylists:
                     update_playlists(args, playlists, talk, audio)
@@ -243,6 +248,8 @@ def download_all_content(args):
 def download_audio(progress_bar, args, relpath, audio):
     # If audio file doesn't yet exist, attempt to retrieve it
     file_path = f'{get_output_dir(args)}/{relpath}/{audio.file}'
+    if args.verbose:
+        print("Using file path: {}".format(file_path))
     if not os.path.isfile(file_path):
         try:
             req = urllib.request.Request(audio.link)
@@ -380,11 +387,21 @@ def get_audio_2024(args, url):
     driver.get(url)
     try:
         options = driver.find_element(By.XPATH, '//button[@title="Options"]')
+        print("Giving options click with URL: {}".format(url))
         options.click()
+        ## Some pages don't have an audio file, e.g.:
+        ## 1993-10 Combatting Spiritual Drift—Our Global Pandemic By Elder Russell M. Nelson
+        ## https://www.churchofjesuschrist.org/study/general-conference/1993/10/combatting-spiritual-drift-our-global-pandemic?lang=eng
+        ## 2014-10 Joseph Smith By Elder Neil L. Andersen
+        ## https://www.churchofjesuschrist.org/study/general-conference/2014/10/joseph-smith?lang=eng
         try:
             download = WebDriverWait(driver, 20).until(
                 EC.element_to_be_clickable((By.XPATH, '//button[@data-testid="download-menu-button"]'))
             )
+        except Exception:
+            pass  # download button never became clickable; typically the talk has no audio file
+        else:
+            print("Downloading with URL: {}".format(url))
             download.click()
             add_to_cache(args, driver.page_source, url)
             return re.search(MP3_DOWNLOAD_REGEX, driver.page_source)
@@ -396,6 +413,7 @@ def get_audio_2024(args, url):
 
 def get_audio(args, url, mp3_file_name):
     link_html = get_html(args, url)
+    ext = "mp3"
     mp3_link = re.search(MP3_DOWNLOAD_REGEX, link_html)
     # In April 2022 the MP3 link became buried in base64 encoded script section
     match = re.search(SCRIPT_BASE64_REGEX, link_html)
@@ -411,17 +429,27 @@ def get_audio(args, url, mp3_file_name):
         # Search for JSON object containing mediaUrl key and value
         mp3_link = re.search(MP3_MEDIAURL_REGEX, script_data)
         if not mp3_link:
+            if args.verbose:
+                print("No regular link found, using post 2024 style")
             mp3_link = get_audio_2024(args, url)
         if not mp3_link:
             sys.stderr.write(f'Problem finding mp3 link ({url}')
             return
         # Extract and reuse the filename from the MP3 URL
+        if args.verbose:
+            print("Defined mp3_link as: {}".format(mp3_link))
+            
         mp3_file = re.match(MP3_MEDIAURL_FILENAME_REGEX, mp3_link.group(1))
 
+    if not mp3_file:
+        ## Some files are occasionally m4a instead of mp3
+        mp3_file = re.match(M4A_MEDIAURL_FILENAME_REGEX, mp3_link.group(1))
+        ext = "m4a"
     if not mp3_file:
         return
 
     # Create audio object with link and filename
+    mp3_file_name = str(mp3_file_name) + ext
     return Audio(mp3_link.group(1), mp3_file_name)
 
 
@@ -636,7 +664,14 @@ def update_playlists(args, playlists, talk, audio):
     month_path = get_month_path(args, talk.session.conference)
     session_path = get_session_path(args, talk.session, nonumbers=True)
     relative_path = get_relative_path(args, talk.session)
-    duration = MP3(f'{get_output_dir(args)}/{relative_path}/{audio.file}').info.length
+    # mutagen.File() detects the container itself, so mp3 and m4a downloads are both measured.
+    audio_path = f'{get_output_dir(args)}/{relative_path}/{audio.file}'
+    try:
+        media = mutagen.File(audio_path)
+        duration = media.info.length if media is not None else 0
+    except mutagen.MutagenError as err:
+        duration = 0
+        sys.stderr.write(f'Could not read duration of {audio_path}: {err}\n')
 
     # Add this talk to the year, conference, or session playlists
     playlists[f'Conferences/{year_path}'].append({'duration' : duration, 'path' : f'../{relative_path}/{audio.file}', 'title' : talk.title})
@@ -700,7 +735,10 @@ def write_playlist_file(args, playlist_path, playlist_data):
             f.write("#EXTM3U\n\n")
             for audio_info in playlist_data:
                 f.write(f"#EXTINF:{get_duration_text(audio_info['duration'])}, {audio_info['title']}\n")
-                f.write(audio_info['path'].replace("/","\\"))
+                if os.name == 'nt': ## For windows use backslashes in paths
+                    f.write(audio_info['path'].replace("/","\\"))
+                else: ## MacOS, Unix, Linux, BSD, leave the slashes.
+                    f.write(audio_info['path'])
                 f.write("\n\n")
 
 
@@ -717,6 +755,8 @@ def write_playlists(args, playlists):
 
 def write_mp3_to_disk(data, filename):
     os.makedirs(os.path.dirname(filename), exist_ok=True)
+    if args.verbose:
+        print("Saving file: {}".format(filename))
     with open(filename, "wb") as f:
         f.write(data)