Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 44 additions & 4 deletions gen_conf_downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
from urllib.parse import quote_plus
import urllib.request
import zlib
from mutagen.mp3 import MP3
import mutagen

Conference = namedtuple('Conference', 'link title year month')
Session = namedtuple('Session', 'conference link title number')
Expand All @@ -57,6 +57,7 @@

MP3_MEDIAURL_REGEX = r'{"mediaUrl":"([^"]*)","variant":"audio"}'
MP3_MEDIAURL_FILENAME_REGEX = r'.*/(.*\.mp3)'
M4A_MEDIAURL_FILENAME_REGEX = r'.*/(.*\.m4a)'

SESSIONS_REGEX = r'<a[^>]*href="([^"]*)"[^>]*><div[^>]*><p><span[^>]*>([^<]*)</span></p></div></a><ul[^>]*>(.*?)</ul>'
SESSION_TALKS_REGEX = r'<a[^>]*href="([^"]*)"[^>]*><div[^>]*><p><span[^>]*>(.*?)</span></p><p[^>]*>([^<]*)</p></div></a>'
Expand Down Expand Up @@ -180,12 +181,14 @@ def clean_title(title):


def create_mp3_filename(args, talk):
    """Build the extension-less audio file name for a talk.

    The result has the form ``<year>-<month>-<index>-<speaker>-<lang>.``:

    * ``<month>`` is zero-padded to two digits,
    * ``<index>`` is ``talk.session.number * 100 + talk.number``,
    * ``<speaker>`` is ``talk.speaker`` lower-cased with spaces replaced by
      hyphens,
    * ``<lang>`` is ``args.lang``.

    The name deliberately ends with a trailing dot and carries no extension:
    ``get_audio`` appends ``mp3`` or ``m4a`` after it has determined which
    format the media URL points to.
    """
    session = talk.session
    conference = session.conference
    return "{}-{:02d}-{}-{}-{}.".format(
        conference.year,
        conference.month,
        session.number * 100 + talk.number,
        talk.speaker.lower().replace(" ", "-"),
        args.lang)


def create_playlists(args, all_talks):
Expand Down Expand Up @@ -224,6 +227,8 @@ def download_all_content(args):
for talk in all_talks:
progress_bar.set_description_str(talk.title, refresh=True)
audio = get_audio(args, f'{LDS_ORG_URL}{decode(talk.link)}', create_mp3_filename(args, talk))
if args.verbose and not audio:
sys.stderr.write(f'No audio file found at URL')
if audio and download_audio(progress_bar, args, get_relative_path(args, talk.session), audio):
if not args.noplaylists:
update_playlists(args, playlists, talk, audio)
Expand All @@ -243,6 +248,8 @@ def download_all_content(args):
def download_audio(progress_bar, args, relpath, audio):
# If audio file doesn't yet exist, attempt to retrieve it
file_path = f'{get_output_dir(args)}/{relpath}/{audio.file}'
if args.verbose:
print("Using file path: {}".format(file_path))
if not os.path.isfile(file_path):
try:
req = urllib.request.Request(audio.link)
Expand Down Expand Up @@ -380,11 +387,21 @@ def get_audio_2024(args, url):
driver.get(url)
try:
options = driver.find_element(By.XPATH, '//button[@title="Options"]')
print("Giving options click with URL: {}".format(url))
options.click()
## Some pages don't have an audio file, eg
## 1993-10 Combatting Spiritual Drift—Our Global Pandemic By Elder Russell M. Nelson
## https://www.churchofjesuschrist.org/study/general-conference/1993/10/combatting-spiritual-drift-our-global-pandemic?lang=eng
## 2014-10 Joseph Smith By Elder Neil L. Andersen
## https://www.churchofjesuschrist.org/study/general-conference/2014/10/joseph-smith?lang=eng
try:
download = WebDriverWait(driver, 20).until(
EC.element_to_be_clickable((By.XPATH, '//button[@data-testid="download-menu-button"]'))
)
except:
pass
else:
print("Downloading with URL: {}".format(url))
download.click()
add_to_cache(args, driver.page_source, url)
return re.search(MP3_DOWNLOAD_REGEX, driver.page_source)
Expand All @@ -396,6 +413,7 @@ def get_audio_2024(args, url):

def get_audio(args, url, mp3_file_name):
link_html = get_html(args, url)
ext = "mp3"
mp3_link = re.search(MP3_DOWNLOAD_REGEX, link_html)
# In April 2022 the MP3 link became buried in base64 encoded script section
match = re.search(SCRIPT_BASE64_REGEX, link_html)
Expand All @@ -411,17 +429,27 @@ def get_audio(args, url, mp3_file_name):
# Search for JSON object containing mediaUrl key and value
mp3_link = re.search(MP3_MEDIAURL_REGEX, script_data)
if not mp3_link:
if args.verbose:
print("No regular link found, using post 2024 style")
mp3_link = get_audio_2024(args, url)
if not mp3_link:
sys.stderr.write(f'Problem finding mp3 link ({url}')
return
# Extract and reuse the filename from the MP3 URL
if args.verbose:
print("Defined mp3_link as: {}".format(mp3_link))

mp3_file = re.match(MP3_MEDIAURL_FILENAME_REGEX, mp3_link.group(1))

if not mp3_file:
## Some files are occasionally m4a instead of mp3
mp3_file = re.match(M4A_MEDIAURL_FILENAME_REGEX, mp3_link.group(1))
ext = "m4a"
if not mp3_file:
return

# Create audio object with link and filename
mp3_file_name = str(mp3_file_name) + ext
return Audio(mp3_link.group(1), mp3_file_name)


Expand Down Expand Up @@ -636,7 +664,14 @@ def update_playlists(args, playlists, talk, audio):
month_path = get_month_path(args, talk.session.conference)
session_path = get_session_path(args, talk.session, nonumbers=True)
relative_path = get_relative_path(args, talk.session)
duration = MP3(f'{get_output_dir(args)}/{relative_path}/{audio.file}').info.length
# ~ duration = MP3(f'{get_output_dir(args)}/{relative_path}/{audio.file}').info.length
file_name, file_extension = os.path.splitext(audio.file)
duration = 0
if file_extension == 'mp3':
duration = mutagen.mp3(f'{get_output_dir(args)}/{relative_path}/{audio.file}').info.length
elif file_extension == 'm4a':
#if not duration: #file_extension == 'm4a'
duration = mutagen.mp4(f'{get_output_dir(args)}/{relative_path}/{audio.file}').info.length

# Add this talk to the year, conference, or session playlists
playlists[f'Conferences/{year_path}'].append({'duration' : duration, 'path' : f'../{relative_path}/{audio.file}', 'title' : talk.title})
Expand Down Expand Up @@ -700,7 +735,10 @@ def write_playlist_file(args, playlist_path, playlist_data):
f.write("#EXTM3U\n\n")
for audio_info in playlist_data:
f.write(f"#EXTINF:{get_duration_text(audio_info['duration'])}, {audio_info['title']}\n")
f.write(audio_info['path'].replace("/","\\"))
if os.name == 'nt': ## For windows use backslashes in paths
f.write(audio_info['path'].replace("/","\\"))
else: ## MacOS, Unix, Linux, BSD, leave the slashes.
f.write(audio_info['path'])
f.write("\n\n")


Expand All @@ -717,6 +755,8 @@ def write_playlists(args, playlists):

def write_mp3_to_disk(data, filename, args=None):
    """Write downloaded audio bytes to ``filename``.

    Parent directories are created when missing.

    Args:
        data: Bytes to write.
        filename: Destination path; opened in binary write mode, so an
            existing file is overwritten.
        args: Optional object with a ``verbose`` attribute.  When omitted,
            a module-level ``args`` is used if one exists; otherwise the
            function stays silent.
    """
    if args is None:
        # ``args`` is not a parameter of the two-argument call form, so a
        # bare reference raises NameError unless the module defines it.
        # Look it up defensively instead.
        args = globals().get("args")

    parent = os.path.dirname(filename)
    if parent:
        # os.makedirs('') raises FileNotFoundError, so skip it for a bare
        # file name in the current directory.
        os.makedirs(parent, exist_ok=True)

    if getattr(args, "verbose", False):
        print("Saving file: {}".format(filename))

    with open(filename, "wb") as f:
        f.write(data)

Expand Down