-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsamplefinder.py
More file actions
92 lines (81 loc) · 4.18 KB
/
samplefinder.py
File metadata and controls
92 lines (81 loc) · 4.18 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import json
import argparse
import os
from pathlib import Path
from yt_dlp import YoutubeDL
import numpy as np
import pandas as pd
from tqdm import tqdm
from audio_offset_finder.audio_offset_finder import find_offset_between_files
class ArgParser:
    """Parse the command-line arguments of the Sample Finder script.

    After construction the parsed values are available on ``self.args``
    (an ``argparse.Namespace``) as ``dir``, ``channel`` and ``download``.
    """

    def __init__(self, argv=None):
        """Build the parser and parse the arguments.

        Args:
            argv: Optional list of argument strings. ``None`` (the default)
                lets argparse read ``sys.argv[1:]``, so ``ArgParser()``
                behaves as before.

        Raises:
            SystemExit: Raised by argparse when ``--dir`` is missing, or when
                ``--download`` is given without ``--channel``.
        """
        self.parser = argparse.ArgumentParser(
            prog='Sample Finder',
            description='Compares mp3 with a set of mp3s and finds offsets between them')
        self.parser.add_argument(
            '-d', '--dir',
            help='Name of directory where the mp3s are stored (and downloaded if needed)',
            required=True)
        self.parser.add_argument(
            '-c', '--channel',
            help='Channel name to download from')
        self.parser.add_argument(
            '--download',
            help='Download all videos from a channel',
            action='store_true')
        self.args = self.parser.parse_args(argv)

        # A download request without a channel used to be ignored silently;
        # reject it up front so the user learns why nothing was downloaded.
        if self.args.download and not self.args.channel:
            self.parser.error('--download requires --channel')
class SampleFinder:
    """Locate one sample mp3 inside every mp3 stored in a directory.

    Typical use is ``download()`` (optional), then ``find_offsets()``, then
    ``write_results()``.

    Attributes set by ``__init__``:
        results: list of result dicts filled by ``find_offsets``.
        args: the parsed argument namespace (``dir``, ``channel``, ``download``).
        sample: path of the sample mp3 that is searched for.
        directory: the ``dir`` argument exactly as given.
        curr_dir: absolute path of the current working directory.
        mp3s_dir: ``curr_dir / directory`` - where mp3s are read and downloaded.
        json_path / excel_path: report files written by ``write_results``.
    """

    def __init__(self, args, sample='ekt.mp3'):
        """Store the arguments and derive all working paths.

        Args:
            args: Namespace providing ``dir``, ``channel`` and ``download``.
            sample: Path of the sample mp3 to look for. Defaults to
                ``'ekt.mp3'`` (relative to the working directory), the value
                that was previously hard-coded.
        """
        self.results = []
        self.args = args
        self.sample = sample
        self.directory = args.dir
        self.curr_dir = Path().absolute()
        self.mp3s_dir = self.curr_dir / self.directory
        # Name the reports after the last path component only, so that a
        # nested or slash-terminated --dir ("a/b", "foo/") still yields a
        # file directly inside mp3s_dir. For a plain name the result is the
        # same as before.
        report_stem = self.mp3s_dir.name or 'results'
        self.json_path = self.mp3s_dir / f'{report_stem}.json'
        self.excel_path = self.mp3s_dir / f'{report_stem}.xlsx'

    def download(self):
        """Download the audio of every video of a YouTube channel as mp3.

        Does nothing unless both ``args.download`` and ``args.channel`` are
        set. Files are stored in ``mp3s_dir`` (created if missing) and named
        after the video title.
        """
        if not (self.args.download and self.args.channel):
            return

        self.mp3s_dir.mkdir(parents=True, exist_ok=True)
        # '%' is the field marker in yt-dlp output templates, so a literal
        # percent sign in the directory path has to be doubled.
        target_dir = str(self.mp3s_dir).replace('%', '%%')
        # Accept the channel handle with or without its leading '@'.
        handle = self.args.channel.lstrip('@')
        options = {
            'format': 'bestaudio/best',
            'final_ext': 'mp3',
            'extract_flat': 'discard_in_playlist',
            'postprocessors': [
                {'key': 'FFmpegExtractAudio',
                 'nopostoverwrites': False,
                 'preferredcodec': 'mp3',
                 'preferredquality': '0'},
                {'key': 'FFmpegConcat',
                 'only_multi_video': True,
                 'when': 'playlist'},
            ],
            'outtmpl': f'{target_dir}{os.sep}%(title)s.%(ext)s',
            'verbose': True,
            'retries': 10,
            'ignoreerrors': 'only_download',
            'fragment_retries': 10,
            'concurrent_fragment_downloads': 5,
        }
        with YoutubeDL(options) as ydl:
            ydl.download([f'https://www.youtube.com/@{handle}'])

    def find_offsets(self):
        """Compare the sample with every mp3 found under ``mp3s_dir``.

        One dict per file (``file``, ``offset``, ``standard_score``) is
        appended to ``self.results``, which is then sorted by
        ``standard_score`` in descending order. The sample file itself is
        skipped when it is located inside ``mp3s_dir``.
        """
        sample_path = Path(self.sample).resolve()
        candidates = [
            path for path in sorted(self.mp3s_dir.rglob('*.mp3'))
            # Matching the sample against itself carries no information.
            if path.resolve() != sample_path
        ]
        for file in tqdm(candidates, desc="Comparing sample to mp3s", dynamic_ncols=True, ascii=" ="):
            result = find_offset_between_files(str(self.sample), str(file))
            self.results.append(self._to_record(file, result))
        # sorting by standard_score descending
        self.results.sort(key=lambda record: record["standard_score"], reverse=True)

    @staticmethod
    def _to_record(file, result):
        """Turn one comparison result into a plain, JSON-serialisable dict.

        Args:
            file: ``Path`` of the mp3 that was compared with the sample.
            result: Mapping with ``time_offset`` and ``standard_score``.

        Returns:
            Dict with ``file`` (file name only), ``offset`` and
            ``standard_score``; a NaN score is replaced by ``0``.
        """
        # Cast to builtin floats: NumPy scalar types such as float32 are
        # rejected by json.dump.
        score = float(result['standard_score'])
        return {
            "file": file.name,
            "offset": float(result["time_offset"]),
            "standard_score": 0 if np.isnan(score) else score,
        }

    def _excel_rows(self):
        """Return the spreadsheet rows: two rows per result.

        The first row holds the file name, the offset and the label
        ``'offset'``; the second holds an empty cell, the score and the
        label ``'standard_score'``.
        """
        rows = []
        for record in self.results:
            rows.append([record['file'], record['offset'], 'offset'])
            rows.append([None, record['standard_score'], 'standard_score'])
        return rows

    def write_results(self):
        """Write ``self.results`` to the JSON and XLSX report files."""
        # The directory may not exist yet when no download was performed.
        self.mp3s_dir.mkdir(parents=True, exist_ok=True)
        with open(self.json_path, 'w', encoding='UTF-8') as f:
            json.dump(self.results, f)
        reshaped_df = pd.DataFrame(self._excel_rows(), columns=['A', 'B', 'C'])
        reshaped_df.to_excel(self.excel_path, index=False, header=False)
def main():
    """Run the pipeline: parse arguments, download, compare, write reports."""
    arg_parser = ArgParser()
    sample_finder = SampleFinder(arg_parser.args)
    sample_finder.download()
    sample_finder.find_offsets()
    sample_finder.write_results()


# Only run when executed as a script, so that importing this module does not
# parse sys.argv or start downloading.
if __name__ == '__main__':
    main()