diff --git a/README.rst b/README.rst index 1d4a152..3038d5f 100644 --- a/README.rst +++ b/README.rst @@ -1,5 +1,5 @@ ======================================= -syncstart(1) Version 1.1.1 \| syncstart +syncstart(1) Version 1.1.2 \| syncstart ======================================= SYNOPSIS @@ -7,15 +7,16 @@ SYNOPSIS Command line help:: - usage: syncstart [-h] [--version] [-v] [-b BEGIN] [-t TAKE] [-n] [-d] [-l LOWPASS] [-c] [-s] [-q] in1 in2 + usage: syncstart [options] in1 in2 - CLI interface to sync two media files using their audio or video streams. - ffmpeg needs to be available. + CLI program to compute timing offset (seconds) of media file 1 (in1) + in reference to media file 2 (in2) using their audio or video streams. + ffmpeg is required. positional arguments: - in1 First media file to sync with second. - in2 Second media file to sync with first. + in1 Offset media file. + in2 Reference media file. options: -h, --help show this help message and exit @@ -44,9 +45,9 @@ The steps taken by ``syncstart``: - process and extract sample audio/video clips using ffmpeg with some default and optional filters - read the two clips into a 1D array and apply optional z-score normalization - compute offset via correlation using scipy ifft/fft -- print ffmpeg/ffprobe output or optionally quiet that -- show diagrams to allow MANUAL correction using ZOOM or optionally suppress that -- print result +- print ffmpeg/ffprobe output (optional) +- show diagrams to allow MANUAL correction using ZOOM (optional) +- print result as human readable, or print and return result as CSV MANUAL correction with ZOOM: @@ -79,28 +80,44 @@ INSTALLATION To install for user only, do:: - pip install --user syncstart + pip install --user syncstart Or activate a virtualenv and do:: - pip install syncstart + pip install syncstart + +To make syncstart an executable in your PATH on Windows, do:: + + # Install pip if you don't have it + py -m ensurepip + # Upgrade and ensure pip prefers official wheels + py 
-m pip install --upgrade pip setuptools wheel + # Install/update pipx + py -m pip install --user --upgrade pipx + # Ensure python binaries are in PATH + py -m pipx ensurepath + pipx install syncstart + # Upgrade numpy inside pipx venv + pipx runpip syncstart install --upgrade numpy + # Test syncstart.exe by reading the version + syncstart --version EXAMPLES -------- :: - # compute audio offset with default settings: - syncstart from_s10.m4a from_gopro.m4p - - # compute audio offset using first 10 seconds with denoising, normalization and a 300 Hz lowpass filter: - syncstart video1.mp4 video2.mkv -t 10 -dnl 300 - - # compute video offset using first 20 seconds, don't show plots, only output final result: - syncstart video1.mp4 video2.mkv -vsq - - # compute video offset using seconds 15 to 25 with denoising, cropping and normalization: - syncstart video1.mp4 video2.mkv -b 15 -t 10 -vdcn + # compute audio offset with default settings: + syncstart from_s10.m4a from_gopro.m4p + + # compute audio offset using first 10 seconds with denoising, normalization and a 300 Hz lowpass filter: + syncstart -t 10 -dnl 300 video1.mp4 video2.mkv + + # compute video offset using first 20 seconds, don't show plots, only output final result: + syncstart -vsq video1.mp4 video2.mkv + + # compute video offset using seconds 15 to 25 with denoising, cropping and normalization: + syncstart -b 15 -t 10 -vdcn video1.mp4 video2.mkv License ------- diff --git a/syncstart.py b/syncstart.py index 1a2f5a7..4a64166 100755 --- a/syncstart.py +++ b/syncstart.py @@ -7,9 +7,9 @@ - process and extract sample audio/video clips using ffmpeg with some default and optional filters - read the two clips into a 1D array and apply optional z-score normalization - compute offset via correlation using scipy ifft/fft -- print ffmpeg/ffprobe output or optionally quiet that -- show diagrams to allow MANUAL correction using ZOOM or optionally suppress that -- print result +- print ffmpeg/ffprobe output (optional) +- 
show diagrams to allow MANUAL correction using ZOOM (optional) +- print result as human readable, or print and return result as CSV MANUAL correction with ZOOM: @@ -49,7 +49,7 @@ import sys import subprocess -__version__ = '1.1.1' +__version__ = '1.1.2' __author__ = """Roland Puntaier, drolex2""" __email__ = 'roland.puntaier@gmail.com' @@ -63,33 +63,37 @@ lowpass = 0 crop = False quiet = False -loglevel = 32 +loglevel = 8 -ffmpegvideo = 'ffmpeg -loglevel %s -hwaccel auto -ss %s -i "{}" %s -map 0:v -c:v mjpeg -q 1 -f mjpeg "{}"' -ffmpegwav = 'ffmpeg -loglevel %s -ss %s -i "{}" %s -map 0:a -c:a pcm_s16le -ac 1 -f wav "{}"' +ffmpegvideo = 'ffmpeg -hide_banner -loglevel %s -hwaccel auto -ss %s -i "{}" -t %s %s -map 0:v:0 -c:v mjpeg -q 1 -pix_fmt yuv420p -color_range pc -f mjpeg "{}"' +ffmpegwav = 'ffmpeg -hide_banner -loglevel %s -ss %s -i "{}" -t %s %s -map 0:a:0 -c:a pcm_s16le -ac 1 -f wav "{}"' audio_filters = { - 'default': 'atrim=0:%s,aresample=%s', + 'default': 'aresample=%s', 'lowpass': 'lowpass=f=%s', 'denoise': 'afftdn=nr=24:nf=-25' } + video_filters = { - 'default': 'trim=0:%s,fps=%s,format=gray,scale=-1:300', + 'default': 'fps=%s,format=gray,scale=-1:300', 'crop': 'crop=400:300', 'denoise': 'hqdn3d=3:3:2:2' } + def z_score_normalization(array): mean = np.mean(array) std_dev = np.std(array) normalized_array = (array - mean) / std_dev return normalized_array + def header(cmdstr): - hdr = '-'*len(cmdstr) + hdr = '-'*12 print('%s\n%s\n%s'%(hdr,cmdstr,hdr)) + def get_max_rate(in1,in2): probe_audio = 'ffprobe -v error -select_streams a:0 -show_entries stream=sample_rate -of default=noprint_wrappers=1'.split() probe_video = 'ffprobe -v error -select_streams v:0 -show_entries stream=avg_frame_rate -of default=noprint_wrappers=1'.split() @@ -113,6 +117,7 @@ def get_max_rate(in1,in2): exit(1) return max(rates) + def read_video(input_video): # Open input video cap = cv2.VideoCapture(str(input_video)) @@ -138,6 +143,7 @@ def read_video(input_video): cap.release() return 
brightdiff + def in_out(command,infile,outfile): cmdstr = command.format(infile,outfile) if not quiet: header(cmdstr) @@ -145,30 +151,32 @@ def in_out(command,infile,outfile): if 0 != ret: sys.exit(ret) + def get_sample(infile,rate): outname = pathlib.Path(infile).stem + '_sample' with tempfile.TemporaryDirectory() as tempdir: outfile = pathlib.Path(tempdir)/(outname) if video: #compare video - filters = [video_filters['default']%(take,rate)] + filters = [video_filters['default']%(rate)] if crop: filters.append(video_filters['crop']) if denoise: filters.append(video_filters['denoise']) filter_string = '-vf "' + ','.join(filters) + '"' - in_out(ffmpegvideo%(loglevel,begin,filter_string),infile,outfile) + in_out(ffmpegvideo%(loglevel,begin,take,filter_string),infile,outfile) s = read_video(outfile) else: #compare audio - filters = [audio_filters['default']%(take,rate)] + filters = [audio_filters['default']%(rate)] if int(lowpass): filters.append(audio_filters['lowpass']%lowpass) if denoise: filters.append(audio_filters['denoise']) filter_string = '-af "' + ','.join(filters) + '"' - in_out(ffmpegwav%(loglevel,begin,filter_string),infile,outfile) + in_out(ffmpegwav%(loglevel,begin,take,filter_string),infile,outfile) r,s = scipy.io.wavfile.read(outfile) return s + def fig1(title=None): fig = plt.figure(1) plt.margins(0, 0.1) @@ -181,12 +189,14 @@ def fig1(title=None): global ax ax = axs[0] + def show1(sr, s, color=None, title=None, v=None): if not color: fig1(title) if ax and v: ax.axvline(x=v,color='green') plt.plot(np.arange(len(s))/sr, s, color or 'black') if not color: plt.show() + def show2(sr,s1,s2,plus1minus2,in1,in2): fig1("Matchup") t1,t2 = (0,-plus1minus2) if plus1minus2 < 0 else (plus1minus2,0) @@ -205,6 +215,7 @@ def show2(sr,s1,s2,plus1minus2,in1,in2): f'{ffo} ({ffoclr})', ]) + def on_zoom(event_ax): nonlocal dt, choice, iszoom choice = plt.fix.get_status()[0] @@ -252,22 +263,8 @@ def on_zoom(event_ax): else: return ff,noff else: - return ff, toff - + 
return ff, round(toff, 3) -def corrabs(s1,s2): - ls1 = len(s1) - ls2 = len(s2) - padsize = ls1+ls2+1 - padsize = 2**(int(np.log(padsize)/np.log(2))+1) - s1pad = np.zeros(padsize) - s1pad[:ls1] = s1 - s2pad = np.zeros(padsize) - s2pad[:ls2] = s2 - corr = scipy.fft.ifft(scipy.fft.fft(s1pad)*np.conj(scipy.fft.fft(s2pad))) - ca = np.absolute(corr) - xmax = np.argmax(ca) - return ls1,ls2,padsize,xmax,ca def cli_parser(**ka): import argparse @@ -280,11 +277,11 @@ def cli_parser(**ka): if 'in1' not in ka: parser.add_argument( 'in1', - help='First media file to sync with second.') + help='Offset media file.') if 'in2' not in ka: parser.add_argument( 'in2', - help='Second media file to sync with first.') + help='Reference media file.') if 'video' not in ka: parser.add_argument( '-v','--video', @@ -352,11 +349,41 @@ def cli_parser(**ka): Output will be: file_to_advance,seconds_to_advance') return parser -def file_offset(**ka): - """CLI interface to sync two media files using their audio or video streams. - ffmpeg needs to be available. + +def estimate_offset(s1,s2): + """ + Estimate the delay needed to apply to s1 to align it with s2. + + Returns: + offset_samples [int] + Number of samples s1 is offset from s2. + Positive -> s1 lags s2 (s1 needs advancement) + Negative -> s1 leads s2 (s1 needs delay) + ca [ndarray] + Correlation magnitude array """ + ls1 = len(s1) + ls2 = len(s2) + padsize = ls1+ls2+1 + padsize = 2**(int(np.log(padsize)/np.log(2))+1) + s1pad = np.zeros(padsize) + s1pad[:ls1] = s1 + s2pad = np.zeros(padsize) + s2pad[:ls2] = s2 + corr = scipy.fft.ifft(scipy.fft.fft(s1pad)*np.conj(scipy.fft.fft(s2pad))) + ca = np.absolute(corr) + xmax = np.argmax(ca) + offset_samples = xmax + if xmax > padsize // 2: + offset_samples = xmax-padsize + return offset_samples, ca + +def file_offset(**ka): + """CLI program to compute timing offset (seconds) of media file 1 (in1) +in reference to media file 2 (in2) using their audio or video streams. + ffmpeg is required. 
+ """ parser = cli_parser(**ka) args = parser.parse_args().__dict__ ka.update(args) @@ -365,35 +392,30 @@ def file_offset(**ka): in1,in2,begin,take = ka['in1'],ka['in2'],ka['begin'],ka['take'] video,crop,quiet,show = ka['video'],ka['crop'],ka['quiet'],ka['show'] normalize,denoise,lowpass = ka['normalize'],ka['denoise'],ka['lowpass'] - loglevel = 16 if quiet else 32 + loglevel = 8 if quiet else 32 sr = get_max_rate(in1,in2) s1,s2 = get_sample(in1,sr),get_sample(in2,sr) if normalize: s1,s2 = z_score_normalization(s1),z_score_normalization(s2) - ls1,ls2,padsize,xmax,ca = corrabs(s1,s2) - if show: show1(sr,ca,title='Correlation',v=xmax/sr) + offset_samples, corr = estimate_offset(s1,s2) + if show: show1(sr, corr, title='Correlation', v=-offset_samples/sr) sync_text = """ -============================================================================== -%s needs 'ffmpeg -ss %s' cut to get in sync -============================================================================== +============ +The first input is offset %s' seconds. +============ """ - if xmax > padsize // 2: - if show: - file,offset = show2(sr,s1,s2,-(padsize-xmax),in1,in2) - else: - file,offset = in2,(padsize-xmax)/sr + if show: + file, offset_seconds = show2(sr, s1, s2, offset_samples, in1, in2) else: - if show: - file,offset = show2(sr,s1,s2,xmax,in1,in2) - else: - file,offset = in1,xmax/sr + file,offset_seconds = in1,round(offset_samples/sr, 3) if not quiet: #default - print(sync_text%(file,offset)) + print(sync_text%(offset_seconds)) else: #quiet ## print csv: file_to_advance,seconds_to_advance - print("%s,%s"%(file,offset)) - return file,offset + print("%s,%s"%(file,offset_seconds)) + return file,offset_seconds + main = file_offset if __name__ == '__main__':