From 73acf4e26eeb8884d565a15db38e32300089300e Mon Sep 17 00:00:00 2001
From: Wayne Lai <abev66@gmail.com>
Date: Tue, 22 Aug 2023 11:47:14 +0800
Subject: [PATCH 1/3] Do not transcode audio when separating the soundtrack

---
 cores/add.py   | 4 ++--
 cores/clean.py | 6 +++---
 cores/init.py  | 4 ++--
 cores/style.py | 4 ++--
 util/ffmpeg.py | 2 +-
 5 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/cores/add.py b/cores/add.py
index edf8027..7f9434b 100644
--- a/cores/add.py
+++ b/cores/add.py
@@ -126,5 +126,5 @@ def addmosaic_video(opt,netS):
     print('Step:4/4 -- Convert images to video')
     ffmpeg.image2video( fps,
                         opt.temp_dir+'/addmosaic_image/output_%06d.'+opt.tempimage_type,
-                        opt.temp_dir+'/voice_tmp.mp3',
-                         os.path.join(opt.result_dir,os.path.splitext(os.path.basename(path))[0]+'_add.mp4'))
\ No newline at end of file
+                        opt.temp_dir+'/voice_tmp.mkv',
+                         os.path.join(opt.result_dir,os.path.splitext(os.path.basename(path))[0]+'_add.mp4'))
diff --git a/cores/clean.py b/cores/clean.py
index 285542b..227fa5b 100644
--- a/cores/clean.py
+++ b/cores/clean.py
@@ -152,7 +152,7 @@ def cleanmosaic_video_byframe(opt,netG,netM):
     print('Step:4/4 -- Convert images to video')
     ffmpeg.image2video( fps,
                 opt.temp_dir+'/replace_mosaic/output_%06d.'+opt.tempimage_type,
-                opt.temp_dir+'/voice_tmp.mp3',
+                opt.temp_dir+'/voice_tmp.mkv',
                  os.path.join(opt.result_dir,os.path.splitext(os.path.basename(path))[0]+'_clean.mp4'))  
 
 def cleanmosaic_video_fusion(opt,netG,netM):
@@ -245,5 +245,5 @@ def write_result():
     print('Step:4/4 -- Convert images to video')
     ffmpeg.image2video( fps,
                 opt.temp_dir+'/replace_mosaic/output_%06d.'+opt.tempimage_type,
-                opt.temp_dir+'/voice_tmp.mp3',
-                 os.path.join(opt.result_dir,os.path.splitext(os.path.basename(path))[0]+'_clean.mp4')) 
\ No newline at end of file
+                opt.temp_dir+'/voice_tmp.mkv',
+                 os.path.join(opt.result_dir,os.path.splitext(os.path.basename(path))[0]+'_clean.mp4')) 
diff --git a/cores/init.py b/cores/init.py
index 5993c58..4d23eef 100644
--- a/cores/init.py
+++ b/cores/init.py
@@ -21,11 +21,11 @@ def video_init(opt,path):
     
     print('Step:1/4 -- Convert video to images')
     util.file_init(opt)
-    ffmpeg.video2voice(path,opt.temp_dir+'/voice_tmp.mp3',opt.start_time,opt.last_time)
+    ffmpeg.video2voice(path,opt.temp_dir+'/voice_tmp.mkv',opt.start_time,opt.last_time)
     ffmpeg.video2image(path,opt.temp_dir+'/video2image/output_%06d.'+opt.tempimage_type,fps,opt.start_time,opt.last_time)
     imagepaths = os.listdir(opt.temp_dir+'/video2image')
     imagepaths.sort()
     step = {'step':2,'frame':0}
     util.savejson(os.path.join(opt.temp_dir,'step.json'),step)
 
-    return fps,imagepaths,height,width
\ No newline at end of file
+    return fps,imagepaths,height,width
diff --git a/cores/style.py b/cores/style.py
index 32834ad..8ba3a4e 100644
--- a/cores/style.py
+++ b/cores/style.py
@@ -46,5 +46,5 @@ def styletransfer_video(opt,netG):
     print('Step:4/4 -- Convert images to video')
     ffmpeg.image2video( fps,
                 opt.temp_dir+'/style_transfer/output_%06d.'+opt.tempimage_type,
-                opt.temp_dir+'/voice_tmp.mp3',
-                 os.path.join(opt.result_dir,os.path.splitext(os.path.basename(path))[0]+'_'+suffix+'.mp4')) 
\ No newline at end of file
+                opt.temp_dir+'/voice_tmp.mkv',
+                 os.path.join(opt.result_dir,os.path.splitext(os.path.basename(path))[0]+'_'+suffix+'.mp4')) 
diff --git a/util/ffmpeg.py b/util/ffmpeg.py
index 6efd686..8d22164 100755
--- a/util/ffmpeg.py
+++ b/util/ffmpeg.py
@@ -43,7 +43,7 @@ def video2image(videopath, imagepath, fps=0, start_time='00:00:00', last_time='0
     run(args)
 
 def video2voice(videopath, voicepath, start_time='00:00:00', last_time='00:00:00'):
-    args = ['ffmpeg', '-i', '"'+videopath+'"','-async 1 -f mp3','-b:a 320k']
+    args = ['ffmpeg', '-i', '"'+videopath+'"','-async 1','-vn','-c:a copy']
     if last_time != '00:00:00':
         args += ['-ss', start_time]
         args += ['-t', last_time]

From dde8ac27fd60dc207a1693234606765d1a0ede87 Mon Sep 17 00:00:00 2001
From: Wayne Lai <abev66@gmail.com>
Date: Tue, 22 Aug 2023 11:53:30 +0800
Subject: [PATCH 2/3] Use matroska as output video container format

---
 cores/add.py   | 2 +-
 cores/clean.py | 4 ++--
 cores/style.py | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/cores/add.py b/cores/add.py
index 7f9434b..2aefcdc 100644
--- a/cores/add.py
+++ b/cores/add.py
@@ -127,4 +127,4 @@ def addmosaic_video(opt,netS):
     ffmpeg.image2video( fps,
                         opt.temp_dir+'/addmosaic_image/output_%06d.'+opt.tempimage_type,
                         opt.temp_dir+'/voice_tmp.mkv',
-                         os.path.join(opt.result_dir,os.path.splitext(os.path.basename(path))[0]+'_add.mp4'))
+                         os.path.join(opt.result_dir,os.path.splitext(os.path.basename(path))[0]+'_add.mkv'))
diff --git a/cores/clean.py b/cores/clean.py
index 227fa5b..c4383af 100644
--- a/cores/clean.py
+++ b/cores/clean.py
@@ -153,7 +153,7 @@ def cleanmosaic_video_byframe(opt,netG,netM):
     ffmpeg.image2video( fps,
                 opt.temp_dir+'/replace_mosaic/output_%06d.'+opt.tempimage_type,
                 opt.temp_dir+'/voice_tmp.mkv',
-                 os.path.join(opt.result_dir,os.path.splitext(os.path.basename(path))[0]+'_clean.mp4'))  
+                 os.path.join(opt.result_dir,os.path.splitext(os.path.basename(path))[0]+'_clean.mkv'))
 
 def cleanmosaic_video_fusion(opt,netG,netM):
     path = opt.media_path
@@ -246,4 +246,4 @@ def write_result():
     ffmpeg.image2video( fps,
                 opt.temp_dir+'/replace_mosaic/output_%06d.'+opt.tempimage_type,
                 opt.temp_dir+'/voice_tmp.mkv',
-                 os.path.join(opt.result_dir,os.path.splitext(os.path.basename(path))[0]+'_clean.mp4')) 
+                 os.path.join(opt.result_dir,os.path.splitext(os.path.basename(path))[0]+'_clean.mkv'))
diff --git a/cores/style.py b/cores/style.py
index 8ba3a4e..fe2e1e9 100644
--- a/cores/style.py
+++ b/cores/style.py
@@ -47,4 +47,4 @@ def styletransfer_video(opt,netG):
     ffmpeg.image2video( fps,
                 opt.temp_dir+'/style_transfer/output_%06d.'+opt.tempimage_type,
                 opt.temp_dir+'/voice_tmp.mkv',
-                 os.path.join(opt.result_dir,os.path.splitext(os.path.basename(path))[0]+'_'+suffix+'.mp4')) 
+                 os.path.join(opt.result_dir,os.path.splitext(os.path.basename(path))[0]+'_'+suffix+'.mkv'))

From 12bd52e9417a2103c4381b6daccdf78ed6723f03 Mon Sep 17 00:00:00 2001
From: Wayne Lai <abev66@gmail.com>
Date: Tue, 22 Aug 2023 11:55:11 +0800
Subject: [PATCH 3/3] Skip output audio transcoding

---
 util/ffmpeg.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/util/ffmpeg.py b/util/ffmpeg.py
index 8d22164..7f4627c 100755
--- a/util/ffmpeg.py
+++ b/util/ffmpeg.py
@@ -53,7 +53,7 @@ def video2voice(videopath, voicepath, start_time='00:00:00', last_time='00:00:00
 def image2video(fps,imagepath,voicepath,videopath):
     os.system('ffmpeg -y -r '+str(fps)+' -i '+imagepath+' -vcodec libx264 '+os.path.split(voicepath)[0]+'/video_tmp.mp4')
     if os.path.exists(voicepath):
-        os.system('ffmpeg -i '+os.path.split(voicepath)[0]+'/video_tmp.mp4'+' -i "'+voicepath+'" -vcodec copy -acodec aac '+videopath)
+        os.system('ffmpeg -i '+os.path.split(voicepath)[0]+'/video_tmp.mp4'+' -i "'+voicepath+'" -c:v copy -c:a copy '+videopath)
     else:
         os.system('ffmpeg -i '+os.path.split(voicepath)[0]+'/video_tmp.mp4 '+videopath)