From b8c3938a0bf2ff02bd7f6938d5569a3458f183c8 Mon Sep 17 00:00:00 2001 From: Alvaro Laserna Date: Tue, 1 Jun 2021 12:37:58 +0300 Subject: [PATCH 1/7] add audio delay to the output --- src/alignment.cc | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/alignment.cc b/src/alignment.cc index 4d4928d5..c94060e3 100644 --- a/src/alignment.cc +++ b/src/alignment.cc @@ -11,6 +11,8 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. +#include +using namespace std; #include "alignment.h" @@ -81,6 +83,12 @@ std::tuple Alignment::GloballyAlign( } AudioSignal new_deg_signal{std::move(new_deg_matrix), deg_signal.sample_rate}; + static int myStaticVar; + if (myStaticVar == 0) { + myStaticVar = 1; + cout << "Audio Delay: " << float(best_lag)/float(48000) << " at 48KHz\n"; + cout << "Audio Delay: " << float(best_lag)/float(44100) << " at 44.1KHz\n"; + } return std::make_tuple(new_deg_signal, best_lag / (double) deg_signal.sample_rate); } } From 3e069fc0f3f14ba31755dcbfba7c1f1db7f7a952 Mon Sep 17 00:00:00 2001 From: Alvaro Laserna Date: Tue, 1 Jun 2021 12:48:24 +0300 Subject: [PATCH 2/7] add other frequency --- src/alignment.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/alignment.cc b/src/alignment.cc index c94060e3..585ad01b 100644 --- a/src/alignment.cc +++ b/src/alignment.cc @@ -88,6 +88,7 @@ std::tuple Alignment::GloballyAlign( myStaticVar = 1; cout << "Audio Delay: " << float(best_lag)/float(48000) << " at 48KHz\n"; cout << "Audio Delay: " << float(best_lag)/float(44100) << " at 44.1KHz\n"; + cout << "Audio Delay: " << float(best_lag)/float(16000) << " at 16KHz\n"; } return std::make_tuple(new_deg_signal, best_lag / (double) deg_signal.sample_rate); } From fa3251b9fa90935b8fbb03c6d43fa6bac661ecfe Mon Sep 17 00:00:00 2001 From: Alvaro Laserna Date: Tue, 1 Jun 2021 16:49:35 +0300 Subject: [PATCH 3/7] fix --- src/alignment.cc | 
4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/alignment.cc b/src/alignment.cc index 585ad01b..976c2d0e 100644 --- a/src/alignment.cc +++ b/src/alignment.cc @@ -86,9 +86,7 @@ std::tuple Alignment::GloballyAlign( static int myStaticVar; if (myStaticVar == 0) { myStaticVar = 1; - cout << "Audio Delay: " << float(best_lag)/float(48000) << " at 48KHz\n"; - cout << "Audio Delay: " << float(best_lag)/float(44100) << " at 44.1KHz\n"; - cout << "Audio Delay: " << float(best_lag)/float(16000) << " at 16KHz\n"; + cout << "Audio Delay: " << float(best_lag)/float(ref_signal.sample_rate) << " at " << ref_signal.sample_rate << "Hz\n"; } return std::make_tuple(new_deg_signal, best_lag / (double) deg_signal.sample_rate); } From 0c17cf9b1d450f8b3b9470eecc6317e94307cb77 Mon Sep 17 00:00:00 2001 From: Alvaro Laserna Date: Tue, 1 Jun 2021 17:00:54 +0300 Subject: [PATCH 4/7] change sign of value delay --- src/alignment.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/alignment.cc b/src/alignment.cc index 976c2d0e..f4bbf380 100644 --- a/src/alignment.cc +++ b/src/alignment.cc @@ -86,7 +86,7 @@ std::tuple Alignment::GloballyAlign( static int myStaticVar; if (myStaticVar == 0) { myStaticVar = 1; - cout << "Audio Delay: " << float(best_lag)/float(ref_signal.sample_rate) << " at " << ref_signal.sample_rate << "Hz\n"; + cout << "Audio Delay: " << (-1)*float(best_lag)/float(ref_signal.sample_rate) << " at " << ref_signal.sample_rate << "Hz\n"; } return std::make_tuple(new_deg_signal, best_lag / (double) deg_signal.sample_rate); } From 0ba1531a0cd795596ce1716688162e4c1585b32f Mon Sep 17 00:00:00 2001 From: Alvaro Laserna Date: Wed, 2 Jun 2021 12:13:18 +0300 Subject: [PATCH 5/7] add readme and patch alignment --- README.md | 6 ++++++ src/alignment.cc | 4 +++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 05a925d6..58f80ff7 100755 --- a/README.md +++ b/README.md @@ -1,3 +1,9 @@ +# ViSQOL Modified + +This 
is a ViSQOL modified version that returns Audio Delay and its Sample Adjustments (In Seconds). To get a better understanding, ViQOL algorithm first aligns the two samples (reference and degraded) so they can be compared. First, it does a global alignment (Considered as the Audio Delay) which uses cross-correlation to find the best match, and then it does `Voice Activity Detection` to later do `Patch Alignment` (Considered as Audio Delay Sample Adjustment). The sample adjustement or patch alginment is important to understand how the audio behaves, not just the quality, but if there are speed up/slow down regions, this will translate into higher Sample Adjustments, and so the quality of the audio will be worse if these numbers are too high (This depends on the lenght of your audio file and the aims of the application to test - 7 second -> >~ 0.05) + +The original ViSQOL code is in [here](https://github.com/google/visqol) + # ViSQOL ViSQOL (Virtual Speech Quality Objective Listener) is an objective, full-reference metric for perceived audio quality. It uses a spectro-temporal measure of similarity between a reference and a test speech signal to produce a MOS-LQO (Mean Opinion Score - Listening Quality Objective) score. MOS-LQO scores range from 1 (the worst) to 5 (the best). 
diff --git a/src/alignment.cc b/src/alignment.cc index f4bbf380..739baf31 100644 --- a/src/alignment.cc +++ b/src/alignment.cc @@ -86,7 +86,9 @@ std::tuple Alignment::GloballyAlign( static int myStaticVar; if (myStaticVar == 0) { myStaticVar = 1; - cout << "Audio Delay: " << (-1)*float(best_lag)/float(ref_signal.sample_rate) << " at " << ref_signal.sample_rate << "Hz\n"; + cout << "Audio Delay: " << (-1)*float(best_lag)/float(ref_signal.sample_rate) << " Seconds at " << ref_signal.sample_rate << "Hz\n"; + } else { + cout << "Audio Delay Sample Adjustment: " << (-1)*float(best_lag)/float(ref_signal.sample_rate) << " Seconds\n"; } return std::make_tuple(new_deg_signal, best_lag / (double) deg_signal.sample_rate); } From bda9d250066e325604a73049f6f5e52596b46079 Mon Sep 17 00:00:00 2001 From: Alvaro Laserna Date: Wed, 2 Jun 2021 12:16:10 +0300 Subject: [PATCH 6/7] add arxiv paper --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 58f80ff7..30ce2356 100755 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # ViSQOL Modified -This is a ViSQOL modified version that returns Audio Delay and its Sample Adjustments (In Seconds). To get a better understanding, ViQOL algorithm first aligns the two samples (reference and degraded) so they can be compared. First, it does a global alignment (Considered as the Audio Delay) which uses cross-correlation to find the best match, and then it does `Voice Activity Detection` to later do `Patch Alignment` (Considered as Audio Delay Sample Adjustment). 
The sample adjustement or patch alginment is important to understand how the audio behaves, not just the quality, but if there are speed up/slow down regions, this will translate into higher Sample Adjustments, and so the quality of the audio will be worse if these numbers are too high (This depends on the lenght of your audio file and the aims of the application to test - 7 second -> >~ 0.05) +This is a ViSQOL modified version that returns Audio Delay and its Sample Adjustments (In Seconds). To get a better understanding, ViQOL algorithm first aligns the two samples (reference and degraded) so they can be compared. First, it does a global alignment (Considered as the Audio Delay) which uses cross-correlation to find the best match, and then it does `Voice Activity Detection` to later do `Patch Alignment` (Considered as Audio Delay Sample Adjustment). For further explanation of the ViSQOL tool check [this arxiv paper](https://arxiv.org/pdf/2004.09584.pdf). The sample adjustement or patch alginment is important to understand how the audio behaves, not just the quality, but if there are speed up/slow down regions, this will translate into higher Sample Adjustments, and so the quality of the audio will be worse if these numbers are too high (This depends on the lenght of your audio file and the aims of the application to test - 7 second -> >~ 0.05) The original ViSQOL code is in [here](https://github.com/google/visqol) From 845f6687baedb3a8977150755d0bfb352b2199cc Mon Sep 17 00:00:00 2001 From: Alvaro Laserna Date: Wed, 2 Jun 2021 12:43:18 +0300 Subject: [PATCH 7/7] fix readme --- README.md | 2 +- testdata/.DS_Store | Bin 0 -> 6148 bytes 2 files changed, 1 insertion(+), 1 deletion(-) create mode 100644 testdata/.DS_Store diff --git a/README.md b/README.md index 30ce2356..e1481d9b 100755 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # ViSQOL Modified -This is a ViSQOL modified version that returns Audio Delay and its Sample Adjustments (In Seconds). 
To get a better understanding, ViQOL algorithm first aligns the two samples (reference and degraded) so they can be compared. First, it does a global alignment (Considered as the Audio Delay) which uses cross-correlation to find the best match, and then it does `Voice Activity Detection` to later do `Patch Alignment` (Considered as Audio Delay Sample Adjustment). For further explanation of the ViSQOL tool check [this arxiv paper](https://arxiv.org/pdf/2004.09584.pdf). The sample adjustement or patch alginment is important to understand how the audio behaves, not just the quality, but if there are speed up/slow down regions, this will translate into higher Sample Adjustments, and so the quality of the audio will be worse if these numbers are too high (This depends on the lenght of your audio file and the aims of the application to test - 7 second -> >~ 0.05) +This is a ViSQOL modified version that returns Audio Delay and its Sample Adjustments (In Seconds). To get a better understanding, ViSQOL algorithm aligns the two samples (reference and degraded) so they can be compared. First, it does a global alignment (Considered as the Audio Delay) which uses cross-correlation to find the best match, and then it does `Voice Activity Detection` to later do `Patch Alignment` (Considered as Audio Delay Sample Adjustment). For further explanation of the ViSQOL tool check [this arxiv paper](https://arxiv.org/pdf/2004.09584.pdf). 
The sample adjustment or patch alignment is important to understand how the audio behaves, not just the quality, but if there are speed up/slow down regions, this will translate into higher Sample Adjustments, and so the audio "experience" will be worse if these numbers are too high (This depends on the length of your audio file and the aims of the application to test - 7 second -> >~ 0.05) The original ViSQOL code is in [here](https://github.com/google/visqol) diff --git a/testdata/.DS_Store b/testdata/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..2977237b48d16b7c7c2f4c778c2c9ddf472939df GIT binary patch literal 6148 zcmeHKJ5Iw;5S)b+k)TLP`L4hXo}zGq93Y^8pok0tLG6ljakR{Sgk)KuG&E?|+MV~- zJJ0eIUM~P!zu#X2D*#KnBR)OM&F{OI(2Sbbi z#0Aq~T*oXyY@Q(Y!ZDE0a|icjG!J4AG8> j(T=(Cc6=2@S=W5c`@L{X3_9~cC+cUwb&*MdzgFM~bDS1P literal 0 HcmV?d00001