-
Notifications
You must be signed in to change notification settings - Fork 395
WIP: Add timit recipe #96
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Closed
Closed
Changes from all commits
Commits
Show all changes
10 commits
Select commit
Hold shift + click to select a range
18d7dd2
Update train.py
luomingshuang 906d0ad
Update train.py
luomingshuang e5feabb
Update train.py
luomingshuang f3fd279
Merge branch 'k2-fsa:master' into master
luomingshuang 69c8720
Merge branch 'k2-fsa:master' into master
luomingshuang e023a9d
Add timit recipe for icefall
luomingshuang 4beb25c
Update timit recipe
luomingshuang 5e7c733
Update prepare.sh
luomingshuang e2bb9b4
Update decode.py
luomingshuang a9cdaae
Delete RESULTS.md
luomingshuang File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
Empty file.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,155 @@ | ||
| #!/usr/bin/env python3 | ||
| # Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang) | ||
| # | ||
| # See ../../../../LICENSE for clarification regarding multiple authors | ||
| # | ||
| # Licensed under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
|
|
||
|
|
||
| """ | ||
| This script takes as input lang_dir and generates HLG from | ||
|
|
||
| - H, the ctc topology, built from tokens contained in lang_dir/lexicon.txt | ||
| - L, the lexicon, built from lang_dir/L_disambig.pt | ||
|
|
||
| Caution: We use a lexicon that contains disambiguation symbols | ||
|
|
||
| - G, the LM, built from data/lm/G_3_gram.fst.txt | ||
|
|
||
| The generated HLG is saved in $lang_dir/HLG.pt | ||
| """ | ||
| import argparse | ||
| import logging | ||
| from pathlib import Path | ||
|
|
||
| import k2 | ||
| import torch | ||
|
|
||
| from icefall.lexicon import Lexicon | ||
|
|
||
|
|
||
def get_args():
    """Parse and return command-line arguments.

    Returns:
        argparse.Namespace with a single attribute ``lang_dir``, the
        input/output directory (e.g., data/lang_phone).
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "--lang-dir",
        type=str,
        help="""Input and output directory.
        """,
    )
    return arg_parser.parse_args()
|
|
||
|
|
||
def compile_HLG(lang_dir: str) -> k2.Fsa:
    """Build the HLG decoding graph for a language directory.

    H is the CTC topology built from the tokens in ``lang_dir``; L is the
    lexicon loaded from ``lang_dir/L_disambig.pt`` (it contains
    disambiguation symbols); G is the LM loaded from
    data/lm/G_3_gram.fst.txt and cached as data/lm/G.pt after the first
    run.

    Args:
      lang_dir:
        The language directory, e.g., data/lang_phone.

    Return:
      An FSA representing HLG.
    """
    lexicon = Lexicon(lang_dir)
    max_token_id = max(lexicon.tokens)
    logging.info(f"Building ctc_topo. max_token_id: {max_token_id}")
    H = k2.ctc_topo(max_token_id)
    L = k2.Fsa.from_dict(torch.load(f"{lang_dir}/L_disambig.pt"))

    if Path("data/lm/G.pt").is_file():
        logging.info("Loading pre-compiled G")
        d = torch.load("data/lm/G.pt")
        G = k2.Fsa.from_dict(d)
    else:
        logging.info("Loading G_3_gram.fst.txt")
        with open("data/lm/G_3_gram.fst.txt") as f:
            G = k2.Fsa.from_openfst(f.read(), acceptor=False)
        # Cache the compiled G so later runs skip the slow OpenFst
        # text parsing above.
        torch.save(G.as_dict(), "data/lm/G.pt")

    # Disambiguation symbols (#0, #1, ...) occupy the ID range starting
    # at #0; any label >= these IDs is a disambiguation symbol.
    first_token_disambig_id = lexicon.token_table["#0"]
    first_word_disambig_id = lexicon.word_table["#0"]

    # k2.compose requires arc-sorted inputs.
    L = k2.arc_sort(L)
    G = k2.arc_sort(G)

    logging.info("Intersecting L and G")
    LG = k2.compose(L, G)
    logging.info(f"LG shape: {LG.shape}")

    logging.info("Connecting LG")
    LG = k2.connect(LG)
    logging.info(f"LG shape after k2.connect: {LG.shape}")

    # NOTE(review): the type of aux_labels is logged before and after
    # determinization — presumably its representation changes there
    # (tensor vs. ragged); confirm against the k2 docs.
    logging.info(type(LG.aux_labels))
    logging.info("Determinizing LG")

    LG = k2.determinize(LG)
    logging.info(type(LG.aux_labels))

    logging.info("Connecting LG after k2.determinize")
    LG = k2.connect(LG)

    logging.info("Removing disambiguation symbols on LG")

    # Map disambiguation tokens/words to epsilon (0) now that
    # determinization no longer needs them.
    LG.labels[LG.labels >= first_token_disambig_id] = 0

    LG.aux_labels.values[LG.aux_labels.values >= first_word_disambig_id] = 0

    LG = k2.remove_epsilon(LG)
    logging.info(f"LG shape after k2.remove_epsilon: {LG.shape}")

    LG = k2.connect(LG)
    LG.aux_labels = LG.aux_labels.remove_values_eq(0)

    logging.info("Arc sorting LG")
    LG = k2.arc_sort(LG)

    logging.info("Composing H and LG")
    # CAUTION: The name of the inner_labels is fixed
    # to `tokens`. If you want to change it, please
    # also change other places in icefall that are using
    # it.
    HLG = k2.compose(H, LG, inner_labels="tokens")

    # Fixed: these two log messages previously said "LG" although the
    # operations act on HLG.
    logging.info("Connecting HLG")
    HLG = k2.connect(HLG)

    logging.info("Arc sorting HLG")
    HLG = k2.arc_sort(HLG)
    logging.info(f"HLG.shape: {HLG.shape}")

    return HLG
|
|
||
|
|
||
def main():
    """Compile HLG for the directory named by --lang-dir, unless it already exists."""
    lang_dir = Path(get_args().lang_dir)
    hlg_path = lang_dir / "HLG.pt"

    # Skip all work when the output is already present.
    if hlg_path.is_file():
        logging.info(f"{lang_dir}/HLG.pt already exists - skipping")
        return

    logging.info(f"Processing {lang_dir}")
    HLG = compile_HLG(lang_dir)
    logging.info(f"Saving HLG.pt to {lang_dir}")
    torch.save(HLG.as_dict(), f"{lang_dir}/HLG.pt")
|
|
||
|
|
||
if __name__ == "__main__":
    # Configure logging before any work so every message is captured.
    log_format = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s"
    logging.basicConfig(format=log_format, level=logging.INFO)
    main()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,97 @@ | ||
| #!/usr/bin/env python3 | ||
| # Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang) | ||
| # | ||
| # See ../../../../LICENSE for clarification regarding multiple authors | ||
| # | ||
| # Licensed under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
|
|
||
|
|
||
| """ | ||
| This file computes fbank features of the musan dataset. | ||
| It looks for manifests in the directory data/manifests. | ||
|
|
||
| The generated fbank features are saved in data/fbank. | ||
| """ | ||
|
|
||
| import logging | ||
| import os | ||
| from pathlib import Path | ||
|
|
||
| import torch | ||
| from lhotse import CutSet, Fbank, FbankConfig, LilcomHdf5Writer, combine | ||
| from lhotse.recipes.utils import read_manifests_if_cached | ||
|
|
||
| from icefall.utils import get_executor | ||
|
|
||
| # Torch's multithreaded behavior needs to be disabled or | ||
| # it wastes a lot of CPU and slow things down. | ||
| # Do this outside of main() in case it needs to take effect | ||
| # even when we are not invoking the main (e.g. when spawning subprocesses). | ||
| torch.set_num_threads(1) | ||
| torch.set_num_interop_threads(1) | ||
|
|
||
|
|
||
def compute_fbank_musan():
    """Compute fbank features for the musan dataset.

    Reads manifests from data/manifests and writes the combined cut
    manifest and features to data/fbank.  Does nothing if the output
    cuts_musan.json.gz already exists.
    """
    src_dir = Path("data/manifests")
    output_dir = Path("data/fbank")
    num_jobs = min(15, os.cpu_count())
    num_mel_bins = 80

    musan_cuts_path = output_dir / "cuts_musan.json.gz"

    # Check for the final output first: previously this check ran only
    # after reading all manifests, wasting that work on a no-op rerun.
    if musan_cuts_path.is_file():
        logging.info(f"{musan_cuts_path} already exists - skipping")
        return

    dataset_parts = (
        "music",
        "speech",
        "noise",
    )
    manifests = read_manifests_if_cached(
        dataset_parts=dataset_parts, output_dir=src_dir
    )
    assert manifests is not None

    logging.info("Extracting features for Musan")

    extractor = Fbank(FbankConfig(num_mel_bins=num_mel_bins))

    with get_executor() as ex:  # Initialize the executor only once.
        # create chunks of Musan with duration 5 - 10 seconds
        musan_cuts = (
            CutSet.from_manifests(
                recordings=combine(
                    part["recordings"] for part in manifests.values()
                )
            )
            .cut_into_windows(10.0)
            .filter(lambda c: c.duration > 5)
            .compute_and_store_features(
                extractor=extractor,
                storage_path=f"{output_dir}/feats_musan",
                # when an executor is specified, make more partitions
                num_jobs=num_jobs if ex is None else 80,
                executor=ex,
                storage_type=LilcomHdf5Writer,
            )
        )
        musan_cuts.to_json(musan_cuts_path)
|
|
||
|
|
||
if __name__ == "__main__":
    # Configure logging before extraction starts.
    log_format = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s"
    logging.basicConfig(format=log_format, level=logging.INFO)
    compute_fbank_musan()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,97 @@ | ||
| #!/usr/bin/env python3 | ||
| # Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang | ||
| # Mingshuang Luo) | ||
| # | ||
| # See ../../../../LICENSE for clarification regarding multiple authors | ||
| # | ||
| # Licensed under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
|
|
||
|
|
||
| """ | ||
| This file computes fbank features of the LibriSpeech dataset. | ||
| It looks for manifests in the directory data/manifests. | ||
|
|
||
| The generated fbank features are saved in data/fbank. | ||
| """ | ||
|
|
||
| import logging | ||
| import os | ||
| from pathlib import Path | ||
|
|
||
| import torch | ||
| from lhotse import CutSet, Fbank, FbankConfig, LilcomHdf5Writer | ||
| from lhotse.recipes.utils import read_manifests_if_cached | ||
|
|
||
| from icefall.utils import get_executor | ||
|
|
||
| # Torch's multithreaded behavior needs to be disabled or | ||
| # it wastes a lot of CPU and slow things down. | ||
| # Do this outside of main() in case it needs to take effect | ||
| # even when we are not invoking the main (e.g. when spawning subprocesses). | ||
| torch.set_num_threads(1) | ||
| torch.set_num_interop_threads(1) | ||
|
|
||
|
|
||
def compute_fbank_timit():
    """Compute fbank features for the TIMIT dataset.

    Reads the TRAIN/DEV/TEST manifests from data/manifests and writes the
    features and per-partition cut manifests to data/fbank.  The training
    partition is additionally augmented with 0.9x/1.1x speed perturbation.
    """
    src_dir = Path("data/manifests")
    output_dir = Path("data/fbank")
    num_jobs = min(15, os.cpu_count())
    num_mel_bins = 80

    dataset_parts = (
        "TRAIN",
        "DEV",
        "TEST",
    )
    manifests = read_manifests_if_cached(
        dataset_parts=dataset_parts, output_dir=src_dir
    )
    assert manifests is not None

    extractor = Fbank(FbankConfig(num_mel_bins=num_mel_bins))

    with get_executor() as ex:  # Initialize the executor only once.
        for partition, m in manifests.items():
            if (output_dir / f"cuts_{partition}.json.gz").is_file():
                logging.info(f"{partition} already exists - skipping.")
                continue
            logging.info(f"Processing {partition}")
            cut_set = CutSet.from_manifests(
                recordings=m["recordings"],
                supervisions=m["supervisions"],
            )
            # BUG FIX: TIMIT partition names are uppercase ("TRAIN"),
            # unlike librispeech's lowercase "train-*".  The previous
            # check `"train" in partition` never matched, so speed
            # perturbation was silently skipped.  Compare
            # case-insensitively instead.
            if "train" in partition.lower():
                cut_set = (
                    cut_set
                    + cut_set.perturb_speed(0.9)
                    + cut_set.perturb_speed(1.1)
                )
            cut_set = cut_set.compute_and_store_features(
                extractor=extractor,
                storage_path=f"{output_dir}/feats_{partition}",
                # when an executor is specified, make more partitions
                num_jobs=num_jobs if ex is None else 80,
                executor=ex,
                storage_type=LilcomHdf5Writer,
            )
            cut_set.to_json(output_dir / f"cuts_{partition}.json.gz")
|
|
||
|
|
||
if __name__ == "__main__":
    # Configure logging before extraction starts.
    log_format = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s"
    logging.basicConfig(format=log_format, level=logging.INFO)
    compute_fbank_timit()
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please note that in librispeech, the names of the training datasets begin with `train` (lowercase). In TIMIT, I find that it is `TRAIN` (uppercase) — see line 52 in this file — so this `if` statement is never executed.
Please change `train` to `TRAIN` and re-run your experiments.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh....will do it....