Skip to content
This repository was archived by the owner on Jan 7, 2025. It is now read-only.

Commit 6c56daa

Browse files
authored
Support for flac and opus for batch transcription (#62)
* Support for flac and opus for batch transcription
* Update dependencies
* Update to go 1.18
1 parent f261556 commit 6c56daa

File tree

7 files changed: 148 additions and 435 deletions.

.github/workflows/golangci-lint.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ jobs:
1212
steps:
1313
- uses: actions/setup-go@v3
1414
with:
15-
go-version: 1.17
15+
go-version: 1.18
1616
- uses: actions/checkout@v3
1717
- name: golangci-lint
1818
uses: golangci/golangci-lint-action@v3

.github/workflows/release.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ jobs:
1212
steps:
1313
- uses: actions/setup-go@v3
1414
with:
15-
go-version: 1.17
15+
go-version: 1.18
1616
- uses: actions/checkout@v3
1717
- name: golangci-lint
1818
uses: golangci/golangci-lint-action@v3
@@ -24,7 +24,7 @@ jobs:
2424
- name: Install Go
2525
uses: actions/setup-go@v3
2626
with:
27-
go-version: 1.17
27+
go-version: 1.18
2828
- uses: actions/checkout@v3
2929
- name: build
3030
run: make build
@@ -41,7 +41,7 @@ jobs:
4141
fetch-depth: 0
4242
- uses: actions/setup-go@v3
4343
with:
44-
go-version: 1.17
44+
go-version: 1.18
4545
- uses: docker/login-action@v2
4646
with:
4747
username: ${{ secrets.DOCKERHUB_USERNAME }}

.github/workflows/test.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ jobs:
1212
- name: Install Go
1313
uses: actions/setup-go@v3
1414
with:
15-
go-version: 1.17
15+
go-version: 1.18
1616
- uses: actions/checkout@v3
1717
- name: build
1818
run: make build

cmd/common.go

Lines changed: 31 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -233,7 +233,7 @@ func readAudioCorpus(filename string) ([]AudioCorpusItem, error) {
233233
return nil, err
234234
}
235235
ac := make([]AudioCorpusItem, 0)
236-
if strings.HasSuffix(filename, "wav") {
236+
if strings.HasSuffix(filename, "wav") || strings.HasSuffix(filename, "opus") || strings.HasSuffix(filename, "flac") {
237237
return []AudioCorpusItem{{Audio: filename}}, nil
238238
}
239239
jd := json.NewDecoder(f)
@@ -325,15 +325,19 @@ func transcribeWithBatchAPI(ctx context.Context, appID string, corpusPath string
325325
audioFilePath = corpusPath
326326
}
327327

328-
err = readAudio(audioFilePath, aci, func(buffer audio.IntBuffer, n int) error {
329-
buffer16 := make([]uint16, len(buffer.Data))
330-
for i, x := range buffer.Data {
331-
buffer16[i] = uint16(x)
332-
}
333-
buf := new(bytes.Buffer)
334-
err = binary.Write(buf, binary.LittleEndian, buffer16)
335-
if err != nil {
336-
return fmt.Errorf("binary.Write: %v", err)
328+
f, err := os.Open(audioFilePath)
329+
if err != nil {
330+
barClearOnError(bar)
331+
return nil, err
332+
}
333+
buf := make([]byte, 65536)
334+
for {
335+
n, err := f.Read(buf)
336+
if err == io.EOF {
337+
break
338+
} else if err != nil {
339+
barClearOnError(bar)
340+
return nil, err
337341
}
338342

339343
err = paStream.Send(&sluv1.ProcessAudioRequest{
@@ -343,16 +347,14 @@ func transcribeWithBatchAPI(ctx context.Context, appID string, corpusPath string
343347
Channels: 1,
344348
SampleRateHertz: 16000,
345349
},
346-
Source: &sluv1.ProcessAudioRequest_Audio{Audio: buf.Bytes()},
350+
Source: &sluv1.ProcessAudioRequest_Audio{Audio: buf[:n]},
347351
})
352+
348353
if err != nil {
349-
return fmt.Errorf("sending %d process audio request failed: %w", buf.Len(), err)
354+
barClearOnError(bar)
355+
return nil, err
350356
}
351-
return nil
352-
})
353-
if err != nil {
354-
barClearOnError(bar)
355-
return nil, err
357+
356358
}
357359

358360
err = bar.Add(1)
@@ -362,6 +364,10 @@ func transcribeWithBatchAPI(ctx context.Context, appID string, corpusPath string
362364
}
363365

364366
paResp, err := paStream.CloseAndRecv()
367+
if err != nil {
368+
barClearOnError(bar)
369+
return nil, err
370+
}
365371
bID := paResp.GetOperation().GetId()
366372
pending[bID] = aci
367373
}
@@ -375,6 +381,7 @@ func transcribeWithBatchAPI(ctx context.Context, appID string, corpusPath string
375381
var results []AudioCorpusItem
376382

377383
bar = getBar("Transcribing", "utt", inputSize)
384+
isDone := false
378385
for {
379386
for bID, aci := range pending {
380387
status, err := client.QueryStatus(ctx, &sluv1.QueryStatusRequest{Id: bID})
@@ -386,6 +393,13 @@ func transcribeWithBatchAPI(ctx context.Context, appID string, corpusPath string
386393
case sluv1.Operation_STATUS_DONE:
387394
trs := status.GetOperation().GetTranscripts()
388395
words := make([]string, len(trs))
396+
if !isDone && len(trs) == 0 {
397+
// Results might not be available immediately after done state is reached, so if we do
398+
// not have any, let's wait for a bit.
399+
isDone = true
400+
time.Sleep(2 * time.Second)
401+
continue
402+
}
389403
for i, tr := range trs {
390404
words[i] = tr.Word
391405
}

go.mod

Lines changed: 27 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,48 +1,45 @@
11
module github.com/speechly/cli
22

3-
go 1.17
3+
go 1.18
44

55
require (
66
github.com/agnivade/levenshtein v1.1.1
77
github.com/go-audio/audio v1.0.0
88
github.com/go-audio/wav v1.1.0
9-
github.com/mattn/go-isatty v0.0.16
9+
github.com/mattn/go-isatty v0.0.17
1010
github.com/mitchellh/go-homedir v1.1.0
11-
github.com/schollz/progressbar/v3 v3.11.0
12-
github.com/speechly/api/go v0.0.0-20220920060221-2531f4783d08
13-
github.com/speechly/nwalgo v0.0.0-20221109101309-d1a337619dd3
14-
github.com/spf13/cobra v1.3.0
15-
github.com/spf13/viper v1.10.0
16-
golang.org/x/text v0.4.0
17-
google.golang.org/grpc v1.42.0
18-
google.golang.org/protobuf v1.27.1
11+
github.com/schollz/progressbar/v3 v3.13.0
12+
github.com/speechly/api/go v0.0.0-20230221135950-6d68efe6ac91
13+
github.com/speechly/nwalgo v0.0.0-20221109110948-f6606115e74b
14+
github.com/spf13/cobra v1.6.1
15+
github.com/spf13/pflag v1.0.5
16+
github.com/spf13/viper v1.15.0
17+
golang.org/x/text v0.7.0
18+
google.golang.org/grpc v1.53.0
19+
google.golang.org/protobuf v1.28.1
1920
)
2021

2122
require (
22-
github.com/cpuguy83/go-md2man/v2 v2.0.1 // indirect
23-
github.com/fsnotify/fsnotify v1.5.1 // indirect
23+
github.com/fsnotify/fsnotify v1.6.0 // indirect
2424
github.com/go-audio/riff v1.0.0 // indirect
2525
github.com/golang/protobuf v1.5.2 // indirect
2626
github.com/hashicorp/hcl v1.0.0 // indirect
27-
github.com/inconshreveable/mousetrap v1.0.0 // indirect
28-
github.com/kr/text v0.2.0 // indirect
29-
github.com/magiconair/properties v1.8.5 // indirect
30-
github.com/mattn/go-runewidth v0.0.13 // indirect
27+
github.com/inconshreveable/mousetrap v1.1.0 // indirect
28+
github.com/magiconair/properties v1.8.7 // indirect
29+
github.com/mattn/go-runewidth v0.0.14 // indirect
3130
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect
32-
github.com/mitchellh/mapstructure v1.4.3 // indirect
33-
github.com/pelletier/go-toml v1.9.4 // indirect
34-
github.com/rivo/uniseg v0.2.0 // indirect
35-
github.com/russross/blackfriday/v2 v2.1.0 // indirect
36-
github.com/spf13/afero v1.6.0 // indirect
37-
github.com/spf13/cast v1.4.1 // indirect
31+
github.com/mitchellh/mapstructure v1.5.0 // indirect
32+
github.com/pelletier/go-toml/v2 v2.0.6 // indirect
33+
github.com/rivo/uniseg v0.4.4 // indirect
34+
github.com/spf13/afero v1.9.4 // indirect
35+
github.com/spf13/cast v1.5.0 // indirect
3836
github.com/spf13/jwalterweatherman v1.1.0 // indirect
39-
github.com/spf13/pflag v1.0.5 // indirect
40-
github.com/subosito/gotenv v1.2.0 // indirect
41-
golang.org/x/net v0.0.0-20220722155237-a158d28d115b // indirect
42-
golang.org/x/sys v0.0.0-20220829200755-d48e67d00261 // indirect
43-
golang.org/x/term v0.0.0-20220722155259-a9ba230a4035 // indirect
44-
google.golang.org/genproto v0.0.0-20211208223120-3a66f561d7aa // indirect
37+
github.com/subosito/gotenv v1.4.2 // indirect
38+
golang.org/x/net v0.7.0 // indirect
39+
golang.org/x/sys v0.5.0 // indirect
40+
golang.org/x/term v0.5.0 // indirect
41+
google.golang.org/genproto v0.0.0-20230223222841-637eb2293923 // indirect
4542
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect
46-
gopkg.in/ini.v1 v1.66.2 // indirect
47-
gopkg.in/yaml.v2 v2.4.0 // indirect
43+
gopkg.in/ini.v1 v1.67.0 // indirect
44+
gopkg.in/yaml.v3 v3.0.1 // indirect
4845
)

0 commit comments

Comments
 (0)