Skip to content
This repository was archived by the owner on Jan 7, 2025. It is now read-only.

Commit c704d56

Browse files
authored
Optionally ignore normalized entity values and casing in matching (#64)
1 parent 6c56daa commit c704d56

5 files changed

Lines changed: 26 additions & 14 deletions

File tree

cmd/annotate.go

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -83,7 +83,7 @@ speechly annotate --app <app_id> --reference-date 2021-01-20 --input input.txt >
8383
}
8484

8585
if evaluate {
86-
evaluateAnnotatedUtterances(wluResponsesToString(res.Responses), annotated)
86+
evaluateAnnotatedUtterances(wluResponsesToString(res.Responses), annotated, false)
8787
os.Exit(0)
8888
}
8989

cmd/common.go

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,8 @@ import (
2525
"github.com/speechly/cli/pkg/clients"
2626
"github.com/speechly/nwalgo"
2727
"github.com/spf13/cobra"
28+
"golang.org/x/text/cases"
29+
"golang.org/x/text/language"
2830
"google.golang.org/protobuf/types/known/timestamppb"
2931
)
3032

@@ -189,19 +191,25 @@ func scanLines(file *os.File) []string {
189191
return lines
190192
}
191193

192-
func evaluateAnnotatedUtterances(annotatedData []string, groundTruthData []string) {
194+
func evaluateAnnotatedUtterances(annotatedData []string, groundTruthData []string, relaxed bool) {
193195
if len(annotatedData) != len(groundTruthData) {
194196
log.Fatalf(
195197
"Inputs should have same length, but input has %d items and ground-truths %d items.",
196198
len(annotatedData),
197199
len(groundTruthData),
198200
)
199201
}
202+
var entValRE = regexp.MustCompile(`\|[^]]+]`)
203+
caser := cases.Lower(language.AmericanEnglish)
200204

201205
n := float64(len(annotatedData))
202206
hits := 0.0
203207
for i, aUtt := range annotatedData {
204208
gtUtt := groundTruthData[i]
209+
if relaxed {
210+
aUtt = entValRE.ReplaceAllString(caser.String(aUtt), "]")
211+
gtUtt = entValRE.ReplaceAllString(caser.String(gtUtt), "]")
212+
}
205213
aln1, aln2, _ := nwalgo.Align(gtUtt, aUtt, "*", 1, -1, -1)
206214
if strings.TrimSpace(aUtt) == strings.TrimSpace(gtUtt) {
207215
hits += 1.0

cmd/evaluate.go

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -33,8 +33,12 @@ speechly evaluate nlu <app_id> ground-truths.txt --reference-date 2021-01-20`,
3333
if err != nil {
3434
log.Fatalf("WLU failed: %v", err)
3535
}
36+
isRelaxed, err := cmd.Flags().GetBool("relax")
37+
if err != nil {
38+
log.Fatalf("WLU failed: %v", err)
39+
}
3640

37-
evaluateAnnotatedUtterances(wluResponsesToString(res.Responses), annotated)
41+
evaluateAnnotatedUtterances(wluResponsesToString(res.Responses), annotated, isRelaxed)
3842
},
3943
}
4044

@@ -84,6 +88,7 @@ func init() {
8488
RootCmd.AddCommand(evaluateCmd)
8589
evaluateCmd.AddCommand(nluCmd)
8690
nluCmd.Flags().StringP("reference-date", "r", "", "Reference date in YYYY-MM-DD format, if not provided use current date.")
91+
nluCmd.Flags().Bool("relax", false, "Ignore normalized entity values and casing in matching.")
8792

8893
evaluateCmd.AddCommand(asrCmd)
8994
asrCmd.Flags().Bool("streaming", false, "Use the Streaming API instead of the Batch API.")

cmd/transcribe.go

Lines changed: 9 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -46,18 +46,16 @@ speechly transcribe files.jsonl --model /path/to/model/bundle`,
4646
}
4747

4848
var results []AudioCorpusItem
49-
if appID != "" {
50-
if useStreaming {
51-
results, err = transcribeWithStreamingAPI(ctx, appID, inputPath, false)
52-
} else {
53-
results, err = transcribeWithBatchAPI(ctx, appID, inputPath, false)
54-
}
5549

56-
printResults(results, inputPath, err == nil)
57-
if err != nil {
58-
log.Fatalf("Transcribing failed: %v", err)
59-
}
60-
return
50+
if useStreaming {
51+
results, err = transcribeWithStreamingAPI(ctx, appID, inputPath, false)
52+
} else {
53+
results, err = transcribeWithBatchAPI(ctx, appID, inputPath, false)
54+
}
55+
56+
printResults(results, inputPath, err == nil)
57+
if err != nil {
58+
log.Fatalf("Transcribing failed: %v", err)
6159
}
6260
},
6361
}

docs/evaluate_nlu.md

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@ To run NLU evaluation, you need a set of ground truth annotations. Use the `anno
1414

1515
* `--help` `-h` _(bool)_ - help for nlu
1616
* `--reference-date` `-r` _(string)_ - Reference date in YYYY-MM-DD format, if not provided use current date.
17+
* `--relax` _(bool)_ - Ignore normalized entity values and casing in matching.
1718

1819
### Examples
1920

0 commit comments

Comments
 (0)