Skip to content

Commit 1c4cef8

Browse files
authored
feat(speech): adds adaptation samples for V2 (GoogleCloudPlatform#7581)
* feat(speech): adds adaptation samples for V2
1 parent 253f162 commit 1c4cef8

8 files changed

+921
-0
lines changed
Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
/*
2+
* Copyright 2023 Google LLC
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package com.example.speech;
18+
19+
// [START speech_adaptation_v2_custom_class_reference]
20+
import com.google.api.gax.longrunning.OperationFuture;
21+
import com.google.cloud.speech.v2.AutoDetectDecodingConfig;
22+
import com.google.cloud.speech.v2.CreateCustomClassRequest;
23+
import com.google.cloud.speech.v2.CreatePhraseSetRequest;
24+
import com.google.cloud.speech.v2.CustomClass;
25+
import com.google.cloud.speech.v2.CustomClass.ClassItem;
26+
import com.google.cloud.speech.v2.OperationMetadata;
27+
import com.google.cloud.speech.v2.PhraseSet;
28+
import com.google.cloud.speech.v2.PhraseSet.Phrase;
29+
import com.google.cloud.speech.v2.RecognitionConfig;
30+
import com.google.cloud.speech.v2.RecognizeRequest;
31+
import com.google.cloud.speech.v2.RecognizeResponse;
32+
import com.google.cloud.speech.v2.SpeechAdaptation;
33+
import com.google.cloud.speech.v2.SpeechAdaptation.AdaptationPhraseSet;
34+
import com.google.cloud.speech.v2.SpeechClient;
35+
import com.google.cloud.speech.v2.SpeechRecognitionAlternative;
36+
import com.google.cloud.speech.v2.SpeechRecognitionResult;
37+
import com.google.protobuf.ByteString;
38+
import java.io.IOException;
39+
import java.nio.file.Files;
40+
import java.nio.file.Path;
41+
import java.nio.file.Paths;
42+
import java.util.List;
43+
import java.util.concurrent.ExecutionException;
44+
45+
public class AdaptationCustomClassReferenceV2 {
46+
public static void main(String[] args) throws IOException, InterruptedException,
47+
ExecutionException {
48+
String projectId = "my-project-id";
49+
String recognizerName = "projects/[PROJECT_ID]/locations/global/recognizers/[RECOGNIZER_ID]";
50+
String customClassId = "my-class-id";
51+
String phraseSetId = "my-phrase-set-id";
52+
String audioFilePath = "path/to/audiofile";
53+
54+
createCustomClassV2(projectId, recognizerName, customClassId, phraseSetId, audioFilePath);
55+
56+
}
57+
58+
public static void createCustomClassV2(String projectId, String recognizerName,
59+
String customClassId, String phraseSetId, String audioFilePath) throws
60+
IOException, InterruptedException, ExecutionException {
61+
62+
// Initialize client that will be used to send requests. This client only needs to be created
63+
// once, and can be reused for multiple requests. After completing all of your requests, call
64+
// the "close" method on the client to safely clean up any remaining background resources.
65+
try (SpeechClient speechClient = SpeechClient.create()) {
66+
String parent = String.format("projects/%s/locations/global", projectId);
67+
68+
// Create a persistent CustomClass to reference in phrases.
69+
ClassItem.Builder classItem = ClassItem.newBuilder()
70+
.setValue("Chromecast");
71+
72+
CustomClass.Builder customClassBuilder = CustomClass.newBuilder()
73+
.addItems(classItem);
74+
75+
CreateCustomClassRequest createCustomClassRequest = CreateCustomClassRequest.newBuilder()
76+
.setParent(parent)
77+
.setCustomClassId(customClassId)
78+
.setCustomClass(customClassBuilder)
79+
.build();
80+
81+
OperationFuture<CustomClass, OperationMetadata> classOperation =
82+
speechClient.createCustomClassAsync(createCustomClassRequest);
83+
CustomClass customClass = classOperation.get();
84+
85+
// Create a persistent PhraseSet to reference in a recognition request
86+
Phrase.Builder phrase = Phrase.newBuilder()
87+
.setValue(String.format("${%s}", customClass.getName()))
88+
.setBoost(20);
89+
90+
PhraseSet.Builder phraseSetBuilder = PhraseSet.newBuilder()
91+
.addPhrases(phrase);
92+
93+
CreatePhraseSetRequest createPhraseSetRequest = CreatePhraseSetRequest.newBuilder()
94+
.setParent(parent)
95+
.setPhraseSetId(phraseSetId)
96+
.setPhraseSet(phraseSetBuilder)
97+
.build();
98+
99+
OperationFuture<PhraseSet, OperationMetadata> phraseOperation =
100+
speechClient.createPhraseSetAsync(createPhraseSetRequest);
101+
PhraseSet phraseSet = phraseOperation.get();
102+
103+
System.out.printf("Custom class name: %s\n", customClass.getName());
104+
System.out.printf("Phrase set name: %s\n", phraseSet.getName());
105+
106+
// Transcribe audio using speech adaptation
107+
Path path = Paths.get(audioFilePath);
108+
byte[] data = Files.readAllBytes(path);
109+
ByteString audioBytes = ByteString.copyFrom(data);
110+
111+
// Add a reference to the PhraseSet into the recognition request
112+
AdaptationPhraseSet.Builder adaptationPhraseSet = AdaptationPhraseSet.newBuilder()
113+
.setPhraseSet(phraseSet.getName());
114+
115+
SpeechAdaptation.Builder adaptation = SpeechAdaptation.newBuilder()
116+
.addPhraseSets(adaptationPhraseSet);
117+
118+
RecognitionConfig recognitionConfig = RecognitionConfig.newBuilder()
119+
.setAutoDecodingConfig(AutoDetectDecodingConfig.newBuilder().build())
120+
.setAdaptation(adaptation)
121+
.build();
122+
123+
RecognizeRequest request = RecognizeRequest.newBuilder()
124+
.setConfig(recognitionConfig)
125+
.setRecognizer(recognizerName)
126+
.setContent(audioBytes)
127+
.build();
128+
129+
RecognizeResponse response = speechClient.recognize(request);
130+
List<SpeechRecognitionResult> results = response.getResultsList();
131+
132+
for (SpeechRecognitionResult result : results) {
133+
// There can be several alternative transcripts for a given chunk of speech. Just use the
134+
// first (most likely) one here.
135+
SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
136+
System.out.printf("Transcription: %s%n", alternative.getTranscript());
137+
}
138+
}
139+
}
140+
}
141+
// [END speech_adaptation_v2_custom_class_reference]
Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
/*
2+
* Copyright 2023 Google LLC
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package com.example.speech;
18+
19+
// [START speech_adaptation_v2_inline_custom_class]
20+
import com.google.cloud.speech.v2.AutoDetectDecodingConfig;
21+
import com.google.cloud.speech.v2.CustomClass;
22+
import com.google.cloud.speech.v2.PhraseSet;
23+
import com.google.cloud.speech.v2.PhraseSet.Phrase;
24+
import com.google.cloud.speech.v2.RecognitionConfig;
25+
import com.google.cloud.speech.v2.RecognizeRequest;
26+
import com.google.cloud.speech.v2.RecognizeResponse;
27+
import com.google.cloud.speech.v2.SpeechAdaptation;
28+
import com.google.cloud.speech.v2.SpeechAdaptation.AdaptationPhraseSet;
29+
import com.google.cloud.speech.v2.SpeechClient;
30+
import com.google.cloud.speech.v2.SpeechRecognitionAlternative;
31+
import com.google.cloud.speech.v2.SpeechRecognitionResult;
32+
import com.google.protobuf.ByteString;
33+
import java.io.IOException;
34+
import java.nio.file.Files;
35+
import java.nio.file.Path;
36+
import java.nio.file.Paths;
37+
import java.util.List;
38+
39+
40+
public class AdaptationInlineCustomClassV2 {
41+
public static void main(String[] args) throws IOException {
42+
String recognizerName = "projects/[PROJECT_ID]/locations/global/recognizers/[RECOGNIZER_ID]";
43+
String audioFilePath = "path/to/audioFile";
44+
45+
buildInlineCustomClassV2(recognizerName, audioFilePath);
46+
}
47+
48+
public static void buildInlineCustomClassV2(String recognizerName, String audioFilePath)
49+
throws IOException {
50+
51+
// Initialize client that will be used to send requests. This client only needs to be created
52+
// once, and can be reused for multiple requests. After completing all of your requests, call
53+
// the "close" method on the client to safely clean up any remaining background resources.
54+
try (SpeechClient speechClient = SpeechClient.create()) {
55+
56+
// Create an inline phrase set to produce a more accurate transcript.
57+
CustomClass.ClassItem classItem = CustomClass.ClassItem.newBuilder()
58+
.setValue("Chromecast")
59+
.build();
60+
61+
CustomClass customClass = CustomClass.newBuilder()
62+
.setName("ChromeCast")
63+
.addItems(classItem)
64+
.build();
65+
66+
Phrase phrase = Phrase.newBuilder()
67+
.setBoost(20)
68+
.setValue("Chromecast")
69+
.build();
70+
71+
PhraseSet phraseSet = PhraseSet.newBuilder()
72+
.addPhrases(phrase)
73+
.build();
74+
75+
AdaptationPhraseSet adaptation = AdaptationPhraseSet.newBuilder()
76+
.setInlinePhraseSet(phraseSet)
77+
.build();
78+
79+
SpeechAdaptation speechAdaptation = SpeechAdaptation.newBuilder()
80+
.addPhraseSets(adaptation)
81+
.addCustomClasses(customClass)
82+
.build();
83+
84+
// Transcribe audio using speech adaptation
85+
Path path = Paths.get(audioFilePath);
86+
byte[] data = Files.readAllBytes(path);
87+
ByteString audioBytes = ByteString.copyFrom(data);
88+
89+
RecognitionConfig recognitionConfig = RecognitionConfig.newBuilder()
90+
.setAutoDecodingConfig(AutoDetectDecodingConfig.newBuilder().build())
91+
.setAdaptation(speechAdaptation)
92+
.build();
93+
94+
RecognizeRequest request = RecognizeRequest.newBuilder()
95+
.setConfig(recognitionConfig)
96+
.setRecognizer(recognizerName)
97+
.setContent(audioBytes)
98+
.build();
99+
100+
RecognizeResponse response = speechClient.recognize(request);
101+
List<SpeechRecognitionResult> results = response.getResultsList();
102+
103+
for (SpeechRecognitionResult result : results) {
104+
// There can be several alternative transcripts for a given chunk of speech. Just use the
105+
// first (most likely) one here.
106+
SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
107+
System.out.printf("Transcription: %s%n", alternative.getTranscript());
108+
}
109+
}
110+
}
111+
}
112+
// [END speech_adaptation_v2_inline_custom_class]
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
/*
2+
* Copyright 2023 Google LLC
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package com.example.speech;
18+
19+
// [START speech_adaptation_v2_inline_phrase_set]
20+
import com.google.cloud.speech.v2.AutoDetectDecodingConfig;
21+
import com.google.cloud.speech.v2.PhraseSet;
22+
import com.google.cloud.speech.v2.PhraseSet.Phrase;
23+
import com.google.cloud.speech.v2.RecognitionConfig;
24+
import com.google.cloud.speech.v2.RecognizeRequest;
25+
import com.google.cloud.speech.v2.RecognizeResponse;
26+
import com.google.cloud.speech.v2.SpeechAdaptation;
27+
import com.google.cloud.speech.v2.SpeechAdaptation.AdaptationPhraseSet;
28+
import com.google.cloud.speech.v2.SpeechClient;
29+
import com.google.cloud.speech.v2.SpeechRecognitionAlternative;
30+
import com.google.cloud.speech.v2.SpeechRecognitionResult;
31+
import com.google.protobuf.ByteString;
32+
import java.io.IOException;
33+
import java.nio.file.Files;
34+
import java.nio.file.Path;
35+
import java.nio.file.Paths;
36+
import java.util.List;
37+
38+
public class AdaptationInlinePhraseSetV2 {
39+
public static void main(String[] args) throws IOException {
40+
String recognizerName = "projects/[PROJECT_ID]/locations/global/recognizers/[RECOGNIZER_ID]";
41+
String audioFilePath = "path/to/audiofile";
42+
43+
buildInlinePhraseSetV2(recognizerName, audioFilePath);
44+
}
45+
46+
public static void buildInlinePhraseSetV2(String recognizerName, String audioFilePath)
47+
throws IOException {
48+
49+
// Initialize client that will be used to send requests. This client only needs to be created
50+
// once, and can be reused for multiple requests. After completing all of your requests, call
51+
// the "close" method on the client to safely clean up any remaining background resources.
52+
try (SpeechClient speechClient = SpeechClient.create()) {
53+
54+
// Create an inline phrase set to produce a more accurate transcript.
55+
Phrase phrase = Phrase.newBuilder()
56+
.setBoost(10)
57+
.setValue("Chromecast")
58+
.build();
59+
60+
PhraseSet phraseSet = PhraseSet.newBuilder()
61+
.addPhrases(phrase)
62+
.build();
63+
64+
AdaptationPhraseSet adaptation = AdaptationPhraseSet.newBuilder()
65+
.setInlinePhraseSet(phraseSet)
66+
.build();
67+
68+
SpeechAdaptation speechAdaptation = SpeechAdaptation.newBuilder()
69+
.addPhraseSets(adaptation)
70+
.build();
71+
72+
// Transcribe audio using speech adaptation
73+
Path path = Paths.get(audioFilePath);
74+
byte[] data = Files.readAllBytes(path);
75+
ByteString audioBytes = ByteString.copyFrom(data);
76+
77+
RecognitionConfig recognitionConfig = RecognitionConfig.newBuilder()
78+
.setAutoDecodingConfig(AutoDetectDecodingConfig.newBuilder().build())
79+
.setAdaptation(speechAdaptation)
80+
.build();
81+
82+
RecognizeRequest request = RecognizeRequest.newBuilder()
83+
.setConfig(recognitionConfig)
84+
.setRecognizer(recognizerName)
85+
.setContent(audioBytes)
86+
.build();
87+
88+
RecognizeResponse response = speechClient.recognize(request);
89+
List<SpeechRecognitionResult> results = response.getResultsList();
90+
91+
for (SpeechRecognitionResult result : results) {
92+
// There can be several alternative transcripts for a given chunk of speech. Just use the
93+
// first (most likely) one here.
94+
SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
95+
System.out.printf("Transcription: %s%n", alternative.getTranscript());
96+
}
97+
}
98+
}
99+
}
100+
// [END speech_adaptation_v2_inline_phrase_set]

0 commit comments

Comments
 (0)