Skip to content

Commit e38f6d2

Browse files
Google APIs authored and
copybara-github committed
feat: Support safety settings for Gemini voices and deprecate relax_safety_filters
feat: Support `enable_textnorm` for Gemini voices.
feat: Mark `advanced_voice_options` as optional.
docs: A comment for field `relax_safety_filters` in message `.google.cloud.texttospeech.v1beta1.AdvancedVoiceOptions` is changed
docs: A comment for field `advanced_voice_options` in message `.google.cloud.texttospeech.v1beta1.SynthesizeSpeechRequest` is changed
docs: A comment for field `markup` in message `.google.cloud.texttospeech.v1beta1.SynthesisInput` is changed
docs: A comment for field `markup` in message `.google.cloud.texttospeech.v1beta1.StreamingSynthesisInput` is changed
PiperOrigin-RevId: 892462011
1 parent a481d8b commit e38f6d2

File tree

2 files changed

+84
-12
lines changed

2 files changed

+84
-12
lines changed

google/cloud/texttospeech/v1beta1/cloud_tts.proto

Lines changed: 83 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2025 Google LLC
1+
// Copyright 2026 Google LLC
22
//
33
// Licensed under the Apache License, Version 2.0 (the "License");
44
// you may not use this file except in compliance with the License.
@@ -164,18 +164,85 @@ message Voice {
164164

165165
// Used for advanced voice options.
166166
message AdvancedVoiceOptions {
167+
// Safety setting for a single harm category.
168+
message SafetySetting {
169+
// The harm category to apply the safety setting to.
170+
HarmCategory category = 1;
171+
172+
// The harm block threshold for the safety setting.
173+
HarmBlockThreshold threshold = 2;
174+
}
175+
176+
// Safety settings for the request.
177+
message SafetySettings {
178+
// The safety settings for the request.
179+
repeated SafetySetting settings = 1;
180+
}
181+
182+
// Harm categories that will block the content.
183+
enum HarmCategory {
184+
// Default value. This value is unused.
185+
HARM_CATEGORY_UNSPECIFIED = 0;
186+
187+
// Content that promotes violence or incites hatred against individuals or
188+
// groups based on certain attributes.
189+
HARM_CATEGORY_HATE_SPEECH = 1;
190+
191+
// Content that promotes, facilitates, or enables dangerous activities.
192+
HARM_CATEGORY_DANGEROUS_CONTENT = 2;
193+
194+
// Abusive or threatening content, or content intended to bully, torment, or ridicule.
195+
HARM_CATEGORY_HARASSMENT = 3;
196+
197+
// Content that contains sexually explicit material.
198+
HARM_CATEGORY_SEXUALLY_EXPLICIT = 4;
199+
}
200+
201+
// Harm block thresholds for the safety settings.
202+
enum HarmBlockThreshold {
203+
// The harm block threshold is unspecified.
204+
HARM_BLOCK_THRESHOLD_UNSPECIFIED = 0;
205+
206+
// Block content with a low harm probability or higher.
207+
BLOCK_LOW_AND_ABOVE = 1;
208+
209+
// Block content with a medium harm probability or higher.
210+
BLOCK_MEDIUM_AND_ABOVE = 2;
211+
212+
// Block content with a high harm probability.
213+
BLOCK_ONLY_HIGH = 3;
214+
215+
// Do not block any content, regardless of its harm probability.
216+
BLOCK_NONE = 4;
217+
218+
// Turn off the safety filter entirely.
219+
OFF = 5;
220+
}
221+
167222
// Only for Journey voices. If false, the synthesis is context aware
168223
// and has a higher latency.
169224
optional bool low_latency_journey_synthesis = 1;
170225

171-
// Optional. Input only. If true, relaxes safety filters for Gemini TTS. Only
172-
// supported for accounts linked to Invoiced (Offline) Cloud billing accounts.
173-
// Otherwise, will return result
174-
// [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT].
226+
// Optional. Input only. Deprecated, use safety_settings instead.
227+
// If true, relaxes safety filters for Gemini TTS.
175228
bool relax_safety_filters = 8 [
229+
deprecated = true,
176230
(google.api.field_behavior) = INPUT_ONLY,
177231
(google.api.field_behavior) = OPTIONAL
178232
];
233+
234+
// Optional. Input only. This applies to Gemini TTS only. If set, content in a
235+
// category named by a safety setting is blocked when its harm probability is at
236+
// or above that setting's threshold. If unset, the safety filter is disabled by
237+
// default.
238+
SafetySettings safety_settings = 9 [
239+
(google.api.field_behavior) = INPUT_ONLY,
240+
(google.api.field_behavior) = OPTIONAL
241+
];
242+
243+
// Optional. If true, text normalization (textnorm) is applied to the text
244+
// input. This feature is enabled by default. Only applies to Gemini TTS.
245+
optional bool enable_textnorm = 2 [(google.api.field_behavior) = OPTIONAL];
179246
}
180247

181248
// The top-level message sent by the client for the `SynthesizeSpeech` method.
@@ -201,8 +268,9 @@ message SynthesizeSpeechRequest {
201268
// Whether and what timepoints are returned in the response.
202269
repeated TimepointType enable_time_pointing = 4;
203270

204-
// Advanced voice options.
205-
optional AdvancedVoiceOptions advanced_voice_options = 8;
271+
// Optional. Advanced voice options.
272+
optional AdvancedVoiceOptions advanced_voice_options = 8
273+
[(google.api.field_behavior) = OPTIONAL];
206274
}
207275

208276
// Pronunciation customization for a phrase.
@@ -311,8 +379,8 @@ message SynthesisInput {
311379
// The raw text to be synthesized.
312380
string text = 1;
313381

314-
// Markup for HD voices specifically. This field may not be used with any
315-
// other voices.
382+
// Markup for Chirp 3: HD voices specifically. This field may not be used
383+
// with any other voices.
316384
string markup = 5;
317385

318386
// The SSML document to be synthesized. The SSML document must be valid
@@ -554,6 +622,10 @@ message StreamingSynthesizeConfig {
554622
// be inside a phoneme tag.
555623
CustomPronunciations custom_pronunciations = 5
556624
[(google.api.field_behavior) = OPTIONAL];
625+
626+
// Optional. Advanced voice options.
627+
optional AdvancedVoiceOptions advanced_voice_options = 7
628+
[(google.api.field_behavior) = OPTIONAL];
557629
}
558630

559631
// Input to be synthesized.
@@ -564,8 +636,8 @@ message StreamingSynthesisInput {
564636
// in the output audio.
565637
string text = 1;
566638

567-
// Markup for HD voices specifically. This field may not be used with any
568-
// other voices.
639+
// Markup for Chirp 3: HD voices specifically. This field may not be used
640+
// with any other voices.
569641
string markup = 5;
570642

571643
// Multi-speaker markup for Gemini TTS. This field may not

google/cloud/texttospeech/v1beta1/cloud_tts_lrs.proto

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2025 Google LLC
1+
// Copyright 2026 Google LLC
22
//
33
// Licensed under the Apache License, Version 2.0 (the "License");
44
// you may not use this file except in compliance with the License.

0 commit comments

Comments
 (0)