1- // Copyright 2025 Google LLC
1+ // Copyright 2026 Google LLC
22//
33// Licensed under the Apache License, Version 2.0 (the "License");
44// you may not use this file except in compliance with the License.
@@ -164,18 +164,85 @@ message Voice {
164164
165165// Used for advanced voice options.
166166message AdvancedVoiceOptions {
167+ // Pairs a single harm category with the block threshold applied to it.
168+ message SafetySetting {
169+ // The harm category this setting applies to.
170+ HarmCategory category = 1 ;
171+
172+ // The harm block threshold to use for `category`.
173+ HarmBlockThreshold threshold = 2 ;
174+ }
175+
176+ // Wrapper holding the list of per-category safety settings for a request.
177+ message SafetySettings {
178+ // The individual safety settings, each naming a category and a threshold.
179+ repeated SafetySetting settings = 1 ;
180+ }
181+
182+ // Harm categories that a safety setting can target.
183+ enum HarmCategory {
184+ // Default value. This value is unused.
185+ HARM_CATEGORY_UNSPECIFIED = 0 ;
186+
187+ // Content that promotes violence or incites hatred against individuals or
188+ // groups based on certain attributes.
189+ HARM_CATEGORY_HATE_SPEECH = 1 ;
190+
191+ // Content that promotes, facilitates, or enables dangerous activities.
192+ HARM_CATEGORY_DANGEROUS_CONTENT = 2 ;
193+
194+ // Content that is abusive, threatening, or meant to bully, torment, or ridicule.
195+ HARM_CATEGORY_HARASSMENT = 3 ;
196+
197+ // Content that contains sexually explicit material.
198+ HARM_CATEGORY_SEXUALLY_EXPLICIT = 4 ;
199+ }
200+
201+ // Harm probability levels at which a safety setting blocks content.
202+ enum HarmBlockThreshold {
203+ // Default value. The harm block threshold is unspecified.
204+ HARM_BLOCK_THRESHOLD_UNSPECIFIED = 0 ;
205+
206+ // Block content with a low harm probability or higher.
207+ BLOCK_LOW_AND_ABOVE = 1 ;
208+
209+ // Block content with a medium harm probability or higher.
210+ BLOCK_MEDIUM_AND_ABOVE = 2 ;
211+
212+ // Block only content with a high harm probability.
213+ BLOCK_ONLY_HIGH = 3 ;
214+
215+ // Do not block any content, regardless of its harm probability.
216+ BLOCK_NONE = 4 ;
217+
218+ // Turn off the safety filter entirely.
219+ OFF = 5 ;
220+ }
221+
167222 // Only for Journey voices. If false, the synthesis is context aware
168223 // and has a higher latency.
169224 optional bool low_latency_journey_synthesis = 1 ;
170225
171- // Optional. Input only. If true, relaxes safety filters for Gemini TTS. Only
172- // supported for accounts linked to Invoiced (Offline) Cloud billing accounts.
173- // Otherwise, will return result
174- // [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT].
226+ // Optional. Input only. Deprecated: use `safety_settings` instead.
227+ // If true, relaxes safety filters for Gemini TTS.
175228 bool relax_safety_filters = 8 [
229+ deprecated = true ,
176230 (google.api.field_behavior ) = INPUT_ONLY ,
177231 (google.api.field_behavior ) = OPTIONAL
178232 ];
233+
234+ // Optional. Input only. Applies to Gemini TTS only. If set, content in a
235+ // category named by a safety setting is blocked when its harm probability
236+ // reaches that setting's threshold. Otherwise, the safety filter will be
237+ // disabled by default.
238+ SafetySettings safety_settings = 9 [
239+ (google.api.field_behavior ) = INPUT_ONLY ,
240+ (google.api.field_behavior ) = OPTIONAL
241+ ];
242+
243+ // Optional. If true, textnorm will be applied to text input. This feature is
244+ // enabled by default. Only applies for Gemini TTS.
245+ optional bool enable_textnorm = 2 [(google.api.field_behavior ) = OPTIONAL ];
179246}
180247
181248// The top-level message sent by the client for the `SynthesizeSpeech` method.
@@ -201,8 +268,9 @@ message SynthesizeSpeechRequest {
201268 // Whether and what timepoints are returned in the response.
202269 repeated TimepointType enable_time_pointing = 4 ;
203270
204- // Advanced voice options.
205- optional AdvancedVoiceOptions advanced_voice_options = 8 ;
271+ // Optional. Advanced voice options.
272+ optional AdvancedVoiceOptions advanced_voice_options = 8
273+ [(google.api.field_behavior ) = OPTIONAL ];
206274}
207275
208276// Pronunciation customization for a phrase.
@@ -311,8 +379,8 @@ message SynthesisInput {
311379 // The raw text to be synthesized.
312380 string text = 1 ;
313381
314- // Markup for HD voices specifically. This field may not be used with any
315- // other voices.
382+ // Markup for Chirp 3: HD voices specifically. This field may not be used
383+ // with any other voices.
316384 string markup = 5 ;
317385
318386 // The SSML document to be synthesized. The SSML document must be valid
@@ -554,6 +622,10 @@ message StreamingSynthesizeConfig {
554622 // be inside a phoneme tag.
555623 CustomPronunciations custom_pronunciations = 5
556624 [(google.api.field_behavior ) = OPTIONAL ];
625+
626+ // Optional. Advanced voice options.
627+ optional AdvancedVoiceOptions advanced_voice_options = 7
628+ [(google.api.field_behavior ) = OPTIONAL ];
557629}
558630
559631// Input to be synthesized.
@@ -564,8 +636,8 @@ message StreamingSynthesisInput {
564636 // in the output audio.
565637 string text = 1 ;
566638
567- // Markup for HD voices specifically. This field may not be used with any
568- // other voices.
639+ // Markup for Chirp 3: HD voices specifically. This field may not be used
640+ // with any other voices.
569641 string markup = 5 ;
570642
571643 // Multi-speaker markup for Gemini TTS. This field may not
0 commit comments