Skip to content

Commit 83a2c8c

Browse files
authored
feat(js): Support speech configuration for Gemini TTS models (#3016)
1 parent 22fa3da commit 83a2c8c

File tree

1 file changed

+100
-0
lines changed

1 file changed

+100
-0
lines changed

js/plugins/googleai/src/gemini.ts

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,59 @@ const SafetySettingsSchema = z.object({
8686
]),
8787
});
8888

89+
/**
 * Schema describing which voice to use for speech output.
 *
 * Both `prebuiltVoiceConfig` and its `voiceName` are optional. The objects are
 * declared with `.passthrough()`, so keys that are not listed here are kept
 * on the parsed value instead of being stripped.
 */
const VoiceConfigSchema = z
  .object({
    prebuiltVoiceConfig: z
      .object({
        // TODO: Make this an array of objects so we can also specify the description
        // for each voiceName.
        voiceName: z
          .union([
            // Voice names known at the time of writing.
            z.enum([
              'Zephyr',
              'Puck',
              'Charon',
              'Kore',
              'Fenrir',
              'Leda',
              'Orus',
              'Aoede',
              'Callirrhoe',
              'Autonoe',
              'Enceladus',
              'Iapetus',
              'Umbriel',
              'Algieba',
              'Despina',
              'Erinome',
              'Algenib',
              'Rasalgethi',
              'Laomedeia',
              'Achernar',
              'Alnilam',
              'Schedar',
              'Gacrux',
              'Pulcherrima',
              'Achird',
              'Zubenelgenubi',
              'Vindemiatrix',
              'Sadachbia',
              'Sadaltager',
              'Sulafat',
            ]),
            // To allow any new string values: because of this fallback, any
            // string passes validation, not only the names enumerated above.
            z.string(),
          ])
          .describe('Name of the preset voice to use')
          .optional(),
      })
      .describe('Configuration for the prebuilt speaker to use')
      .passthrough()
      .optional(),
  })
  .describe('Configuration for the voice to use')
  .passthrough();
141+
89142
export const GeminiConfigSchema = GenerationCommonConfigSchema.extend({
90143
apiKey: z
91144
.string()
@@ -139,6 +192,35 @@ export const GeminiConfigSchema = GenerationCommonConfigSchema.extend({
139192
}).passthrough();
140193
export type GeminiConfig = z.infer<typeof GeminiConfigSchema>;
141194

195+
/**
 * Config schema for Gemini text-to-speech models.
 *
 * Extends `GeminiConfigSchema` with an optional `speechConfig` object that can
 * carry either a single `voiceConfig` or a `multiSpeakerVoiceConfig` holding
 * one entry per speaker. Every object level uses `.passthrough()`, so keys not
 * declared here are preserved on the parsed value.
 */
export const GeminiTtsConfigSchema = GeminiConfigSchema.extend({
  speechConfig: z
    .object({
      // Single-voice setup.
      voiceConfig: VoiceConfigSchema.optional(),
      // Multi-speaker setup: each entry pairs a speaker name with a voice.
      multiSpeakerVoiceConfig: z
        .object({
          speakerVoiceConfigs: z
            .array(
              z
                .object({
                  speaker: z.string().describe('Name of the speaker to use'),
                  // Required here, unlike the optional top-level voiceConfig.
                  voiceConfig: VoiceConfigSchema,
                })
                .describe(
                  'Configuration for a single speaker in a multi speaker setup'
                )
                .passthrough()
            )
            .describe('Configuration for all the enabled speaker voices'),
        })
        .describe('Configuration for multi-speaker setup')
        .passthrough()
        .optional(),
    })
    .describe('Speech generation config')
    .passthrough()
    .optional(),
}).passthrough();

/** Inferred config type for Gemini text-to-speech models. */
export type GeminiTtsConfig = z.infer<typeof GeminiTtsConfigSchema>;
223+
142224
export const gemini10Pro = modelRef({
143225
name: 'googleai/gemini-1.0-pro',
144226
info: {
@@ -302,6 +384,23 @@ export const gemini25FlashPreview0417 = modelRef({
302384
configSchema: GeminiConfigSchema,
303385
});
304386

387+
/**
 * Model reference for `googleai/gemini-2.5-flash-preview-tts`.
 *
 * Uses `GeminiTtsConfigSchema` as its config schema. Multiturn, media, tools,
 * tool choice and system role are all declared as unsupported, and no
 * versions are listed.
 */
export const gemini25FlashPreviewTts = modelRef({
  name: 'googleai/gemini-2.5-flash-preview-tts',
  info: {
    label: 'Google AI - Gemini 2.5 Flash Preview TTS',
    versions: [],
    supports: {
      multiturn: false,
      media: false,
      tools: false,
      toolChoice: false,
      systemRole: false,
      constrained: 'no-tools',
    },
  },
  configSchema: GeminiTtsConfigSchema,
});
403+
305404
export const gemini25ProExp0325 = modelRef({
306405
name: 'googleai/gemini-2.5-pro-exp-03-25',
307406
info: {
@@ -347,6 +446,7 @@ export const SUPPORTED_V15_MODELS = {
347446
'gemini-2.5-pro-exp-03-25': gemini25ProExp0325,
348447
'gemini-2.5-pro-preview-03-25': gemini25ProPreview0325,
349448
'gemini-2.5-flash-preview-04-17': gemini25FlashPreview0417,
449+
'gemini-2.5-flash-preview-tts': gemini25FlashPreviewTts,
350450
};
351451

352452
export const GENERIC_GEMINI_MODEL = modelRef({

0 commit comments

Comments
 (0)