@@ -336,6 +336,9 @@ class RawTranscriptionConfig(BaseModel):
336
336
speaker_labels : Optional [bool ]
337
337
"Enable Speaker Diarization."
338
338
339
+ speakers_expected : Optional [int ]
340
+ "The number of speakers you expect to be in your audio file."
341
+
339
342
# content_safety: bool = False
340
343
# "Enable Content Safety Detection."
341
344
@@ -406,6 +409,7 @@ def __init__(
406
409
redact_pii_policies : Optional [PIIRedactionPolicy ] = None ,
407
410
redact_pii_sub : Optional [PIISubstitutionPolicy ] = None ,
408
411
speaker_labels : Optional [bool ] = None ,
412
+ speakers_expected : Optional [int ] = None ,
409
413
# content_safety: bool = False,
410
414
# iab_categories: bool = False,
411
415
custom_spelling : Optional [Dict [str , Union [str , Sequence [str ]]]] = None ,
@@ -439,6 +443,7 @@ def __init__(
439
443
redact_pii_policies: The list of PII Redaction policies to enable.
440
444
redact_pii_sub: The replacement logic for detected PII.
441
445
speaker_labels: Enable Speaker Diarization.
446
+ speakers_expected: The number of speakers you expect to hear in your audio file. Up to 10 speakers are supported.
442
447
content_safety: Enable Content Safety Detection.
443
448
iab_categories: Enable Topic Detection.
444
449
custom_spelling: Customize how words are spelled and formatted using to and from values.
@@ -480,7 +485,7 @@ def __init__(
480
485
redact_pii_policies ,
481
486
redact_pii_sub ,
482
487
)
483
- self .speaker_labels = speaker_labels
488
+ self .set_speaker_diarization ( speaker_labels , speakers_expected )
484
489
# self.content_safety = content_safety
485
490
# self.iab_categories = iab_categories
486
491
self .set_custom_spelling (custom_spelling , override = True )
@@ -633,11 +638,11 @@ def speaker_labels(self) -> Optional[bool]:
633
638
634
639
return self ._raw_transcription_config .speaker_labels
635
640
636
@property
def speakers_expected(self) -> Optional[int]:
    "Returns the expected speaker count held on the raw transcription config. Used in combination with the `speaker_labels` parameter."

    raw_config = self._raw_transcription_config
    return raw_config.speakers_expected
641
646
642
647
# @property
643
648
# def content_safety(self) -> bool:
@@ -799,6 +804,28 @@ def set_casing_and_formatting(
799
804
800
805
return self
801
806
807
def set_speaker_diarization(
    self,
    enable: Optional[bool] = True,
    speakers_expected: Optional[int] = None,
) -> Self:
    """
    Whether to enable Speaker Diarization on the transcript.

    Both `speaker_labels` and `speakers_expected` on the raw transcription
    config are always written together, so a previously configured speaker
    count never outlives a disabled diarization setting.

    Args:
        `enable`: Enable Speaker Diarization. Any falsy value (`False` or `None`)
            disables it.
        `speakers_expected`: The number of speakers you expect to be in the audio file.
            Ignored (and reset to `None`) when `enable` is falsy.

    Returns:
        This configuration object, so that calls can be chained.
    """

    raw_config = self._raw_transcription_config

    if enable:
        raw_config.speaker_labels = True
        raw_config.speakers_expected = speakers_expected
    else:
        # Disabled state is stored as None (not False) for both fields.
        raw_config.speaker_labels = None
        raw_config.speakers_expected = None

    return self
828
+
802
829
def set_webhook (
803
830
self ,
804
831
url : Optional [str ],
0 commit comments