@@ -550,19 +550,17 @@ class RawTranscriptionConfig(BaseModel):
550
550
"""
551
551
Identify the dominant language that's spoken in an audio file, and route the file to the appropriate model for the detected language.
552
552
553
- Automatic Language Detection is supported for the following languages:
553
+ See the docs for supported languages: https://www.assemblyai.com/docs/getting-started/supported-languages
554
+ """
554
555
555
- - English
556
- - Spanish
557
- - French
558
- - German
559
- - Italian
560
- - Portuguese
561
- - Dutch
556
+ language_confidence_threshold : Optional [float ]
557
+ """
558
+ The confidence threshold that must be reached if `language_detection` is enabled. An error will be returned
559
+ if the language confidence is below this threshold. Valid values are in the range [0,1] inclusive.
562
560
"""
563
561
564
562
speech_threshold : Optional [float ]
565
- "Reject audio files that contain less than this fraction of speech. Valid values are in the range [0,1] inclusive"
563
+ "Reject audio files that contain less than this fraction of speech. Valid values are in the range [0,1] inclusive. "
566
564
567
565
speech_model : Optional [SpeechModel ]
568
566
"""
@@ -608,6 +606,7 @@ def __init__(
608
606
summary_type : Optional [SummarizationType ] = None ,
609
607
auto_highlights : Optional [bool ] = None ,
610
608
language_detection : Optional [bool ] = None ,
609
+ language_confidence_threshold : Optional [float ] = None ,
611
610
speech_threshold : Optional [float ] = None ,
612
611
raw_transcription_config : Optional [RawTranscriptionConfig ] = None ,
613
612
speech_model : Optional [SpeechModel ] = None ,
@@ -644,8 +643,10 @@ def __init__(
644
643
summary_model: The summarization model to use in case `summarization` is enabled
645
644
summary_type: The summarization type to use in case `summarization` is enabled
646
645
auto_highlights: Detect important phrases and words in your transcription text.
647
- language_detection: Identify the dominant language that’s spoken in an audio file, and route the file to the appropriate model for the detected language.
648
- speech_threshold: Reject audio files that contain less than this fraction of speech. Valid values are in the range [0,1] inclusive
646
+ language_detection: Identify the dominant language that's spoken in an audio file, and route the file to the appropriate model for the detected language.
647
+ language_confidence_threshold: The confidence threshold that must be reached if `language_detection` is enabled.
648
+ An error will be returned if the language confidence is below this threshold. Valid values are in the range [0,1] inclusive.
649
+ speech_threshold: Reject audio files that contain less than this fraction of speech. Valid values are in the range [0,1] inclusive.
649
650
raw_transcription_config: Create the config from a `RawTranscriptionConfig`
650
651
"""
651
652
self ._raw_transcription_config = raw_transcription_config
@@ -691,6 +692,7 @@ def __init__(
691
692
)
692
693
self .auto_highlights = auto_highlights
693
694
self .language_detection = language_detection
695
+ self .language_confidence_threshold = language_confidence_threshold
694
696
self .speech_threshold = speech_threshold
695
697
self .speech_model = speech_model
696
698
@@ -1021,19 +1023,26 @@ def language_detection(self, enable: Optional[bool]) -> None:
1021
1023
"""
1022
1024
Identify the dominant language that's spoken in an audio file, and route the file to the appropriate model for the detected language.
1023
1025
1024
- Automatic Language Detection is supported for the following languages:
1025
-
1026
- - English
1027
- - Spanish
1028
- - French
1029
- - German
1030
- - Italian
1031
- - Portuguese
1032
- - Dutch
1026
+ See the docs for supported languages: https://www.assemblyai.com/docs/getting-started/supported-languages
1033
1027
"""
1034
1028
1035
1029
self ._raw_transcription_config .language_detection = enable
1036
1030
1031
+ @property
1032
+ def language_confidence_threshold (self ) -> Optional [float ]:
1033
+ "Returns the confidence threshold that must be reached for automatic language detection."
1034
+
1035
+ return self ._raw_transcription_config .language_confidence_threshold
1036
+
1037
+ @language_confidence_threshold .setter
1038
+ def language_confidence_threshold (self , threshold : Optional [float ]) -> None :
1039
+ """
1040
+ Set the confidence threshold that must be reached if `language_detection` is enabled. An error will be returned
1041
+ if the language confidence is below this threshold. Valid values are in the range [0,1] inclusive.
1042
+ """
1043
+
1044
+ self ._raw_transcription_config .language_confidence_threshold = threshold
1045
+
1037
1046
@property
1038
1047
def speech_threshold (self ) -> Optional [float ]:
1039
1048
"Returns the current speech threshold."
@@ -1042,10 +1051,7 @@ def speech_threshold(self) -> Optional[float]:
1042
1051
1043
1052
@speech_threshold .setter
1044
1053
def speech_threshold (self , threshold : Optional [float ]) -> None :
1045
- "Reject audio files that contain less than this fraction of speech. Valid values are in the range [0,1] inclusive"
1046
-
1047
- if threshold is not None and (threshold < 0 or threshold > 1 ):
1048
- raise ValueError ("speech_threshold must be between 0 and 1 (inclusive)." )
1054
+ "Reject audio files that contain less than this fraction of speech. Valid values are in the range [0,1] inclusive."
1049
1055
1050
1056
self ._raw_transcription_config .speech_threshold = threshold
1051
1057
@@ -1638,17 +1644,15 @@ class BaseTranscript(BaseModel):
1638
1644
"""
1639
1645
Identify the dominant language that's spoken in an audio file, and route the file to the appropriate model for the detected language.
1640
1646
1641
- Automatic Language Detection is supported for the following languages:
1642
-
1643
- - English
1644
- - Spanish
1645
- - French
1646
- - German
1647
- - Italian
1648
- - Portuguese
1649
- - Dutch
1647
+ See the docs for supported languages: https://www.assemblyai.com/docs/getting-started/supported-languages
1650
1648
"""
1651
1649
1650
+ language_confidence_threshold : Optional [float ]
1651
+ "The confidence threshold that must be reached if `language_detection` is enabled."
1652
+
1653
+ language_confidence : Optional [float ]
1654
+ "The confidence score for the detected language, between 0.0 (low confidence) and 1.0 (high confidence)."
1655
+
1652
1656
speech_threshold : Optional [float ]
1653
1657
"Reject audio files that contain less than this fraction of speech. Valid values are in the range [0,1] inclusive"
1654
1658
0 commit comments