Skip to content

Commit 7bb9465

Browse files
committed
feat(python/sdk): add ALD features language_confidence and language_confidence_threshold (#6005)
GitOrigin-RevId: a8b115633b714ba2dd61af547877456786829850
1 parent 832afe2 commit 7bb9465

File tree

3 files changed

+38
-45
lines changed

3 files changed

+38
-45
lines changed

assemblyai/__version__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.32.0"
1+
__version__ = "0.33.0"

assemblyai/types.py

Lines changed: 37 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -550,19 +550,17 @@ class RawTranscriptionConfig(BaseModel):
550550
"""
551551
Identify the dominant language that's spoken in an audio file, and route the file to the appropriate model for the detected language.
552552
553-
Automatic Language Detection is supported for the following languages:
553+
See the docs for supported languages: https://www.assemblyai.com/docs/getting-started/supported-languages
554+
"""
554555

555-
- English
556-
- Spanish
557-
- French
558-
- German
559-
- Italian
560-
- Portuguese
561-
- Dutch
556+
language_confidence_threshold: Optional[float]
557+
"""
558+
The confidence threshold that must be reached if `language_detection` is enabled. An error will be returned
559+
if the language confidence is below this threshold. Valid values are in the range [0,1] inclusive.
562560
"""
563561

564562
speech_threshold: Optional[float]
565-
"Reject audio files that contain less than this fraction of speech. Valid values are in the range [0,1] inclusive"
563+
"Reject audio files that contain less than this fraction of speech. Valid values are in the range [0,1] inclusive."
566564

567565
speech_model: Optional[SpeechModel]
568566
"""
@@ -608,6 +606,7 @@ def __init__(
608606
summary_type: Optional[SummarizationType] = None,
609607
auto_highlights: Optional[bool] = None,
610608
language_detection: Optional[bool] = None,
609+
language_confidence_threshold: Optional[float] = None,
611610
speech_threshold: Optional[float] = None,
612611
raw_transcription_config: Optional[RawTranscriptionConfig] = None,
613612
speech_model: Optional[SpeechModel] = None,
@@ -644,8 +643,10 @@ def __init__(
644643
summary_model: The summarization model to use in case `summarization` is enabled
645644
summary_type: The summarization type to use in case `summarization` is enabled
646645
auto_highlights: Detect important phrases and words in your transcription text.
647-
language_detection: Identify the dominant language that’s spoken in an audio file, and route the file to the appropriate model for the detected language.
648-
speech_threshold: Reject audio files that contain less than this fraction of speech. Valid values are in the range [0,1] inclusive
646+
language_detection: Identify the dominant language that's spoken in an audio file, and route the file to the appropriate model for the detected language.
647+
language_confidence_threshold: The confidence threshold that must be reached if `language_detection` is enabled.
648+
An error will be returned if the language confidence is below this threshold. Valid values are in the range [0,1] inclusive.
649+
speech_threshold: Reject audio files that contain less than this fraction of speech. Valid values are in the range [0,1] inclusive.
649650
raw_transcription_config: Create the config from a `RawTranscriptionConfig`
650651
"""
651652
self._raw_transcription_config = raw_transcription_config
@@ -691,6 +692,7 @@ def __init__(
691692
)
692693
self.auto_highlights = auto_highlights
693694
self.language_detection = language_detection
695+
self.language_confidence_threshold = language_confidence_threshold
694696
self.speech_threshold = speech_threshold
695697
self.speech_model = speech_model
696698

@@ -1021,19 +1023,26 @@ def language_detection(self, enable: Optional[bool]) -> None:
10211023
"""
10221024
Identify the dominant language that's spoken in an audio file, and route the file to the appropriate model for the detected language.
10231025
1024-
Automatic Language Detection is supported for the following languages:
1025-
1026-
- English
1027-
- Spanish
1028-
- French
1029-
- German
1030-
- Italian
1031-
- Portuguese
1032-
- Dutch
1026+
See the docs for supported languages: https://www.assemblyai.com/docs/getting-started/supported-languages
10331027
"""
10341028

10351029
self._raw_transcription_config.language_detection = enable
10361030

1031+
@property
1032+
def language_confidence_threshold(self) -> Optional[float]:
1033+
"Returns the confidence threshold that must be reached for automatic language detection."
1034+
1035+
return self._raw_transcription_config.language_confidence_threshold
1036+
1037+
@language_confidence_threshold.setter
1038+
def language_confidence_threshold(self, threshold: Optional[float]) -> None:
1039+
"""
1040+
Set the confidence threshold that must be reached if `language_detection` is enabled. An error will be returned
1041+
if the language confidence is below this threshold. Valid values are in the range [0,1] inclusive.
1042+
"""
1043+
1044+
self._raw_transcription_config.language_confidence_threshold = threshold
1045+
10371046
@property
10381047
def speech_threshold(self) -> Optional[float]:
10391048
"Returns the current speech threshold."
@@ -1042,10 +1051,7 @@ def speech_threshold(self) -> Optional[float]:
10421051

10431052
@speech_threshold.setter
10441053
def speech_threshold(self, threshold: Optional[float]) -> None:
1045-
"Reject audio files that contain less than this fraction of speech. Valid values are in the range [0,1] inclusive"
1046-
1047-
if threshold is not None and (threshold < 0 or threshold > 1):
1048-
raise ValueError("speech_threshold must be between 0 and 1 (inclusive).")
1054+
"Reject audio files that contain less than this fraction of speech. Valid values are in the range [0,1] inclusive."
10491055

10501056
self._raw_transcription_config.speech_threshold = threshold
10511057

@@ -1638,17 +1644,15 @@ class BaseTranscript(BaseModel):
16381644
"""
16391645
Identify the dominant language that's spoken in an audio file, and route the file to the appropriate model for the detected language.
16401646
1641-
Automatic Language Detection is supported for the following languages:
1642-
1643-
- English
1644-
- Spanish
1645-
- French
1646-
- German
1647-
- Italian
1648-
- Portuguese
1649-
- Dutch
1647+
See the docs for supported languages: https://www.assemblyai.com/docs/getting-started/supported-languages
16501648
"""
16511649

1650+
language_confidence_threshold: Optional[float]
1651+
"The confidence threshold that must be reached if `language_detection` is enabled."
1652+
1653+
language_confidence: Optional[float]
1654+
"The confidence score for the detected language, between 0.0 (low confidence) and 1.0 (high confidence)."
1655+
16521656
speech_threshold: Optional[float]
16531657
"Reject audio files that contain less than this fraction of speech. Valid values are in the range [0,1] inclusive."
16541658

tests/unit/test_config.py

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -18,14 +18,3 @@ def test_configuration_are_none_by_default():
1818
pytest.fail(
1919
f"Configuration field {name} is {value} and not None by default."
2020
)
21-
22-
23-
def test_speech_threshold_fails_if_outside_range():
24-
"""
25-
Tests that an exception is raised if the value for speech_threshold is outside the range of [0, 1].
26-
"""
27-
28-
with pytest.raises(ValueError, match="speech_threshold"):
29-
aai.TranscriptionConfig(speech_threshold=1.5)
30-
with pytest.raises(ValueError, match="speech_threshold"):
31-
aai.TranscriptionConfig(speech_threshold=-0.5)

0 commit comments

Comments
 (0)