From 141c80413e4cf708d0b8976cdb524e900895a788 Mon Sep 17 00:00:00 2001 From: "David E. Weekly" Date: Fri, 12 Apr 2024 16:22:20 -0700 Subject: [PATCH 1/9] Add audio redaction quality Add support for redact_pii_audio_quality --- assemblyai/types.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/assemblyai/types.py b/assemblyai/types.py index 00c349d..0e71fcf 100644 --- a/assemblyai/types.py +++ b/assemblyai/types.py @@ -202,6 +202,11 @@ class WordBoost(str, Enum): high = "high" +class RedactedAudioQuality(str, Enum): + mp3 = "mp3" + wav = "wav" + + class EntityType(str, Enum): """ Used for AssemblyAI's Entity Detection feature. @@ -454,6 +459,8 @@ class RawTranscriptionConfig(BaseModel): "Redact PII from the transcribed text." redact_pii_audio: Optional[bool] "Generate a copy of the original media file with spoken PII 'beeped' out." + redact_pii_audio_quality: Optional[RedactedAudioQuality] + "Set the quality level of the redacted audio: mp3 or wav" redact_pii_policies: Optional[List[PIIRedactionPolicy]] "The list of PII Redaction policies to enable." redact_pii_sub: Optional[PIISubstitutionPolicy] @@ -543,6 +550,7 @@ def __init__( filter_profanity: Optional[bool] = None, redact_pii: Optional[bool] = None, redact_pii_audio: Optional[bool] = None, + redact_pii_audio_quality: Optional[RedactedAudioQuality] = None, redact_pii_policies: Optional[List[PIIRedactionPolicy]] = None, redact_pii_sub: Optional[PIISubstitutionPolicy] = None, speaker_labels: Optional[bool] = None, @@ -580,6 +588,7 @@ def __init__( filter_profanity: Filter profanity from the transcribed text. redact_pii: Redact PII from the transcribed text. redact_pii_audio: Generate a copy of the original media file with spoken PII 'beeped' out (new audio only available for 24 hours). + redact_pii_audio_quality: Select the quality level for audio redaction: mp3 or wav redact_pii_policies: The list of PII Redaction policies to enable. 
redact_pii_sub: The replacement logic for detected PII. speaker_labels: Enable Speaker Diarization. @@ -623,6 +632,7 @@ def __init__( self.set_redact_pii( redact_pii, redact_pii_audio, + redact_pii_audio_quality, redact_pii_policies, redact_pii_sub, ) @@ -773,6 +783,12 @@ def redact_pii_audio(self) -> Optional[bool]: return self._raw_transcription_config.redact_pii_audio + @property + def redact_pii_audio_quality(self) -> Optional[RedactedAudioQuality]: + "The quality of the redacted audio (mp3 or wav)" + + return self._raw_transcription_config.redact_pii_audio_quality + @property def redact_pii_policies(self) -> Optional[List[PIIRedactionPolicy]]: "Returns a list of set of defined PII redaction policies." @@ -1122,6 +1138,7 @@ def set_redact_pii( self, enable: Optional[bool] = True, redact_audio: Optional[bool] = None, + redact_audio_quality: Optional[RedactedAudioQuality] = None, policies: Optional[List[PIIRedactionPolicy]] = None, substitution: Optional[PIISubstitutionPolicy] = None, ) -> Self: @@ -1131,6 +1148,7 @@ def set_redact_pii( Args: enable: whether to enable or disable the PII Redaction feature. redact_audio: Generate a copy of the original media file with spoken PII 'beeped' out. NOTE: The copy is available for 24h + redact_audio_quality: The quality level of the redacted audio: either mp3 or wav policies: A list of PII redaction policies to enable. substitution: The replacement logic for detected PII (`PIISubstutionPolicy.hash` by default). 
""" @@ -1138,6 +1156,7 @@ def set_redact_pii( if not enable: self._raw_transcription_config.redact_pii = None self._raw_transcription_config.redact_pii_audio = None + self._raw_transcription_config.redact_pii_audio_quality = None self._raw_transcription_config.redact_pii_policies = None self._raw_transcription_config.redact_pii_sub = None @@ -1148,6 +1167,7 @@ def set_redact_pii( self._raw_transcription_config.redact_pii = True self._raw_transcription_config.redact_pii_audio = redact_audio + self._raw_transcription_config.redact_pii_audio_quality = redact_audio_quality self._raw_transcription_config.redact_pii_policies = policies self._raw_transcription_config.redact_pii_sub = substitution @@ -1527,6 +1547,8 @@ class BaseTranscript(BaseModel): "Redact PII from the transcribed text." redact_pii_audio: Optional[bool] "Generate a copy of the original media file with spoken PII 'beeped' out." + redact_pii_audio_quality: Optional[RedactedAudioQuality] + "The audio quality level of the redacted audio (wav or mp3)" redact_pii_policies: Optional[List[PIIRedactionPolicy]] "The list of PII Redaction policies to enable." redact_pii_sub: Optional[PIISubstitutionPolicy] From 7d223cf05356a28dde138d82d97060fef0c0ea89 Mon Sep 17 00:00:00 2001 From: "David E. Weekly" Date: Thu, 18 Apr 2024 21:16:13 -0700 Subject: [PATCH 2/9] Update assemblyai/types.py Co-authored-by: Patrick Loeber <98830383+ploeber@users.noreply.github.com> --- assemblyai/types.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assemblyai/types.py b/assemblyai/types.py index 0e71fcf..cac0223 100644 --- a/assemblyai/types.py +++ b/assemblyai/types.py @@ -460,7 +460,7 @@ class RawTranscriptionConfig(BaseModel): redact_pii_audio: Optional[bool] "Generate a copy of the original media file with spoken PII 'beeped' out." 
redact_pii_audio_quality: Optional[RedactedAudioQuality] - "Set the quality level of the redacted audio: mp3 or wav" + "The quality of the redacted audio file in case `redact_pii_audio` is enabled." redact_pii_policies: Optional[List[PIIRedactionPolicy]] "The list of PII Redaction policies to enable." redact_pii_sub: Optional[PIISubstitutionPolicy] From 086025bed48d1117e8fe043dc82952d004164a4c Mon Sep 17 00:00:00 2001 From: "David E. Weekly" Date: Thu, 18 Apr 2024 21:16:19 -0700 Subject: [PATCH 3/9] Update assemblyai/types.py Co-authored-by: Patrick Loeber <98830383+ploeber@users.noreply.github.com> --- assemblyai/types.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assemblyai/types.py b/assemblyai/types.py index cac0223..f196b14 100644 --- a/assemblyai/types.py +++ b/assemblyai/types.py @@ -588,7 +588,7 @@ def __init__( filter_profanity: Filter profanity from the transcribed text. redact_pii: Redact PII from the transcribed text. redact_pii_audio: Generate a copy of the original media file with spoken PII 'beeped' out (new audio only available for 24 hours). - redact_pii_audio_quality: Select the quality level for audio redaction: mp3 or wav + redact_pii_audio_quality: The quality of the redacted audio file in case `redact_pii_audio` is enabled. redact_pii_policies: The list of PII Redaction policies to enable. redact_pii_sub: The replacement logic for detected PII. speaker_labels: Enable Speaker Diarization. From a54f8c4c112f87da9a167427f1e1304d47ab5cc7 Mon Sep 17 00:00:00 2001 From: "David E. 
Weekly" Date: Thu, 18 Apr 2024 21:16:24 -0700 Subject: [PATCH 4/9] Update assemblyai/types.py Co-authored-by: Patrick Loeber <98830383+ploeber@users.noreply.github.com> --- assemblyai/types.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assemblyai/types.py b/assemblyai/types.py index f196b14..9ef6e95 100644 --- a/assemblyai/types.py +++ b/assemblyai/types.py @@ -785,7 +785,7 @@ def redact_pii_audio(self) -> Optional[bool]: @property def redact_pii_audio_quality(self) -> Optional[RedactedAudioQuality]: - "The quality of the redacted audio (mp3 or wav)" + "The quality of the redacted audio file in case `redact_pii_audio` is enabled." return self._raw_transcription_config.redact_pii_audio_quality From f2c97c8ae1b0ca52c3853b2923b3639dc1461e23 Mon Sep 17 00:00:00 2001 From: "David E. Weekly" Date: Thu, 18 Apr 2024 21:16:30 -0700 Subject: [PATCH 5/9] Update assemblyai/types.py Co-authored-by: Patrick Loeber <98830383+ploeber@users.noreply.github.com> --- assemblyai/types.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assemblyai/types.py b/assemblyai/types.py index 9ef6e95..a6731bf 100644 --- a/assemblyai/types.py +++ b/assemblyai/types.py @@ -1548,7 +1548,7 @@ class BaseTranscript(BaseModel): redact_pii_audio: Optional[bool] "Generate a copy of the original media file with spoken PII 'beeped' out." redact_pii_audio_quality: Optional[RedactedAudioQuality] - "The audio quality level of the redacted audio (wav or mp3)" + "The quality of the redacted audio file in case `redact_pii_audio` is enabled." redact_pii_policies: Optional[List[PIIRedactionPolicy]] "The list of PII Redaction policies to enable." redact_pii_sub: Optional[PIISubstitutionPolicy] From bdf036a57cb063d6f2c8e661e81616779f37ef1f Mon Sep 17 00:00:00 2001 From: "David E. 
Weekly" Date: Thu, 18 Apr 2024 21:16:38 -0700 Subject: [PATCH 6/9] Update assemblyai/types.py Co-authored-by: Patrick Loeber <98830383+ploeber@users.noreply.github.com> --- assemblyai/types.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assemblyai/types.py b/assemblyai/types.py index a6731bf..4a506fa 100644 --- a/assemblyai/types.py +++ b/assemblyai/types.py @@ -1148,7 +1148,7 @@ def set_redact_pii( Args: enable: whether to enable or disable the PII Redaction feature. redact_audio: Generate a copy of the original media file with spoken PII 'beeped' out. NOTE: The copy is available for 24h - redact_audio_quality: The quality level of the redacted audio: either mp3 or wav + redact_audio_quality: The quality of the redacted audio file in case `redact_audio` is enabled. policies: A list of PII redaction policies to enable. substitution: The replacement logic for detected PII (`PIISubstutionPolicy.hash` by default). """ From 2aaf9dcf5863b4669b688154e61eae1169704603 Mon Sep 17 00:00:00 2001 From: "David E. Weekly" Date: Thu, 18 Apr 2024 21:18:30 -0700 Subject: [PATCH 7/9] Rename to PIIRedactedAudioQuality Per suggestion from @ploeber --- assemblyai/types.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/assemblyai/types.py b/assemblyai/types.py index 4a506fa..e861580 100644 --- a/assemblyai/types.py +++ b/assemblyai/types.py @@ -202,7 +202,7 @@ class WordBoost(str, Enum): high = "high" -class RedactedAudioQuality(str, Enum): +class PIIRedactedAudioQuality(str, Enum): mp3 = "mp3" wav = "wav" @@ -459,7 +459,7 @@ class RawTranscriptionConfig(BaseModel): "Redact PII from the transcribed text." redact_pii_audio: Optional[bool] "Generate a copy of the original media file with spoken PII 'beeped' out." - redact_pii_audio_quality: Optional[RedactedAudioQuality] + redact_pii_audio_quality: Optional[PIIRedactedAudioQuality] "The quality of the redacted audio file in case `redact_pii_audio` is enabled." 
redact_pii_policies: Optional[List[PIIRedactionPolicy]] "The list of PII Redaction policies to enable." @@ -550,7 +550,7 @@ def __init__( filter_profanity: Optional[bool] = None, redact_pii: Optional[bool] = None, redact_pii_audio: Optional[bool] = None, - redact_pii_audio_quality: Optional[RedactedAudioQuality] = None, + redact_pii_audio_quality: Optional[PIIRedactedAudioQuality] = None, redact_pii_policies: Optional[List[PIIRedactionPolicy]] = None, redact_pii_sub: Optional[PIISubstitutionPolicy] = None, speaker_labels: Optional[bool] = None, @@ -784,7 +784,7 @@ def redact_pii_audio(self) -> Optional[bool]: return self._raw_transcription_config.redact_pii_audio @property - def redact_pii_audio_quality(self) -> Optional[RedactedAudioQuality]: + def redact_pii_audio_quality(self) -> Optional[PIIRedactedAudioQuality]: "The quality of the redacted audio file in case `redact_pii_audio` is enabled." return self._raw_transcription_config.redact_pii_audio_quality @@ -1138,7 +1138,7 @@ def set_redact_pii( self, enable: Optional[bool] = True, redact_audio: Optional[bool] = None, - redact_audio_quality: Optional[RedactedAudioQuality] = None, + redact_audio_quality: Optional[PIIRedactedAudioQuality] = None, policies: Optional[List[PIIRedactionPolicy]] = None, substitution: Optional[PIISubstitutionPolicy] = None, ) -> Self: @@ -1547,7 +1547,7 @@ class BaseTranscript(BaseModel): "Redact PII from the transcribed text." redact_pii_audio: Optional[bool] "Generate a copy of the original media file with spoken PII 'beeped' out." - redact_pii_audio_quality: Optional[RedactedAudioQuality] + redact_pii_audio_quality: Optional[PIIRedactedAudioQuality] "The quality of the redacted audio file in case `redact_pii_audio` is enabled." redact_pii_policies: Optional[List[PIIRedactionPolicy]] "The list of PII Redaction policies to enable." 
From 3a8db55f8865f9ebb5b68c8a7e14f3448fcdea80 Mon Sep 17 00:00:00 2001 From: David Weekly Date: Thu, 18 Apr 2024 21:22:38 -0700 Subject: [PATCH 8/9] Add PIIRedactedAudioQuality to `__all__` in `__init__.py`. --- assemblyai/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/assemblyai/__init__.py b/assemblyai/__init__.py index 2d45027..882b3d1 100644 --- a/assemblyai/__init__.py +++ b/assemblyai/__init__.py @@ -117,6 +117,7 @@ "Utterance", "UtteranceWord", "Paragraph", + "PIIRedactedAudioQuality", "PIISubstitutionPolicy", "PIIRedactionPolicy", "RawTranscriptionConfig", From 4322b6c88f9521ba889bc52c4f3c79c079bf33e0 Mon Sep 17 00:00:00 2001 From: Patrick Loeber <98830383+ploeber@users.noreply.github.com> Date: Fri, 19 Apr 2024 11:08:42 +0200 Subject: [PATCH 9/9] Import PIIRedactedAudioQuality in `__init__.py` and drop duplicate `__all__` entries --- assemblyai/__init__.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/assemblyai/__init__.py b/assemblyai/__init__.py index 882b3d1..a268b29 100644 --- a/assemblyai/__init__.py +++ b/assemblyai/__init__.py @@ -32,6 +32,7 @@ LemurTaskResponse, LemurTranscriptSource, Paragraph, + PIIRedactedAudioQuality, PIIRedactionPolicy, PIISubstitutionPolicy, RawTranscriptionConfig, @@ -97,8 +98,6 @@ "LemurQuestionResponse", "LemurSummaryResponse", "LemurTaskResponse", - "PIIRedactionPolicy", - "PIISubstitutionPolicy", "Sentence", "Sentiment", "SentimentType",