From d670944b80430636fc32c2d5984611458d2591c9 Mon Sep 17 00:00:00 2001 From: AssemblyAI Date: Tue, 30 May 2023 20:06:19 +0200 Subject: [PATCH] Project import generated by Copybara. GitOrigin-RevId: 5ef7f00a3469d8b3030822c34ceea554cbc81a10 --- README.md | 45 +++++++++++++++++++++++++- assemblyai/transcriber.py | 66 ++++++++++++++++++++++++++++++--------- setup.py | 2 +- 3 files changed, 97 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index beb3435..a6a03cc 100644 --- a/README.md +++ b/README.md @@ -260,8 +260,51 @@ Visit one of our Playgrounds: - [Transcription Playground](https://www.assemblyai.com/playground) -# Advanced (TODO) +# Advanced +## How the SDK handles Default Configurations + +### Defining Defaults + +When no `TranscriptionConfig` is passed to the `Transcriber` or its methods, the `Transcriber` uses a default `TranscriptionConfig` instance. + +If you would like to re-use the same `TranscriptionConfig` for all your transcriptions, +you can set it on the `Transcriber` directly: + +```python +config = aai.TranscriptionConfig(punctuate=False, format_text=False) + +transcriber = aai.Transcriber(config=config) + +# will use the same config for all `.transcribe*(...)` operations +transcriber.transcribe("https://example.org/audio.wav") +``` + +### Overriding Defaults + +You can override the default configuration later via the `.config` property of the `Transcriber`: + +```python +transcriber = aai.Transcriber() + +# override the `Transcriber`'s config with a new config +transcriber.config = aai.TranscriptionConfig(punctuate=False, format_text=False) +``` + + +If you want to override the `Transcriber`'s configuration for a specific operation with a different one, you can do so via the `config` parameter of a `.transcribe*(...)` method: + +```python +config = aai.TranscriptionConfig(punctuate=False, format_text=False) +# set a default configuration +transcriber = aai.Transcriber(config=config) + +transcriber.transcribe( +
"https://example.com/audio.mp3", + # overrides the above configuration on the `Transcriber` with the following + config=aai.TranscriptionConfig(dual_channel=True, disfluencies=True) +) +``` ## Synchronous vs Asynchronous diff --git a/assemblyai/transcriber.py b/assemblyai/transcriber.py index 0554f6b..d0818c3 100644 --- a/assemblyai/transcriber.py +++ b/assemblyai/transcriber.py @@ -442,8 +442,10 @@ def __init__( self, *, client: _client.Client, + config: types.TranscriptionConfig, ) -> None: self._client = client + self.config = config def transcribe_url( self, @@ -517,7 +519,7 @@ def transcribe( poll: bool, ) -> Transcript: if config is None: - config = types.TranscriptionConfig() + config = self.config if urlparse(data).scheme in {"http", "https"}: return self.transcribe_url( @@ -536,9 +538,12 @@ def transcribe_group( self, *, data: List[str], - config: types.TranscriptionConfig, + config: Optional[types.TranscriptionConfig], poll: bool, ) -> TranscriptGroup: + if config is None: + config = self.config + executor = concurrent.futures.ThreadPoolExecutor(max_workers=8) future_transcripts: Dict[concurrent.futures.Future[Transcript], str] = {} @@ -576,6 +581,7 @@ def __init__( self, *, client: Optional[_client.Client] = None, + config: Optional[types.TranscriptionConfig] = None, max_workers: Optional[int] = None, ) -> None: """ @@ -584,13 +590,29 @@ def __init__( Args: `client`: The `Client` to use for the `Transcriber`. If `None` is given, the default settings for the `Client` will be used. + `config`: The default configuration for the `Transcriber`. If `None` is given, + the default configuration of a `TranscriptionConfig` will be used. `max_workers`: The maximum number of parallel jobs when using the `_async` methods on the `Transcriber`. 
By default it uses `os.cpu_count() - 1` + + Example: + To use the `Transcriber` with the default settings, you can simply do: + ``` + transcriber = aai.Transcriber() + ``` + + To use the `Transcriber` with a custom configuration, you can do: + ``` + config = aai.TranscriptionConfig(punctuate=False, format_text=False) + + transcriber = aai.Transcriber(config=config) + ``` """ self._client = client or _client.Client.get_default() self._impl = _TranscriberImpl( client=self._client, + config=config or types.TranscriptionConfig(), ) if not max_workers: @@ -600,6 +622,23 @@ def __init__( max_workers=max_workers, ) + @property + def config(self) -> types.TranscriptionConfig: + """ + Returns the default configuration of the `Transcriber`. + """ + return self._impl.config + + @config.setter + def config(self, config: types.TranscriptionConfig) -> None: + """ + Sets the default configuration of the `Transcriber`. + + Args: + `config`: The new default configuration. + """ + self._impl.config = config + def submit( self, data: str, @@ -610,7 +649,8 @@ def submit( Args: data: An URL or a local file (as path) - config: Transcription options and features. + config: Transcription options and features. If `None` is given, the Transcriber's + default configuration will be used. """ return self._impl.transcribe( data=data, @@ -628,8 +668,8 @@ def transcribe( Args: data: An URL or a local file (as path) - config: Transcription options and features. - poll: Whether the transcript should be polled for its completion. + config: Transcription options and features. If `None` is given, the Transcriber's + default configuration will be used. """ return self._impl.transcribe( @@ -648,8 +688,8 @@ def transcribe_async( Args: data: An URL or a local file (as path) - config: Transcription options and features. - poll: Whether the transcript should be polled for its completion. + config: Transcription options and features. If `None` is given, the Transcriber's + default configuration will be used. 
""" return self._executor.submit( @@ -669,11 +709,9 @@ def transcribe_group( Args: data: A list of paths or URLs (can be mixed) - config: Transcription options and features. - poll: Whether the transcripts should be polled for their completion. + config: Transcription options and features. If `None` is given, the Transcriber's + default configuration will be used. """ - if config is None: - config = types.TranscriptionConfig() return self._impl.transcribe_group( data=data, @@ -683,7 +721,7 @@ def transcribe_group( def transcribe_group_async( self, - data: str, + data: List[str], config: Optional[types.TranscriptionConfig] = None, ) -> concurrent.futures.Future[TranscriptGroup]: """ @@ -692,8 +730,8 @@ def transcribe_group_async( Args: data: A list of paths or URLs (can be mixed) - config: Transcription options and features. - poll: Whether the transcripts should be polled for their completion. + config: Transcription options and features. If `None` is given, the Transcriber's + default configuration will be used. """ return self._executor.submit( diff --git a/setup.py b/setup.py index 7387091..10e5636 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ setup( name="assemblyai", - version="0.3.3", + version="0.4.0", description="AssemblyAI Python SDK", author="AssemblyAI", author_email="engineering.sdk@assemblyai.com",