Skip to content

Commit 2d0a9bd

Browse files
s0h3ylAssemblyAI
andauthored
feat: allow passing TranscriptionConfig to Transcriber (#5)
Co-authored-by: AssemblyAI <[email protected]>
1 parent bb8c6d3 commit 2d0a9bd

File tree

3 files changed

+97
-16
lines changed

3 files changed

+97
-16
lines changed

README.md

Lines changed: 44 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -260,8 +260,51 @@ Visit one of our Playgrounds:
260260
- [Transcription Playground](https://www.assemblyai.com/playground)
261261

262262

263-
# Advanced (TODO)
263+
# Advanced
264264

265+
## How the SDK handles Default Configurations
266+
267+
### Defining Defaults
268+
269+
When no `TranscriptionConfig` is passed to the `Transcriber` or its methods, the SDK uses a default instance of `TranscriptionConfig`.
270+
271+
If you would like to re-use the same `TranscriptionConfig` for all your transcriptions,
272+
you can set it on the `Transcriber` directly:
273+
274+
```python
275+
config = aai.TranscriptionConfig(punctuate=False, format_text=False)
276+
277+
transcriber = aai.Transcriber(config=config)
278+
279+
# will use the same config for all `.transcribe*(...)` operations
280+
transcriber.transcribe("https://example.org/audio.wav")
281+
```
282+
283+
### Overriding Defaults
284+
285+
You can override the default configuration later via the `.config` property of the `Transcriber`:
286+
287+
```python
288+
transcriber = aai.Transcriber()
289+
290+
# override the `Transcriber`'s config with a new config
291+
transcriber.config = aai.TranscriptionConfig(punctuate=False, format_text=False)
292+
```
293+
294+
295+
To override the `Transcriber`'s configuration for a single operation, pass a different one via the `config` parameter of a `.transcribe*(...)` method:
296+
297+
```python
298+
config = aai.TranscriptionConfig(punctuate=False, format_text=False)
299+
# set a default configuration
300+
transcriber = aai.Transcriber(config=config)
301+
302+
transcriber.transcribe(
303+
"https://example.com/audio.mp3",
304+
# overrides the above configuration on the `Transcriber` with the following
305+
config=aai.TranscriptionConfig(dual_channel=True, disfluencies=True)
306+
)
307+
```
265308

266309
## Synchronous vs Asynchronous
267310

assemblyai/transcriber.py

Lines changed: 52 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -442,8 +442,10 @@ def __init__(
442442
self,
443443
*,
444444
client: _client.Client,
445+
config: types.TranscriptionConfig,
445446
) -> None:
446447
self._client = client
448+
self.config = config
447449

448450
def transcribe_url(
449451
self,
@@ -517,7 +519,7 @@ def transcribe(
517519
poll: bool,
518520
) -> Transcript:
519521
if config is None:
520-
config = types.TranscriptionConfig()
522+
config = self.config
521523

522524
if urlparse(data).scheme in {"http", "https"}:
523525
return self.transcribe_url(
@@ -536,9 +538,12 @@ def transcribe_group(
536538
self,
537539
*,
538540
data: List[str],
539-
config: types.TranscriptionConfig,
541+
config: Optional[types.TranscriptionConfig],
540542
poll: bool,
541543
) -> TranscriptGroup:
544+
if config is None:
545+
config = self.config
546+
542547
executor = concurrent.futures.ThreadPoolExecutor(max_workers=8)
543548
future_transcripts: Dict[concurrent.futures.Future[Transcript], str] = {}
544549

@@ -576,6 +581,7 @@ def __init__(
576581
self,
577582
*,
578583
client: Optional[_client.Client] = None,
584+
config: Optional[types.TranscriptionConfig] = None,
579585
max_workers: Optional[int] = None,
580586
) -> None:
581587
"""
@@ -584,13 +590,29 @@ def __init__(
584590
Args:
585591
`client`: The `Client` to use for the `Transcriber`. If `None` is given, the
586592
default settings for the `Client` will be used.
593+
`config`: The default configuration for the `Transcriber`. If `None` is given,
594+
the default configuration of a `TranscriptionConfig` will be used.
587595
`max_workers`: The maximum number of parallel jobs when using the `_async`
588596
methods on the `Transcriber`. By default it uses `os.cpu_count() - 1`
597+
598+
Example:
599+
To use the `Transcriber` with the default settings, you can simply do:
600+
```
601+
transcriber = aai.Transcriber()
602+
```
603+
604+
To use the `Transcriber` with a custom configuration, you can do:
605+
```
606+
config = aai.TranscriptionConfig(punctuate=False, format_text=False)
607+
608+
transcriber = aai.Transcriber(config=config)
609+
```
589610
"""
590611
self._client = client or _client.Client.get_default()
591612

592613
self._impl = _TranscriberImpl(
593614
client=self._client,
615+
config=config or types.TranscriptionConfig(),
594616
)
595617

596618
if not max_workers:
@@ -600,6 +622,23 @@ def __init__(
600622
max_workers=max_workers,
601623
)
602624

625+
@property
626+
def config(self) -> types.TranscriptionConfig:
627+
"""
628+
Returns the default configuration of the `Transcriber`.
629+
"""
630+
return self._impl.config
631+
632+
@config.setter
633+
def config(self, config: types.TranscriptionConfig) -> None:
634+
"""
635+
Sets the default configuration of the `Transcriber`.
636+
637+
Args:
638+
`config`: The new default configuration.
639+
"""
640+
self._impl.config = config
641+
603642
def submit(
604643
self,
605644
data: str,
@@ -610,7 +649,8 @@ def submit(
610649
611650
Args:
612651
data: A URL or a local file (as path)
613-
config: Transcription options and features.
652+
config: Transcription options and features. If `None` is given, the Transcriber's
653+
default configuration will be used.
614654
"""
615655
return self._impl.transcribe(
616656
data=data,
@@ -628,8 +668,8 @@ def transcribe(
628668
629669
Args:
630670
data: A URL or a local file (as path)
631-
config: Transcription options and features.
632-
poll: Whether the transcript should be polled for its completion.
671+
config: Transcription options and features. If `None` is given, the Transcriber's
672+
default configuration will be used.
633673
"""
634674

635675
return self._impl.transcribe(
@@ -648,8 +688,8 @@ def transcribe_async(
648688
649689
Args:
650690
data: A URL or a local file (as path)
651-
config: Transcription options and features.
652-
poll: Whether the transcript should be polled for its completion.
691+
config: Transcription options and features. If `None` is given, the Transcriber's
692+
default configuration will be used.
653693
"""
654694

655695
return self._executor.submit(
@@ -669,11 +709,9 @@ def transcribe_group(
669709
670710
Args:
671711
data: A list of paths or URLs (can be mixed)
672-
config: Transcription options and features.
673-
poll: Whether the transcripts should be polled for their completion.
712+
config: Transcription options and features. If `None` is given, the Transcriber's
713+
default configuration will be used.
674714
"""
675-
if config is None:
676-
config = types.TranscriptionConfig()
677715

678716
return self._impl.transcribe_group(
679717
data=data,
@@ -683,7 +721,7 @@ def transcribe_group(
683721

684722
def transcribe_group_async(
685723
self,
686-
data: str,
724+
data: List[str],
687725
config: Optional[types.TranscriptionConfig] = None,
688726
) -> concurrent.futures.Future[TranscriptGroup]:
689727
"""
@@ -692,8 +730,8 @@ def transcribe_group_async(
692730
693731
Args:
694732
data: A list of paths or URLs (can be mixed)
695-
config: Transcription options and features.
696-
poll: Whether the transcripts should be polled for their completion.
733+
config: Transcription options and features. If `None` is given, the Transcriber's
734+
default configuration will be used.
697735
"""
698736

699737
return self._executor.submit(

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
setup(
99
name="assemblyai",
10-
version="0.3.3",
10+
version="0.4.0",
1111
description="AssemblyAI Python SDK",
1212
author="AssemblyAI",
1313
author_email="[email protected]",

0 commit comments

Comments
 (0)