Skip to content

Commit 18eb91b

Browse files
s0h3yl and AssemblyAI authored
feat: add real-time functionality (#20)
Co-authored-by: AssemblyAI <[email protected]>
1 parent 7714cd2 commit 18eb91b

File tree

9 files changed

+1170
-17
lines changed

9 files changed

+1170
-17
lines changed

.github/workflows/test.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ jobs:
3838
python-version: ${{ matrix.py }}
3939
- name: Setup test suite
4040
run: |
41+
sudo apt-get update && sudo apt-get install -y portaudio19-dev
4142
python_version="${{ matrix.py }}"
4243
python_version="${python_version/./}"
4344
tox -f "py$python_version" -vvvv --notest

README.md

Lines changed: 123 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -18,14 +18,23 @@ With a single API call, get access to AI models built on the latest AI breakthro
1818

1919
# Overview
2020

21+
- [AssemblyAI's Python SDK](#assemblyais-python-sdk)
22+
- [Overview](#overview)
2123
- [Documentation](#documentation)
22-
- [Installation](#installation)
23-
- [Example](#examples)
24-
- [Core Examples](#core-examples)
25-
- [LeMUR Examples](#lemur-examples)
26-
- [Audio Intelligence Examples](#audio-intelligence-examples)
27-
- [Playgrounds](#playgrounds)
28-
- [Advanced](#advanced-todo)
24+
- [Quick Start](#quick-start)
25+
- [Installation](#installation)
26+
- [Examples](#examples)
27+
- [**Core Examples**](#core-examples)
28+
- [**LeMUR Examples**](#lemur-examples)
29+
- [**Audio Intelligence Examples**](#audio-intelligence-examples)
30+
- [**Real-Time Examples**](#real-time-examples)
31+
- [Playgrounds](#playgrounds)
32+
- [Advanced](#advanced)
33+
- [How the SDK handles Default Configurations](#how-the-sdk-handles-default-configurations)
34+
- [Defining Defaults](#defining-defaults)
35+
- [Overriding Defaults](#overriding-defaults)
36+
- [Synchronous vs Asynchronous](#synchronous-vs-asynchronous)
37+
- [Polling Intervals](#polling-intervals)
2938

3039
# Documentation
3140

@@ -470,6 +479,113 @@ for result in transcript.auto_highlights.results:
470479

471480
---
472481

482+
### **Real-Time Examples**
483+
484+
[Read more about our Real-Time service.](https://www.assemblyai.com/docs/Guides/real-time_streaming_transcription)
485+
486+
<details>
487+
<summary>Stream your Microphone in Real-Time</summary>
488+
489+
```python
490+
import assemblyai as aai
491+
492+
def on_open(session_opened: aai.RealtimeSessionOpened):
493+
"This function is called when the connection has been established."
494+
495+
print("Session ID:", session_opened.session_id)
496+
497+
def on_data(transcript: aai.RealtimeTranscript):
498+
"This function is called when a new transcript has been received."
499+
500+
if not transcript.text:
501+
return
502+
503+
if isinstance(transcript, aai.RealtimeFinalTranscript):
504+
print(transcript.text, end="\r\n")
505+
else:
506+
print(transcript.text, end="\r")
507+
508+
def on_error(error: aai.RealtimeError):
509+
"This function is called when an error has occurred."
510+
511+
print("An error occurred:", error)
512+
513+
def on_close():
514+
"This function is called when the connection has been closed."
515+
516+
print("Closing Session")
517+
518+
519+
# Create the Real-Time transcriber
520+
transcriber = aai.RealtimeTranscriber(
521+
on_data=on_data,
522+
on_error=on_error,
523+
sample_rate=44_100,
524+
on_open=on_open, # optional
525+
on_close=on_close, # optional
526+
)
527+
528+
529+
# Open a microphone stream
530+
microphone_stream = aai.extras.MicrophoneStream()
531+
532+
# Press CTRL+C to abort
533+
transcriber.stream(microphone_stream)
534+
535+
transcriber.close()
536+
```
537+
538+
</details>
539+
540+
<details>
541+
<summary>Transcribe a Local Audio File in Real-Time</summary>
542+
543+
```python
544+
import assemblyai as aai
545+
546+
547+
def on_data(transcript: aai.RealtimeTranscript):
548+
"This function is called when a new transcript has been received."
549+
550+
if not transcript.text:
551+
return
552+
553+
if isinstance(transcript, aai.RealtimeFinalTranscript):
554+
print(transcript.text, end="\r\n")
555+
else:
556+
print(transcript.text, end="\r")
557+
558+
def on_error(error: aai.RealtimeError):
559+
"This function is called when an error has occurred."
560+
561+
print("An error occurred:", error)
562+
563+
564+
# Create the Real-Time transcriber
565+
transcriber = aai.RealtimeTranscriber(
566+
on_data=on_data,
567+
on_error=on_error,
568+
sample_rate=44_100,
569+
on_open=on_open, # optional
570+
on_close=on_close, # optional
571+
)
572+
573+
574+
# Only WAV/PCM16 single channel supported for now
575+
file_stream = aai.extras.stream_file(
576+
filepath="audio.wav",
577+
sample_rate=44_100,
578+
)
579+
580+
transcriber.stream(file_stream)
581+
582+
transcriber.close()
583+
```
584+
585+
</details>
586+
587+
---
588+
473589
## Playgrounds
474590

475591
Visit one of our Playgrounds:

assemblyai/__init__.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
1+
from . import extras
12
from .client import Client
23
from .lemur import Lemur
3-
from .transcriber import Transcriber, Transcript, TranscriptGroup
4+
from .transcriber import RealtimeTranscriber, Transcriber, Transcript, TranscriptGroup
45
from .types import (
56
AssemblyAIError,
67
AutohighlightResponse,
@@ -24,6 +25,12 @@
2425
PIIRedactionPolicy,
2526
PIISubstitutionPolicy,
2627
RawTranscriptionConfig,
28+
RealtimeError,
29+
RealtimeFinalTranscript,
30+
RealtimePartialTranscript,
31+
RealtimeSessionOpened,
32+
RealtimeTranscript,
33+
RealtimeWord,
2734
Sentence,
2835
Sentiment,
2936
SentimentType,
@@ -93,6 +100,14 @@
93100
"Word",
94101
"WordBoost",
95102
"WordSearchMatch",
103+
"RealtimeError",
104+
"RealtimeFinalTranscript",
105+
"RealtimePartialTranscript",
106+
"RealtimeSessionOpened",
107+
"RealtimeTranscript",
108+
"RealtimeWord",
96109
# package globals
97110
"settings",
111+
# packages
112+
"extras",
98113
]

assemblyai/extras.py

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
import time
2+
from typing import Generator
3+
4+
try:
5+
import pyaudio
6+
except ImportError:
7+
raise ImportError(
8+
"You must install the extras for this SDK to use this feature. "
9+
"Run `pip install assemblyai[extras]` to install the extras. "
10+
"Make sure to install `apt install portaudio19-dev` (Debian/Ubuntu) or "
11+
"`brew install portaudio` (MacOS) before installing the extras."
12+
)
13+
14+
15+
class MicrophoneStream:
    """
    Iterator that yields chunks of 16-bit PCM, single-channel audio read
    from a PyAudio input stream.

    Can be used as a context manager so that the audio resources are always
    released:

        with MicrophoneStream() as microphone_stream:
            transcriber.stream(microphone_stream)
    """

    def __init__(
        self,
        sample_rate: int = 44_100,
    ):
        """
        Creates a stream of audio from the microphone.

        Audio is captured as 16-bit PCM on a single channel and is read in
        chunks of `int(sample_rate * 0.1)` frames (about 100 ms of audio).

        Args:
            sample_rate: The sample rate to record audio at.
        """

        self._pyaudio = pyaudio.PyAudio()
        self.sample_rate = sample_rate

        self._chunk_size = int(self.sample_rate * 0.1)
        try:
            self._stream = self._pyaudio.open(
                format=pyaudio.paInt16,
                channels=1,
                rate=sample_rate,
                input=True,
                frames_per_buffer=self._chunk_size,
            )
        except Exception:
            # Do not leak the PyAudio instance when the input stream
            # cannot be opened (e.g. no microphone available).
            self._pyaudio.terminate()
            raise

        self._open = True
        self._closed = False

    def __enter__(self) -> "MicrophoneStream":
        """
        Returns the stream itself so it can be used in a `with` statement.
        """

        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        """
        Closes the stream when leaving the `with` block.
        """

        self.close()

    def __iter__(self):
        """
        Returns the iterator object.
        """

        return self

    def __next__(self) -> bytes:
        """
        Reads a chunk of audio from the microphone.

        Raises:
            StopIteration: If the stream has been closed or the read was
                interrupted with CTRL+C.
        """
        if not self._open:
            raise StopIteration

        try:
            return self._stream.read(self._chunk_size)
        except KeyboardInterrupt:
            # CTRL+C during the blocking read ends the iteration instead of
            # propagating, so that consumers can shut down cleanly.
            raise StopIteration

    def close(self) -> None:
        """
        Closes the stream and releases the underlying audio resources.

        Calling this method more than once is safe; only the first call
        releases the resources.
        """

        self._open = False

        if getattr(self, "_closed", False):
            return
        self._closed = True

        try:
            if self._stream.is_active():
                self._stream.stop_stream()

            self._stream.close()
        finally:
            # Always release PyAudio, even if stopping/closing the stream failed.
            self._pyaudio.terminate()
74+
75+
76+
def stream_file(
    filepath: str,
    sample_rate: int,
) -> Generator[bytes, None, None]:
    """
    Mimics a stream of audio data by reading it chunk by chunk from a file.

    NOTE: Only supports WAV/PCM16 files as of now.

    The file is read as raw bytes in chunks of `int(sample_rate * 0.30)`
    samples (about 300 ms of audio, 2 bytes per sample). A trailing chunk
    that is shorter than a full chunk is dropped. After a chunk has been
    consumed, the generator sleeps for 150 ms before reading the next one.

    Args:
        filepath: The path to the file to stream.
        sample_rate: The sample rate of the audio file.

    Returns: A generator that yields chunks of audio data.
    """

    bytes_per_sample = 2  # PCM16
    chunk_bytes = int(sample_rate * 0.30) * bytes_per_sample

    if chunk_bytes <= 0:
        # Nothing sensible can be read for a non-positive chunk size.
        return

    with open(filepath, "rb") as f:
        while True:
            data = f.read(chunk_bytes)

            # Compare byte counts rather than a floating point duration:
            # whenever `int(sample_rate * 0.30)` truncates (e.g. 11025 Hz),
            # a full chunk is slightly shorter than 300 ms and a duration
            # check would discard every chunk.
            if len(data) < chunk_bytes:
                break

            yield data

            time.sleep(0.15)

0 commit comments

Comments
 (0)