AssemblyAI · s0h3yl · Jun 20, 2023 · Jun 19, 2023
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -38,6 +38,7 @@ jobs:
           python-version: ${{ matrix.py }}
       - name: Setup test suite
         run: |
+          sudo apt-get update && sudo apt-get install -y portaudio19-dev
           python_version="${{ matrix.py }}"
           python_version="${python_version/./}"
           tox -f "py$python_version" -vvvv --notest

diff --git a/README.md b/README.md
@@ -18,14 +18,23 @@ With a single API call, get access to AI models built on the latest AI breakthro
 
 # Overview
 
+- [AssemblyAI's Python SDK](#assemblyais-python-sdk)
+- [Overview](#overview)
 - [Documentation](#documentation)
-- [Installation](#installation)
-- [Example](#examples)
-  - [Core Examples](#core-examples)
-  - [LeMUR Examples](#lemur-examples)
-  - [Audio Intelligence Examples](#audio-intelligence-examples)
-- [Playgrounds](#playgrounds)
-- [Advanced](#advanced-todo)
+- [Quick Start](#quick-start)
+  - [Installation](#installation)
+  - [Examples](#examples)
+    - [**Core Examples**](#core-examples)
+    - [**LeMUR Examples**](#lemur-examples)
+    - [**Audio Intelligence Examples**](#audio-intelligence-examples)
+    - [**Real-Time Examples**](#real-time-examples)
+  - [Playgrounds](#playgrounds)
+- [Advanced](#advanced)
+  - [How the SDK handles Default Configurations](#how-the-sdk-handles-default-configurations)
+    - [Defining Defaults](#defining-defaults)
+    - [Overriding Defaults](#overriding-defaults)
+  - [Synchronous vs Asynchronous](#synchronous-vs-asynchronous)
+  - [Polling Intervals](#polling-intervals)
 
 # Documentation
 
@@ -470,6 +479,113 @@ for result in transcript.auto_highlights.results:
 
 ---
 
+### **Real-Time Examples**
+
+[Read more about our Real-Time service.](https://www.assemblyai.com/docs/Guides/real-time_streaming_transcription)
+
+<details>
+  <summary>Stream your Microphone in Real-Time</summary>
+
+```python
+import assemblyai as aai
+
+def on_open(session_opened: aai.RealtimeSessionOpened):
+  "This function is called when the connection has been established."
+
+  print("Session ID:", session_opened.session_id)
+
+def on_data(transcript: aai.RealtimeTranscript):
+  "This function is called when a new transcript has been received."
+
+  if not transcript.text:
+    return
+
+  if isinstance(transcript, aai.RealtimeFinalTranscript):
+    print(transcript.text, end="\r\n")
+  else:
+    print(transcript.text, end="\r")
+
+def on_error(error: aai.RealtimeError):
+  "This function is called when an error occurs."
+
+  print("An error occurred:", error)
+
+def on_close():
+  "This function is called when the connection has been closed."
+
+  print("Closing Session")
+
+
+# Create the Real-Time transcriber
+transcriber = aai.RealtimeTranscriber(
+  on_data=on_data,
+  on_error=on_error,
+  sample_rate=44_100,
+  on_open=on_open, # optional
+  on_close=on_close, # optional
+)
+
+
+# Open a microphone stream
+microphone_stream = aai.extras.MicrophoneStream()
+
+# Press CTRL+C to abort
+transcriber.stream(microphone_stream)
+
+transcriber.close()
+```
+
+</details>
+
+<details>
+  <summary>Transcribe a Local Audio File in Real-Time</summary>
+
+```python
+import assemblyai as aai
+
+
+def on_data(transcript: aai.RealtimeTranscript):
+  "This function is called when a new transcript has been received."
+
+  if not transcript.text:
+    return
+
+  if isinstance(transcript, aai.RealtimeFinalTranscript):
+    print(transcript.text, end="\r\n")
+  else:
+    print(transcript.text, end="\r")
+
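+def on_error(error: aai.RealtimeError):
+  "This function is called when an error occurs."
+
+  print("An error occurred:", error)
+
+def on_open(session_opened: aai.RealtimeSessionOpened):
+  "This function is called when the connection has been established."
+
+  print("Session ID:", session_opened.session_id)
+
+def on_close():
+  "This function is called when the connection has been closed."
+
+  print("Closing Session")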
+
+
+# Create the Real-Time transcriber
+transcriber = aai.RealtimeTranscriber(
+  on_data=on_data,
+  on_error=on_error,
+  sample_rate=44_100,
+  on_open=on_open, # optional
+  on_close=on_close, # optional
+)
+
+
+# Only WAV/PCM16 single channel supported for now
+file_stream = aai.extras.stream_file(
+  filepath="audio.wav",
+  sample_rate=44_100,
+)
+
+transcriber.stream(file_stream)
+
+transcriber.close()
+```
+
+</details>
+
+---
+
 ## Playgrounds
 
 Visit one of our Playgrounds:

diff --git a/assemblyai/__init__.py b/assemblyai/__init__.py
@@ -1,6 +1,7 @@
+from . import extras
 from .client import Client
 from .lemur import Lemur
-from .transcriber import Transcriber, Transcript, TranscriptGroup
+from .transcriber import RealtimeTranscriber, Transcriber, Transcript, TranscriptGroup
 from .types import (
     AssemblyAIError,
     AutohighlightResponse,
@@ -24,6 +25,12 @@
     PIIRedactionPolicy,
     PIISubstitutionPolicy,
     RawTranscriptionConfig,
+    RealtimeError,
+    RealtimeFinalTranscript,
+    RealtimePartialTranscript,
+    RealtimeSessionOpened,
+    RealtimeTranscript,
+    RealtimeWord,
     Sentence,
     Sentiment,
     SentimentType,
@@ -93,6 +100,14 @@
     "Word",
     "WordBoost",
     "WordSearchMatch",
+    "RealtimeError",
+    "RealtimeFinalTranscript",
+    "RealtimePartialTranscript",
+    "RealtimeSessionOpened",
+    "RealtimeTranscript",
+    "RealtimeWord",
     # package globals
     "settings",
+    # packages
+    "extras",
 ]
diff --git a/assemblyai/extras.py b/assemblyai/extras.py
@@ -0,0 +1,102 @@
+import time
+from typing import Generator
+
+try:
+    import pyaudio
+except ImportError:
+    raise ImportError(
+        "You must install the extras for this SDK to use this feature. "
+        "Run `pip install assemblyai[extras]` to install the extras. "
+        "Make sure to install `apt install portaudio19-dev` (Debian/Ubuntu) or "
+        "`brew install portaudio` (MacOS) before installing the extras."
+    )
+
+
+class MicrophoneStream:
+    """
+    Iterator over chunks of audio captured from the microphone via PyAudio.
+
+    The stream is opened as 16-bit, single-channel input and each iteration
+    reads roughly 100 ms worth of frames at the configured sample rate.
+    """
+
+    def __init__(
+        self,
+        sample_rate: int = 44_100,
+    ):
+        """
+        Creates a stream of audio from the microphone.
+
+        Args:
+            sample_rate: The sample rate to record audio at.
+        """
+
+        self._pyaudio = pyaudio.PyAudio()
+        self.sample_rate = sample_rate
+
+        # Number of frames per read: one tenth of a second of audio.
+        self._chunk_size = int(self.sample_rate * 0.1)
+        self._stream = self._pyaudio.open(
+            format=pyaudio.paInt16,
+            channels=1,
+            rate=sample_rate,
+            input=True,
+            frames_per_buffer=self._chunk_size,
+        )
+
+        self._open = True
+
+    def __iter__(self):
+        """
+        Returns the iterator object.
+        """
+
+        return self
+
+    def __next__(self):
+        """
+        Reads a chunk of audio from the microphone.
+
+        Raises:
+            StopIteration: If the stream has been closed, or if the read is
+                interrupted with CTRL+C.
+        """
+        if not self._open:
+            raise StopIteration
+
+        try:
+            return self._stream.read(self._chunk_size)
+        except KeyboardInterrupt:
+            # CTRL+C ends the iteration cleanly instead of propagating,
+            # so the consumer can go on to close its resources.
+            raise StopIteration
+
+    def close(self):
+        """
+        Closes the stream and releases the PyAudio resources.
+
+        Calling this more than once is safe; later calls do nothing.
+        """
+
+        if not self._open:
+            # Already closed: touching the closed stream again would fail.
+            return
+
+        self._open = False
+
+        if self._stream.is_active():
+            self._stream.stop_stream()
+
+        self._stream.close()
+        self._pyaudio.terminate()
+
+
+def stream_file(
+    filepath: str,
+    sample_rate: int,
+) -> Generator[bytes, None, None]:
+    """
+    Mimics a stream of audio data by reading it chunk by chunk from a file.
+
+    The file is read in chunks of about 300 ms of 16-bit, single-channel
+    audio. A trailing chunk that is shorter than a full chunk is dropped.
+    After each yielded chunk the generator sleeps for 150 ms.
+
+    NOTE: Only supports WAV/PCM16 files as of now.
+
+    Args:
+        filepath: The path to the file to stream.
+        sample_rate: The sample rate of the audio file.
+
+    Returns: A generator that yields chunks of audio data.
+    """
+
+    bytes_per_sample = 2  # PCM16
+    chunk_bytes = int(sample_rate * 0.30) * bytes_per_sample
+
+    with open(filepath, "rb") as f:
+        while True:
+            data = f.read(chunk_bytes)
+
+            # Compare byte counts instead of re-deriving a duration in
+            # milliseconds: when `sample_rate * 0.30` is not a whole number
+            # (e.g. 11025 Hz) a full chunk is slightly shorter than 300 ms
+            # and a duration check would reject every chunk.
+            if not data or len(data) < chunk_bytes:
+                break
+
+            yield data
+
+            time.sleep(0.15)