Skip to content

chore: sync code base with OSS repository #68

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Apr 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 7 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -98,15 +98,18 @@ print(transcript.text)
</details>

<details>
<summary>Transcribe from stream</summary>
<summary>Transcribe binary data</summary>

```python
import assemblyai as aai

# Upload binary data.
upload_url = aai.extras.file_from_stream(data)

transcriber = aai.Transcriber()

# Binary data is supported directly:
transcript = transcriber.transcribe(data)

# Or upload the data separately first:
upload_url = transcriber.upload_file(data)
transcript = transcriber.transcribe(upload_url)
```

Expand Down
8 changes: 8 additions & 0 deletions assemblyai/extras.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import time
from typing import BinaryIO, Generator
from warnings import warn

from . import api
from .client import Client
Expand Down Expand Up @@ -116,6 +117,8 @@ def stream_file(

def file_from_stream(data: BinaryIO) -> str:
"""
DeprecationWarning: `file_from_stream()` is deprecated and will be removed in 1.0.0. Use `Transcriber.upload_file()` instead.

Uploads the given stream and returns the uploaded audio url.

This function can be used to transcribe data that's already
Expand All @@ -132,6 +135,11 @@ def file_from_stream(data: BinaryIO) -> str:
Args:
`data`: A file-like object (in binary mode)
"""
warn(
"`file_from_stream()` is deprecated and will be removed in 1.0.0. Use `Transcriber.upload_file()` instead.",
DeprecationWarning,
stacklevel=2,
)
return api.upload_file(
client=Client.get_default().http_client,
audio_file=data,
Expand Down
115 changes: 77 additions & 38 deletions assemblyai/transcriber.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import time
from typing import (
Any,
BinaryIO,
Callable,
Dict,
Generator,
Expand Down Expand Up @@ -697,6 +698,19 @@ def __init__(
self._client = client
self.config = config

def upload_file(self, data: Union[str, BinaryIO]) -> str:
    """
    Uploads audio through the client's HTTP session via `api.upload_file`.

    Args:
        `data`: Either a `str`, which is opened as a file path in binary mode,
            or any other object, which is handed to the upload call unchanged.

    Returns: The value returned by `api.upload_file` (annotated as `str`).
    """
    if not isinstance(data, str):
        # Not a path: forward the object as-is. It is not closed here.
        return api.upload_file(
            client=self._client.http_client,
            audio_file=data,
        )

    # A string is treated as a local path; the handle is closed when the
    # `with` block exits, i.e. after the upload call has returned or raised.
    with open(data, "rb") as stream:
        return api.upload_file(
            client=self._client.http_client,
            audio_file=stream,
        )

def transcribe_url(
self,
*,
Expand Down Expand Up @@ -735,26 +749,25 @@ def transcribe_url(
def transcribe_file(
self,
*,
path: str,
data: Union[str, BinaryIO],
config: types.TranscriptionConfig,
poll: bool,
) -> Transcript:
with open(path, "rb") as audio_file:
try:
audio_url = api.upload_file(
client=self._client.http_client,
audio_file=audio_file,
)
except Exception as exc:
return Transcript.from_response(
client=self._client,
response=types.TranscriptResponse(
audio_url=path,
**config.raw.dict(exclude_none=True),
status=types.TranscriptStatus.error,
error=str(exc),
),
)
try:
audio_url = self.upload_file(data)
except OSError:
# If the file cannot be opened, pass it to the user.
raise
except Exception as exc:
return Transcript.from_response(
client=self._client,
response=types.TranscriptResponse(
audio_url=data if isinstance(data, str) else "",
**config.raw.dict(exclude_none=True),
status=types.TranscriptStatus.error,
error=str(exc),
),
)

return self.transcribe_url(
url=audio_url,
Expand All @@ -764,30 +777,30 @@ def transcribe_file(

def transcribe(
self,
data: str,
data: Union[str, BinaryIO],
config: Optional[types.TranscriptionConfig],
poll: bool,
) -> Transcript:
if config is None:
config = self.config

if urlparse(data).scheme in {"http", "https"}:
if isinstance(data, str) and urlparse(data).scheme in {"http", "https"}:
return self.transcribe_url(
url=data,
config=config,
poll=poll,
)

return self.transcribe_file(
path=data,
data=data,
config=config,
poll=poll,
)

def transcribe_group(
self,
*,
data: List[str],
data: List[Union[str, BinaryIO]],
config: Optional[types.TranscriptionConfig],
poll: bool,
) -> TranscriptGroup:
Expand Down Expand Up @@ -889,16 +902,43 @@ def config(self, config: types.TranscriptionConfig) -> None:
"""
self._impl.config = config

def upload_file(self, data: Union[str, BinaryIO]) -> str:
    """
    Uploads audio given either as a local file path or as a binary object.

    Args:
        `data`: Path to a local file, or a binary object.

    Returns: The URL of the uploaded audio file.
    """
    # Pure delegation: the implementation object performs the upload.
    uploaded_url = self._impl.upload_file(data=data)
    return uploaded_url

def upload_file_async(
    self, data: Union[str, BinaryIO]
) -> concurrent.futures.Future[str]:
    """
    Uploads an audio file asynchronously. The file can be specified as a
    local path or as a binary object.

    The upload is submitted to this object's executor, so the call returns
    immediately instead of waiting for the upload to finish.

    Args:
        `data`: A local file (as path), or a binary object.

    Returns: A `concurrent.futures.Future` that resolves to the URL of the
        uploaded audio file. An exception raised during the upload is
        re-raised when `.result()` is called on the future.
    """
    return self._executor.submit(
        self._impl.upload_file,
        data=data,
    )

def submit(
self,
data: str,
data: Union[str, BinaryIO],
config: Optional[types.TranscriptionConfig] = None,
) -> Transcript:
"""
Submits a transcription job without waiting for its completion.

Args:
data: An URL or a local file (as path)
data: A URL, a local file (as path), or a binary object.
config: Transcription options and features. If `None` is given, the Transcriber's
default configuration will be used.
"""
Expand All @@ -910,14 +950,14 @@ def submit(

def submit_group(
self,
data: List[str],
data: List[Union[str, BinaryIO]],
config: Optional[types.TranscriptionConfig] = None,
) -> TranscriptGroup:
"""
Submits multiple transcription jobs without waiting for their completion.

Args:
data: A list of paths or URLs (can be mixed)
data: A list of local paths, URLs, or binary objects (can be mixed).
config: Transcription options and features. If `None` is given, the Transcriber's
default configuration will be used.
"""
Expand All @@ -929,14 +969,14 @@ def submit_group(

def transcribe(
self,
data: str,
data: Union[str, BinaryIO],
config: Optional[types.TranscriptionConfig] = None,
) -> Transcript:
"""
Transcribes an audio file whose location can be specified via a URL or file path.
Transcribes an audio file which can be specified as local path, URL, or binary object.

Args:
data: An URL or a local file (as path)
data: A URL, a local file (as path), or a binary object.
config: Transcription options and features. If `None` is given, the Transcriber's
default configuration will be used.
"""
Expand All @@ -949,14 +989,14 @@ def transcribe(

def transcribe_async(
self,
data: str,
data: Union[str, BinaryIO],
config: Optional[types.TranscriptionConfig] = None,
) -> concurrent.futures.Future[Transcript]:
"""
Transcribes an audio file whose location can be specified via a URL or file path.
Transcribes an audio file which can be specified as local path, URL, or binary object.

Args:
data: An URL or a local file (as path)
data: A URL, a local file (as path), or a binary object.
config: Transcription options and features. If `None` is given, the Transcriber's
default configuration will be used.
"""
Expand All @@ -970,14 +1010,14 @@ def transcribe_async(

def transcribe_group(
self,
data: List[str],
data: List[Union[str, BinaryIO]],
config: Optional[types.TranscriptionConfig] = None,
) -> TranscriptGroup:
"""
Transcribes a list of files (as paths) or URLs with the given configs.
Transcribes a list of files (as local paths, URLs, or binary objects).

Args:
data: A list of paths or URLs (can be mixed)
data: A list of local paths, URLs, or binary objects (can be mixed).
config: Transcription options and features. If `None` is given, the Transcriber's
default configuration will be used.
"""
Expand All @@ -990,15 +1030,14 @@ def transcribe_group(

def transcribe_group_async(
self,
data: List[str],
data: List[Union[str, BinaryIO]],
config: Optional[types.TranscriptionConfig] = None,
) -> concurrent.futures.Future[TranscriptGroup]:
"""
Transcribes a list of files (as paths) or URLs with the given configs asynchronously
by returning a `concurrent.futures.Future[TranscriptGroup]` object.
Transcribes a list of files (as local paths, URLs, or binary objects) asynchronously.

Args:
data: A list of paths or URLs (can be mixed)
data: A list of local paths, URLs, or binary objects (can be mixed).
config: Transcription options and features. If `None` is given, the Transcriber's
default configuration will be used.
"""
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

setup(
name="assemblyai",
version="0.25.0",
version="0.26.0",
description="AssemblyAI Python SDK",
author="AssemblyAI",
author_email="[email protected]",
Expand Down
Loading