From 60137e77517a75732c8b3b26437bea43cf5131ab Mon Sep 17 00:00:00 2001 From: Patrick Loeber <98830383+ploeber@users.noreply.github.com> Date: Wed, 25 Oct 2023 16:21:34 -0400 Subject: [PATCH 1/9] fix(python/sdk): Fix README for sentiment analysis example (#2472) GitOrigin-RevId: c2d97c625499679492c8d3c9083a39bc74211c83 --- README.md | 35 +--- assemblyai/lemur.py | 24 +-- assemblyai/transcriber.py | 2 +- assemblyai/types.py | 9 +- setup.py | 2 +- tests/unit/test_auto_chapters.py | 2 +- tests/unit/test_auto_highlights.py | 2 +- tests/unit/test_content_safety.py | 4 +- tests/unit/test_entity_detection.py | 2 +- tests/unit/test_lemur.py | 216 +----------------------- tests/unit/test_realtime_transcriber.py | 1 + tests/unit/test_sentiment_analysis.py | 2 +- tests/unit/test_summarization.py | 9 +- 13 files changed, 28 insertions(+), 282 deletions(-) diff --git a/README.md b/README.md index cf781bc..eb2a7de 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ [![AssemblyAI Twitter](https://img.shields.io/twitter/follow/AssemblyAI?label=%40AssemblyAI&style=social)](https://twitter.com/AssemblyAI) [![AssemblyAI YouTube](https://img.shields.io/youtube/channel/subscribers/UCtatfZMf-8EkIwASXM4ts0A)](https://www.youtube.com/@AssemblyAI) [![Discord](https://img.shields.io/discord/875120158014853141?logo=discord&label=Discord&link=https%3A%2F%2Fdiscord.com%2Fchannels%2F875120158014853141&style=social) -](https://assemblyai.com/discord) +](https://discord.gg/5aQNZyq3) # AssemblyAI's Python SDK @@ -266,37 +266,6 @@ print(result.response) - -
- Use LeMUR to with Input Text - -```python -import assemblyai as aai - -transcriber = aai.Transcriber() -config = aai.TranscriptionConfig( - speaker_labels=True, -) -transcript = transcriber.transcribe("https://example.org/customer.mp3", config=config) - -# Example converting speaker label utterances into LeMUR input text -text = "" - -for utt in transcript.utterances: - text += f"Speaker {utt.speaker}:\n{utt.text}\n" - -result = aai.Lemur().task( - "You are a helpful coach. Provide an analysis of the transcript " - "and offer areas to improve with exact quotes. Include no preamble. " - "Start with an overall summary then get into the examples with feedback.", - input_text=text -) - -print(result.response) -``` - -
-
Delete data previously sent to LeMUR @@ -524,7 +493,7 @@ transcript = transcriber.transcribe( for entity in transcript.entities: print(entity.text) # i.e. "Dan Gilbert" - print(entity.entity_type) # i.e. EntityType.person + print(entity.type) # i.e. EntityType.person print(f"Timestamp: {entity.start} - {entity.end}") ``` diff --git a/assemblyai/lemur.py b/assemblyai/lemur.py index c81ab34..5debd48 100644 --- a/assemblyai/lemur.py +++ b/assemblyai/lemur.py @@ -14,11 +14,7 @@ def __init__( ) -> None: self._client = client - self._sources = ( - [types.LemurSourceRequest.from_lemur_source(s) for s in sources] - if sources is not None - else [] - ) + self._sources = [types.LemurSourceRequest.from_lemur_source(s) for s in sources] def question( self, @@ -28,7 +24,6 @@ def question( final_model: Optional[types.LemurModel], max_output_size: Optional[int], temperature: Optional[float], - input_text: Optional[str], ) -> types.LemurQuestionResponse: response = api.lemur_question( client=self._client.http_client, @@ -39,7 +34,6 @@ def question( final_model=final_model, max_output_size=max_output_size, temperature=temperature, - input_text=input_text, ), http_timeout=timeout, ) @@ -54,7 +48,6 @@ def summarize( max_output_size: Optional[int], timeout: Optional[float], temperature: Optional[float], - input_text: Optional[str], ) -> types.LemurSummaryResponse: response = api.lemur_summarize( client=self._client.http_client, @@ -65,7 +58,6 @@ def summarize( final_model=final_model, max_output_size=max_output_size, temperature=temperature, - input_text=input_text, ), http_timeout=timeout, ) @@ -80,7 +72,6 @@ def action_items( max_output_size: Optional[int], timeout: Optional[float], temperature: Optional[float], - input_text: Optional[str], ) -> types.LemurActionItemsResponse: response = api.lemur_action_items( client=self._client.http_client, @@ -91,7 +82,6 @@ def action_items( final_model=final_model, max_output_size=max_output_size, temperature=temperature, - 
input_text=input_text, ), http_timeout=timeout, ) @@ -105,7 +95,6 @@ def task( max_output_size: Optional[int], timeout: Optional[float], temperature: Optional[float], - input_text: Optional[str], ): response = api.lemur_task( client=self._client.http_client, @@ -115,7 +104,6 @@ def task( final_model=final_model, max_output_size=max_output_size, temperature=temperature, - input_text=input_text, ), http_timeout=timeout, ) @@ -133,7 +121,7 @@ class Lemur: def __init__( self, - sources: Optional[List[types.LemurSource]] = None, + sources: List[types.LemurSource], client: Optional[_client.Client] = None, ) -> None: """ @@ -159,7 +147,6 @@ def question( max_output_size: Optional[int] = None, timeout: Optional[float] = None, temperature: Optional[float] = None, - input_text: Optional[str] = None, ) -> types.LemurQuestionResponse: """ Question & Answer allows you to ask free form questions about one or many transcripts. @@ -191,7 +178,6 @@ def question( max_output_size=max_output_size, timeout=timeout, temperature=temperature, - input_text=input_text, ) def summarize( @@ -202,7 +188,6 @@ def summarize( max_output_size: Optional[int] = None, timeout: Optional[float] = None, temperature: Optional[float] = None, - input_text: Optional[str] = None, ) -> types.LemurSummaryResponse: """ Summary allows you to distill a piece of audio into a few impactful sentences. @@ -229,7 +214,6 @@ def summarize( max_output_size=max_output_size, timeout=timeout, temperature=temperature, - input_text=input_text, ) def action_items( @@ -240,7 +224,6 @@ def action_items( max_output_size: Optional[int] = None, timeout: Optional[float] = None, temperature: Optional[float] = None, - input_text: Optional[str] = None, ) -> types.LemurActionItemsResponse: """ Action Items allows you to generate action items from one or many transcripts. 
@@ -268,7 +251,6 @@ def action_items( max_output_size=max_output_size, timeout=timeout, temperature=temperature, - input_text=input_text, ) def task( @@ -278,7 +260,6 @@ def task( max_output_size: Optional[int] = None, timeout: Optional[float] = None, temperature: Optional[float] = None, - input_text: Optional[str] = None, ) -> types.LemurTaskResponse: """ Task feature allows you to submit a custom prompt to the model. @@ -301,7 +282,6 @@ def task( max_output_size=max_output_size, timeout=timeout, temperature=temperature, - input_text=input_text, ) @classmethod diff --git a/assemblyai/transcriber.py b/assemblyai/transcriber.py index 72f83b3..996471c 100644 --- a/assemblyai/transcriber.py +++ b/assemblyai/transcriber.py @@ -983,7 +983,7 @@ def __init__( client: _client.Client, ) -> None: self._client = client - self._websocket: Optional[websockets.sync.client.ClientConnection] = None + self._websocket: Optional[websockets_client.ClientConnection] = None self._on_open = on_open self._on_data = on_data diff --git a/assemblyai/types.py b/assemblyai/types.py index f34243c..ef10d82 100644 --- a/assemblyai/types.py +++ b/assemblyai/types.py @@ -835,7 +835,7 @@ def auto_chapters(self, enable: Optional[bool]) -> None: "Enable Auto Chapters." 
# Validate required params are also set - if enable and self.punctuate is False: + if enable and self.punctuate == False: raise ValueError( "If `auto_chapters` is enabled, then `punctuate` must not be disabled" ) @@ -1146,11 +1146,11 @@ def set_summarize( return self # Validate that required parameters are also set - if self._raw_transcription_config.punctuate is False: + if self._raw_transcription_config.punctuate == False: raise ValueError( "If `summarization` is enabled, then `punctuate` must not be disabled" ) - if self._raw_transcription_config.format_text is False: + if self._raw_transcription_config.format_text == False: raise ValueError( "If `summarization` is enabled, then `format_text` must not be disabled" ) @@ -1666,7 +1666,7 @@ def __init__( """ from . import Transcript - if isinstance(transcript, str): + if type(transcript) == str: transcript = Transcript(transcript_id=transcript) super().__init__(transcript) @@ -1773,7 +1773,6 @@ class BaseLemurRequest(BaseModel): final_model: Optional[LemurModel] max_output_size: Optional[int] temperature: Optional[float] - input_text: Optional[str] class LemurTaskRequest(BaseLemurRequest): diff --git a/setup.py b/setup.py index 84e81c1..5ff7e4f 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ setup( name="assemblyai", - version="0.20.0", + version="0.18.0", description="AssemblyAI Python SDK", author="AssemblyAI", author_email="engineering.sdk@assemblyai.com", diff --git a/tests/unit/test_auto_chapters.py b/tests/unit/test_auto_chapters.py index e3ba39b..8b03965 100644 --- a/tests/unit/test_auto_chapters.py +++ b/tests/unit/test_auto_chapters.py @@ -68,7 +68,7 @@ def test_auto_chapters_enabled(httpx_mock: HTTPXMock): ) # Check that request body was properly defined - assert request_body.get("auto_chapters") is True + assert request_body.get("auto_chapters") == True # Check that transcript was properly parsed from JSON response assert transcript.error is None diff --git a/tests/unit/test_auto_highlights.py 
b/tests/unit/test_auto_highlights.py index 4648760..8472faf 100644 --- a/tests/unit/test_auto_highlights.py +++ b/tests/unit/test_auto_highlights.py @@ -64,7 +64,7 @@ def test_auto_highlights_enabled(httpx_mock: HTTPXMock): ) # Check that request body was properly defined - assert request_body.get("auto_highlights") is True + assert request_body.get("auto_highlights") == True # Check that transcript was properly parsed from JSON response assert transcript.error is None diff --git a/tests/unit/test_content_safety.py b/tests/unit/test_content_safety.py index 2dc6fa6..4b76978 100644 --- a/tests/unit/test_content_safety.py +++ b/tests/unit/test_content_safety.py @@ -98,7 +98,7 @@ def test_content_safety_enabled(httpx_mock: HTTPXMock): ) # Check that request body was properly defined - assert request_body.get("content_safety") is True + assert request_body.get("content_safety") == True # Check that transcript was properly parsed from JSON response assert transcript.error is None @@ -202,7 +202,7 @@ def test_content_safety_with_confidence_threshold(httpx_mock: HTTPXMock): ), ) - assert request.get("content_safety") is True + assert request.get("content_safety") == True assert request.get("content_safety_confidence") == confidence diff --git a/tests/unit/test_entity_detection.py b/tests/unit/test_entity_detection.py index 4c44bab..73e1a61 100644 --- a/tests/unit/test_entity_detection.py +++ b/tests/unit/test_entity_detection.py @@ -52,7 +52,7 @@ def test_entity_detection_enabled(httpx_mock: HTTPXMock): ) # Check that request body was properly defined - assert request_body.get("entity_detection") is True + assert request_body.get("entity_detection") == True # Check that transcript was properly parsed from JSON response assert transcript.error is None diff --git a/tests/unit/test_lemur.py b/tests/unit/test_lemur.py index d273112..04ff316 100644 --- a/tests/unit/test_lemur.py +++ b/tests/unit/test_lemur.py @@ -14,7 +14,7 @@ aai.settings.api_key = "test" -def 
test_lemur_single_question_succeeds_transcript(httpx_mock: HTTPXMock): +def test_lemur_single_question_succeeds(httpx_mock: HTTPXMock): """ Tests whether asking a single question succeeds. """ @@ -64,54 +64,7 @@ def test_lemur_single_question_succeeds_transcript(httpx_mock: HTTPXMock): assert len(httpx_mock.get_requests()) == 1 -def test_lemur_single_question_succeeds_input_text(httpx_mock: HTTPXMock): - """ - Tests whether asking a single question succeeds with input text. - """ - - # create a mock response of a LemurQuestionResponse - mock_lemur_answer = factories.generate_dict_factory( - factories.LemurQuestionResponse - )() - - # we only want to mock one answer - mock_lemur_answer["response"] = [mock_lemur_answer["response"][0]] - - # mock the specific endpoints - httpx_mock.add_response( - url=f"{aai.settings.base_url}{ENDPOINT_LEMUR}/question-answer", - status_code=httpx.codes.OK, - method="POST", - json=mock_lemur_answer, - ) - - # prepare the question to be asked - question = aai.LemurQuestion( - question="Which cars do the callers want to buy?", - context="Callers are interested in buying cars", - answer_options=["Toyota", "Honda", "Ford", "Chevrolet"], - ) - # test input_text input - # mimic the usage of the SDK - lemur = aai.Lemur() - result = lemur.question( - question, input_text="This transcript is a test transcript." - ) - - # check whether answer is not a list - assert isinstance(result, aai.LemurQuestionResponse) - - answers = result.response - - # check the response - assert answers[0].question == mock_lemur_answer["response"][0]["question"] - assert answers[0].answer == mock_lemur_answer["response"][0]["answer"] - - # check whether we mocked everything - assert len(httpx_mock.get_requests()) == 1 - - -def test_lemur_multiple_question_succeeds_transcript(httpx_mock: HTTPXMock): +def test_lemur_multiple_question_succeeds(httpx_mock: HTTPXMock): """ Tests whether asking multiple questions succeeds. 
""" @@ -164,59 +117,6 @@ def test_lemur_multiple_question_succeeds_transcript(httpx_mock: HTTPXMock): assert len(httpx_mock.get_requests()) == 1 -def test_lemur_multiple_question_succeeds_input_text(httpx_mock: HTTPXMock): - """ - Tests whether asking multiple questions succeeds. - """ - - # create a mock response of a LemurQuestionResponse - mock_lemur_answer = factories.generate_dict_factory( - factories.LemurQuestionResponse - )() - - # prepare the questions to be asked - questions = [ - aai.LemurQuestion( - question="Which cars do the callers want to buy?", - ), - aai.LemurQuestion( - question="What price range are the callers looking for?", - ), - ] - - # update the mock questions with the questions - mock_lemur_answer["response"][0]["question"] = questions[0].question - mock_lemur_answer["response"][1]["question"] = questions[1].question - - # mock the specific endpoints - httpx_mock.add_response( - url=f"{aai.settings.base_url}{ENDPOINT_LEMUR}/question-answer", - status_code=httpx.codes.OK, - method="POST", - json=mock_lemur_answer, - ) - - # test input_text input - # mimic the usage of the SDK - lemur = aai.Lemur() - result = lemur.question( - questions, input_text="This transcript is a test transcript." - ) - assert isinstance(result, aai.LemurQuestionResponse) - - answers = result.response - # check whether answers is a list - assert isinstance(answers, list) - - # check the response - for idx, answer in enumerate(answers): - assert answer.question == mock_lemur_answer["response"][idx]["question"] - assert answer.answer == mock_lemur_answer["response"][idx]["answer"] - - # check whether we mocked everything - assert len(httpx_mock.get_requests()) == 1 - - def test_lemur_question_fails(httpx_mock: HTTPXMock): """ Tests whether asking a question fails. 
@@ -249,7 +149,7 @@ def test_lemur_question_fails(httpx_mock: HTTPXMock): assert len(httpx_mock.get_requests()) == 1 -def test_lemur_summarize_succeeds_transcript(httpx_mock: HTTPXMock): +def test_lemur_summarize_succeeds(httpx_mock: HTTPXMock): """ Tests whether summarizing a transcript via LeMUR succeeds. """ @@ -284,41 +184,6 @@ def test_lemur_summarize_succeeds_transcript(httpx_mock: HTTPXMock): assert len(httpx_mock.get_requests()) == 1 -def test_lemur_summarize_succeeds_input_text(httpx_mock: HTTPXMock): - """ - Tests whether summarizing a transcript via LeMUR succeeds with input text. - """ - - # create a mock response of a LemurSummaryResponse - mock_lemur_summary = factories.generate_dict_factory( - factories.LemurSummaryResponse - )() - - # mock the specific endpoints - httpx_mock.add_response( - url=f"{aai.settings.base_url}{ENDPOINT_LEMUR}/summary", - status_code=httpx.codes.OK, - method="POST", - json=mock_lemur_summary, - ) - - # test input_text input - lemur = aai.Lemur() - result = lemur.summarize( - context="Callers asking for cars", answer_format="TLDR", input_text="Test test" - ) - - assert isinstance(result, aai.LemurSummaryResponse) - - summary = result.response - - # check the response - assert summary == mock_lemur_summary["response"] - - # check whether we mocked everything - assert len(httpx_mock.get_requests()) == 1 - - def test_lemur_summarize_fails(httpx_mock: HTTPXMock): """ Tests whether summarizing a transcript via LeMUR fails. @@ -344,7 +209,7 @@ def test_lemur_summarize_fails(httpx_mock: HTTPXMock): assert len(httpx_mock.get_requests()) == 1 -def test_lemur_action_items_succeeds_transcript(httpx_mock: HTTPXMock): +def test_lemur_action_items_succeeds(httpx_mock: HTTPXMock): """ Tests whether generating action items for a transcript via LeMUR succeeds. 
""" @@ -382,43 +247,6 @@ def test_lemur_action_items_succeeds_transcript(httpx_mock: HTTPXMock): assert len(httpx_mock.get_requests()) == 1 -def test_lemur_action_items_succeeds_input_text(httpx_mock: HTTPXMock): - """ - Tests whether generating action items for a transcript via LeMUR succeeds. - """ - - # create a mock response of a LemurActionItemsResponse - mock_lemur_action_items = factories.generate_dict_factory( - factories.LemurActionItemsResponse - )() - - # mock the specific endpoints - httpx_mock.add_response( - url=f"{aai.settings.base_url}{ENDPOINT_LEMUR}/action-items", - status_code=httpx.codes.OK, - method="POST", - json=mock_lemur_action_items, - ) - - # test input_text input - lemur = aai.Lemur() - result = lemur.action_items( - context="Customers asking for help with resolving their problem", - answer_format="Three bullet points", - input_text="Test test", - ) - - assert isinstance(result, aai.LemurActionItemsResponse) - - action_items = result.response - - # check the response - assert action_items == mock_lemur_action_items["response"] - - # check whether we mocked everything - assert len(httpx_mock.get_requests()) == 1 - - def test_lemur_action_items_fails(httpx_mock: HTTPXMock): """ Tests whether generating action items for a transcript via LeMUR fails. @@ -447,7 +275,7 @@ def test_lemur_action_items_fails(httpx_mock: HTTPXMock): assert len(httpx_mock.get_requests()) == 1 -def test_lemur_task_succeeds_transcript(httpx_mock: HTTPXMock): +def test_lemur_task_succeeds(httpx_mock: HTTPXMock): """ Tests whether creating a task request succeeds. """ @@ -482,38 +310,6 @@ def test_lemur_task_succeeds_transcript(httpx_mock: HTTPXMock): assert len(httpx_mock.get_requests()) == 1 -def test_lemur_task_succeeds_input_text(httpx_mock: HTTPXMock): - """ - Tests whether creating a task request succeeds. 
- """ - - # create a mock response of a LemurSummaryResponse - mock_lemur_task_response = factories.generate_dict_factory( - factories.LemurTaskResponse - )() - - # mock the specific endpoints - httpx_mock.add_response( - url=f"{aai.settings.base_url}{ENDPOINT_LEMUR}/task", - status_code=httpx.codes.OK, - method="POST", - json=mock_lemur_task_response, - ) - # test input_text input - lemur = aai.Lemur() - result = lemur.task( - prompt="Create action items of the meeting", input_text="Test test" - ) - - # check the response - assert isinstance(result, aai.LemurTaskResponse) - - assert result.response == mock_lemur_task_response["response"] - - # check whether we mocked everything - assert len(httpx_mock.get_requests()) == 1 - - def test_lemur_ask_coach_fails(httpx_mock: HTTPXMock): """ Tests whether creating a task request fails. @@ -589,7 +385,7 @@ def test_lemur_purge_request_data_fails(httpx_mock: HTTPXMock): json=mock_lemur_purge_response, ) - with pytest.raises(aai.LemurError): + with pytest.raises(aai.LemurError) as error: aai.Lemur.purge_request_data(mock_request_id) assert len(httpx_mock.get_requests()) == 1 diff --git a/tests/unit/test_realtime_transcriber.py b/tests/unit/test_realtime_transcriber.py index 64d3f20..dbd3fbd 100644 --- a/tests/unit/test_realtime_transcriber.py +++ b/tests/unit/test_realtime_transcriber.py @@ -1,6 +1,7 @@ import datetime import json import uuid +from typing import Optional from unittest.mock import MagicMock from urllib.parse import urlencode diff --git a/tests/unit/test_sentiment_analysis.py b/tests/unit/test_sentiment_analysis.py index e8fdfd9..aeeda95 100644 --- a/tests/unit/test_sentiment_analysis.py +++ b/tests/unit/test_sentiment_analysis.py @@ -47,7 +47,7 @@ def test_sentiment_analysis_enabled(httpx_mock: HTTPXMock): ) # Check that request body was properly defined - assert request_body.get("sentiment_analysis") is True + assert request_body.get("sentiment_analysis") == True # Check that transcript was properly parsed 
from JSON response assert transcript.error is None diff --git a/tests/unit/test_summarization.py b/tests/unit/test_summarization.py index 020487f..53d5e55 100644 --- a/tests/unit/test_summarization.py +++ b/tests/unit/test_summarization.py @@ -5,6 +5,7 @@ import tests.unit.factories as factories import tests.unit.unit_test_utils as test_utils import assemblyai as aai +from tests.unit import factories aai.settings.api_key = "test" @@ -73,9 +74,9 @@ def test_default_summarization_params(httpx_mock: HTTPXMock): ) # Check that request body was properly defined - assert request_body.get("summarization") is True - assert request_body.get("summary_model") is None - assert request_body.get("summary_type") is None + assert request_body.get("summarization") == True + assert request_body.get("summary_model") == None + assert request_body.get("summary_type") == None # Check that transcript was properly parsed from JSON response assert transcript.error is None @@ -105,7 +106,7 @@ def test_summarization_with_params(httpx_mock: HTTPXMock): ) # Check that request body was properly defined - assert request_body.get("summarization") is True + assert request_body.get("summarization") == True assert request_body.get("summary_model") == summary_model assert request_body.get("summary_type") == summary_type From 14a0cea3e74ff7823fa37a28eb4c6ab14face6a2 Mon Sep 17 00:00:00 2001 From: Patrick Loeber <98830383+ploeber@users.noreply.github.com> Date: Wed, 25 Oct 2023 17:16:09 -0400 Subject: [PATCH 2/9] fix(python/sdk): Fix README for Entity Detection example (#2489) GitOrigin-RevId: 19545c51d4272d32010ae22b288014f0b23e9cde --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index eb2a7de..8086e3b 100644 --- a/README.md +++ b/README.md @@ -493,7 +493,7 @@ transcript = transcriber.transcribe( for entity in transcript.entities: print(entity.text) # i.e. "Dan Gilbert" - print(entity.type) # i.e. 
EntityType.person + print(entity.entity_type) # i.e. EntityType.person print(f"Timestamp: {entity.start} - {entity.end}") ``` From 65d6ac323307b9e0fc37e5af1e62762bd9b67b42 Mon Sep 17 00:00:00 2001 From: Patrick Loeber <98830383+ploeber@users.noreply.github.com> Date: Tue, 7 Nov 2023 11:15:03 +0100 Subject: [PATCH 3/9] style(python/sdk): Fix linting in Python SDK (#2636) GitOrigin-RevId: 675799264e2de79f8c4259a16d3e859c668f571a --- assemblyai/transcriber.py | 2 +- assemblyai/types.py | 8 ++++---- tests/unit/test_auto_chapters.py | 2 +- tests/unit/test_auto_highlights.py | 2 +- tests/unit/test_content_safety.py | 4 ++-- tests/unit/test_entity_detection.py | 2 +- tests/unit/test_lemur.py | 2 +- tests/unit/test_realtime_transcriber.py | 1 - tests/unit/test_sentiment_analysis.py | 2 +- tests/unit/test_summarization.py | 9 ++++----- 10 files changed, 16 insertions(+), 18 deletions(-) diff --git a/assemblyai/transcriber.py b/assemblyai/transcriber.py index 996471c..72f83b3 100644 --- a/assemblyai/transcriber.py +++ b/assemblyai/transcriber.py @@ -983,7 +983,7 @@ def __init__( client: _client.Client, ) -> None: self._client = client - self._websocket: Optional[websockets_client.ClientConnection] = None + self._websocket: Optional[websockets.sync.client.ClientConnection] = None self._on_open = on_open self._on_data = on_data diff --git a/assemblyai/types.py b/assemblyai/types.py index ef10d82..d913a11 100644 --- a/assemblyai/types.py +++ b/assemblyai/types.py @@ -835,7 +835,7 @@ def auto_chapters(self, enable: Optional[bool]) -> None: "Enable Auto Chapters." 
# Validate required params are also set - if enable and self.punctuate == False: + if enable and self.punctuate is False: raise ValueError( "If `auto_chapters` is enabled, then `punctuate` must not be disabled" ) @@ -1146,11 +1146,11 @@ def set_summarize( return self # Validate that required parameters are also set - if self._raw_transcription_config.punctuate == False: + if self._raw_transcription_config.punctuate is False: raise ValueError( "If `summarization` is enabled, then `punctuate` must not be disabled" ) - if self._raw_transcription_config.format_text == False: + if self._raw_transcription_config.format_text is False: raise ValueError( "If `summarization` is enabled, then `format_text` must not be disabled" ) @@ -1666,7 +1666,7 @@ def __init__( """ from . import Transcript - if type(transcript) == str: + if isinstance(transcript, str): transcript = Transcript(transcript_id=transcript) super().__init__(transcript) diff --git a/tests/unit/test_auto_chapters.py b/tests/unit/test_auto_chapters.py index 8b03965..e3ba39b 100644 --- a/tests/unit/test_auto_chapters.py +++ b/tests/unit/test_auto_chapters.py @@ -68,7 +68,7 @@ def test_auto_chapters_enabled(httpx_mock: HTTPXMock): ) # Check that request body was properly defined - assert request_body.get("auto_chapters") == True + assert request_body.get("auto_chapters") is True # Check that transcript was properly parsed from JSON response assert transcript.error is None diff --git a/tests/unit/test_auto_highlights.py b/tests/unit/test_auto_highlights.py index 8472faf..4648760 100644 --- a/tests/unit/test_auto_highlights.py +++ b/tests/unit/test_auto_highlights.py @@ -64,7 +64,7 @@ def test_auto_highlights_enabled(httpx_mock: HTTPXMock): ) # Check that request body was properly defined - assert request_body.get("auto_highlights") == True + assert request_body.get("auto_highlights") is True # Check that transcript was properly parsed from JSON response assert transcript.error is None diff --git 
a/tests/unit/test_content_safety.py b/tests/unit/test_content_safety.py index 4b76978..2dc6fa6 100644 --- a/tests/unit/test_content_safety.py +++ b/tests/unit/test_content_safety.py @@ -98,7 +98,7 @@ def test_content_safety_enabled(httpx_mock: HTTPXMock): ) # Check that request body was properly defined - assert request_body.get("content_safety") == True + assert request_body.get("content_safety") is True # Check that transcript was properly parsed from JSON response assert transcript.error is None @@ -202,7 +202,7 @@ def test_content_safety_with_confidence_threshold(httpx_mock: HTTPXMock): ), ) - assert request.get("content_safety") == True + assert request.get("content_safety") is True assert request.get("content_safety_confidence") == confidence diff --git a/tests/unit/test_entity_detection.py b/tests/unit/test_entity_detection.py index 73e1a61..4c44bab 100644 --- a/tests/unit/test_entity_detection.py +++ b/tests/unit/test_entity_detection.py @@ -52,7 +52,7 @@ def test_entity_detection_enabled(httpx_mock: HTTPXMock): ) # Check that request body was properly defined - assert request_body.get("entity_detection") == True + assert request_body.get("entity_detection") is True # Check that transcript was properly parsed from JSON response assert transcript.error is None diff --git a/tests/unit/test_lemur.py b/tests/unit/test_lemur.py index 04ff316..0d753a6 100644 --- a/tests/unit/test_lemur.py +++ b/tests/unit/test_lemur.py @@ -385,7 +385,7 @@ def test_lemur_purge_request_data_fails(httpx_mock: HTTPXMock): json=mock_lemur_purge_response, ) - with pytest.raises(aai.LemurError) as error: + with pytest.raises(aai.LemurError): aai.Lemur.purge_request_data(mock_request_id) assert len(httpx_mock.get_requests()) == 1 diff --git a/tests/unit/test_realtime_transcriber.py b/tests/unit/test_realtime_transcriber.py index dbd3fbd..64d3f20 100644 --- a/tests/unit/test_realtime_transcriber.py +++ b/tests/unit/test_realtime_transcriber.py @@ -1,7 +1,6 @@ import datetime import json 
import uuid -from typing import Optional from unittest.mock import MagicMock from urllib.parse import urlencode diff --git a/tests/unit/test_sentiment_analysis.py b/tests/unit/test_sentiment_analysis.py index aeeda95..e8fdfd9 100644 --- a/tests/unit/test_sentiment_analysis.py +++ b/tests/unit/test_sentiment_analysis.py @@ -47,7 +47,7 @@ def test_sentiment_analysis_enabled(httpx_mock: HTTPXMock): ) # Check that request body was properly defined - assert request_body.get("sentiment_analysis") == True + assert request_body.get("sentiment_analysis") is True # Check that transcript was properly parsed from JSON response assert transcript.error is None diff --git a/tests/unit/test_summarization.py b/tests/unit/test_summarization.py index 53d5e55..020487f 100644 --- a/tests/unit/test_summarization.py +++ b/tests/unit/test_summarization.py @@ -5,7 +5,6 @@ import tests.unit.factories as factories import tests.unit.unit_test_utils as test_utils import assemblyai as aai -from tests.unit import factories aai.settings.api_key = "test" @@ -74,9 +73,9 @@ def test_default_summarization_params(httpx_mock: HTTPXMock): ) # Check that request body was properly defined - assert request_body.get("summarization") == True - assert request_body.get("summary_model") == None - assert request_body.get("summary_type") == None + assert request_body.get("summarization") is True + assert request_body.get("summary_model") is None + assert request_body.get("summary_type") is None # Check that transcript was properly parsed from JSON response assert transcript.error is None @@ -106,7 +105,7 @@ def test_summarization_with_params(httpx_mock: HTTPXMock): ) # Check that request body was properly defined - assert request_body.get("summarization") == True + assert request_body.get("summarization") is True assert request_body.get("summary_model") == summary_model assert request_body.get("summary_type") == summary_type From 90be20bca6965338447c85a7e818959e17793652 Mon Sep 17 00:00:00 2001 From: Niels 
Swimberghe <3382717+Swimburger@users.noreply.github.com> Date: Wed, 15 Nov 2023 11:51:31 -0500 Subject: [PATCH 4/9] docs(*): Update Discord link in READMEs (#2773) GitOrigin-RevId: 2d85e3abd579b7270166fe9faaf88f534e9487da --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 8086e3b..211f848 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ [![AssemblyAI Twitter](https://img.shields.io/twitter/follow/AssemblyAI?label=%40AssemblyAI&style=social)](https://twitter.com/AssemblyAI) [![AssemblyAI YouTube](https://img.shields.io/youtube/channel/subscribers/UCtatfZMf-8EkIwASXM4ts0A)](https://www.youtube.com/@AssemblyAI) [![Discord](https://img.shields.io/discord/875120158014853141?logo=discord&label=Discord&link=https%3A%2F%2Fdiscord.com%2Fchannels%2F875120158014853141&style=social) -](https://discord.gg/5aQNZyq3) +](https://assemblyai.com/discord) # AssemblyAI's Python SDK From ca111f884605333ffb3bfb9c2cde966ac30961a9 Mon Sep 17 00:00:00 2001 From: Justin Hazen <102247715+jhazenaai@users.noreply.github.com> Date: Mon, 20 Nov 2023 09:20:42 -0700 Subject: [PATCH 5/9] feat(sdk): allow input text into the python sdk (#2738) GitOrigin-RevId: 9c36a4ad2fa7bd831920145ecf1c29b16765cc11 --- README.md | 31 ++++++ assemblyai/lemur.py | 24 ++++- assemblyai/types.py | 1 + tests/unit/test_lemur.py | 214 ++++++++++++++++++++++++++++++++++++++- 4 files changed, 263 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 211f848..cf781bc 100644 --- a/README.md +++ b/README.md @@ -266,6 +266,37 @@ print(result.response)
+ +
+ Use LeMUR to with Input Text + +```python +import assemblyai as aai + +transcriber = aai.Transcriber() +config = aai.TranscriptionConfig( + speaker_labels=True, +) +transcript = transcriber.transcribe("https://example.org/customer.mp3", config=config) + +# Example converting speaker label utterances into LeMUR input text +text = "" + +for utt in transcript.utterances: + text += f"Speaker {utt.speaker}:\n{utt.text}\n" + +result = aai.Lemur().task( + "You are a helpful coach. Provide an analysis of the transcript " + "and offer areas to improve with exact quotes. Include no preamble. " + "Start with an overall summary then get into the examples with feedback.", + input_text=text +) + +print(result.response) +``` + +
+
Delete data previously sent to LeMUR diff --git a/assemblyai/lemur.py b/assemblyai/lemur.py index 5debd48..c81ab34 100644 --- a/assemblyai/lemur.py +++ b/assemblyai/lemur.py @@ -14,7 +14,11 @@ def __init__( ) -> None: self._client = client - self._sources = [types.LemurSourceRequest.from_lemur_source(s) for s in sources] + self._sources = ( + [types.LemurSourceRequest.from_lemur_source(s) for s in sources] + if sources is not None + else [] + ) def question( self, @@ -24,6 +28,7 @@ def question( final_model: Optional[types.LemurModel], max_output_size: Optional[int], temperature: Optional[float], + input_text: Optional[str], ) -> types.LemurQuestionResponse: response = api.lemur_question( client=self._client.http_client, @@ -34,6 +39,7 @@ def question( final_model=final_model, max_output_size=max_output_size, temperature=temperature, + input_text=input_text, ), http_timeout=timeout, ) @@ -48,6 +54,7 @@ def summarize( max_output_size: Optional[int], timeout: Optional[float], temperature: Optional[float], + input_text: Optional[str], ) -> types.LemurSummaryResponse: response = api.lemur_summarize( client=self._client.http_client, @@ -58,6 +65,7 @@ def summarize( final_model=final_model, max_output_size=max_output_size, temperature=temperature, + input_text=input_text, ), http_timeout=timeout, ) @@ -72,6 +80,7 @@ def action_items( max_output_size: Optional[int], timeout: Optional[float], temperature: Optional[float], + input_text: Optional[str], ) -> types.LemurActionItemsResponse: response = api.lemur_action_items( client=self._client.http_client, @@ -82,6 +91,7 @@ def action_items( final_model=final_model, max_output_size=max_output_size, temperature=temperature, + input_text=input_text, ), http_timeout=timeout, ) @@ -95,6 +105,7 @@ def task( max_output_size: Optional[int], timeout: Optional[float], temperature: Optional[float], + input_text: Optional[str], ): response = api.lemur_task( client=self._client.http_client, @@ -104,6 +115,7 @@ def task( 
final_model=final_model, max_output_size=max_output_size, temperature=temperature, + input_text=input_text, ), http_timeout=timeout, ) @@ -121,7 +133,7 @@ class Lemur: def __init__( self, - sources: List[types.LemurSource], + sources: Optional[List[types.LemurSource]] = None, client: Optional[_client.Client] = None, ) -> None: """ @@ -147,6 +159,7 @@ def question( max_output_size: Optional[int] = None, timeout: Optional[float] = None, temperature: Optional[float] = None, + input_text: Optional[str] = None, ) -> types.LemurQuestionResponse: """ Question & Answer allows you to ask free form questions about one or many transcripts. @@ -178,6 +191,7 @@ def question( max_output_size=max_output_size, timeout=timeout, temperature=temperature, + input_text=input_text, ) def summarize( @@ -188,6 +202,7 @@ def summarize( max_output_size: Optional[int] = None, timeout: Optional[float] = None, temperature: Optional[float] = None, + input_text: Optional[str] = None, ) -> types.LemurSummaryResponse: """ Summary allows you to distill a piece of audio into a few impactful sentences. @@ -214,6 +229,7 @@ def summarize( max_output_size=max_output_size, timeout=timeout, temperature=temperature, + input_text=input_text, ) def action_items( @@ -224,6 +240,7 @@ def action_items( max_output_size: Optional[int] = None, timeout: Optional[float] = None, temperature: Optional[float] = None, + input_text: Optional[str] = None, ) -> types.LemurActionItemsResponse: """ Action Items allows you to generate action items from one or many transcripts. @@ -251,6 +268,7 @@ def action_items( max_output_size=max_output_size, timeout=timeout, temperature=temperature, + input_text=input_text, ) def task( @@ -260,6 +278,7 @@ def task( max_output_size: Optional[int] = None, timeout: Optional[float] = None, temperature: Optional[float] = None, + input_text: Optional[str] = None, ) -> types.LemurTaskResponse: """ Task feature allows you to submit a custom prompt to the model. 
@@ -282,6 +301,7 @@ def task( max_output_size=max_output_size, timeout=timeout, temperature=temperature, + input_text=input_text, ) @classmethod diff --git a/assemblyai/types.py b/assemblyai/types.py index d913a11..f34243c 100644 --- a/assemblyai/types.py +++ b/assemblyai/types.py @@ -1773,6 +1773,7 @@ class BaseLemurRequest(BaseModel): final_model: Optional[LemurModel] max_output_size: Optional[int] temperature: Optional[float] + input_text: Optional[str] class LemurTaskRequest(BaseLemurRequest): diff --git a/tests/unit/test_lemur.py b/tests/unit/test_lemur.py index 0d753a6..d273112 100644 --- a/tests/unit/test_lemur.py +++ b/tests/unit/test_lemur.py @@ -14,7 +14,7 @@ aai.settings.api_key = "test" -def test_lemur_single_question_succeeds(httpx_mock: HTTPXMock): +def test_lemur_single_question_succeeds_transcript(httpx_mock: HTTPXMock): """ Tests whether asking a single question succeeds. """ @@ -64,7 +64,54 @@ def test_lemur_single_question_succeeds(httpx_mock: HTTPXMock): assert len(httpx_mock.get_requests()) == 1 -def test_lemur_multiple_question_succeeds(httpx_mock: HTTPXMock): +def test_lemur_single_question_succeeds_input_text(httpx_mock: HTTPXMock): + """ + Tests whether asking a single question succeeds with input text. 
+ """ + + # create a mock response of a LemurQuestionResponse + mock_lemur_answer = factories.generate_dict_factory( + factories.LemurQuestionResponse + )() + + # we only want to mock one answer + mock_lemur_answer["response"] = [mock_lemur_answer["response"][0]] + + # mock the specific endpoints + httpx_mock.add_response( + url=f"{aai.settings.base_url}{ENDPOINT_LEMUR}/question-answer", + status_code=httpx.codes.OK, + method="POST", + json=mock_lemur_answer, + ) + + # prepare the question to be asked + question = aai.LemurQuestion( + question="Which cars do the callers want to buy?", + context="Callers are interested in buying cars", + answer_options=["Toyota", "Honda", "Ford", "Chevrolet"], + ) + # test input_text input + # mimic the usage of the SDK + lemur = aai.Lemur() + result = lemur.question( + question, input_text="This transcript is a test transcript." + ) + + # check whether answer is not a list + assert isinstance(result, aai.LemurQuestionResponse) + + answers = result.response + + # check the response + assert answers[0].question == mock_lemur_answer["response"][0]["question"] + assert answers[0].answer == mock_lemur_answer["response"][0]["answer"] + + # check whether we mocked everything + assert len(httpx_mock.get_requests()) == 1 + + +def test_lemur_multiple_question_succeeds_transcript(httpx_mock: HTTPXMock): """ Tests whether asking multiple questions succeeds. """ @@ -117,6 +164,59 @@ def test_lemur_multiple_question_succeeds(httpx_mock: HTTPXMock): assert len(httpx_mock.get_requests()) == 1 +def test_lemur_multiple_question_succeeds_input_text(httpx_mock: HTTPXMock): + """ + Tests whether asking multiple questions succeeds. 
+ """ + + # create a mock response of a LemurQuestionResponse + mock_lemur_answer = factories.generate_dict_factory( + factories.LemurQuestionResponse + )() + + # prepare the questions to be asked + questions = [ + aai.LemurQuestion( + question="Which cars do the callers want to buy?", + ), + aai.LemurQuestion( + question="What price range are the callers looking for?", + ), + ] + + # update the mock questions with the questions + mock_lemur_answer["response"][0]["question"] = questions[0].question + mock_lemur_answer["response"][1]["question"] = questions[1].question + + # mock the specific endpoints + httpx_mock.add_response( + url=f"{aai.settings.base_url}{ENDPOINT_LEMUR}/question-answer", + status_code=httpx.codes.OK, + method="POST", + json=mock_lemur_answer, + ) + + # test input_text input + # mimic the usage of the SDK + lemur = aai.Lemur() + result = lemur.question( + questions, input_text="This transcript is a test transcript." + ) + assert isinstance(result, aai.LemurQuestionResponse) + + answers = result.response + # check whether answers is a list + assert isinstance(answers, list) + + # check the response + for idx, answer in enumerate(answers): + assert answer.question == mock_lemur_answer["response"][idx]["question"] + assert answer.answer == mock_lemur_answer["response"][idx]["answer"] + + # check whether we mocked everything + assert len(httpx_mock.get_requests()) == 1 + + def test_lemur_question_fails(httpx_mock: HTTPXMock): """ Tests whether asking a question fails. @@ -149,7 +249,7 @@ def test_lemur_question_fails(httpx_mock: HTTPXMock): assert len(httpx_mock.get_requests()) == 1 -def test_lemur_summarize_succeeds(httpx_mock: HTTPXMock): +def test_lemur_summarize_succeeds_transcript(httpx_mock: HTTPXMock): """ Tests whether summarizing a transcript via LeMUR succeeds. 
""" @@ -184,6 +284,41 @@ def test_lemur_summarize_succeeds(httpx_mock: HTTPXMock): assert len(httpx_mock.get_requests()) == 1 +def test_lemur_summarize_succeeds_input_text(httpx_mock: HTTPXMock): + """ + Tests whether summarizing a transcript via LeMUR succeeds with input text. + """ + + # create a mock response of a LemurSummaryResponse + mock_lemur_summary = factories.generate_dict_factory( + factories.LemurSummaryResponse + )() + + # mock the specific endpoints + httpx_mock.add_response( + url=f"{aai.settings.base_url}{ENDPOINT_LEMUR}/summary", + status_code=httpx.codes.OK, + method="POST", + json=mock_lemur_summary, + ) + + # test input_text input + lemur = aai.Lemur() + result = lemur.summarize( + context="Callers asking for cars", answer_format="TLDR", input_text="Test test" + ) + + assert isinstance(result, aai.LemurSummaryResponse) + + summary = result.response + + # check the response + assert summary == mock_lemur_summary["response"] + + # check whether we mocked everything + assert len(httpx_mock.get_requests()) == 1 + + def test_lemur_summarize_fails(httpx_mock: HTTPXMock): """ Tests whether summarizing a transcript via LeMUR fails. @@ -209,7 +344,7 @@ def test_lemur_summarize_fails(httpx_mock: HTTPXMock): assert len(httpx_mock.get_requests()) == 1 -def test_lemur_action_items_succeeds(httpx_mock: HTTPXMock): +def test_lemur_action_items_succeeds_transcript(httpx_mock: HTTPXMock): """ Tests whether generating action items for a transcript via LeMUR succeeds. """ @@ -247,6 +382,43 @@ def test_lemur_action_items_succeeds(httpx_mock: HTTPXMock): assert len(httpx_mock.get_requests()) == 1 +def test_lemur_action_items_succeeds_input_text(httpx_mock: HTTPXMock): + """ + Tests whether generating action items for a transcript via LeMUR succeeds. 
+ """ + + # create a mock response of a LemurActionItemsResponse + mock_lemur_action_items = factories.generate_dict_factory( + factories.LemurActionItemsResponse + )() + + # mock the specific endpoints + httpx_mock.add_response( + url=f"{aai.settings.base_url}{ENDPOINT_LEMUR}/action-items", + status_code=httpx.codes.OK, + method="POST", + json=mock_lemur_action_items, + ) + + # test input_text input + lemur = aai.Lemur() + result = lemur.action_items( + context="Customers asking for help with resolving their problem", + answer_format="Three bullet points", + input_text="Test test", + ) + + assert isinstance(result, aai.LemurActionItemsResponse) + + action_items = result.response + + # check the response + assert action_items == mock_lemur_action_items["response"] + + # check whether we mocked everything + assert len(httpx_mock.get_requests()) == 1 + + def test_lemur_action_items_fails(httpx_mock: HTTPXMock): """ Tests whether generating action items for a transcript via LeMUR fails. @@ -275,7 +447,7 @@ def test_lemur_action_items_fails(httpx_mock: HTTPXMock): assert len(httpx_mock.get_requests()) == 1 -def test_lemur_task_succeeds(httpx_mock: HTTPXMock): +def test_lemur_task_succeeds_transcript(httpx_mock: HTTPXMock): """ Tests whether creating a task request succeeds. """ @@ -310,6 +482,38 @@ def test_lemur_task_succeeds(httpx_mock: HTTPXMock): assert len(httpx_mock.get_requests()) == 1 +def test_lemur_task_succeeds_input_text(httpx_mock: HTTPXMock): + """ + Tests whether creating a task request succeeds. 
+ """ + + # create a mock response of a LemurSummaryResponse + mock_lemur_task_response = factories.generate_dict_factory( + factories.LemurTaskResponse + )() + + # mock the specific endpoints + httpx_mock.add_response( + url=f"{aai.settings.base_url}{ENDPOINT_LEMUR}/task", + status_code=httpx.codes.OK, + method="POST", + json=mock_lemur_task_response, + ) + # test input_text input + lemur = aai.Lemur() + result = lemur.task( + prompt="Create action items of the meeting", input_text="Test test" + ) + + # check the response + assert isinstance(result, aai.LemurTaskResponse) + + assert result.response == mock_lemur_task_response["response"] + + # check whether we mocked everything + assert len(httpx_mock.get_requests()) == 1 + + def test_lemur_ask_coach_fails(httpx_mock: HTTPXMock): """ Tests whether creating a task request fails. From 0585fad943434b3cb9c68bca03aa9249fa5d249b Mon Sep 17 00:00:00 2001 From: Robert McHardy Date: Tue, 21 Nov 2023 12:17:51 +0000 Subject: [PATCH 6/9] ci: replace black/isort by ruff (#2833) Co-authored-by: Robert McHardy GitOrigin-RevId: cb5146ac07c6bb87458c778eafc00ae1ef2c3b7a --- assemblyai/lemur.py | 3 +-- assemblyai/transcriber.py | 3 +-- tests/unit/test_summarization.py | 3 ++- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/assemblyai/lemur.py b/assemblyai/lemur.py index c81ab34..66b9451 100644 --- a/assemblyai/lemur.py +++ b/assemblyai/lemur.py @@ -1,8 +1,7 @@ from typing import Any, Dict, List, Optional, Union -from . import api +from . import api, types from . import client as _client -from . import types class _LemurImpl: diff --git a/assemblyai/transcriber.py b/assemblyai/transcriber.py index 72f83b3..82ca638 100644 --- a/assemblyai/transcriber.py +++ b/assemblyai/transcriber.py @@ -27,9 +27,8 @@ from typing_extensions import Self from websockets.sync.client import connect as websocket_connect -from . import api +from . import api, lemur, types from . import client as _client -from . 
import lemur, types class _TranscriptImpl: diff --git a/tests/unit/test_summarization.py b/tests/unit/test_summarization.py index 020487f..33966a1 100644 --- a/tests/unit/test_summarization.py +++ b/tests/unit/test_summarization.py @@ -26,7 +26,8 @@ def test_summarization_fails_without_required_field( httpx_mock, {}, config=aai.TranscriptionConfig( - summarization=True, **{required_field: False} # type: ignore + summarization=True, + **{required_field: False}, # type: ignore ), ) From bee3f3691c49f01ff45c72442972fa1ae43f2f0f Mon Sep 17 00:00:00 2001 From: Justin Hazen <102247715+jhazenaai@users.noreply.github.com> Date: Wed, 22 Nov 2023 13:31:53 -0700 Subject: [PATCH 7/9] fix(python/sdk): add docstrings for input_text (#2856) GitOrigin-RevId: 7b804d89828e88ad1d56ae94f0ec2929bb899d5b --- assemblyai/lemur.py | 4 ++++ setup.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/assemblyai/lemur.py b/assemblyai/lemur.py index 66b9451..1d91d4f 100644 --- a/assemblyai/lemur.py +++ b/assemblyai/lemur.py @@ -176,6 +176,7 @@ def question( max_output_size: Max output size in tokens timeout: The timeout in seconds to wait for the answer(s). temperature: Change how deterministic the response is, with 0 being the most deterministic and 1 being the least deterministic. + input_text: Custom formatted transcript data. Use instead of transcript_ids. Returns: One or a list of answer objects. """ @@ -217,6 +218,7 @@ def summarize( max_output_size: Max output size in tokens timeout: The timeout in seconds to wait for the summary. temperature: Change how deterministic the response is, with 0 being the most deterministic and 1 being the least deterministic. + input_text: Custom formatted transcript data. Use instead of transcript_ids. Returns: The summary as a string. """ @@ -256,6 +258,7 @@ def action_items( max_output_size: Max output size in tokens timeout: The timeout in seconds to wait for the action items response. 
temperature: Change how deterministic the response is, with 0 being the most deterministic and 1 being the least deterministic. + input_text: Custom formatted transcript data. Use instead of transcript_ids. Returns: The action items as a string. """ @@ -290,6 +293,7 @@ def task( max_output_size: Max output size in tokens timeout: The timeout in seconds to wait for the task. temperature: Change how deterministic the response is, with 0 being the most deterministic and 1 being the least deterministic. + input_text: Custom formatted transcript data. Use instead of transcript_ids. Returns: A response to a question or task submitted via custom prompt (with source transcripts or other sources taken into the context) """ diff --git a/setup.py b/setup.py index 5ff7e4f..e7d5069 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ setup( name="assemblyai", - version="0.18.0", + version="0.20.1", description="AssemblyAI Python SDK", author="AssemblyAI", author_email="engineering.sdk@assemblyai.com", From d97cbcf99e415266d0559b52eb0184e0c4d819a7 Mon Sep 17 00:00:00 2001 From: Martin Schweiger <34636718+m-ods@users.noreply.github.com> Date: Mon, 4 Dec 2023 08:46:14 +0800 Subject: [PATCH 8/9] fix(python/sdk): add fields to Sentence/Paragraph/Word models (#2930) GitOrigin-RevId: 389efe93eee0d3a10cef95cadf31f91971c695c5 --- assemblyai/types.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/assemblyai/types.py b/assemblyai/types.py index f34243c..0554ae2 100644 --- a/assemblyai/types.py +++ b/assemblyai/types.py @@ -1247,6 +1247,7 @@ class Word(BaseModel): start: int end: int confidence: float + speaker: Optional[str] class UtteranceWord(Word): @@ -1382,6 +1383,10 @@ class RedactedAudioResponse(BaseModel): class Sentence(Word): words: List[Word] + start: int + end: int + confidence: int + speaker: Optional[str] class SentencesResponse(BaseModel): @@ -1392,6 +1397,10 @@ class SentencesResponse(BaseModel): class Paragraph(Word): words: List[Word] + 
start: int + end: int + confidence: int + text: str class ParagraphsResponse(BaseModel): @@ -1921,7 +1930,7 @@ class RealtimeTranscript(BaseModel): text: str "The transcript for your audio" - words: List[Word] + words: List[RealtimeWord] """ An array of objects, with the information for each word in the transcription text. Will include the `start`/`end` time (in milliseconds) of the word, the `confidence` score of the word, From 7bd4de670906a8c4336f8186343d4082a402a3d9 Mon Sep 17 00:00:00 2001 From: Justin Hazen <102247715+jhazenaai@users.noreply.github.com> Date: Thu, 14 Dec 2023 12:01:29 -0700 Subject: [PATCH 9/9] feat(mistral): add mistral support to the sdk (#3157) GitOrigin-RevId: 67e94fa0b2f00c8343d3c19988c3c8a682575cdd --- assemblyai/lemur.py | 8 ++++---- assemblyai/types.py | 7 ++++++- tests/unit/test_lemur.py | 34 ++++++++++++++++++++++++++++++++++ 3 files changed, 44 insertions(+), 5 deletions(-) diff --git a/assemblyai/lemur.py b/assemblyai/lemur.py index 1d91d4f..d028f67 100644 --- a/assemblyai/lemur.py +++ b/assemblyai/lemur.py @@ -172,7 +172,7 @@ def question( Args: questions: One or a list of questions to ask. context: The context which is shared among all questions. This can be a string or a dictionary. - final_model: The model that is used for the final prompt after compression is performed (options: "basic" and "default"). + final_model: The model that is used for the final prompt after compression is performed (options: "basic", "default", and "assemblyai/mistral-7b"). max_output_size: Max output size in tokens timeout: The timeout in seconds to wait for the answer(s). temperature: Change how deterministic the response is, with 0 being the most deterministic and 1 being the least deterministic. @@ -214,7 +214,7 @@ def summarize( Args: context: An optional context on the transcript. answer_format: The format on how the summary shall be summarized. 
- final_model: The model that is used for the final prompt after compression is performed (options: "basic" and "default"). + final_model: The model that is used for the final prompt after compression is performed (options: "basic", "default", and "assemblyai/mistral-7b"). max_output_size: Max output size in tokens timeout: The timeout in seconds to wait for the summary. temperature: Change how deterministic the response is, with 0 being the most deterministic and 1 being the least deterministic. @@ -254,7 +254,7 @@ def action_items( Args: context: An optional context on the transcript. answer_format: The preferred format for the result action items. - final_model: The model that is used for the final prompt after compression is performed (options: "basic" and "default"). + final_model: The model that is used for the final prompt after compression is performed (options: "basic", "default", and "assemblyai/mistral-7b"). max_output_size: Max output size in tokens timeout: The timeout in seconds to wait for the action items response. temperature: Change how deterministic the response is, with 0 being the most deterministic and 1 being the least deterministic. @@ -289,7 +289,7 @@ def task( Args: prompt: The prompt to use for this task. - final_model: The model that is used for the final prompt after compression is performed (options: "basic" and "default"). + final_model: The model that is used for the final prompt after compression is performed (options: "basic", "default", and "assemblyai/mistral-7b"). max_output_size: Max output size in tokens timeout: The timeout in seconds to wait for the task. temperature: Change how deterministic the response is, with 0 being the most deterministic and 1 being the least deterministic. 
diff --git a/assemblyai/types.py b/assemblyai/types.py index 0554ae2..9f5bb37 100644 --- a/assemblyai/types.py +++ b/assemblyai/types.py @@ -1704,7 +1704,7 @@ def from_lemur_source(cls, source: LemurSource) -> Self: class LemurModel(str, Enum): """ - LeMUR features two model modes, Basic and Default, that allow you to configure your request + LeMUR features three model modes, Basic, Default and Mistral 7B, that allow you to configure your request to suit your needs. These options tell LeMUR whether to use the more advanced Default model or the cheaper, faster, but simplified Basic model. The implicit setting is Default when no option is explicitly passed in. @@ -1729,6 +1729,11 @@ class LemurModel(str, Enum): for complex/subjective tasks where answers require more nuance to be effective. """ + mistral7b = "assemblyai/mistral-7b" + """ + Mistral 7B is an open source model that works well for summarization and answering questions. + """ + class LemurQuestionAnswer(BaseModel): """ diff --git a/tests/unit/test_lemur.py b/tests/unit/test_lemur.py index d273112..6a67b62 100644 --- a/tests/unit/test_lemur.py +++ b/tests/unit/test_lemur.py @@ -514,6 +514,40 @@ def test_lemur_task_succeeds_input_text(httpx_mock: HTTPXMock): assert len(httpx_mock.get_requests()) == 1 +def test_lemur_task_succeeds_mistral(httpx_mock: HTTPXMock): + """ + Tests whether creating a task request succeeds with mistral. 
+ """ + + # create a mock response of a LemurTaskResponse + mock_lemur_task_response = factories.generate_dict_factory( + factories.LemurTaskResponse + )() + + # mock the specific endpoints + httpx_mock.add_response( + url=f"{aai.settings.base_url}{ENDPOINT_LEMUR}/task", + status_code=httpx.codes.OK, + method="POST", + json=mock_lemur_task_response, + ) + # test input_text input + lemur = aai.Lemur() + result = lemur.task( + final_model=aai.LemurModel.mistral7b, + prompt="Create action items of the meeting", + input_text="Test test", + ) + + # check the response + assert isinstance(result, aai.LemurTaskResponse) + + assert result.response == mock_lemur_task_response["response"] + + # check whether we mocked everything + assert len(httpx_mock.get_requests()) == 1 + + def test_lemur_ask_coach_fails(httpx_mock: HTTPXMock): """ Tests whether creating a task request fails.