diff --git a/gptscript/datasets.py b/gptscript/datasets.py index e9de278..9bd97fd 100644 --- a/gptscript/datasets.py +++ b/gptscript/datasets.py @@ -1,5 +1,7 @@ +import base64 from typing import Dict -from pydantic import BaseModel +from pydantic import BaseModel, field_serializer, field_validator + class DatasetElementMeta(BaseModel): name: str @@ -9,7 +11,17 @@ class DatasetElementMeta(BaseModel): class DatasetElement(BaseModel): name: str description: str - contents: str + contents: bytes + + @field_serializer("contents") + def serialize_contents(self, value: bytes) -> str: + return base64.b64encode(value).decode("utf-8") + + @field_validator("contents", mode="before") + def deserialize_contents(cls, value) -> bytes: + if isinstance(value, str): + return base64.b64decode(value) + return value class DatasetMeta(BaseModel): diff --git a/gptscript/gptscript.py b/gptscript/gptscript.py index 918cbdb..745e2ca 100644 --- a/gptscript/gptscript.py +++ b/gptscript/gptscript.py @@ -242,7 +242,7 @@ async def create_dataset(self, workspace_id: str, name: str, description: str = ) return Dataset.model_validate_json(res) - async def add_dataset_element(self, workspace_id: str, datasetID: str, elementName: str, elementContent: str, + async def add_dataset_element(self, workspace_id: str, datasetID: str, elementName: str, elementContent: bytes, elementDescription: str = "") -> DatasetElementMeta: if workspace_id == "": workspace_id = os.environ["GPTSCRIPT_WORKSPACE_ID"] @@ -251,7 +251,7 @@ async def add_dataset_element(self, workspace_id: str, datasetID: str, elementNa raise ValueError("datasetID cannot be empty") elif elementName == "": raise ValueError("elementName cannot be empty") - elif elementContent == "": + elif not elementContent: raise ValueError("elementContent cannot be empty") res = await self._run_basic_command( "datasets/add-element", { "input": json.dumps({ "datasetID": 
datasetID, "elementName": elementName, - "elementContent": elementContent, + "elementContent": base64.b64encode(elementContent).decode("utf-8"), "elementDescription": elementDescription, }), "workspaceID": workspace_id, diff --git a/tests/test_gptscript.py b/tests/test_gptscript.py index 74440e4..9da5792 100644 --- a/tests/test_gptscript.py +++ b/tests/test_gptscript.py @@ -771,29 +771,29 @@ async def test_datasets(gptscript): assert len(dataset.elements) == 0, "Expected dataset elements to be empty" # Add an element - element_meta = await gptscript.add_dataset_element(workspace_id, dataset.id, "element1", "element1 contents", + element_meta = await gptscript.add_dataset_element(workspace_id, dataset.id, "element1", b"element1 contents", "element1 description") assert element_meta.name == "element1", "Expected element name to match" assert element_meta.description == "element1 description", "Expected element description to match" # Add two more elements await gptscript.add_dataset_elements(workspace_id, dataset.id, [ - DatasetElement(name="element2", contents="element2 contents", description="element2 description"), - DatasetElement(name="element3", contents="element3 contents", description="element3 description"), + DatasetElement(name="element2", contents=b"element2 contents", description="element2 description"), + DatasetElement(name="element3", contents=b"element3 contents", description="element3 description"), ]) # Get the elements e1 = await gptscript.get_dataset_element(workspace_id, dataset.id, "element1") assert e1.name == "element1", "Expected element name to match" - assert e1.contents == "element1 contents", "Expected element contents to match" + assert e1.contents == b"element1 contents", "Expected element contents to match" assert e1.description == "element1 description", "Expected element description to match" e2 = await gptscript.get_dataset_element(workspace_id, dataset.id, "element2") assert e2.name == "element2", "Expected element name to match" 
- assert e2.contents == "element2 contents", "Expected element contents to match" + assert e2.contents == b"element2 contents", "Expected element contents to match" assert e2.description == "element2 description", "Expected element description to match" e3 = await gptscript.get_dataset_element(workspace_id, dataset.id, "element3") assert e3.name == "element3", "Expected element name to match" - assert e3.contents == "element3 contents", "Expected element contents to match" + assert e3.contents == b"element3 contents", "Expected element contents to match" assert e3.description == "element3 description", "Expected element description to match" # List elements in the dataset