SEA: Fetch Phase #650

Merged · 76 commits · Jul 28, 2025

Commits
5bf5d4c
Separate Session related functionality from Connection class (#571)
varun-edachali-dbx May 28, 2025
400a8bd
Introduce Backend Interface (DatabricksClient) (#573)
varun-edachali-dbx May 30, 2025
3c78ed7
Implement ResultSet Abstraction (backend interfaces for fetch phase) …
varun-edachali-dbx Jun 3, 2025
9625229
Introduce Sea HTTP Client and test script (#583)
varun-edachali-dbx Jun 4, 2025
0887bc1
Introduce `SeaDatabricksClient` (Session Implementation) (#582)
varun-edachali-dbx Jun 9, 2025
6d63df0
Normalise Execution Response (clean backend interfaces) (#587)
varun-edachali-dbx Jun 11, 2025
ba8d9fd
Introduce models for `SeaDatabricksClient` (#595)
varun-edachali-dbx Jun 12, 2025
bb3f15a
Introduce preliminary SEA Result Set (#588)
varun-edachali-dbx Jun 12, 2025
19f1fae
Merge branch 'main' into sea-migration
varun-edachali-dbx Jun 17, 2025
6c5ba6d
remove invalid ExecuteResponse import
varun-edachali-dbx Jun 17, 2025
5e5147b
Separate Session related functionality from Connection class (#571)
varun-edachali-dbx May 28, 2025
57370b3
Introduce Backend Interface (DatabricksClient) (#573)
varun-edachali-dbx May 30, 2025
75752bf
Implement ResultSet Abstraction (backend interfaces for fetch phase) …
varun-edachali-dbx Jun 3, 2025
450b80d
remove un-necessary initialisation assertions
varun-edachali-dbx Jun 18, 2025
a926f02
remove un-necessary line break s
varun-edachali-dbx Jun 18, 2025
55ad001
more un-necessary line breaks
varun-edachali-dbx Jun 18, 2025
fa15730
constrain diff of test_closing_connection_closes_commands
varun-edachali-dbx Jun 18, 2025
019c7fb
reduce diff of test_closing_connection_closes_commands
varun-edachali-dbx Jun 18, 2025
726abe7
use pytest-like assertions for test_closing_connection_closes_commands
varun-edachali-dbx Jun 18, 2025
bf6d41c
ensure command_id is not None
varun-edachali-dbx Jun 18, 2025
5afa733
line breaks after multi-line pyfocs
varun-edachali-dbx Jun 18, 2025
e3dfd36
ensure non null operationHandle for commandId creation
varun-edachali-dbx Jun 18, 2025
63360b3
use command_id methods instead of explicit guid_to_hex_id conversion
varun-edachali-dbx Jun 18, 2025
13ffb8d
remove un-necessary artifacts in test_session, add back assertion
varun-edachali-dbx Jun 18, 2025
a74d279
Implement SeaDatabricksClient (Complete Execution Spec) (#590)
varun-edachali-dbx Jun 18, 2025
d759050
add from __future__ import annotations to remove string literals arou…
varun-edachali-dbx Jun 19, 2025
1e21434
move docstring of DatabricksClient within class
varun-edachali-dbx Jun 24, 2025
cd4015b
move ThriftResultSet import to top of file
varun-edachali-dbx Jun 24, 2025
ed8b610
make backend/utils __init__ file empty
varun-edachali-dbx Jun 24, 2025
94d951e
use from __future__ import annotations to remove string literals arou…
varun-edachali-dbx Jun 24, 2025
c20058e
use lazy logging
varun-edachali-dbx Jun 24, 2025
fe3acb1
replace getters with property tag
varun-edachali-dbx Jun 24, 2025
9fb6a76
Merge branch 'main' into backend-refactors
varun-edachali-dbx Jun 24, 2025
61dfc4d
set active_command_id to None, not active_op_handle
varun-edachali-dbx Jun 24, 2025
64fb9b2
align test_session with pytest instead of unittest
varun-edachali-dbx Jun 24, 2025
cbf63f9
Merge branch 'main' into sea-migration
varun-edachali-dbx Jun 26, 2025
59b4825
remove duplicate test, correct active_command_id attribute
varun-edachali-dbx Jun 26, 2025
e380654
SeaDatabricksClient: Add Metadata Commands (#593)
varun-edachali-dbx Jun 26, 2025
677a7b0
SEA volume operations fix: assign `manifest.is_volume_operation` to `…
varun-edachali-dbx Jun 26, 2025
45585d4
Introduce manual SEA test scripts for Exec Phase (#589)
varun-edachali-dbx Jun 27, 2025
70c7dc8
Complete Fetch Phase (for `INLINE` disposition and `JSON_ARRAY` forma…
varun-edachali-dbx Jul 2, 2025
abf9aab
Merge branch 'main' into sea-migration
varun-edachali-dbx Jul 3, 2025
9b4b606
Merge branch 'main' into backend-refactors
varun-edachali-dbx Jul 3, 2025
4f11ff0
Introduce `row_limit` param (#607)
varun-edachali-dbx Jul 7, 2025
45f5c26
Merge branch 'main' into backend-refactors
varun-edachali-dbx Jul 10, 2025
2c9368a
formatting (black)
varun-edachali-dbx Jul 10, 2025
9b1b1f5
remove repetition from Session.__init__
varun-edachali-dbx Jul 10, 2025
77e23d3
Merge branch 'backend-refactors' into sea-migration
varun-edachali-dbx Jul 11, 2025
3bd3aef
fix merge artifacts
varun-edachali-dbx Jul 11, 2025
6d4701f
correct patch paths
varun-edachali-dbx Jul 11, 2025
dc1cb6d
fix type issues
varun-edachali-dbx Jul 14, 2025
5d04cd0
Merge branch 'main' into sea-migration
varun-edachali-dbx Jul 15, 2025
922c448
explicitly close result queue
varun-edachali-dbx Jul 15, 2025
1a0575a
Complete Fetch Phase (`EXTERNAL_LINKS` disposition and `ARROW` format…
varun-edachali-dbx Jul 16, 2025
c07beb1
SEA Session Configuration Fix: Explicitly convert values to `str` (#…
varun-edachali-dbx Jul 16, 2025
640cc82
SEA: add support for `Hybrid` disposition (#631)
varun-edachali-dbx Jul 17, 2025
8fbca9d
SEA: Reduce network calls for synchronous commands (#633)
varun-edachali-dbx Jul 19, 2025
806e5f5
SEA: Decouple Link Fetching (#632)
varun-edachali-dbx Jul 21, 2025
b57c3f3
Chunk download latency (#634)
saishreeeee Jul 21, 2025
ef5836b
acquire lock before notif + formatting (black)
varun-edachali-dbx Jul 21, 2025
4fd2a3f
Merge branch 'main' into sea-migration
varun-edachali-dbx Jul 23, 2025
26f8947
fix imports
varun-edachali-dbx Jul 23, 2025
2d44596
add get_chunk_link s
varun-edachali-dbx Jul 23, 2025
99e7435
simplify description extraction
varun-edachali-dbx Jul 23, 2025
54ec080
pass session_id_hex to ThriftResultSet
varun-edachali-dbx Jul 23, 2025
f9f9f31
revert to main's extract description
varun-edachali-dbx Jul 23, 2025
51cef2b
validate row count for sync query tests as well
varun-edachali-dbx Jul 23, 2025
387102d
guid_hex -> hex_guid
varun-edachali-dbx Jul 23, 2025
d53d1ea
reduce diff
varun-edachali-dbx Jul 23, 2025
c7810aa
reduce diff
varun-edachali-dbx Jul 23, 2025
b3072bd
reduce diff
varun-edachali-dbx Jul 23, 2025
8be5264
set .value in compression
varun-edachali-dbx Jul 23, 2025
80692e3
reduce diff
varun-edachali-dbx Jul 23, 2025
83e45ae
is_direct_results -> has_more_rows
varun-edachali-dbx Jul 25, 2025
ddfae38
ensure result set initialised
varun-edachali-dbx Jul 28, 2025
0cd17c8
minor telemetry changes
saishreeeee Jul 28, 2025
65 changes: 57 additions & 8 deletions examples/experimental/tests/test_sea_async_query.py
@@ -52,12 +52,20 @@ def test_sea_async_query_with_cloud_fetch():
f"Successfully opened SEA session with ID: {connection.get_session_id_hex()}"
)

# Execute a simple query asynchronously
# Execute a query that generates large rows to force multiple chunks
requested_row_count = 5000
cursor = connection.cursor()
query = f"""
SELECT
id,
concat('value_', repeat('a', 10000)) as test_value
FROM range(1, {requested_row_count} + 1) AS t(id)
"""

logger.info(
"Executing asynchronous query with cloud fetch: SELECT 1 as test_value"
f"Executing asynchronous query with cloud fetch to generate {requested_row_count} rows"
)
cursor.execute_async("SELECT 1 as test_value")
cursor.execute_async(query)
logger.info(
"Asynchronous query submitted successfully with cloud fetch enabled"
)
@@ -70,8 +78,25 @@ def test_sea_async_query_with_cloud_fetch():

logger.info("Query is no longer pending, getting results...")
cursor.get_async_execution_result()

results = [cursor.fetchone()]
results.extend(cursor.fetchmany(10))
results.extend(cursor.fetchall())
actual_row_count = len(results)

logger.info(
f"Requested {requested_row_count} rows, received {actual_row_count} rows"
)

# Verify total row count
if actual_row_count != requested_row_count:
logger.error(
f"FAIL: Row count mismatch. Expected {requested_row_count}, got {actual_row_count}"
)
return False

logger.info(
"Successfully retrieved asynchronous query results with cloud fetch enabled"
"PASS: Received correct number of rows with cloud fetch and all fetch methods work correctly"
)

# Close resources
@@ -131,12 +156,20 @@ def test_sea_async_query_without_cloud_fetch():
f"Successfully opened SEA session with ID: {connection.get_session_id_hex()}"
)

# Execute a simple query asynchronously
# For non-cloud fetch, use a smaller row count to avoid exceeding inline limits
requested_row_count = 100
cursor = connection.cursor()
query = f"""
SELECT
id,
concat('value_', repeat('a', 100)) as test_value
FROM range(1, {requested_row_count} + 1) AS t(id)
"""

logger.info(
"Executing asynchronous query without cloud fetch: SELECT 1 as test_value"
f"Executing asynchronous query without cloud fetch to generate {requested_row_count} rows"
)
cursor.execute_async("SELECT 1 as test_value")
cursor.execute_async(query)
logger.info(
"Asynchronous query submitted successfully with cloud fetch disabled"
)
@@ -149,8 +182,24 @@ def test_sea_async_query_without_cloud_fetch():

logger.info("Query is no longer pending, getting results...")
cursor.get_async_execution_result()
results = [cursor.fetchone()]
results.extend(cursor.fetchmany(10))
results.extend(cursor.fetchall())
actual_row_count = len(results)

logger.info(
f"Requested {requested_row_count} rows, received {actual_row_count} rows"
)

# Verify total row count
if actual_row_count != requested_row_count:
logger.error(
f"FAIL: Row count mismatch. Expected {requested_row_count}, got {actual_row_count}"
)
return False

logger.info(
"Successfully retrieved asynchronous query results with cloud fetch disabled"
"PASS: Received correct number of rows without cloud fetch and all fetch methods work correctly"
)

# Close resources
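
Note: in the full scripts, submission and retrieval are separated by a polling loop that the collapsed context above hides. A minimal sketch of that async flow, assuming the connector's `execute_async`, `is_query_pending`, and `get_async_execution_result` cursor helpers behave as the test's log messages suggest (the one-second poll interval is illustrative):

    import time

    def run_async(cursor, query: str):
        # Submit without blocking, then poll until the query leaves the
        # pending state before asking for results.
        cursor.execute_async(query)
        while cursor.is_query_pending():
            time.sleep(1)  # illustrative interval; tune for real workloads
        cursor.get_async_execution_result()
        return cursor.fetchall()
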
54 changes: 46 additions & 8 deletions examples/experimental/tests/test_sea_sync_query.py
@@ -50,13 +50,34 @@ def test_sea_sync_query_with_cloud_fetch():
f"Successfully opened SEA session with ID: {connection.get_session_id_hex()}"
)

# Execute a simple query
# Execute a query that generates large rows to force multiple chunks
requested_row_count = 10000
cursor = connection.cursor()
query = f"""
SELECT
id,
concat('value_', repeat('a', 10000)) as test_value
FROM range(1, {requested_row_count} + 1) AS t(id)
"""

logger.info(
f"Executing synchronous query with cloud fetch to generate {requested_row_count} rows"
)
cursor.execute(query)
results = [cursor.fetchone()]
results.extend(cursor.fetchmany(10))
results.extend(cursor.fetchall())
actual_row_count = len(results)
logger.info(
"Executing synchronous query with cloud fetch: SELECT 1 as test_value"
f"{actual_row_count} rows retrieved against {requested_row_count} requested"
)
cursor.execute("SELECT 1 as test_value")
logger.info("Query executed successfully with cloud fetch enabled")

# Verify total row count
if actual_row_count != requested_row_count:
logger.error(
f"FAIL: Row count mismatch. Expected {requested_row_count}, got {actual_row_count}"
)
return False

# Close resources
cursor.close()
@@ -115,13 +136,30 @@ def test_sea_sync_query_without_cloud_fetch():
f"Successfully opened SEA session with ID: {connection.get_session_id_hex()}"
)

# Execute a simple query
# For non-cloud fetch, use a smaller row count to avoid exceeding inline limits
requested_row_count = 100
cursor = connection.cursor()
logger.info(
"Executing synchronous query without cloud fetch: SELECT 1 as test_value"
f"Executing synchronous query without cloud fetch: SELECT {requested_row_count} rows"
)
cursor.execute(
"SELECT id, 'test_value_' || CAST(id as STRING) as test_value FROM range(1, 101)"
)
cursor.execute("SELECT 1 as test_value")
logger.info("Query executed successfully with cloud fetch disabled")

results = [cursor.fetchone()]
results.extend(cursor.fetchmany(10))
results.extend(cursor.fetchall())
actual_row_count = len(results)
logger.info(
f"{actual_row_count} rows retrieved against {requested_row_count} requested"
)

# Verify total row count
if actual_row_count != requested_row_count:
logger.error(
f"FAIL: Row count mismatch. Expected {requested_row_count}, got {actual_row_count}"
)
return False

# Close resources
cursor.close()
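
All four scripts (async and sync, with and without cloud fetch) share the same verification pattern: mix the three PEP 249 fetch methods and check that together they drain exactly the requested number of rows. A condensed sketch of that pattern, assuming an open `databricks.sql` connection; the helper name and default row count are illustrative:

    def verify_fetch_methods(connection, requested_row_count: int = 5000) -> bool:
        cursor = connection.cursor()
        cursor.execute(
            f"""
            SELECT id, concat('value_', repeat('a', 10000)) as test_value
            FROM range(1, {requested_row_count} + 1) AS t(id)
            """
        )
        results = [cursor.fetchone()]         # 1 row
        results.extend(cursor.fetchmany(10))  # 10 more rows
        results.extend(cursor.fetchall())     # everything that remains
        cursor.close()
        # All three methods together must account for every requested row.
        return len(results) == requested_row_count
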
40 changes: 32 additions & 8 deletions src/databricks/sql/backend/sea/backend.py
@@ -5,7 +5,12 @@
import re
from typing import Any, Dict, Tuple, List, Optional, Union, TYPE_CHECKING, Set

from databricks.sql.backend.sea.models.base import ResultManifest, StatementStatus
from databricks.sql.backend.sea.models.base import (
ExternalLink,
ResultManifest,
StatementStatus,
)
from databricks.sql.backend.sea.models.responses import GetChunksResponse
from databricks.sql.backend.sea.utils.constants import (
ALLOWED_SESSION_CONF_TO_DEFAULT_VALUES_MAP,
ResultFormat,
@@ -19,7 +24,7 @@
if TYPE_CHECKING:
from databricks.sql.client import Cursor

from databricks.sql.result_set import SeaResultSet
from databricks.sql.backend.sea.result_set import SeaResultSet

from databricks.sql.backend.databricks_client import DatabricksClient
from databricks.sql.backend.types import (
@@ -110,6 +115,7 @@ class SeaDatabricksClient(DatabricksClient):
STATEMENT_PATH = BASE_PATH + "statements"
STATEMENT_PATH_WITH_ID = STATEMENT_PATH + "/{}"
CANCEL_STATEMENT_PATH_WITH_ID = STATEMENT_PATH + "/{}/cancel"
CHUNK_PATH_WITH_ID_AND_INDEX = STATEMENT_PATH + "/{}/result/chunks/{}"

# SEA constants
POLL_INTERVAL_SECONDS = 0.2
@@ -296,7 +302,7 @@ def close_session(self, session_id: SessionId) -> None:

def _extract_description_from_manifest(
self, manifest: ResultManifest
) -> Optional[List]:
) -> List[Tuple]:
"""
Extract column description from a manifest object, in the format defined by
the spec: https://peps.python.org/pep-0249/#description
@@ -311,9 +317,6 @@ def _extract_description_from_manifest(
schema_data = manifest.schema
columns_data = schema_data.get("columns", [])

if not columns_data:
return None

columns = []
for col_data in columns_data:
# Format: (name, type_code, display_size, internal_size, precision, scale, null_ok)
@@ -337,7 +340,7 @@
)
)

return columns if columns else None
return columns

def _results_message_to_execute_response(
self, response: Union[ExecuteStatementResponse, GetStatementResponse]
@@ -358,7 +361,7 @@

# Check for compression
lz4_compressed = (
response.manifest.result_compression == ResultCompression.LZ4_FRAME
response.manifest.result_compression == ResultCompression.LZ4_FRAME.value
)

execute_response = ExecuteResponse(
@@ -647,6 +650,27 @@ def get_execution_result(
response = self._poll_query(command_id)
return self._response_to_result_set(response, cursor)

def get_chunk_links(
self, statement_id: str, chunk_index: int
) -> List[ExternalLink]:
"""
Get links for chunks starting from the specified index.
Args:
statement_id: The statement ID
chunk_index: The starting chunk index
Returns:
List[ExternalLink]: External links for the requested chunk
"""

response_data = self._http_client._make_request(
method="GET",
path=self.CHUNK_PATH_WITH_ID_AND_INDEX.format(statement_id, chunk_index),
)
response = GetChunksResponse.from_dict(response_data)

links = response.external_links or []
return links

# == Metadata Operations ==

def get_catalogs(
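
A sketch of how a caller might walk chunks with the new `get_chunk_links` endpoint. It assumes, per the SEA models, that `ExternalLink` exposes `external_link` (a presigned URL) and `next_chunk_index` (None on the last chunk); the loop itself is illustrative, since the PR drives this from inside the SEA result-set machinery rather than from user code:

    def iter_chunk_urls(client, statement_id):
        # `client` is a SeaDatabricksClient; yields presigned URLs chunk by chunk.
        chunk_index = 0
        while chunk_index is not None:
            links = client.get_chunk_links(statement_id, chunk_index)
            if not links:
                break
            for link in links:
                yield link.external_link  # presigned URL for the chunk's data
            # Follow the server-reported pointer to the next chunk, if any.
            chunk_index = links[-1].next_chunk_index
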
2 changes: 2 additions & 0 deletions src/databricks/sql/backend/sea/models/__init__.py
@@ -26,6 +26,7 @@
ExecuteStatementResponse,
GetStatementResponse,
CreateSessionResponse,
GetChunksResponse,
)

__all__ = [
@@ -47,4 +48,5 @@
"ExecuteStatementResponse",
"GetStatementResponse",
"CreateSessionResponse",
"GetChunksResponse",
]
34 changes: 34 additions & 0 deletions src/databricks/sql/backend/sea/models/responses.py
@@ -160,3 +160,37 @@ class CreateSessionResponse:
def from_dict(cls, data: Dict[str, Any]) -> "CreateSessionResponse":
"""Create a CreateSessionResponse from a dictionary."""
return cls(session_id=data.get("session_id", ""))


@dataclass
class GetChunksResponse:
"""
Response from getting chunks for a statement.

The response model can be found in the docs, here:
https://docs.databricks.com/api/workspace/statementexecution/getstatementresultchunkn
"""

data: Optional[List[List[Any]]] = None
external_links: Optional[List[ExternalLink]] = None
byte_count: Optional[int] = None
chunk_index: Optional[int] = None
next_chunk_index: Optional[int] = None
next_chunk_internal_link: Optional[str] = None
row_count: Optional[int] = None
row_offset: Optional[int] = None

@classmethod
def from_dict(cls, data: Dict[str, Any]) -> "GetChunksResponse":
"""Create a GetChunksResponse from a dictionary."""
result = _parse_result({"result": data})
return cls(
data=result.data,
external_links=result.external_links,
byte_count=result.byte_count,
chunk_index=result.chunk_index,
next_chunk_index=result.next_chunk_index,
next_chunk_internal_link=result.next_chunk_internal_link,
row_count=result.row_count,
row_offset=result.row_offset,
)
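
For reference, a round trip through the new model with a hypothetical payload shaped after the Statement Execution API docs linked above (all values invented; the nested link's field names follow the same docs and are not guaranteed by this PR):

    from databricks.sql.backend.sea.models import GetChunksResponse

    payload = {
        "external_links": [
            {
                "external_link": "https://example-bucket.s3.amazonaws.com/chunk-0",
                "expiration": "2025-07-28T00:00:00Z",
                "chunk_index": 0,
                "byte_count": 1024,
                "row_count": 100,
                "row_offset": 0,
                "next_chunk_index": 1,
            }
        ],
        "chunk_index": 0,
        "row_count": 100,
        "row_offset": 0,
        "next_chunk_index": 1,
    }

    response = GetChunksResponse.from_dict(payload)
    assert response.external_links is not None
    print(response.external_links[0].external_link, response.next_chunk_index)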