From 7545c75f935c482124714f9781230df1d780e7cb Mon Sep 17 00:00:00 2001 From: Mohit Singla Date: Tue, 13 Sep 2022 17:08:44 +0200 Subject: [PATCH 01/17] Enabling compression Signed-off-by: Mohit Singla --- poetry.lock | 92 ++++++++++++++++++++-------- pyproject.toml | 1 + src/databricks/sql/thrift_backend.py | 32 +++++++--- 3 files changed, 92 insertions(+), 33 deletions(-) diff --git a/poetry.lock b/poetry.lock index 8d6e98937..86a15a5a6 100644 --- a/poetry.lock +++ b/poetry.lock @@ -15,10 +15,10 @@ optional = false python-versions = ">=3.5" [package.extras] -tests_no_zope = ["cloudpickle", "pytest-mypy-plugins", "mypy (>=0.900,!=0.940)", "pytest (>=4.3.0)", "pympler", "hypothesis", "coverage[toml] (>=5.0.2)"] -tests = ["cloudpickle", "zope.interface", "pytest-mypy-plugins", "mypy (>=0.900,!=0.940)", "pytest (>=4.3.0)", "pympler", "hypothesis", "coverage[toml] (>=5.0.2)"] -docs = ["sphinx-notfound-page", "zope.interface", "sphinx", "furo"] -dev = ["cloudpickle", "pre-commit", "sphinx-notfound-page", "sphinx", "furo", "zope.interface", "pytest-mypy-plugins", "mypy (>=0.900,!=0.940)", "pytest (>=4.3.0)", "pympler", "hypothesis", "coverage[toml] (>=5.0.2)"] +dev = ["cloudpickle", "coverage[toml] (>=5.0.2)", "furo", "hypothesis", "mypy (>=0.900,!=0.940)", "pre-commit", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "sphinx", "sphinx-notfound-page", "zope.interface"] +docs = ["furo", "sphinx", "sphinx-notfound-page", "zope.interface"] +tests = ["cloudpickle", "coverage[toml] (>=5.0.2)", "hypothesis", "mypy (>=0.900,!=0.940)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "zope.interface"] +tests_no_zope = ["cloudpickle", "coverage[toml] (>=5.0.2)", "hypothesis", "mypy (>=0.900,!=0.940)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins"] [[package]] name = "black" @@ -38,10 +38,10 @@ typed-ast = {version = ">=1.4.2", markers = "python_version < \"3.8\" and implem typing-extensions = {version = ">=3.10.0.0", markers = "python_version < \"3.10\""} 
[package.extras] -uvloop = ["uvloop (>=0.15.2)"] -jupyter = ["tokenize-rt (>=3.2.0)", "ipython (>=7.8.0)"] -d = ["aiohttp (>=3.7.4)"] colorama = ["colorama (>=0.4.3)"] +d = ["aiohttp (>=3.7.4)"] +jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"] +uvloop = ["uvloop (>=0.15.2)"] [[package]] name = "click" @@ -76,9 +76,9 @@ typing-extensions = {version = ">=3.6.4", markers = "python_version < \"3.8\""} zipp = ">=0.5" [package.extras] -testing = ["importlib-resources (>=1.3)", "pytest-mypy (>=0.9.1)", "pytest-black (>=0.3.7)", "pytest-perf (>=0.9.2)", "flufl.flake8", "pyfakefs", "packaging", "pytest-enabler (>=1.3)", "pytest-cov", "pytest-flake8", "pytest-checkdocs (>=2.4)", "pytest (>=6)"] +docs = ["jaraco.packaging (>=9)", "rst.linker (>=1.9)", "sphinx"] perf = ["ipython"] -docs = ["rst.linker (>=1.9)", "jaraco.packaging (>=9)", "sphinx"] +testing = ["flufl.flake8", "importlib-resources (>=1.3)", "packaging", "pyfakefs", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)", "pytest-perf (>=0.9.2)"] [[package]] name = "iniconfig" @@ -88,6 +88,19 @@ category = "dev" optional = false python-versions = "*" +[[package]] +name = "lz4" +version = "4.0.2" +description = "LZ4 Bindings for Python" +category = "main" +optional = false +python-versions = ">=3.7" + +[package.extras] +docs = ["sphinx (>=1.6.0)", "sphinx-bootstrap-theme"] +flake8 = ["flake8"] +tests = ["psutil", "pytest (!=3.3.0)", "pytest-cov"] + [[package]] name = "mypy" version = "0.950" @@ -103,9 +116,9 @@ typed-ast = {version = ">=1.4.0,<2", markers = "python_version < \"3.8\""} typing-extensions = ">=3.10" [package.extras] -reports = ["lxml"] -python2 = ["typed-ast (>=1.4.0,<2)"] dmypy = ["psutil (>=4.0)"] +python2 = ["typed-ast (>=1.4.0,<2)"] +reports = ["lxml"] [[package]] name = "mypy-extensions" @@ -144,16 +157,16 @@ python-versions = ">=3.7.1" [package.dependencies] numpy = [ - {version = ">=1.21.0", 
markers = "python_version >= \"3.10\""}, - {version = ">=1.20.0", markers = "platform_machine == \"arm64\" and python_version < \"3.10\""}, - {version = ">=1.19.2", markers = "platform_machine == \"aarch64\" and python_version < \"3.10\""}, {version = ">=1.17.3", markers = "platform_machine != \"aarch64\" and platform_machine != \"arm64\" and python_version < \"3.10\""}, + {version = ">=1.19.2", markers = "platform_machine == \"aarch64\" and python_version < \"3.10\""}, + {version = ">=1.20.0", markers = "platform_machine == \"arm64\" and python_version < \"3.10\""}, + {version = ">=1.21.0", markers = "python_version >= \"3.10\""}, ] python-dateutil = ">=2.7.3" pytz = ">=2017.3" [package.extras] -test = ["pytest-xdist", "pytest (>=6.0)", "hypothesis (>=3.58)"] +test = ["hypothesis (>=3.58)", "pytest (>=6.0)", "pytest-xdist"] [[package]] name = "pathspec" @@ -172,8 +185,8 @@ optional = false python-versions = ">=3.7" [package.extras] -test = ["pytest (>=6)", "pytest-mock (>=3.6)", "pytest-cov (>=2.7)", "appdirs (==1.4.4)"] -docs = ["sphinx (>=4)", "sphinx-autodoc-typehints (>=1.12)", "proselint (>=0.10.2)", "furo (>=2021.7.5b38)"] +docs = ["furo (>=2021.7.5b38)", "proselint (>=0.10.2)", "sphinx (>=4)", "sphinx-autodoc-typehints (>=1.12)"] +test = ["appdirs (==1.4.4)", "pytest (>=6)", "pytest-cov (>=2.7)", "pytest-mock (>=3.6)"] [[package]] name = "pluggy" @@ -187,8 +200,8 @@ python-versions = ">=3.6" importlib-metadata = {version = ">=0.12", markers = "python_version < \"3.8\""} [package.extras] -testing = ["pytest-benchmark", "pytest"] -dev = ["tox", "pre-commit"] +dev = ["pre-commit", "tox"] +testing = ["pytest", "pytest-benchmark"] [[package]] name = "py" @@ -240,7 +253,7 @@ py = ">=1.8.2" tomli = ">=1.0.0" [package.extras] -testing = ["xmlschema", "requests", "pygments (>=2.7.2)", "nose", "mock", "hypothesis (>=3.56)", "argcomplete"] +testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "xmlschema"] [[package]] name 
= "python-dateutil" @@ -318,13 +331,13 @@ optional = false python-versions = ">=3.7" [package.extras] -testing = ["pytest-mypy (>=0.9.1)", "pytest-black (>=0.3.7)", "func-timeout", "jaraco.itertools", "pytest-enabler (>=1.3)", "pytest-cov", "pytest-flake8", "pytest-checkdocs (>=2.4)", "pytest (>=6)"] -docs = ["jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "jaraco.packaging (>=9)", "sphinx"] +docs = ["jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx"] +testing = ["func-timeout", "jaraco.itertools", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)"] [metadata] lock-version = "1.1" python-versions = "^3.7.1" -content-hash = "dc86dcd1caa1cc8571222441c4f73a20f14260bbb49765c66d407b9bc7e0887c" +content-hash = "08e0b01a5619c290a688a40066eaa19d5ac99f6164bc7c32415f2f307fc8b542" [metadata.files] atomicwrites = [ @@ -375,6 +388,29 @@ iniconfig = [ {file = "iniconfig-1.1.1-py2.py3-none-any.whl", hash = "sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3"}, {file = "iniconfig-1.1.1.tar.gz", hash = "sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32"}, ] +lz4 = [ + {file = "lz4-4.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:3881573c3db902db370e072eb64b40c7c8289b94b2a731e051858cc198f890e8"}, + {file = "lz4-4.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:154e6e9f58a7bafc4d2a1395160305b78fc82fa708bfa58cf0ad977c443d1f8f"}, + {file = "lz4-4.0.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4cfa82f26b4f1835c797bd70e5ce20d5f1ee897b9a0c53e62d607f9029f521ce"}, + {file = "lz4-4.0.2-cp310-cp310-win32.whl", hash = "sha256:fba1730cd2327a9d013192a9878714cc82f4877d2ada556222d03ea6428a80ed"}, + {file = "lz4-4.0.2-cp310-cp310-win_amd64.whl", hash = 
"sha256:61dbcca64e8e1655e06b588356c4b2515bccc1d7e84065f858a685abd96f0cf2"}, + {file = "lz4-4.0.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:56ea660097fec87f0c6746146b316775037f8dd886a4c5915360e5b32b7112d0"}, + {file = "lz4-4.0.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ed86ab22bfe1f4cd4fc983704134a8fdf746c1121a398f8f14cbd014c1a5b0ae"}, + {file = "lz4-4.0.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:345608de23b4d68fbdef373f1e53d6c5abd99a062d4ff922e3350f47775ab123"}, + {file = "lz4-4.0.2-cp37-cp37m-win32.whl", hash = "sha256:5fe9db7627674875e4279c2ed50b1e38fb91ec3093347f871ed996e58edbb488"}, + {file = "lz4-4.0.2-cp37-cp37m-win_amd64.whl", hash = "sha256:3fa0f000d8ce39e643e9e5c49fc4d1985156ffb177e3123a0f22551f5864841b"}, + {file = "lz4-4.0.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:6f3b3670f52f0871885258bcbc746f483760434336f0bc5581f161cc5d4b0c9a"}, + {file = "lz4-4.0.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ea2c2182a5b0ad03f33ac09db0925a1738a1d65751a3e058110bd900c643d359"}, + {file = "lz4-4.0.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:439898dd4176a724243002003c3f733eb6ce48a5988175f54c8560e0b100b7a6"}, + {file = "lz4-4.0.2-cp38-cp38-win32.whl", hash = "sha256:35e6caced0229b90151d31d9cf1eaa541e597f8021bf5b70ff9e6374e3e43b23"}, + {file = "lz4-4.0.2-cp38-cp38-win_amd64.whl", hash = "sha256:1bd56282f6993e013ccf7f6edf1530c2a13d1662741e2be072349c7f70bc0682"}, + {file = "lz4-4.0.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:1ed9a1875dc2a489f3b665d0211984689d0e76585e55650b044a64dbd2d22992"}, + {file = "lz4-4.0.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b18a6d6d9071c03dbf9e30bbe22e4476f24f1a4d73b1e975605ad3ce725e6c"}, + {file = "lz4-4.0.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", 
hash = "sha256:9d141719d3cbb7933809642a61b68b8f595ddf85657016521756ddcf826b85cd"}, + {file = "lz4-4.0.2-cp39-cp39-win32.whl", hash = "sha256:a8e02c2477bd704f43113ac8dd966c361187383591388818d74e1b73e4674759"}, + {file = "lz4-4.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:ee73357412c5505f6ba0ea61ff71455e2e4c1e04d8e60f17f3cd937261d773fa"}, + {file = "lz4-4.0.2.tar.gz", hash = "sha256:083b7172c2938412ae37c3a090250bfdd9e4a6e855442594f86c3608ed12729b"}, +] mypy = [ {file = "mypy-0.950-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:cf9c261958a769a3bd38c3e133801ebcd284ffb734ea12d01457cb09eacf7d7b"}, {file = "mypy-0.950-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b5b5bd0ffb11b4aba2bb6d31b8643902c48f990cc92fda4e21afac658044f0c0"}, @@ -517,12 +553,18 @@ pytest = [ {file = "pytest-7.1.2-py3-none-any.whl", hash = "sha256:13d0e3ccfc2b6e26be000cb6568c832ba67ba32e719443bfe725814d3c42433c"}, {file = "pytest-7.1.2.tar.gz", hash = "sha256:a06a0425453864a270bc45e71f783330a7428defb4230fb5e6a731fde06ecd45"}, ] -python-dateutil = [] +python-dateutil = [ + {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"}, + {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"}, +] pytz = [ {file = "pytz-2022.2.1-py2.py3-none-any.whl", hash = "sha256:220f481bdafa09c3955dfbdddb7b57780e9a94f5127e35456a48589b9e0c0197"}, {file = "pytz-2022.2.1.tar.gz", hash = "sha256:cea221417204f2d1a2aa03ddae3e867921971d0d76f14d87abb4414415bbdcf5"}, ] -six = [] +six = [ + {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, + {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, +] thrift = [ {file = "thrift-0.13.0.tar.gz", hash = "sha256:9af1c86bf73433afc6010ed376a6c6aca2b54099cc0d61895f640870a9ae7d89"}, ] diff --git 
a/pyproject.toml b/pyproject.toml index 4aa3823ff..fcd4978e2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,6 +13,7 @@ python = "^3.7.1" thrift = "^0.13.0" pandas = "^1.3.0" pyarrow = "^9.0.0" +lz4 = "^4.0.2" [tool.poetry.dev-dependencies] pytest = "^7.1.2" diff --git a/src/databricks/sql/thrift_backend.py b/src/databricks/sql/thrift_backend.py index cf665d5ec..b6b5f531f 100644 --- a/src/databricks/sql/thrift_backend.py +++ b/src/databricks/sql/thrift_backend.py @@ -4,6 +4,7 @@ import math import time import threading +import lz4.frame from uuid import uuid4 from ssl import CERT_NONE, CERT_OPTIONAL, CERT_REQUIRED, create_default_context @@ -435,7 +436,7 @@ def open_session(self, session_configuration, catalog, schema): initial_namespace = None open_session_req = ttypes.TOpenSessionReq( - client_protocol_i64=ttypes.TProtocolVersion.SPARK_CLI_SERVICE_PROTOCOL_V5, + client_protocol_i64=ttypes.TProtocolVersion.SPARK_CLI_SERVICE_PROTOCOL_V6, client_protocol=None, initialNamespace=initial_namespace, canUseMultipleCatalogs=True, @@ -491,7 +492,9 @@ def _poll_for_status(self, op_handle): ) return self.make_request(self._client.GetOperationStatus, req) - def _create_arrow_table(self, t_row_set, schema_bytes, description): + def _create_arrow_table( + self, t_row_set, are_arrow_results_compressed, schema_bytes, description + ): if t_row_set.columns is not None: ( arrow_table, @@ -504,7 +507,7 @@ def _create_arrow_table(self, t_row_set, schema_bytes, description): arrow_table, num_rows, ) = ThriftBackend._convert_arrow_based_set_to_arrow_table( - t_row_set.arrowBatches, schema_bytes + t_row_set.arrowBatches, are_arrow_results_compressed, schema_bytes ) else: raise OperationalError("Unsupported TRowSet instance {}".format(t_row_set)) @@ -529,13 +532,18 @@ def _convert_decimals_in_arrow_table(table, description): return table @staticmethod - def _convert_arrow_based_set_to_arrow_table(arrow_batches, schema_bytes): + def _convert_arrow_based_set_to_arrow_table( + 
arrow_batches, are_arrow_results_compressed, schema_bytes + ): ba = bytearray() ba += schema_bytes n_rows = 0 for arrow_batch in arrow_batches: n_rows += arrow_batch.rowCount - ba += arrow_batch.batch + if are_arrow_results_compressed: + ba += lz4.frame.decompress(arrow_batch.batch) + else: + ba += arrow_batch.batch arrow_table = pyarrow.ipc.open_stream(ba).read_all() return arrow_table, n_rows @@ -710,11 +718,19 @@ def _results_message_to_execute_response(self, resp, operation_state): .to_pybytes() ) + are_arrow_results_compressed = ( + t_result_set_metadata_resp and t_result_set_metadata_resp.lz4Compressed + ) + if direct_results and direct_results.resultSet: assert direct_results.resultSet.results.startRowOffset == 0 assert direct_results.resultSetMetadata + arrow_results, n_rows = self._create_arrow_table( - direct_results.resultSet.results, schema_bytes, description + direct_results.resultSet.results, + are_arrow_results_compressed, + schema_bytes, + description, ) arrow_queue_opt = ArrowQueue(arrow_results, n_rows, 0) else: @@ -786,7 +802,7 @@ def execute_command(self, operation, session_handle, max_rows, max_bytes, cursor maxRows=max_rows, maxBytes=max_bytes ), canReadArrowResult=True, - canDecompressLZ4Result=False, + canDecompressLZ4Result=True, canDownloadResult=False, confOverlay={ # We want to receive proper Timestamp arrow types. 
@@ -925,7 +941,7 @@ def fetch_results( ) ) arrow_results, n_rows = self._create_arrow_table( - resp.results, arrow_schema_bytes, description + resp.results, are_arrow_results_compressed, arrow_schema_bytes, description ) arrow_queue = ArrowQueue(arrow_results, n_rows) From d726a0d77e64b7304b1ded6759b314310f901753 Mon Sep 17 00:00:00 2001 From: Mohit Singla Date: Tue, 13 Sep 2022 18:15:20 +0200 Subject: [PATCH 02/17] Correction Signed-off-by: Mohit Singla --- src/databricks/sql/thrift_backend.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/databricks/sql/thrift_backend.py b/src/databricks/sql/thrift_backend.py index b6b5f531f..4e6c98fef 100644 --- a/src/databricks/sql/thrift_backend.py +++ b/src/databricks/sql/thrift_backend.py @@ -717,11 +717,9 @@ def _results_message_to_execute_response(self, resp, operation_state): .serialize() .to_pybytes() ) - are_arrow_results_compressed = ( t_result_set_metadata_resp and t_result_set_metadata_resp.lz4Compressed ) - if direct_results and direct_results.resultSet: assert direct_results.resultSet.results.startRowOffset == 0 assert direct_results.resultSetMetadata @@ -940,6 +938,9 @@ def fetch_results( expected_row_start_offset, resp.results.startRowOffset ) ) + are_arrow_results_compressed = ( + resp.resultSetMetadata and resp.resultSetMetadata.lz4Compressed + ) arrow_results, n_rows = self._create_arrow_table( resp.results, are_arrow_results_compressed, arrow_schema_bytes, description ) From 2d0f030964e0a74fc6867db56c7b21ee2b93f11d Mon Sep 17 00:00:00 2001 From: Mohit Singla Date: Wed, 14 Sep 2022 12:14:51 +0200 Subject: [PATCH 03/17] Modifying unit tests Signed-off-by: Mohit Singla --- tests/unit/test_thrift_backend.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/unit/test_thrift_backend.py b/tests/unit/test_thrift_backend.py index e8c5a727f..922f590a3 100644 --- a/tests/unit/test_thrift_backend.py +++ b/tests/unit/test_thrift_backend.py @@ -828,7 +828,7 @@ def 
test_create_arrow_table_raises_error_for_unsupported_type(self): t_row_set = ttypes.TRowSet() thrift_backend = ThriftBackend("foobar", 443, "path", []) with self.assertRaises(OperationalError): - thrift_backend._create_arrow_table(t_row_set, None, Mock()) + thrift_backend._create_arrow_table(t_row_set, Mock(), None, Mock()) @patch.object(ThriftBackend, "_convert_arrow_based_set_to_arrow_table") @patch.object(ThriftBackend, "_convert_column_based_set_to_arrow_table") @@ -841,16 +841,17 @@ def test_create_arrow_table_calls_correct_conversion_method(self, convert_col_mo schema = Mock() cols = Mock() arrow_batches = Mock() + are_arrow_results_compressed = Mock() description = Mock() t_col_set = ttypes.TRowSet(columns=cols) - thrift_backend._create_arrow_table(t_col_set, schema, description) + thrift_backend._create_arrow_table(t_col_set, are_arrow_results_compressed, schema, description) convert_arrow_mock.assert_not_called() convert_col_mock.assert_called_once_with(cols, description) t_arrow_set = ttypes.TRowSet(arrowBatches=arrow_batches) - thrift_backend._create_arrow_table(t_arrow_set, schema, Mock()) - convert_arrow_mock.assert_called_once_with(arrow_batches, schema) + thrift_backend._create_arrow_table(t_arrow_set, are_arrow_results_compressed, schema, Mock()) + convert_arrow_mock.assert_called_once_with(arrow_batches, are_arrow_results_compressed, schema) def test_convert_column_based_set_to_arrow_table_without_nulls(self): # Deliberately duplicate the column name to check that dups work From b25833630d08b13ce353e01ee04ca600daedfe2c Mon Sep 17 00:00:00 2001 From: Moe Derakhshani Date: Tue, 13 Sep 2022 21:27:27 -0700 Subject: [PATCH 04/17] OAuth implementation (#15) This PR: * Adds the foundation for OAuth against Databricks account on AWS with BYOIDP. * It copies one internal module that Steve Weis @sweisdb wrote for Databricks CLI (oauth.py). Once ecosystem-dev team (Serge, Pieter) build a python sdk core we will move this code to their repo as a dependency. 
* the PR provides authenticators with visitor pattern format for stamping auth-token which later is intended to be moved to the repo owned by Serge @nfx and and Pieter @pietern --- .github/workflows/code-quality-checks.yml | 2 +- poetry.lock | 454 ++++++++++++------ pyproject.toml | 3 + src/databricks/sql/__init__.py | 2 +- src/databricks/sql/auth/__init__.py | 0 src/databricks/sql/auth/auth.py | 96 ++++ src/databricks/sql/auth/authenticators.py | 120 +++++ src/databricks/sql/auth/oauth.py | 243 ++++++++++ src/databricks/sql/auth/oauth_http_handler.py | 44 ++ src/databricks/sql/auth/thrift_http_client.py | 35 ++ src/databricks/sql/client.py | 121 +++-- src/databricks/sql/experimental/__init__.py | 0 .../sql/experimental/oauth_persistence.py | 70 +++ src/databricks/sql/thrift_backend.py | 26 +- tests/unit/__init__.py | 0 tests/unit/test_auth.py | 68 +++ tests/unit/test_oauth_persistence.py | 34 ++ tests/unit/test_thrift_backend.py | 114 ++--- 18 files changed, 1196 insertions(+), 236 deletions(-) create mode 100644 src/databricks/sql/auth/__init__.py create mode 100644 src/databricks/sql/auth/auth.py create mode 100644 src/databricks/sql/auth/authenticators.py create mode 100644 src/databricks/sql/auth/oauth.py create mode 100644 src/databricks/sql/auth/oauth_http_handler.py create mode 100644 src/databricks/sql/auth/thrift_http_client.py create mode 100644 src/databricks/sql/experimental/__init__.py create mode 100644 src/databricks/sql/experimental/oauth_persistence.py create mode 100644 tests/unit/__init__.py create mode 100644 tests/unit/test_auth.py create mode 100644 tests/unit/test_oauth_persistence.py diff --git a/.github/workflows/code-quality-checks.yml b/.github/workflows/code-quality-checks.yml index b6462322a..6648242c1 100644 --- a/.github/workflows/code-quality-checks.yml +++ b/.github/workflows/code-quality-checks.yml @@ -154,4 +154,4 @@ jobs: # black the code #---------------------------------------------- - name: Mypy - run: poetry run mypy src + 
run: poetry run mypy --install-types --non-interactive src diff --git a/poetry.lock b/poetry.lock index 86a15a5a6..0494b28b1 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,3 +1,17 @@ +[[package]] +name = "astroid" +version = "2.11.7" +description = "An abstract syntax tree for Python with inference support." +category = "dev" +optional = false +python-versions = ">=3.6.2" + +[package.dependencies] +lazy-object-proxy = ">=1.4.0" +typed-ast = {version = ">=1.4.0,<2.0", markers = "implementation_name == \"cpython\" and python_version < \"3.8\""} +typing-extensions = {version = ">=3.10", markers = "python_version < \"3.10\""} +wrapt = ">=1.11,<2" + [[package]] name = "atomicwrites" version = "1.4.1" @@ -43,6 +57,25 @@ d = ["aiohttp (>=3.7.4)"] jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"] uvloop = ["uvloop (>=0.15.2)"] +[[package]] +name = "certifi" +version = "2022.6.15" +description = "Python package for providing Mozilla's CA Bundle." +category = "main" +optional = false +python-versions = ">=3.6" + +[[package]] +name = "charset-normalizer" +version = "2.1.1" +description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." 
+category = "main" +optional = false +python-versions = ">=3.6.0" + +[package.extras] +unicode_backport = ["unicodedata2"] + [[package]] name = "click" version = "8.1.3" @@ -63,6 +96,25 @@ category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +[[package]] +name = "dill" +version = "0.3.5.1" +description = "serialize all of python" +category = "dev" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*" + +[package.extras] +graph = ["objgraph (>=1.7.2)"] + +[[package]] +name = "idna" +version = "3.3" +description = "Internationalized Domain Names in Applications (IDNA)" +category = "main" +optional = false +python-versions = ">=3.5" + [[package]] name = "importlib-metadata" version = "4.12.0" @@ -101,6 +153,36 @@ docs = ["sphinx (>=1.6.0)", "sphinx-bootstrap-theme"] flake8 = ["flake8"] tests = ["psutil", "pytest (!=3.3.0)", "pytest-cov"] +[[package]] +name = "isort" +version = "5.10.1" +description = "A Python utility / library to sort Python imports." +category = "dev" +optional = false +python-versions = ">=3.6.1,<4.0" + +[package.extras] +colors = ["colorama (>=0.4.3,<0.5.0)"] +pipfile_deprecated_finder = ["pipreqs", "requirementslib"] +plugins = ["setuptools"] +requirements_deprecated_finder = ["pip-api", "pipreqs"] + +[[package]] +name = "lazy-object-proxy" +version = "1.7.1" +description = "A fast and thorough lazy object proxy." 
+category = "dev" +optional = false +python-versions = ">=3.6" + +[[package]] +name = "mccabe" +version = "0.7.0" +description = "McCabe checker, plugin for flake8" +category = "dev" +optional = false +python-versions = ">=3.6" + [[package]] name = "mypy" version = "0.950" @@ -136,6 +218,19 @@ category = "main" optional = false python-versions = ">=3.7" +[[package]] +name = "oauthlib" +version = "3.2.0" +description = "A generic, spec-compliant, thorough implementation of the OAuth request-signing logic" +category = "main" +optional = false +python-versions = ">=3.6" + +[package.extras] +rsa = ["cryptography (>=3.0.0)"] +signals = ["blinker (>=1.4.0)"] +signedtoken = ["cryptography (>=3.0.0)", "pyjwt (>=2.0.0,<3)"] + [[package]] name = "packaging" version = "21.3" @@ -222,6 +317,27 @@ python-versions = ">=3.7" [package.dependencies] numpy = ">=1.16.6" +[[package]] +name = "pylint" +version = "2.13.9" +description = "python code static checker" +category = "dev" +optional = false +python-versions = ">=3.6.2" + +[package.dependencies] +astroid = ">=2.11.5,<=2.12.0-dev0" +colorama = {version = "*", markers = "sys_platform == \"win32\""} +dill = ">=0.2" +isort = ">=4.2.5,<6" +mccabe = ">=0.6,<0.8" +platformdirs = ">=2.2.0" +tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} +typing-extensions = {version = ">=3.10.0", markers = "python_version < \"3.10\""} + +[package.extras] +testutil = ["gitpython (>3)"] + [[package]] name = "pyparsing" version = "3.0.9" @@ -274,6 +390,24 @@ category = "main" optional = false python-versions = "*" +[[package]] +name = "requests" +version = "2.28.1" +description = "Python HTTP for Humans." 
+category = "main" +optional = false +python-versions = ">=3.7, <4" + +[package.dependencies] +certifi = ">=2017.4.17" +charset-normalizer = ">=2,<3" +idna = ">=2.5,<4" +urllib3 = ">=1.21.1,<1.27" + +[package.extras] +socks = ["PySocks (>=1.5.6,!=1.5.7)"] +use_chardet_on_py3 = ["chardet (>=3.0.2,<6)"] + [[package]] name = "six" version = "1.16.0" @@ -322,6 +456,27 @@ category = "dev" optional = false python-versions = ">=3.7" +[[package]] +name = "urllib3" +version = "1.26.12" +description = "HTTP library with thread-safe connection pooling, file post, and more." +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, <4" + +[package.extras] +brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)", "brotlipy (>=0.6.0)"] +secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "ipaddress", "pyOpenSSL (>=0.14)", "urllib3-secure-extra"] +socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] + +[[package]] +name = "wrapt" +version = "1.14.1" +description = "Module for decorators, wrappers and monkey patching." 
+category = "dev" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7" + [[package]] name = "zipp" version = "3.8.1" @@ -337,56 +492,85 @@ testing = ["func-timeout", "jaraco.itertools", "pytest (>=6)", "pytest-black (>= [metadata] lock-version = "1.1" python-versions = "^3.7.1" -content-hash = "08e0b01a5619c290a688a40066eaa19d5ac99f6164bc7c32415f2f307fc8b542" +content-hash = "f283eca35466a0294e09deb8535da2633219db696ad8bbc74dffd4592b0d66ad" [metadata.files] -atomicwrites = [ - {file = "atomicwrites-1.4.1.tar.gz", hash = "sha256:81b2c9071a49367a7f770170e5eec8cb66567cfbbc8c73d20ce5ca4a8d71cf11"}, +astroid = [ + {file = "astroid-2.11.7-py3-none-any.whl", hash = "sha256:86b0a340a512c65abf4368b80252754cda17c02cdbbd3f587dddf98112233e7b"}, + {file = "astroid-2.11.7.tar.gz", hash = "sha256:bb24615c77f4837c707669d16907331374ae8a964650a66999da3f5ca68dc946"}, ] +atomicwrites = [] attrs = [ {file = "attrs-22.1.0-py2.py3-none-any.whl", hash = "sha256:86efa402f67bf2df34f51a335487cf46b1ec130d02b8d39fd248abfd30da551c"}, {file = "attrs-22.1.0.tar.gz", hash = "sha256:29adc2665447e5191d0e7c568fde78b21f9672d344281d0c6e1ab085429b22b6"}, ] -black = [ - {file = "black-22.6.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:f586c26118bc6e714ec58c09df0157fe2d9ee195c764f630eb0d8e7ccce72e69"}, - {file = "black-22.6.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b270a168d69edb8b7ed32c193ef10fd27844e5c60852039599f9184460ce0807"}, - {file = "black-22.6.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6797f58943fceb1c461fb572edbe828d811e719c24e03375fd25170ada53825e"}, - {file = "black-22.6.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c85928b9d5f83b23cee7d0efcb310172412fbf7cb9d9ce963bd67fd141781def"}, - {file = "black-22.6.0-cp310-cp310-win_amd64.whl", hash = "sha256:f6fe02afde060bbeef044af7996f335fbe90b039ccf3f5eb8f16df8b20f77666"}, - {file = "black-22.6.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = 
"sha256:cfaf3895a9634e882bf9d2363fed5af8888802d670f58b279b0bece00e9a872d"}, - {file = "black-22.6.0-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:94783f636bca89f11eb5d50437e8e17fbc6a929a628d82304c80fa9cd945f256"}, - {file = "black-22.6.0-cp36-cp36m-win_amd64.whl", hash = "sha256:2ea29072e954a4d55a2ff58971b83365eba5d3d357352a07a7a4df0d95f51c78"}, - {file = "black-22.6.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:e439798f819d49ba1c0bd9664427a05aab79bfba777a6db94fd4e56fae0cb849"}, - {file = "black-22.6.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:187d96c5e713f441a5829e77120c269b6514418f4513a390b0499b0987f2ff1c"}, - {file = "black-22.6.0-cp37-cp37m-win_amd64.whl", hash = "sha256:074458dc2f6e0d3dab7928d4417bb6957bb834434516f21514138437accdbe90"}, - {file = "black-22.6.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:a218d7e5856f91d20f04e931b6f16d15356db1c846ee55f01bac297a705ca24f"}, - {file = "black-22.6.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:568ac3c465b1c8b34b61cd7a4e349e93f91abf0f9371eda1cf87194663ab684e"}, - {file = "black-22.6.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:6c1734ab264b8f7929cef8ae5f900b85d579e6cbfde09d7387da8f04771b51c6"}, - {file = "black-22.6.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c9a3ac16efe9ec7d7381ddebcc022119794872abce99475345c5a61aa18c45ad"}, - {file = "black-22.6.0-cp38-cp38-win_amd64.whl", hash = "sha256:b9fd45787ba8aa3f5e0a0a98920c1012c884622c6c920dbe98dbd05bc7c70fbf"}, - {file = "black-22.6.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:7ba9be198ecca5031cd78745780d65a3f75a34b2ff9be5837045dce55db83d1c"}, - {file = "black-22.6.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a3db5b6409b96d9bd543323b23ef32a1a2b06416d525d27e0f67e74f1446c8f2"}, - {file = "black-22.6.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:560558527e52ce8afba936fcce93a7411ab40c7d5fe8c2463e279e843c0328ee"}, - {file = 
"black-22.6.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b154e6bbde1e79ea3260c4b40c0b7b3109ffcdf7bc4ebf8859169a6af72cd70b"}, - {file = "black-22.6.0-cp39-cp39-win_amd64.whl", hash = "sha256:4af5bc0e1f96be5ae9bd7aaec219c901a94d6caa2484c21983d043371c733fc4"}, - {file = "black-22.6.0-py3-none-any.whl", hash = "sha256:ac609cf8ef5e7115ddd07d85d988d074ed00e10fbc3445aee393e70164a2219c"}, - {file = "black-22.6.0.tar.gz", hash = "sha256:6c6d39e28aed379aec40da1c65434c77d75e65bb59a1e1c283de545fb4e7c6c9"}, +black = [] +certifi = [ + {file = "certifi-2022.6.15-py3-none-any.whl", hash = "sha256:fe86415d55e84719d75f8b69414f6438ac3547d2078ab91b67e779ef69378412"}, + {file = "certifi-2022.6.15.tar.gz", hash = "sha256:84c85a9078b11105f04f3036a9482ae10e4621616db313fe045dd24743a0820d"}, ] -click = [ - {file = "click-8.1.3-py3-none-any.whl", hash = "sha256:bb4d8133cb15a609f44e8213d9b391b0809795062913b383c62be0ee95b1db48"}, - {file = "click-8.1.3.tar.gz", hash = "sha256:7682dc8afb30297001674575ea00d1814d808d6a36af415a82bd481d37ba7b8e"}, +charset-normalizer = [ + {file = "charset-normalizer-2.1.1.tar.gz", hash = "sha256:5a3d016c7c547f69d6f81fb0db9449ce888b418b5b9952cc5e6e66843e9dd845"}, + {file = "charset_normalizer-2.1.1-py3-none-any.whl", hash = "sha256:83e9a75d1911279afd89352c68b45348559d1fc0506b054b346651b5e7fee29f"}, ] -colorama = [ - {file = "colorama-0.4.5-py2.py3-none-any.whl", hash = "sha256:854bf444933e37f5824ae7bfc1e98d5bce2ebe4160d46b5edf346a89358e99da"}, - {file = "colorama-0.4.5.tar.gz", hash = "sha256:e6c6b4334fc50988a639d9b98aa429a0b57da6e17b9a44f0451f930b6967b7a4"}, +click = [] +colorama = [] +dill = [ + {file = "dill-0.3.5.1-py2.py3-none-any.whl", hash = "sha256:33501d03270bbe410c72639b350e941882a8b0fd55357580fbc873fba0c59302"}, + {file = "dill-0.3.5.1.tar.gz", hash = "sha256:d75e41f3eff1eee599d738e76ba8f4ad98ea229db8b085318aa2b3333a208c86"}, ] -importlib-metadata = [ - {file = "importlib_metadata-4.12.0-py3-none-any.whl", hash = 
"sha256:7401a975809ea1fdc658c3aa4f78cc2195a0e019c5cbc4c06122884e9ae80c23"}, - {file = "importlib_metadata-4.12.0.tar.gz", hash = "sha256:637245b8bab2b6502fcbc752cc4b7a6f6243bb02b31c5c26156ad103d3d45670"}, +idna = [ + {file = "idna-3.3-py3-none-any.whl", hash = "sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff"}, + {file = "idna-3.3.tar.gz", hash = "sha256:9d643ff0a55b762d5cdb124b8eaa99c66322e2157b69160bc32796e824360e6d"}, ] -iniconfig = [ - {file = "iniconfig-1.1.1-py2.py3-none-any.whl", hash = "sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3"}, - {file = "iniconfig-1.1.1.tar.gz", hash = "sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32"}, +importlib-metadata = [] +iniconfig = [] +isort = [ + {file = "isort-5.10.1-py3-none-any.whl", hash = "sha256:6f62d78e2f89b4500b080fe3a81690850cd254227f27f75c3a0c491a1f351ba7"}, + {file = "isort-5.10.1.tar.gz", hash = "sha256:e8443a5e7a020e9d7f97f1d7d9cd17c88bcb3bc7e218bf9cf5095fe550be2951"}, +] +lazy-object-proxy = [ + {file = "lazy-object-proxy-1.7.1.tar.gz", hash = "sha256:d609c75b986def706743cdebe5e47553f4a5a1da9c5ff66d76013ef396b5a8a4"}, + {file = "lazy_object_proxy-1.7.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:bb8c5fd1684d60a9902c60ebe276da1f2281a318ca16c1d0a96db28f62e9166b"}, + {file = "lazy_object_proxy-1.7.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a57d51ed2997e97f3b8e3500c984db50a554bb5db56c50b5dab1b41339b37e36"}, + {file = "lazy_object_proxy-1.7.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd45683c3caddf83abbb1249b653a266e7069a09f486daa8863fb0e7496a9fdb"}, + {file = "lazy_object_proxy-1.7.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:8561da8b3dd22d696244d6d0d5330618c993a215070f473b699e00cf1f3f6443"}, + {file = "lazy_object_proxy-1.7.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = 
"sha256:fccdf7c2c5821a8cbd0a9440a456f5050492f2270bd54e94360cac663398739b"}, + {file = "lazy_object_proxy-1.7.1-cp310-cp310-win32.whl", hash = "sha256:898322f8d078f2654d275124a8dd19b079080ae977033b713f677afcfc88e2b9"}, + {file = "lazy_object_proxy-1.7.1-cp310-cp310-win_amd64.whl", hash = "sha256:85b232e791f2229a4f55840ed54706110c80c0a210d076eee093f2b2e33e1bfd"}, + {file = "lazy_object_proxy-1.7.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:46ff647e76f106bb444b4533bb4153c7370cdf52efc62ccfc1a28bdb3cc95442"}, + {file = "lazy_object_proxy-1.7.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:12f3bb77efe1367b2515f8cb4790a11cffae889148ad33adad07b9b55e0ab22c"}, + {file = "lazy_object_proxy-1.7.1-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c19814163728941bb871240d45c4c30d33b8a2e85972c44d4e63dd7107faba44"}, + {file = "lazy_object_proxy-1.7.1-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:e40f2013d96d30217a51eeb1db28c9ac41e9d0ee915ef9d00da639c5b63f01a1"}, + {file = "lazy_object_proxy-1.7.1-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:2052837718516a94940867e16b1bb10edb069ab475c3ad84fd1e1a6dd2c0fcfc"}, + {file = "lazy_object_proxy-1.7.1-cp36-cp36m-win32.whl", hash = "sha256:6a24357267aa976abab660b1d47a34aaf07259a0c3859a34e536f1ee6e76b5bb"}, + {file = "lazy_object_proxy-1.7.1-cp36-cp36m-win_amd64.whl", hash = "sha256:6aff3fe5de0831867092e017cf67e2750c6a1c7d88d84d2481bd84a2e019ec35"}, + {file = "lazy_object_proxy-1.7.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:6a6e94c7b02641d1311228a102607ecd576f70734dc3d5e22610111aeacba8a0"}, + {file = "lazy_object_proxy-1.7.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c4ce15276a1a14549d7e81c243b887293904ad2d94ad767f42df91e75fd7b5b6"}, + {file = "lazy_object_proxy-1.7.1-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:e368b7f7eac182a59ff1f81d5f3802161932a41dc1b1cc45c1f757dc876b5d2c"}, + {file = "lazy_object_proxy-1.7.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:6ecbb350991d6434e1388bee761ece3260e5228952b1f0c46ffc800eb313ff42"}, + {file = "lazy_object_proxy-1.7.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:553b0f0d8dbf21890dd66edd771f9b1b5f51bd912fa5f26de4449bfc5af5e029"}, + {file = "lazy_object_proxy-1.7.1-cp37-cp37m-win32.whl", hash = "sha256:c7a683c37a8a24f6428c28c561c80d5f4fd316ddcf0c7cab999b15ab3f5c5c69"}, + {file = "lazy_object_proxy-1.7.1-cp37-cp37m-win_amd64.whl", hash = "sha256:df2631f9d67259dc9620d831384ed7732a198eb434eadf69aea95ad18c587a28"}, + {file = "lazy_object_proxy-1.7.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:07fa44286cda977bd4803b656ffc1c9b7e3bc7dff7d34263446aec8f8c96f88a"}, + {file = "lazy_object_proxy-1.7.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4dca6244e4121c74cc20542c2ca39e5c4a5027c81d112bfb893cf0790f96f57e"}, + {file = "lazy_object_proxy-1.7.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:91ba172fc5b03978764d1df5144b4ba4ab13290d7bab7a50f12d8117f8630c38"}, + {file = "lazy_object_proxy-1.7.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:043651b6cb706eee4f91854da4a089816a6606c1428fd391573ef8cb642ae4f7"}, + {file = "lazy_object_proxy-1.7.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:b9e89b87c707dd769c4ea91f7a31538888aad05c116a59820f28d59b3ebfe25a"}, + {file = "lazy_object_proxy-1.7.1-cp38-cp38-win32.whl", hash = "sha256:9d166602b525bf54ac994cf833c385bfcc341b364e3ee71e3bf5a1336e677b55"}, + {file = "lazy_object_proxy-1.7.1-cp38-cp38-win_amd64.whl", hash = "sha256:8f3953eb575b45480db6568306893f0bd9d8dfeeebd46812aa09ca9579595148"}, + {file = "lazy_object_proxy-1.7.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:dd7ed7429dbb6c494aa9bc4e09d94b778a3579be699f9d67da7e6804c422d3de"}, + {file = 
"lazy_object_proxy-1.7.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:70ed0c2b380eb6248abdef3cd425fc52f0abd92d2b07ce26359fcbc399f636ad"}, + {file = "lazy_object_proxy-1.7.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7096a5e0c1115ec82641afbdd70451a144558ea5cf564a896294e346eb611be1"}, + {file = "lazy_object_proxy-1.7.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:f769457a639403073968d118bc70110e7dce294688009f5c24ab78800ae56dc8"}, + {file = "lazy_object_proxy-1.7.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:39b0e26725c5023757fc1ab2a89ef9d7ab23b84f9251e28f9cc114d5b59c1b09"}, + {file = "lazy_object_proxy-1.7.1-cp39-cp39-win32.whl", hash = "sha256:2130db8ed69a48a3440103d4a520b89d8a9405f1b06e2cc81640509e8bf6548f"}, + {file = "lazy_object_proxy-1.7.1-cp39-cp39-win_amd64.whl", hash = "sha256:677ea950bef409b47e51e733283544ac3d660b709cfce7b187f5ace137960d61"}, + {file = "lazy_object_proxy-1.7.1-pp37.pp38-none-any.whl", hash = "sha256:d66906d5785da8e0be7360912e99c9188b70f52c422f9fc18223347235691a84"}, +] +mccabe = [ + {file = "mccabe-0.7.0-py2.py3-none-any.whl", hash = "sha256:6c2d30ab6be0e4a46919781807b4f0d834ebdd6c6e3dca0bda5a15f863427b6e"}, + {file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"}, ] lz4 = [ {file = "lz4-4.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:3881573c3db902db370e072eb64b40c7c8289b94b2a731e051858cc198f890e8"}, @@ -436,10 +620,7 @@ mypy = [ {file = "mypy-0.950-py3-none-any.whl", hash = "sha256:a4d9898f46446bfb6405383b57b96737dcfd0a7f25b748e78ef3e8c576bba3cb"}, {file = "mypy-0.950.tar.gz", hash = "sha256:1b333cfbca1762ff15808a0ef4f71b5d3eed8528b23ea1c3fb50543c867d68de"}, ] -mypy-extensions = [ - {file = "mypy_extensions-0.4.3-py2.py3-none-any.whl", hash = "sha256:090fedd75945a69ae91ce1303b5824f428daf5a028d2f6ab8a299250a846f15d"}, - {file = 
"mypy_extensions-0.4.3.tar.gz", hash = "sha256:2d82818f5bb3e369420cb3c4060a7970edba416647068eb4c5343488a6c604a8"}, -] +mypy-extensions = [] numpy = [ {file = "numpy-1.21.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:38e8648f9449a549a7dfe8d8755a5979b45b3538520d1e735637ef28e8c2dc50"}, {file = "numpy-1.21.1-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:fd7d7409fa643a91d0a05c7554dd68aa9c9bb16e186f6ccfe40d6e003156e33a"}, @@ -470,53 +651,13 @@ numpy = [ {file = "numpy-1.21.1-pp37-pypy37_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:2d4d1de6e6fb3d28781c73fbde702ac97f03d79e4ffd6598b880b2d95d62ead4"}, {file = "numpy-1.21.1.zip", hash = "sha256:dff4af63638afcc57a3dfb9e4b26d434a7a602d225b42d746ea7fe2edf1342fd"}, ] -packaging = [ - {file = "packaging-21.3-py3-none-any.whl", hash = "sha256:ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522"}, - {file = "packaging-21.3.tar.gz", hash = "sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb"}, -] -pandas = [ - {file = "pandas-1.3.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:62d5b5ce965bae78f12c1c0df0d387899dd4211ec0bdc52822373f13a3a022b9"}, - {file = "pandas-1.3.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:adfeb11be2d54f275142c8ba9bf67acee771b7186a5745249c7d5a06c670136b"}, - {file = "pandas-1.3.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:60a8c055d58873ad81cae290d974d13dd479b82cbb975c3e1fa2cf1920715296"}, - {file = "pandas-1.3.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fd541ab09e1f80a2a1760032d665f6e032d8e44055d602d65eeea6e6e85498cb"}, - {file = "pandas-1.3.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2651d75b9a167cc8cc572cf787ab512d16e316ae00ba81874b560586fa1325e0"}, - {file = "pandas-1.3.5-cp310-cp310-win_amd64.whl", hash = "sha256:aaf183a615ad790801fa3cf2fa450e5b6d23a54684fe386f7e3208f8b9bfbef6"}, - {file = 
"pandas-1.3.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:344295811e67f8200de2390093aeb3c8309f5648951b684d8db7eee7d1c81fb7"}, - {file = "pandas-1.3.5-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:552020bf83b7f9033b57cbae65589c01e7ef1544416122da0c79140c93288f56"}, - {file = "pandas-1.3.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5cce0c6bbeb266b0e39e35176ee615ce3585233092f685b6a82362523e59e5b4"}, - {file = "pandas-1.3.5-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7d28a3c65463fd0d0ba8bbb7696b23073efee0510783340a44b08f5e96ffce0c"}, - {file = "pandas-1.3.5-cp37-cp37m-win32.whl", hash = "sha256:a62949c626dd0ef7de11de34b44c6475db76995c2064e2d99c6498c3dba7fe58"}, - {file = "pandas-1.3.5-cp37-cp37m-win_amd64.whl", hash = "sha256:8025750767e138320b15ca16d70d5cdc1886e8f9cc56652d89735c016cd8aea6"}, - {file = "pandas-1.3.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:fe95bae4e2d579812865db2212bb733144e34d0c6785c0685329e5b60fcb85dd"}, - {file = "pandas-1.3.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5f261553a1e9c65b7a310302b9dbac31cf0049a51695c14ebe04e4bfd4a96f02"}, - {file = "pandas-1.3.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8b6dbec5f3e6d5dc80dcfee250e0a2a652b3f28663492f7dab9a24416a48ac39"}, - {file = "pandas-1.3.5-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d3bc49af96cd6285030a64779de5b3688633a07eb75c124b0747134a63f4c05f"}, - {file = "pandas-1.3.5-cp38-cp38-win32.whl", hash = "sha256:b6b87b2fb39e6383ca28e2829cddef1d9fc9e27e55ad91ca9c435572cdba51bf"}, - {file = "pandas-1.3.5-cp38-cp38-win_amd64.whl", hash = "sha256:a395692046fd8ce1edb4c6295c35184ae0c2bbe787ecbe384251da609e27edcb"}, - {file = "pandas-1.3.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = 
"sha256:bd971a3f08b745a75a86c00b97f3007c2ea175951286cdda6abe543e687e5f2f"}, - {file = "pandas-1.3.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:37f06b59e5bc05711a518aa10beaec10942188dccb48918bb5ae602ccbc9f1a0"}, - {file = "pandas-1.3.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2c21778a688d3712d35710501f8001cdbf96eb70a7c587a3d5613573299fdca6"}, - {file = "pandas-1.3.5-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3345343206546545bc26a05b4602b6a24385b5ec7c75cb6059599e3d56831da2"}, - {file = "pandas-1.3.5-cp39-cp39-win32.whl", hash = "sha256:c69406a2808ba6cf580c2255bcf260b3f214d2664a3a4197d0e640f573b46fd3"}, - {file = "pandas-1.3.5-cp39-cp39-win_amd64.whl", hash = "sha256:32e1a26d5ade11b547721a72f9bfc4bd113396947606e00d5b4a5b79b3dcb006"}, - {file = "pandas-1.3.5.tar.gz", hash = "sha256:1e4285f5de1012de20ca46b188ccf33521bff61ba5c5ebd78b4fb28e5416a9f1"}, -] -pathspec = [ - {file = "pathspec-0.9.0-py2.py3-none-any.whl", hash = "sha256:7d15c4ddb0b5c802d161efc417ec1a2558ea2653c2e8ad9c19098201dc1c993a"}, - {file = "pathspec-0.9.0.tar.gz", hash = "sha256:e564499435a2673d586f6b2130bb5b95f04a3ba06f81b8f895b651a3c76aabb1"}, -] -platformdirs = [ - {file = "platformdirs-2.5.2-py3-none-any.whl", hash = "sha256:027d8e83a2d7de06bbac4e5ef7e023c02b863d7ea5d079477e722bb41ab25788"}, - {file = "platformdirs-2.5.2.tar.gz", hash = "sha256:58c8abb07dcb441e6ee4b11d8df0ac856038f944ab98b7be6b27b2a3c7feef19"}, -] -pluggy = [ - {file = "pluggy-1.0.0-py2.py3-none-any.whl", hash = "sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3"}, - {file = "pluggy-1.0.0.tar.gz", hash = "sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159"}, -] -py = [ - {file = "py-1.11.0-py2.py3-none-any.whl", hash = "sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378"}, - {file = "py-1.11.0.tar.gz", hash = 
"sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719"}, -] +oauthlib = [] +packaging = [] +pandas = [] +pathspec = [] +platformdirs = [] +pluggy = [] +py = [] pyarrow = [ {file = "pyarrow-9.0.0-cp310-cp310-macosx_10_13_universal2.whl", hash = "sha256:767cafb14278165ad539a2918c14c1b73cf20689747c21375c38e3fe62884902"}, {file = "pyarrow-9.0.0-cp310-cp310-macosx_10_13_x86_64.whl", hash = "sha256:0238998dc692efcb4e41ae74738d7c1234723271ccf520bd8312dca07d49ef8d"}, @@ -545,13 +686,15 @@ pyarrow = [ {file = "pyarrow-9.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:fe2ce795fa1d95e4e940fe5661c3c58aee7181c730f65ac5dd8794a77228de59"}, {file = "pyarrow-9.0.0.tar.gz", hash = "sha256:7fb02bebc13ab55573d1ae9bb5002a6d20ba767bf8569b52fce5301d42495ab7"}, ] -pyparsing = [ - {file = "pyparsing-3.0.9-py3-none-any.whl", hash = "sha256:5026bae9a10eeaefb61dab2f09052b9f4307d44aee4eda64b309723d8d206bbc"}, - {file = "pyparsing-3.0.9.tar.gz", hash = "sha256:2b020ecf7d21b687f219b71ecad3631f644a47f01403fa1d1036b0c6416d70fb"}, +pylint = [ + {file = "pylint-2.13.9-py3-none-any.whl", hash = "sha256:705c620d388035bdd9ff8b44c5bcdd235bfb49d276d488dd2c8ff1736aa42526"}, + {file = "pylint-2.13.9.tar.gz", hash = "sha256:095567c96e19e6f57b5b907e67d265ff535e588fe26b12b5ebe1fc5645b2c731"}, ] -pytest = [ - {file = "pytest-7.1.2-py3-none-any.whl", hash = "sha256:13d0e3ccfc2b6e26be000cb6568c832ba67ba32e719443bfe725814d3c42433c"}, - {file = "pytest-7.1.2.tar.gz", hash = "sha256:a06a0425453864a270bc45e71f783330a7428defb4230fb5e6a731fde06ecd45"}, +pyparsing = [] +pytest = [] +python-dateutil = [ + {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"}, + {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"}, ] python-dateutil = [ {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"}, 
@@ -561,48 +704,83 @@ pytz = [ {file = "pytz-2022.2.1-py2.py3-none-any.whl", hash = "sha256:220f481bdafa09c3955dfbdddb7b57780e9a94f5127e35456a48589b9e0c0197"}, {file = "pytz-2022.2.1.tar.gz", hash = "sha256:cea221417204f2d1a2aa03ddae3e867921971d0d76f14d87abb4414415bbdcf5"}, ] -six = [ - {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, - {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, -] -thrift = [ - {file = "thrift-0.13.0.tar.gz", hash = "sha256:9af1c86bf73433afc6010ed376a6c6aca2b54099cc0d61895f640870a9ae7d89"}, -] -tomli = [ - {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, - {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, -] -typed-ast = [ - {file = "typed_ast-1.5.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:669dd0c4167f6f2cd9f57041e03c3c2ebf9063d0757dc89f79ba1daa2bfca9d4"}, - {file = "typed_ast-1.5.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:211260621ab1cd7324e0798d6be953d00b74e0428382991adfddb352252f1d62"}, - {file = "typed_ast-1.5.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:267e3f78697a6c00c689c03db4876dd1efdfea2f251a5ad6555e82a26847b4ac"}, - {file = "typed_ast-1.5.4-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:c542eeda69212fa10a7ada75e668876fdec5f856cd3d06829e6aa64ad17c8dfe"}, - {file = "typed_ast-1.5.4-cp310-cp310-win_amd64.whl", hash = "sha256:a9916d2bb8865f973824fb47436fa45e1ebf2efd920f2b9f99342cb7fab93f72"}, - {file = "typed_ast-1.5.4-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:79b1e0869db7c830ba6a981d58711c88b6677506e648496b1f64ac7d15633aec"}, - {file = "typed_ast-1.5.4-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:a94d55d142c9265f4ea46fab70977a1944ecae359ae867397757d836ea5a3f47"}, - {file = "typed_ast-1.5.4-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:183afdf0ec5b1b211724dfef3d2cad2d767cbefac291f24d69b00546c1837fb6"}, - {file = "typed_ast-1.5.4-cp36-cp36m-win_amd64.whl", hash = "sha256:639c5f0b21776605dd6c9dbe592d5228f021404dafd377e2b7ac046b0349b1a1"}, - {file = "typed_ast-1.5.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:cf4afcfac006ece570e32d6fa90ab74a17245b83dfd6655a6f68568098345ff6"}, - {file = "typed_ast-1.5.4-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ed855bbe3eb3715fca349c80174cfcfd699c2f9de574d40527b8429acae23a66"}, - {file = "typed_ast-1.5.4-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:6778e1b2f81dfc7bc58e4b259363b83d2e509a65198e85d5700dfae4c6c8ff1c"}, - {file = "typed_ast-1.5.4-cp37-cp37m-win_amd64.whl", hash = "sha256:0261195c2062caf107831e92a76764c81227dae162c4f75192c0d489faf751a2"}, - {file = "typed_ast-1.5.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2efae9db7a8c05ad5547d522e7dbe62c83d838d3906a3716d1478b6c1d61388d"}, - {file = "typed_ast-1.5.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:7d5d014b7daa8b0bf2eaef684295acae12b036d79f54178b92a2b6a56f92278f"}, - {file = "typed_ast-1.5.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:370788a63915e82fd6f212865a596a0fefcbb7d408bbbb13dea723d971ed8bdc"}, - {file = "typed_ast-1.5.4-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:4e964b4ff86550a7a7d56345c7864b18f403f5bd7380edf44a3c1fb4ee7ac6c6"}, - {file = "typed_ast-1.5.4-cp38-cp38-win_amd64.whl", hash = "sha256:683407d92dc953c8a7347119596f0b0e6c55eb98ebebd9b23437501b28dcbb8e"}, - {file = "typed_ast-1.5.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = 
"sha256:4879da6c9b73443f97e731b617184a596ac1235fe91f98d279a7af36c796da35"}, - {file = "typed_ast-1.5.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3e123d878ba170397916557d31c8f589951e353cc95fb7f24f6bb69adc1a8a97"}, - {file = "typed_ast-1.5.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ebd9d7f80ccf7a82ac5f88c521115cc55d84e35bf8b446fcd7836eb6b98929a3"}, - {file = "typed_ast-1.5.4-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:98f80dee3c03455e92796b58b98ff6ca0b2a6f652120c263efdba4d6c5e58f72"}, - {file = "typed_ast-1.5.4-cp39-cp39-win_amd64.whl", hash = "sha256:0fdbcf2fef0ca421a3f5912555804296f0b0960f0418c440f5d6d3abb549f3e1"}, - {file = "typed_ast-1.5.4.tar.gz", hash = "sha256:39e21ceb7388e4bb37f4c679d72707ed46c2fbf2a5609b8b8ebc4b067d977df2"}, +requests = [ + {file = "requests-2.28.1-py3-none-any.whl", hash = "sha256:8fefa2a1a1365bf5520aac41836fbee479da67864514bdb821f31ce07ce65349"}, + {file = "requests-2.28.1.tar.gz", hash = "sha256:7c5599b102feddaa661c826c56ab4fee28bfd17f5abca1ebbe3e7f19d7c97983"}, ] -typing-extensions = [ - {file = "typing_extensions-4.3.0-py3-none-any.whl", hash = "sha256:25642c956049920a5aa49edcdd6ab1e06d7e5d467fc00e0506c44ac86fbfca02"}, - {file = "typing_extensions-4.3.0.tar.gz", hash = "sha256:e6d2677a32f47fc7eb2795db1dd15c1f34eff616bcaf2cfb5e997f854fa1c4a6"}, +six = [] +thrift = [] +tomli = [] +typed-ast = [] +typing-extensions = [] +urllib3 = [ + {file = "urllib3-1.26.12-py2.py3-none-any.whl", hash = "sha256:b930dd878d5a8afb066a637fbb35144fe7901e3b209d1cd4f524bd0e9deee997"}, + {file = "urllib3-1.26.12.tar.gz", hash = "sha256:3fa96cf423e6987997fc326ae8df396db2a8b7c667747d47ddd8ecba91f4a74e"}, ] -zipp = [ - {file = "zipp-3.8.1-py3-none-any.whl", hash = "sha256:47c40d7fe183a6f21403a199b3e4192cca5774656965b0a4988ad2f8feb5f009"}, - {file = "zipp-3.8.1.tar.gz", hash = "sha256:05b45f1ee8f807d0cc928485ca40a07cb491cf092ff587c0df9cb1fd154848d2"}, 
+wrapt = [ + {file = "wrapt-1.14.1-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:1b376b3f4896e7930f1f772ac4b064ac12598d1c38d04907e696cc4d794b43d3"}, + {file = "wrapt-1.14.1-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:903500616422a40a98a5a3c4ff4ed9d0066f3b4c951fa286018ecdf0750194ef"}, + {file = "wrapt-1.14.1-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:5a9a0d155deafd9448baff28c08e150d9b24ff010e899311ddd63c45c2445e28"}, + {file = "wrapt-1.14.1-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:ddaea91abf8b0d13443f6dac52e89051a5063c7d014710dcb4d4abb2ff811a59"}, + {file = "wrapt-1.14.1-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:36f582d0c6bc99d5f39cd3ac2a9062e57f3cf606ade29a0a0d6b323462f4dd87"}, + {file = "wrapt-1.14.1-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:7ef58fb89674095bfc57c4069e95d7a31cfdc0939e2a579882ac7d55aadfd2a1"}, + {file = "wrapt-1.14.1-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:e2f83e18fe2f4c9e7db597e988f72712c0c3676d337d8b101f6758107c42425b"}, + {file = "wrapt-1.14.1-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:ee2b1b1769f6707a8a445162ea16dddf74285c3964f605877a20e38545c3c462"}, + {file = "wrapt-1.14.1-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:833b58d5d0b7e5b9832869f039203389ac7cbf01765639c7309fd50ef619e0b1"}, + {file = "wrapt-1.14.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:80bb5c256f1415f747011dc3604b59bc1f91c6e7150bd7db03b19170ee06b320"}, + {file = "wrapt-1.14.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:07f7a7d0f388028b2df1d916e94bbb40624c59b48ecc6cbc232546706fac74c2"}, + {file = "wrapt-1.14.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:02b41b633c6261feff8ddd8d11c711df6842aba629fdd3da10249a53211a72c4"}, + {file = "wrapt-1.14.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2fe803deacd09a233e4762a1adcea5db5d31e6be577a43352936179d14d90069"}, + {file = 
"wrapt-1.14.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:257fd78c513e0fb5cdbe058c27a0624c9884e735bbd131935fd49e9fe719d310"}, + {file = "wrapt-1.14.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:4fcc4649dc762cddacd193e6b55bc02edca674067f5f98166d7713b193932b7f"}, + {file = "wrapt-1.14.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:11871514607b15cfeb87c547a49bca19fde402f32e2b1c24a632506c0a756656"}, + {file = "wrapt-1.14.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8ad85f7f4e20964db4daadcab70b47ab05c7c1cf2a7c1e51087bfaa83831854c"}, + {file = "wrapt-1.14.1-cp310-cp310-win32.whl", hash = "sha256:a9a52172be0b5aae932bef82a79ec0a0ce87288c7d132946d645eba03f0ad8a8"}, + {file = "wrapt-1.14.1-cp310-cp310-win_amd64.whl", hash = "sha256:6d323e1554b3d22cfc03cd3243b5bb815a51f5249fdcbb86fda4bf62bab9e164"}, + {file = "wrapt-1.14.1-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:43ca3bbbe97af00f49efb06e352eae40434ca9d915906f77def219b88e85d907"}, + {file = "wrapt-1.14.1-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:6b1a564e6cb69922c7fe3a678b9f9a3c54e72b469875aa8018f18b4d1dd1adf3"}, + {file = "wrapt-1.14.1-cp35-cp35m-manylinux2010_i686.whl", hash = "sha256:00b6d4ea20a906c0ca56d84f93065b398ab74b927a7a3dbd470f6fc503f95dc3"}, + {file = "wrapt-1.14.1-cp35-cp35m-manylinux2010_x86_64.whl", hash = "sha256:a85d2b46be66a71bedde836d9e41859879cc54a2a04fad1191eb50c2066f6e9d"}, + {file = "wrapt-1.14.1-cp35-cp35m-win32.whl", hash = "sha256:dbcda74c67263139358f4d188ae5faae95c30929281bc6866d00573783c422b7"}, + {file = "wrapt-1.14.1-cp35-cp35m-win_amd64.whl", hash = "sha256:b21bb4c09ffabfa0e85e3a6b623e19b80e7acd709b9f91452b8297ace2a8ab00"}, + {file = "wrapt-1.14.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:9e0fd32e0148dd5dea6af5fee42beb949098564cc23211a88d799e434255a1f4"}, + {file = "wrapt-1.14.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:9736af4641846491aedb3c3f56b9bc5568d92b0692303b5a305301a95dfd38b1"}, + {file = "wrapt-1.14.1-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5b02d65b9ccf0ef6c34cba6cf5bf2aab1bb2f49c6090bafeecc9cd81ad4ea1c1"}, + {file = "wrapt-1.14.1-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:21ac0156c4b089b330b7666db40feee30a5d52634cc4560e1905d6529a3897ff"}, + {file = "wrapt-1.14.1-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:9f3e6f9e05148ff90002b884fbc2a86bd303ae847e472f44ecc06c2cd2fcdb2d"}, + {file = "wrapt-1.14.1-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:6e743de5e9c3d1b7185870f480587b75b1cb604832e380d64f9504a0535912d1"}, + {file = "wrapt-1.14.1-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:d79d7d5dc8a32b7093e81e97dad755127ff77bcc899e845f41bf71747af0c569"}, + {file = "wrapt-1.14.1-cp36-cp36m-win32.whl", hash = "sha256:81b19725065dcb43df02b37e03278c011a09e49757287dca60c5aecdd5a0b8ed"}, + {file = "wrapt-1.14.1-cp36-cp36m-win_amd64.whl", hash = "sha256:b014c23646a467558be7da3d6b9fa409b2c567d2110599b7cf9a0c5992b3b471"}, + {file = "wrapt-1.14.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:88bd7b6bd70a5b6803c1abf6bca012f7ed963e58c68d76ee20b9d751c74a3248"}, + {file = "wrapt-1.14.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b5901a312f4d14c59918c221323068fad0540e34324925c8475263841dbdfe68"}, + {file = "wrapt-1.14.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d77c85fedff92cf788face9bfa3ebaa364448ebb1d765302e9af11bf449ca36d"}, + {file = "wrapt-1.14.1-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8d649d616e5c6a678b26d15ece345354f7c2286acd6db868e65fcc5ff7c24a77"}, + {file = "wrapt-1.14.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = 
"sha256:7d2872609603cb35ca513d7404a94d6d608fc13211563571117046c9d2bcc3d7"}, + {file = "wrapt-1.14.1-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:ee6acae74a2b91865910eef5e7de37dc6895ad96fa23603d1d27ea69df545015"}, + {file = "wrapt-1.14.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:2b39d38039a1fdad98c87279b48bc5dce2c0ca0d73483b12cb72aa9609278e8a"}, + {file = "wrapt-1.14.1-cp37-cp37m-win32.whl", hash = "sha256:60db23fa423575eeb65ea430cee741acb7c26a1365d103f7b0f6ec412b893853"}, + {file = "wrapt-1.14.1-cp37-cp37m-win_amd64.whl", hash = "sha256:709fe01086a55cf79d20f741f39325018f4df051ef39fe921b1ebe780a66184c"}, + {file = "wrapt-1.14.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:8c0ce1e99116d5ab21355d8ebe53d9460366704ea38ae4d9f6933188f327b456"}, + {file = "wrapt-1.14.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:e3fb1677c720409d5f671e39bac6c9e0e422584e5f518bfd50aa4cbbea02433f"}, + {file = "wrapt-1.14.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:642c2e7a804fcf18c222e1060df25fc210b9c58db7c91416fb055897fc27e8cc"}, + {file = "wrapt-1.14.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7b7c050ae976e286906dd3f26009e117eb000fb2cf3533398c5ad9ccc86867b1"}, + {file = "wrapt-1.14.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ef3f72c9666bba2bab70d2a8b79f2c6d2c1a42a7f7e2b0ec83bb2f9e383950af"}, + {file = "wrapt-1.14.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:01c205616a89d09827986bc4e859bcabd64f5a0662a7fe95e0d359424e0e071b"}, + {file = "wrapt-1.14.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:5a0f54ce2c092aaf439813735584b9537cad479575a09892b8352fea5e988dc0"}, + {file = "wrapt-1.14.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:2cf71233a0ed05ccdabe209c606fe0bac7379fdcf687f39b944420d2a09fdb57"}, + {file = "wrapt-1.14.1-cp38-cp38-win32.whl", hash = 
"sha256:aa31fdcc33fef9eb2552cbcbfee7773d5a6792c137b359e82879c101e98584c5"}, + {file = "wrapt-1.14.1-cp38-cp38-win_amd64.whl", hash = "sha256:d1967f46ea8f2db647c786e78d8cc7e4313dbd1b0aca360592d8027b8508e24d"}, + {file = "wrapt-1.14.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3232822c7d98d23895ccc443bbdf57c7412c5a65996c30442ebe6ed3df335383"}, + {file = "wrapt-1.14.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:988635d122aaf2bdcef9e795435662bcd65b02f4f4c1ae37fbee7401c440b3a7"}, + {file = "wrapt-1.14.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9cca3c2cdadb362116235fdbd411735de4328c61425b0aa9f872fd76d02c4e86"}, + {file = "wrapt-1.14.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d52a25136894c63de15a35bc0bdc5adb4b0e173b9c0d07a2be9d3ca64a332735"}, + {file = "wrapt-1.14.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:40e7bc81c9e2b2734ea4bc1aceb8a8f0ceaac7c5299bc5d69e37c44d9081d43b"}, + {file = "wrapt-1.14.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:b9b7a708dd92306328117d8c4b62e2194d00c365f18eff11a9b53c6f923b01e3"}, + {file = "wrapt-1.14.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:6a9a25751acb379b466ff6be78a315e2b439d4c94c1e99cb7266d40a537995d3"}, + {file = "wrapt-1.14.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:34aa51c45f28ba7f12accd624225e2b1e5a3a45206aa191f6f9aac931d9d56fe"}, + {file = "wrapt-1.14.1-cp39-cp39-win32.whl", hash = "sha256:dee0ce50c6a2dd9056c20db781e9c1cfd33e77d2d569f5d1d9321c641bb903d5"}, + {file = "wrapt-1.14.1-cp39-cp39-win_amd64.whl", hash = "sha256:dee60e1de1898bde3b238f18340eec6148986da0455d8ba7848d50470a7a32fb"}, + {file = "wrapt-1.14.1.tar.gz", hash = "sha256:380a85cf89e0e69b7cfbe2ea9f765f004ff419f34194018a6827ac0e3edfed4d"}, ] +zipp = [] diff --git a/pyproject.toml b/pyproject.toml index fcd4978e2..bfba04b90 100644 --- a/pyproject.toml +++ 
from enum import Enum
from typing import List, Optional


class AuthType(Enum):
    """Authentication schemes selectable via the ``auth_type`` connect() kwarg."""

    DATABRICKS_OAUTH = "databricks-oauth"
    # Other supported mechanisms (access token, username/password, TLS client
    # cert) are inferred from which credentials are present; add explicit
    # members here as new auth types are introduced.


class ClientContext:
    """Plain value object bundling every setting needed to choose an auth provider.

    All credential fields are optional; :func:`get_auth_provider` decides which
    combination is actually usable.
    """

    def __init__(
        self,
        hostname: str,
        username: Optional[str] = None,
        password: Optional[str] = None,
        access_token: Optional[str] = None,
        auth_type: Optional[str] = None,
        oauth_scopes: Optional[List[str]] = None,
        oauth_client_id: Optional[str] = None,
        oauth_redirect_port_range: Optional[List[int]] = None,
        use_cert_as_auth: Optional[str] = None,
        tls_client_cert_file: Optional[str] = None,
        oauth_persistence=None,
    ):
        self.hostname = hostname
        self.username = username
        self.password = password
        self.access_token = access_token
        self.auth_type = auth_type
        self.oauth_scopes = oauth_scopes
        self.oauth_client_id = oauth_client_id
        self.oauth_redirect_port_range = oauth_redirect_port_range
        self.use_cert_as_auth = use_cert_as_auth
        self.tls_client_cert_file = tls_client_cert_file
        self.oauth_persistence = oauth_persistence


def get_auth_provider(cfg: ClientContext):
    """Map a ClientContext onto a concrete AuthProvider.

    Precedence: explicit ``databricks-oauth`` auth type, then access token,
    then username/password, then TLS client certificate (no-op header
    provider).

    :raises RuntimeError: when no usable credentials are present.
    """
    # Imported lazily so this module can be imported without pulling in the
    # OAuth stack (and to avoid import cycles between the auth modules).
    from databricks.sql.auth.authenticators import (
        AuthProvider,
        AccessTokenAuthProvider,
        BasicAuthProvider,
        DatabricksOAuthProvider,
    )

    if cfg.auth_type == AuthType.DATABRICKS_OAUTH.value:
        assert cfg.oauth_redirect_port_range is not None
        assert cfg.oauth_client_id is not None
        assert cfg.oauth_scopes is not None
        return DatabricksOAuthProvider(
            cfg.hostname,
            cfg.oauth_persistence,
            cfg.oauth_redirect_port_range,
            cfg.oauth_client_id,
            cfg.oauth_scopes,
        )
    elif cfg.access_token is not None:
        return AccessTokenAuthProvider(cfg.access_token)
    elif cfg.username is not None and cfg.password is not None:
        return BasicAuthProvider(cfg.username, cfg.password)
    elif cfg.use_cert_as_auth and cfg.tls_client_cert_file:
        # No-op provider: authentication happens via the SSL client
        # certificate at the transport layer, not via HTTP headers.
        return AuthProvider()
    else:
        raise RuntimeError("No valid authentication settings!")


PYSQL_OAUTH_SCOPES = ["sql", "offline_access"]
PYSQL_OAUTH_CLIENT_ID = "databricks-sql-python"
PYSQL_OAUTH_REDIRECT_PORT_RANGE = list(range(8020, 8025))


def normalize_host_name(hostname: str) -> str:
    """Return *hostname* with an ``https://`` scheme and a trailing slash."""
    maybe_scheme = "https://" if not hostname.startswith("https://") else ""
    maybe_trailing_slash = "/" if not hostname.endswith("/") else ""
    return f"{maybe_scheme}{hostname}{maybe_trailing_slash}"


def get_python_sql_connector_auth_provider(hostname: str, **kwargs):
    """Build the AuthProvider for the Python SQL connector from connect() kwargs."""
    cfg = ClientContext(
        hostname=normalize_host_name(hostname),
        auth_type=kwargs.get("auth_type"),
        access_token=kwargs.get("access_token"),
        username=kwargs.get("_username"),
        password=kwargs.get("_password"),
        use_cert_as_auth=kwargs.get("_use_cert_as_auth"),
        tls_client_cert_file=kwargs.get("_tls_client_cert_file"),
        oauth_scopes=PYSQL_OAUTH_SCOPES,
        oauth_client_id=PYSQL_OAUTH_CLIENT_ID,
        oauth_redirect_port_range=PYSQL_OAUTH_REDIRECT_PORT_RANGE,
        oauth_persistence=kwargs.get("experimental_oauth_persistence"),
    )
    return get_auth_provider(cfg)
# Private API: this is an evolving interface and it will change in the future.
# You must not depend on it in your applications.
class AccessTokenAuthProvider(AuthProvider):
    """Adds a static ``Authorization: Bearer <token>`` header."""

    def __init__(self, access_token: str):
        # The token never changes, so render the header value once.
        self.__header_value = f"Bearer {access_token}"

    def add_headers(self, request_headers: Dict[str, str]):
        request_headers["Authorization"] = self.__header_value


# Private API: this is an evolving interface and it will change in the future.
# You must not depend on it in your applications.
class BasicAuthProvider(AuthProvider):
    """Adds an RFC 7617 ``Authorization: Basic`` header from username/password."""

    def __init__(self, username: str, password: str):
        raw_credentials = f"{username}:{password}".encode("UTF-8")
        encoded_credentials = base64.standard_b64encode(raw_credentials).decode(
            "UTF-8"
        )
        self.__header_value = f"Basic {encoded_credentials}"

    def add_headers(self, request_headers: Dict[str, str]):
        request_headers["Authorization"] = self.__header_value
+class DatabricksOAuthProvider(AuthProvider): + SCOPE_DELIM = " " + + def __init__( + self, + hostname: str, + oauth_persistence: OAuthPersistence, + redirect_port_range: List[int], + client_id: str, + scopes: List[str], + ): + try: + self.oauth_manager = OAuthManager( + port_range=redirect_port_range, client_id=client_id + ) + self._hostname = hostname + self._scopes_as_str = DatabricksOAuthProvider.SCOPE_DELIM.join(scopes) + self._oauth_persistence = oauth_persistence + self._client_id = client_id + self._access_token = None + self._refresh_token = None + self._initial_get_token() + except Exception as e: + logging.error(f"unexpected error", e, exc_info=True) + raise e + + def add_headers(self, request_headers: Dict[str, str]): + self._update_token_if_expired() + request_headers["Authorization"] = f"Bearer {self._access_token}" + + def _initial_get_token(self): + try: + if self._access_token is None or self._refresh_token is None: + if self._oauth_persistence: + token = self._oauth_persistence.read(self._hostname) + if token: + self._access_token = token.access_token + self._refresh_token = token.refresh_token + + if self._access_token and self._refresh_token: + self._update_token_if_expired() + else: + (access_token, refresh_token) = self.oauth_manager.get_tokens( + hostname=self._hostname, scope=self._scopes_as_str + ) + self._access_token = access_token + self._refresh_token = refresh_token + self._oauth_persistence.persist( + self._hostname, OAuthToken(access_token, refresh_token) + ) + except Exception as e: + logging.error(f"unexpected error in oauth initialization", e, exc_info=True) + raise e + + def _update_token_if_expired(self): + try: + ( + fresh_access_token, + fresh_refresh_token, + is_refreshed, + ) = self.oauth_manager.check_and_refresh_access_token( + hostname=self._hostname, + access_token=self._access_token, + refresh_token=self._refresh_token, + ) + if not is_refreshed: + return + else: + self._access_token = fresh_access_token + 
self._refresh_token = fresh_refresh_token + + if self._oauth_persistence: + token = OAuthToken(self._access_token, self._refresh_token) + self._oauth_persistence.persist(self._hostname, token) + except Exception as e: + logging.error(f"unexpected error in oauth token update", e, exc_info=True) + raise e diff --git a/src/databricks/sql/auth/oauth.py b/src/databricks/sql/auth/oauth.py new file mode 100644 index 000000000..0f49aa88f --- /dev/null +++ b/src/databricks/sql/auth/oauth.py @@ -0,0 +1,243 @@ +import base64 +import hashlib +import json +import logging +import secrets +import webbrowser +from datetime import datetime, timezone +from http.server import HTTPServer +from typing import List + +import oauthlib.oauth2 +import requests +from oauthlib.oauth2.rfc6749.errors import OAuth2Error +from requests.exceptions import RequestException + +from databricks.sql.auth.oauth_http_handler import OAuthHttpSingleRequestHandler + +logger = logging.getLogger(__name__) + + +class OAuthManager: + OIDC_REDIRECTOR_PATH = "oidc" + + def __init__(self, port_range: List[int], client_id: str): + self.port_range = port_range + self.client_id = client_id + self.redirect_port = None + + @staticmethod + def __token_urlsafe(nbytes=32): + return secrets.token_urlsafe(nbytes) + + @staticmethod + def __get_redirect_url(redirect_port: int): + return f"http://localhost:{redirect_port}" + + @staticmethod + def __fetch_well_known_config(idp_url: str): + known_config_url = f"{idp_url}/.well-known/oauth-authorization-server" + try: + response = requests.get(url=known_config_url) + except RequestException as e: + logger.error( + f"Unable to fetch OAuth configuration from {idp_url}.\n" + "Verify it is a valid workspace URL and that OAuth is " + "enabled on this account." 
+ ) + raise e + + if response.status_code != 200: + msg = ( + f"Received status {response.status_code} OAuth configuration from " + f"{idp_url}.\n Verify it is a valid workspace URL and " + "that OAuth is enabled on this account." + ) + logger.error(msg) + raise RuntimeError(msg) + try: + return response.json() + except requests.exceptions.JSONDecodeError as e: + logger.error( + f"Unable to decode OAuth configuration from {idp_url}.\n" + "Verify it is a valid workspace URL and that OAuth is " + "enabled on this account." + ) + raise e + + @staticmethod + def __get_idp_url(host: str): + maybe_scheme = "https://" if not host.startswith("https://") else "" + maybe_trailing_slash = "/" if not host.endswith("/") else "" + return f"{maybe_scheme}{host}{maybe_trailing_slash}{OAuthManager.OIDC_REDIRECTOR_PATH}" + + @staticmethod + def __get_challenge(): + verifier_string = OAuthManager.__token_urlsafe(32) + digest = hashlib.sha256(verifier_string.encode("UTF-8")).digest() + challenge_string = ( + base64.urlsafe_b64encode(digest).decode("UTF-8").replace("=", "") + ) + return verifier_string, challenge_string + + def __get_authorization_code(self, client, auth_url, scope, state, challenge): + handler = OAuthHttpSingleRequestHandler("Databricks Sql Connector") + + last_error = None + for port in self.port_range: + try: + with HTTPServer(("", port), handler) as httpd: + redirect_url = OAuthManager.__get_redirect_url(port) + (auth_req_uri, _, _) = client.prepare_authorization_request( + authorization_url=auth_url, + redirect_url=redirect_url, + scope=scope, + state=state, + code_challenge=challenge, + code_challenge_method="S256", + ) + logger.info(f"Opening {auth_req_uri}") + + webbrowser.open_new(auth_req_uri) + logger.info( + f"Listening for OAuth authorization callback at {redirect_url}" + ) + httpd.handle_request() + self.redirect_port = port + break + except OSError as e: + if e.errno == 48: + logger.info(f"Port {port} is in use") + last_error = e + except Exception as e: 
+ logger.error("unexpected error", e) + if self.redirect_port is None: + logger.error( + f"Tried all the ports {self.port_range} for oauth redirect, but can't find free port" + ) + raise last_error + + if not handler.request_path: + msg = f"No path parameters were returned to the callback at {redirect_url}" + logger.error(msg) + raise RuntimeError(msg) + # This is a kludge because the parsing library expects https callbacks + # We should probably set it up using https + full_redirect_url = ( + f"https://localhost:{self.redirect_port}/{handler.request_path}" + ) + try: + authorization_code_response = client.parse_request_uri_response( + full_redirect_url, state=state + ) + except OAuth2Error as e: + logger.error(f"OAuth Token Request error {e.description}") + raise e + return authorization_code_response + + def __send_auth_code_token_request( + self, client, token_request_url, redirect_url, code, verifier + ): + token_request_body = client.prepare_request_body( + code=code, redirect_uri=redirect_url + ) + data = f"{token_request_body}&code_verifier={verifier}" + return self.__send_token_request(token_request_url, data) + + @staticmethod + def __send_token_request(token_request_url, data): + headers = { + "Accept": "application/json", + "Content-Type": "application/x-www-form-urlencoded", + } + response = requests.post(url=token_request_url, data=data, headers=headers) + return response.json() + + def __send_refresh_token_request(self, hostname, refresh_token): + idp_url = OAuthManager.__get_idp_url(hostname) + oauth_config = OAuthManager.__fetch_well_known_config(idp_url) + token_request_url = oauth_config["token_endpoint"] + client = oauthlib.oauth2.WebApplicationClient(self.client_id) + token_request_body = client.prepare_refresh_body( + refresh_token=refresh_token, client_id=client.client_id + ) + return OAuthManager.__send_token_request(token_request_url, token_request_body) + + @staticmethod + def __get_tokens_from_response(oauth_response): + access_token = 
oauth_response["access_token"] + refresh_token = ( + oauth_response["refresh_token"] + if "refresh_token" in oauth_response + else None + ) + return access_token, refresh_token + + def check_and_refresh_access_token( + self, hostname: str, access_token: str, refresh_token: str + ): + now = datetime.now(tz=timezone.utc) + # If we can't decode an expiration time, this will be expired by default. + expiration_time = now + try: + # This token has already been verified and we are just parsing it. + # If it has been tampered with, it will be rejected on the server side. + # This avoids having to fetch the public key from the issuer and perform + # an unnecessary signature verification. + access_token_payload = access_token.split(".")[1] + # add padding + access_token_payload = access_token_payload + "=" * ( + -len(access_token_payload) % 4 + ) + decoded = json.loads(base64.standard_b64decode(access_token_payload)) + expiration_time = datetime.fromtimestamp(decoded["exp"], tz=timezone.utc) + except Exception as e: + logger.error(e) + raise e + + if expiration_time > now: + # The access token is fine. Just return it. + return access_token, refresh_token, False + + if not refresh_token: + msg = f"OAuth access token expired on {expiration_time}." + logger.error(msg) + raise RuntimeError(msg) + + # Try to refresh using the refresh token + logger.debug( + f"Attempting to refresh OAuth access token that expired on {expiration_time}" + ) + oauth_response = self.__send_refresh_token_request(hostname, refresh_token) + fresh_access_token, fresh_refresh_token = self.__get_tokens_from_response( + oauth_response + ) + return fresh_access_token, fresh_refresh_token, True + + def get_tokens(self, hostname: str, scope=None): + idp_url = self.__get_idp_url(hostname) + oauth_config = self.__fetch_well_known_config(idp_url) + # We are going to override oauth_config["authorization_endpoint"] use the + # /oidc redirector on the hostname, which may inject additional parameters. 
+ auth_url = f"{hostname}oidc/v1/authorize" + state = OAuthManager.__token_urlsafe(16) + (verifier, challenge) = OAuthManager.__get_challenge() + client = oauthlib.oauth2.WebApplicationClient(self.client_id) + try: + auth_response = self.__get_authorization_code( + client, auth_url, scope, state, challenge + ) + except OAuth2Error as e: + msg = f"OAuth Authorization Error: {e.description}" + logger.error(msg) + raise e + + assert self.redirect_port is not None + redirect_url = OAuthManager.__get_redirect_url(self.redirect_port) + + token_request_url = oauth_config["token_endpoint"] + code = auth_response["code"] + oauth_response = self.__send_auth_code_token_request( + client, token_request_url, redirect_url, code, verifier + ) + return self.__get_tokens_from_response(oauth_response) diff --git a/src/databricks/sql/auth/oauth_http_handler.py b/src/databricks/sql/auth/oauth_http_handler.py new file mode 100644 index 000000000..72c6ce517 --- /dev/null +++ b/src/databricks/sql/auth/oauth_http_handler.py @@ -0,0 +1,44 @@ +from http.server import BaseHTTPRequestHandler + + +class OAuthHttpSingleRequestHandler(BaseHTTPRequestHandler): + RESPONSE_BODY_TEMPLATE = """ + + Close this Tab + + + +
class OAuthHttpSingleRequestHandler(BaseHTTPRequestHandler):
    """Serves exactly one request: the local OAuth redirect callback.

    An *instance* of this class is handed to ``HTTPServer`` in place of a
    handler class; ``__call__`` forwards to ``BaseHTTPRequestHandler.__init__``
    (which processes the request), so the captured ``request_path`` survives
    on this instance after ``handle_request()`` returns.
    """

    # BUG FIX: the template in the patch was garbled/mis-encoded markup;
    # reconstructed as well-formed HTML with the same visible text.
    RESPONSE_BODY_TEMPLATE = """<html>
  <head>
    <title>Close this Tab</title>
  </head>
  <body>
    <h1>
      Please close this tab.
    </h1>
    <p>
      The {!!!PLACE_HOLDER!!!} received a response. You may close this tab.
    </p>
  </body>
</html>"""

    def __init__(self, tool_name):
        # Render the static response once, substituting the tool name.
        self.response_body = self.RESPONSE_BODY_TEMPLATE.replace(
            "{!!!PLACE_HOLDER!!!}", tool_name
        ).encode("utf-8")
        self.request_path = None

    def __call__(self, *args, **kwargs):
        """Handle a request (invoked by HTTPServer as the handler factory)."""
        super().__init__(*args, **kwargs)

    def do_GET(self):  # nopep8
        self.send_response(200, "Success")
        self.send_header("Content-type", "text/html")
        self.end_headers()
        self.wfile.write(self.response_body)
        # Remember the path so the caller can parse the authorization code.
        self.request_path = self.path

    def log_message(self, format, *args):
        # pylint: disable=redefined-builtin
        # pylint: disable=unused-argument
        # Silence BaseHTTPRequestHandler's per-request stderr logging.
        return


class THttpClient(thrift.transport.THttpClient.THttpClient):
    """Thrift HTTP transport that injects auth headers on every flush."""

    def __init__(
        self,
        auth_provider,
        uri_or_host,
        port=None,
        path=None,
        cafile=None,
        cert_file=None,
        key_file=None,
        ssl_context=None,
    ):
        super().__init__(
            uri_or_host, port, path, cafile, cert_file, key_file, ssl_context
        )
        self.__auth_provider = auth_provider

    def setCustomHeaders(self, headers: Dict[str, str]):
        # Keep our own reference so auth headers can be re-applied per request.
        self._headers = headers
        super().setCustomHeaders(headers)

    def flush(self):
        # Tokens may rotate (OAuth refresh), so (re)apply auth headers on
        # every outgoing request rather than once at construction.
        headers = dict(self._headers)
        self.__auth_provider.add_headers(headers)
        self._headers = headers
        self.setCustomHeaders(self._headers)
        super().flush()
databricks.sql import USER_AGENT_NAME, __version__ +from databricks.sql import __version__ from databricks.sql import * from databricks.sql.exc import OperationalError from databricks.sql.thrift_backend import ThriftBackend from databricks.sql.utils import ExecuteResponse, ParamEscaper from databricks.sql.types import Row +from databricks.sql.auth.auth import get_python_sql_connector_auth_provider +from databricks.sql.experimental.oauth_persistence import OAuthPersistence logger = logging.getLogger(__name__) @@ -26,7 +23,7 @@ def __init__( self, server_hostname: str, http_path: str, - access_token: str, + access_token: Optional[str] = None, http_headers: Optional[List[Tuple[str, str]]] = None, session_configuration: Dict[str, Any] = None, catalog: Optional[str] = None, @@ -36,15 +33,77 @@ def __init__( """ Connect to a Databricks SQL endpoint or a Databricks cluster. - :param server_hostname: Databricks instance host name. - :param http_path: Http path either to a DBSQL endpoint (e.g. /sql/1.0/endpoints/1234567890abcdef) - or to a DBR interactive cluster (e.g. /sql/protocolv1/o/1234567890123456/1234-123456-slid123) - :param access_token: Http Bearer access token, e.g. Databricks Personal Access Token. - :param http_headers: An optional list of (k, v) pairs that will be set as Http headers on every request - :param session_configuration: An optional dictionary of Spark session parameters. Defaults to None. - Execute the SQL command `SET -v` to get a full list of available commands. - :param catalog: An optional initial catalog to use. Requires DBR version 9.0+ - :param schema: An optional initial schema to use. Requires DBR version 9.0+ + Parameters: + :param server_hostname: Databricks instance host name. + :param http_path: Http path either to a DBSQL endpoint (e.g. /sql/1.0/endpoints/1234567890abcdef) + or to a DBR interactive cluster (e.g. 
/sql/protocolv1/o/1234567890123456/1234-123456-slid123) + :param access_token: `str`, optional + Http Bearer access token, e.g. Databricks Personal Access Token. + Unless if you use auth_type=`databricks-oauth` you need to pass `access_token. + Examples: + connection = sql.connect( + server_hostname='dbc-12345.staging.cloud.databricks.com', + http_path='sql/protocolv1/o/6789/12abc567', + access_token='dabpi12345678' + ) + :param http_headers: An optional list of (k, v) pairs that will be set as Http headers on every request + :param session_configuration: An optional dictionary of Spark session parameters. Defaults to None. + Execute the SQL command `SET -v` to get a full list of available commands. + :param catalog: An optional initial catalog to use. Requires DBR version 9.0+ + :param schema: An optional initial schema to use. Requires DBR version 9.0+ + + Other Parameters: + auth_type: `str`, optional + `databricks-oauth` : to use oauth with fine-grained permission scopes, set to `databricks-oauth`. + This is currently in private preview for Databricks accounts on AWS. + This supports User to Machine OAuth authentication for Databricks on AWS with + any IDP configured. This is only for interactive python applications and open a browser window. + Note this is beta (private preview) + + experimental_oauth_persistence: configures preferred storage for persisting oauth tokens. + This has to be a class implementing `OAuthPersistence`. + When `auth_type` is set to `databricks-oauth` without persisting the oauth token in a persistence storage + the oauth tokens will only be maintained in memory and if the python process restarts the end user + will have to login again. 
+ Note this is beta (private preview) + + For persisting the oauth token in a prod environment you should subclass and implement OAuthPersistence + + from databricks.sql.experimental.oauth_persistence import OAuthPersistence, OAuthToken + class MyCustomImplementation(OAuthPersistence): + def __init__(self, file_path): + self._file_path = file_path + + def persist(self, token: OAuthToken): + # implement this method to persist token.refresh_token and token.access_token + + def read(self) -> Optional[OAuthToken]: + # implement this method to return an instance of the persisted token + + + connection = sql.connect( + server_hostname='dbc-12345.staging.cloud.databricks.com', + http_path='sql/protocolv1/o/6789/12abc567', + auth_type="databricks-oauth", + experimental_oauth_persistence=MyCustomImplementation() + ) + + For development purpose you can use the existing `DevOnlyFilePersistence` which stores the + raw oauth token in the provided file path. Please note this is only for development and for prod you should provide your + own implementation of OAuthPersistence. 
+ + Examples: + # for development only + from databricks.sql.experimental.oauth_persistence import DevOnlyFilePersistence + + connection = sql.connect( + server_hostname='dbc-12345.staging.cloud.databricks.com', + http_path='sql/protocolv1/o/6789/12abc567', + auth_type="databricks-oauth", + experimental_oauth_persistence=DevOnlyFilePersistence("~/dev-oauth.json") + ) + + """ # Internal arguments in **kwargs: @@ -85,30 +144,18 @@ def __init__( # Databricks runtime will return native Arrow types for timestamps instead of Arrow strings # (True by default) + if access_token: + access_token_kv = {"access_token": access_token} + kwargs = {**kwargs, **access_token_kv} + self.open = False self.host = server_hostname self.port = kwargs.get("_port", 443) self.disable_pandas = kwargs.get("_disable_pandas", False) - authorization_header = [] - if kwargs.get("_username") and kwargs.get("_password"): - auth_credentials = "{username}:{password}".format( - username=kwargs.get("_username"), password=kwargs.get("_password") - ).encode("UTF-8") - auth_credentials_base64 = base64.standard_b64encode( - auth_credentials - ).decode("UTF-8") - authorization_header = [ - ("Authorization", "Basic {}".format(auth_credentials_base64)) - ] - elif access_token: - authorization_header = [("Authorization", "Bearer {}".format(access_token))] - elif not ( - kwargs.get("_use_cert_as_auth") and kwargs.get("_tls_client_cert_file") - ): - raise ValueError( - "No valid authentication settings. Please provide an access token." 
import json
import logging
from typing import Optional

logger = logging.getLogger(__name__)


class OAuthToken:
    """Immutable access/refresh token pair."""

    def __init__(self, access_token, refresh_token):
        self._access_token = access_token
        self._refresh_token = refresh_token

    @property
    def access_token(self) -> str:
        return self._access_token

    @property
    def refresh_token(self) -> str:
        return self._refresh_token


class OAuthPersistence:
    """Interface for storing/restoring a token pair keyed by hostname."""

    def persist(self, hostname: str, oauth_token: OAuthToken):
        pass

    def read(self, hostname: str) -> Optional[OAuthToken]:
        pass


# Note this is only intended to be used for development
class DevOnlyFilePersistence(OAuthPersistence):
    """Stores the raw token pair as JSON in a single local file.

    Development only: tokens are written unencrypted and only one hostname is
    supported per file.
    """

    def __init__(self, file_path):
        self._file_path = file_path

    def persist(self, hostname: str, token: OAuthToken):
        """Write the token pair (and its hostname) to the backing file."""
        logger.info(f"persisting token in {self._file_path}")

        payload = {
            "refresh_token": token.refresh_token,
            "access_token": token.access_token,
            "hostname": hostname,
        }
        with open(self._file_path, "w") as outfile:
            outfile.write(json.dumps(payload, indent=4))

    def read(self, hostname: str) -> Optional[OAuthToken]:
        """Return the stored token pair, or None when none is usable.

        Returns None for a missing, unreadable, or malformed file. Raises when
        the file holds a token for a *different* hostname — silently ignoring
        it would overwrite the other host's token on the next login.
        """
        # BUG FIX: the original wrapped this whole method in a blanket
        # `except Exception: return None`, which also swallowed the
        # deliberately-raised hostname-mismatch error below. Only file and
        # parse errors are treated as "no token stored" now.
        try:
            with open(self._file_path, "r") as infile:
                token_as_json = json.loads(infile.read())
        except (OSError, ValueError):
            # json.JSONDecodeError is a subclass of ValueError.
            return None

        try:
            hostname_in_token = token_as_json["hostname"]
            access_token = token_as_json["access_token"]
            refresh_token = token_as_json["refresh_token"]
        except KeyError:
            # File exists but does not have the expected shape.
            return None

        if hostname != hostname_in_token:
            msg = (
                f"token was persisted for host {hostname_in_token} does not match {hostname} "
                f"This is a dev only persistence and it only supports a single Databricks hostname."
                f"\n manually delete {self._file_path} file and restart this process"
            )
            logger.error(msg)
            raise Exception(msg)
        return OAuthToken(access_token, refresh_token)
import os
import tempfile
import unittest

from databricks.sql.auth.auth import (
    AccessTokenAuthProvider,
    BasicAuthProvider,
    AuthProvider,
)
from databricks.sql.auth.auth import get_python_sql_connector_auth_provider
from databricks.sql.experimental.oauth_persistence import (
    DevOnlyFilePersistence,
    OAuthToken,
)


class Auth(unittest.TestCase):
    """Unit tests for the auth provider selection and header injection."""

    def test_access_token_provider(self):
        access_token = "aBc2"
        auth = AccessTokenAuthProvider(access_token=access_token)

        http_request = {'myKey': 'myVal'}
        auth.add_headers(http_request)
        self.assertEqual(http_request['Authorization'], 'Bearer aBc2')
        self.assertEqual(len(http_request.keys()), 2)
        self.assertEqual(http_request['myKey'], 'myVal')

    def test_basic_auth_provider(self):
        username = "moderakh"
        password = "Elevate Databricks 123!!!"
        auth = BasicAuthProvider(username=username, password=password)

        http_request = {'myKey': 'myVal'}
        auth.add_headers(http_request)

        self.assertEqual(
            http_request['Authorization'],
            'Basic bW9kZXJha2g6RWxldmF0ZSBEYXRhYnJpY2tzIDEyMyEhIQ==')
        self.assertEqual(len(http_request.keys()), 2)
        self.assertEqual(http_request['myKey'], 'myVal')

    def test_noop_auth_provider(self):
        auth = AuthProvider()

        http_request = {'myKey': 'myVal'}
        auth.add_headers(http_request)

        self.assertEqual(len(http_request.keys()), 1)
        self.assertEqual(http_request['myKey'], 'myVal')

    def test_get_python_sql_connector_auth_provider_access_token(self):
        hostname = "moderakh-test.cloud.databricks.com"
        kwargs = {'access_token': 'dpi123'}
        auth_provider = get_python_sql_connector_auth_provider(hostname, **kwargs)
        # BUG FIX: original used assertTrue(x, "Name") where "Name" is only
        # the failure message, so the check always passed.
        self.assertEqual(type(auth_provider).__name__, "AccessTokenAuthProvider")

        headers = {}
        auth_provider.add_headers(headers)
        self.assertEqual(headers['Authorization'], 'Bearer dpi123')

    def test_get_python_sql_connector_auth_provider_username_password(self):
        username = "moderakh"
        password = "Elevate Databricks 123!!!"
        hostname = "moderakh-test.cloud.databricks.com"
        kwargs = {'_username': username, '_password': password}
        auth_provider = get_python_sql_connector_auth_provider(hostname, **kwargs)
        self.assertEqual(type(auth_provider).__name__, "BasicAuthProvider")

        headers = {}
        auth_provider.add_headers(headers)
        self.assertEqual(
            headers['Authorization'],
            'Basic bW9kZXJha2g6RWxldmF0ZSBEYXRhYnJpY2tzIDEyMyEhIQ==')

    def test_get_python_sql_connector_auth_provider_noop(self):
        tls_client_cert_file = "fake.cert"
        use_cert_as_auth = "abc"
        hostname = "moderakh-test.cloud.databricks.com"
        kwargs = {
            '_tls_client_cert_file': tls_client_cert_file,
            '_use_cert_as_auth': use_cert_as_auth,
        }
        auth_provider = get_python_sql_connector_auth_provider(hostname, **kwargs)
        # BUG FIX: the broken assertTrue masked that the cert path returns an
        # AuthProvider — there is no class named "CredentialProvider".
        self.assertEqual(type(auth_provider).__name__, "AuthProvider")


class OAuthPersistenceTests(unittest.TestCase):
    """Unit tests for the dev-only file-based OAuth token persistence."""

    def test_DevOnlyFilePersistence_read_my_write(self):
        with tempfile.TemporaryDirectory() as tempdir:
            test_json_file_path = os.path.join(tempdir, 'test.json')
            persistence_manager = DevOnlyFilePersistence(test_json_file_path)
            access_token = "abc#$%%^&^*&*()()_=-/"
            refresh_token = "#$%%^^&**()+)_gter243]xyz"
            token = OAuthToken(access_token=access_token, refresh_token=refresh_token)
            persistence_manager.persist("https://randomserver", token)
            new_token = persistence_manager.read("https://randomserver")

            self.assertEqual(new_token.access_token, access_token)
            self.assertEqual(new_token.refresh_token, refresh_token)

    def test_DevOnlyFilePersistence_file_does_not_exist(self):
        with tempfile.TemporaryDirectory() as tempdir:
            test_json_file_path = os.path.join(tempdir, 'test.json')
            persistence_manager = DevOnlyFilePersistence(test_json_file_path)
            new_token = persistence_manager.read("https://randomserver")

            self.assertEqual(new_token, None)

    # TODO moderakh add test for file with invalid format (should return None)
@patch("thrift.transport.THttpClient.THttpClient") + @patch("databricks.sql.auth.thrift_http_client.THttpClient") def test_headers_are_set(self, t_http_client_class): - ThriftBackend("foo", 123, "bar", [("header", "value")]) + ThriftBackend("foo", 123, "bar", [("header", "value")], auth_provider=AuthProvider()) t_http_client_class.return_value.setCustomHeaders.assert_called_with({"header": "value"}) - @patch("thrift.transport.THttpClient.THttpClient") + @patch("databricks.sql.auth.thrift_http_client.THttpClient") @patch("databricks.sql.thrift_backend.create_default_context") def test_tls_cert_args_are_propagated(self, mock_create_default_context, t_http_client_class): mock_cert_key_file = Mock() @@ -154,6 +155,7 @@ def test_tls_cert_args_are_propagated(self, mock_create_default_context, t_http_ "foo", 123, "bar", [], + auth_provider=AuthProvider(), _tls_client_cert_file=mock_cert_file, _tls_client_cert_key_file=mock_cert_key_file, _tls_client_cert_key_password=mock_cert_key_password, @@ -167,40 +169,40 @@ def test_tls_cert_args_are_propagated(self, mock_create_default_context, t_http_ self.assertEqual(mock_ssl_context.verify_mode, CERT_REQUIRED) self.assertEqual(t_http_client_class.call_args[1]["ssl_context"], mock_ssl_context) - @patch("thrift.transport.THttpClient.THttpClient") + @patch("databricks.sql.auth.thrift_http_client.THttpClient") @patch("databricks.sql.thrift_backend.create_default_context") def test_tls_no_verify_is_respected(self, mock_create_default_context, t_http_client_class): - ThriftBackend("foo", 123, "bar", [], _tls_no_verify=True) + ThriftBackend("foo", 123, "bar", [], auth_provider=AuthProvider(), _tls_no_verify=True) mock_ssl_context = mock_create_default_context.return_value self.assertFalse(mock_ssl_context.check_hostname) self.assertEqual(mock_ssl_context.verify_mode, CERT_NONE) self.assertEqual(t_http_client_class.call_args[1]["ssl_context"], mock_ssl_context) - @patch("thrift.transport.THttpClient.THttpClient") + 
@patch("databricks.sql.auth.thrift_http_client.THttpClient") @patch("databricks.sql.thrift_backend.create_default_context") def test_tls_verify_hostname_is_respected(self, mock_create_default_context, t_http_client_class): - ThriftBackend("foo", 123, "bar", [], _tls_verify_hostname=False) + ThriftBackend("foo", 123, "bar", [], auth_provider=AuthProvider(), _tls_verify_hostname=False) mock_ssl_context = mock_create_default_context.return_value self.assertFalse(mock_ssl_context.check_hostname) self.assertEqual(mock_ssl_context.verify_mode, CERT_REQUIRED) self.assertEqual(t_http_client_class.call_args[1]["ssl_context"], mock_ssl_context) - @patch("thrift.transport.THttpClient.THttpClient") + @patch("databricks.sql.auth.thrift_http_client.THttpClient") def test_port_and_host_are_respected(self, t_http_client_class): - ThriftBackend("hostname", 123, "path_value", []) + ThriftBackend("hostname", 123, "path_value", [], auth_provider=AuthProvider()) self.assertEqual(t_http_client_class.call_args[1]["uri_or_host"], "https://hostname:123/path_value") - @patch("thrift.transport.THttpClient.THttpClient") + @patch("databricks.sql.auth.thrift_http_client.THttpClient") def test_socket_timeout_is_propagated(self, t_http_client_class): - ThriftBackend("hostname", 123, "path_value", [], _socket_timeout=129) + ThriftBackend("hostname", 123, "path_value", [], auth_provider=AuthProvider(), _socket_timeout=129) self.assertEqual(t_http_client_class.return_value.setTimeout.call_args[0][0], 129 * 1000) - ThriftBackend("hostname", 123, "path_value", [], _socket_timeout=0) + ThriftBackend("hostname", 123, "path_value", [], auth_provider=AuthProvider(), _socket_timeout=0) self.assertEqual(t_http_client_class.return_value.setTimeout.call_args[0][0], 0) - ThriftBackend("hostname", 123, "path_value", [], _socket_timeout=None) + ThriftBackend("hostname", 123, "path_value", [], auth_provider=AuthProvider(), _socket_timeout=None) 
self.assertEqual(t_http_client_class.return_value.setTimeout.call_args[0][0], None) def test_non_primitive_types_raise_error(self): @@ -268,7 +270,7 @@ def test_hive_schema_to_description_preserves_scale_and_precision(self): def test_make_request_checks_status_code(self): error_codes = [ttypes.TStatusCode.ERROR_STATUS, ttypes.TStatusCode.INVALID_HANDLE_STATUS] - thrift_backend = ThriftBackend("foo", 123, "bar", []) + thrift_backend = ThriftBackend("foobar", 443, "path", [], auth_provider=AuthProvider()) for code in error_codes: mock_error_response = Mock() @@ -301,7 +303,7 @@ def test_handle_execute_response_checks_operation_state_in_direct_results(self): resultSetMetadata=None, resultSet=None, closeOperation=None)) - thrift_backend = ThriftBackend("foobar", 443, "path", []) + thrift_backend = ThriftBackend("foobar", 443, "path", [], auth_provider=AuthProvider()) with self.assertRaises(DatabaseError) as cm: thrift_backend._handle_execute_response(t_execute_resp, Mock()) @@ -329,7 +331,7 @@ def test_handle_execute_response_checks_operation_state_in_polls(self, tcli_serv operationHandle=self.operation_handle) tcli_service_instance.GetOperationStatus.return_value = op_state_resp - thrift_backend = ThriftBackend("foobar", 443, "path", []) + thrift_backend = ThriftBackend("foobar", 443, "path", [], auth_provider=AuthProvider()) with self.assertRaises(DatabaseError) as cm: thrift_backend._handle_execute_response(t_execute_resp, Mock()) @@ -354,7 +356,7 @@ def test_get_status_uses_display_message_if_available(self, tcli_service_class): tcli_service_instance.GetOperationStatus.return_value = t_get_operation_status_resp tcli_service_instance.ExecuteStatement.return_value = t_execute_resp - thrift_backend = ThriftBackend("foobar", 443, "path", []) + thrift_backend = ThriftBackend("foobar", 443, "path", [], auth_provider=AuthProvider()) with self.assertRaises(DatabaseError) as cm: thrift_backend.execute_command(Mock(), Mock(), 100, 100, Mock()) @@ -384,7 +386,7 @@ def 
test_direct_results_uses_display_message_if_available(self, tcli_service_cla tcli_service_instance.ExecuteStatement.return_value = t_execute_resp - thrift_backend = ThriftBackend("foobar", 443, "path", []) + thrift_backend = ThriftBackend("foobar", 443, "path", [], auth_provider=AuthProvider()) with self.assertRaises(DatabaseError) as cm: thrift_backend.execute_command(Mock(), Mock(), 100, 100, Mock()) @@ -427,7 +429,7 @@ def test_handle_execute_response_checks_direct_results_for_error_statuses(self): for error_resp in [resp_1, resp_2, resp_3, resp_4]: with self.subTest(error_resp=error_resp): - thrift_backend = ThriftBackend("foobar", 443, "path", []) + thrift_backend = ThriftBackend("foobar", 443, "path", [], auth_provider=AuthProvider()) with self.assertRaises(DatabaseError) as cm: thrift_backend._handle_execute_response(error_resp, Mock()) @@ -463,7 +465,7 @@ def test_handle_execute_response_can_handle_without_direct_results(self, tcli_se tcli_service_instance.GetOperationStatus.side_effect = [ op_state_1, op_state_2, op_state_3 ] - thrift_backend = ThriftBackend("foobar", 443, "path", []) + thrift_backend = ThriftBackend("foobar", 443, "path", [], auth_provider=AuthProvider()) results_message_response = thrift_backend._handle_execute_response( execute_resp, Mock()) self.assertEqual(results_message_response.status, @@ -488,7 +490,7 @@ def test_handle_execute_response_can_handle_with_direct_results(self): directResults=direct_results_message, operationHandle=self.operation_handle) - thrift_backend = ThriftBackend("foobar", 443, "path", []) + thrift_backend = ThriftBackend("foobar", 443, "path", [], auth_provider=AuthProvider()) thrift_backend._results_message_to_execute_response = Mock() thrift_backend._handle_execute_response(execute_resp, Mock()) @@ -642,7 +644,7 @@ def test_arrow_batches_row_count_are_respected(self, tcli_service_class): pyarrow.field("column3", pyarrow.binary()) ]).serialize().to_pybytes() - thrift_backend = ThriftBackend("foobar", 443, 
"path", []) + thrift_backend = ThriftBackend("foobar", 443, "path", [], auth_provider=AuthProvider()) arrow_queue, has_more_results = thrift_backend.fetch_results( op_handle=Mock(), max_rows=1, @@ -658,7 +660,7 @@ def test_execute_statement_calls_client_and_handle_execute_response(self, tcli_s tcli_service_instance = tcli_service_class.return_value response = Mock() tcli_service_instance.ExecuteStatement.return_value = response - thrift_backend = ThriftBackend("foobar", 443, "path", []) + thrift_backend = ThriftBackend("foobar", 443, "path", [], auth_provider=AuthProvider()) thrift_backend._handle_execute_response = Mock() cursor_mock = Mock() @@ -676,7 +678,7 @@ def test_get_catalogs_calls_client_and_handle_execute_response(self, tcli_servic tcli_service_instance = tcli_service_class.return_value response = Mock() tcli_service_instance.GetCatalogs.return_value = response - thrift_backend = ThriftBackend("foobar", 443, "path", []) + thrift_backend = ThriftBackend("foobar", 443, "path", [], auth_provider=AuthProvider()) thrift_backend._handle_execute_response = Mock() cursor_mock = Mock() @@ -693,7 +695,7 @@ def test_get_schemas_calls_client_and_handle_execute_response(self, tcli_service tcli_service_instance = tcli_service_class.return_value response = Mock() tcli_service_instance.GetSchemas.return_value = response - thrift_backend = ThriftBackend("foobar", 443, "path", []) + thrift_backend = ThriftBackend("foobar", 443, "path", [], auth_provider=AuthProvider()) thrift_backend._handle_execute_response = Mock() cursor_mock = Mock() @@ -718,7 +720,7 @@ def test_get_tables_calls_client_and_handle_execute_response(self, tcli_service_ tcli_service_instance = tcli_service_class.return_value response = Mock() tcli_service_instance.GetTables.return_value = response - thrift_backend = ThriftBackend("foobar", 443, "path", []) + thrift_backend = ThriftBackend("foobar", 443, "path", [], auth_provider=AuthProvider()) thrift_backend._handle_execute_response = Mock() cursor_mock 
= Mock() @@ -747,7 +749,7 @@ def test_get_columns_calls_client_and_handle_execute_response(self, tcli_service tcli_service_instance = tcli_service_class.return_value response = Mock() tcli_service_instance.GetColumns.return_value = response - thrift_backend = ThriftBackend("foobar", 443, "path", []) + thrift_backend = ThriftBackend("foobar", 443, "path", [], auth_provider=AuthProvider()) thrift_backend._handle_execute_response = Mock() cursor_mock = Mock() @@ -776,14 +778,14 @@ def test_open_session_user_provided_session_id_optional(self, tcli_service_class tcli_service_instance = tcli_service_class.return_value tcli_service_instance.OpenSession.return_value = self.open_session_resp - thrift_backend = ThriftBackend("foobar", 443, "path", []) + thrift_backend = ThriftBackend("foobar", 443, "path", [], auth_provider=AuthProvider()) thrift_backend.open_session({}, None, None) self.assertEqual(len(tcli_service_instance.OpenSession.call_args_list), 1) @patch("databricks.sql.thrift_backend.TCLIService.Client") def test_op_handle_respected_in_close_command(self, tcli_service_class): tcli_service_instance = tcli_service_class.return_value - thrift_backend = ThriftBackend("foobar", 443, "path", []) + thrift_backend = ThriftBackend("foobar", 443, "path", [], auth_provider=AuthProvider()) thrift_backend.close_command(self.operation_handle) self.assertEqual(tcli_service_instance.CloseOperation.call_args[0][0].operationHandle, self.operation_handle) @@ -791,7 +793,7 @@ def test_op_handle_respected_in_close_command(self, tcli_service_class): @patch("databricks.sql.thrift_backend.TCLIService.Client") def test_session_handle_respected_in_close_session(self, tcli_service_class): tcli_service_instance = tcli_service_class.return_value - thrift_backend = ThriftBackend("foobar", 443, "path", []) + thrift_backend = ThriftBackend("foobar", 443, "path", [], auth_provider=AuthProvider()) thrift_backend.close_session(self.session_handle) 
self.assertEqual(tcli_service_instance.CloseSession.call_args[0][0].sessionHandle, self.session_handle) @@ -826,7 +828,7 @@ def test_non_arrow_non_column_based_set_triggers_exception(self, tcli_service_cl def test_create_arrow_table_raises_error_for_unsupported_type(self): t_row_set = ttypes.TRowSet() - thrift_backend = ThriftBackend("foobar", 443, "path", []) + thrift_backend = ThriftBackend("foobar", 443, "path", [], auth_provider=AuthProvider()) with self.assertRaises(OperationalError): thrift_backend._create_arrow_table(t_row_set, Mock(), None, Mock()) @@ -834,7 +836,7 @@ def test_create_arrow_table_raises_error_for_unsupported_type(self): @patch.object(ThriftBackend, "_convert_column_based_set_to_arrow_table") def test_create_arrow_table_calls_correct_conversion_method(self, convert_col_mock, convert_arrow_mock): - thrift_backend = ThriftBackend("foobar", 443, "path", []) + thrift_backend = ThriftBackend("foobar", 443, "path", [], auth_provider=AuthProvider()) convert_arrow_mock.return_value = (MagicMock(), Mock()) convert_col_mock.return_value = (MagicMock(), Mock()) @@ -975,7 +977,7 @@ def test_handle_execute_response_sets_active_op_handle(self): @patch("databricks.sql.thrift_backend._retry_policy", new_callable=retry_policy_factory) def test_make_request_will_retry_GetOperationStatus( self, mock_retry_policy, mock_GetOperationStatus, t_transport_class): - + import thrift, errno from databricks.sql.thrift_api.TCLIService.TCLIService import Client from databricks.sql.exc import RequestError @@ -992,13 +994,14 @@ def test_make_request_will_retry_GetOperationStatus( operationHandle=self.operation_handle, getProgressUpdate=False, ) - + EXPECTED_RETRIES = 2 thrift_backend = ThriftBackend( "foobar", 443, "path", [], + auth_provider=AuthProvider(), _retry_stop_after_attempts_count=EXPECTED_RETRIES, _retry_delay_default=1) @@ -1010,7 +1013,7 @@ def test_make_request_will_retry_GetOperationStatus( self.assertEqual(f'{EXPECTED_RETRIES}/{EXPECTED_RETRIES}', 
cm.exception.context["attempt"]) # Unusual OSError code - mock_GetOperationStatus.side_effect = OSError(errno.EEXIST, "File does not exist") + mock_GetOperationStatus.side_effect = OSError(errno.EEXIST, "File does not exist") with self.assertLogs("databricks.sql.thrift_backend", level=logging.WARNING) as cm: with self.assertRaises(RequestError): @@ -1018,14 +1021,14 @@ def test_make_request_will_retry_GetOperationStatus( # There should be two warning log messages: one for each retry self.assertEqual(len(cm.output), EXPECTED_RETRIES) - + # The warnings should be identical self.assertEqual(cm.output[1], cm.output[0]) - + # The warnings should include this text self.assertIn(f"{this_gos_name} failed with code {errno.EEXIST} and will attempt to retry", cm.output[0]) - - + + @patch("thrift.transport.THttpClient.THttpClient") def test_make_request_wont_retry_if_headers_not_present(self, t_transport_class): t_transport_instance = t_transport_class.return_value @@ -1035,7 +1038,7 @@ def test_make_request_wont_retry_if_headers_not_present(self, t_transport_class) mock_method.__name__ = "method name" mock_method.side_effect = Exception("This method fails") - thrift_backend = ThriftBackend("foobar", 443, "path", []) + thrift_backend = ThriftBackend("foobar", 443, "path", [], auth_provider=AuthProvider()) with self.assertRaises(OperationalError) as cm: thrift_backend.make_request(mock_method, Mock()) @@ -1051,14 +1054,14 @@ def test_make_request_wont_retry_if_error_code_not_429_or_503(self, t_transport_ mock_method.__name__ = "method name" mock_method.side_effect = Exception("This method fails") - thrift_backend = ThriftBackend("foobar", 443, "path", []) + thrift_backend = ThriftBackend("foobar", 443, "path", [], auth_provider=AuthProvider()) with self.assertRaises(OperationalError) as cm: thrift_backend.make_request(mock_method, Mock()) self.assertIn("This method fails", str(cm.exception.message_with_context())) - @patch("thrift.transport.THttpClient.THttpClient") + 
@patch("databricks.sql.auth.thrift_http_client.THttpClient") @patch("databricks.sql.thrift_backend._retry_policy", new_callable=retry_policy_factory) def test_make_request_will_retry_stop_after_attempts_count_if_retryable( self, mock_retry_policy, t_transport_class): @@ -1073,6 +1076,7 @@ def test_make_request_will_retry_stop_after_attempts_count_if_retryable( "foobar", 443, "path", [], + auth_provider=AuthProvider(), _retry_stop_after_attempts_count=14, _retry_delay_max=0, _retry_delay_min=0) @@ -1085,14 +1089,14 @@ def test_make_request_will_retry_stop_after_attempts_count_if_retryable( self.assertEqual(mock_method.call_count, 14) - @patch("thrift.transport.THttpClient.THttpClient") + @patch("databricks.sql.auth.thrift_http_client.THttpClient") def test_make_request_will_read_error_message_headers_if_set(self, t_transport_class): t_transport_instance = t_transport_class.return_value mock_method = Mock() mock_method.__name__ = "method name" mock_method.side_effect = Exception("This method fails") - thrift_backend = ThriftBackend("foobar", 443, "path", []) + thrift_backend = ThriftBackend("foobar", 443, "path", [], auth_provider=AuthProvider()) error_headers = [[("x-thriftserver-error-message", "thrift server error message")], [("x-databricks-error-or-redirect-message", "databricks error message")], @@ -1174,7 +1178,7 @@ def test_retry_args_passthrough(self, mock_http_client): "_retry_stop_after_attempts_count": 1, "_retry_stop_after_attempts_duration": 100 } - backend = ThriftBackend("foobar", 443, "path", [], **retry_delay_args) + backend = ThriftBackend("foobar", 443, "path", [], auth_provider=AuthProvider(), **retry_delay_args) for (arg, val) in retry_delay_args.items(): self.assertEqual(getattr(backend, arg), val) @@ -1189,7 +1193,7 @@ def test_retry_args_bounding(self, mock_http_client): k: v[i][0] for (k, v) in retry_delay_test_args_and_expected_values.items() } - backend = ThriftBackend("foobar", 443, "path", [], **retry_delay_args) + backend = 
ThriftBackend("foobar", 443, "path", [], auth_provider=AuthProvider(), **retry_delay_args) retry_delay_expected_vals = { k: v[i][1] for (k, v) in retry_delay_test_args_and_expected_values.items() @@ -1209,7 +1213,7 @@ def test_configuration_passthrough(self, tcli_client_class): "42": "42" } - backend = ThriftBackend("foobar", 443, "path", []) + backend = ThriftBackend("foobar", 443, "path", [], auth_provider=AuthProvider()) backend.open_session(mock_config, None, None) open_session_req = tcli_client_class.return_value.OpenSession.call_args[0][0] @@ -1220,7 +1224,7 @@ def test_cant_set_timestamp_as_string_to_true(self, tcli_client_class): tcli_service_instance = tcli_client_class.return_value tcli_service_instance.OpenSession.return_value = self.open_session_resp mock_config = {"spark.thriftserver.arrowBasedRowSet.timestampAsString": True} - backend = ThriftBackend("foobar", 443, "path", []) + backend = ThriftBackend("foobar", 443, "path", [], auth_provider=AuthProvider()) with self.assertRaises(databricks.sql.Error) as cm: backend.open_session(mock_config, None, None) @@ -1238,7 +1242,7 @@ def _construct_open_session_with_namespace(self, can_use_multiple_cats, cat, sch def test_initial_namespace_passthrough_to_open_session(self, tcli_client_class): tcli_service_instance = tcli_client_class.return_value - backend = ThriftBackend("foobar", 443, "path", []) + backend = ThriftBackend("foobar", 443, "path", [], auth_provider=AuthProvider()) initial_cat_schem_args = [("cat", None), (None, "schem"), ("cat", "schem")] for cat, schem in initial_cat_schem_args: @@ -1257,7 +1261,7 @@ def test_can_use_multiple_catalogs_is_set_in_open_session_req(self, tcli_client_ tcli_service_instance = tcli_client_class.return_value tcli_service_instance.OpenSession.return_value = self.open_session_resp - backend = ThriftBackend("foobar", 443, "path", []) + backend = ThriftBackend("foobar", 443, "path", [], auth_provider=AuthProvider()) backend.open_session({}, None, None) open_session_req = 
tcli_client_class.return_value.OpenSession.call_args[0][0] @@ -1267,7 +1271,7 @@ def test_can_use_multiple_catalogs_is_set_in_open_session_req(self, tcli_client_ def test_can_use_multiple_catalogs_is_false_fails_with_initial_catalog(self, tcli_client_class): tcli_service_instance = tcli_client_class.return_value - backend = ThriftBackend("foobar", 443, "path", []) + backend = ThriftBackend("foobar", 443, "path", [], auth_provider=AuthProvider()) # If the initial catalog is set, but server returns canUseMultipleCatalogs=False, we # expect failure. If the initial catalog isn't set, then canUseMultipleCatalogs=False # is fine @@ -1301,7 +1305,7 @@ def test_protocol_v3_fails_if_initial_namespace_set(self, tcli_client_class): initialNamespace=ttypes.TNamespace(catalogName="cat", schemaName="schem") ) - backend = ThriftBackend("foobar", 443, "path", []) + backend = ThriftBackend("foobar", 443, "path", [], auth_provider=AuthProvider()) with self.assertRaises(InvalidServerResponseError) as cm: backend.open_session({}, "cat", "schem") @@ -1325,7 +1329,7 @@ def test_execute_command_sets_complex_type_fields_correctly(self, mock_handle_ex if decimals is not None: complex_arg_types["_use_arrow_native_decimals"] = decimals - thrift_backend = ThriftBackend("foobar", 443, "path", [], **complex_arg_types) + thrift_backend = ThriftBackend("foobar", 443, "path", [], auth_provider=AuthProvider(), **complex_arg_types) thrift_backend.execute_command(Mock(), Mock(), 100, 100, Mock()) t_execute_statement_req = tcli_service_instance.ExecuteStatement.call_args[0][0] From ed2239b9f466f91f91707859e98dacbb0e1f6639 Mon Sep 17 00:00:00 2001 From: Mohit Singla Date: Wed, 14 Sep 2022 13:22:44 +0200 Subject: [PATCH 05/17] Resolved merge conflict --- poetry.lock | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/poetry.lock b/poetry.lock index 0494b28b1..0083344d7 100644 --- a/poetry.lock +++ b/poetry.lock @@ -183,6 +183,14 @@ category = "dev" optional = false python-versions = ">=3.6" 
+[[package]] +name = "lz4" +version = "4.0.2" +description = "LZ4 Bindings for Python" +category = "main" +optional = false +python-versions = ">=3.7" + [[package]] name = "mypy" version = "0.950" From 4b5bb5385fece4f9d937b49e10906cdc46101e7f Mon Sep 17 00:00:00 2001 From: Mohit Singla Date: Wed, 14 Sep 2022 18:15:59 +0200 Subject: [PATCH 06/17] Requesting getResultMetadata everytime Signed-off-by: Mohit Singla --- src/databricks/sql/thrift_backend.py | 1 + tests/e2e/common/large_queries_mixin.py | 1 + 2 files changed, 2 insertions(+) diff --git a/src/databricks/sql/thrift_backend.py b/src/databricks/sql/thrift_backend.py index 86551144c..29feea6bc 100644 --- a/src/databricks/sql/thrift_backend.py +++ b/src/databricks/sql/thrift_backend.py @@ -945,6 +945,7 @@ def fetch_results( maxRows=max_rows, maxBytes=max_bytes, orientation=ttypes.TFetchOrientation.FETCH_NEXT, + includeResultSetMetadata=True ) resp = self.make_request(self._client.FetchResults, req) diff --git a/tests/e2e/common/large_queries_mixin.py b/tests/e2e/common/large_queries_mixin.py index d59e0a9fe..cf2736c0c 100644 --- a/tests/e2e/common/large_queries_mixin.py +++ b/tests/e2e/common/large_queries_mixin.py @@ -80,6 +80,7 @@ def test_long_running_query(self): scale0 = 10000 scale_factor = 1 with self.cursor() as cursor: + cursor.execute("SET use_cached_result=false") while duration < min_duration: self.assertLess(scale_factor, 512, msg="Detected infinite loop") start = time.time() From 865ce904577fc25468c70c7ca917c64024104599 Mon Sep 17 00:00:00 2001 From: Mohit Singla Date: Wed, 14 Sep 2022 18:44:51 +0200 Subject: [PATCH 07/17] Reformatting Signed-off-by: Mohit Singla --- src/databricks/sql/thrift_backend.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/databricks/sql/thrift_backend.py b/src/databricks/sql/thrift_backend.py index 29feea6bc..79ff872c0 100644 --- a/src/databricks/sql/thrift_backend.py +++ b/src/databricks/sql/thrift_backend.py @@ -945,7 +945,7 @@ def 
fetch_results( maxRows=max_rows, maxBytes=max_bytes, orientation=ttypes.TFetchOrientation.FETCH_NEXT, - includeResultSetMetadata=True + includeResultSetMetadata=True, ) resp = self.make_request(self._client.FetchResults, req) From 889dab208b2ab30d4c9cedcd02fcce4cd51ec615 Mon Sep 17 00:00:00 2001 From: Mohit Singla Date: Thu, 15 Sep 2022 13:37:03 +0200 Subject: [PATCH 08/17] Refactored Signed-off-by: Mohit Singla --- src/databricks/sql/client.py | 2 ++ src/databricks/sql/thrift_backend.py | 23 +++++++++-------------- src/databricks/sql/utils.py | 2 +- tests/unit/test_fetches.py | 4 +++- tests/unit/test_thrift_backend.py | 2 ++ 5 files changed, 17 insertions(+), 16 deletions(-) diff --git a/src/databricks/sql/client.py b/src/databricks/sql/client.py index e3190d457..7f74db3ce 100644 --- a/src/databricks/sql/client.py +++ b/src/databricks/sql/client.py @@ -614,6 +614,7 @@ def __init__( self.has_been_closed_server_side = execute_response.has_been_closed_server_side self.has_more_rows = execute_response.has_more_rows self.buffer_size_bytes = result_buffer_size_bytes + self.lz4_compressed = execute_response.lz4_compressed self.arraysize = arraysize self.thrift_backend = thrift_backend self.description = execute_response.description @@ -642,6 +643,7 @@ def _fill_results_buffer(self): max_rows=self.arraysize, max_bytes=self.buffer_size_bytes, expected_row_start_offset=self._next_row_index, + lz4_compressed=self.lz4_compressed, arrow_schema_bytes=self._arrow_schema_bytes, description=self.description, ) diff --git a/src/databricks/sql/thrift_backend.py b/src/databricks/sql/thrift_backend.py index 79ff872c0..b8b637edf 100644 --- a/src/databricks/sql/thrift_backend.py +++ b/src/databricks/sql/thrift_backend.py @@ -508,9 +508,7 @@ def _poll_for_status(self, op_handle): ) return self.make_request(self._client.GetOperationStatus, req) - def _create_arrow_table( - self, t_row_set, are_arrow_results_compressed, schema_bytes, description - ): + def _create_arrow_table(self, 
t_row_set, lz4_compressed, schema_bytes, description): if t_row_set.columns is not None: ( arrow_table, @@ -523,7 +521,7 @@ def _create_arrow_table( arrow_table, num_rows, ) = ThriftBackend._convert_arrow_based_set_to_arrow_table( - t_row_set.arrowBatches, are_arrow_results_compressed, schema_bytes + t_row_set.arrowBatches, lz4_compressed, schema_bytes ) else: raise OperationalError("Unsupported TRowSet instance {}".format(t_row_set)) @@ -549,14 +547,14 @@ def _convert_decimals_in_arrow_table(table, description): @staticmethod def _convert_arrow_based_set_to_arrow_table( - arrow_batches, are_arrow_results_compressed, schema_bytes + arrow_batches, lz4_compressed, schema_bytes ): ba = bytearray() ba += schema_bytes n_rows = 0 for arrow_batch in arrow_batches: n_rows += arrow_batch.rowCount - if are_arrow_results_compressed: + if lz4_compressed: ba += lz4.frame.decompress(arrow_batch.batch) else: ba += arrow_batch.batch @@ -716,7 +714,6 @@ def _results_message_to_execute_response(self, resp, operation_state): ] ) ) - direct_results = resp.directResults has_been_closed_server_side = direct_results and direct_results.closeOperation has_more_rows = ( @@ -733,7 +730,7 @@ def _results_message_to_execute_response(self, resp, operation_state): .serialize() .to_pybytes() ) - are_arrow_results_compressed = ( + lz4_compressed = ( t_result_set_metadata_resp and t_result_set_metadata_resp.lz4Compressed ) if direct_results and direct_results.resultSet: @@ -742,7 +739,7 @@ def _results_message_to_execute_response(self, resp, operation_state): arrow_results, n_rows = self._create_arrow_table( direct_results.resultSet.results, - are_arrow_results_compressed, + lz4_compressed, schema_bytes, description, ) @@ -754,6 +751,7 @@ def _results_message_to_execute_response(self, resp, operation_state): status=operation_state, has_been_closed_server_side=has_been_closed_server_side, has_more_rows=has_more_rows, + lz4_compressed=lz4_compressed, command_handle=resp.operationHandle, 
description=description, arrow_schema_bytes=schema_bytes, @@ -930,6 +928,7 @@ def fetch_results( max_rows, max_bytes, expected_row_start_offset, + lz4_compressed, arrow_schema_bytes, description, ): @@ -945,7 +944,6 @@ def fetch_results( maxRows=max_rows, maxBytes=max_bytes, orientation=ttypes.TFetchOrientation.FETCH_NEXT, - includeResultSetMetadata=True, ) resp = self.make_request(self._client.FetchResults, req) @@ -955,11 +953,8 @@ def fetch_results( expected_row_start_offset, resp.results.startRowOffset ) ) - are_arrow_results_compressed = ( - resp.resultSetMetadata and resp.resultSetMetadata.lz4Compressed - ) arrow_results, n_rows = self._create_arrow_table( - resp.results, are_arrow_results_compressed, arrow_schema_bytes, description + resp.results, lz4_compressed, arrow_schema_bytes, description ) arrow_queue = ArrowQueue(arrow_results, n_rows) diff --git a/src/databricks/sql/utils.py b/src/databricks/sql/utils.py index 2961a1f59..410c7144e 100644 --- a/src/databricks/sql/utils.py +++ b/src/databricks/sql/utils.py @@ -40,7 +40,7 @@ def remaining_rows(self) -> pyarrow.Table: ExecuteResponse = namedtuple( "ExecuteResponse", - "status has_been_closed_server_side has_more_rows description " + "status has_been_closed_server_side has_more_rows description lz4_compressed " "command_handle arrow_queue arrow_schema_bytes", ) diff --git a/tests/unit/test_fetches.py b/tests/unit/test_fetches.py index a4308d57a..29853b72a 100644 --- a/tests/unit/test_fetches.py +++ b/tests/unit/test_fetches.py @@ -38,6 +38,7 @@ def make_dummy_result_set_from_initial_results(initial_results): has_been_closed_server_side=True, has_more_rows=False, description=Mock(), + lz4_compressed=False, command_handle=None, arrow_queue=arrow_queue, arrow_schema_bytes=schema.serialize().to_pybytes())) @@ -50,7 +51,7 @@ def make_dummy_result_set_from_initial_results(initial_results): def make_dummy_result_set_from_batch_list(batch_list): batch_index = 0 - def fetch_results(op_handle, max_rows, max_bytes, 
expected_row_start_offset, + def fetch_results(op_handle, max_rows, max_bytes, expected_row_start_offset, lz4_compressed, arrow_schema_bytes, description): nonlocal batch_index results = FetchTests.make_arrow_queue(batch_list[batch_index]) @@ -71,6 +72,7 @@ def fetch_results(op_handle, max_rows, max_bytes, expected_row_start_offset, has_more_rows=True, description=[(f'col{col_id}', 'integer', None, None, None, None, None) for col_id in range(num_cols)], + lz4_compressed=False, command_handle=None, arrow_queue=None, arrow_schema_bytes=None)) diff --git a/tests/unit/test_thrift_backend.py b/tests/unit/test_thrift_backend.py index e071b2924..3f29d7b14 100644 --- a/tests/unit/test_thrift_backend.py +++ b/tests/unit/test_thrift_backend.py @@ -618,6 +618,7 @@ def test_handle_execute_response_reads_has_more_rows_in_result_response( max_rows=1, max_bytes=1, expected_row_start_offset=0, + lz4_compressed=False, arrow_schema_bytes=Mock(), description=Mock()) @@ -650,6 +651,7 @@ def test_arrow_batches_row_count_are_respected(self, tcli_service_class): max_rows=1, max_bytes=1, expected_row_start_offset=0, + lz4_compressed=False, arrow_schema_bytes=schema, description=MagicMock()) From 2b96ca90ac150ed491c42926b9262aa90ddb8efa Mon Sep 17 00:00:00 2001 From: Mohit Singla Date: Thu, 15 Sep 2022 14:18:06 +0200 Subject: [PATCH 09/17] Matching naming Signed-off-by: Mohit Singla --- tests/unit/test_thrift_backend.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/unit/test_thrift_backend.py b/tests/unit/test_thrift_backend.py index 3f29d7b14..efd195a10 100644 --- a/tests/unit/test_thrift_backend.py +++ b/tests/unit/test_thrift_backend.py @@ -845,17 +845,17 @@ def test_create_arrow_table_calls_correct_conversion_method(self, convert_col_mo schema = Mock() cols = Mock() arrow_batches = Mock() - are_arrow_results_compressed = Mock() + lz4_compressed = Mock() description = Mock() t_col_set = ttypes.TRowSet(columns=cols) - 
thrift_backend._create_arrow_table(t_col_set, are_arrow_results_compressed, schema, description) + thrift_backend._create_arrow_table(t_col_set, lz4_compressed, schema, description) convert_arrow_mock.assert_not_called() convert_col_mock.assert_called_once_with(cols, description) t_arrow_set = ttypes.TRowSet(arrowBatches=arrow_batches) - thrift_backend._create_arrow_table(t_arrow_set, are_arrow_results_compressed, schema, Mock()) - convert_arrow_mock.assert_called_once_with(arrow_batches, are_arrow_results_compressed, schema) + thrift_backend._create_arrow_table(t_arrow_set, lz4_compressed, schema, Mock()) + convert_arrow_mock.assert_called_once_with(arrow_batches, lz4_compressed, schema) def test_convert_column_based_set_to_arrow_table_without_nulls(self): # Deliberately duplicate the column name to check that dups work From 4aaac41077249a2138ac9dd431a8eeed61de3b58 Mon Sep 17 00:00:00 2001 From: Mohit Singla Date: Sat, 24 Sep 2022 14:48:26 +0200 Subject: [PATCH 10/17] Allowing the compression to be set by user Signed-off-by: Mohit Singla --- src/databricks/sql/client.py | 3 ++- src/databricks/sql/thrift_backend.py | 14 ++++++++------ tests/unit/test_thrift_backend.py | 10 +++++----- 3 files changed, 15 insertions(+), 12 deletions(-) diff --git a/src/databricks/sql/client.py b/src/databricks/sql/client.py index 7f74db3ce..bba8ea063 100644 --- a/src/databricks/sql/client.py +++ b/src/databricks/sql/client.py @@ -297,7 +297,7 @@ def _check_not_closed(self): raise Error("Attempting operation on closed cursor") def execute( - self, operation: str, parameters: Optional[Dict[str, str]] = None + self, operation: str, parameters: Optional[Dict[str, str]] = None, use_lz4_compression: bool = True ) -> "Cursor": """ Execute a query and wait for execution to complete. 
@@ -318,6 +318,7 @@ def execute( session_handle=self.connection._session_handle, max_rows=self.arraysize, max_bytes=self.buffer_size_bytes, + use_lz4_compression=use_lz4_compression, cursor=self, ) self.active_result_set = ResultSet( diff --git a/src/databricks/sql/thrift_backend.py b/src/databricks/sql/thrift_backend.py index b8b637edf..7f2edfa1c 100644 --- a/src/databricks/sql/thrift_backend.py +++ b/src/databricks/sql/thrift_backend.py @@ -552,11 +552,13 @@ def _convert_arrow_based_set_to_arrow_table( ba = bytearray() ba += schema_bytes n_rows = 0 - for arrow_batch in arrow_batches: - n_rows += arrow_batch.rowCount - if lz4_compressed: + if lz4_compressed: + for arrow_batch in arrow_batches: + n_rows += arrow_batch.rowCount ba += lz4.frame.decompress(arrow_batch.batch) - else: + else: + for arrow_batch in arrow_batches: + n_rows += arrow_batch.rowCount ba += arrow_batch.batch arrow_table = pyarrow.ipc.open_stream(ba).read_all() return arrow_table, n_rows @@ -795,7 +797,7 @@ def _check_direct_results_for_error(t_spark_direct_results): t_spark_direct_results.closeOperation ) - def execute_command(self, operation, session_handle, max_rows, max_bytes, cursor): + def execute_command(self, operation, session_handle, max_rows, max_bytes, use_lz4_compression, cursor): assert session_handle is not None spark_arrow_types = ttypes.TSparkArrowTypes( @@ -814,7 +816,7 @@ def execute_command(self, operation, session_handle, max_rows, max_bytes, cursor maxRows=max_rows, maxBytes=max_bytes ), canReadArrowResult=True, - canDecompressLZ4Result=True, + canDecompressLZ4Result=use_lz4_compression, canDownloadResult=False, confOverlay={ # We want to receive proper Timestamp arrow types. 
diff --git a/tests/unit/test_thrift_backend.py b/tests/unit/test_thrift_backend.py index efd195a10..b759a3aba 100644 --- a/tests/unit/test_thrift_backend.py +++ b/tests/unit/test_thrift_backend.py @@ -358,7 +358,7 @@ def test_get_status_uses_display_message_if_available(self, tcli_service_class): thrift_backend = ThriftBackend("foobar", 443, "path", [], auth_provider=AuthProvider()) with self.assertRaises(DatabaseError) as cm: - thrift_backend.execute_command(Mock(), Mock(), 100, 100, Mock()) + thrift_backend.execute_command(Mock(), Mock(), 100, 100, Mock(), Mock()) self.assertEqual(display_message, str(cm.exception)) self.assertIn(diagnostic_info, str(cm.exception.message_with_context())) @@ -388,7 +388,7 @@ def test_direct_results_uses_display_message_if_available(self, tcli_service_cla thrift_backend = ThriftBackend("foobar", 443, "path", [], auth_provider=AuthProvider()) with self.assertRaises(DatabaseError) as cm: - thrift_backend.execute_command(Mock(), Mock(), 100, 100, Mock()) + thrift_backend.execute_command(Mock(), Mock(), 100, 100, Mock(), Mock()) self.assertEqual(display_message, str(cm.exception)) self.assertIn(diagnostic_info, str(cm.exception.message_with_context())) @@ -666,7 +666,7 @@ def test_execute_statement_calls_client_and_handle_execute_response(self, tcli_s thrift_backend._handle_execute_response = Mock() cursor_mock = Mock() - thrift_backend.execute_command("foo", Mock(), 100, 200, cursor_mock) + thrift_backend.execute_command("foo", Mock(), 100, 200, Mock(), cursor_mock) # Check call to client req = tcli_service_instance.ExecuteStatement.call_args[0][0] get_direct_results = ttypes.TSparkGetDirectResults(maxRows=100, maxBytes=200) @@ -825,7 +825,7 @@ def test_non_arrow_non_column_based_set_triggers_exception(self, tcli_service_cl thrift_backend = self._make_fake_thrift_backend() with self.assertRaises(OperationalError) as cm: - thrift_backend.execute_command("foo", Mock(), 100, 100, Mock()) + thrift_backend.execute_command("foo", Mock(), 
100, 100, Mock(), Mock()) self.assertIn("Expected results to be in Arrow or column based format", str(cm.exception)) def test_create_arrow_table_raises_error_for_unsupported_type(self): @@ -1332,7 +1332,7 @@ def test_execute_command_sets_complex_type_fields_correctly(self, mock_handle_ex complex_arg_types["_use_arrow_native_decimals"] = decimals thrift_backend = ThriftBackend("foobar", 443, "path", [], auth_provider=AuthProvider(), **complex_arg_types) - thrift_backend.execute_command(Mock(), Mock(), 100, 100, Mock()) + thrift_backend.execute_command(Mock(), Mock(), 100, 100, Mock(), Mock()) t_execute_statement_req = tcli_service_instance.ExecuteStatement.call_args[0][0] # If the value is unset, the native type should default to True From 023c9dbaeab2f5e47088e940aaef915f6654a80b Mon Sep 17 00:00:00 2001 From: Mohit Singla Date: Sat, 24 Sep 2022 19:14:56 +0200 Subject: [PATCH 11/17] Adding unit test Signed-off-by: Mohit Singla --- src/databricks/sql/client.py | 10 ++++++++-- src/databricks/sql/thrift_backend.py | 6 ++++-- tests/e2e/common/large_queries_mixin.py | 1 - tests/unit/test_fetches.py | 4 ++-- tests/unit/test_thrift_backend.py | 19 +++++++++++++++++++ 5 files changed, 33 insertions(+), 7 deletions(-) diff --git a/src/databricks/sql/client.py b/src/databricks/sql/client.py index bba8ea063..655954eb3 100644 --- a/src/databricks/sql/client.py +++ b/src/databricks/sql/client.py @@ -1,4 +1,5 @@ from typing import Dict, Tuple, List, Optional, Any, Union +from xmlrpc.client import boolean import pandas import pyarrow @@ -251,6 +252,7 @@ def __init__( thrift_backend: ThriftBackend, result_buffer_size_bytes: int = DEFAULT_RESULT_BUFFER_SIZE_BYTES, arraysize: int = DEFAULT_ARRAY_SIZE, + lz4_compression: bool = True, ) -> None: """ These objects represent a database cursor, which is used to manage the context of a fetch @@ -264,6 +266,7 @@ def __init__( self.buffer_size_bytes = result_buffer_size_bytes self.active_result_set: Union[ResultSet, None] = None 
self.arraysize = arraysize + self.lz4_compression: bool = lz4_compression # Note that Cursor closed => active result set closed, but not vice versa self.open = True self.executing_command_id = None @@ -297,7 +300,7 @@ def _check_not_closed(self): raise Error("Attempting operation on closed cursor") def execute( - self, operation: str, parameters: Optional[Dict[str, str]] = None, use_lz4_compression: bool = True + self, operation: str, parameters: Optional[Dict[str, str]] = None ) -> "Cursor": """ Execute a query and wait for execution to complete. @@ -318,7 +321,7 @@ def execute( session_handle=self.connection._session_handle, max_rows=self.arraysize, max_bytes=self.buffer_size_bytes, - use_lz4_compression=use_lz4_compression, + lz4_compression=self.lz4_compression, cursor=self, ) self.active_result_set = ResultSet( @@ -591,6 +594,9 @@ def setoutputsize(self, size, column=None): """Does nothing by default""" pass + def setLZ4Compression(self, lz4_compression): + self.lz4_compression = lz4_compression + class ResultSet: def __init__( diff --git a/src/databricks/sql/thrift_backend.py b/src/databricks/sql/thrift_backend.py index 7f2edfa1c..9e2d68fed 100644 --- a/src/databricks/sql/thrift_backend.py +++ b/src/databricks/sql/thrift_backend.py @@ -797,7 +797,9 @@ def _check_direct_results_for_error(t_spark_direct_results): t_spark_direct_results.closeOperation ) - def execute_command(self, operation, session_handle, max_rows, max_bytes, use_lz4_compression, cursor): + def execute_command( + self, operation, session_handle, max_rows, max_bytes, lz4_compression, cursor + ): assert session_handle is not None spark_arrow_types = ttypes.TSparkArrowTypes( @@ -816,7 +818,7 @@ def execute_command(self, operation, session_handle, max_rows, max_bytes, use_lz maxRows=max_rows, maxBytes=max_bytes ), canReadArrowResult=True, - canDecompressLZ4Result=use_lz4_compression, + canDecompressLZ4Result=lz4_compression, canDownloadResult=False, confOverlay={ # We want to receive proper 
Timestamp arrow types. diff --git a/tests/e2e/common/large_queries_mixin.py b/tests/e2e/common/large_queries_mixin.py index cf2736c0c..d59e0a9fe 100644 --- a/tests/e2e/common/large_queries_mixin.py +++ b/tests/e2e/common/large_queries_mixin.py @@ -80,7 +80,6 @@ def test_long_running_query(self): scale0 = 10000 scale_factor = 1 with self.cursor() as cursor: - cursor.execute("SET use_cached_result=false") while duration < min_duration: self.assertLess(scale_factor, 512, msg="Detected infinite loop") start = time.time() diff --git a/tests/unit/test_fetches.py b/tests/unit/test_fetches.py index 29853b72a..33fca0751 100644 --- a/tests/unit/test_fetches.py +++ b/tests/unit/test_fetches.py @@ -38,7 +38,7 @@ def make_dummy_result_set_from_initial_results(initial_results): has_been_closed_server_side=True, has_more_rows=False, description=Mock(), - lz4_compressed=False, + lz4_compressed=Mock(), command_handle=None, arrow_queue=arrow_queue, arrow_schema_bytes=schema.serialize().to_pybytes())) @@ -72,7 +72,7 @@ def fetch_results(op_handle, max_rows, max_bytes, expected_row_start_offset, lz4 has_more_rows=True, description=[(f'col{col_id}', 'integer', None, None, None, None, None) for col_id in range(num_cols)], - lz4_compressed=False, + lz4_compressed=Mock(), command_handle=None, arrow_queue=None, arrow_schema_bytes=None)) diff --git a/tests/unit/test_thrift_backend.py b/tests/unit/test_thrift_backend.py index b759a3aba..40614e434 100644 --- a/tests/unit/test_thrift_backend.py +++ b/tests/unit/test_thrift_backend.py @@ -857,6 +857,25 @@ def test_create_arrow_table_calls_correct_conversion_method(self, convert_col_mo thrift_backend._create_arrow_table(t_arrow_set, lz4_compressed, schema, Mock()) convert_arrow_mock.assert_called_once_with(arrow_batches, lz4_compressed, schema) + @patch("lz4.frame.decompress") + @patch("pyarrow.ipc.open_stream") + def test_convert_arrow_based_set_to_arrow_table(self, open_stream_mock, lz4_decompress_mock): + thrift_backend = 
ThriftBackend("foobar", 443, "path", [], auth_provider=AuthProvider()) + + lz4_decompress_mock.return_value = bytearray('Testing','utf-8') + + schema = pyarrow.schema([ + pyarrow.field("column1", pyarrow.int32()), + ]).serialize().to_pybytes() + + arrow_batches = [ttypes.TSparkArrowBatch(batch=bytearray('Testing','utf-8'), rowCount=1) for _ in range(10)] + thrift_backend._convert_arrow_based_set_to_arrow_table(arrow_batches, False, schema) + lz4_decompress_mock.assert_not_called() + + thrift_backend._convert_arrow_based_set_to_arrow_table(arrow_batches, True, schema) + lz4_decompress_mock.assert_called() + + def test_convert_column_based_set_to_arrow_table_without_nulls(self): # Deliberately duplicate the column name to check that dups work field_names = ["column1", "column2", "column3", "column3"] From 6a43ab2a3294b25ac8134fad6c6a29f8d1483698 Mon Sep 17 00:00:00 2001 From: Mohit Singla Date: Sat, 24 Sep 2022 19:41:38 +0200 Subject: [PATCH 12/17] Moving compression config to cursor Signed-off-by: Mohit Singla --- src/databricks/sql/client.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/databricks/sql/client.py b/src/databricks/sql/client.py index 655954eb3..cbc08d491 100644 --- a/src/databricks/sql/client.py +++ b/src/databricks/sql/client.py @@ -208,6 +208,7 @@ def cursor( self, arraysize: int = DEFAULT_ARRAY_SIZE, buffer_size_bytes: int = DEFAULT_RESULT_BUFFER_SIZE_BYTES, + lz4_compression: bool = True ) -> "Cursor": """ Return a new Cursor object using the connection. 
@@ -222,6 +223,7 @@ def cursor( self.thrift_backend, arraysize=arraysize, result_buffer_size_bytes=buffer_size_bytes, + lz4_compression=lz4_compression ) self._cursors.append(cursor) return cursor @@ -252,7 +254,7 @@ def __init__( thrift_backend: ThriftBackend, result_buffer_size_bytes: int = DEFAULT_RESULT_BUFFER_SIZE_BYTES, arraysize: int = DEFAULT_ARRAY_SIZE, - lz4_compression: bool = True, + lz4_compression: bool = True ) -> None: """ These objects represent a database cursor, which is used to manage the context of a fetch From 8cdf57d7011e4cafa49b4b71137e9dc56026eae8 Mon Sep 17 00:00:00 2001 From: Mohit Singla Date: Mon, 26 Sep 2022 10:10:35 +0200 Subject: [PATCH 13/17] Reformatting Signed-off-by: Mohit Singla --- src/databricks/sql/client.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/databricks/sql/client.py b/src/databricks/sql/client.py index cbc08d491..73762f7ee 100644 --- a/src/databricks/sql/client.py +++ b/src/databricks/sql/client.py @@ -208,7 +208,7 @@ def cursor( self, arraysize: int = DEFAULT_ARRAY_SIZE, buffer_size_bytes: int = DEFAULT_RESULT_BUFFER_SIZE_BYTES, - lz4_compression: bool = True + lz4_compression: bool = True, ) -> "Cursor": """ Return a new Cursor object using the connection. 
@@ -223,7 +223,7 @@ def cursor( self.thrift_backend, arraysize=arraysize, result_buffer_size_bytes=buffer_size_bytes, - lz4_compression=lz4_compression + lz4_compression=lz4_compression, ) self._cursors.append(cursor) return cursor @@ -254,7 +254,7 @@ def __init__( thrift_backend: ThriftBackend, result_buffer_size_bytes: int = DEFAULT_RESULT_BUFFER_SIZE_BYTES, arraysize: int = DEFAULT_ARRAY_SIZE, - lz4_compression: bool = True + lz4_compression: bool = True, ) -> None: """ These objects represent a database cursor, which is used to manage the context of a fetch From f3cdf24e86ce16721558fac28432c7b784649ef0 Mon Sep 17 00:00:00 2001 From: Mohit Singla Date: Tue, 27 Sep 2022 23:26:18 +0200 Subject: [PATCH 14/17] Adding and modifying tests Signed-off-by: Mohit Singla --- tests/e2e/common/large_queries_mixin.py | 52 ++++++++++++++----------- tests/e2e/driver_tests.py | 14 +++++++ tests/unit/test_thrift_backend.py | 23 +++++++++++ 3 files changed, 66 insertions(+), 23 deletions(-) diff --git a/tests/e2e/common/large_queries_mixin.py b/tests/e2e/common/large_queries_mixin.py index d59e0a9fe..26264fd0a 100644 --- a/tests/e2e/common/large_queries_mixin.py +++ b/tests/e2e/common/large_queries_mixin.py @@ -49,11 +49,13 @@ def test_query_with_large_wide_result_set(self): # This is used by PyHive tests to determine the buffer size self.arraysize = 1000 with self.cursor() as cursor: - uuids = ", ".join(["uuid() uuid{}".format(i) for i in range(cols)]) - cursor.execute("SELECT id, {uuids} FROM RANGE({rows})".format(uuids=uuids, rows=rows)) - for row_id, row in enumerate(self.fetch_rows(cursor, rows, fetchmany_size)): - self.assertEqual(row[0], row_id) # Verify no rows are dropped in the middle. 
- self.assertEqual(len(row[1]), 36) + for lz4_compression in [False, True]: + cursor.setLZ4Compression(lz4_compression) + uuids = ", ".join(["uuid() uuid{}".format(i) for i in range(cols)]) + cursor.execute("SELECT id, {uuids} FROM RANGE({rows})".format(uuids=uuids, rows=rows)) + for row_id, row in enumerate(self.fetch_rows(cursor, rows, fetchmany_size)): + self.assertEqual(row[0], row_id) # Verify no rows are dropped in the middle. + self.assertEqual(len(row[1]), 36) def test_query_with_large_narrow_result_set(self): resultSize = 300 * 1000 * 1000 # 300 MB @@ -65,9 +67,11 @@ def test_query_with_large_narrow_result_set(self): # This is used by PyHive tests to determine the buffer size self.arraysize = 10000000 with self.cursor() as cursor: - cursor.execute("SELECT * FROM RANGE({rows})".format(rows=rows)) - for row_id, row in enumerate(self.fetch_rows(cursor, rows, fetchmany_size)): - self.assertEqual(row[0], row_id) + for lz4_compression in [False, True]: + cursor.setLZ4Compression(lz4_compression) + cursor.execute("SELECT * FROM RANGE({rows})".format(rows=rows)) + for row_id, row in enumerate(self.fetch_rows(cursor, rows, fetchmany_size)): + self.assertEqual(row[0], row_id) def test_long_running_query(self): """ Incrementally increase query size until it takes at least 5 minutes, @@ -80,21 +84,23 @@ def test_long_running_query(self): scale0 = 10000 scale_factor = 1 with self.cursor() as cursor: - while duration < min_duration: - self.assertLess(scale_factor, 512, msg="Detected infinite loop") - start = time.time() + for lz4_compression in [False, True]: + cursor.setLZ4Compression(lz4_compression) + while duration < min_duration: + self.assertLess(scale_factor, 512, msg="Detected infinite loop") + start = time.time() - cursor.execute("""SELECT count(*) - FROM RANGE({scale}) x - JOIN RANGE({scale0}) y - ON from_unixtime(x.id * y.id, "yyyy-MM-dd") LIKE "%not%a%date%" - """.format(scale=scale_factor * scale0, scale0=scale0)) + cursor.execute("""SELECT count(*) + FROM 
RANGE({scale}) x + JOIN RANGE({scale0}) y + ON from_unixtime(x.id * y.id, "yyyy-MM-dd") LIKE "%not%a%date%" + """.format(scale=scale_factor * scale0, scale0=scale0)) - n, = cursor.fetchone() - self.assertEqual(n, 0) + n, = cursor.fetchone() + self.assertEqual(n, 0) - duration = time.time() - start - current_fraction = duration / min_duration - print('Took {} s with scale factor={}'.format(duration, scale_factor)) - # Extrapolate linearly to reach 5 min and add 50% padding to push over the limit - scale_factor = math.ceil(1.5 * scale_factor / current_fraction) + duration = time.time() - start + current_fraction = duration / min_duration + print('Took {} s with scale factor={}'.format(duration, scale_factor)) + # Extrapolate linearly to reach 5 min and add 50% padding to push over the limit + scale_factor = math.ceil(1.5 * scale_factor / current_fraction) diff --git a/tests/e2e/driver_tests.py b/tests/e2e/driver_tests.py index 9e400770d..502a081ae 100644 --- a/tests/e2e/driver_tests.py +++ b/tests/e2e/driver_tests.py @@ -510,6 +510,20 @@ def test_timezone_with_timestamp(self): self.assertEqual(arrow_result_table.field(0).type, ts_type) self.assertEqual(arrow_result_value, expected.timestamp() * 1000000) + @skipUnless(pysql_supports_arrow(), 'arrow test needs arrow support') + def test_can_flip_compression(self): + with self.cursor() as cursor: + cursor.execute("SELECT array(1,2,3,4)") + cursor.fetchall() + lz4_compressed = cursor.active_result_set.lz4_compressed + #The endpoint should support compression + self.assertEqual(lz4_compressed, True) + cursor.setLZ4Compression(False) + cursor.execute("SELECT array(1,2,3,4)") + cursor.fetchall() + lz4_compressed = cursor.active_result_set.lz4_compressed + self.assertEqual(lz4_compressed, False) + def _should_have_native_complex_types(self): return pysql_has_version(">=", 2) and is_thrift_v5_plus(self.arguments) diff --git a/tests/unit/test_thrift_backend.py b/tests/unit/test_thrift_backend.py index 40614e434..358a72947 
100644 --- a/tests/unit/test_thrift_backend.py +++ b/tests/unit/test_thrift_backend.py @@ -309,6 +309,29 @@ def test_handle_execute_response_checks_operation_state_in_direct_results(self): thrift_backend._handle_execute_response(t_execute_resp, Mock()) self.assertIn("some information about the error", str(cm.exception)) + def test_handle_execute_response_sets_compression_in_direct_results(self): + for resp_type in self.execute_response_types: + lz4Compressed=Mock() + resultSet=MagicMock() + resultSet.results.startRowOffset = 0 + t_execute_resp = resp_type( + status=Mock(), + operationHandle=Mock(), + directResults=ttypes.TSparkDirectResults( + operationStatus= Mock(), + resultSetMetadata=ttypes.TGetResultSetMetadataResp( + status=self.okay_status, + resultFormat=ttypes.TSparkRowSetType.ARROW_BASED_SET, + schema=MagicMock(), + arrowSchema=MagicMock(), + lz4Compressed=lz4Compressed), + resultSet=resultSet, + closeOperation=None)) + thrift_backend = ThriftBackend("foobar", 443, "path", [], auth_provider=AuthProvider()) + + execute_response = thrift_backend._handle_execute_response(t_execute_resp, Mock()) + self.assertEqual(execute_response.lz4_compressed, lz4Compressed) + @patch("databricks.sql.thrift_backend.TCLIService.Client") def test_handle_execute_response_checks_operation_state_in_polls(self, tcli_service_class): tcli_service_instance = tcli_service_class.return_value From 8168c71b2a6f427f1fab6e37fd9cf672ea88e401 Mon Sep 17 00:00:00 2001 From: Mohit Singla Date: Wed, 28 Sep 2022 00:38:04 +0200 Subject: [PATCH 15/17] Correcting poetry.lock --- poetry.lock | 4 ---- 1 file changed, 4 deletions(-) diff --git a/poetry.lock b/poetry.lock index 85c28277c..f4e8f7477 100644 --- a/poetry.lock +++ b/poetry.lock @@ -697,10 +697,6 @@ python-dateutil = [ {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"}, {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = 
"sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"}, ] -python-dateutil = [ - {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"}, - {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"}, -] pytz = [ {file = "pytz-2022.2.1-py2.py3-none-any.whl", hash = "sha256:220f481bdafa09c3955dfbdddb7b57780e9a94f5127e35456a48589b9e0c0197"}, {file = "pytz-2022.2.1.tar.gz", hash = "sha256:cea221417204f2d1a2aa03ddae3e867921971d0d76f14d87abb4414415bbdcf5"}, From 8a8a1b538eeceb60bcb7e27524cf138e184626ed Mon Sep 17 00:00:00 2001 From: Mohit Singla Date: Wed, 28 Sep 2022 01:34:05 +0200 Subject: [PATCH 16/17] Moving lz4 compression in kwargs Signed-off-by: Mohit Singla --- poetry.lock | 229 +++++++++++++++++++----- src/databricks/sql/client.py | 10 +- tests/e2e/common/large_queries_mixin.py | 50 +++--- tests/e2e/driver_tests.py | 2 +- 4 files changed, 215 insertions(+), 76 deletions(-) diff --git a/poetry.lock b/poetry.lock index f4e8f7477..e0d197995 100644 --- a/poetry.lock +++ b/poetry.lock @@ -8,6 +8,7 @@ python-versions = ">=3.6.2" [package.dependencies] lazy-object-proxy = ">=1.4.0" +setuptools = ">=20.0" typed-ast = {version = ">=1.4.0,<2.0", markers = "implementation_name == \"cpython\" and python_version < \"3.8\""} typing-extensions = {version = ">=3.10", markers = "python_version < \"3.10\""} wrapt = ">=1.11,<2" @@ -140,19 +141,6 @@ category = "dev" optional = false python-versions = "*" -[[package]] -name = "lz4" -version = "4.0.2" -description = "LZ4 Bindings for Python" -category = "main" -optional = false -python-versions = ">=3.7" - -[package.extras] -docs = ["sphinx (>=1.6.0)", "sphinx-bootstrap-theme"] -flake8 = ["flake8"] -tests = ["psutil", "pytest (!=3.3.0)", "pytest-cov"] - [[package]] name = "isort" version = "5.10.1" @@ -175,6 +163,19 @@ category = "dev" optional = false python-versions = 
">=3.6" +[[package]] +name = "lz4" +version = "4.0.2" +description = "LZ4 Bindings for Python" +category = "main" +optional = false +python-versions = ">=3.7" + +[package.extras] +docs = ["sphinx (>=1.6.0)", "sphinx-bootstrap-theme"] +flake8 = ["flake8"] +tests = ["psutil", "pytest (!=3.3.0)", "pytest-cov"] + [[package]] name = "mccabe" version = "0.7.0" @@ -408,6 +409,19 @@ urllib3 = ">=1.21.1,<1.27" socks = ["PySocks (>=1.5.6,!=1.5.7)"] use_chardet_on_py3 = ["chardet (>=3.0.2,<6)"] +[[package]] +name = "setuptools" +version = "65.4.0" +description = "Easily download, build, install, upgrade, and uninstall Python packages" +category = "dev" +optional = false +python-versions = ">=3.7" + +[package.extras] +docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "rst.linker (>=1.9)", "sphinx", "sphinx-favicon", "sphinx-hoverxref (<2)", "sphinx-inline-tabs", "sphinx-notfound-page (==0.8.3)", "sphinx-reredirects", "sphinxcontrib-towncrier"] +testing = ["build[virtualenv]", "filelock (>=3.4.0)", "flake8 (<5)", "flake8-2020", "ini2toml[lite] (>=0.9)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "mock", "pip (>=19.1)", "pip-run (>=8.8)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)", "pytest-perf", "pytest-xdist", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] +testing-integration = ["build[virtualenv]", "filelock (>=3.4.0)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pytest", "pytest-enabler", "pytest-xdist", "tomli", "virtualenv (>=13.0.0)", "wheel"] + [[package]] name = "six" version = "1.16.0" @@ -492,19 +506,45 @@ testing = ["func-timeout", "jaraco.itertools", "pytest (>=6)", "pytest-black (>= [metadata] lock-version = "1.1" python-versions = "^3.7.1" -content-hash = "f283eca35466a0294e09deb8535da2633219db696ad8bbc74dffd4592b0d66ad" +content-hash = 
"5de07f9b2c9a2f80ca0411f0f99b6b529b00b034f2ad13199cf29c862e125a57" [metadata.files] astroid = [ {file = "astroid-2.11.7-py3-none-any.whl", hash = "sha256:86b0a340a512c65abf4368b80252754cda17c02cdbbd3f587dddf98112233e7b"}, {file = "astroid-2.11.7.tar.gz", hash = "sha256:bb24615c77f4837c707669d16907331374ae8a964650a66999da3f5ca68dc946"}, ] -atomicwrites = [] +atomicwrites = [ + {file = "atomicwrites-1.4.1.tar.gz", hash = "sha256:81b2c9071a49367a7f770170e5eec8cb66567cfbbc8c73d20ce5ca4a8d71cf11"}, +] attrs = [ {file = "attrs-22.1.0-py2.py3-none-any.whl", hash = "sha256:86efa402f67bf2df34f51a335487cf46b1ec130d02b8d39fd248abfd30da551c"}, {file = "attrs-22.1.0.tar.gz", hash = "sha256:29adc2665447e5191d0e7c568fde78b21f9672d344281d0c6e1ab085429b22b6"}, ] -black = [] +black = [ + {file = "black-22.6.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:f586c26118bc6e714ec58c09df0157fe2d9ee195c764f630eb0d8e7ccce72e69"}, + {file = "black-22.6.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b270a168d69edb8b7ed32c193ef10fd27844e5c60852039599f9184460ce0807"}, + {file = "black-22.6.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6797f58943fceb1c461fb572edbe828d811e719c24e03375fd25170ada53825e"}, + {file = "black-22.6.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c85928b9d5f83b23cee7d0efcb310172412fbf7cb9d9ce963bd67fd141781def"}, + {file = "black-22.6.0-cp310-cp310-win_amd64.whl", hash = "sha256:f6fe02afde060bbeef044af7996f335fbe90b039ccf3f5eb8f16df8b20f77666"}, + {file = "black-22.6.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:cfaf3895a9634e882bf9d2363fed5af8888802d670f58b279b0bece00e9a872d"}, + {file = "black-22.6.0-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:94783f636bca89f11eb5d50437e8e17fbc6a929a628d82304c80fa9cd945f256"}, + {file = "black-22.6.0-cp36-cp36m-win_amd64.whl", hash = "sha256:2ea29072e954a4d55a2ff58971b83365eba5d3d357352a07a7a4df0d95f51c78"}, + {file = 
"black-22.6.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:e439798f819d49ba1c0bd9664427a05aab79bfba777a6db94fd4e56fae0cb849"}, + {file = "black-22.6.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:187d96c5e713f441a5829e77120c269b6514418f4513a390b0499b0987f2ff1c"}, + {file = "black-22.6.0-cp37-cp37m-win_amd64.whl", hash = "sha256:074458dc2f6e0d3dab7928d4417bb6957bb834434516f21514138437accdbe90"}, + {file = "black-22.6.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:a218d7e5856f91d20f04e931b6f16d15356db1c846ee55f01bac297a705ca24f"}, + {file = "black-22.6.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:568ac3c465b1c8b34b61cd7a4e349e93f91abf0f9371eda1cf87194663ab684e"}, + {file = "black-22.6.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:6c1734ab264b8f7929cef8ae5f900b85d579e6cbfde09d7387da8f04771b51c6"}, + {file = "black-22.6.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c9a3ac16efe9ec7d7381ddebcc022119794872abce99475345c5a61aa18c45ad"}, + {file = "black-22.6.0-cp38-cp38-win_amd64.whl", hash = "sha256:b9fd45787ba8aa3f5e0a0a98920c1012c884622c6c920dbe98dbd05bc7c70fbf"}, + {file = "black-22.6.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:7ba9be198ecca5031cd78745780d65a3f75a34b2ff9be5837045dce55db83d1c"}, + {file = "black-22.6.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a3db5b6409b96d9bd543323b23ef32a1a2b06416d525d27e0f67e74f1446c8f2"}, + {file = "black-22.6.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:560558527e52ce8afba936fcce93a7411ab40c7d5fe8c2463e279e843c0328ee"}, + {file = "black-22.6.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b154e6bbde1e79ea3260c4b40c0b7b3109ffcdf7bc4ebf8859169a6af72cd70b"}, + {file = "black-22.6.0-cp39-cp39-win_amd64.whl", hash = "sha256:4af5bc0e1f96be5ae9bd7aaec219c901a94d6caa2484c21983d043371c733fc4"}, + {file = "black-22.6.0-py3-none-any.whl", hash = 
"sha256:ac609cf8ef5e7115ddd07d85d988d074ed00e10fbc3445aee393e70164a2219c"}, + {file = "black-22.6.0.tar.gz", hash = "sha256:6c6d39e28aed379aec40da1c65434c77d75e65bb59a1e1c283de545fb4e7c6c9"}, +] certifi = [ {file = "certifi-2022.6.15-py3-none-any.whl", hash = "sha256:fe86415d55e84719d75f8b69414f6438ac3547d2078ab91b67e779ef69378412"}, {file = "certifi-2022.6.15.tar.gz", hash = "sha256:84c85a9078b11105f04f3036a9482ae10e4621616db313fe045dd24743a0820d"}, @@ -513,8 +553,14 @@ charset-normalizer = [ {file = "charset-normalizer-2.1.1.tar.gz", hash = "sha256:5a3d016c7c547f69d6f81fb0db9449ce888b418b5b9952cc5e6e66843e9dd845"}, {file = "charset_normalizer-2.1.1-py3-none-any.whl", hash = "sha256:83e9a75d1911279afd89352c68b45348559d1fc0506b054b346651b5e7fee29f"}, ] -click = [] -colorama = [] +click = [ + {file = "click-8.1.3-py3-none-any.whl", hash = "sha256:bb4d8133cb15a609f44e8213d9b391b0809795062913b383c62be0ee95b1db48"}, + {file = "click-8.1.3.tar.gz", hash = "sha256:7682dc8afb30297001674575ea00d1814d808d6a36af415a82bd481d37ba7b8e"}, +] +colorama = [ + {file = "colorama-0.4.5-py2.py3-none-any.whl", hash = "sha256:854bf444933e37f5824ae7bfc1e98d5bce2ebe4160d46b5edf346a89358e99da"}, + {file = "colorama-0.4.5.tar.gz", hash = "sha256:e6c6b4334fc50988a639d9b98aa429a0b57da6e17b9a44f0451f930b6967b7a4"}, +] dill = [ {file = "dill-0.3.5.1-py2.py3-none-any.whl", hash = "sha256:33501d03270bbe410c72639b350e941882a8b0fd55357580fbc873fba0c59302"}, {file = "dill-0.3.5.1.tar.gz", hash = "sha256:d75e41f3eff1eee599d738e76ba8f4ad98ea229db8b085318aa2b3333a208c86"}, @@ -523,8 +569,14 @@ idna = [ {file = "idna-3.3-py3-none-any.whl", hash = "sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff"}, {file = "idna-3.3.tar.gz", hash = "sha256:9d643ff0a55b762d5cdb124b8eaa99c66322e2157b69160bc32796e824360e6d"}, ] -importlib-metadata = [] -iniconfig = [] +importlib-metadata = [ + {file = "importlib_metadata-4.12.0-py3-none-any.whl", hash = 
"sha256:7401a975809ea1fdc658c3aa4f78cc2195a0e019c5cbc4c06122884e9ae80c23"}, + {file = "importlib_metadata-4.12.0.tar.gz", hash = "sha256:637245b8bab2b6502fcbc752cc4b7a6f6243bb02b31c5c26156ad103d3d45670"}, +] +iniconfig = [ + {file = "iniconfig-1.1.1-py2.py3-none-any.whl", hash = "sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3"}, + {file = "iniconfig-1.1.1.tar.gz", hash = "sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32"}, +] isort = [ {file = "isort-5.10.1-py3-none-any.whl", hash = "sha256:6f62d78e2f89b4500b080fe3a81690850cd254227f27f75c3a0c491a1f351ba7"}, {file = "isort-5.10.1.tar.gz", hash = "sha256:e8443a5e7a020e9d7f97f1d7d9cd17c88bcb3bc7e218bf9cf5095fe550be2951"}, @@ -568,10 +620,6 @@ lazy-object-proxy = [ {file = "lazy_object_proxy-1.7.1-cp39-cp39-win_amd64.whl", hash = "sha256:677ea950bef409b47e51e733283544ac3d660b709cfce7b187f5ace137960d61"}, {file = "lazy_object_proxy-1.7.1-pp37.pp38-none-any.whl", hash = "sha256:d66906d5785da8e0be7360912e99c9188b70f52c422f9fc18223347235691a84"}, ] -mccabe = [ - {file = "mccabe-0.7.0-py2.py3-none-any.whl", hash = "sha256:6c2d30ab6be0e4a46919781807b4f0d834ebdd6c6e3dca0bda5a15f863427b6e"}, - {file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"}, -] lz4 = [ {file = "lz4-4.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:3881573c3db902db370e072eb64b40c7c8289b94b2a731e051858cc198f890e8"}, {file = "lz4-4.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:154e6e9f58a7bafc4d2a1395160305b78fc82fa708bfa58cf0ad977c443d1f8f"}, @@ -594,7 +642,10 @@ lz4 = [ {file = "lz4-4.0.2-cp39-cp39-win32.whl", hash = "sha256:a8e02c2477bd704f43113ac8dd966c361187383591388818d74e1b73e4674759"}, {file = "lz4-4.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:ee73357412c5505f6ba0ea61ff71455e2e4c1e04d8e60f17f3cd937261d773fa"}, {file = "lz4-4.0.2.tar.gz", hash = 
"sha256:083b7172c2938412ae37c3a090250bfdd9e4a6e855442594f86c3608ed12729b"}, - +] +mccabe = [ + {file = "mccabe-0.7.0-py2.py3-none-any.whl", hash = "sha256:6c2d30ab6be0e4a46919781807b4f0d834ebdd6c6e3dca0bda5a15f863427b6e"}, + {file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"}, ] mypy = [ {file = "mypy-0.950-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:cf9c261958a769a3bd38c3e133801ebcd284ffb734ea12d01457cb09eacf7d7b"}, @@ -621,7 +672,10 @@ mypy = [ {file = "mypy-0.950-py3-none-any.whl", hash = "sha256:a4d9898f46446bfb6405383b57b96737dcfd0a7f25b748e78ef3e8c576bba3cb"}, {file = "mypy-0.950.tar.gz", hash = "sha256:1b333cfbca1762ff15808a0ef4f71b5d3eed8528b23ea1c3fb50543c867d68de"}, ] -mypy-extensions = [] +mypy-extensions = [ + {file = "mypy_extensions-0.4.3-py2.py3-none-any.whl", hash = "sha256:090fedd75945a69ae91ce1303b5824f428daf5a028d2f6ab8a299250a846f15d"}, + {file = "mypy_extensions-0.4.3.tar.gz", hash = "sha256:2d82818f5bb3e369420cb3c4060a7970edba416647068eb4c5343488a6c604a8"}, +] numpy = [ {file = "numpy-1.21.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:38e8648f9449a549a7dfe8d8755a5979b45b3538520d1e735637ef28e8c2dc50"}, {file = "numpy-1.21.1-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:fd7d7409fa643a91d0a05c7554dd68aa9c9bb16e186f6ccfe40d6e003156e33a"}, @@ -652,13 +706,57 @@ numpy = [ {file = "numpy-1.21.1-pp37-pypy37_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:2d4d1de6e6fb3d28781c73fbde702ac97f03d79e4ffd6598b880b2d95d62ead4"}, {file = "numpy-1.21.1.zip", hash = "sha256:dff4af63638afcc57a3dfb9e4b26d434a7a602d225b42d746ea7fe2edf1342fd"}, ] -oauthlib = [] -packaging = [] -pandas = [] -pathspec = [] -platformdirs = [] -pluggy = [] -py = [] +oauthlib = [ + {file = "oauthlib-3.2.0-py3-none-any.whl", hash = "sha256:6db33440354787f9b7f3a6dbd4febf5d0f93758354060e802f6c06cb493022fe"}, + {file = "oauthlib-3.2.0.tar.gz", hash = 
"sha256:23a8208d75b902797ea29fd31fa80a15ed9dc2c6c16fe73f5d346f83f6fa27a2"}, +] +packaging = [ + {file = "packaging-21.3-py3-none-any.whl", hash = "sha256:ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522"}, + {file = "packaging-21.3.tar.gz", hash = "sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb"}, +] +pandas = [ + {file = "pandas-1.3.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:62d5b5ce965bae78f12c1c0df0d387899dd4211ec0bdc52822373f13a3a022b9"}, + {file = "pandas-1.3.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:adfeb11be2d54f275142c8ba9bf67acee771b7186a5745249c7d5a06c670136b"}, + {file = "pandas-1.3.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:60a8c055d58873ad81cae290d974d13dd479b82cbb975c3e1fa2cf1920715296"}, + {file = "pandas-1.3.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fd541ab09e1f80a2a1760032d665f6e032d8e44055d602d65eeea6e6e85498cb"}, + {file = "pandas-1.3.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2651d75b9a167cc8cc572cf787ab512d16e316ae00ba81874b560586fa1325e0"}, + {file = "pandas-1.3.5-cp310-cp310-win_amd64.whl", hash = "sha256:aaf183a615ad790801fa3cf2fa450e5b6d23a54684fe386f7e3208f8b9bfbef6"}, + {file = "pandas-1.3.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:344295811e67f8200de2390093aeb3c8309f5648951b684d8db7eee7d1c81fb7"}, + {file = "pandas-1.3.5-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:552020bf83b7f9033b57cbae65589c01e7ef1544416122da0c79140c93288f56"}, + {file = "pandas-1.3.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5cce0c6bbeb266b0e39e35176ee615ce3585233092f685b6a82362523e59e5b4"}, + {file = "pandas-1.3.5-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7d28a3c65463fd0d0ba8bbb7696b23073efee0510783340a44b08f5e96ffce0c"}, + {file = "pandas-1.3.5-cp37-cp37m-win32.whl", hash = 
"sha256:a62949c626dd0ef7de11de34b44c6475db76995c2064e2d99c6498c3dba7fe58"}, + {file = "pandas-1.3.5-cp37-cp37m-win_amd64.whl", hash = "sha256:8025750767e138320b15ca16d70d5cdc1886e8f9cc56652d89735c016cd8aea6"}, + {file = "pandas-1.3.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:fe95bae4e2d579812865db2212bb733144e34d0c6785c0685329e5b60fcb85dd"}, + {file = "pandas-1.3.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5f261553a1e9c65b7a310302b9dbac31cf0049a51695c14ebe04e4bfd4a96f02"}, + {file = "pandas-1.3.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8b6dbec5f3e6d5dc80dcfee250e0a2a652b3f28663492f7dab9a24416a48ac39"}, + {file = "pandas-1.3.5-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d3bc49af96cd6285030a64779de5b3688633a07eb75c124b0747134a63f4c05f"}, + {file = "pandas-1.3.5-cp38-cp38-win32.whl", hash = "sha256:b6b87b2fb39e6383ca28e2829cddef1d9fc9e27e55ad91ca9c435572cdba51bf"}, + {file = "pandas-1.3.5-cp38-cp38-win_amd64.whl", hash = "sha256:a395692046fd8ce1edb4c6295c35184ae0c2bbe787ecbe384251da609e27edcb"}, + {file = "pandas-1.3.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:bd971a3f08b745a75a86c00b97f3007c2ea175951286cdda6abe543e687e5f2f"}, + {file = "pandas-1.3.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:37f06b59e5bc05711a518aa10beaec10942188dccb48918bb5ae602ccbc9f1a0"}, + {file = "pandas-1.3.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2c21778a688d3712d35710501f8001cdbf96eb70a7c587a3d5613573299fdca6"}, + {file = "pandas-1.3.5-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3345343206546545bc26a05b4602b6a24385b5ec7c75cb6059599e3d56831da2"}, + {file = "pandas-1.3.5-cp39-cp39-win32.whl", hash = "sha256:c69406a2808ba6cf580c2255bcf260b3f214d2664a3a4197d0e640f573b46fd3"}, + {file = "pandas-1.3.5-cp39-cp39-win_amd64.whl", 
hash = "sha256:32e1a26d5ade11b547721a72f9bfc4bd113396947606e00d5b4a5b79b3dcb006"}, + {file = "pandas-1.3.5.tar.gz", hash = "sha256:1e4285f5de1012de20ca46b188ccf33521bff61ba5c5ebd78b4fb28e5416a9f1"}, +] +pathspec = [ + {file = "pathspec-0.9.0-py2.py3-none-any.whl", hash = "sha256:7d15c4ddb0b5c802d161efc417ec1a2558ea2653c2e8ad9c19098201dc1c993a"}, + {file = "pathspec-0.9.0.tar.gz", hash = "sha256:e564499435a2673d586f6b2130bb5b95f04a3ba06f81b8f895b651a3c76aabb1"}, +] +platformdirs = [ + {file = "platformdirs-2.5.2-py3-none-any.whl", hash = "sha256:027d8e83a2d7de06bbac4e5ef7e023c02b863d7ea5d079477e722bb41ab25788"}, + {file = "platformdirs-2.5.2.tar.gz", hash = "sha256:58c8abb07dcb441e6ee4b11d8df0ac856038f944ab98b7be6b27b2a3c7feef19"}, +] +pluggy = [ + {file = "pluggy-1.0.0-py2.py3-none-any.whl", hash = "sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3"}, + {file = "pluggy-1.0.0.tar.gz", hash = "sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159"}, +] +py = [ + {file = "py-1.11.0-py2.py3-none-any.whl", hash = "sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378"}, + {file = "py-1.11.0.tar.gz", hash = "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719"}, +] pyarrow = [ {file = "pyarrow-9.0.0-cp310-cp310-macosx_10_13_universal2.whl", hash = "sha256:767cafb14278165ad539a2918c14c1b73cf20689747c21375c38e3fe62884902"}, {file = "pyarrow-9.0.0-cp310-cp310-macosx_10_13_x86_64.whl", hash = "sha256:0238998dc692efcb4e41ae74738d7c1234723271ccf520bd8312dca07d49ef8d"}, @@ -691,8 +789,14 @@ pylint = [ {file = "pylint-2.13.9-py3-none-any.whl", hash = "sha256:705c620d388035bdd9ff8b44c5bcdd235bfb49d276d488dd2c8ff1736aa42526"}, {file = "pylint-2.13.9.tar.gz", hash = "sha256:095567c96e19e6f57b5b907e67d265ff535e588fe26b12b5ebe1fc5645b2c731"}, ] -pyparsing = [] -pytest = [] +pyparsing = [ + {file = "pyparsing-3.0.9-py3-none-any.whl", hash = 
"sha256:5026bae9a10eeaefb61dab2f09052b9f4307d44aee4eda64b309723d8d206bbc"}, + {file = "pyparsing-3.0.9.tar.gz", hash = "sha256:2b020ecf7d21b687f219b71ecad3631f644a47f01403fa1d1036b0c6416d70fb"}, +] +pytest = [ + {file = "pytest-7.1.2-py3-none-any.whl", hash = "sha256:13d0e3ccfc2b6e26be000cb6568c832ba67ba32e719443bfe725814d3c42433c"}, + {file = "pytest-7.1.2.tar.gz", hash = "sha256:a06a0425453864a270bc45e71f783330a7428defb4230fb5e6a731fde06ecd45"}, +] python-dateutil = [ {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"}, {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"}, @@ -705,11 +809,51 @@ requests = [ {file = "requests-2.28.1-py3-none-any.whl", hash = "sha256:8fefa2a1a1365bf5520aac41836fbee479da67864514bdb821f31ce07ce65349"}, {file = "requests-2.28.1.tar.gz", hash = "sha256:7c5599b102feddaa661c826c56ab4fee28bfd17f5abca1ebbe3e7f19d7c97983"}, ] -six = [] -thrift = [] -tomli = [] -typed-ast = [] -typing-extensions = [] +setuptools = [ + {file = "setuptools-65.4.0-py3-none-any.whl", hash = "sha256:c2d2709550f15aab6c9110196ea312f468f41cd546bceb24127a1be6fdcaeeb1"}, + {file = "setuptools-65.4.0.tar.gz", hash = "sha256:a8f6e213b4b0661f590ccf40de95d28a177cd747d098624ad3f69c40287297e9"}, +] +six = [ + {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, + {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, +] +thrift = [ + {file = "thrift-0.13.0.tar.gz", hash = "sha256:9af1c86bf73433afc6010ed376a6c6aca2b54099cc0d61895f640870a9ae7d89"}, +] +tomli = [ + {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, + {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, 
+] +typed-ast = [ + {file = "typed_ast-1.5.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:669dd0c4167f6f2cd9f57041e03c3c2ebf9063d0757dc89f79ba1daa2bfca9d4"}, + {file = "typed_ast-1.5.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:211260621ab1cd7324e0798d6be953d00b74e0428382991adfddb352252f1d62"}, + {file = "typed_ast-1.5.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:267e3f78697a6c00c689c03db4876dd1efdfea2f251a5ad6555e82a26847b4ac"}, + {file = "typed_ast-1.5.4-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:c542eeda69212fa10a7ada75e668876fdec5f856cd3d06829e6aa64ad17c8dfe"}, + {file = "typed_ast-1.5.4-cp310-cp310-win_amd64.whl", hash = "sha256:a9916d2bb8865f973824fb47436fa45e1ebf2efd920f2b9f99342cb7fab93f72"}, + {file = "typed_ast-1.5.4-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:79b1e0869db7c830ba6a981d58711c88b6677506e648496b1f64ac7d15633aec"}, + {file = "typed_ast-1.5.4-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a94d55d142c9265f4ea46fab70977a1944ecae359ae867397757d836ea5a3f47"}, + {file = "typed_ast-1.5.4-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:183afdf0ec5b1b211724dfef3d2cad2d767cbefac291f24d69b00546c1837fb6"}, + {file = "typed_ast-1.5.4-cp36-cp36m-win_amd64.whl", hash = "sha256:639c5f0b21776605dd6c9dbe592d5228f021404dafd377e2b7ac046b0349b1a1"}, + {file = "typed_ast-1.5.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:cf4afcfac006ece570e32d6fa90ab74a17245b83dfd6655a6f68568098345ff6"}, + {file = "typed_ast-1.5.4-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ed855bbe3eb3715fca349c80174cfcfd699c2f9de574d40527b8429acae23a66"}, + {file = "typed_ast-1.5.4-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = 
"sha256:6778e1b2f81dfc7bc58e4b259363b83d2e509a65198e85d5700dfae4c6c8ff1c"}, + {file = "typed_ast-1.5.4-cp37-cp37m-win_amd64.whl", hash = "sha256:0261195c2062caf107831e92a76764c81227dae162c4f75192c0d489faf751a2"}, + {file = "typed_ast-1.5.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2efae9db7a8c05ad5547d522e7dbe62c83d838d3906a3716d1478b6c1d61388d"}, + {file = "typed_ast-1.5.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:7d5d014b7daa8b0bf2eaef684295acae12b036d79f54178b92a2b6a56f92278f"}, + {file = "typed_ast-1.5.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:370788a63915e82fd6f212865a596a0fefcbb7d408bbbb13dea723d971ed8bdc"}, + {file = "typed_ast-1.5.4-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:4e964b4ff86550a7a7d56345c7864b18f403f5bd7380edf44a3c1fb4ee7ac6c6"}, + {file = "typed_ast-1.5.4-cp38-cp38-win_amd64.whl", hash = "sha256:683407d92dc953c8a7347119596f0b0e6c55eb98ebebd9b23437501b28dcbb8e"}, + {file = "typed_ast-1.5.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:4879da6c9b73443f97e731b617184a596ac1235fe91f98d279a7af36c796da35"}, + {file = "typed_ast-1.5.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3e123d878ba170397916557d31c8f589951e353cc95fb7f24f6bb69adc1a8a97"}, + {file = "typed_ast-1.5.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ebd9d7f80ccf7a82ac5f88c521115cc55d84e35bf8b446fcd7836eb6b98929a3"}, + {file = "typed_ast-1.5.4-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:98f80dee3c03455e92796b58b98ff6ca0b2a6f652120c263efdba4d6c5e58f72"}, + {file = "typed_ast-1.5.4-cp39-cp39-win_amd64.whl", hash = "sha256:0fdbcf2fef0ca421a3f5912555804296f0b0960f0418c440f5d6d3abb549f3e1"}, + {file = "typed_ast-1.5.4.tar.gz", hash = "sha256:39e21ceb7388e4bb37f4c679d72707ed46c2fbf2a5609b8b8ebc4b067d977df2"}, +] +typing-extensions = [ + {file = 
"typing_extensions-4.3.0-py3-none-any.whl", hash = "sha256:25642c956049920a5aa49edcdd6ab1e06d7e5d467fc00e0506c44ac86fbfca02"}, + {file = "typing_extensions-4.3.0.tar.gz", hash = "sha256:e6d2677a32f47fc7eb2795db1dd15c1f34eff616bcaf2cfb5e997f854fa1c4a6"}, +] urllib3 = [ {file = "urllib3-1.26.12-py2.py3-none-any.whl", hash = "sha256:b930dd878d5a8afb066a637fbb35144fe7901e3b209d1cd4f524bd0e9deee997"}, {file = "urllib3-1.26.12.tar.gz", hash = "sha256:3fa96cf423e6987997fc326ae8df396db2a8b7c667747d47ddd8ecba91f4a74e"}, @@ -780,4 +924,7 @@ wrapt = [ {file = "wrapt-1.14.1-cp39-cp39-win_amd64.whl", hash = "sha256:dee60e1de1898bde3b238f18340eec6148986da0455d8ba7848d50470a7a32fb"}, {file = "wrapt-1.14.1.tar.gz", hash = "sha256:380a85cf89e0e69b7cfbe2ea9f765f004ff419f34194018a6827ac0e3edfed4d"}, ] -zipp = [] +zipp = [ + {file = "zipp-3.8.1-py3-none-any.whl", hash = "sha256:47c40d7fe183a6f21403a199b3e4192cca5774656965b0a4988ad2f8feb5f009"}, + {file = "zipp-3.8.1.tar.gz", hash = "sha256:05b45f1ee8f807d0cc928485ca40a07cb491cf092ff587c0df9cb1fd154848d2"}, +] diff --git a/src/databricks/sql/client.py b/src/databricks/sql/client.py index 73762f7ee..936e4f4db 100644 --- a/src/databricks/sql/client.py +++ b/src/databricks/sql/client.py @@ -153,6 +153,7 @@ def read(self) -> Optional[OAuthToken]: self.host = server_hostname self.port = kwargs.get("_port", 443) self.disable_pandas = kwargs.get("_disable_pandas", False) + self.lz4_compression = kwargs.get("enable_query_result_lz4_compression", True) auth_provider = get_python_sql_connector_auth_provider( server_hostname, **kwargs @@ -208,7 +209,6 @@ def cursor( self, arraysize: int = DEFAULT_ARRAY_SIZE, buffer_size_bytes: int = DEFAULT_RESULT_BUFFER_SIZE_BYTES, - lz4_compression: bool = True, ) -> "Cursor": """ Return a new Cursor object using the connection. 
@@ -223,7 +223,6 @@ def cursor( self.thrift_backend, arraysize=arraysize, result_buffer_size_bytes=buffer_size_bytes, - lz4_compression=lz4_compression, ) self._cursors.append(cursor) return cursor @@ -254,7 +253,6 @@ def __init__( thrift_backend: ThriftBackend, result_buffer_size_bytes: int = DEFAULT_RESULT_BUFFER_SIZE_BYTES, arraysize: int = DEFAULT_ARRAY_SIZE, - lz4_compression: bool = True, ) -> None: """ These objects represent a database cursor, which is used to manage the context of a fetch @@ -268,7 +266,6 @@ def __init__( self.buffer_size_bytes = result_buffer_size_bytes self.active_result_set: Union[ResultSet, None] = None self.arraysize = arraysize - self.lz4_compression: bool = lz4_compression # Note that Cursor closed => active result set closed, but not vice versa self.open = True self.executing_command_id = None @@ -323,7 +320,7 @@ def execute( session_handle=self.connection._session_handle, max_rows=self.arraysize, max_bytes=self.buffer_size_bytes, - lz4_compression=self.lz4_compression, + lz4_compression=self.connection.lz4_compression, cursor=self, ) self.active_result_set = ResultSet( @@ -596,9 +593,6 @@ def setoutputsize(self, size, column=None): """Does nothing by default""" pass - def setLZ4Compression(self, lz4_compression): - self.lz4_compression = lz4_compression - class ResultSet: def __init__( diff --git a/tests/e2e/common/large_queries_mixin.py b/tests/e2e/common/large_queries_mixin.py index 26264fd0a..3e1e45bc4 100644 --- a/tests/e2e/common/large_queries_mixin.py +++ b/tests/e2e/common/large_queries_mixin.py @@ -50,13 +50,15 @@ def test_query_with_large_wide_result_set(self): self.arraysize = 1000 with self.cursor() as cursor: for lz4_compression in [False, True]: - cursor.setLZ4Compression(lz4_compression) + cursor.connection.lz4_compression=lz4_compression uuids = ", ".join(["uuid() uuid{}".format(i) for i in range(cols)]) cursor.execute("SELECT id, {uuids} FROM RANGE({rows})".format(uuids=uuids, rows=rows)) + 
self.assertEqual(lz4_compression, cursor.active_result_set.lz4_compressed) for row_id, row in enumerate(self.fetch_rows(cursor, rows, fetchmany_size)): self.assertEqual(row[0], row_id) # Verify no rows are dropped in the middle. self.assertEqual(len(row[1]), 36) + def test_query_with_large_narrow_result_set(self): resultSize = 300 * 1000 * 1000 # 300 MB width = 8 # sizeof(long) @@ -67,11 +69,9 @@ def test_query_with_large_narrow_result_set(self): # This is used by PyHive tests to determine the buffer size self.arraysize = 10000000 with self.cursor() as cursor: - for lz4_compression in [False, True]: - cursor.setLZ4Compression(lz4_compression) - cursor.execute("SELECT * FROM RANGE({rows})".format(rows=rows)) - for row_id, row in enumerate(self.fetch_rows(cursor, rows, fetchmany_size)): - self.assertEqual(row[0], row_id) + cursor.execute("SELECT * FROM RANGE({rows})".format(rows=rows)) + for row_id, row in enumerate(self.fetch_rows(cursor, rows, fetchmany_size)): + self.assertEqual(row[0], row_id) def test_long_running_query(self): """ Incrementally increase query size until it takes at least 5 minutes, @@ -84,23 +84,21 @@ def test_long_running_query(self): scale0 = 10000 scale_factor = 1 with self.cursor() as cursor: - for lz4_compression in [False, True]: - cursor.setLZ4Compression(lz4_compression) - while duration < min_duration: - self.assertLess(scale_factor, 512, msg="Detected infinite loop") - start = time.time() - - cursor.execute("""SELECT count(*) - FROM RANGE({scale}) x - JOIN RANGE({scale0}) y - ON from_unixtime(x.id * y.id, "yyyy-MM-dd") LIKE "%not%a%date%" - """.format(scale=scale_factor * scale0, scale0=scale0)) - - n, = cursor.fetchone() - self.assertEqual(n, 0) - - duration = time.time() - start - current_fraction = duration / min_duration - print('Took {} s with scale factor={}'.format(duration, scale_factor)) - # Extrapolate linearly to reach 5 min and add 50% padding to push over the limit - scale_factor = math.ceil(1.5 * scale_factor / 
current_fraction) + while duration < min_duration: + self.assertLess(scale_factor, 512, msg="Detected infinite loop") + start = time.time() + + cursor.execute("""SELECT count(*) + FROM RANGE({scale}) x + JOIN RANGE({scale0}) y + ON from_unixtime(x.id * y.id, "yyyy-MM-dd") LIKE "%not%a%date%" + """.format(scale=scale_factor * scale0, scale0=scale0)) + + n, = cursor.fetchone() + self.assertEqual(n, 0) + + duration = time.time() - start + current_fraction = duration / min_duration + print('Took {} s with scale factor={}'.format(duration, scale_factor)) + # Extrapolate linearly to reach 5 min and add 50% padding to push over the limit + scale_factor = math.ceil(1.5 * scale_factor / current_fraction) diff --git a/tests/e2e/driver_tests.py b/tests/e2e/driver_tests.py index 502a081ae..deb4e7dd2 100644 --- a/tests/e2e/driver_tests.py +++ b/tests/e2e/driver_tests.py @@ -518,7 +518,7 @@ def test_can_flip_compression(self): lz4_compressed = cursor.active_result_set.lz4_compressed #The endpoint should support compression self.assertEqual(lz4_compressed, True) - cursor.setLZ4Compression(False) + cursor.connection.lz4_compression=False cursor.execute("SELECT array(1,2,3,4)") cursor.fetchall() lz4_compressed = cursor.active_result_set.lz4_compressed From 0b4312ffe4b85e479b4c5433da0d7cd5364de419 Mon Sep 17 00:00:00 2001 From: Mohit Singla Date: Thu, 13 Oct 2022 13:15:41 +0200 Subject: [PATCH 17/17] Nit Signed-off-by: Mohit Singla --- src/databricks/sql/client.py | 1 - src/databricks/sql/thrift_backend.py | 4 +--- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/src/databricks/sql/client.py b/src/databricks/sql/client.py index 936e4f4db..4f0332e69 100644 --- a/src/databricks/sql/client.py +++ b/src/databricks/sql/client.py @@ -1,5 +1,4 @@ from typing import Dict, Tuple, List, Optional, Any, Union -from xmlrpc.client import boolean import pandas import pyarrow diff --git a/src/databricks/sql/thrift_backend.py b/src/databricks/sql/thrift_backend.py index 
9e2d68fed..48d7c2012 100644 --- a/src/databricks/sql/thrift_backend.py +++ b/src/databricks/sql/thrift_backend.py @@ -732,9 +732,7 @@ def _results_message_to_execute_response(self, resp, operation_state): .serialize() .to_pybytes() ) - lz4_compressed = ( - t_result_set_metadata_resp and t_result_set_metadata_resp.lz4Compressed - ) + lz4_compressed = t_result_set_metadata_resp.lz4Compressed if direct_results and direct_results.resultSet: assert direct_results.resultSet.results.startRowOffset == 0 assert direct_results.resultSetMetadata