diff --git a/poetry.lock b/poetry.lock index 391bbcb57..e0d197995 100644 --- a/poetry.lock +++ b/poetry.lock @@ -8,6 +8,7 @@ python-versions = ">=3.6.2" [package.dependencies] lazy-object-proxy = ">=1.4.0" +setuptools = ">=20.0" typed-ast = {version = ">=1.4.0,<2.0", markers = "implementation_name == \"cpython\" and python_version < \"3.8\""} typing-extensions = {version = ">=3.10", markers = "python_version < \"3.10\""} wrapt = ">=1.11,<2" @@ -162,6 +163,19 @@ category = "dev" optional = false python-versions = ">=3.6" +[[package]] +name = "lz4" +version = "4.0.2" +description = "LZ4 Bindings for Python" +category = "main" +optional = false +python-versions = ">=3.7" + +[package.extras] +docs = ["sphinx (>=1.6.0)", "sphinx-bootstrap-theme"] +flake8 = ["flake8"] +tests = ["psutil", "pytest (!=3.3.0)", "pytest-cov"] + [[package]] name = "mccabe" version = "0.7.0" @@ -395,6 +409,19 @@ urllib3 = ">=1.21.1,<1.27" socks = ["PySocks (>=1.5.6,!=1.5.7)"] use_chardet_on_py3 = ["chardet (>=3.0.2,<6)"] +[[package]] +name = "setuptools" +version = "65.4.0" +description = "Easily download, build, install, upgrade, and uninstall Python packages" +category = "dev" +optional = false +python-versions = ">=3.7" + +[package.extras] +docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "rst.linker (>=1.9)", "sphinx", "sphinx-favicon", "sphinx-hoverxref (<2)", "sphinx-inline-tabs", "sphinx-notfound-page (==0.8.3)", "sphinx-reredirects", "sphinxcontrib-towncrier"] +testing = ["build[virtualenv]", "filelock (>=3.4.0)", "flake8 (<5)", "flake8-2020", "ini2toml[lite] (>=0.9)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "mock", "pip (>=19.1)", "pip-run (>=8.8)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)", "pytest-perf", "pytest-xdist", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] +testing-integration = ["build[virtualenv]", "filelock (>=3.4.0)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pytest", "pytest-enabler", "pytest-xdist", "tomli", "virtualenv (>=13.0.0)", "wheel"] + [[package]] name = "six" version = "1.16.0" @@ -479,19 +506,45 @@ testing = ["func-timeout", "jaraco.itertools", "pytest (>=6)", "pytest-black (>= [metadata] lock-version = "1.1" python-versions = "^3.7.1" -content-hash = "f283eca35466a0294e09deb8535da2633219db696ad8bbc74dffd4592b0d66ad" +content-hash = "5de07f9b2c9a2f80ca0411f0f99b6b529b00b034f2ad13199cf29c862e125a57" [metadata.files] astroid = [ {file = "astroid-2.11.7-py3-none-any.whl", hash = "sha256:86b0a340a512c65abf4368b80252754cda17c02cdbbd3f587dddf98112233e7b"}, {file = "astroid-2.11.7.tar.gz", hash = "sha256:bb24615c77f4837c707669d16907331374ae8a964650a66999da3f5ca68dc946"}, ] -atomicwrites = [] +atomicwrites = [ + {file = "atomicwrites-1.4.1.tar.gz", hash = "sha256:81b2c9071a49367a7f770170e5eec8cb66567cfbbc8c73d20ce5ca4a8d71cf11"}, +] attrs = [ {file = "attrs-22.1.0-py2.py3-none-any.whl", hash = "sha256:86efa402f67bf2df34f51a335487cf46b1ec130d02b8d39fd248abfd30da551c"}, {file = "attrs-22.1.0.tar.gz", hash = "sha256:29adc2665447e5191d0e7c568fde78b21f9672d344281d0c6e1ab085429b22b6"}, ] -black = [] +black = [ + {file = "black-22.6.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:f586c26118bc6e714ec58c09df0157fe2d9ee195c764f630eb0d8e7ccce72e69"}, + {file = "black-22.6.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b270a168d69edb8b7ed32c193ef10fd27844e5c60852039599f9184460ce0807"}, + {file = "black-22.6.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6797f58943fceb1c461fb572edbe828d811e719c24e03375fd25170ada53825e"}, + {file = "black-22.6.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c85928b9d5f83b23cee7d0efcb310172412fbf7cb9d9ce963bd67fd141781def"}, + {file = "black-22.6.0-cp310-cp310-win_amd64.whl", hash = "sha256:f6fe02afde060bbeef044af7996f335fbe90b039ccf3f5eb8f16df8b20f77666"}, + {file = "black-22.6.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:cfaf3895a9634e882bf9d2363fed5af8888802d670f58b279b0bece00e9a872d"}, + {file = "black-22.6.0-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:94783f636bca89f11eb5d50437e8e17fbc6a929a628d82304c80fa9cd945f256"}, + {file = "black-22.6.0-cp36-cp36m-win_amd64.whl", hash = "sha256:2ea29072e954a4d55a2ff58971b83365eba5d3d357352a07a7a4df0d95f51c78"}, + {file = "black-22.6.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:e439798f819d49ba1c0bd9664427a05aab79bfba777a6db94fd4e56fae0cb849"}, + {file = "black-22.6.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:187d96c5e713f441a5829e77120c269b6514418f4513a390b0499b0987f2ff1c"}, + {file = "black-22.6.0-cp37-cp37m-win_amd64.whl", hash = "sha256:074458dc2f6e0d3dab7928d4417bb6957bb834434516f21514138437accdbe90"}, + {file = "black-22.6.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:a218d7e5856f91d20f04e931b6f16d15356db1c846ee55f01bac297a705ca24f"}, + {file = "black-22.6.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:568ac3c465b1c8b34b61cd7a4e349e93f91abf0f9371eda1cf87194663ab684e"}, + {file = "black-22.6.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:6c1734ab264b8f7929cef8ae5f900b85d579e6cbfde09d7387da8f04771b51c6"}, + {file = "black-22.6.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c9a3ac16efe9ec7d7381ddebcc022119794872abce99475345c5a61aa18c45ad"}, + {file = "black-22.6.0-cp38-cp38-win_amd64.whl", hash = "sha256:b9fd45787ba8aa3f5e0a0a98920c1012c884622c6c920dbe98dbd05bc7c70fbf"}, + {file = "black-22.6.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:7ba9be198ecca5031cd78745780d65a3f75a34b2ff9be5837045dce55db83d1c"}, + {file = "black-22.6.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a3db5b6409b96d9bd543323b23ef32a1a2b06416d525d27e0f67e74f1446c8f2"}, + {file = "black-22.6.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:560558527e52ce8afba936fcce93a7411ab40c7d5fe8c2463e279e843c0328ee"}, + {file = "black-22.6.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b154e6bbde1e79ea3260c4b40c0b7b3109ffcdf7bc4ebf8859169a6af72cd70b"}, + {file = "black-22.6.0-cp39-cp39-win_amd64.whl", hash = "sha256:4af5bc0e1f96be5ae9bd7aaec219c901a94d6caa2484c21983d043371c733fc4"}, + {file = "black-22.6.0-py3-none-any.whl", hash = "sha256:ac609cf8ef5e7115ddd07d85d988d074ed00e10fbc3445aee393e70164a2219c"}, + {file = "black-22.6.0.tar.gz", hash = "sha256:6c6d39e28aed379aec40da1c65434c77d75e65bb59a1e1c283de545fb4e7c6c9"}, +] certifi = [ {file = "certifi-2022.6.15-py3-none-any.whl", hash = "sha256:fe86415d55e84719d75f8b69414f6438ac3547d2078ab91b67e779ef69378412"}, {file = "certifi-2022.6.15.tar.gz", hash = "sha256:84c85a9078b11105f04f3036a9482ae10e4621616db313fe045dd24743a0820d"}, @@ -500,8 +553,14 @@ charset-normalizer = [ {file = "charset-normalizer-2.1.1.tar.gz", hash = "sha256:5a3d016c7c547f69d6f81fb0db9449ce888b418b5b9952cc5e6e66843e9dd845"}, {file = "charset_normalizer-2.1.1-py3-none-any.whl", hash = "sha256:83e9a75d1911279afd89352c68b45348559d1fc0506b054b346651b5e7fee29f"}, ] -click = [] -colorama = [] +click = [ + {file = "click-8.1.3-py3-none-any.whl", hash = "sha256:bb4d8133cb15a609f44e8213d9b391b0809795062913b383c62be0ee95b1db48"}, + {file = "click-8.1.3.tar.gz", hash = "sha256:7682dc8afb30297001674575ea00d1814d808d6a36af415a82bd481d37ba7b8e"}, +] +colorama = [ + {file = "colorama-0.4.5-py2.py3-none-any.whl", hash = "sha256:854bf444933e37f5824ae7bfc1e98d5bce2ebe4160d46b5edf346a89358e99da"}, + {file = "colorama-0.4.5.tar.gz", hash = "sha256:e6c6b4334fc50988a639d9b98aa429a0b57da6e17b9a44f0451f930b6967b7a4"}, +] dill = [ {file = "dill-0.3.5.1-py2.py3-none-any.whl", hash = "sha256:33501d03270bbe410c72639b350e941882a8b0fd55357580fbc873fba0c59302"}, {file = "dill-0.3.5.1.tar.gz", hash = "sha256:d75e41f3eff1eee599d738e76ba8f4ad98ea229db8b085318aa2b3333a208c86"}, @@ -510,8 +569,14 @@ idna = [ {file = "idna-3.3-py3-none-any.whl", hash = "sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff"}, {file = "idna-3.3.tar.gz", hash = "sha256:9d643ff0a55b762d5cdb124b8eaa99c66322e2157b69160bc32796e824360e6d"}, ] -importlib-metadata = [] -iniconfig = [] +importlib-metadata = [ + {file = "importlib_metadata-4.12.0-py3-none-any.whl", hash = "sha256:7401a975809ea1fdc658c3aa4f78cc2195a0e019c5cbc4c06122884e9ae80c23"}, + {file = "importlib_metadata-4.12.0.tar.gz", hash = "sha256:637245b8bab2b6502fcbc752cc4b7a6f6243bb02b31c5c26156ad103d3d45670"}, +] +iniconfig = [ + {file = "iniconfig-1.1.1-py2.py3-none-any.whl", hash = "sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3"}, + {file = "iniconfig-1.1.1.tar.gz", hash = "sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32"}, +] isort = [ {file = "isort-5.10.1-py3-none-any.whl", hash = "sha256:6f62d78e2f89b4500b080fe3a81690850cd254227f27f75c3a0c491a1f351ba7"}, {file = "isort-5.10.1.tar.gz", hash = "sha256:e8443a5e7a020e9d7f97f1d7d9cd17c88bcb3bc7e218bf9cf5095fe550be2951"}, @@ -555,6 +620,29 @@ lazy-object-proxy = [ {file = "lazy_object_proxy-1.7.1-cp39-cp39-win_amd64.whl", hash = "sha256:677ea950bef409b47e51e733283544ac3d660b709cfce7b187f5ace137960d61"}, {file = "lazy_object_proxy-1.7.1-pp37.pp38-none-any.whl", hash = "sha256:d66906d5785da8e0be7360912e99c9188b70f52c422f9fc18223347235691a84"}, ] +lz4 = [ + {file = "lz4-4.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:3881573c3db902db370e072eb64b40c7c8289b94b2a731e051858cc198f890e8"}, + {file = "lz4-4.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:154e6e9f58a7bafc4d2a1395160305b78fc82fa708bfa58cf0ad977c443d1f8f"}, + {file = "lz4-4.0.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4cfa82f26b4f1835c797bd70e5ce20d5f1ee897b9a0c53e62d607f9029f521ce"}, + {file = "lz4-4.0.2-cp310-cp310-win32.whl", hash = "sha256:fba1730cd2327a9d013192a9878714cc82f4877d2ada556222d03ea6428a80ed"}, + {file = "lz4-4.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:61dbcca64e8e1655e06b588356c4b2515bccc1d7e84065f858a685abd96f0cf2"}, + {file = "lz4-4.0.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:56ea660097fec87f0c6746146b316775037f8dd886a4c5915360e5b32b7112d0"}, + {file = "lz4-4.0.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ed86ab22bfe1f4cd4fc983704134a8fdf746c1121a398f8f14cbd014c1a5b0ae"}, + {file = "lz4-4.0.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:345608de23b4d68fbdef373f1e53d6c5abd99a062d4ff922e3350f47775ab123"}, + {file = "lz4-4.0.2-cp37-cp37m-win32.whl", hash = "sha256:5fe9db7627674875e4279c2ed50b1e38fb91ec3093347f871ed996e58edbb488"}, + {file = "lz4-4.0.2-cp37-cp37m-win_amd64.whl", hash = "sha256:3fa0f000d8ce39e643e9e5c49fc4d1985156ffb177e3123a0f22551f5864841b"}, + {file = "lz4-4.0.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:6f3b3670f52f0871885258bcbc746f483760434336f0bc5581f161cc5d4b0c9a"}, + {file = "lz4-4.0.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ea2c2182a5b0ad03f33ac09db0925a1738a1d65751a3e058110bd900c643d359"}, + {file = "lz4-4.0.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:439898dd4176a724243002003c3f733eb6ce48a5988175f54c8560e0b100b7a6"}, + {file = "lz4-4.0.2-cp38-cp38-win32.whl", hash = "sha256:35e6caced0229b90151d31d9cf1eaa541e597f8021bf5b70ff9e6374e3e43b23"}, + {file = "lz4-4.0.2-cp38-cp38-win_amd64.whl", hash = "sha256:1bd56282f6993e013ccf7f6edf1530c2a13d1662741e2be072349c7f70bc0682"}, + {file = "lz4-4.0.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:1ed9a1875dc2a489f3b665d0211984689d0e76585e55650b044a64dbd2d22992"}, + {file = "lz4-4.0.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b18a6d6d9071c03dbf9e30bbe22e4476f24f1a4d73b1e975605ad3ce725e6c"}, + {file = "lz4-4.0.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9d141719d3cbb7933809642a61b68b8f595ddf85657016521756ddcf826b85cd"}, + {file = "lz4-4.0.2-cp39-cp39-win32.whl", hash = "sha256:a8e02c2477bd704f43113ac8dd966c361187383591388818d74e1b73e4674759"}, + {file = "lz4-4.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:ee73357412c5505f6ba0ea61ff71455e2e4c1e04d8e60f17f3cd937261d773fa"}, + {file = "lz4-4.0.2.tar.gz", hash = "sha256:083b7172c2938412ae37c3a090250bfdd9e4a6e855442594f86c3608ed12729b"}, +] mccabe = [ {file = "mccabe-0.7.0-py2.py3-none-any.whl", hash = "sha256:6c2d30ab6be0e4a46919781807b4f0d834ebdd6c6e3dca0bda5a15f863427b6e"}, {file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"}, @@ -584,7 +672,10 @@ mypy = [ {file = "mypy-0.950-py3-none-any.whl", hash = "sha256:a4d9898f46446bfb6405383b57b96737dcfd0a7f25b748e78ef3e8c576bba3cb"}, {file = "mypy-0.950.tar.gz", hash = "sha256:1b333cfbca1762ff15808a0ef4f71b5d3eed8528b23ea1c3fb50543c867d68de"}, ] -mypy-extensions = [] +mypy-extensions = [ + {file = "mypy_extensions-0.4.3-py2.py3-none-any.whl", hash = "sha256:090fedd75945a69ae91ce1303b5824f428daf5a028d2f6ab8a299250a846f15d"}, + {file = "mypy_extensions-0.4.3.tar.gz", hash = "sha256:2d82818f5bb3e369420cb3c4060a7970edba416647068eb4c5343488a6c604a8"}, +] numpy = [ {file = "numpy-1.21.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:38e8648f9449a549a7dfe8d8755a5979b45b3538520d1e735637ef28e8c2dc50"}, {file = "numpy-1.21.1-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:fd7d7409fa643a91d0a05c7554dd68aa9c9bb16e186f6ccfe40d6e003156e33a"}, @@ -615,13 +706,57 @@ numpy = [ {file = "numpy-1.21.1-pp37-pypy37_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:2d4d1de6e6fb3d28781c73fbde702ac97f03d79e4ffd6598b880b2d95d62ead4"}, {file = "numpy-1.21.1.zip", hash = "sha256:dff4af63638afcc57a3dfb9e4b26d434a7a602d225b42d746ea7fe2edf1342fd"}, ] -oauthlib = [] -packaging = [] -pandas = [] -pathspec = [] -platformdirs = [] -pluggy = [] -py = [] +oauthlib = [ + {file = "oauthlib-3.2.0-py3-none-any.whl", hash = "sha256:6db33440354787f9b7f3a6dbd4febf5d0f93758354060e802f6c06cb493022fe"}, + {file = "oauthlib-3.2.0.tar.gz", hash = "sha256:23a8208d75b902797ea29fd31fa80a15ed9dc2c6c16fe73f5d346f83f6fa27a2"}, +] +packaging = [ + {file = "packaging-21.3-py3-none-any.whl", hash = "sha256:ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522"}, + {file = "packaging-21.3.tar.gz", hash = "sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb"}, +] +pandas = [ + {file = "pandas-1.3.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:62d5b5ce965bae78f12c1c0df0d387899dd4211ec0bdc52822373f13a3a022b9"}, + {file = "pandas-1.3.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:adfeb11be2d54f275142c8ba9bf67acee771b7186a5745249c7d5a06c670136b"}, + {file = "pandas-1.3.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:60a8c055d58873ad81cae290d974d13dd479b82cbb975c3e1fa2cf1920715296"}, + {file = "pandas-1.3.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fd541ab09e1f80a2a1760032d665f6e032d8e44055d602d65eeea6e6e85498cb"}, + {file = "pandas-1.3.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2651d75b9a167cc8cc572cf787ab512d16e316ae00ba81874b560586fa1325e0"}, + {file = "pandas-1.3.5-cp310-cp310-win_amd64.whl", hash = "sha256:aaf183a615ad790801fa3cf2fa450e5b6d23a54684fe386f7e3208f8b9bfbef6"}, + {file = "pandas-1.3.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:344295811e67f8200de2390093aeb3c8309f5648951b684d8db7eee7d1c81fb7"}, + {file = "pandas-1.3.5-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:552020bf83b7f9033b57cbae65589c01e7ef1544416122da0c79140c93288f56"}, + {file = "pandas-1.3.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5cce0c6bbeb266b0e39e35176ee615ce3585233092f685b6a82362523e59e5b4"}, + {file = "pandas-1.3.5-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7d28a3c65463fd0d0ba8bbb7696b23073efee0510783340a44b08f5e96ffce0c"}, + {file = "pandas-1.3.5-cp37-cp37m-win32.whl", hash = "sha256:a62949c626dd0ef7de11de34b44c6475db76995c2064e2d99c6498c3dba7fe58"}, + {file = "pandas-1.3.5-cp37-cp37m-win_amd64.whl", hash = "sha256:8025750767e138320b15ca16d70d5cdc1886e8f9cc56652d89735c016cd8aea6"}, + {file = "pandas-1.3.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:fe95bae4e2d579812865db2212bb733144e34d0c6785c0685329e5b60fcb85dd"}, + {file = "pandas-1.3.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5f261553a1e9c65b7a310302b9dbac31cf0049a51695c14ebe04e4bfd4a96f02"}, + {file = "pandas-1.3.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8b6dbec5f3e6d5dc80dcfee250e0a2a652b3f28663492f7dab9a24416a48ac39"}, + {file = "pandas-1.3.5-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d3bc49af96cd6285030a64779de5b3688633a07eb75c124b0747134a63f4c05f"}, + {file = "pandas-1.3.5-cp38-cp38-win32.whl", hash = "sha256:b6b87b2fb39e6383ca28e2829cddef1d9fc9e27e55ad91ca9c435572cdba51bf"}, + {file = "pandas-1.3.5-cp38-cp38-win_amd64.whl", hash = "sha256:a395692046fd8ce1edb4c6295c35184ae0c2bbe787ecbe384251da609e27edcb"}, + {file = "pandas-1.3.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:bd971a3f08b745a75a86c00b97f3007c2ea175951286cdda6abe543e687e5f2f"}, + {file = "pandas-1.3.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:37f06b59e5bc05711a518aa10beaec10942188dccb48918bb5ae602ccbc9f1a0"}, + {file = "pandas-1.3.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2c21778a688d3712d35710501f8001cdbf96eb70a7c587a3d5613573299fdca6"}, + {file = "pandas-1.3.5-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3345343206546545bc26a05b4602b6a24385b5ec7c75cb6059599e3d56831da2"}, + {file = "pandas-1.3.5-cp39-cp39-win32.whl", hash = "sha256:c69406a2808ba6cf580c2255bcf260b3f214d2664a3a4197d0e640f573b46fd3"}, + {file = "pandas-1.3.5-cp39-cp39-win_amd64.whl", hash = "sha256:32e1a26d5ade11b547721a72f9bfc4bd113396947606e00d5b4a5b79b3dcb006"}, + {file = "pandas-1.3.5.tar.gz", hash = "sha256:1e4285f5de1012de20ca46b188ccf33521bff61ba5c5ebd78b4fb28e5416a9f1"}, +] +pathspec = [ + {file = "pathspec-0.9.0-py2.py3-none-any.whl", hash = "sha256:7d15c4ddb0b5c802d161efc417ec1a2558ea2653c2e8ad9c19098201dc1c993a"}, + {file = "pathspec-0.9.0.tar.gz", hash = "sha256:e564499435a2673d586f6b2130bb5b95f04a3ba06f81b8f895b651a3c76aabb1"}, +] +platformdirs = [ + {file = "platformdirs-2.5.2-py3-none-any.whl", hash = "sha256:027d8e83a2d7de06bbac4e5ef7e023c02b863d7ea5d079477e722bb41ab25788"}, + {file = "platformdirs-2.5.2.tar.gz", hash = "sha256:58c8abb07dcb441e6ee4b11d8df0ac856038f944ab98b7be6b27b2a3c7feef19"}, +] +pluggy = [ + {file = "pluggy-1.0.0-py2.py3-none-any.whl", hash = "sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3"}, + {file = "pluggy-1.0.0.tar.gz", hash = "sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159"}, +] +py = [ + {file = "py-1.11.0-py2.py3-none-any.whl", hash = "sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378"}, + {file = "py-1.11.0.tar.gz", hash = "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719"}, +] pyarrow = [ {file = "pyarrow-9.0.0-cp310-cp310-macosx_10_13_universal2.whl", hash = "sha256:767cafb14278165ad539a2918c14c1b73cf20689747c21375c38e3fe62884902"}, {file = "pyarrow-9.0.0-cp310-cp310-macosx_10_13_x86_64.whl", hash = "sha256:0238998dc692efcb4e41ae74738d7c1234723271ccf520bd8312dca07d49ef8d"}, @@ -654,8 +789,14 @@ pylint = [ {file = "pylint-2.13.9-py3-none-any.whl", hash = "sha256:705c620d388035bdd9ff8b44c5bcdd235bfb49d276d488dd2c8ff1736aa42526"}, {file = "pylint-2.13.9.tar.gz", hash = "sha256:095567c96e19e6f57b5b907e67d265ff535e588fe26b12b5ebe1fc5645b2c731"}, ] -pyparsing = [] -pytest = [] +pyparsing = [ + {file = "pyparsing-3.0.9-py3-none-any.whl", hash = "sha256:5026bae9a10eeaefb61dab2f09052b9f4307d44aee4eda64b309723d8d206bbc"}, + {file = "pyparsing-3.0.9.tar.gz", hash = "sha256:2b020ecf7d21b687f219b71ecad3631f644a47f01403fa1d1036b0c6416d70fb"}, +] +pytest = [ + {file = "pytest-7.1.2-py3-none-any.whl", hash = "sha256:13d0e3ccfc2b6e26be000cb6568c832ba67ba32e719443bfe725814d3c42433c"}, + {file = "pytest-7.1.2.tar.gz", hash = "sha256:a06a0425453864a270bc45e71f783330a7428defb4230fb5e6a731fde06ecd45"}, +] python-dateutil = [ {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"}, {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"}, @@ -668,11 +809,51 @@ requests = [ {file = "requests-2.28.1-py3-none-any.whl", hash = "sha256:8fefa2a1a1365bf5520aac41836fbee479da67864514bdb821f31ce07ce65349"}, {file = "requests-2.28.1.tar.gz", hash = "sha256:7c5599b102feddaa661c826c56ab4fee28bfd17f5abca1ebbe3e7f19d7c97983"}, ] -six = [] -thrift = [] -tomli = [] -typed-ast = [] -typing-extensions = [] +setuptools = [ + {file = "setuptools-65.4.0-py3-none-any.whl", hash = "sha256:c2d2709550f15aab6c9110196ea312f468f41cd546bceb24127a1be6fdcaeeb1"}, + {file = "setuptools-65.4.0.tar.gz", hash = "sha256:a8f6e213b4b0661f590ccf40de95d28a177cd747d098624ad3f69c40287297e9"}, +] +six = [ + {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, + {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, +] +thrift = [ + {file = "thrift-0.13.0.tar.gz", hash = "sha256:9af1c86bf73433afc6010ed376a6c6aca2b54099cc0d61895f640870a9ae7d89"}, +] +tomli = [ + {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, + {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, +] +typed-ast = [ + {file = "typed_ast-1.5.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:669dd0c4167f6f2cd9f57041e03c3c2ebf9063d0757dc89f79ba1daa2bfca9d4"}, + {file = "typed_ast-1.5.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:211260621ab1cd7324e0798d6be953d00b74e0428382991adfddb352252f1d62"}, + {file = "typed_ast-1.5.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:267e3f78697a6c00c689c03db4876dd1efdfea2f251a5ad6555e82a26847b4ac"}, + {file = "typed_ast-1.5.4-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:c542eeda69212fa10a7ada75e668876fdec5f856cd3d06829e6aa64ad17c8dfe"}, + {file = "typed_ast-1.5.4-cp310-cp310-win_amd64.whl", hash = "sha256:a9916d2bb8865f973824fb47436fa45e1ebf2efd920f2b9f99342cb7fab93f72"}, + {file = "typed_ast-1.5.4-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:79b1e0869db7c830ba6a981d58711c88b6677506e648496b1f64ac7d15633aec"}, + {file = "typed_ast-1.5.4-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a94d55d142c9265f4ea46fab70977a1944ecae359ae867397757d836ea5a3f47"}, + {file = "typed_ast-1.5.4-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:183afdf0ec5b1b211724dfef3d2cad2d767cbefac291f24d69b00546c1837fb6"}, + {file = "typed_ast-1.5.4-cp36-cp36m-win_amd64.whl", hash = "sha256:639c5f0b21776605dd6c9dbe592d5228f021404dafd377e2b7ac046b0349b1a1"}, + {file = "typed_ast-1.5.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:cf4afcfac006ece570e32d6fa90ab74a17245b83dfd6655a6f68568098345ff6"}, + {file = "typed_ast-1.5.4-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ed855bbe3eb3715fca349c80174cfcfd699c2f9de574d40527b8429acae23a66"}, + {file = "typed_ast-1.5.4-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:6778e1b2f81dfc7bc58e4b259363b83d2e509a65198e85d5700dfae4c6c8ff1c"}, + {file = "typed_ast-1.5.4-cp37-cp37m-win_amd64.whl", hash = "sha256:0261195c2062caf107831e92a76764c81227dae162c4f75192c0d489faf751a2"}, + {file = "typed_ast-1.5.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2efae9db7a8c05ad5547d522e7dbe62c83d838d3906a3716d1478b6c1d61388d"}, + {file = "typed_ast-1.5.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:7d5d014b7daa8b0bf2eaef684295acae12b036d79f54178b92a2b6a56f92278f"}, + {file = "typed_ast-1.5.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:370788a63915e82fd6f212865a596a0fefcbb7d408bbbb13dea723d971ed8bdc"}, + {file = "typed_ast-1.5.4-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:4e964b4ff86550a7a7d56345c7864b18f403f5bd7380edf44a3c1fb4ee7ac6c6"}, + {file = "typed_ast-1.5.4-cp38-cp38-win_amd64.whl", hash = "sha256:683407d92dc953c8a7347119596f0b0e6c55eb98ebebd9b23437501b28dcbb8e"}, + {file = "typed_ast-1.5.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:4879da6c9b73443f97e731b617184a596ac1235fe91f98d279a7af36c796da35"}, + {file = "typed_ast-1.5.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3e123d878ba170397916557d31c8f589951e353cc95fb7f24f6bb69adc1a8a97"}, + {file = "typed_ast-1.5.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ebd9d7f80ccf7a82ac5f88c521115cc55d84e35bf8b446fcd7836eb6b98929a3"}, + {file = "typed_ast-1.5.4-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:98f80dee3c03455e92796b58b98ff6ca0b2a6f652120c263efdba4d6c5e58f72"}, + {file = "typed_ast-1.5.4-cp39-cp39-win_amd64.whl", hash = "sha256:0fdbcf2fef0ca421a3f5912555804296f0b0960f0418c440f5d6d3abb549f3e1"}, + {file = "typed_ast-1.5.4.tar.gz", hash = "sha256:39e21ceb7388e4bb37f4c679d72707ed46c2fbf2a5609b8b8ebc4b067d977df2"}, +] +typing-extensions = [ + {file = "typing_extensions-4.3.0-py3-none-any.whl", hash = "sha256:25642c956049920a5aa49edcdd6ab1e06d7e5d467fc00e0506c44ac86fbfca02"}, + {file = "typing_extensions-4.3.0.tar.gz", hash = "sha256:e6d2677a32f47fc7eb2795db1dd15c1f34eff616bcaf2cfb5e997f854fa1c4a6"}, +] urllib3 = [ {file = "urllib3-1.26.12-py2.py3-none-any.whl", hash = "sha256:b930dd878d5a8afb066a637fbb35144fe7901e3b209d1cd4f524bd0e9deee997"}, {file = "urllib3-1.26.12.tar.gz", hash = "sha256:3fa96cf423e6987997fc326ae8df396db2a8b7c667747d47ddd8ecba91f4a74e"}, @@ -743,4 +924,7 @@ wrapt = [ {file = "wrapt-1.14.1-cp39-cp39-win_amd64.whl", hash = "sha256:dee60e1de1898bde3b238f18340eec6148986da0455d8ba7848d50470a7a32fb"}, {file = "wrapt-1.14.1.tar.gz", hash = "sha256:380a85cf89e0e69b7cfbe2ea9f765f004ff419f34194018a6827ac0e3edfed4d"}, ] -zipp = [] +zipp = [ + {file = "zipp-3.8.1-py3-none-any.whl", hash = "sha256:47c40d7fe183a6f21403a199b3e4192cca5774656965b0a4988ad2f8feb5f009"}, + {file = "zipp-3.8.1.tar.gz", hash = "sha256:05b45f1ee8f807d0cc928485ca40a07cb491cf092ff587c0df9cb1fd154848d2"}, +] diff --git a/pyproject.toml b/pyproject.toml index 04428e552..bfba04b90 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,6 +13,7 @@ python = "^3.7.1" thrift = "^0.13.0" pandas = "^1.3.0" pyarrow = "^9.0.0" +lz4 = "^4.0.2" requests=">2.18.1" oauthlib=">=3.1.0" diff --git a/src/databricks/sql/client.py b/src/databricks/sql/client.py index e3190d457..4f0332e69 100644 --- a/src/databricks/sql/client.py +++ b/src/databricks/sql/client.py @@ -152,6 +152,7 @@ def read(self) -> Optional[OAuthToken]: self.host = server_hostname self.port = kwargs.get("_port", 443) self.disable_pandas = kwargs.get("_disable_pandas", False) + self.lz4_compression = kwargs.get("enable_query_result_lz4_compression", True) auth_provider = get_python_sql_connector_auth_provider( server_hostname, **kwargs @@ -318,6 +319,7 @@ def execute( session_handle=self.connection._session_handle, max_rows=self.arraysize, max_bytes=self.buffer_size_bytes, + lz4_compression=self.connection.lz4_compression, cursor=self, ) self.active_result_set = ResultSet( @@ -614,6 +616,7 @@ def __init__( self.has_been_closed_server_side = execute_response.has_been_closed_server_side self.has_more_rows = execute_response.has_more_rows self.buffer_size_bytes = result_buffer_size_bytes + self.lz4_compressed = execute_response.lz4_compressed self.arraysize = arraysize self.thrift_backend = thrift_backend self.description = execute_response.description @@ -642,6 +645,7 @@ def _fill_results_buffer(self): max_rows=self.arraysize, max_bytes=self.buffer_size_bytes, expected_row_start_offset=self._next_row_index, + lz4_compressed=self.lz4_compressed, arrow_schema_bytes=self._arrow_schema_bytes, description=self.description, ) diff --git a/src/databricks/sql/thrift_backend.py b/src/databricks/sql/thrift_backend.py index b5e29f394..48d7c2012 100644 --- a/src/databricks/sql/thrift_backend.py +++ b/src/databricks/sql/thrift_backend.py @@ -4,6 +4,7 @@ import math import time import threading +import lz4.frame from ssl import CERT_NONE, CERT_REQUIRED, create_default_context import pyarrow @@ -451,7 +452,7 @@ def open_session(self, session_configuration, catalog, schema): initial_namespace = None open_session_req = ttypes.TOpenSessionReq( - client_protocol_i64=ttypes.TProtocolVersion.SPARK_CLI_SERVICE_PROTOCOL_V5, + client_protocol_i64=ttypes.TProtocolVersion.SPARK_CLI_SERVICE_PROTOCOL_V6, client_protocol=None, initialNamespace=initial_namespace, canUseMultipleCatalogs=True, @@ -507,7 +508,7 @@ def _poll_for_status(self, op_handle): ) return self.make_request(self._client.GetOperationStatus, req) - def _create_arrow_table(self, t_row_set, schema_bytes, description): + def _create_arrow_table(self, t_row_set, lz4_compressed, schema_bytes, description): if t_row_set.columns is not None: ( arrow_table, @@ -520,7 +521,7 @@ def _create_arrow_table(self, t_row_set, schema_bytes, description): arrow_table, num_rows, ) = ThriftBackend._convert_arrow_based_set_to_arrow_table( - t_row_set.arrowBatches, schema_bytes + t_row_set.arrowBatches, lz4_compressed, schema_bytes ) else: raise OperationalError("Unsupported TRowSet instance {}".format(t_row_set)) @@ -545,13 +546,20 @@ def _convert_decimals_in_arrow_table(table, description): return table @staticmethod - def _convert_arrow_based_set_to_arrow_table(arrow_batches, schema_bytes): + def _convert_arrow_based_set_to_arrow_table( + arrow_batches, lz4_compressed, schema_bytes + ): ba = bytearray() ba += schema_bytes n_rows = 0 - for arrow_batch in arrow_batches: - n_rows += arrow_batch.rowCount - ba += arrow_batch.batch + if lz4_compressed: + for arrow_batch in arrow_batches: + n_rows += arrow_batch.rowCount + ba += lz4.frame.decompress(arrow_batch.batch) + else: + for arrow_batch in arrow_batches: + n_rows += arrow_batch.rowCount + ba += arrow_batch.batch arrow_table = pyarrow.ipc.open_stream(ba).read_all() return arrow_table, n_rows @@ -708,7 +716,6 @@ def _results_message_to_execute_response(self, resp, operation_state): ] ) ) - direct_results = resp.directResults has_been_closed_server_side = direct_results and direct_results.closeOperation has_more_rows = ( @@ -725,12 +732,16 @@ def _results_message_to_execute_response(self, resp, operation_state): .serialize() .to_pybytes() ) - + lz4_compressed = t_result_set_metadata_resp.lz4Compressed if direct_results and direct_results.resultSet: assert direct_results.resultSet.results.startRowOffset == 0 assert direct_results.resultSetMetadata + arrow_results, n_rows = self._create_arrow_table( - direct_results.resultSet.results, schema_bytes, description + direct_results.resultSet.results, + lz4_compressed, + schema_bytes, + description, ) arrow_queue_opt = ArrowQueue(arrow_results, n_rows, 0) else: @@ -740,6 +751,7 @@ def _results_message_to_execute_response(self, resp, operation_state): status=operation_state, has_been_closed_server_side=has_been_closed_server_side, has_more_rows=has_more_rows, + lz4_compressed=lz4_compressed, command_handle=resp.operationHandle, description=description, arrow_schema_bytes=schema_bytes, @@ -783,7 +795,9 @@ def _check_direct_results_for_error(t_spark_direct_results): t_spark_direct_results.closeOperation ) - def execute_command(self, operation, session_handle, max_rows, max_bytes, cursor): + def execute_command( + self, operation, session_handle, max_rows, max_bytes, lz4_compression, cursor + ): assert session_handle is not None spark_arrow_types = ttypes.TSparkArrowTypes( @@ -802,7 +816,7 @@ def execute_command(self, operation, session_handle, max_rows, max_bytes, cursor maxRows=max_rows, maxBytes=max_bytes ), canReadArrowResult=True, - canDecompressLZ4Result=False, + canDecompressLZ4Result=lz4_compression, canDownloadResult=False, confOverlay={ # We want to receive proper Timestamp arrow types. @@ -916,6 +930,7 @@ def fetch_results( max_rows, max_bytes, expected_row_start_offset, + lz4_compressed, arrow_schema_bytes, description, ): @@ -941,7 +956,7 @@ def fetch_results( ) ) arrow_results, n_rows = self._create_arrow_table( - resp.results, arrow_schema_bytes, description + resp.results, lz4_compressed, arrow_schema_bytes, description ) arrow_queue = ArrowQueue(arrow_results, n_rows) diff --git a/src/databricks/sql/utils.py b/src/databricks/sql/utils.py index 2961a1f59..410c7144e 100644 --- a/src/databricks/sql/utils.py +++ b/src/databricks/sql/utils.py @@ -40,7 +40,7 @@ def remaining_rows(self) -> pyarrow.Table: ExecuteResponse = namedtuple( "ExecuteResponse", - "status has_been_closed_server_side has_more_rows description " + "status has_been_closed_server_side has_more_rows description lz4_compressed " "command_handle arrow_queue arrow_schema_bytes", ) diff --git a/tests/e2e/common/large_queries_mixin.py b/tests/e2e/common/large_queries_mixin.py index d59e0a9fe..3e1e45bc4 100644 --- a/tests/e2e/common/large_queries_mixin.py +++ b/tests/e2e/common/large_queries_mixin.py @@ -49,11 +49,15 @@ def test_query_with_large_wide_result_set(self): # This is used by PyHive tests to determine the buffer size self.arraysize = 1000 with self.cursor() as cursor: - uuids = ", ".join(["uuid() uuid{}".format(i) for i in range(cols)]) - cursor.execute("SELECT id, {uuids} FROM RANGE({rows})".format(uuids=uuids, rows=rows)) - for row_id, row in enumerate(self.fetch_rows(cursor, rows, fetchmany_size)): - self.assertEqual(row[0], row_id) # Verify no rows are dropped in the middle. - self.assertEqual(len(row[1]), 36) + for lz4_compression in [False, True]: + cursor.connection.lz4_compression=lz4_compression + uuids = ", ".join(["uuid() uuid{}".format(i) for i in range(cols)]) + cursor.execute("SELECT id, {uuids} FROM RANGE({rows})".format(uuids=uuids, rows=rows)) + self.assertEqual(lz4_compression, cursor.active_result_set.lz4_compressed) + for row_id, row in enumerate(self.fetch_rows(cursor, rows, fetchmany_size)): + self.assertEqual(row[0], row_id) # Verify no rows are dropped in the middle. + self.assertEqual(len(row[1]), 36) + def test_query_with_large_narrow_result_set(self): resultSize = 300 * 1000 * 1000 # 300 MB @@ -85,10 +89,10 @@ def test_long_running_query(self): start = time.time() cursor.execute("""SELECT count(*) - FROM RANGE({scale}) x - JOIN RANGE({scale0}) y - ON from_unixtime(x.id * y.id, "yyyy-MM-dd") LIKE "%not%a%date%" - """.format(scale=scale_factor * scale0, scale0=scale0)) + FROM RANGE({scale}) x + JOIN RANGE({scale0}) y + ON from_unixtime(x.id * y.id, "yyyy-MM-dd") LIKE "%not%a%date%" + """.format(scale=scale_factor * scale0, scale0=scale0)) n, = cursor.fetchone() self.assertEqual(n, 0) diff --git a/tests/e2e/driver_tests.py b/tests/e2e/driver_tests.py index 9e400770d..deb4e7dd2 100644 --- a/tests/e2e/driver_tests.py +++ b/tests/e2e/driver_tests.py @@ -510,6 +510,20 @@ def test_timezone_with_timestamp(self): self.assertEqual(arrow_result_table.field(0).type, ts_type) self.assertEqual(arrow_result_value, expected.timestamp() * 1000000) + @skipUnless(pysql_supports_arrow(), 'arrow test needs arrow support') + def test_can_flip_compression(self): + with self.cursor() as cursor: + cursor.execute("SELECT array(1,2,3,4)") + cursor.fetchall() + lz4_compressed = cursor.active_result_set.lz4_compressed + #The endpoint should support compression + self.assertEqual(lz4_compressed, True) + cursor.connection.lz4_compression=False + cursor.execute("SELECT array(1,2,3,4)") + cursor.fetchall() + lz4_compressed = cursor.active_result_set.lz4_compressed + self.assertEqual(lz4_compressed, False) + def _should_have_native_complex_types(self): return pysql_has_version(">=", 2) and is_thrift_v5_plus(self.arguments) diff --git a/tests/unit/test_fetches.py b/tests/unit/test_fetches.py index a4308d57a..33fca0751 100644 --- a/tests/unit/test_fetches.py +++ b/tests/unit/test_fetches.py @@ -38,6 +38,7 @@ def make_dummy_result_set_from_initial_results(initial_results): has_been_closed_server_side=True, has_more_rows=False, description=Mock(), + lz4_compressed=Mock(), command_handle=None, arrow_queue=arrow_queue, arrow_schema_bytes=schema.serialize().to_pybytes())) @@ -50,7 +51,7 @@ def make_dummy_result_set_from_initial_results(initial_results): def make_dummy_result_set_from_batch_list(batch_list): batch_index = 0 - def fetch_results(op_handle, max_rows, max_bytes, expected_row_start_offset, + def fetch_results(op_handle, max_rows, max_bytes, expected_row_start_offset, lz4_compressed, arrow_schema_bytes, description): nonlocal batch_index results = FetchTests.make_arrow_queue(batch_list[batch_index]) @@ -71,6 +72,7 @@ def fetch_results(op_handle, max_rows, max_bytes, expected_row_start_offset, has_more_rows=True, description=[(f'col{col_id}', 'integer', None, None, None, None, None) for col_id in range(num_cols)], + lz4_compressed=Mock(), command_handle=None, arrow_queue=None, arrow_schema_bytes=None)) diff --git a/tests/unit/test_thrift_backend.py b/tests/unit/test_thrift_backend.py index 488de5842..78181ef8e 100644 --- a/tests/unit/test_thrift_backend.py +++ b/tests/unit/test_thrift_backend.py @@ -309,6 +309,29 @@ def test_handle_execute_response_checks_operation_state_in_direct_results(self): thrift_backend._handle_execute_response(t_execute_resp, Mock()) self.assertIn("some information about the error", str(cm.exception)) + def test_handle_execute_response_sets_compression_in_direct_results(self): + for resp_type in self.execute_response_types: + lz4Compressed=Mock() + resultSet=MagicMock() + resultSet.results.startRowOffset = 0 + t_execute_resp = resp_type( + status=Mock(), + operationHandle=Mock(), + directResults=ttypes.TSparkDirectResults( + operationStatus= Mock(), + resultSetMetadata=ttypes.TGetResultSetMetadataResp( + status=self.okay_status, + resultFormat=ttypes.TSparkRowSetType.ARROW_BASED_SET, + schema=MagicMock(), + arrowSchema=MagicMock(), + lz4Compressed=lz4Compressed), + resultSet=resultSet, + closeOperation=None)) + thrift_backend = ThriftBackend("foobar", 443, "path", [], auth_provider=AuthProvider()) + + execute_response = thrift_backend._handle_execute_response(t_execute_resp, Mock()) + self.assertEqual(execute_response.lz4_compressed, lz4Compressed) + @patch("databricks.sql.thrift_backend.TCLIService.Client") def test_handle_execute_response_checks_operation_state_in_polls(self, tcli_service_class): tcli_service_instance = tcli_service_class.return_value @@ -358,7 +381,7 @@ def test_get_status_uses_display_message_if_available(self, tcli_service_class): thrift_backend = ThriftBackend("foobar", 443, "path", [], auth_provider=AuthProvider()) with self.assertRaises(DatabaseError) as cm: - thrift_backend.execute_command(Mock(), Mock(), 100, 100, Mock()) + thrift_backend.execute_command(Mock(), Mock(), 100, 100, Mock(), Mock()) self.assertEqual(display_message, str(cm.exception)) self.assertIn(diagnostic_info, str(cm.exception.message_with_context())) @@ -388,7 +411,7 @@ def test_direct_results_uses_display_message_if_available(self, tcli_service_cla thrift_backend = ThriftBackend("foobar", 443, "path", [], auth_provider=AuthProvider()) with self.assertRaises(DatabaseError) as cm: - thrift_backend.execute_command(Mock(), Mock(), 100, 100, Mock()) + thrift_backend.execute_command(Mock(), Mock(), 100, 100, Mock(), Mock()) self.assertEqual(display_message, str(cm.exception)) self.assertIn(diagnostic_info, str(cm.exception.message_with_context())) @@ -618,6 +641,7 @@ def test_handle_execute_response_reads_has_more_rows_in_result_response( max_rows=1, max_bytes=1, expected_row_start_offset=0, + lz4_compressed=False, arrow_schema_bytes=Mock(), description=Mock()) @@ -650,6 +674,7 @@ def test_arrow_batches_row_count_are_respected(self, tcli_service_class): max_rows=1, max_bytes=1, expected_row_start_offset=0, + lz4_compressed=False, arrow_schema_bytes=schema, description=MagicMock()) @@ -664,7 +689,7 @@ def test_execute_statement_calls_client_and_handle_execute_response(self, tcli_s thrift_backend._handle_execute_response = Mock() cursor_mock = Mock() - thrift_backend.execute_command("foo", Mock(), 100, 200, cursor_mock) + thrift_backend.execute_command("foo", Mock(), 100, 200, Mock(), cursor_mock) # Check call to client req = tcli_service_instance.ExecuteStatement.call_args[0][0] get_direct_results = ttypes.TSparkGetDirectResults(maxRows=100, maxBytes=200) @@ -823,14 +848,14 @@ def test_non_arrow_non_column_based_set_triggers_exception(self, tcli_service_cl thrift_backend = self._make_fake_thrift_backend() with self.assertRaises(OperationalError) as cm: - thrift_backend.execute_command("foo", Mock(), 100, 100, Mock()) + thrift_backend.execute_command("foo", Mock(), 100, 100, Mock(), Mock()) self.assertIn("Expected results to be in Arrow or column based format", str(cm.exception)) def test_create_arrow_table_raises_error_for_unsupported_type(self): t_row_set = ttypes.TRowSet() thrift_backend = ThriftBackend("foobar", 443, "path", [], auth_provider=AuthProvider()) with self.assertRaises(OperationalError): - thrift_backend._create_arrow_table(t_row_set, None, Mock()) + thrift_backend._create_arrow_table(t_row_set, Mock(), None, Mock()) @patch.object(ThriftBackend, "_convert_arrow_based_set_to_arrow_table") @patch.object(ThriftBackend, "_convert_column_based_set_to_arrow_table") @@ -843,16 +868,36 @@ def test_create_arrow_table_calls_correct_conversion_method(self, convert_col_mo schema = Mock() cols = Mock() arrow_batches = Mock() + lz4_compressed = Mock() description = Mock() t_col_set = ttypes.TRowSet(columns=cols) - thrift_backend._create_arrow_table(t_col_set, schema, description) + thrift_backend._create_arrow_table(t_col_set, lz4_compressed, schema, description) convert_arrow_mock.assert_not_called() convert_col_mock.assert_called_once_with(cols, description) t_arrow_set = ttypes.TRowSet(arrowBatches=arrow_batches) - thrift_backend._create_arrow_table(t_arrow_set, schema, Mock()) - convert_arrow_mock.assert_called_once_with(arrow_batches, schema) + thrift_backend._create_arrow_table(t_arrow_set, lz4_compressed, schema, Mock()) + convert_arrow_mock.assert_called_once_with(arrow_batches, lz4_compressed, schema) + + @patch("lz4.frame.decompress") + @patch("pyarrow.ipc.open_stream") + def test_convert_arrow_based_set_to_arrow_table(self, open_stream_mock, lz4_decompress_mock): + thrift_backend = ThriftBackend("foobar", 443, "path", [], auth_provider=AuthProvider()) + + lz4_decompress_mock.return_value = bytearray('Testing','utf-8') + + schema = pyarrow.schema([ + pyarrow.field("column1", pyarrow.int32()), + ]).serialize().to_pybytes() + + arrow_batches = [ttypes.TSparkArrowBatch(batch=bytearray('Testing','utf-8'), rowCount=1) for _ in range(10)] + thrift_backend._convert_arrow_based_set_to_arrow_table(arrow_batches, False, schema) + lz4_decompress_mock.assert_not_called() + + thrift_backend._convert_arrow_based_set_to_arrow_table(arrow_batches, True, schema) + lz4_decompress_mock.assert_called() + def test_convert_column_based_set_to_arrow_table_without_nulls(self): # Deliberately duplicate the column name to check that dups work @@ -1329,8 +1374,7 @@ def test_execute_command_sets_complex_type_fields_correctly(self, mock_handle_ex complex_arg_types["_use_arrow_native_decimals"] = decimals thrift_backend = ThriftBackend("foobar", 443, "path", [], auth_provider=AuthProvider(), **complex_arg_types) - thrift_backend.execute_command(Mock(), Mock(), 100, 100, Mock()) - + thrift_backend.execute_command(Mock(), Mock(), 100, 100, Mock(), Mock()) t_execute_statement_req = tcli_service_instance.ExecuteStatement.call_args[0][0] # If the value is unset, the native type should default to True self.assertEqual(t_execute_statement_req.useArrowNativeTypes.timestampAsArrow,