From 946e6c71fb44d61c1b8d81831df437bbcc2a465e Mon Sep 17 00:00:00 2001 From: James Sutton <1892175+zeryx@users.noreply.github.com> Date: Fri, 1 Oct 2021 13:25:31 -0400 Subject: [PATCH 1/6] added a getAsZip and putAsZip function for both directories and single files; with tests --- Algorithmia/datafile.py | 30 ++++++++++++++++++- Test/datafile_test.py | 17 +++++++++++ Test/resources/zip_directory/root.json | 1 + .../zip_directory/subdirectory/__init__.py | 3 ++ .../zip_directory/subdirectory/subdir.json | 1 + 5 files changed, 51 insertions(+), 1 deletion(-) create mode 100644 Test/resources/zip_directory/root.json create mode 100644 Test/resources/zip_directory/subdirectory/__init__.py create mode 100644 Test/resources/zip_directory/subdirectory/subdir.json diff --git a/Algorithmia/datafile.py b/Algorithmia/datafile.py index 24936d8..7ed6cc5 100644 --- a/Algorithmia/datafile.py +++ b/Algorithmia/datafile.py @@ -7,6 +7,8 @@ from datetime import datetime import os.path import pkgutil +import shutil +import zipfile from Algorithmia.util import getParentAndBase from Algorithmia.data import DataObject, DataObjectType @@ -50,6 +52,18 @@ def getFile(self, as_path=False): else: return open(f.name) + def getAsZip(self): + local_file_path = self.getFile(as_path=True) + directory_path = tempfile.mkdtemp() + with zipfile.ZipFile(local_file_path, 'r') as ziph: + ziph.extractall(directory_path) + if len(ziph.namelist()) > 1: + output_path = directory_path + else: + filename = ziph.namelist()[0] + output_path = os.path.join(directory_path, filename) + return output_path + def getName(self): _, name = getParentAndBase(self.path) return name @@ -145,6 +159,20 @@ def putNumpy(self, array): else: raise DataApiError("Attempted to .putNumpy() a file without numpy available, please install numpy.") + def putAsZip(self, path): + temp = tempfile.NamedTemporaryFile(delete=False).name + if os.path.isdir(path): + with zipfile.ZipFile(temp, 'w') as ziph: + for root, dirs, files in os.walk(path): + for file in files: + f_path = os.path.join(root, file) + arc_path = os.path.relpath(os.path.join(root, file), path) + ziph.write(f_path, arc_path) + else: + with zipfile.ZipFile(temp, 'w') as ziph: + ziph.write(path) + return self.putFile(temp) + def delete(self): # Delete from data api result = self.client.deleteHelper(self.url) @@ -256,7 +284,7 @@ def __del__(self): filepath = self.local_file.name self.local_file.close() if self.cleanup: - os.remove(filepath) + os.remove(filepath) def readable(self): return True diff --git a/Test/datafile_test.py b/Test/datafile_test.py index 7619442..4085893 100644 --- a/Test/datafile_test.py +++ b/Test/datafile_test.py @@ -9,6 +9,7 @@ import Algorithmia import json from Algorithmia.datafile import DataFile, LocalDataFile, AdvancedDataFile +from pathlib import Path class DataFileTest(unittest.TestCase): def setUp(self): @@ -151,5 +152,21 @@ def test_putJson_getJson(self): self.assertDictEqual(result, payload) self.assertEqual(str(result), str(payload)) + def test_putZipDir_getZipDir(self): + local_directory = os.path.join(os.getcwd(), "resources/zip_directory") + remote_directory = "data://.my/empty/datafile.zip" + df = AdvancedDataFile(self.client, remote_directory, cleanup=True) + response = df.putAsZip(local_directory) + self.assertEqual(response, df) + + unzipped_local_path = df.getAsZip() + self.assertTrue(os.path.isdir(unzipped_local_path)) + found_files = [] + for _, _, files in os.walk(unzipped_local_path): + for file in files: + found_files.append(file) + self.assertEqual(len(found_files), 3) + + if __name__ == '__main__': unittest.main() diff --git a/Test/resources/zip_directory/root.json b/Test/resources/zip_directory/root.json new file mode 100644 index 0000000..5eed32d --- /dev/null +++ b/Test/resources/zip_directory/root.json @@ -0,0 +1 @@ +{"location": "root"} \ No newline at end of file diff --git a/Test/resources/zip_directory/subdirectory/__init__.py b/Test/resources/zip_directory/subdirectory/__init__.py new file mode 100644 index 0000000..b1a5ec6 --- /dev/null +++ b/Test/resources/zip_directory/subdirectory/__init__.py @@ -0,0 +1,3 @@ +from .build_wait import get_build +from .publish_algo import publish_algo +from .test_algo import test_algo diff --git a/Test/resources/zip_directory/subdirectory/subdir.json b/Test/resources/zip_directory/subdirectory/subdir.json new file mode 100644 index 0000000..aab19d7 --- /dev/null +++ b/Test/resources/zip_directory/subdirectory/subdir.json @@ -0,0 +1 @@ +{"foo": "bar"} \ No newline at end of file From 45bc1c70082f2ec65f283924d56bf249081b957e Mon Sep 17 00:00:00 2001 From: James Sutton <1892175+zeryx@users.noreply.github.com> Date: Fri, 1 Oct 2021 13:29:57 -0400 Subject: [PATCH 2/6] correct relative path import issue --- Test/datafile_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Test/datafile_test.py b/Test/datafile_test.py index 4085893..a39116b 100644 --- a/Test/datafile_test.py +++ b/Test/datafile_test.py @@ -153,7 +153,7 @@ def test_putJson_getJson(self): self.assertEqual(str(result), str(payload)) def test_putZipDir_getZipDir(self): - local_directory = os.path.join(os.getcwd(), "resources/zip_directory") + local_directory = os.path.join(os.getcwd(), "Test/resources/zip_directory") remote_directory = "data://.my/empty/datafile.zip" df = AdvancedDataFile(self.client, remote_directory, cleanup=True) response = df.putAsZip(local_directory) From 0b0c3985457ea2e93701dfca3c068efa99aa4f7e Mon Sep 17 00:00:00 2001 From: James Sutton <1892175+zeryx@users.noreply.github.com> Date: Fri, 1 Oct 2021 13:36:49 -0400 Subject: [PATCH 3/6] removed unnecessary imports, python 2.7+ support --- Algorithmia/datafile.py | 1 - Test/datafile_test.py | 1 - 2 files changed, 2 deletions(-) diff --git a/Algorithmia/datafile.py b/Algorithmia/datafile.py index 7ed6cc5..efc3a35 100644 --- a/Algorithmia/datafile.py +++ b/Algorithmia/datafile.py @@ -7,7 +7,6 @@ from datetime import datetime import os.path import pkgutil -import shutil import zipfile from Algorithmia.util import getParentAndBase diff --git a/Test/datafile_test.py b/Test/datafile_test.py index a39116b..38a6746 100644 --- a/Test/datafile_test.py +++ b/Test/datafile_test.py @@ -9,7 +9,6 @@ import Algorithmia import json from Algorithmia.datafile import DataFile, LocalDataFile, AdvancedDataFile -from pathlib import Path class DataFileTest(unittest.TestCase): def setUp(self): From be67da0855e230c1e79f362799693f3e424b9ea0 Mon Sep 17 00:00:00 2001 From: James Sutton <1892175+zeryx@users.noreply.github.com> Date: Sat, 2 Oct 2021 04:54:11 -0700 Subject: [PATCH 4/6] good doc string Co-authored-by: lemonez <36384768+lemonez@users.noreply.github.com> --- Algorithmia/datafile.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Algorithmia/datafile.py b/Algorithmia/datafile.py index efc3a35..0afb174 100644 --- a/Algorithmia/datafile.py +++ b/Algorithmia/datafile.py @@ -159,6 +159,10 @@ def putNumpy(self, array): raise DataApiError("Attempted to .putNumpy() a file without numpy available, please install numpy.") def putAsZip(self, path): + """Zip file/directory and upload to data API location defined by `DataFile` object. + + Accepts either a single file or a directory containing other files and directories. + """ temp = tempfile.NamedTemporaryFile(delete=False).name if os.path.isdir(path): with zipfile.ZipFile(temp, 'w') as ziph: From 4bdcf7e62c1877d5b7e5a0572693208e3b601ccb Mon Sep 17 00:00:00 2001 From: James Sutton <1892175+zeryx@users.noreply.github.com> Date: Sat, 2 Oct 2021 04:54:23 -0700 Subject: [PATCH 5/6] good doc string Co-authored-by: lemonez <36384768+lemonez@users.noreply.github.com> --- Algorithmia/datafile.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Algorithmia/datafile.py b/Algorithmia/datafile.py index 0afb174..fcf1fae 100644 --- a/Algorithmia/datafile.py +++ b/Algorithmia/datafile.py @@ -52,6 +52,11 @@ def getFile(self, as_path=False): return open(f.name) def getAsZip(self): + """Download/decompress file/directory and return path to file/directory. + + Expects the `DataFile` object to point to a zip-compatible data API location. + Either returns the directory or a path to the file, depending on whether a directory or file was zipped. + """ local_file_path = self.getFile(as_path=True) directory_path = tempfile.mkdtemp() with zipfile.ZipFile(local_file_path, 'r') as ziph: From 3887d718233d9e11dccedb80e9dabc9f697cb172 Mon Sep 17 00:00:00 2001 From: James Sutton <1892175+zeryx@users.noreply.github.com> Date: Mon, 4 Oct 2021 12:31:49 -0700 Subject: [PATCH 6/6] Updated docstrings --- Algorithmia/datafile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Algorithmia/datafile.py b/Algorithmia/datafile.py index fcf1fae..bb786e0 100644 --- a/Algorithmia/datafile.py +++ b/Algorithmia/datafile.py @@ -54,7 +54,7 @@ def getFile(self, as_path=False): def getAsZip(self): """Download/decompress file/directory and return path to file/directory. - Expects the `DataFile` object to point to a zip-compatible data API location. + Expects the `DataFile` object to contain a data API path pointing to a file/directory compressed with a zip-based compression algorithm. Either returns the directory or a path to the file, depending on whether a directory or file was zipped. """ local_file_path = self.getFile(as_path=True)