diff --git a/Algorithmia/datafile.py b/Algorithmia/datafile.py index 24936d8..bb786e0 100644 --- a/Algorithmia/datafile.py +++ b/Algorithmia/datafile.py @@ -7,6 +7,7 @@ from datetime import datetime import os.path import pkgutil +import zipfile from Algorithmia.util import getParentAndBase from Algorithmia.data import DataObject, DataObjectType @@ -50,6 +51,23 @@ def getFile(self, as_path=False): else: return open(f.name) + def getAsZip(self): + """Download/decompress file/directory and return path to file/directory. + + Expects the `DataFile` object to contain a data API path pointing to a file/directory compressed with a zip-based compression algorithm. + Either returns the directory or a path to the file, depending on whether a directory or file was zipped. + """ + local_file_path = self.getFile(as_path=True) + directory_path = tempfile.mkdtemp() + with zipfile.ZipFile(local_file_path, 'r') as ziph: + ziph.extractall(directory_path) + if len(ziph.namelist()) > 1: + output_path = directory_path + else: + filename = ziph.namelist()[0] + output_path = os.path.join(directory_path, filename) + return output_path + def getName(self): _, name = getParentAndBase(self.path) return name @@ -145,6 +163,24 @@ def putNumpy(self, array): else: raise DataApiError("Attempted to .putNumpy() a file without numpy available, please install numpy.") + def putAsZip(self, path): + """Zip file/directory and upload to data API location defined by `DataFile` object. + + Accepts either a single file or a directory containing other files and directories. + """ + temp = tempfile.NamedTemporaryFile(delete=False).name + if os.path.isdir(path): + with zipfile.ZipFile(temp, 'w') as ziph: + for root, dirs, files in os.walk(path): + for file in files: + f_path = os.path.join(root, file) + arc_path = os.path.relpath(os.path.join(root, file), path) + ziph.write(f_path, arc_path) + else: + with zipfile.ZipFile(temp, 'w') as ziph: + ziph.write(path) + return self.putFile(temp) + def delete(self): # Delete from data api result = self.client.deleteHelper(self.url) @@ -256,7 +292,7 @@ def __del__(self): filepath = self.local_file.name self.local_file.close() if self.cleanup: - os.remove(filepath) + os.remove(filepath) def readable(self): return True diff --git a/Test/datafile_test.py b/Test/datafile_test.py index 7619442..38a6746 100644 --- a/Test/datafile_test.py +++ b/Test/datafile_test.py @@ -151,5 +151,21 @@ def test_putJson_getJson(self): self.assertDictEqual(result, payload) self.assertEqual(str(result), str(payload)) + def test_putZipDir_getZipDir(self): + local_directory = os.path.join(os.getcwd(), "Test/resources/zip_directory") + remote_directory = "data://.my/empty/datafile.zip" + df = AdvancedDataFile(self.client, remote_directory, cleanup=True) + response = df.putAsZip(local_directory) + self.assertEqual(response, df) + + unzipped_local_path = df.getAsZip() + self.assertTrue(os.path.isdir(unzipped_local_path)) + found_files = [] + for _, _, files in os.walk(unzipped_local_path): + for file in files: + found_files.append(file) + self.assertEqual(len(found_files), 3) + + if __name__ == '__main__': unittest.main() diff --git a/Test/resources/zip_directory/root.json b/Test/resources/zip_directory/root.json new file mode 100644 index 0000000..5eed32d --- /dev/null +++ b/Test/resources/zip_directory/root.json @@ -0,0 +1 @@ +{"location": "root"} \ No newline at end of file diff --git a/Test/resources/zip_directory/subdirectory/__init__.py b/Test/resources/zip_directory/subdirectory/__init__.py new file mode 100644 index 0000000..b1a5ec6 --- /dev/null +++ b/Test/resources/zip_directory/subdirectory/__init__.py @@ -0,0 +1,3 @@ +from .build_wait import get_build +from .publish_algo import publish_algo +from .test_algo import test_algo diff --git a/Test/resources/zip_directory/subdirectory/subdir.json b/Test/resources/zip_directory/subdirectory/subdir.json new file mode 100644 index 0000000..aab19d7 --- /dev/null +++ b/Test/resources/zip_directory/subdirectory/subdir.json @@ -0,0 +1 @@ +{"foo": "bar"} \ No newline at end of file