Skip to content

AML-8: Data API support for automatically zipping/unzipping nested directories #112

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Oct 5, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 37 additions & 1 deletion Algorithmia/datafile.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from datetime import datetime
import os.path
import pkgutil
import zipfile

from Algorithmia.util import getParentAndBase
from Algorithmia.data import DataObject, DataObjectType
Expand Down Expand Up @@ -50,6 +51,23 @@ def getFile(self, as_path=False):
else:
return open(f.name)

def getAsZip(self):
    """Download the zip archive behind this object and extract it locally.

    The archive is fetched with ``self.getFile(as_path=True)`` and unpacked
    into a fresh temporary directory created with ``tempfile.mkdtemp()``.

    Returns:
        The path of the single extracted entry when the archive holds
        exactly one entry; otherwise the path of the temporary directory
        holding the extracted contents (an empty directory for an empty
        archive).

    Raises:
        zipfile.BadZipFile: If the downloaded file is not a zip archive.
    """
    local_file_path = self.getFile(as_path=True)
    directory_path = tempfile.mkdtemp()
    with zipfile.ZipFile(local_file_path, 'r') as ziph:
        # Read the member list once; it drives both the extraction strategy
        # and the shape of the return value.
        names = ziph.namelist()
        if len(names) == 1:
            # extract() returns the path it actually created. zipfile
            # sanitises member names (leading slashes, ".." components),
            # so joining the raw name onto the directory could point at a
            # location that was never written.
            output_path = ziph.extract(names[0], directory_path)
        else:
            # Zero or many entries: hand back the containing directory.
            # An empty archive used to raise IndexError here.
            ziph.extractall(directory_path)
            output_path = directory_path
    return output_path

def getName(self):
    """Return the name part of ``self.path`` as split by ``getParentAndBase``."""
    # The helper returns a two-item result; the first item is not needed here.
    parent_and_base = getParentAndBase(self.path)
    _parent, base = parent_and_base
    return base
Expand Down Expand Up @@ -145,6 +163,24 @@ def putNumpy(self, array):
else:
raise DataApiError("Attempted to .putNumpy() a file without numpy available, please install numpy.")

def putAsZip(self, path):
    """Zip a local file or directory and upload the archive with ``putFile``.

    Args:
        path: Local path to a single file, or to a directory. For a
            directory, every file found by ``os.walk`` (nested
            subdirectories included) is stored under its path relative to
            ``path``. A single file is stored under its base name.

    Returns:
        Whatever ``self.putFile`` returns for the uploaded archive.
    """
    # Only the name is needed. Closing the handle immediately avoids leaking
    # a descriptor and lets zipfile reopen the same path.
    with tempfile.NamedTemporaryFile(delete=False) as handle:
        archive_path = handle.name
    try:
        with zipfile.ZipFile(archive_path, 'w') as ziph:
            if os.path.isdir(path):
                for root, _dirs, files in os.walk(path):
                    for file_name in files:
                        f_path = os.path.join(root, file_name)
                        ziph.write(f_path, os.path.relpath(f_path, path))
            else:
                # Use the base name as the entry name. Writing the raw path
                # would embed the caller's local directory layout (or ".."
                # components) in the archive.
                ziph.write(path, os.path.basename(path))
        return self.putFile(archive_path)
    finally:
        # The temporary archive is no longer needed once putFile has
        # returned or raised.
        os.remove(archive_path)

def delete(self):
# Delete from data api
result = self.client.deleteHelper(self.url)
Expand Down Expand Up @@ -256,7 +292,7 @@ def __del__(self):
filepath = self.local_file.name
self.local_file.close()
if self.cleanup:
os.remove(filepath)
os.remove(filepath)

def readable(self):
    """Report that this object can be read from; the answer is always ``True``."""
    return True
Expand Down
16 changes: 16 additions & 0 deletions Test/datafile_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,5 +151,21 @@ def test_putJson_getJson(self):
self.assertDictEqual(result, payload)
self.assertEqual(str(result), str(payload))

def test_putZipDir_getZipDir(self):
    """Round-trip a nested directory through putAsZip/getAsZip and count its files."""
    source_dir = os.path.join(os.getcwd(), "Test/resources/zip_directory")
    data_file = AdvancedDataFile(self.client, "data://.my/empty/datafile.zip", cleanup=True)
    # The upload call is expected to hand back the data file it was called on.
    upload_result = data_file.putAsZip(source_dir)
    self.assertEqual(upload_result, data_file)

    extracted = data_file.getAsZip()
    self.assertTrue(os.path.isdir(extracted))
    # Collect every file name found anywhere below the extracted directory.
    names = [name for _, _, files in os.walk(extracted) for name in files]
    self.assertEqual(len(names), 3)


if __name__ == '__main__':
unittest.main()
1 change: 1 addition & 0 deletions Test/resources/zip_directory/root.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"location": "root"}
3 changes: 3 additions & 0 deletions Test/resources/zip_directory/subdirectory/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from .build_wait import get_build
from .publish_algo import publish_algo
from .test_algo import test_algo
1 change: 1 addition & 0 deletions Test/resources/zip_directory/subdirectory/subdir.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"foo": "bar"}