Skip to content

Commit 149ace5

Browse files
committed
fix #896 : added support for pathlib.Path objects in all i/o functions/methods/classes
1 parent 35cdfc3 commit 149ace5

17 files changed

+194
-167
lines changed

doc/source/changes/version_0_34.rst.inc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,8 @@ New features
4949
Miscellaneous improvements
5050
^^^^^^^^^^^^^^^^^^^^^^^^^^
5151

52-
* improved something.
52+
* made all I/O functions/methods/constructors accept either a string or a pathlib.Path object
53+
for all arguments representing a path (closes :issue:`896`).
5354

5455

5556
Fixes

larray/core/array.py

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,8 @@
2828
from collections import OrderedDict
2929
from itertools import product, chain, groupby
3030
from collections.abc import Iterable, Sequence
31+
from pathlib import Path
3132
import builtins
32-
import os
3333
import functools
3434
import warnings
3535

@@ -6891,7 +6891,7 @@ def to_csv(self, filepath, sep=',', na_rep='', wide=True, value_name='value', dr
68916891
68926892
Parameters
68936893
----------
6894-
filepath : str
6894+
filepath : str or Path
68956895
path where the csv file has to be written.
68966896
sep : str, optional
68976897
separator for the csv file. Defaults to `,`.
@@ -6912,8 +6912,8 @@ def to_csv(self, filepath, sep=',', na_rep='', wide=True, value_name='value', dr
69126912
69136913
Examples
69146914
--------
6915-
>>> tmpdir = getfixture('tmpdir')
6916-
>>> fname = os.path.join(tmpdir.strpath, 'test.csv')
6915+
>>> tmp_path = getfixture('tmp_path')
6916+
>>> fname = tmp_path / 'test.csv'
69176917
>>> a = ndtest('nat=BE,FO;sex=M,F')
69186918
>>> a
69196919
nat\sex M F
@@ -6965,7 +6965,7 @@ def to_hdf(self, filepath, key) -> None:
69656965
69666966
Parameters
69676967
----------
6968-
filepath : str
6968+
filepath : str or Path
69696969
Path where the hdf file has to be written.
69706970
key : str or Group
69716971
Key (path) of the array within the HDF file (see Notes below).
@@ -7037,7 +7037,7 @@ def to_excel(self, filepath=None, sheet=None, position='A1', overwrite_file=Fals
70377037
70387038
Parameters
70397039
----------
7040-
filepath : str or int or None, optional
7040+
filepath : str or Path or int or None, optional
70417041
Path where the excel file has to be written. If None (default), creates a new Excel Workbook in a live Excel
70427042
instance (Windows only). Use -1 to use the currently active Excel Workbook. Use a name without extension
70437043
(.xlsx) to use any unsaved* workbook.
@@ -7092,19 +7092,20 @@ def to_excel(self, filepath=None, sheet=None, position='A1', overwrite_file=Fals
70927092
if engine is None:
70937093
engine = 'xlwings' if xw is not None else None
70947094

7095+
if isinstance(filepath, str):
7096+
filepath = Path(filepath)
7097+
70957098
if engine == 'xlwings':
70967099
from larray.inout.xw_excel import open_excel
70977100

70987101
close = False
70997102
new_workbook = False
71007103
if filepath is None:
71017104
new_workbook = True
7102-
elif isinstance(filepath, str):
7103-
basename, ext = os.path.splitext(filepath)
7104-
if ext:
7105-
if not os.path.isfile(filepath):
7106-
new_workbook = True
7107-
close = True
7105+
elif isinstance(filepath, Path) and filepath.suffix:
7106+
if not filepath.is_file():
7107+
new_workbook = True
7108+
close = True
71087109
if new_workbook or overwrite_file:
71097110
new_workbook = overwrite_file = True
71107111

larray/core/session.py

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
1-
import os
21
import sys
32
import re
43
import fnmatch
54
import warnings
65
from collections import OrderedDict
76
from collections.abc import Iterable
7+
from pathlib import Path
88

99
import numpy as np
1010

@@ -87,7 +87,7 @@ def __init__(self, *args, **kwargs):
8787
if len(args) == 1:
8888
assert len(kwargs) == 0
8989
a0 = args[0]
90-
if isinstance(a0, str):
90+
if isinstance(a0, (str, Path)):
9191
# assume a0 is a filename
9292
self.load(a0)
9393
else:
@@ -356,7 +356,7 @@ def load(self, fname, names=None, engine='auto', display=False, **kwargs):
356356
357357
Parameters
358358
----------
359-
fname : str
359+
fname : str or Path
360360
This can be either the path to a single file, a path to a directory containing .csv files or a pattern
361361
representing several .csv files.
362362
names : list of str, optional
@@ -427,13 +427,18 @@ def load(self, fname, names=None, engine='auto', display=False, **kwargs):
427427
if display:
428428
print("opening", fname)
429429
if fname is None:
430-
if all([os.path.splitext(name)[1] == '.csv' for name in names]):
430+
if all([Path(name).suffix == '.csv' for name in names]):
431431
engine = ext_default_engine['csv']
432432
else:
433433
raise ValueError(f"List of paths to only CSV files expected. Got {names}")
434+
elif isinstance(fname, str):
435+
fname = Path(fname)
436+
if not isinstance(fname, Path):
437+
raise TypeError(f"Expected a string or a Path object for the 'fname' argument. "
438+
f"Got object of type '{type(fname).__name__}' instead.")
434439
if engine == 'auto':
435-
_, ext = os.path.splitext(fname)
436-
ext = ext.strip('.') if '.' in ext else 'csv'
440+
ext = fname.suffix
441+
ext = ext.strip('.') if ext else 'csv'
437442
engine = ext_default_engine[ext]
438443
handler_cls = get_file_handler(engine)
439444
if engine == 'pandas_csv' and 'sep' in kwargs:
@@ -455,7 +460,7 @@ def save(self, fname, names=None, engine='auto', overwrite=True, display=False,
455460
456461
Parameters
457462
----------
458-
fname : str
463+
fname : str or Path
459464
Path of the file for the dump.
460465
If objects are saved in CSV files, the path corresponds to a directory.
461466
names : list of str or None, optional
@@ -515,9 +520,14 @@ def save(self, fname, names=None, engine='auto', overwrite=True, display=False,
515520
dumping arr1 ... done
516521
dumping arr4 ... done
517522
"""
523+
if isinstance(fname, str):
524+
fname = Path(fname)
525+
if not isinstance(fname, Path):
526+
raise TypeError(f"Expected a string or a Path object for the 'fname' argument. "
527+
f"Got object of type '{type(fname).__name__}' instead.")
518528
if engine == 'auto':
519-
_, ext = os.path.splitext(fname)
520-
ext = ext.strip('.') if '.' in ext else 'csv'
529+
ext = fname.suffix
530+
ext = ext.strip('.') if ext else 'csv'
521531
engine = ext_default_engine[ext]
522532
handler_cls = get_file_handler(engine)
523533
if engine == 'pandas_csv' and 'sep' in kwargs:

larray/inout/common.py

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
import os
22
from datetime import date, time, datetime
33
from collections import OrderedDict
4+
from pathlib import Path
45

5-
from typing import List, Tuple
6+
from typing import Optional, Union, List, Tuple
67

78
from larray.core.axis import Axis
89
from larray.core.group import Group
@@ -41,15 +42,20 @@ class FileHandler:
4142
4243
Parameters
4344
----------
44-
fname : str
45+
fname : str or Path or None
4546
Filename.
4647
4748
Attributes
4849
----------
49-
fname : str
50+
fname : Path
5051
Filename.
5152
"""
52-
def __init__(self, fname, overwrite_file=False):
53+
def __init__(self, fname: Optional[Union[str, Path]], overwrite_file: bool = False):
54+
if isinstance(fname, str):
55+
fname = Path(fname)
56+
if fname is not None and not isinstance(fname, Path):
57+
raise TypeError(f"Expected a string or a pathlib.Path object for the 'fname' argument. "
58+
f"Got an object of type {type(fname).__name__} instead.")
5359
self.fname = fname
5460
self.original_file_name = None
5561
self.overwrite_file = overwrite_file
@@ -96,13 +102,12 @@ def close(self):
96102
raise NotImplementedError()
97103

98104
def _get_original_file_name(self):
99-
if self.overwrite_file and os.path.isfile(self.fname):
105+
if self.overwrite_file and self.fname.is_file():
100106
self.original_file_name = self.fname
101-
fname, ext = os.path.splitext(self.fname)
102-
self.fname = f'{fname}~{ext}'
107+
self.fname = self.fname.parent / (self.fname.stem + '~' + self.fname.suffix)
103108

104109
def _update_original_file(self):
105-
if self.original_file_name is not None and os.path.isfile(self.fname):
110+
if self.original_file_name is not None and self.fname.is_file():
106111
os.remove(self.original_file_name)
107112
os.rename(self.fname, self.original_file_name)
108113

larray/inout/csv.py

Lines changed: 22 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import os
22
import csv
33
import warnings
4-
from glob import glob
4+
from pathlib import Path
55

66
import pandas as pd
77
import numpy as np
@@ -83,7 +83,7 @@ def read_csv(filepath_or_buffer, nb_axes=None, index_col=None, sep=',', headerse
8383
Examples
8484
--------
8585
>>> csv_dir = get_example_filepath('examples')
86-
>>> fname = csv_dir + '/population.csv'
86+
>>> fname = csv_dir / 'population.csv'
8787
8888
>>> # The data below is derived from a subset of the demo_pjan table from Eurostat
8989
>>> read_csv(fname)
@@ -97,7 +97,7 @@ def read_csv(filepath_or_buffer, nb_axes=None, index_col=None, sep=',', headerse
9797
9898
Missing label combinations
9999
100-
>>> fname = csv_dir + '/population_missing_values.csv'
100+
>>> fname = csv_dir / 'population_missing_values.csv'
101101
>>> # let's take a look inside the CSV file.
102102
>>> # there are missing label combinations: (Paris, male) and (New York, female)
103103
>>> with open(fname) as f:
@@ -129,7 +129,7 @@ def read_csv(filepath_or_buffer, nb_axes=None, index_col=None, sep=',', headerse
129129
130130
Specify the number of axes of the output array (useful when the name of the last axis is implicit)
131131
132-
>>> fname = csv_dir + '/population_missing_axis_name.csv'
132+
>>> fname = csv_dir / 'population_missing_axis_name.csv'
133133
>>> # let's take a look inside the CSV file.
134134
>>> # The name of the last axis is missing.
135135
>>> with open(fname) as f:
@@ -164,7 +164,7 @@ def read_csv(filepath_or_buffer, nb_axes=None, index_col=None, sep=',', headerse
164164
165165
Read array saved in "narrow" format (wide=False)
166166
167-
>>> fname = csv_dir + '/population_narrow_format.csv'
167+
>>> fname = csv_dir / 'population_narrow_format.csv'
168168
>>> # let's take a look inside the CSV file.
169169
>>> # Here, data are stored in a 'narrow' format.
170170
>>> with open(fname) as f:
@@ -260,46 +260,44 @@ def __init__(self, fname, overwrite_file=False, sep=','):
260260
self.sep = sep
261261
self.axes = None
262262
self.groups = None
263-
if fname is None:
263+
if self.fname is None:
264264
self.pattern = None
265265
self.directory = None
266-
elif '.csv' in fname or '*' in fname or '?' in fname:
267-
self.pattern = fname
268-
self.directory = os.path.dirname(fname)
266+
elif self.fname.suffix == '.csv' or '*' in self.fname.name or '?' in self.fname.name:
267+
self.pattern = self.fname.name
268+
self.directory = fname.parent
269269
else:
270270
# assume fname is a directory.
271-
# Not testing for os.path.isdir(fname) here because when writing, the directory might not exist.
272-
self.pattern = os.path.join(fname, '*.csv')
273-
self.directory = fname
271+
# Not testing for fname.is_dir() here because when writing, the directory might not exist.
272+
self.pattern = '*.csv'
273+
self.directory = self.fname
274274

275275
def _get_original_file_name(self):
276276
pass
277277

278-
def _to_filepath(self, key: str) -> str:
278+
def _to_filepath(self, key) -> Path:
279279
if self.directory is not None:
280-
return os.path.join(self.directory, f'{key}.csv')
280+
return self.directory / f'{key}.csv'
281281
else:
282-
return key
282+
return Path(key)
283283

284284
def _open_for_read(self):
285-
if self.directory and not os.path.isdir(self.directory):
285+
if self.directory and not self.directory.is_dir():
286286
raise ValueError(f"Directory '{self.directory}' does not exist")
287287

288288
def _open_for_write(self):
289289
if self.directory is not None:
290290
try:
291291
os.makedirs(self.directory)
292292
except OSError:
293-
if not os.path.isdir(self.directory):
293+
if not self.directory.is_dir():
294294
raise ValueError(f"Path {self.directory} must represent a directory")
295295

296296
def list_items(self) -> List[Tuple[str, str]]:
297-
fnames = glob(self.pattern) if self.pattern is not None else []
298-
# drop directory
299-
fnames = [os.path.basename(fname) for fname in fnames]
300-
# strip extension from files
301-
# XXX: unsure we should use sorted here
302-
fnames = sorted([os.path.splitext(fname)[0] for fname in fnames])
297+
fnames = self.directory.glob(self.pattern) if self.pattern is not None else []
298+
# stem = filename without extension
299+
# FIXME : not sure sorted is required here
300+
fnames = sorted([fname.stem for fname in fnames])
303301
return [(name, 'Array') for name in fnames if name != '__metadata__']
304302

305303
def _read_item(self, key, type, *args, **kwargs) -> Array:
@@ -316,7 +314,7 @@ def _dump_item(self, key, value, *args, **kwargs):
316314

317315
def _read_metadata(self) -> Metadata:
318316
filepath = self._to_filepath('__metadata__')
319-
if os.path.isfile(filepath):
317+
if filepath.is_file():
320318
meta = read_csv(filepath, wide=False)
321319
return Metadata.from_array(meta)
322320
else:

larray/inout/excel.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
import warnings
2-
import os
32

43
import numpy as np
54
import pandas as pd
@@ -39,7 +38,7 @@ def read_excel(filepath, sheet=0, nb_axes=None, index_col=None, fill_value=nan,
3938
4039
Parameters
4140
----------
42-
filepath : str
41+
filepath : str or Path
4342
Path where the Excel file has to be read or use -1 to refer to the currently active workbook.
4443
sheet : str, Group or int, optional
4544
Name or index of the Excel sheet containing the array to be read.
@@ -241,8 +240,7 @@ def _open_for_read(self):
241240
self.handle = pd.ExcelFile(self.fname)
242241

243242
def _open_for_write(self):
244-
_, ext = os.path.splitext(self.fname)
245-
engine = 'xlsxwriter' if ext == '.xlsx' and xlsxwriter is not None else None
243+
engine = 'xlsxwriter' if (self.fname.suffix == '.xlsx' and xlsxwriter is not None) else None
246244
self.handle = pd.ExcelWriter(self.fname, engine=engine)
247245

248246
def list_items(self) -> List[Tuple[str, str]]:

larray/inout/hdf.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ def read_hdf(filepath_or_buffer, key, fill_value=nan, na=nan, sort_rows=False, s
3131
3232
Parameters
3333
----------
34-
filepath_or_buffer : str or pandas.HDFStore
34+
filepath_or_buffer : str or Path or pandas.HDFStore
3535
Path and name where the HDF5 file is stored or a HDFStore object.
3636
key : str or Group
3737
Name of the scalar or axis or group or array.
@@ -133,6 +133,10 @@ class PandasHDFHandler(FileHandler):
133133
r"""
134134
Handler for HDF5 files using Pandas.
135135
"""
136+
def __init__(self, fname, overwrite_file=False):
137+
assert fname is not None
138+
super(PandasHDFHandler, self).__init__(fname, overwrite_file)
139+
136140
def _open_for_read(self):
137141
self.handle = HDFStore(self.fname, mode='r')
138142

0 commit comments

Comments
 (0)