From 7476876c1c6026163446135a96bd3a80a261ef39 Mon Sep 17 00:00:00 2001 From: Jeff Tratner Date: Fri, 19 Jan 2018 17:33:18 -0800 Subject: [PATCH] More aggressively validate ambiguous file paths Closes #52 * Error on filepaths with trailing slash because they do not have a name and thus appear to be directories. * Add validation for bad paths to ``OBSFile``. * Warn if filesystem paths would not pass this validation (otherwise you have the possibility of something that works locally and not on OBS). This can be set to raise instead if you set ``stor.raise_on_ambiguous_filesystem_path`` to be True * Deprecate ``is_ambiguous()`` which didn't 100% serve the purpose we wanted anyways (since you can always tell when user means directory because of the user of ``copytree()`` and/or ``-r``) --- docs/release_notes.rst | 7 ++++++ stor/base.py | 13 +++++++--- stor/default.cfg | 3 +++ stor/obs.py | 5 ++++ stor/swift.py | 8 +++--- stor/test.py | 4 +++ stor/tests/test_posix.py | 19 -------------- stor/tests/test_utils.py | 54 ++++++++++++++++++++++++++++++++++++++-- stor/utils.py | 52 +++++++++++++++++++++++++++++++++++--- 9 files changed, 132 insertions(+), 33 deletions(-) diff --git a/docs/release_notes.rst b/docs/release_notes.rst index 3c7b89fb..de66e37e 100644 --- a/docs/release_notes.rst +++ b/docs/release_notes.rst @@ -1,6 +1,13 @@ Release Notes ============= +v1.5.5 +------ + +* Raise errors earlier when calling ``open()`` with an invalid OBS path. +* Add ``stor.test.TestCase`` that allows mocking out of both swift AND s3 + functionality in a test case. + v1.5.4 ------ diff --git a/stor/base.py b/stor/base.py index bae3dca5..46c7f478 100644 --- a/stor/base.py +++ b/stor/base.py @@ -267,6 +267,8 @@ def open(self, *args, **kwargs): Args: mode (str): first positional arg, mode of file descriptor encoding (str): text encoding to use (Python 3 only) + Raises: + ValueError: if ambiguous path - see ``stor.utils.validate_file_path`` for more. """ raise NotImplementedError @@ -370,14 +372,17 @@ class FileSystemPath(Path): """ def open(self, *args, **kwargs): """ - Opens a path and retains interface compatibility with - `SwiftPath` by popping the unused ``swift_upload_args`` keyword - argument. + Opens a path. Creates parent directory if it does not exist. + + Raises: + ValueError: if ambiguous path - see ``stor.utils.validate_file_path`` for more. """ - kwargs.pop('swift_upload_kwargs', None) + if kwargs.pop('swift_upload_kwargs', None): + warnings.warn('swift_upload_kwargs will be removed in stor 2.0') self.parent.makedirs_p() + utils.validate_file_path(self) return builtins.open(self, *args, **kwargs) def __enter__(self): diff --git a/stor/default.cfg b/stor/default.cfg index 82ee1afd..3211d10c 100644 --- a/stor/default.cfg +++ b/stor/default.cfg @@ -1,5 +1,8 @@ [stor] +# set to raise if file paths are ambiguous - see stor.utils.validate_file_path for more. +raise_on_ambiguous_filesystem_path = False + [s3] # See boto3 docs for more detail on these parameters - all passed directly to boto3.session.Session *if* set diff --git a/stor/obs.py b/stor/obs.py index 5d79bb13..92a3d928 100644 --- a/stor/obs.py +++ b/stor/obs.py @@ -1,6 +1,7 @@ import locale import posixpath import sys +import warnings from cached_property import cached_property import six @@ -90,6 +91,7 @@ def is_ambiguous(self): """Returns true if it cannot be determined if the path is a file or directory """ + warnings.warn('this function is deprecated and will be removed in stor 2.0') return not self.endswith('/') and not self.ext def _get_parts(self): @@ -282,11 +284,14 @@ def __init__(self, pth, mode='r', encoding=None, **kwargs): use binary mode OR explicitly set an encoding when reading/writing text (because writers from different computers may store data on OBS in different ways). Python 3 only. + Raises: + ValueError: if the mode is not valid or the path is ambiguous (see ``stor.copy``) """ if mode not in self._VALID_MODES: raise ValueError('invalid mode for file: %r' % mode) if six.PY2 and encoding: # pragma: no cover raise TypeError('encoding not supported in Python 2') + utils.validate_file_path(pth) self._path = pth self.mode = mode self._kwargs = kwargs diff --git a/stor/swift.py b/stor/swift.py index 51c4aacd..a37b34b2 100644 --- a/stor/swift.py +++ b/stor/swift.py @@ -688,11 +688,7 @@ def open(self, mode='r', encoding=None, swift_upload_options=None): ("r" or "rb") and writing ("w", "wb") encoding (str): text encoding to use. Defaults to ``locale.getpreferredencoding(False)`` (Python 3 only) - swift_upload_options (dict): DEPRECATED (use `stor.settings.use()` - instead). A dictionary of arguments that will be - passed as keyword args to `SwiftPath.upload` if any writes - occur on the opened resource. - + swift_upload_options (dict): DEPRECATED (use `stor.settings.use()` instead). Returns: SwiftFile: The swift object. @@ -700,6 +696,8 @@ def open(self, mode='r', encoding=None, swift_upload_options=None): SwiftError: A swift client error occurred. """ swift_upload_options = swift_upload_options or {} + if swift_upload_options: + warnings.warn('swift_upload_options will be removed in stor 2.0') return SwiftFile(self, mode=mode, encoding=encoding, **swift_upload_options) @_swift_retry(exceptions=(ConditionNotMetError, UnavailableError)) diff --git a/stor/test.py b/stor/test.py index b5c31dd5..3f8b5b0d 100644 --- a/stor/test.py +++ b/stor/test.py @@ -207,3 +207,7 @@ def setUp(self): del s3._thread_local.s3_transfer_config except AttributeError: pass + + +class StorTestCase(S3TestCase, SwiftTestCase): + """A TestCase class that mocks BOTH s3 and stor for tests""" diff --git a/stor/tests/test_posix.py b/stor/tests/test_posix.py index fca2a28c..0337def5 100644 --- a/stor/tests/test_posix.py +++ b/stor/tests/test_posix.py @@ -92,25 +92,6 @@ def test_posix_file_destination(self): self.assertTrue(dest.exists()) self.assertEquals(dest.open().read(), '1') - def test_ambigious_swift_resource_destination(self): - with stor.NamedTemporaryDirectory() as tmp_d: - source = tmp_d / '1' - with open(source, 'w') as tmp_file: - tmp_file.write('1') - - dest = 'swift://tenant/container/ambiguous-resource' - with self.assertRaisesRegexp(ValueError, 'OBS destination'): - utils.copy(source, dest) - - def test_ambigious_swift_container_destination(self): - with stor.NamedTemporaryDirectory() as tmp_d: - source = tmp_d / '1' - with open(source, 'w') as tmp_file: - tmp_file.write('1') - - dest = 'swift://tenant/ambiguous-container' - with self.assertRaisesRegexp(ValueError, 'OBS destination'): - utils.copy(source, dest) def test_tenant_swift_destination(self): with stor.NamedTemporaryDirectory() as tmp_d: diff --git a/stor/tests/test_utils.py b/stor/tests/test_utils.py index 1d1b7766..d36cbd86 100644 --- a/stor/tests/test_utils.py +++ b/stor/tests/test_utils.py @@ -1,12 +1,14 @@ import errno import logging -import mock import ntpath import os import stat import unittest + from testfixtures import LogCapture +import mock +import pytest import stor from stor import Path @@ -15,6 +17,7 @@ from stor.swift import SwiftPath from stor.windows import WindowsPath from stor import utils +from stor.test import StorTestCase class TestBaseProgressLogger(unittest.TestCase): @@ -112,7 +115,7 @@ def test_w_broken_symlink(self): ) with utils.NamedTemporaryDirectory(dir=swift_dir) as tmp_dir: symlink = tmp_dir / 'broken.symlink' - symlink_source = tmp_dir / 'nonexistent' + symlink_source = tmp_dir / 'nonexistent.txt' # put something in symlink source so that Python doesn't complain with stor.open(symlink_source, 'w') as fp: fp.write('blah') @@ -460,3 +463,50 @@ def test_path_no_perms(self): self.mock_copy.side_effect = stor.exceptions.FailedUploadError('foo') self.assertFalse(utils.is_writeable('s3://stor-test/foo/bar')) self.assertFalse(self.mock_remove.called) + + +@mock.patch.object(stor.base.FileSystemPath, 'makedirs', lambda *args, **kwargs: None) +class TestAmbiguityValidation(StorTestCase): + # someday we'll be able to use pytest.parametrized, but not today + DRIVES = ["swift://", "s3://", "/"] + def test_open_detects_invalid_modes(self): + def tester(drive): + with self.assertRaisesRegexp(ValueError, 'mode'): + with stor.open(stor.join(drive, 'dummy/something/path.txt'), 'invalid'): + assert False, 'I should not be able to get into this block' + for drive in self.DRIVES: + tester(drive) + + @mock.patch('stor.base.builtins.open', lambda *args, **kwargs: None) + def test_open_should_detect_invalid_paths(self): + def tester(drive, subpath): + with self.assertRaisesRegexp(ValueError, 'file paths'): + with stor.open(stor.join(drive, subpath), 'w'): + assert False, 'I should not be able to get into this block' + with self.assertRaisesRegexp(ValueError, 'file paths'): + # prevent from going out of scope + f = stor.open(stor.join(drive, subpath)) + assert False and not f + with self.assertRaisesRegexp(ValueError, 'file paths'): + utils.copy('whatever', stor.join(drive, subpath)) + with stor.settings.use({'stor': {'raise_on_ambiguous_filesystem_path': True}}): + for drive in self.DRIVES: + for subpath in [ + 'tenant/ambiguous-container', 'T/C/trailing-slash/', 'T/C/no-extension']: + tester(drive, subpath) + with pytest.warns(UserWarning): + stor.open('/a/') + + @mock.patch('stor.base.builtins.open', lambda *args, **kwargs: None) + def test_validate_path(self): + def tester(subpath, regex): + assert stor.settings.get()['stor']['raise_on_ambiguous_filesystem_path'] == False + with pytest.warns(UserWarning): + utils.validate_file_path(stor.join('/', subpath)) + with stor.settings.use({'stor': {'raise_on_ambiguous_filesystem_path': True}}): + for drive in ['swift://', 's3://', '/']: + with self.assertRaisesRegexp(ValueError, regex): + utils.validate_file_path(stor.join(drive, subpath)) + for subpath in ['T', 'T/C', 'T/C/no-extension']: + tester(subpath, 'without extension are ambiguous') + tester('T/C/something/', 'trailing slash') diff --git a/stor/utils.py b/stor/utils.py index a475e119..82cc4371 100644 --- a/stor/utils.py +++ b/stor/utils.py @@ -7,6 +7,7 @@ import shutil from subprocess import check_call import tempfile +import warnings from stor import exceptions @@ -312,6 +313,51 @@ def is_writeable(path, swift_retry_options=None): return answer +def validate_file_path(dest): + """Raises ValueError if dest is an ambiguous or invalid path. + + Args: + dest (Path): path to validate + Raises: + ValueError: if path is ambiguous and is not a filesystem path (see below) + Note: + if an ambiguous filesystem path and ``raise_on_ambiguous_filesystem_path`` is true, will raise + a ValueError as well, otherwise it warns. + + In OBS-land, we cannot differentiate between files and directories, except by convention. + Conventionally, directories are either prefixes or zero-sized objects ending with a trailing + slash. With POSIX copies, it's conventional to do something like:: + cp source/myfile.txt dest + + In which case the system checks whether dest is a directory and, if it is, dumps to + ``dest/myfile.txt`` or otherwise dumps to ``dest``. + + We can't do that kind of validation and don't want the outcome of a command to depend upon + whether there's a file with the same prefix or not. Thus, we raise an error if you try to use + `stor.copy()`. (to manipulate files without extensions, you can always use `stor.copytree()`. + """ + import stor.settings + + # we use a try/except so that it's easier for the reader to tell why the exception was + # generated. + try: + if not dest.name: + raise ValueError('file paths may not end with trailing slash') + if not dest.ext: + raise ValueError('file paths without extension are ambiguous') + except ValueError as e: + if (is_filesystem_path(dest) and + not stor.settings.get()['stor']['raise_on_ambiguous_filesystem_path']): + warnings.warn( + 'In stor 2.0, ambiguous filesystem paths will raise an error (%s)' % str(e), + # this works for stor.copy, will not look right when the copy method is used + stacklevel=4 + ) + return + print dest, is_filesystem_path(dest), stor.settings.get()['stor'], e + raise + + def copy(source, dest, swift_retry_options=None): """Copies a source file to a destination file. @@ -347,18 +393,18 @@ def copy(source, dest, swift_retry_options=None): >>> local_p.copy('swift://tenant/container/dir') Traceback (most recent call last): ... - ValueError: OBS destination must be file with extension or directory with slash + ValueError: OBS file destinations without extension are ambiguous """ from stor import Path from stor.obs import OBSUploadObject source = Path(source) + validate_file_path(source) dest = Path(dest) + validate_file_path(dest) swift_retry_options = swift_retry_options or {} if is_obs_path(source) and is_obs_path(dest): raise ValueError('cannot copy one OBS path to another OBS path') - if is_obs_path(dest) and dest.is_ambiguous(): - raise ValueError('OBS destination must be file with extension or directory with slash') if is_filesystem_path(dest): dest.parent.makedirs_p()