Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SVCS-552] folder_file_op cut down revalidate calls. #311

Open
wants to merge 5 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions waterbutler/core/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,10 +136,10 @@ def kind(self) -> str:
@property
@abc.abstractmethod
def id(self) -> str:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please update the DocStr to mention what this id is for both types.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

"""
This property consolidated "path-based" ids and "id-based" ids under a single property. This
makes inter provider move/copies easier. This should be identical to the path object
identifier as this is used to revalidate paths.
""" This property consolidates path-based and id-based file/folder/object identity under a
single one: ``id``, which makes inter provider move/copy actions easier. For path-based
providers, the value is identical to ``path`` property. For id-based providers, it is the
raw identity by definition (``self.raw['id']`` for most providers).
"""
raise NotImplementedError

Expand Down
9 changes: 5 additions & 4 deletions waterbutler/providers/bitbucket/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,15 @@ def __init__(self, raw, path_obj, owner=None, repo=None):
self.owner = owner
self.repo = repo

@property
def id(self):
return self.build_path()

@property
def provider(self):
return 'bitbucket'

# Bitbucket is a path-based provider and uses the default ``revalidate_path()``.
@property
def id(self):
return self.path

@property
def path(self):
return self.build_path()
Expand Down
9 changes: 5 additions & 4 deletions waterbutler/providers/box/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,15 @@ def __init__(self, raw, path_obj):
super().__init__(raw)
self._path_obj = path_obj

@property
def id(self):
return self.raw['id']

@property
def provider(self):
return 'box'

# Box is an id-based provider and uses its own ``revalidate_path()``.
@property
def id(self):
return self.raw['id']

@property
def materialized_path(self):
return str(self._path_obj)
Expand Down
12 changes: 8 additions & 4 deletions waterbutler/providers/cloudfiles/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,18 @@

class BaseCloudFilesMetadata(metadata.BaseMetadata):

@property
def id(self):
return self.path

@property
def provider(self):
return 'cloudfiles'

# CloudFiles is a path-based provider. However, it never called ``revalidate_path()`` even when
# it was the backend storage provider for OSFStorage. The provider is no longer active until it
# is upgraded to an addon provider for the users.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See GoogleCloud.

@property
def id(self):
# TODO: Do we need this ``id`` to be actually set?
return self.path


class CloudFilesFileMetadata(BaseCloudFilesMetadata, metadata.BaseFileMetadata):

Expand Down
12 changes: 8 additions & 4 deletions waterbutler/providers/dataverse/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,18 @@

class BaseDataverseMetadata(metadata.BaseMetadata):

@property
def id(self):
return str(self.raw['id'])

@property
def provider(self):
return 'dataverse'

# TODO: Should this be in ``DataverseFileMetadata``?
# TODO: Does ``self.raw['id']`` exists for ``DataverseDatasetMetadata``?
# TODO: DataVerse's ``revalidate_path()`` looks very different. Is this an issue?
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Similar to GitHub, your fix calls (functionally, not literally) revalidate_path() differently than the original way. This might be an issue.

# DataVerse is an id-based provider and has its own ``revalidate_path()``.
@property
def id(self):
return str(self.raw['id'])


class DataverseFileMetadata(BaseDataverseMetadata, metadata.BaseFileMetadata):

Expand Down
10 changes: 6 additions & 4 deletions waterbutler/providers/dropbox/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,16 @@ def __init__(self, raw, folder):
super().__init__(raw)
self._folder = folder

@property
def id(self):
return self.raw['id']

@property
def provider(self):
return 'dropbox'

# TODO: Is Dropbox path-based instead of id-based?
# TODO: Dropbox does not have a ``revalidate_path()``, should we use path instead of id?
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is no revalidate_path() for dropbox which makes guess that it is a path based (or at lease both)?

@property
def id(self):
return self.raw['id']

def build_path(self, path):
# TODO write a test for this
if path.lower().startswith(self._folder.lower()):
Expand Down
2 changes: 2 additions & 0 deletions waterbutler/providers/figshare/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ def __init__(self, raw, raw_file=None):
else:
self.raw_file = self.raw['files'][0]

# FigShare are id-based providers and use their own ``revalidate_path()``.
@property
def id(self):
return self.raw_file['id']
Expand Down Expand Up @@ -116,6 +117,7 @@ class FigshareFolderMetadata(BaseFigshareMetadata, metadata.BaseFolderMetadata):
considered folders.
"""

# FigShare are id-based providers
@property
def id(self):
return self.raw['id']
Expand Down
9 changes: 5 additions & 4 deletions waterbutler/providers/filesystem/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,15 @@ def __init__(self, raw, folder):
super().__init__(raw)
self._folder = folder

@property
def id(self):
return self.build_path(self.raw['path'])

@property
def provider(self):
return 'filesystem'

# FileSystem is a path-based provider and uses the default ``revalidate_path()``.
@property
def id(self):
return self.path

def build_path(self, path):
# TODO write a test for this
if path.lower().startswith(self._folder.lower()):
Expand Down
10 changes: 6 additions & 4 deletions waterbutler/providers/github/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,16 @@ def __init__(self, raw, commit=None, ref=None):
self.commit = commit
self.ref = ref

@property
def id(self):
return self.path

@property
def provider(self):
return 'github'

# GitHub is a path-based provider. However, it has its own ``revalidate_path()``.
# TODO: Investigate if this is an issue.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

GitHub may not work because your fix calls (functionally, not literally) the default revalidate_path() instead of the customized one.

@property
def id(self):
return self.path

@property
def extra(self):
ret = {}
Expand Down
9 changes: 5 additions & 4 deletions waterbutler/providers/gitlab/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,15 @@ def __init__(self, raw, path):
super().__init__(raw)
self._path_obj = path

@property
def id(self) -> str:
return self.build_path()

@property
def provider(self) -> str:
return 'gitlab'

# GitLab is a path-based provider and uses the default ``revalidate_path()``.
@property
def id(self) -> str:
return self.path

@property
def path(self) -> str:
return self.build_path()
Expand Down
3 changes: 3 additions & 0 deletions waterbutler/providers/googlecloud/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,11 @@ class BaseGoogleCloudMetadata(metadata.BaseMetadata, metaclass=abc.ABCMeta):
def provider(self) -> str:
return 'googlecloud'

# GoogleCloud is a path-based provider. However, it never calls ``revalidate_path()`` since it
# is the backend storage provider for OSFStorage.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Similar to CloudFiles (when it was the storage provider), GoogleCloud never uses revalidate_path(). We should set the id to be None or empty so we know it is never used accidentally or cause any confusion in the further.

@property
def id(self) -> str:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add GoogleCloud to pass tests.

# TODO: Do we need this ``id`` to be actually set?
return self.path

@property
Expand Down
2 changes: 2 additions & 0 deletions waterbutler/providers/googledrive/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ def __init__(self, raw, path):
super().__init__(raw, path)
self._path._is_folder = True

# GoogleDrive is an id-based provider and uses its own ``revalidate_path()``.
@property
def id(self):
return self.raw['id']
Expand Down Expand Up @@ -61,6 +62,7 @@ class GoogleDriveFileMetadata(BaseGoogleDriveMetadata, metadata.BaseFileMetadata
[2] https://developers.google.com/drive/v2/reference/files/get
"""

# GoogleDrive is an id-based provider and uses its own ``revalidate_path()``.
@property
def id(self):
return self.raw['id']
Expand Down
9 changes: 5 additions & 4 deletions waterbutler/providers/onedrive/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,15 @@ def __init__(self, raw, path_obj):
super().__init__(raw)
self._path_obj = path_obj

@property
def id(self):
return self.raw.get('id')

@property
def provider(self):
return 'onedrive'

# OneDrive is an id-based provider and uses its own ``revalidate_path()``.
@property
def id(self):
return self.raw.get('id')

@property
def materialized_path(self):
return str(self._path_obj)
Expand Down
2 changes: 2 additions & 0 deletions waterbutler/providers/osfstorage/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ def __init__(self, raw, materialized):
super().__init__(raw)
self._materialized = materialized

# TODO: Is OSFStorage also path-based?
# OSFStorage is an id-based provider and uses its own ``revalidate_path()``.
@property
def id(self):
return self.raw['id']
Expand Down
12 changes: 6 additions & 6 deletions waterbutler/providers/owncloud/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,22 +9,22 @@ def __init__(self, href, folder, attributes=None):
self._folder = folder
self._href = href

@property
def id(self):
return self._href[len(self._folder) - 1:]

@property
def provider(self):
return 'owncloud'

# OwnCloud is a path-based provider and uses the default ``revalidate_path()``.
@property
def id(self):
return self.path

@property
def name(self):
return self._href.strip('/').split('/')[-1]

@property
def path(self):
path = self._href[len(self._folder) - 1:]
return path
return self._href[len(self._folder) - 1:]

@property
def size(self):
Expand Down
9 changes: 5 additions & 4 deletions waterbutler/providers/s3/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,15 @@

class S3Metadata(metadata.BaseMetadata):

@property
def id(self):
return self.path

@property
def provider(self):
return 's3'

# S3 is a path-based provider and uses the default ``revalidate_path()``.
@property
def id(self):
return self.path

@property
def name(self):
return os.path.split(self.path)[1]
Expand Down