Skip to content

Commit

Permalink
output: optimize obj collection (#6277)
Browse files Browse the repository at this point in the history
* output: str only once

Calling `__str__` is slow, and when we are dealing with a large
dataset the repeated calls add up to a very substantial amount of time.
For example, locally, for 700K objects, it took 20sec more.

Related to #6276

* output: don't use os.path.join

`os.path.join` takes around 2sec for 700K objects.

* output: use fs.sep

* output: remove unused sep
  • Loading branch information
efiop authored Jul 3, 2021
1 parent cfcb7e1 commit aa255a1
Show file tree
Hide file tree
Showing 6 changed files with 11 additions and 3 deletions.
2 changes: 2 additions & 0 deletions dvc/fs/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ class RemoteMissingDepsError(DvcException):


class BaseFileSystem:
sep = "/"

scheme = "base"
REQUIRES: ClassVar[Dict[str, str]] = {}
PATH_CLS = URLInfo # type: Any
Expand Down
2 changes: 2 additions & 0 deletions dvc/fs/dvc.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ class DvcFileSystem(BaseFileSystem): # pylint:disable=abstract-method
repo: DVC repo.
"""

sep = os.sep

scheme = "local"
PARAM_CHECKSUM = "md5"

Expand Down
2 changes: 2 additions & 0 deletions dvc/fs/git.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
class GitFileSystem(BaseFileSystem): # pylint:disable=abstract-method
"""Proxies the repo file access methods to Git objects"""

sep = os.sep

scheme = "local"

def __init__(self, root_dir, trie):
Expand Down
2 changes: 2 additions & 0 deletions dvc/fs/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@


class LocalFileSystem(BaseFileSystem):
sep = os.sep

scheme = Schemes.LOCAL
PATH_CLS = PathInfo
PARAM_CHECKSUM = "md5"
Expand Down
2 changes: 2 additions & 0 deletions dvc/fs/repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ class RepoFileSystem(BaseFileSystem): # pylint:disable=abstract-method
kwargs: Additional keyword arguments passed to the `DvcFileSystem()`.
"""

sep = os.sep

scheme = "local"
PARAM_CHECKSUM = "md5"

Expand Down
4 changes: 1 addition & 3 deletions dvc/output.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,8 +271,6 @@ class Output:
IsStageFileError = OutputIsStageFileError # type: Type[DvcException]
IsIgnoredError = OutputIsIgnoredError # type: Type[DvcException]

sep = "/"

def __init__(
self,
stage,
Expand Down Expand Up @@ -887,7 +885,7 @@ def _set_obj_names(self, obj):
obj.name = str(self)
if isinstance(obj, Tree):
for key, entry_obj in obj:
entry_obj.name = os.path.join(str(self), *key)
entry_obj.name = self.fs.sep.join([obj.name, *key])

def get_used_external(
self, **kwargs
Expand Down

0 comments on commit aa255a1

Please sign in to comment.