Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[3.8] gh-107845: Fix symlink handling for tarfile.data_filter (GH-107846) #108279

Merged
merged 3 commits into from
Aug 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions Doc/library/tarfile.rst
Original file line number Diff line number Diff line change
Expand Up @@ -725,6 +725,11 @@ A ``TarInfo`` object has the following public data attributes:
Name of the target file name, which is only present in :class:`TarInfo` objects
of type :const:`LNKTYPE` and :const:`SYMTYPE`.

For symbolic links (``SYMTYPE``), the *linkname* is relative to the directory
that contains the link.
For hard links (``LNKTYPE``), the *linkname* is relative to the root of
the archive.


.. attribute:: TarInfo.uid
:type: int
Expand Down
11 changes: 9 additions & 2 deletions Lib/tarfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -740,7 +740,7 @@ def __init__(self, tarinfo):
class AbsoluteLinkError(FilterError):
def __init__(self, tarinfo):
self.tarinfo = tarinfo
super().__init__(f'{tarinfo.name!r} is a symlink to an absolute path')
super().__init__(f'{tarinfo.name!r} is a link to an absolute path')

class LinkOutsideDestinationError(FilterError):
def __init__(self, tarinfo, path):
Expand Down Expand Up @@ -800,7 +800,14 @@ def _get_filtered_attrs(member, dest_path, for_data=True):
if member.islnk() or member.issym():
if os.path.isabs(member.linkname):
raise AbsoluteLinkError(member)
target_path = os.path.realpath(os.path.join(dest_path, member.linkname))
if member.issym():
target_path = os.path.join(dest_path,
os.path.dirname(name),
member.linkname)
else:
target_path = os.path.join(dest_path,
member.linkname)
target_path = os.path.realpath(target_path)
if os.path.commonpath([target_path, dest_path]) != dest_path:
raise LinkOutsideDestinationError(member, target_path)
return new_attrs
Expand Down
144 changes: 137 additions & 7 deletions Lib/test/test_tarfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -2984,10 +2984,12 @@ def __exit__(self, *exc):
self.bio = None

def add(self, name, *, type=None, symlink_to=None, hardlink_to=None,
mode=None, **kwargs):
mode=None, size=None, **kwargs):
"""Add a member to the test archive. Call within `with`."""
name = str(name)
tarinfo = tarfile.TarInfo(name).replace(**kwargs)
if size is not None:
tarinfo.size = size
if mode:
tarinfo.mode = _filemode_to_int(mode)
if symlink_to is not None:
Expand Down Expand Up @@ -3051,7 +3053,8 @@ def check_context(self, tar, filter):
raise self.raised_exception
self.assertEqual(self.expected_paths, set())

def expect_file(self, name, type=None, symlink_to=None, mode=None):
def expect_file(self, name, type=None, symlink_to=None, mode=None,
size=None):
"""Check a single file. See check_context."""
if self.raised_exception:
raise self.raised_exception
Expand Down Expand Up @@ -3085,6 +3088,8 @@ def expect_file(self, name, type=None, symlink_to=None, mode=None):
self.assertTrue(path.is_fifo())
else:
raise NotImplementedError(type)
if size is not None:
self.assertEqual(path.stat().st_size, size)
for parent in path.parents:
self.expected_paths.discard(parent)

Expand Down Expand Up @@ -3130,8 +3135,15 @@ def test_parent_symlink(self):
# Test interplaying symlinks
# Inspired by 'dirsymlink2a' in jwilk/traversal-archives
with ArchiveMaker() as arc:

# `current` links to `.` which is both:
# - the destination directory
# - `current` itself
arc.add('current', symlink_to='.')

# effectively points to ./../
arc.add('parent', symlink_to='current/..')

arc.add('parent/evil')

if support.can_symlink():
Expand Down Expand Up @@ -3172,9 +3184,46 @@ def test_parent_symlink(self):
def test_parent_symlink2(self):
# Test interplaying symlinks
# Inspired by 'dirsymlink2b' in jwilk/traversal-archives

# Posix and Windows have different pathname resolution:
# either symlink or a '..' component resolve first.
# Let's see which we are on.
if support.can_symlink():
testpath = os.path.join(TEMPDIR, 'resolution_test')
os.mkdir(testpath)

# testpath/current links to `.` which is all of:
# - `testpath`
# - `testpath/current`
# - `testpath/current/current`
# - etc.
os.symlink('.', os.path.join(testpath, 'current'))

# we'll test where `testpath/current/../file` ends up
with open(os.path.join(testpath, 'current', '..', 'file'), 'w'):
pass

if os.path.exists(os.path.join(testpath, 'file')):
# Windows collapses 'current\..' to '.' first, leaving
# 'testpath\file'
dotdot_resolves_early = True
elif os.path.exists(os.path.join(testpath, '..', 'file')):
# Posix resolves 'current' to '.' first, leaving
# 'testpath/../file'
dotdot_resolves_early = False
else:
raise AssertionError('Could not determine link resolution')

with ArchiveMaker() as arc:

# `current` links to `.` which is both the destination directory
# and `current` itself
arc.add('current', symlink_to='.')

# `current/parent` is also available as `./parent`,
# and effectively points to `./../`
arc.add('current/parent', symlink_to='..')

arc.add('parent/evil')

with self.check_context(arc.open(), 'fully_trusted'):
Expand All @@ -3188,6 +3237,7 @@ def test_parent_symlink2(self):

with self.check_context(arc.open(), 'tar'):
if support.can_symlink():
# Fail when extracting a file outside destination
self.expect_exception(
tarfile.OutsideDestinationError,
"'parent/evil' would be extracted to "
Expand All @@ -3198,10 +3248,24 @@ def test_parent_symlink2(self):
self.expect_file('parent/evil')

with self.check_context(arc.open(), 'data'):
self.expect_exception(
tarfile.LinkOutsideDestinationError,
"""'current/parent' would link to ['"].*['"], """
+ "which is outside the destination")
if support.can_symlink():
if dotdot_resolves_early:
# Fail when extracting a file outside destination
self.expect_exception(
tarfile.OutsideDestinationError,
"'parent/evil' would be extracted to "
+ """['"].*evil['"], which is outside """
+ "the destination")
else:
# Fail as soon as we have a symlink outside the destination
self.expect_exception(
tarfile.LinkOutsideDestinationError,
"'current/parent' would link to "
+ """['"].*outerdir['"], which is outside """
+ "the destination")
else:
self.expect_file('current/')
self.expect_file('parent/evil')

def test_absolute_symlink(self):
# Test symlink to an absolute path
Expand Down Expand Up @@ -3230,11 +3294,29 @@ def test_absolute_symlink(self):
with self.check_context(arc.open(), 'data'):
self.expect_exception(
tarfile.AbsoluteLinkError,
"'parent' is a symlink to an absolute path")
"'parent' is a link to an absolute path")

def test_absolute_hardlink(self):
# Test hardlink to an absolute path
# Inspired by 'dirsymlink' in https://github.com/jwilk/traversal-archives
with ArchiveMaker() as arc:
arc.add('parent', hardlink_to=self.outerdir / 'foo')

with self.check_context(arc.open(), 'fully_trusted'):
self.expect_exception(KeyError, ".*foo. not found")

with self.check_context(arc.open(), 'tar'):
self.expect_exception(KeyError, ".*foo. not found")

with self.check_context(arc.open(), 'data'):
self.expect_exception(
tarfile.AbsoluteLinkError,
"'parent' is a link to an absolute path")

def test_sly_relative0(self):
# Inspired by 'relative0' in jwilk/traversal-archives
with ArchiveMaker() as arc:
# points to `../../tmp/moo`
arc.add('../moo', symlink_to='..//tmp/moo')

try:
Expand Down Expand Up @@ -3284,6 +3366,54 @@ def test_sly_relative2(self):
+ """['"].*moo['"], which is outside the """
+ "destination")

def test_deep_symlink(self):
# Test that symlinks and hardlinks inside a directory
# point to the correct file (`target` of size 3).
# If links aren't supported we get a copy of the file.
with ArchiveMaker() as arc:
arc.add('targetdir/target', size=3)
# a hardlink's linkname is relative to the archive
arc.add('linkdir/hardlink', hardlink_to=os.path.join(
'targetdir', 'target'))
# a symlink's linkname is relative to the link's directory
arc.add('linkdir/symlink', symlink_to=os.path.join(
'..', 'targetdir', 'target'))

for filter in 'tar', 'data', 'fully_trusted':
with self.check_context(arc.open(), filter):
self.expect_file('targetdir/target', size=3)
self.expect_file('linkdir/hardlink', size=3)
if support.can_symlink():
self.expect_file('linkdir/symlink', size=3,
symlink_to='../targetdir/target')
else:
self.expect_file('linkdir/symlink', size=3)

def test_chains(self):
# Test chaining of symlinks/hardlinks.
# Symlinks are created before the files they point to.
with ArchiveMaker() as arc:
arc.add('linkdir/symlink', symlink_to='hardlink')
arc.add('symlink2', symlink_to=os.path.join(
'linkdir', 'hardlink2'))
arc.add('targetdir/target', size=3)
arc.add('linkdir/hardlink', hardlink_to='targetdir/target')
arc.add('linkdir/hardlink2', hardlink_to='linkdir/symlink')

for filter in 'tar', 'data', 'fully_trusted':
with self.check_context(arc.open(), filter):
self.expect_file('targetdir/target', size=3)
self.expect_file('linkdir/hardlink', size=3)
self.expect_file('linkdir/hardlink2', size=3)
if support.can_symlink():
self.expect_file('linkdir/symlink', size=3,
symlink_to='hardlink')
self.expect_file('symlink2', size=3,
symlink_to='linkdir/hardlink2')
else:
self.expect_file('linkdir/symlink', size=3)
self.expect_file('symlink2', size=3)

def test_modes(self):
# Test how file modes are extracted
# (Note that the modes are ignored on platforms without working chmod)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
:func:`tarfile.data_filter` now takes the location of symlinks into account
when determining their target, so it will no longer reject some valid
tarballs with ``LinkOutsideDestinationError``.
Loading