diff --git a/Lib/test/test_zipfile/test_core.py b/Lib/test/test_zipfile/test_core.py index fda4870bb12419d..aafeeed2d11ab02 100644 --- a/Lib/test/test_zipfile/test_core.py +++ b/Lib/test/test_zipfile/test_core.py @@ -3463,7 +3463,9 @@ def read(self, size=-1): class StoredZipExtFileRandomReadTest(unittest.TestCase): - def test_random_read(self): + """Tests whether an uncompressed, unencrypted zip entry supports random + seeks and reads without reading redundant bytes.""" + def test_stored_seek_and_read(self): sio = StatIO() # 20000 bytes @@ -3472,12 +3474,13 @@ def test_random_read(self): # The seek length must be greater than ZipExtFile.MIN_READ_SIZE # as `ZipExtFile._read2()` reads in blocks of this size and we # need to seek out of the buffered data - min_size = zipfile.ZipExtFile.MIN_READ_SIZE - self.assertGreaterEqual(10002, min_size) # for forward seek test - self.assertGreaterEqual(5003, min_size) # for backward seek test + read_buffer_size = zipfile.ZipExtFile.MIN_READ_SIZE + self.assertGreaterEqual(10002, read_buffer_size) # for forward seek test + self.assertGreaterEqual(5003, read_buffer_size) # for backward seek test # The read length must be less than MIN_READ_SIZE, since we assume that # only 1 block is read in the test. 
- self.assertGreaterEqual(min_size, 100) # for read() calls + read_length = 100 + self.assertGreaterEqual(read_buffer_size, read_length) # for read() calls with zipfile.ZipFile(sio, "w", compression=zipfile.ZIP_STORED) as zipf: zipf.writestr("foo.txt", txt) @@ -3490,33 +3493,40 @@ def test_random_read(self): # forward seek old_count = sio.bytes_read - fp.seek(10002, os.SEEK_CUR) - self.assertEqual(fp.tell(), 10002) + forward_seek_len = 10002 + current_pos = 0 + fp.seek(forward_seek_len, os.SEEK_CUR) + self.assertEqual(fp.tell(), forward_seek_len) self.assertEqual(fp._left, fp._compress_left) - arr = fp.read(100) - self.assertEqual(fp.tell(), 10102) - self.assertEqual(arr, txt[10002:10102]) + current_pos += forward_seek_len + arr = fp.read(read_length) + self.assertEqual(fp.tell(), current_pos + read_length) # read() advanced the offset + self.assertEqual(arr, txt[current_pos:current_pos + read_length]) + current_pos += read_length self.assertEqual(fp._left, fp._compress_left) - d = sio.bytes_read - old_count - self.assertLessEqual(d, min_size) + read_count = sio.bytes_read - old_count + self.assertLessEqual(read_count, read_buffer_size) # backward seek old_count = sio.bytes_read - fp.seek(-5003, os.SEEK_CUR) - self.assertEqual(fp.tell(), 5099) # 5099 = 10102 - 5003 + backward_seek_len = 5003 + fp.seek(-backward_seek_len, os.SEEK_CUR) + self.assertEqual(fp.tell(), current_pos - backward_seek_len) # 5099 = 10102 - 5003 + current_pos -= backward_seek_len self.assertEqual(fp._left, fp._compress_left) - arr = fp.read(100) - self.assertEqual(fp.tell(), 5199) - self.assertEqual(arr, txt[5099:5199]) + arr = fp.read(read_length) + self.assertEqual(fp.tell(), current_pos + read_length) # read() advanced the offset + self.assertEqual(arr, txt[current_pos:current_pos + read_length]) self.assertEqual(fp._left, fp._compress_left) - d = sio.bytes_read - old_count - self.assertLessEqual(d, min_size) + read_count = sio.bytes_read - old_count + self.assertLessEqual(read_count, read_buffer_size) # eof flags test fp.seek(0, os.SEEK_END) - self.assertTrue(fp._eof) 
fp.seek(12345, os.SEEK_SET) - self.assertFalse(fp._eof) + current_pos = 12345 + arr = fp.read(read_length) # must be readable after seeking away from EOF + self.assertEqual(arr, txt[current_pos:current_pos + read_length]) if __name__ == "__main__": diff --git a/Lib/zipfile/__init__.py b/Lib/zipfile/__init__.py index 26e899658bab505..ed70885b74d3317 100644 --- a/Lib/zipfile/__init__.py +++ b/Lib/zipfile/__init__.py @@ -1162,7 +1162,7 @@ def seek(self, offset, whence=os.SEEK_SET): self._offset = buff_offset read_offset = 0 # Fast seek uncompressed unencrypted file - elif self._compress_type == ZIP_STORED and self._decrypter is None: + elif self._compress_type == ZIP_STORED and self._decrypter is None and read_offset != 0: # disable CRC checking after first seeking - it would be invalid self._expected_crc = None # seek actual file taking already buffered data into account