'''
ARK: Survival Evolved Toolkit

Only supports Python 3; Python 2 is end-of-life and outdated, thus not supported.

Purpose:
    Provide a Python toolkit for ARK. Originally designed to unpack the workshop archives.

Notice:
    I use PEP 8 as it was intended; if you want to PEP 8 me, read it first instead of being foolish: "A Foolish Consistency is the Hobgoblin of Little Minds"
'''
import struct
import zlib
import sys
import logging

__author__ = "James E"
__contact__ = "https://github.com/project-umbrella/arkit.py"
__copyright__ = "Copyright 2015, Project Umbrella"
__version__ = "0.0.0.1"
__status__ = "Prototype"
__date__ = "16 October 2015"
__license__ = "GPL v3.0 https://github.com/project-umbrella/arkit.py/blob/master/LICENSE"

logging.basicConfig(stream=sys.stderr, level=logging.CRITICAL)
class UnpackException(Exception):
    pass


class SignatureUnpackException(UnpackException):
    pass


class CorruptUnpackException(UnpackException):
    pass
def unpack(src, dst):
    '''
    Unpacks ARK's Steam Workshop *.z archives.

    Accepts two arguments:
        src = Source File/Archive
        dst = Destination File

    Error Handling:
        Currently logs archive integrity errors via logging as well as raising custom exceptions. Also logs some debug and info messages.
        All file system errors are handled by the Python core.

    Process:
        1. Open the source file.
        2. Read header information from the archive:
            - 00 (8 bytes) signature (6 bytes) and format ver (2 bytes)
            - 08 (8 bytes) unpacked/uncompressed chunk size
            - 10 (8 bytes) packed/compressed full size
            - 18 (8 bytes) unpacked/uncompressed full size
            - 20 (8 bytes) first chunk packed/compressed size
            - 28 (8 bytes) first chunk unpacked/uncompressed size
            - 20 and 28 repeat until the total of all the unpacked/uncompressed chunk sizes matches the unpacked/uncompressed full size.
        3. Read all the archive data and verify integrity (there should only be one partial chunk, and each chunk should match the archive's header).
        4. Write the file.

    Development Note:
        - Not thoroughly tested for errors. There may be instances where this method fails either to extract a valid archive or to detect a corrupt archive.
        - Prevent overwriting files unless requested to do so.
        - Create a batch method (see the unpack_batch sketch at the end of this file).
    '''
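    # All header fields below are read as native-endian signed 64-bit integers ('q').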
    with open(src, 'rb') as f:
        sigver = struct.unpack('q', f.read(8))[0]
        unpacked_chunk = f.read(8)
        packed = f.read(8)
        unpacked = f.read(8)
        size_unpacked_chunk = struct.unpack('q', unpacked_chunk)[0]
        size_packed = struct.unpack('q', packed)[0]
        size_unpacked = struct.unpack('q', unpacked)[0]

        # Verify the integrity of the archive header
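        # 2653586369 == 0x9E2A83C1, which appears to be the Unreal Engine package magic (ARK is a UE game).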
        if sigver == 2653586369:
            if isinstance(size_unpacked_chunk, int) and isinstance(size_packed, int) and isinstance(size_unpacked, int):
                logging.info("Archive is valid.")
                logging.debug("Archive header size information. Unpacked Chunk: {}({}) Full Packed: {}({}) Full Unpacked: {}({})".format(size_unpacked_chunk, unpacked_chunk, size_packed, packed, size_unpacked, unpacked))

                # Obtain the archive compression index
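                # Pairs of (compressed, uncompressed) chunk sizes are read until the uncompressed total reaches size_unpacked.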
                compression_index = []
                size_indexed = 0
                while size_indexed < size_unpacked:
                    raw_compressed = f.read(8)
                    raw_uncompressed = f.read(8)
                    compressed = struct.unpack('q', raw_compressed)[0]
                    uncompressed = struct.unpack('q', raw_uncompressed)[0]
                    compression_index.append((compressed, uncompressed))
                    size_indexed += uncompressed
                    logging.debug("{}: {}/{} ({}/{}) - {} - {}".format(len(compression_index), size_indexed, size_unpacked, compressed, uncompressed, raw_compressed, raw_uncompressed))

                if size_unpacked != size_indexed:
                    msg = "Header-index mismatch. Header indicates it should only have {} bytes when uncompressed but the index indicates {} bytes.".format(size_unpacked, size_indexed)
                    logging.critical(msg)
                    raise CorruptUnpackException(msg)
                # Read the actual archive data
                data = b''
                read_data = 0
                for compressed, uncompressed in compression_index:
                    compressed_data = f.read(compressed)
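                    # Each chunk is a self-contained zlib stream, decompressed in one call.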
                    uncompressed_data = zlib.decompress(compressed_data)

                    # Verify the size of the data is consistent with the archive's index
                    if len(uncompressed_data) == uncompressed:
                        data += uncompressed_data
                        read_data += 1

                        # Verify there is only one partial chunk
                        if len(uncompressed_data) != size_unpacked_chunk and read_data != len(compression_index):
                            msg = "Index contains more than one partial chunk: was {} when the full chunk size is {}, chunk {}/{}".format(len(uncompressed_data), size_unpacked_chunk, read_data, len(compression_index))
                            logging.critical(msg)
                            raise CorruptUnpackException(msg)
                    else:
                        msg = "Uncompressed chunk size is not the same as in the index: was {} but should be {}.".format(len(uncompressed_data), uncompressed)
                        logging.critical(msg)
                        raise CorruptUnpackException(msg)
            else:
                msg = "Data types in the header should be ints. Size types: unpacked_chunk({}), packed({}), unpacked({})".format(type(size_unpacked_chunk), type(size_packed), type(size_unpacked))
                logging.critical(msg)
                raise CorruptUnpackException(msg)
        else:
            msg = "The signature and format version are incorrect. Signature was {}, should be 2653586369.".format(sigver)
            logging.critical(msg)
            raise SignatureUnpackException(msg)
    # Write the extracted data to disk
    with open(dst, 'wb') as f:
        f.write(data)

    logging.info("Archive has been extracted.")
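

def unpack_batch(src_dir, dst_dir):
    '''
    A minimal sketch of the batch method proposed in the Development Note of
    unpack(). The name, the flat directory walk, and the "strip the .z suffix"
    destination naming are assumptions, not part of the original toolkit.
    '''
    import os  # local import so this sketch stays self-contained

    for name in os.listdir(src_dir):
        if name.endswith('.z'):
            src = os.path.join(src_dir, name)
            dst = os.path.join(dst_dir, name[:-2])  # drop the trailing ".z"
            try:
                unpack(src, dst)
            except UnpackException:
                # Log the failure and keep going; one corrupt archive should not stop the batch.
                logging.exception("Failed to unpack {}".format(src))


# Minimal command-line usage sketch (assumed interface; the original file defines no entry point):
#     python3 arkit.py <archive.z> <destination>
if __name__ == '__main__':
    if len(sys.argv) != 3:
        sys.exit("Usage: python3 arkit.py <archive.z> <destination>")
    try:
        unpack(sys.argv[1], sys.argv[2])
    except UnpackException as error:
        sys.exit("Unpack failed: {}".format(error))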