-
Notifications
You must be signed in to change notification settings - Fork 31
/
Copy pathextract.py
executable file
·154 lines (120 loc) · 5.42 KB
/
extract.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
#!/usr/bin/env python3
import argparse
import json
import logging
import os
import shutil
import subprocess
import sys
from contextlib import suppress
from pathlib import Path
from tempfile import TemporaryDirectory
# Version of this command line wrapper; printed as part of the -v/--version text.
VERSION = '0.1.1'
# Program name; first line of the -v/--version text.
NAME = 'FACT_extractor interface'
# Docker image used when no -c/--container option is given.
DEFAULT_CONTAINER = 'fkiecad/fact_extractor'
def parse_arguments():
    """Parse the command line of the FACT_extractor interface.

    Returns:
        The ``argparse.Namespace`` produced by the parser. It carries the
        attributes ``container``, ``memory``, ``output_directory``,
        ``report_file``, ``verbose``, ``extract_everything`` and ``FILE``.
        ``FILE`` is a one-element list because the positional argument is
        declared with ``nargs=1``.
    """
    parser = argparse.ArgumentParser(
        description='Command line interface for FACT_extractor.\nExtract arbitrary container or compression formats with one utility.',
        # The description and the version text contain explicit line breaks
        # and tabs. The default formatter collapses all whitespace and
        # re-wraps the text; the raw formatter prints it as written.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    # NOTE: set_version() is evaluated here, i.e. on every invocation of the
    # parser and not only when -v/--version is actually requested.
    parser.add_argument('-v', '--version', action='version', version=set_version())
    parser.add_argument('-c', '--container', help='docker container', default=DEFAULT_CONTAINER)
    parser.add_argument('-m', '--memory', help='memory limit for docker container (in MB)', default='512')
    parser.add_argument('-o', '--output_directory', help='path to extracted files', default=None)
    parser.add_argument('-r', '--report_file', help='write report to a file', default=None)
    parser.add_argument('-V', '--verbose', action='store_true', default=False, help='increase verbosity')
    parser.add_argument('-e', '--extract_everything', action='store_true', help='also extract empty files')
    parser.add_argument('FILE', type=str, nargs=1, help='File for extraction')
    return parser.parse_args()
def setup_logging(verbose):
    """Attach a formatted stream handler to the root logger.

    Args:
        verbose: If truthy the root logger level is set to DEBUG, otherwise
            to INFO.
    """
    level = logging.INFO
    if verbose:
        level = logging.DEBUG

    formatter = logging.Formatter(
        fmt='[%(asctime)s][%(module)s][%(levelname)s]: %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S',
    )
    stream_handler = logging.StreamHandler()
    stream_handler.setFormatter(formatter)

    root_logger = logging.getLogger('')
    root_logger.setLevel(level)
    root_logger.addHandler(stream_handler)
def container_exists(container):
    """Tell whether docker knows an image called *container*.

    The check runs ``docker history`` for the given name and only looks at
    the exit status; the command output is discarded.

    Args:
        container: Image name (optionally with tag) to look up.

    Returns:
        True if ``docker history`` exits with status 0, False otherwise.
        False is also returned when the docker executable cannot be started.
    """
    try:
        # An argument list without a shell keeps the image name a single
        # argument. With a shell string, a name such as "x; true" would run
        # a second command whose exit status decides the result.
        result = subprocess.run(
            ['docker', 'history', str(container)],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            check=False,
        )
    except OSError:
        # docker is not installed or not executable. The shell-based call
        # reported this as a non-zero exit status, so keep answering False.
        return False
    return result.returncode == 0
def default_container_status():
    """Look up tag and creation time of the locally available default image.

    Runs ``docker image ls`` for ``DEFAULT_CONTAINER`` with a format string
    that prints ``tag,creation time`` per image.

    Returns:
        A ``(tag, creation_time)`` tuple of strings. ``('n/e', 'n/e')`` is
        returned when docker cannot be started, exits with an error, or
        prints no line that can be parsed.
    """
    not_available = ('n/e', 'n/e')
    try:
        process_result = subprocess.run(
            ['docker', 'image', 'ls', DEFAULT_CONTAINER, '--format', '{{.Tag}},{{.CreatedAt}}'],
            stdout=subprocess.PIPE,
            # Keep diagnostics out of the parsed output; an error message
            # must never be mistaken for a tag.
            stderr=subprocess.DEVNULL,
            check=False,
        )
    except OSError:
        # docker executable missing or not runnable
        return not_available
    if process_result.returncode != 0:
        return not_available

    # Several local tags of the image yield one line each. Report the first
    # one instead of failing on the unexpected number of fields.
    output_lines = process_result.stdout.decode(errors='replace').splitlines()
    first_line = next((line.strip() for line in output_lines if line.strip()), '')
    tag, separator, creation_time = first_line.partition(',')
    if not separator:
        return not_available
    return tag, creation_time
def set_version():
    """Build the multi-line text shown for the version option.

    Returns:
        A string with program name, program version, default container name
        and the tag and creation time reported by
        ``default_container_status()``, one item per line.
    """
    tag, created = default_container_status()
    lines = [
        f'{NAME}',
        f'Programm version:\t{VERSION}',
        f'Default container:\t{DEFAULT_CONTAINER}',
        f'Container tag:\t\t{tag}',
        f'Container creation:\t{created}',
    ]
    return '\n'.join(lines)
def call_docker(input_file, container, target, report_file, memory_limit, tmpdir=None, extract_everything=False):
    """Run the extractor container on *input_file* and collect its results.

    A working directory with the sub directories ``files``, ``reports`` and
    ``input`` is prepared, the input file is copied into ``input`` and the
    directory is mounted into the container at ``/tmp/extractor``. After the
    container has finished, ``files`` is copied to *target* and the report
    is handed to ``handle_report``.

    Args:
        input_file: Path of the file to extract.
        container: Docker image to run.
        target: Destination directory for the extracted files.
        report_file: Path for the report, or a falsy value to print it.
        memory_limit: Memory limit in MB, passed to docker as ``-m <n>m``.
        tmpdir: Optional object offering ``name`` and ``cleanup()`` (such as
            a ``TemporaryDirectory``). A new ``TemporaryDirectory`` is
            created if omitted. It is cleaned up in either case.
        extract_everything: If true, ``--extract_everything`` is passed to
            the container.
    """
    tmpdir = tmpdir if tmpdir else TemporaryDirectory()
    try:
        work_dir = Path(tmpdir.name)
        for subpath in ('files', 'reports', 'input'):
            (work_dir / subpath).mkdir(exist_ok=True)
        shutil.copy(input_file, str(work_dir / 'input' / Path(input_file).name))

        # Argument list instead of a shell string: paths, image names or
        # limits containing spaces or shell metacharacters stay single
        # arguments and cannot alter the command.
        command = [
            'docker',
            'run',
            '--rm',
            '--ulimit',
            'nofile=20000:50000',
            '-m',
            f'{memory_limit}m',
            '-v',
            f'{tmpdir.name}:/tmp/extractor',
            '-v',
            '/dev:/dev',
            '--privileged',
            str(container),
            # Everything after the image name is passed to the container.
            '--chown',
            f'{os.getuid()}:{os.getgid()}',
        ]
        if extract_everything:
            command.append('--extract_everything')

        docker_run = subprocess.run(command, check=False)
        if docker_run.returncode != 0:
            # Still try to collect results, but do not hide the failure.
            logging.warning('docker run exited with status %s', docker_run.returncode)

        try:
            shutil.copytree(str(work_dir / 'files'), target)
        except shutil.Error as error:
            # Copying stays best effort, but the user gets told about it.
            logging.warning('Some extracted files could not be copied to %s: %s', target, error)
        handle_report(report_file, tmpdir.name)
    finally:
        tmpdir.cleanup()
def handle_report(report_file, tmp):
    """Pretty-print the extraction report to a file or to stdout.

    Args:
        report_file: Path the report is written to. If falsy, the report is
            printed instead.
        tmp: Directory that contains ``reports/meta.json``.
    """
    meta_path = Path(tmp) / 'reports' / 'meta.json'
    report = json.loads(meta_path.read_text())
    pretty_report = json.dumps(report, indent=4)

    if not report_file:
        print(pretty_report)  # noqa: T201
        return
    Path(report_file).write_text(pretty_report)
def main():
    """Validate the command line options and start the extraction.

    Returns:
        1 if the target directory already exists, the container is not
        available, the input file is missing or the report file's parent
        directory does not exist; 0 after ``call_docker`` has returned.
    """
    args = parse_arguments()
    setup_logging(args.verbose)

    input_file = args.FILE[0]
    report_file = args.report_file
    output_directory = args.output_directory or str(Path() / 'extracted_files')

    if Path(output_directory).exists():
        logging.error(
            f'Target directory exists ({output_directory}). Please choose a non-existing directory with -o option.'
        )
        return 1

    if not container_exists(args.container):
        logging.error(
            f"Container {args.container} doesn't exist. Please specify an existing container with the -c option."
        )
        logging.info(f'You can download the default container with "docker pull {DEFAULT_CONTAINER}"')
        return 1

    if not Path(input_file).is_file():
        logging.error(f"Given input file {input_file} doesn't exist. Please give an existing path.")
        return 1

    if report_file:
        report_path = Path(report_file)
        if not report_path.parent.is_dir():
            logging.error(f'Report file ({report_file}) can not be created. Check if parent directory exists.')
            return 1
        if report_path.exists():
            logging.warning('Warning: Report file will be overwritten.')

    call_docker(
        input_file=input_file,
        container=args.container,
        target=output_directory,
        report_file=report_file,
        memory_limit=args.memory,
        extract_everything=args.extract_everything,
    )
    return 0
# Script entry point: the return value of main() becomes the process exit status.
if __name__ == '__main__':
    sys.exit(main())