-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcheck_grist_hashes.py
67 lines (56 loc) · 1.88 KB
/
check_grist_hashes.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import argparse
import logging
import pathlib
import sys
import yaml
from cyoa_archives.grist.api import GristAPIWrapper
logger = logging.getLogger(__name__)


def parse_args(argv=None):
    """Parse the command-line arguments of this script.

    Args:
        argv: Argument list to parse; ``None`` makes argparse read
            ``sys.argv[1:]``.

    Returns:
        The parsed namespace; ``config_file`` holds the path given with
        ``-c`` / ``--config_file``.
    """
    parser = argparse.ArgumentParser(
        description="Report image hashes shared by several CYOAs in Grist."
    )
    # The configuration is mandatory: nothing below can run without it, so
    # let argparse reject the call instead of failing later on a missing name.
    parser.add_argument(
        "-c", "--config_file", required=True,
        help="Configuration file to use",
    )
    return parser.parse_args(argv)


def load_config(filepath):
    """Load the YAML configuration stored at *filepath*.

    Prints a message and exits with status 1 when the file cannot be read,
    cannot be parsed as YAML, or does not hold a mapping at its top level.

    Args:
        filepath: Path of the YAML configuration file.

    Returns:
        The configuration as a dict.
    """
    try:
        with open(filepath, encoding="utf-8") as f:
            config = yaml.safe_load(f)
    except OSError:
        print(f"Could not read file: {filepath}")
        sys.exit(1)
    except yaml.YAMLError as err:
        print(f"Could not parse file: {filepath}: {err}")
        sys.exit(1)
    # An empty file loads as None; main() needs ``.get`` to be available.
    if not isinstance(config, dict):
        print(f"Configuration is not a mapping: {filepath}")
        sys.exit(1)
    return config


def build_title_lookup(title_pd):
    """Map every value of the 'uuid' column to its 'official_title'.

    Args:
        title_pd: DataFrame with 'uuid' and 'official_title' columns.

    Returns:
        A dict keyed by uuid; when a uuid appears more than once the last
        row wins.
    """
    return dict(zip(title_pd['uuid'], title_pd['official_title']))


def collect_hash_titles(cyoa_pd, cyoa_titles):
    """Group official titles by the image hashes their records list.

    Args:
        cyoa_pd: DataFrame with 'id', 'cyoa_uuid' and 'image_hashes'
            columns; 'image_hashes' is a comma-separated string.
        cyoa_titles: Dict mapping a CYOA uuid to its official title.

    Returns:
        A dict mapping each hash to the list of distinct official titles
        (in first-seen order) whose records contain that hash.
    """
    hash_table = {}
    columns = zip(
        cyoa_pd['id'], cyoa_pd['cyoa_uuid'], cyoa_pd['image_hashes']
    )
    for record_id, cyoa_uuid, image_hashes in columns:
        # Only a non-empty string can carry hashes; anything else
        # (None, NaN, '') is treated as "no hashes" instead of crashing.
        if not isinstance(image_hashes, str) or not image_hashes:
            continue
        if cyoa_uuid not in cyoa_titles:
            logger.warning(
                "Record %s references unknown CYOA uuid %r; skipping",
                record_id, cyoa_uuid,
            )
            continue
        official_title = cyoa_titles[cyoa_uuid]

        # Loop through hashes
        for hash_string in image_hashes.split(','):
            trimmed_hash = hash_string.strip()
            # A trailing or doubled comma yields an empty token; recording
            # it would report a bogus collision on the '' key.
            if not trimmed_hash:
                continue
            titles = hash_table.setdefault(trimmed_hash, [])
            if official_title not in titles:
                titles.append(official_title)
    return hash_table


def find_collisions(hash_table):
    """Return the entries of *hash_table* that list more than one title."""
    return {
        hash_string: titles
        for hash_string, titles in hash_table.items()
        if len(titles) > 1
    }


def main(argv=None):
    """Fetch the Grist tables and print every hash shared across titles.

    Each collision is printed as ``<hash><TAB><list of titles>``.

    Args:
        argv: Argument list forwarded to :func:`parse_args`.
    """
    logging.basicConfig(level=logging.DEBUG)

    # Parse args
    args = parse_args(argv)
    config = load_config(pathlib.Path(args.config_file))

    # Set up API
    api = GristAPIWrapper.from_config(config.get('grist'))
    title_pd = api.fetch_table_pd(
        'CYOAs', col_names=['uuid', 'official_title']
    )
    grist_pd = api.fetch_table_pd('Records', col_names=[
        'id', 'cyoa_uuid', 'image_hashes', 'cyoa', 'title'
    ])
    # Keep only the rows whose 'cyoa' column is positive.
    cyoa_pd = grist_pd.loc[grist_pd['cyoa'] > 0]
    logger.debug(len(cyoa_pd))

    # Iterate and track hashes
    hash_table = collect_hash_titles(cyoa_pd, build_title_lookup(title_pd))

    # Loop through hashes and print collisions
    for hash_string, value in find_collisions(hash_table).items():
        print(f'{hash_string}\t{value}')


if __name__ == "__main__":
    main()