-
Notifications
You must be signed in to change notification settings - Fork 0
/
message_match.py
622 lines (435 loc) · 23 KB
/
message_match.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
# Module containing the MessageMatch class
import re
from typing import List, Tuple

import regexes
from bible_books import bible_dict, bible_chapt_dict
from bible_versions import bible_version_set, version_map
# Class of objects representing the initial matches found in the message
class MessageMatch:
    """Collects the Bible references found in a single message.

    Each accepted reference is stored in ``matches`` as a list of the form
    ``[book_code, chapter, first_verse, last_verse + 1, bible_version]`` and
    its position in the message is stored at the same offset in
    ``matches_index``. ``make_dic`` runs every finder and exposes the result
    as ``dic``, a dictionary keyed (and ordered) by match index.
    """

    # Using slots so the bot runs faster and uses less memory
    __slots__ = ("msg", "matches", "matches_index", "dic")

    # Verse bound used when a whole chapter is requested.
    # NOTE(review): presumably the verse count of the longest chapter - confirm.
    _WHOLE_CHAPTER_LAST_VERSE = 176

    # (word that may precede a book title, prefix returned for it)
    _WORD_PREFIXES = (
        ("greek", "greek"),
        ("grk", "greek"),
        ("gk", "greek"),
        ("add", "add"),
        ("of", "of"),
        ("to", "to"),
        ("ep", "ep"),
    )

    def __init__(self, msg: str) -> None:
        """Store the message and start with empty match containers."""
        self.msg = msg
        self.matches = []
        self.matches_index = []
        self.dic = {}

    def convert_to_int(self, match_list: List[str]) -> List[int]:
        """Return a new list with every item of ``match_list`` passed through ``int``."""
        return [int(item) for item in match_list]

    def add_pipe(self, match: re.Match) -> str:
        """Return the matched text prefixed with ``|`` and with all spaces removed.

        Used as a ``re.sub`` callback so the number portion of a full-chapter
        match can later be split off at the pipe character.
        """
        return f"|{match.group()}".replace(" ", "")

    def standardise(self, match_list: List[str]) -> List[int]:
        """Convert a match list to integers and give it exactly three items.

        A two-item list ``[chapter, verse]`` becomes
        ``[chapter, verse, verse + 1]``; a three-item list has its last item
        increased by one. The last item is therefore an exclusive upper bound,
        which makes it convenient to use in a ``range``.
        """
        numbers = self.convert_to_int(match_list)
        if len(numbers) < 3:
            numbers.append(numbers[1] + 1)
        else:
            numbers[2] += 1
        return numbers

    def find_book_num(self, book_index: int) -> str:
        """Return the prefix found just before the book title, or ``""``.

        ``book_index`` is the position of the character separating the prefix
        from the title. The prefix is either a digit in ``"1234"`` (returned
        as is) or one of the words in ``_WORD_PREFIXES``. An empty string is
        returned when there is no room for a prefix, when the separator is a
        newline, or when the preceding text is not a recognised prefix.
        """
        prefix_index = book_index - 1
        if prefix_index < 0:
            return ""
        prefix_char = self.msg[prefix_index]
        # A newline separator means the preceding character belongs to the
        # previous line, so it cannot be part of this book title
        if self.msg[prefix_index + 1] == "\n":
            return ""
        if prefix_char in "1234":
            return prefix_char
        # Bounded endswith never wraps around to the end of the message the
        # way a negative index would
        end = prefix_index + 1
        for word, prefix in self._WORD_PREFIXES:
            if self.msg.endswith(word, 0, end):
                return prefix
        # Always a string: returning None here would produce titles such as
        # "None john" and silently reject valid references
        return ""

    def shorten_book(self, book: str) -> str:
        """Cut a book title down to the abbreviation used as a key in ``bible_dict``.

        Opening brackets and surrounding whitespace are removed, then the
        first 5, 4 and finally 3 characters are tried in that order. The
        3-character form is returned even when it is not a known key.
        """
        book = book.replace("(", "").strip()
        # Longest abbreviations first
        for length in (5, 4):
            candidate = book[:length]
            if candidate in bible_dict:
                return candidate.strip()
        return book[:3].strip()

    def find_book_title(self, match_index: int) -> str:
        """Return the (shortened) book title that precedes the match at ``match_index``.

        The message is scanned backwards from ``match_index`` until a
        whitespace character (other than the one directly before the match)
        or one of the symbols ``!@#$%^&*_-+={}[],?<>`` is met. The text
        between that position and the match is shortened with
        ``shorten_book`` and combined with the prefix from ``find_book_num``.
        """
        msg = self.msg
        index = match_index
        while index > 0:
            index -= 1
            try:
                char = msg[index]
            except IndexError:
                # match_index can lie beyond the end of the message
                break
            # The whitespace directly before the match separates the title
            # from the numbers, so it does not end the scan
            if char.isspace() and index != match_index - 1:
                break
            # A symbol is kept in the slice below, which makes the title
            # unrecognisable and so invalidates the match
            if char in "!@#$%^&*_-+={}[],?<>":
                break
        book_num = self.find_book_num(index)
        book = self.shorten_book(msg[index:match_index])
        return f"{book_num} {book}".strip()

    def add_to_lists(self, book_code: str, match_index: int, match_list: List[str], bible_version: str) -> None:
        """Standardise a match and record it unless it is a duplicate.

        The stored entry is ``[book_code, *standardised numbers, bible_version]``.
        Nothing is stored when ``match_index`` is already recorded or when an
        identical entry already exists.
        """
        entry = self.standardise(match_list)
        entry.insert(0, book_code)
        entry.append(bible_version)
        if match_index in self.matches_index or entry in self.matches:
            return
        self.matches_index.append(match_index)
        self.matches.append(entry)

    def search_version(self, match: str) -> Tuple[str, str]:
        """Split a Bible version off a match.

        Returns ``(match, version)``. When no version is found the match is
        returned unchanged with an empty version. When a version is found it
        is removed from the match, upper-cased and mapped through
        ``version_map``; if the result is not in ``bible_version_set`` the
        version is returned as an empty string.
        """
        regex = regexes.bible_version_regex
        match_obj = regex.search(match.strip())
        if not match_obj:
            return (match, "")
        match = regex.sub("", match).strip()
        bible_version = match_obj.group().strip().upper()
        bible_version = version_map.get(bible_version, bible_version)
        if bible_version in bible_version_set:
            return (match, bible_version)
        return (match, "")

    def multi_num_convert(self, book_code: str, match_index: int, match: str) -> None:
        """Record every verse of a ``3:16, 18; 4:1-2`` style match.

        Fragments are separated by commas or semicolons. A fragment with a
        colon sets the chapter used by the colon-less fragments that follow.
        Each recorded fragment gets the next match index so the verses stay in
        the order in which they were mentioned.
        """
        match, bible_version = self.search_version(match)
        default_chapter = 0
        current_index = match_index
        for verse in match.replace(";", ",").split(","):
            if ":" in verse:
                default_chapter = int(verse.split(":")[0])
                match_list = verse.replace(":", "-").split("-")
            else:
                match_list = verse.split("-")
                # Skips empty or whitespace-only fragments (e.g. after a
                # trailing comma), which cannot be converted to a number
                if not match_list[0].strip():
                    continue
                match_list.insert(0, default_chapter)
            self.add_to_lists(book_code, current_index, match_list, bible_version)
            current_index += 1

    def num_convert(self, book_code: str, match_index: int, match: str) -> None:
        """Record a single ``3:16`` or ``3:16-18`` style match."""
        match, bible_version = self.search_version(match)
        match_list = match.replace("-", " ").replace(":", " ").split()
        self.add_to_lists(book_code, match_index, match_list, bible_version)

    def num_converter(self, match_index: int, match: str) -> None:
        """Resolve the book of a colon-style match and dispatch to the right converter.

        Does nothing when the book title is not a key of ``bible_dict``.
        """
        book_title = self.find_book_title(match_index)
        if book_title not in bible_dict:
            return
        book_code = bible_dict[book_title]
        if "," in match or ";" in match:
            self.multi_num_convert(book_code, match_index, match)
        else:
            self.num_convert(book_code, match_index, match)

    def multi_chapt_convert(self, book_code: str, match_index: int, match: str) -> None:
        """Record every verse of a ``chapter 3 verse 16, 18-19`` style match.

        The first comma/semicolon separated fragment supplies the chapter and
        the first verse (range); the remaining fragments are verses of that
        same chapter and get consecutive match indexes.
        """
        match, bible_version = self.search_version(match)
        verse_list = match.replace(";", ",").split(",")
        # Only the numbers of the first fragment matter; words are dropped
        front_part = [part for part in verse_list[0].replace("-", " ").split() if part.isdigit()]
        chapter_num = int(front_part[0])
        self.add_to_lists(book_code, match_index, front_part, bible_version)
        current_index = match_index + 1
        for verse in verse_list[1:]:
            match_list = verse.split("-")
            # Skips empty or whitespace-only fragments, as multi_num_convert does
            if not match_list[0].strip():
                continue
            match_list.insert(0, chapter_num)
            self.add_to_lists(book_code, current_index, match_list, bible_version)
            current_index += 1

    def chapt_convert(self, book_code: str, match_index: int, match: str) -> None:
        """Record a single ``chapter 3 verse 16`` style match."""
        match, bible_version = self.search_version(match)
        # Keeps only the numbers, dropping words such as "chapter" and "verse"
        match_list = [part for part in match.replace("-", " ").split() if part.isdigit()]
        self.add_to_lists(book_code, match_index, match_list, bible_version)

    def chapt_converter(self, match_index: int, match: str) -> None:
        """Resolve the book of a chapter/verse-style match and dispatch to the right converter.

        Does nothing when the book title is not a key of ``bible_dict``.
        """
        book_title = self.find_book_title(match_index)
        if book_title not in bible_dict:
            return
        book_code = bible_dict[book_title]
        # Semicolons separate verses too (multi_chapt_convert handles them),
        # so they must select the multi-verse path just like commas
        if "," in match or ";" in match:
            self.multi_chapt_convert(book_code, match_index, match)
        else:
            self.chapt_convert(book_code, match_index, match)

    def append_chapters(self, match_index: int, book_code: str, chapter_list: List[str], bible_version: str) -> None:
        """Record whole chapters, expanding ``a-b`` ranges into single chapters.

        Every chapter is stored as verses 1 to ``_WHOLE_CHAPTER_LAST_VERSE``.
        The match index grows with every entry so the chapters stay in the
        order in which they were mentioned.
        """
        last_verse = self._WHOLE_CHAPTER_LAST_VERSE
        current_index = match_index
        for chapter in chapter_list:
            chapter_nums = chapter.split("-")
            if len(chapter_nums) == 1:
                self.add_to_lists(book_code, current_index, [chapter_nums[0], 1, last_verse], bible_version)
            else:
                for number in range(int(chapter_nums[0]), int(chapter_nums[1]) + 1):
                    self.add_to_lists(book_code, current_index, [number, 1, last_verse], bible_version)
                    current_index += 1
            current_index += 1

    def remove_empty(self, chapter_list: List[str]) -> List[str]:
        """Return ``chapter_list`` without its empty or whitespace-only items."""
        return [item for item in chapter_list if item.strip()]

    def _add_full_chapters(self, match_index: int, book_title: str, match_list: List[str], bible_version: str) -> None:
        """Record the chapters of a piped full-chapter match for ``book_title``.

        ``match_list`` is the match split at the pipe inserted by ``add_pipe``;
        its second item holds the chapter numbers. Does nothing when the book
        is not a key of ``bible_chapt_dict`` or when no number portion was
        split off.
        """
        if book_title not in bible_chapt_dict or len(match_list) < 2:
            return
        book_code = bible_chapt_dict[book_title]
        chapter_number_portion = match_list[1].replace(";", ",")
        chapter_list = self.remove_empty(chapter_number_portion.split(","))
        self.append_chapters(match_index, book_code, chapter_list, bible_version)

    def full_chapt_converter(self, match_index: int, match: str) -> None:
        """Record a whole-chapter match that contains the word "chapter(s)"."""
        match, bible_version = self.search_version(match)
        # Places a pipe character (|) in front of the numbers for easy splitting
        match_list = regexes.num_portion_regex.sub(self.add_pipe, match).split("|")
        front_list = match_list[0].split()
        # Drops the first "chapter", or failing that the first "chapters"
        if "chapter" in front_list:
            front_list.remove("chapter")
        elif "chapters" in front_list:
            front_list.remove("chapters")
        book_title = " ".join(front_list).strip()
        self._add_full_chapters(match_index, book_title, match_list, bible_version)

    def full_num_converter(self, match_index: int, match: str) -> None:
        """Record a whole-chapter match written without the word "chapter"."""
        match, bible_version = self.search_version(match)
        # Places a pipe character (|) in front of the numbers for easy splitting
        match_list = regexes.num_portion_regex.sub(self.add_pipe, match).split("|")
        book_num = self.find_book_num(match_index)
        book_title = f"{book_num} {match_list[0]}".strip()
        self._add_full_chapters(match_index, book_title, match_list, bible_version)

    def converter(self, match_index: int, match: str) -> None:
        """Pass a raw match to the converter that understands its format."""
        if ":" in match:
            # John 3:16
            self.num_converter(match_index, match)
        elif "chapter" in match and "verse" in match:
            # John Chapter 3 Verse 16
            self.chapt_converter(match_index, match)
        elif "chapter" in match:
            # John Chapter 3
            self.full_chapt_converter(match_index, match)
        else:
            # John 3 - this converter is handed the index just before the match
            self.full_num_converter(match_index - 1, match)

    def find_num(self) -> None:
        """Reset the match lists, then convert every ``regexes.number_regex`` match."""
        self.matches.clear()
        self.matches_index.clear()
        for match in regexes.number_regex.finditer(self.msg):
            self.converter(match.start(), match.group())

    def find_chapt(self) -> None:
        """Convert every ``regexes.chapter_regex`` match in the message."""
        for match in regexes.chapter_regex.finditer(self.msg):
            self.converter(match.start(), match.group())

    def find_full_num(self) -> None:
        """Convert every ``regexes.full_chapter_num_regex`` match in the message."""
        for match in regexes.full_chapter_num_regex.finditer(self.msg):
            self.converter(match.start(), match.group())

    def find_full_chapt(self) -> None:
        """Convert every ``regexes.full_chapter_chapt_regex`` match in the message."""
        for match in regexes.full_chapter_chapt_regex.finditer(self.msg):
            self.converter(match.start(), match.group())

    def find_all(self) -> None:
        """Run the four finders; ``find_num`` goes first because it resets the lists."""
        self.find_num()
        self.find_chapt()
        self.find_full_chapt()
        self.find_full_num()

    def make_dic(self) -> None:
        """Run every finder and rebuild ``dic`` with its keys in ascending match-index order."""
        self.find_all()
        found = dict(zip(self.matches_index, self.matches))
        self.dic = {index: found[index] for index in sorted(found)}