From 3e2fd667d8495e4b1de08120f74187a231f4c344 Mon Sep 17 00:00:00 2001 From: robsiemb Date: Fri, 4 Oct 2024 15:28:10 -0400 Subject: [PATCH] At-plusplus support (#4) * Handle plusplussing of user slack entities in cases where we can do so. - Move slack entity plusplus filtering into scanPlusPlus so that we can translate entities, not just forbid them. - Add helper functions for zdaemon to determine its own slack user id. - Fix a number of email address canonicalization issues along the way. - In particular, ANDREW.CMU.EDU and ABTECH.ORG are treated as identical namespaces. * Ping on slack displays now slack user id. Resolves #1 --- zdaemon/common.py | 52 +++++++++++++++--- zdaemon/config.py | 3 +- zdaemon/plusplus.py | 127 +++++++++++++++++++++++++++++++++++--------- zdaemon/zdaemon.py | 4 ++ zdaemon/zudaemon | 9 +++- 5 files changed, 162 insertions(+), 33 deletions(-) diff --git a/zdaemon/common.py b/zdaemon/common.py index 5271d17..611a081 100644 --- a/zdaemon/common.py +++ b/zdaemon/common.py @@ -26,7 +26,7 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -from cachetools import cached, TTLCache +from cachetools import cached, LRUCache, TTLCache import logging import re import time @@ -101,6 +101,13 @@ def sendz(zclass, instance, message, unfurl=False): return False +def slack_active(): + ''' + Returns true if there is a slack client available. + ''' + return _SLACK_CLIENT is not None + + def _sendsErrorCheck(res, channel_id, thread_ts, message): if res['ok'] != True: error_thread = '[NONE]' @@ -183,6 +190,21 @@ def slackReact(message, emojiname): _log.error("sendsBlock Slack Error: " + e.response["error"]) +# We use LRU cache here with size 1 to just cache the result forever, since our +# userid really shouldn't change while we're running. +@cached(cache=LRUCache(maxsize=1)) +def get_zdaemon_userid(): + '''Returns our own userid''' + if _SLACK_CLIENT is None: + raise Exception("_SLACK_CLIENT not configured") + + res = _SLACK_CLIENT.auth_test() + if not res['ok']: + raise Exception("OK:False when fetching bot id via auth_test (result: %s)" % res) + + return res['user_id'] + + # Cache bot userid lookups for a day (if we really need to change bot ids, just kick zdaemon) @cached(cache=TTLCache(maxsize=64, ttl=86400)) def get_slack_bot_userid(botid): @@ -211,11 +233,22 @@ def get_slack_user_profile(userid): def get_slack_user_email(userid, lhs_only=True): '''Returns the user's slack profile's email. By default only returns the left hand side. - Hard-codes the slack bridge bot to be "bridge-bot@ABTECH.ORG" + Since slack doesn't let bot user profiles have a userid, this hard-codes the + slack bridge bot to be "bridge-bot@ABTECH.ORG", and itself as "zdaemon@ABTECH.ORG". ''' + # Apparently, slack likes these to be uppercase. + userid = userid.upper() + + # print("searching %s" % userid) + if userid == cfg.SLACK_BRIDGE_BOT_ID: return "bridge-bot@ABTECH.ORG" + if userid == get_zdaemon_userid(): + return "zdaemon@ABTECH.ORG" + + # We don't need a TTLCache for this function, since this next call does all the + # expensive work and is, itself, cached. profile_result = get_slack_user_profile(userid) if 'email' not in profile_result: @@ -361,19 +394,24 @@ def get_slack_thread(event): def realID(id): """Converts the passed identifier into a more canonical form. Mostly deals with the same user across many realms. + + Specifically, we treat identically named ABTECH.ORG and ANDREW.CMU.EDU users as identical. + + Note: We also are assuming that the email addresses match typical ANDREW.CMU.EDU naming + conventions (e.g. nothing other than alphanumerics on the LHS -- though we'll deal with + a few special characters as well like ., +, and -) """ name = id.rstrip() - m = re.match(r'(\w+)@ABTECH.ORG', name) + m = re.fullmatch(r'([\.\+\-\w]+)@ABTECH\.ORG', name, re.IGNORECASE) if m: name = m.group(1) else: - m = re.match(r'(\w+)@ANDREW.CMU.EDU', name) + m = re.fullmatch(r'([\.\+\-\w]+)@ANDREW\.CMU\.EDU', name, re.IGNORECASE) if m: name = m.group(1) - # If we don't have a realm at this point, - # canonicalize to lowercase. - if not re.match(r'(\w+)@(\w+)', name): + # If we don't have a realm at this point, canonicalize to lowercase. + if not '@' in name: name = name.lower() return name diff --git a/zdaemon/config.py b/zdaemon/config.py index 84aa540..559e7d3 100644 --- a/zdaemon/config.py +++ b/zdaemon/config.py @@ -145,7 +145,8 @@ def init_zdaemon_config(options, load_channels=True, config_file_only=False): raise Exception("SLACK_CHANNEL_WHITELIST is present but doesn't appear to be a list") slack_channel_whitelist = wl if "SLACK_BRIDGE_BOT_ID" in file_data: - SLACK_BRIDGE_BOT_ID = file_data["SLACK_BRIDGE_BOT_ID"] + # Make sure to force this to uppercase. + SLACK_BRIDGE_BOT_ID = file_data["SLACK_BRIDGE_BOT_ID"].upper() if "SENDCUBE_ENABLE" in file_data: diff --git a/zdaemon/plusplus.py b/zdaemon/plusplus.py index 152038e..92baaa7 100644 --- a/zdaemon/plusplus.py +++ b/zdaemon/plusplus.py @@ -30,14 +30,16 @@ import functools import pytz +from random import randrange import re import sqlite3 import time import unicodedata as ud import config as cfg -from common import realID, sendz, sendsText, get_slack_thread, get_slack_user_email, hasRTLCharacters -from random import randrange +from common import sendz +from common import slack_active, sendsText, get_slack_thread, get_slack_user_email +from common import realID, hasRTLCharacters # sqlite Schema # @@ -207,27 +209,18 @@ def _plusplus(cursor, sender, display_sender, if inc != 1 and inc != -1: raise Exception("_plusplus: bad increment %s" % inc) - thing_id = realID(thing) - self_pp_penalty = False - - # Detect someone plusplussing a slack entity, and forbid. - # Note that this code still runs on zulip, but it probably is an error there as well. - m = re.match(r'<([@#])([a-z0-9]+)(|.*)?>', thing_id) - if m: - type = m.group(1) - entity = m.group(2) - - hint = "" - if type == '@': - hint = " If this is a user, please use their andrew id." - elif type == '#': - hint = " If this is a channel, consider omitting the hash mark." - - reply("It looks like you might be trying to plusplus the slack entity: %s, " - "but this is not supported.%s" % (entity, hint)) - return None + # Legacy zdaemon made this call, but realID() didn't actually have correct regexes, so + # it never worked! The behavior is surprising when it is working, as plusplus will report + # the score of the canonical id, but display the name of the noncanonical ID. e.g. + # "zdaemon@ABTECH.ORG: 1023482" while recording & reporting the score for "zdaemon" + # + # We think users plusplusing an email address, even an andrew or abtech one, expect that + # the string for the email itself is the thing that gets changed. + # thing_id = realID(thing) + thing_id = thing # Self Plusplus Penalty + self_pp_penalty = False if (thing_id == sender and inc == 1): reply("Whoa, @bold(loser) trying to plusplus themselves.\n" \ "Changing to %s--" % thing_id) @@ -305,6 +298,80 @@ def _plusplus(cursor, sender, display_sender, return ret +def _ppSlackEntityFilter(thing_id, reply): + '''Attempts to resolve plusplus targets that are slack entities into better targets. + + thing_id is the thing to examine + reply is used to send error messages if the thing is disallowed. + + returns canonical name (might be the same) or None if this should be disallowed. + ''' + # Detect someone plusplussing a slack entity, and forbid. + # Note that this code still runs on zulip, but it probably is an error there as well. + m = re.fullmatch(r'<([@#])([a-z0-9]+)(|.*)?>', thing_id) + if m: + thing_is_slack_entity = True + type = m.group(1) + entity = m.group(2) + + hint = "" + + if type == '@': + hint += " If this is a user, please use their andrew id." + elif type == '#': + hint += " If this is a channel, consider omitting the hash mark." + + try: + if type == '@' and slack_active(): + # Best effort attempt to convert this to an andrew id. + email = get_slack_user_email(entity, False) + + # Only used if we fail to canonicalize. + hint += " When I looked it up, I got %s which didn't look like an andrew account." % email + + # Remember that everyone gets trapped with using regexes for email addresses. + # Luckily the ones we care about here are not plus addressed and don't use the + # full rfc*822 formatting, since we're specifally looking for the andrew ID. + if re.fullmatch(r'([\-\.\w]+)@([\.\w]+)', email): + new_thing = realID(email) + + # Check if it still looks like an email address. + if not re.fullmatch(r'([\-\.\w]+)@([\.\w]+)', new_thing): + # It canonicalized successfully! + thing_id = new_thing + thing_is_slack_entity = False + except: + # If an exception happens, proceed almost as if it wasn't valid anyway. + hint += " I tried to look it up, but I wasn't able to." + pass + + # If we haven't turned this into something usable, abort! + if thing_is_slack_entity: + reply("It looks like you might be trying to plusplus the slack entity: %s, " + "but this is not supported.%s" % (entity.upper(), hint)) + return None + + return thing_id + + +def _ppSlackEmailFilter(thing): + ''' + Slack always turns emails into markdown, we need to strip that out before using it for plusplus. + ''' + if not slack_active(): + return thing + + # These look like "" + # + # We're knowingly walking into the trap of generating a regex for an email address. + # It is an imperfect world. Be nice. + m = re.fullmatch(r'', thing) + if m: + thing = m.group(1) + + return thing + + def scanPlusPlus(sender, message, reply, display_sender=None): #log("In scanplusplus: %s" % message) @@ -326,7 +393,6 @@ def scanPlusPlus(sender, message, reply, display_sender=None): cur = dbh.cursor() cur.execute("BEGIN") try: - m = pattern.search(haystack) while m is not None: haystack = m.group(3) @@ -335,6 +401,21 @@ def scanPlusPlus(sender, message, reply, display_sender=None): # print ("%s / %s" % (thing, op)) + # Set up next loop. Done before we decide if we need to slack-entity-filter + # this item for readability reasons. + # + # Don't reference m inside the loop after this point! + m = pattern.search(haystack) + + # Forbid #channels and @users that we can't convert to an andrew account. + thing = _ppSlackEntityFilter(thing, reply) + if thing is None: + # Need to filter it! + continue + + # Strip slack email address markdown. + thing = _ppSlackEmailFilter(thing) + if (thing == "year"): results['year'] = datetime.now(_PIT_TIMEZONE).year elif (thing == "month"): @@ -365,8 +446,6 @@ def scanPlusPlus(sender, message, reply, display_sender=None): if (res is not None): results[thing] = res - m = pattern.search(haystack) - cur.execute("COMMIT") except Exception as e: cur.execute("ROLLBACK") diff --git a/zdaemon/zdaemon.py b/zdaemon/zdaemon.py index 7db20a2..ad21e4d 100644 --- a/zdaemon/zdaemon.py +++ b/zdaemon/zdaemon.py @@ -91,6 +91,10 @@ def ping_text(slack=False): msg += "*Plusplus*: %d (totaling %d)\n" % (ppstats['count'], ppstats['sum']) msg += "*Pronouns*: she/her\n" msg += "*Home Address*: %s\n" % socket.gethostname() + + if slack: + msg += "*Slack User Id*: %s\n" % common.get_zdaemon_userid() + msg += "*Server Uptime*: %s\n" % uptime.rstrip() msg += "*Last Restart*: %s\n" % strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime(common.ZDAEMON_START_TIME)) diff --git a/zdaemon/zudaemon b/zdaemon/zudaemon index d4fb78c..f68b311 100755 --- a/zdaemon/zudaemon +++ b/zdaemon/zudaemon @@ -141,8 +141,15 @@ if cfg.SLACK_ENABLE: hello_string = "zdaemon started (%s)" % socket.gethostname() zd.sendToMaintainer(hello_string) -# These are blocking calls, and will continuously poll for new messages +# handler.start() or zd.runzulip are blocking calls, +# and will continuously poll for new messages until we die. if cfg.SLACK_ENABLE: + # This next line is not only informative, but also primes our cache! + # + # Note that it can throw an exception if the slack API is unhappy for some reason, + # but in that case it probably would have failed at the sendToMaintainer() above. + print ("I am Slack User ID: %s" % zd.get_zdaemon_userid()) + handler = SocketModeHandler(app, cfg.SLACK_APP_TOKEN) handler.start() else: