Skip to content

Commit

Permalink
At-plusplus support (#4)
Browse files Browse the repository at this point in the history
* Handle plusplussing of user slack entities in cases where we can do so.

- Move slack entity plusplus filtering into scanPlusPlus so that we
  can translate entities, not just forbid them.
- Add helper functions for zdaemon to determine its own slack user id.
- Fix a number of email address canonicalization issues along the way.
- In particular, ANDREW.CMU.EDU and ABTECH.ORG are treated as identical
  namespaces.

* Ping on slack displays now slack user id.

Resolves #1
  • Loading branch information
robsiemb authored Oct 4, 2024
1 parent c8752db commit 3e2fd66
Show file tree
Hide file tree
Showing 5 changed files with 162 additions and 33 deletions.
52 changes: 45 additions & 7 deletions zdaemon/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

from cachetools import cached, TTLCache
from cachetools import cached, LRUCache, TTLCache
import logging
import re
import time
Expand Down Expand Up @@ -101,6 +101,13 @@ def sendz(zclass, instance, message, unfurl=False):
return False


def slack_active():
'''
Returns true if there is a slack client available.
'''
return _SLACK_CLIENT is not None


def _sendsErrorCheck(res, channel_id, thread_ts, message):
if res['ok'] != True:
error_thread = '[NONE]'
Expand Down Expand Up @@ -183,6 +190,21 @@ def slackReact(message, emojiname):
_log.error("sendsBlock Slack Error: " + e.response["error"])


# We use LRU cache here with size 1 to just cache the result forever, since our
# userid really shouldn't change while we're running.
@cached(cache=LRUCache(maxsize=1))
def get_zdaemon_userid():
'''Returns our own userid'''
if _SLACK_CLIENT is None:
raise Exception("_SLACK_CLIENT not configured")

res = _SLACK_CLIENT.auth_test()
if not res['ok']:
raise Exception("OK:False when fetching bot id via auth_test (result: %s)" % res)

return res['user_id']


# Cache bot userid lookups for a day (if we really need to change bot ids, just kick zdaemon)
@cached(cache=TTLCache(maxsize=64, ttl=86400))
def get_slack_bot_userid(botid):
Expand Down Expand Up @@ -211,11 +233,22 @@ def get_slack_user_profile(userid):
def get_slack_user_email(userid, lhs_only=True):
'''Returns the user's slack profile's email. By default only returns the left hand side.
Hard-codes the slack bridge bot to be "[email protected]"
Since slack doesn't let bot user profiles have a userid, this hard-codes the
slack bridge bot to be "[email protected]", and itself as "[email protected]".
'''
# Apparently, slack likes these to be uppercase.
userid = userid.upper()

# print("searching %s" % userid)

if userid == cfg.SLACK_BRIDGE_BOT_ID:
return "[email protected]"

if userid == get_zdaemon_userid():
return "[email protected]"

# We don't need a TTLCache for this function, since this next call does all the
# expensive work and is, itself, cached.
profile_result = get_slack_user_profile(userid)

if 'email' not in profile_result:
Expand Down Expand Up @@ -361,19 +394,24 @@ def get_slack_thread(event):
def realID(id):
"""Converts the passed identifier into a more canonical form.
Mostly deals with the same user across many realms.
Specifically, we treat identically named ABTECH.ORG and ANDREW.CMU.EDU users as identical.
Note: We also are assuming that the email addresses match typical ANDREW.CMU.EDU naming
conventions (e.g. nothing other than alphanumerics on the LHS -- though we'll deal with
a few special characters as well like ., +, and -)
"""
name = id.rstrip()
m = re.match(r'(\w+)@ABTECH.ORG', name)
m = re.fullmatch(r'([\.\+\-\w]+)@ABTECH\.ORG', name, re.IGNORECASE)
if m:
name = m.group(1)
else:
m = re.match(r'(\w+)@ANDREW.CMU.EDU', name)
m = re.fullmatch(r'([\.\+\-\w]+)@ANDREW\.CMU\.EDU', name, re.IGNORECASE)
if m:
name = m.group(1)

# If we don't have a realm at this point,
# canonicalize to lowercase.
if not re.match(r'(\w+)@(\w+)', name):
# If we don't have a realm at this point, canonicalize to lowercase.
if not '@' in name:
name = name.lower()

return name
Expand Down
3 changes: 2 additions & 1 deletion zdaemon/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,8 @@ def init_zdaemon_config(options, load_channels=True, config_file_only=False):
raise Exception("SLACK_CHANNEL_WHITELIST is present but doesn't appear to be a list")
slack_channel_whitelist = wl
if "SLACK_BRIDGE_BOT_ID" in file_data:
SLACK_BRIDGE_BOT_ID = file_data["SLACK_BRIDGE_BOT_ID"]
# Make sure to force this to uppercase.
SLACK_BRIDGE_BOT_ID = file_data["SLACK_BRIDGE_BOT_ID"].upper()


if "SENDCUBE_ENABLE" in file_data:
Expand Down
127 changes: 103 additions & 24 deletions zdaemon/plusplus.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,16 @@

import functools
import pytz
from random import randrange
import re
import sqlite3
import time
import unicodedata as ud

import config as cfg
from common import realID, sendz, sendsText, get_slack_thread, get_slack_user_email, hasRTLCharacters
from random import randrange
from common import sendz
from common import slack_active, sendsText, get_slack_thread, get_slack_user_email
from common import realID, hasRTLCharacters

# sqlite Schema
#
Expand Down Expand Up @@ -207,27 +209,18 @@ def _plusplus(cursor, sender, display_sender,
if inc != 1 and inc != -1:
raise Exception("_plusplus: bad increment %s" % inc)

thing_id = realID(thing)
self_pp_penalty = False

# Detect someone plusplussing a slack entity, and forbid.
# Note that this code still runs on zulip, but it probably is an error there as well.
m = re.match(r'<([@#])([a-z0-9]+)(|.*)?>', thing_id)
if m:
type = m.group(1)
entity = m.group(2)

hint = ""
if type == '@':
hint = " If this is a user, please use their andrew id."
elif type == '#':
hint = " If this is a channel, consider omitting the hash mark."

reply("It looks like you might be trying to plusplus the slack entity: %s, "
"but this is not supported.%s" % (entity, hint))
return None
# Legacy zdaemon made this call, but realID() didn't actually have correct regexes, so
# it never worked! The behavior is surprising when it is working, as plusplus will report
# the score of the canonical id, but display the name of the noncanonical ID. e.g.
# "[email protected]: 1023482" while recording & reporting the score for "zdaemon"
#
# We think users plusplusing an email address, even an andrew or abtech one, expect that
# the string for the email itself is the thing that gets changed.
# thing_id = realID(thing)
thing_id = thing

# Self Plusplus Penalty
self_pp_penalty = False
if (thing_id == sender and inc == 1):
reply("Whoa, @bold(loser) trying to plusplus themselves.\n" \
"Changing to %s--" % thing_id)
Expand Down Expand Up @@ -305,6 +298,80 @@ def _plusplus(cursor, sender, display_sender,
return ret


def _ppSlackEntityFilter(thing_id, reply):
'''Attempts to resolve plusplus targets that are slack entities into better targets.
thing_id is the thing to examine
reply is used to send error messages if the thing is disallowed.
returns canonical name (might be the same) or None if this should be disallowed.
'''
# Detect someone plusplussing a slack entity, and forbid.
# Note that this code still runs on zulip, but it probably is an error there as well.
m = re.fullmatch(r'<([@#])([a-z0-9]+)(|.*)?>', thing_id)
if m:
thing_is_slack_entity = True
type = m.group(1)
entity = m.group(2)

hint = ""

if type == '@':
hint += " If this is a user, please use their andrew id."
elif type == '#':
hint += " If this is a channel, consider omitting the hash mark."

try:
if type == '@' and slack_active():
# Best effort attempt to convert this to an andrew id.
email = get_slack_user_email(entity, False)

# Only used if we fail to canonicalize.
hint += " When I looked it up, I got %s which didn't look like an andrew account." % email

# Remember that everyone gets trapped with using regexes for email addresses.
# Luckily the ones we care about here are not plus addressed and don't use the
# full rfc*822 formatting, since we're specifally looking for the andrew ID.
if re.fullmatch(r'([\-\.\w]+)@([\.\w]+)', email):
new_thing = realID(email)

# Check if it still looks like an email address.
if not re.fullmatch(r'([\-\.\w]+)@([\.\w]+)', new_thing):
# It canonicalized successfully!
thing_id = new_thing
thing_is_slack_entity = False
except:
# If an exception happens, proceed almost as if it wasn't valid anyway.
hint += " I tried to look it up, but I wasn't able to."
pass

# If we haven't turned this into something usable, abort!
if thing_is_slack_entity:
reply("It looks like you might be trying to plusplus the slack entity: %s, "
"but this is not supported.%s" % (entity.upper(), hint))
return None

return thing_id


def _ppSlackEmailFilter(thing):
'''
Slack always turns emails into markdown, we need to strip that out before using it for plusplus.
'''
if not slack_active():
return thing

# These look like "<mailto:[email protected]|[email protected]>"
#
# We're knowingly walking into the trap of generating a regex for an email address.
# It is an imperfect world. Be nice.
m = re.fullmatch(r'<mailto:([\-\.\+\w]+@[\.\w]+)\|\1>', thing)
if m:
thing = m.group(1)

return thing


def scanPlusPlus(sender, message, reply, display_sender=None):
#log("In scanplusplus: %s" % message)

Expand All @@ -326,7 +393,6 @@ def scanPlusPlus(sender, message, reply, display_sender=None):
cur = dbh.cursor()
cur.execute("BEGIN")
try:

m = pattern.search(haystack)
while m is not None:
haystack = m.group(3)
Expand All @@ -335,6 +401,21 @@ def scanPlusPlus(sender, message, reply, display_sender=None):

# print ("%s / %s" % (thing, op))

# Set up next loop. Done before we decide if we need to slack-entity-filter
# this item for readability reasons.
#
# Don't reference m inside the loop after this point!
m = pattern.search(haystack)

# Forbid #channels and @users that we can't convert to an andrew account.
thing = _ppSlackEntityFilter(thing, reply)
if thing is None:
# Need to filter it!
continue

# Strip slack email address markdown.
thing = _ppSlackEmailFilter(thing)

if (thing == "year"):
results['year'] = datetime.now(_PIT_TIMEZONE).year
elif (thing == "month"):
Expand Down Expand Up @@ -365,8 +446,6 @@ def scanPlusPlus(sender, message, reply, display_sender=None):
if (res is not None):
results[thing] = res

m = pattern.search(haystack)

cur.execute("COMMIT")
except Exception as e:
cur.execute("ROLLBACK")
Expand Down
4 changes: 4 additions & 0 deletions zdaemon/zdaemon.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,10 @@ def ping_text(slack=False):
msg += "*Plusplus*: %d (totaling %d)\n" % (ppstats['count'], ppstats['sum'])
msg += "*Pronouns*: she/her\n"
msg += "*Home Address*: %s\n" % socket.gethostname()

if slack:
msg += "*Slack User Id*: %s\n" % common.get_zdaemon_userid()

msg += "*Server Uptime*: %s\n" % uptime.rstrip()
msg += "*Last Restart*: %s\n" % strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime(common.ZDAEMON_START_TIME))

Expand Down
9 changes: 8 additions & 1 deletion zdaemon/zudaemon
Original file line number Diff line number Diff line change
Expand Up @@ -141,8 +141,15 @@ if cfg.SLACK_ENABLE:
hello_string = "zdaemon started (%s)" % socket.gethostname()
zd.sendToMaintainer(hello_string)

# These are blocking calls, and will continuously poll for new messages
# handler.start() or zd.runzulip are blocking calls,
# and will continuously poll for new messages until we die.
if cfg.SLACK_ENABLE:
# This next line is not only informative, but also primes our cache!
#
# Note that it can throw an exception if the slack API is unhappy for some reason,
# but in that case it probably would have failed at the sendToMaintainer() above.
print ("I am Slack User ID: %s" % zd.get_zdaemon_userid())

handler = SocketModeHandler(app, cfg.SLACK_APP_TOKEN)
handler.start()
else:
Expand Down

0 comments on commit 3e2fd66

Please sign in to comment.