Skip to content

Commit

Permalink
I1544 backend dependencies (#1554)
Browse files Browse the repository at this point in the history
  • Loading branch information
pushyamig authored Dec 18, 2023
1 parent 9fb4601 commit b5f8752
Show file tree
Hide file tree
Showing 17 changed files with 132 additions and 94 deletions.
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ WORKDIR /code
COPY requirements.txt .
RUN apt-get update && \
apt-get install -y --no-install-recommends \
build-essential curl apt-transport-https libpq-dev netcat-traditional jq python3-dev xmlsec1 cron git && \
build-essential curl apt-transport-https libpq-dev netcat-traditional default-libmysqlclient-dev pkg-config jq python3-dev xmlsec1 cron git && \
apt-get upgrade -y

# Install MariaDB from the mariadb repository rather than using Debians
Expand Down
29 changes: 16 additions & 13 deletions config/cron_udp.hjson
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@
left join entity.course_grade cg
on cse.course_section_id = cg.course_section_id and cse.person_id = cg.person_id
where
co.lms_int_id in %(course_ids)s
and cse.role in ('Student', 'TeachingAssistant', 'Teacher')
co.lms_int_id = ANY(%(course_ids)s)
and cse.role = ANY(ARRAY['Student', 'Teacher', 'TeachingAssistant']::text[])
and cse.role_status = 'Enrolled'
and cse.enrollment_status = 'Active'
order by user_id
Expand All @@ -56,14 +56,14 @@
la.visibility = 'everyone'
and la.status = 'published'
and la.course_offering_id = co.id
and co.lms_int_id in %(course_ids)s
and co.lms_int_id = ANY(%(course_ids)s)
), assignment_grp as (
select lg.*
from entity.learner_activity_group lg, keymap.course_offering co
where
lg.status = 'available'
and lg.course_offering_id = co.id
and co.lms_int_id in %(course_ids)s
and co.lms_int_id = ANY(%(course_ids)s)
), assign_more as (
select distinct(a.learner_activity_group_id), da.group_points
from assignment_details a
Expand Down Expand Up @@ -125,7 +125,7 @@
la.visibility = 'everyone'
and la.status = 'published'
and la.course_offering_id = co.id
and co.lms_int_id in %(course_ids)s
and co.lms_int_id = ANY(%(course_ids)s)
and la.learner_activity_id = la_km.id
and la.learner_activity_group_id = lag_km.id
)
Expand All @@ -147,7 +147,7 @@
keymap.course_offering co_km
where
lag.course_offering_id = co_km.id
and co_km.lms_int_id in %(course_ids)s
and co_km.lms_int_id = ANY(%(course_ids)s)
group by co_km.lms_int_id
''',
"term":
Expand Down Expand Up @@ -182,7 +182,7 @@
LEFT OUTER JOIN entity.academic_term at1 on (co.academic_term_id = at1.academic_term_id),
keymap.course_offering co2,
keymap.academic_term at2
WHERE co2.lms_int_id in %(course_ids)s
WHERE co2.lms_int_id = ANY(%(course_ids)s)
and co.course_offering_id = co2.id
and at1.academic_term_id = at2.id
''',
Expand All @@ -196,7 +196,7 @@
where
f.course_offering_id = co_km.id
and f.file_id = f_km.id
and co_km.lms_int_id in %(course_ids)s
and co_km.lms_int_id = ANY(%(course_ids)s)
order by id
''',
"submission":
Expand All @@ -212,7 +212,7 @@
left join keymap.course_offering co
on cs.le_current_course_offering_id = co.id
where
co.lms_int_id in %(course_ids)s
co.lms_int_id = ANY(:course_ids)
and cse.role_status ='Enrolled'
and cse."role" = 'Student'
and cse.enrollment_status = 'Active'
Expand All @@ -228,13 +228,13 @@
lar.published_score as published_score,
lar.response_date as submitted_at,
lar.graded_date as graded_at,
timezone(%(time_zone)s, lar.posted_at AT TIME ZONE 'UTC') as grade_posted_local_date,
timezone(:time_zone, lar.posted_at AT TIME ZONE 'UTC') as grade_posted_local_date,
lar.grading_status as submission_workflow_state,
la.title as title,
lar.learner_activity_result_id as learner_activity_result_id,
lar.person_id as short_user_id,
cast(lar2.lms_int_id as BIGINT) as submission_id,
(cast(%(canvas_data_id_increment)s as bigint) + cast(p.lms_ext_id as bigint)) as canvas_user_id
(cast(:canvas_data_id_increment as bigint) + cast(p.lms_ext_id as bigint)) as canvas_user_id
from entity.learner_activity_result lar
join enrollment on lar.person_id= enrollment.user_id
join enrollment e on lar.person_id = e.user_id
Expand All @@ -244,7 +244,7 @@
left join keymap.course_offering co on co.id = la.course_offering_id
join keymap.person p on p.id = lar.person_id
where
co.lms_int_id in %(course_ids)s
co.lms_int_id = ANY(:course_ids)
and la.status = 'published'
)
select
Expand All @@ -267,7 +267,10 @@
grade_posted_local_date
from
submission
);
)
''',
"submission_with_avg_score":
'''
select
f.id::bigint,
f.assignment_id::bigint assignment_id,
Expand Down
2 changes: 1 addition & 1 deletion config/env_sample.hjson
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@
# Enable secure cookies, also set your trusted origin (example of instructure.com)
# This needs to be true for deployments or when testing LTI with ngrok or loophole.
"CSRF_COOKIE_SECURE": false,
"CSRF_TRUSTED_ORIGINS": ["instructure.com"],
"CSRF_TRUSTED_ORIGINS": ["https://*.instructure.com", "https://*.umich.edu"],
# If you have a proxy that sets this header then set this to true. Default is false
"USE_X_FORWARDED_HOST": false,
# SameSite settings for Session and CSRF (defaults in settings.py should work), if you do want non-string None set to null.
Expand Down
8 changes: 5 additions & 3 deletions dashboard/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ def clean(self):
return self.cleaned_data


@admin.register(AcademicTerms)
class TermAdmin(admin.ModelAdmin):
exclude = ('id',)
list_display = ('canvas_id', 'name', 'date_start', 'date_end')
Expand All @@ -77,6 +78,7 @@ def has_add_permission(self, request):
return False


@admin.register(Course)
class CourseAdmin(admin.ModelAdmin):
inlines = [CourseViewOptionInline, ]
form = CourseForm
Expand All @@ -95,11 +97,13 @@ def clear_course_updated_dates(self, request, queryset):
self.message_user(request, "All selected last updated values cleared.")

# Need this method to correctly display the line breaks
@admin.display(
description="Course View Option(s)"
)
def _courseviewoption(self, obj):
return mark_safe(linebreaksbr(obj.courseviewoption))

# Known mypy issue: https://github.com/python/mypy/issues/708
_courseviewoption.short_description = "Course View Option(s)" # type: ignore[attr-defined]

def course_link(self, obj):
return format_html('<a href="{}">Link</a>', obj.absolute_url)
Expand Down Expand Up @@ -160,8 +164,6 @@ def has_change_permission(request, obj=None):
def has_delete_permission(request, obj=None):
return False

admin.site.register(AcademicTerms, TermAdmin)
admin.site.register(Course, CourseAdmin)

# Remove the pinax LogAdmin and add ours
admin.site.unregister(Log)
Expand Down
2 changes: 1 addition & 1 deletion dashboard/common/db_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def create_sqlalchemy_engine(db_params: DjangoDBParams) -> Engine:
if new_db_params['ENGINE'] == (BACKENDS_PATH + 'mysql'):
return create_engine(f'mysql+mysqldb://{core_string}?charset=utf8mb4')
else:
return create_engine('postgresql://' + core_string)
return create_engine('postgresql+psycopg://' + core_string)


def canvas_id_to_incremented_id(canvas_id):
Expand Down
77 changes: 49 additions & 28 deletions dashboard/cron.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,22 @@
import logging
from collections import namedtuple
from typing import Any, Dict, List, Union
from zoneinfo import ZoneInfo

import hjson
import pandas as pd
import pytz
import pangres

from django.conf import settings
from django.db import connections as conns, models
from django.db.models import QuerySet
from django_cron import CronJobBase, Schedule
from google.cloud import bigquery
from sqlalchemy import types
from sqlalchemy import types, text
from sqlalchemy.engine import ResultProxy
from sqlalchemy.orm import sessionmaker

from dashboard.common import db_util, utils
from dashboard.common import db_util
from dashboard.models import Course, Resource, AcademicTerms, User


Expand Down Expand Up @@ -67,17 +68,17 @@ def util_function(sql_string, mysql_table, param_object=None, table_identifier=N


# execute database query
def execute_db_query(query: str, params: List = None) -> ResultProxy:
with engine.connect() as connection:
def execute_db_query(query: str, params: Dict = None) -> ResultProxy:
with engine.begin() as connection:
connection.detach()
if params:
return connection.execute(query, params)
return connection.execute(text(query), params)
else:
return connection.execute(query)
return connection.execute(text(query))


# remove all records inside the specified table
def delete_all_records_in_table(table_name: str, where_clause: str = "", where_params: List = None):
def delete_all_records_in_table(table_name: str, where_clause: str = "", where_params: Dict = None):
# delete all records in the table first, can have an optional where clause
result_proxy = execute_db_query(f"delete from {table_name} {where_clause}", where_params)
return(f"\n{result_proxy.rowcount} rows deleted from {table_name}\n")
Expand All @@ -99,7 +100,7 @@ def soft_update_datetime_field(
f'Skipped update of {field_name} for {model_name} instance ({model_inst.id}); existing value was found')
else:
if warehouse_field_value:
warehouse_field_value = warehouse_field_value.replace(tzinfo=pytz.UTC)
warehouse_field_value = warehouse_field_value.replace(tzinfo=ZoneInfo('UTC'))
setattr(model_inst, field_name, warehouse_field_value)
logger.info(f'Updated {field_name} for {model_name} instance ({model_inst.id})')
return [field_name]
Expand All @@ -124,7 +125,7 @@ def verify_course_ids(self):
logger.debug("in checking course")
supported_courses = Course.objects.get_supported_courses()
course_ids = [str(x) for x in supported_courses.values_list('id', flat=True)]
courses_data = pd.read_sql(queries['course'], data_warehouse_engine, params={'course_ids': tuple(course_ids)})
courses_data = pd.read_sql(queries['course'], data_warehouse_engine, params={'course_ids': course_ids})
# error out when course id is invalid, otherwise add DataFrame to list
for course_id, data_last_updated in supported_courses:
if course_id not in list(courses_data['id']):
Expand All @@ -151,7 +152,7 @@ def update_user(self):
# cron status
status = ""

logger.debug("in update with data warehouse user")
logger.info("in update with data warehouse user")

# delete all records in the table first
status += delete_all_records_in_table("user")
Expand All @@ -160,7 +161,7 @@ def update_user(self):
status += util_function(
queries['user'],
'user',
{'course_ids': tuple(self.valid_locked_course_ids),
{'course_ids': self.valid_locked_course_ids,
'canvas_data_id_increment': settings.CANVAS_DATA_ID_INCREMENT
})

Expand Down Expand Up @@ -193,13 +194,13 @@ def update_canvas_resource(self):
# cron status
status = ""

logger.debug("in update canvas resource")
logger.info("in update canvas resource")

# Select all the files for these courses
# convert int array to str array
df_attach = pd.read_sql(queries['resource'],
data_warehouse_engine,
params={'course_ids': tuple(self.valid_locked_course_ids)})
params={'course_ids': self.valid_locked_course_ids })
logger.debug(df_attach)
# Update these back again based on the dataframe
# Remove any rows where file_state is not available!
Expand All @@ -217,6 +218,8 @@ def update_resource_access(self):
# cron status
status = ""

logger.info("in update resource access")

# return string with concatenated SQL insert result
return_string = ""

Expand All @@ -231,7 +234,7 @@ def update_resource_access(self):

logger.info(f"Deleting all records in resource_access after {data_last_updated}")

status += delete_all_records_in_table("resource_access", f"WHERE access_time > %s", [data_last_updated, ])
status += delete_all_records_in_table("resource_access", f"WHERE access_time > :data_last_updated", {'data_last_updated': data_last_updated })

# loop through multiple course ids, 20 at a time
# (This is set by the CRON_BQ_IN_LIMIT from settings)
Expand Down Expand Up @@ -393,7 +396,7 @@ def update_resource_access(self):
student_enrollment_type = User.EnrollmentType.STUDENT
student_enrollment_df = pd.read_sql(
'select user_id, course_id from user where enrollment_type= %s',
engine, params={student_enrollment_type})
engine, params=[(str(student_enrollment_type),)])
resource_access_df = pd.merge(
resource_access_df, student_enrollment_df,
on=['user_id', 'course_id'],
Expand Down Expand Up @@ -437,6 +440,8 @@ def update_groups(self):
# cron status
status = ""

logger.info("update_groups(): ")

# delete all records in assignment_group table
status += delete_all_records_in_table("assignment_groups")

Expand All @@ -447,7 +452,7 @@ def update_groups(self):
# loop through multiple course ids
status += util_function(queries['assignment_groups'],
'assignment_groups',
{'course_ids': tuple(self.valid_locked_course_ids)})
{'course_ids': self.valid_locked_course_ids})

return status

Expand All @@ -463,7 +468,7 @@ def update_assignment(self):
# loop through multiple course ids
status += util_function(queries['assignment'],
'assignment',
{'course_ids': tuple(self.valid_locked_course_ids),
{'course_ids': self.valid_locked_course_ids,
'time_zone': settings.TIME_ZONE})

return status
Expand All @@ -480,14 +485,30 @@ def submission(self):

# loop through multiple course ids
# filter out not released grades (submission_dim.posted_at date is not null) and partial grades (submission_dim.workflow_state != 'graded')
status += util_function(queries['submission'],
'submission',
{
'course_ids': tuple(self.valid_locked_course_ids),
'canvas_data_id_increment': settings.CANVAS_DATA_ID_INCREMENT,
'time_zone': settings.TIME_ZONE
})
query_params = {
'course_ids': self.valid_locked_course_ids,
'time_zone': settings.TIME_ZONE,
'canvas_data_id_increment': settings.CANVAS_DATA_ID_INCREMENT,
}
Session = sessionmaker(bind=data_warehouse_engine)
try:
# Create a session
with Session() as session:
# Execute the first query to create the temporary table
session.execute(text(queries['submission']).bindparams(**query_params))

# Execute the second query using the temporary table
result = session.execute(text(queries['submission_with_avg_score']))
df = pd.DataFrame(result.fetchall(), columns=result.keys())
df = df.drop_duplicates(keep='first')
df.to_sql(con=engine, name='submission', if_exists='append', index=False)

except Exception as e:
logger.exception('Error running sql on table submission', str(e))
raise
status+=f"{str(df.shape[0])} submission: {query_params}\n"

# returns the row size of dataframe
return status

def weight_consideration(self):
Expand All @@ -503,7 +524,7 @@ def weight_consideration(self):
# loop through multiple course ids
status += util_function(queries['assignment_weight'],
'assignment_weight_consideration',
{'course_ids': tuple(self.valid_locked_course_ids)},
{'course_ids': self.valid_locked_course_ids },
'weight')

logger.debug(status + "\n\n")
Expand Down Expand Up @@ -543,7 +564,7 @@ def update_course(self, warehouse_courses_data: pd.DataFrame) -> str:
Updates course records with data returned from verify_course_ids, only making changes when necessary.
"""
status: str = ''
logger.debug('update_course()')
logger.info('update_course()')

logger.debug(warehouse_courses_data.to_json(orient='records'))
courses: QuerySet = Course.objects.filter(id__in=self.valid_locked_course_ids)
Expand Down Expand Up @@ -588,7 +609,7 @@ def do(self) -> str:

status = ""

run_start = datetime.now(pytz.UTC)
run_start = datetime.now(ZoneInfo('UTC'))
status += f"Start cron: {str(run_start)} UTC\n"
course_verification = self.verify_course_ids()
invalid_course_id_list = course_verification.invalid_course_ids
Expand Down
Loading

0 comments on commit b5f8752

Please sign in to comment.