From 0dcee25599e8f96b0fb92f3b10735055d3e0f63f Mon Sep 17 00:00:00 2001 From: Zhen Qian Date: Mon, 27 Jun 2022 16:12:49 -0400 Subject: [PATCH] Fixes #1378 Resources Accessed does not load when the course contains files with a semicolon in the filename (#1383) * issue_1378 stop doing resource id and name concatenation when calculating the resource access data frame * issue_1378 added resource_id_type as the unique identifier for resource_access records * issue_1378 added the resource_id into data frame for resource access view * Update dashboard/views.py * issue_1378 changes based on the PR review: to do the ID CONCAT in the SQL rather than in Pandas * issue_1378 change based on the PR review: 'do the ID CONCAT in the SQL rather than in Pandas' * issue_1378 removed empty spaces for sql queries * issue_1378 drop the not used resource_type attribute from query Co-authored-by: Code Hugger (Matthew Jones) --- dashboard/views.py | 65 +++++++++++++++++++++++++--------------------- 1 file changed, 36 insertions(+), 29 deletions(-) diff --git a/dashboard/views.py b/dashboard/views.py index fff08933a..a18326a9a 100644 --- a/dashboard/views.py +++ b/dashboard/views.py @@ -299,7 +299,12 @@ def resource_access_within_week(request, course_id=0): # get time range based on week number passed in via request - sqlString = f"""SELECT a.resource_id as resource_id, r.resource_type as resource_type, r.name as resource_name, u.current_grade as current_grade, a.user_id as user_id + sqlString = f"""SELECT a.resource_id as resource_id, + r.resource_type as resource_type, + CONCAT(r.resource_id, r.resource_type) as resource_id_type, + r.name as name, + u.current_grade as current_grade, + a.user_id as user_id FROM resource r, resource_access a, user u, course c, academic_terms t WHERE a.resource_id = r.resource_id and a.user_id = u.user_id and a.course_id = c.id and c.term_id = t.id @@ -321,17 +326,11 @@ def resource_access_within_week(request, course_id=0): "enrollment_type": 'StudentEnrollment' }) logger.debug(df) - # return if there is no data during this interval if (df.empty): return HttpResponse("{}") - # group by resource_id, and resource_name - # reformat for output - df['resource_id_name'] = df['resource_id'].astype(str).str.cat(df['resource_name'], sep=';') - - df=df.drop(['resource_id', 'resource_name'], axis=1) - df.set_index(['resource_id_name']) + df.set_index(['resource_id_type']) # drop resource records when the resource has been accessed multiple times by one user df.drop_duplicates(inplace=True) @@ -339,42 +338,53 @@ def resource_access_within_week(request, course_id=0): df['grade'] = df['current_grade'].map(gpa_map) # calculate the percentage - df['percent'] = df.groupby(['resource_id_name', 'grade'])['resource_id_name'].transform('count') / total_number_student + df['percent'] = df.groupby(['resource_id_type', 'grade'])['resource_id_type'].transform('count') / total_number_student df=df.drop(['current_grade', 'user_id'], axis=1) # now only keep the resource access stats by grade level df.drop_duplicates(inplace=True) - resource_id_name=df["resource_id_name"].unique() + + resource_id_type=df["resource_id_type"].unique() #df.reset_index(inplace=True) # zero filled dataframe with resource name as row name, and grade as column name - output_df=pd.DataFrame(0.0, index=resource_id_name, columns=[GRADE_A, GRADE_B, GRADE_C, GRADE_LOW, NO_GRADE_STRING, RESOURCE_TYPE_STRING]) - output_df=output_df.rename_axis('resource_id_name') + output_df=pd.DataFrame(0.0, index=resource_id_type, columns=['r_id', 'r_name', GRADE_A, GRADE_B, GRADE_C, GRADE_LOW, NO_GRADE_STRING, RESOURCE_TYPE_STRING]) + output_df=output_df.rename_axis('resource_id_type') output_df=output_df.astype({RESOURCE_TYPE_STRING: str}) + output_df=output_df.astype({'r_name': str}) + output_df=output_df.astype({'r_id': str}) + for index, row in df.iterrows(): # set value - output_df.at[row['resource_id_name'], row['grade']] = row['percent'] - output_df.at[row['resource_id_name'], RESOURCE_TYPE_STRING] = row[RESOURCE_TYPE_STRING] + output_df.at[row['resource_id_type'], row['grade']] = row['percent'] + output_df.at[row['resource_id_type'], RESOURCE_TYPE_STRING] = row[RESOURCE_TYPE_STRING] + output_df.at[row['resource_id_type'], 'r_name'] = row['name'] + output_df.at[row['resource_id_type'], 'r_id'] = row['resource_id'] output_df.reset_index(inplace=True) # now insert person's own viewing records: what resources the user has viewed, and the last access timestamp - selfSqlString = f"""select CONCAT(r.resource_id, ';', r.name) as resource_id_name, count(*) as self_access_count, max(a.access_time) as self_access_last_time - from resource_access a, user u, resource r - where a.user_id = u.user_id - and a.resource_id = r.resource_id - and u.sis_name=%(current_user)s + selfSqlString = f""" + select + r.resource_id as resource_id, + CONCAT(r.resource_id, r.resource_type) as resource_id_type, + r.name as name, + count(*) as self_access_count, + max(a.access_time) as self_access_last_time + from resource_access a, user u, resource r + where a.user_id = u.user_id + and a.resource_id = r.resource_id + and u.sis_name=%(current_user)s and a.course_id = %(course_id)s and a.course_id = u.course_id - group by CONCAT(r.resource_id, ';', r.name)""" + group by r.resource_id, r.resource_type, r.name""" logger.debug(selfSqlString) logger.debug("current_user=" + current_user) selfDf= pd.read_sql(selfSqlString, conn, params={"current_user":current_user, "course_id": course_id}) - - output_df = output_df.join(selfDf.set_index('resource_id_name'), on='resource_id_name', how='left') + output_df = output_df.join(selfDf.set_index('resource_id_type'), on=['resource_id_type'], how='left') output_df["total_percent"] = output_df.apply(lambda row: row[GRADE_A] + row[GRADE_B] + row[GRADE_C] + row[GRADE_LOW] + row.NO_GRADE, axis=1) if (grade != "all"): @@ -385,7 +395,7 @@ def resource_access_within_week(request, course_id=0): output_df["total_percent"] = output_df[i_grade] else: output_df=output_df.drop([i_grade], axis=1) - + output_df=output_df[output_df.resource_type.isin(filter_list)] # if no checkboxes are checked send nothing @@ -402,24 +412,21 @@ def resource_access_within_week(request, course_id=0): output_df.fillna(0, inplace=True) #replace null value with 0 - output_df[['resource_id_part','resource_name_part']] = output_df['resource_id_name'].str.split(';', expand=True) - output_df['resource_name'] = output_df.apply( lambda row: (RESOURCE_ACCESS_CONFIG.get(row.resource_type).get("urls").get("prefix") + - row.resource_id_part + + str(row.r_id) + RESOURCE_ACCESS_CONFIG.get(row.resource_type).get("urls").get("postfix") + CANVAS_FILE_ID_NAME_SEPARATOR + - row.resource_name_part + CANVAS_FILE_ID_NAME_SEPARATOR + + str(row.r_name) + CANVAS_FILE_ID_NAME_SEPARATOR + RESOURCE_VALUES.get(RESOURCE_VALUES_MAP.get(row.resource_type)).get('icon') ), axis=1) # RESOURCE_VALUES_MAP {'canvas': 'files', 'leccap': 'videos', 'mivideo': 'videos'} output_df['resource_type'] = output_df['resource_type'].replace(RESOURCE_VALUES_MAP) - output_df.drop(columns=['resource_id_part', 'resource_name_part', 'resource_id_name'], inplace=True) + output_df.drop(columns=['name', 'resource_id_type'], inplace=True) logger.debug(output_df.to_json(orient='records')) - return HttpResponse(output_df.to_json(orient='records'),content_type='application/json')