pre-existing metadata dataframe scripts
drelliche committed Aug 23, 2023
1 parent 59d84cc commit a1ac6d4
Showing 2 changed files with 171 additions and 0 deletions.
123 changes: 123 additions & 0 deletions .github/scripts/process_metadata.sh
@@ -0,0 +1,123 @@
### This script creates the file assets/media/module_data.py containing all of the data needed for visualizing connections between modules.
### Run this script from the main education_modules directory.

metadata_df="assets/media/module_data.py"

### Set up the basics of creating a pandas dataframe
echo "import pandas as pd
df=pd.DataFrame()" > $metadata_df
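
### For illustration only (the module name and values below are hypothetical), the generated
### file ends up holding one assignment per metadata field for each module, e.g.:
###   df.loc["example_module", "title"] = "Example Module"
###   df.loc["example_module", "author"] = "A. Author"
###   df.loc["example_module", "estimated_time_in_minutes"] = "20"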


### Make every module a graph node

echo >> $metadata_df

for FOLDER in *
do
if [[ -s $FOLDER/$FOLDER.md && "$FOLDER" != "a_sample_module_template" ]] ## Only do this for folders that have a course .md file inside an identically named folder in education_modules
then
### pull the one-line macros
for CATEGORY in "title" "author" "estimated_time_in_minutes"
do
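### For example (hypothetical metadata line): "title: Intro to Example Topic" yields
### category_metadata="Intro to Example Topic" -- the first sed drops the leading "title:" key,
### the second trims any remaining leading spaces, and tr strips non-printing characters.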
category_metadata="`grep -m 1 "$CATEGORY": $FOLDER/$FOLDER.md | sed "s/^[^ ]* //" | sed "s/^[ ]* //" | tr -dc '[:print:]'`"
echo "df.loc[\"$FOLDER\", \"$CATEGORY\"] = \"$category_metadata\"" >> $metadata_df
done

### good_first_module is not yet everywhere, but will be a required field
if grep "good_first_module" -q $FOLDER/$FOLDER.md
then
good_first_module="`grep -m 1 good_first_module: $FOLDER/$FOLDER.md | sed "s/^[^ ]* //" | sed "s/^[ ]* //" | tr -dc '[:print:]'`"
echo "df.loc[\"$FOLDER\", \"good_first_module\"] = \"$good_first_module\" " >> $metadata_df
fi

### Coding metadata and sequence metadata will always be in some modules but not others
for CATEGORY in "coding_required" "coding_language" "coding_level" "sequence_name" "next_sequential_module" "data_task" "data_domain"
do
if grep "$CATEGORY" -q $FOLDER/$FOLDER.md
then
category_metadata="`grep -m 1 "$CATEGORY": $FOLDER/$FOLDER.md | sed "s/^[^ ]* //" | sed "s/^[ ]* //" | tr -dc '[:print:]'`"
echo "df.loc[\"$FOLDER\", \"$CATEGORY\"] = \"$category_metadata\"" >> $metadata_df
fi
done

#### TODO: Some comments and long descriptions contain double quotes, which would break the generated Python string literals. For the moment they are replaced with the character +.
comment="`grep -m 1 comment: $FOLDER/$FOLDER.md | sed "s/^[^ ]* //" | sed "s/^[ ]* //" | tr -dc '[:print:]' | tr '"' '+'`"
echo "df.loc[\"$FOLDER\", \"comment\"] = \"$comment\" " >> $metadata_df
long_description="`grep -m 1 long_description: $FOLDER/$FOLDER.md | sed "s/^[^ ]* //" | sed "s/^[ ]* //" | tr -dc '[:print:]' | tr '"' '+'`"
echo "df.loc[\"$FOLDER\", \"long_description\"] = \"$long_description\" " >> $metadata_df

### Start pulling the data from block macros. @learning_objectives is the only block macro expected in every module; the others are handled conditionally below. First find the line number where "@learning_objectives" first appears.
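### The block macro format being parsed is assumed to look roughly like this (hypothetical example):
###   @learning_objectives
###   - Explain what metadata the module collects
###   - Run the processing script end to end
###   @end
### Everything between the opening line and @end is captured below; newlines become '&' and double quotes become '+'.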
start=$(( $(grep -n -m 1 "@learning_objectives" $FOLDER/$FOLDER.md | cut -f1 -d:) +1 ))

end=$(( $(tail -n +$start $FOLDER/$FOLDER.md | grep -n -m 1 "@end" | cut -f1 -d:) - 1 ))
#### TODO figure out line breaks!
learning_objectives=$(tail -n +$start $FOLDER/$FOLDER.md | head -n $end | tr '\n' '&' | tr '"' '+')
echo "df.loc[\"$FOLDER\", \"learning_objectives\"] = \"$learning_objectives\" " >> $metadata_df

#### pre_reqs (The "IF" is because not every module has the prereqs in this format yet...)
if grep "@pre_reqs" -q $FOLDER/$FOLDER.md
then
start=$(( $(grep -n -m 1 "@pre_reqs" $FOLDER/$FOLDER.md | cut -f1 -d:) +1 ))

end=$(( $(tail -n +$start $FOLDER/$FOLDER.md | grep -n -m 1 "@end" | cut -f1 -d:) - 1 ))
#### TODO figure out line breaks!
pre_reqs=$(tail -n +$start $FOLDER/$FOLDER.md | head -n $end | tr '\n' '&' | tr '"' '+')
echo "df.loc[\"$FOLDER\", \"Prerequisties\"] = \"$pre_reqs\" " >> $metadata_df
fi

#### sets_you_up_for (The "IF" is because not every module has this yet...)
if grep "@sets_you_up_for" -q $FOLDER/$FOLDER.md
then
start=$(( $(grep -n -m 1 "@sets_you_up_for" $FOLDER/$FOLDER.md | cut -f1 -d:) +1 ))

end=$(( $(tail -n +$start $FOLDER/$FOLDER.md | grep -n -m 1 "@end" | cut -f1 -d:) - 1 ))

sets_you_up_for=$(tail -n +$start $FOLDER/$FOLDER.md | head -n $end | tr '\n' ' ' | tr '-' ' ')
echo "df.loc[\"$FOLDER\", \"Sets You Up For\"] = \"$sets_you_up_for\" " >> $metadata_df
fi

#### depends_on_knowledge_available_in (The "IF" is because not every module has this yet...)
if grep "@depends_on_knowledge_available_in" -q $FOLDER/$FOLDER.md
then
start=$(( $(grep -n -m 1 "@depends_on_knowledge_available_in" $FOLDER/$FOLDER.md | cut -f1 -d:) +1 ))

end=$(( $(tail -n +$start $FOLDER/$FOLDER.md | grep -n -m 1 "@end" | cut -f1 -d:) - 1 ))

depends_on_knowledge_available_in=$(tail -n +$start $FOLDER/$FOLDER.md | head -n $end | tr '\n' ' ' | tr '-' ' ')
echo "df.loc[\"$FOLDER\", \"Depends On Knowledge In\"] = \"$depends_on_knowledge_available_in\" " >> $metadata_df
fi
fi
done

### Find all links to other modules:

echo "df[\"Linked Courses\"] = [list() for x in range(len(df.index))]" >> $metadata_df

for FOLDER in *
do
if [[ -s $FOLDER/$FOLDER.md && "$FOLDER" != "a_sample_module_template" ]]
then
echo "a = df.loc[\"$FOLDER\", \"Linked Courses\"]" >> $metadata_df
for LINKED_COURSE in *
do
if [[ -s $LINKED_COURSE/$LINKED_COURSE.md && "$LINKED_COURSE" != "a_sample_module_template" && "$LINKED_COURSE" != "$FOLDER" ]]
then
# echo $FOLDER, $LINKED_COURSE
if [ "$(grep -c $LINKED_COURSE $FOLDER/$FOLDER.md)" -ge 1 ]
then
echo "a.append(\"$LINKED_COURSE\")" >> $metadata_df

fi

fi
done
echo "df.at[\"$FOLDER\", \"Linked Courses\"] = list(a)" >> $metadata_df
fi
done

### Debugging code, modify as needed:

#echo "print(df.loc[:,[\"coding_required\", \"coding_language\", \"coding_level\", \"sequence_name\", \"next_sequential_module\"]])">>$metadata_df
#
#
#python assets/module_discovery_app/module_data.py
48 changes: 48 additions & 0 deletions .github/workflows/pull_metadata.yml
@@ -0,0 +1,48 @@
# This is a workflow to pull all of the metadata from all of the modules into a single file.
# Each time something is merged to main, this workflow will run and rebuild the metadata file.

name: pull_metadata

# Controls when the workflow will run
on:
  # Triggers the workflow on push events (i.e. whenever something is merged) but only for the "main" branch
  push:
    branches: [ "main" ]
    paths: ['Prompts/**', 'Weekly_Emails/Email_Text/**', 'scripts/**']

  #pull_request:
  #  branches: [ "main" ]

  # Allows you to run this workflow manually from the Actions tab
  workflow_dispatch:

# A workflow run is made up of one or more jobs that can run sequentially or in parallel
jobs:
  # This workflow contains a single job called "build"
  build:
    # The type of runner that the job will run on
    runs-on: ubuntu-latest

    # Steps represent a sequence of tasks that will be executed as part of the job
    steps:
      # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
      - uses: actions/checkout@v3

      # Runs a set of commands using the runner's shell
      - name: Run all scripts
        run: |
          bash .github/scripts/process_metadata.sh
      - name: Commit newly updated files
        run: |
          git config --local user.name actions-user
          git config --local user.email "[email protected]"
          git fetch
          git add *
          git commit -am "update metadata records"
          git push -f origin main
