-
Notifications
You must be signed in to change notification settings - Fork 8
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
pre-existing metadata dataframe scripts
- Loading branch information
Showing
2 changed files
with
171 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,123 @@ | ||
#!/usr/bin/env bash
### This script creates the file assets/media/module_data.py: a Python script that
### builds a pandas dataframe (one row per module) holding all of the metadata
### needed for visualizing connections between modules.
### Run this script from the main education_modules directory.
###
### A module is any folder FOLDER that contains a non-empty FOLDER/FOLDER.md,
### except for the folder "a_sample_module_template".
###
### NOTE: `set -e` / `pipefail` are deliberately not enabled. Extraction is
### best-effort: a module that lacks a field must not abort the whole run.

metadata_df=assets/media/module_data.py

#######################################
# Escape a value so it can be placed between double quotes in Python source.
# Arguments: $1 - raw text
# Outputs:   the text with backslashes and double quotes backslash-escaped
#######################################
py_escape() {
  local text=$1
  local bs='\' dq='"'
  text=${text//"$bs"/"$bs$bs"}
  text=${text//"$dq"/"$bs$dq"}
  printf '%s' "$text"
}

#######################################
# Print one dataframe assignment line.
# Arguments: $1 - row label (module folder), $2 - column name, $3 - value
# Outputs:   df.loc["<row>", "<column>"] = "<value>"
#######################################
emit_field() {
  printf 'df.loc["%s", "%s"] = "%s"\n' \
    "$(py_escape "$1")" "$(py_escape "$2")" "$(py_escape "$3")"
}

#######################################
# Read a one-line "key: value" macro from a module file.
# Only the first line containing "key:" is used.
# Arguments: $1 - module markdown file, $2 - key (without the colon)
# Outputs:   the value with leading whitespace and non-printable bytes removed
# Returns:   1 if no line contains "key:", 0 otherwise
#######################################
header_value() {
  local file=$1 key=$2 line
  line=$(grep -m 1 -F -- "${key}:" "$file") || return 1
  # Drop everything up to and including the first "key:" ...
  line=${line#*"${key}:"}
  # ... then any whitespace in front of the value.
  line=${line#"${line%%[![:space:]]*}"}
  printf '%s' "$line" | tr -dc '[:print:]'
}

#######################################
# Read a block macro: the lines between the first line containing the marker
# and the next line containing "@end" (both exclusive).
# Arguments: $1 - module markdown file
#            $2 - marker, e.g. "@learning_objectives"
#            $3 - text appended after every line in place of its newline
# Outputs:   the block joined into a single line
# Returns:   1 if the marker is absent or the block is never closed by "@end"
#######################################
macro_block() {
  local file=$1 marker=$2 sep=$3
  awk -v marker="$marker" -v sep="$sep" '
    { sub(/\r$/, "") }    # a stray CR would break the generated Python string
    in_block && index($0, "@end") { closed = 1; exit }
    in_block { buf = buf $0 sep; next }
    index($0, marker) { in_block = 1 }
    END {
      if (!closed) exit 1
      printf "%s", buf
    }
  ' "$file"
}

#######################################
# Collect the module folders of the current directory into the array `modules`.
#######################################
find_modules() {
  local entry
  modules=()
  for entry in *; do
    ## Only folders that have a course .md file inside an identically named folder
    if [[ -s "$entry/$entry.md" && "$entry" != "a_sample_module_template" ]]; then
      modules+=("$entry")
    fi
  done
}

#######################################
# Print every metadata assignment for one module.
# Arguments: $1 - module folder name
#######################################
emit_module() {
  local folder=$1
  local file="$folder/$folder.md"
  local key value

  ### One-line macros that every module has: always written, even when empty
  for key in "title" "author" "estimated_time_in_minutes"; do
    value=$(header_value "$file" "$key") || value=""
    emit_field "$folder" "$key" "$value"
  done

  ### good_first_module is not yet everywhere, but will be a required field.
  ### Coding metadata and sequence metadata will always be in some modules but not others.
  for key in "good_first_module" "coding_required" "coding_language" \
    "coding_level" "sequence_name" "next_sequential_module" \
    "data_task" "data_domain"; do
    if value=$(header_value "$file" "$key"); then
      emit_field "$folder" "$key" "$value"
    fi
  done

  ### Double quotes in comments and long descriptions are replaced with the
  ### character + (existing output convention, kept for the data's consumers).
  for key in "comment" "long_description"; do
    value=$(header_value "$file" "$key") || value=""
    value=${value//\"/+}
    emit_field "$folder" "$key" "$value"
  done

  ### Block macros. Line breaks are encoded as & and double quotes as +.
  ### learning_objectives is always written; it is empty when the block is
  ### missing or is never closed by @end.
  value=$(macro_block "$file" "@learning_objectives" "&") || value=""
  value=${value//\"/+}
  emit_field "$folder" "learning_objectives" "$value"

  #### pre_reqs (not every module has the prereqs in this format yet...)
  #### NOTE: the column name "Prerequisties" is misspelled, but it is part of
  #### the generated data, so it is kept unchanged.
  if value=$(macro_block "$file" "@pre_reqs" "&"); then
    value=${value//\"/+}
    emit_field "$folder" "Prerequisties" "$value"
  fi

  #### sets_you_up_for and depends_on_knowledge_available_in are lists:
  #### line breaks and hyphens both become spaces.
  if value=$(macro_block "$file" "@sets_you_up_for" " "); then
    emit_field "$folder" "Sets You Up For" "${value//-/ }"
  fi

  if value=$(macro_block "$file" "@depends_on_knowledge_available_in" " "); then
    emit_field "$folder" "Depends On Knowledge In" "${value//-/ }"
  fi
}

#######################################
# Print the "Linked Courses" column: for every module, the list of other
# modules whose folder name appears in its markdown file.
# Globals:   modules (read)
#######################################
emit_links() {
  local folder other

  printf '%s\n' 'df["Linked Courses"] = [list() for x in range(len(df.index))]'

  for folder in "${modules[@]}"; do
    printf 'a = df.loc["%s", "Linked Courses"]\n' "$(py_escape "$folder")"
    for other in "${modules[@]}"; do
      [[ "$other" == "$folder" ]] && continue
      # -F: the name is literal text, not a regex.
      # -w: whole-word match, so "foo" is not found inside "foo_bar".
      if grep -qwF -- "$other" "$folder/$folder.md"; then
        printf 'a.append("%s")\n' "$(py_escape "$other")"
      fi
    done
    printf 'df.at["%s", "Linked Courses"] = list(a)\n' "$(py_escape "$folder")"
  done
}

#######################################
# Print the complete generated Python file to stdout.
#######################################
generate_metadata() {
  local -a modules=()
  local folder

  find_modules

  ### Set up the basics of creating a pandas dataframe
  printf 'import pandas as pd\ndf=pd.DataFrame()\n\n'

  ### Make every module a row of the dataframe
  for folder in "${modules[@]}"; do
    emit_module "$folder"
  done

  ### Find all links to other modules
  emit_links
}

#######################################
# Entry point: (re)write $metadata_df from the modules in the current directory.
# Globals:   metadata_df (read)
# Returns:   non-zero if the output file could not be written
#######################################
main() {
  if ! generate_metadata > "$metadata_df"; then
    printf 'error: could not write %s (run this from the main education_modules directory)\n' \
      "$metadata_df" >&2
    return 1
  fi
}

main "$@"

### Debugging code, modify as needed:

#echo "print(df.loc[:,[\"coding_required\", \"coding_language\", \"coding_level\", \"sequence_name\", \"next_sequential_module\"]])" >> "$metadata_df"
#
#
#python "$metadata_df"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
# This is a workflow to pull all of the metadata from all of the modules into a single file.
# Each time something is merged to main, this workflow will run and rebuild the metadata file.

name: pull_metadata

# Controls when the workflow will run
on:
  # Triggers the workflow on pushes to the "main" branch. A merged pull request
  # reaches main as a push; "merge" is not an event GitHub Actions recognizes.
  push:
    branches: [ "main" ]
    # NOTE(review): these path filters do not obviously match the module
    # folders or .github/scripts/ that this workflow works with. Confirm they
    # are intended; otherwise ordinary module changes will not trigger a run.
    paths: ['Prompts/**', 'Weekly_Emails/Email_Text/**', 'scripts/**']

  #pull_request:
  #  branches: [ "main" ]

  # Allows you to run this workflow manually from the Actions tab
  workflow_dispatch:

# A workflow run is made up of one or more jobs that can run sequentially or in parallel
jobs:
  # This workflow contains a single job called "build"
  build:
    # The type of runner that the job will run on
    runs-on: ubuntu-latest

    # The job commits the regenerated file back to the repository
    permissions:
      contents: write

    # Steps represent a sequence of tasks that will be executed as part of the job
    steps:
      # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
      - uses: actions/checkout@v3

      # Runs a set of commands using the runners shell
      - name: Run all scripts
        run: |
          bash .github/scripts/process_metadata.sh

      - name: Commit newly updated files
        # A literal block ("|") keeps one command per line regardless of blank lines
        run: |
          git config --local user.name actions-user

          # NOTE(review): this address looks like a redacted placeholder; confirm the intended value
          git config --local user.email "[email protected]"

          git fetch

          git add *

          # Only commit when the metadata actually changed, so an unchanged run
          # does not fail the job with "nothing to commit"
          if git diff --cached --quiet; then
            echo "No metadata changes to commit"
          else
            git commit -am "update metadata records"

            # No force push: never overwrite commits that landed on main meanwhile
            git push origin main
          fi