# Skip to content
#
# CZ data - update readme #193
#
# CZ data - update readme
#
# CZ data - update readme #193
#
# Workflow file for this run

# Regenerates the ParlaMint sample files once a pull request targeting the
# `data` branch has been closed.
name: Create sample files

# Controls when the action will run.
on:
  # Only the `closed` activity type of pull requests against `data` starts a
  # run; the jobs additionally require the pull request to have been merged.
  pull_request_target:
    branches: [data]
    types: [closed]
  # Uncomment to allow running this workflow manually from the Actions tab.
  # workflow_dispatch:
# A workflow run is made up of one or more jobs that can run sequentially or in parallel
jobs:
  # Detects which parliaments were touched and exposes them as the job output
  # `parla_changed`. The downstream job parses that output with
  # `jq -r '.[]'`, so it is expected to be a JSON array.
  Changes:
    runs-on: ubuntu-latest
    # Run only in the clarin-eric organisation and only for merged pull requests.
    if: >-
      ${{ github.repository_owner == 'clarin-eric'
      && github.event_name == 'pull_request_target'
      && github.event.pull_request.merged == true }}
    outputs:
      parla_changed: '${{ steps.detect-changes.outputs.parla_changed }}'
    steps:
      - name: Checkout ParlaMint
        uses: actions/checkout@v3
        with:
          path: ParlaMint
          # when called a pull_request - test is run on test merge -> full
          # history is not needed to detect changes
          fetch-depth: 2
      - name: Setup # initialize dependencies cache - used in next steps
        uses: ./ParlaMint/.github/actions/ParlaMintEnvSetup
      - name: ParlaMint status (Detect changed files and get list of parliaments that should be processed)
        id: detect-changes
        uses: ./ParlaMint/.github/actions/ParlaMintStatus

  # Creates sample files and commits them
  CreateSample:
    needs: [Changes]
    runs-on: ubuntu-latest
    # Skip when the list of changed parliaments is missing ('') or empty ('[]').
    if: >-
      ${{ needs.Changes.outputs.parla_changed != ''
      && needs.Changes.outputs.parla_changed != '[]'
      && github.event_name == 'pull_request_target'
      && github.event.pull_request.merged == true }}
    steps:
      - name: Checkout ParlaMint
        uses: actions/checkout@v3
        with:
          path: ParlaMint
      - name: Setup
        uses: ./ParlaMint/.github/actions/ParlaMintEnvSetup
        with:
          repo_token: ${{ secrets.GITHUB_TOKEN }}
      - name: Create Parliaments Samples ${{ needs.Changes.outputs.parla_changed }}
        env:
          # Passed through the environment instead of being spliced into the
          # script text, so the value can never be interpreted as shell code.
          PARLA_CHANGED: '${{ needs.Changes.outputs.parla_changed }}'
        run: |
          cd "$GITHUB_WORKSPACE/ParlaMint"
          # NOTE(review): SAMPLE_DIR and SAMPLE_SCRIPT come from the `env`
          # context and Saxon.jar from the workspace root - presumably all
          # provided by the Setup step; confirm against ParlaMintEnvSetup.
          for parla in $(jq -r '.[]' <<< "$PARLA_CHANGED"); do
            echo "::group::Processing ParlaMint-$parla"
            DIR="${{ env.SAMPLE_DIR }}/$parla"
            # -p: do not fail when the directory exists or its parent is missing
            mkdir -p "$DIR"
            echo "::notice::New sample files [$parla] TEXT"
            java -jar "$GITHUB_WORKSPACE/Saxon.jar" outDir="$DIR" revRespPers='GitHub Action' -xsl:"${{ env.SAMPLE_SCRIPT }}" "Samples/ParlaMint-$parla/ParlaMint-$parla.xml"
            echo "::notice::New sample files [$parla] ANNOTATED"
            if [ -f "Samples/ParlaMint-$parla/ParlaMint-$parla.ana.xml" ] ; then
              java -jar "$GITHUB_WORKSPACE/Saxon.jar" outDir="$DIR" revRespPers='GitHub Action' -xsl:"${{ env.SAMPLE_SCRIPT }}" "Samples/ParlaMint-$parla/ParlaMint-$parla.ana.xml"
            else
              echo "::warning::skipping annotated conversion - missing corpus root file"
            fi
            echo "::notice::Move new sample files to Samples/ParlaMint-$parla"
            # Only generated files named ParlaMint-<parla>. or ParlaMint-<parla>_ are moved.
            ls "$DIR" | grep "ParlaMint-$parla[\.\_]" | xargs -I {} mv "$DIR"/{} "Samples/ParlaMint-$parla/"
            echo "::endgroup::"
          done
      - name: Remove unused data from repository
        env:
          PARLA_CHANGED: '${{ needs.Changes.outputs.parla_changed }}'
        run: |
          cd "$GITHUB_WORKSPACE/ParlaMint"
          # globstar: `**` recurses into subdirectories.
          # nullglob: an unmatched pattern expands to nothing instead of being
          # iterated literally (which made `git rm` fail on a nonexistent path).
          shopt -s globstar nullglob
          for parla in $(jq -r '.[]' <<< "$PARLA_CHANGED"); do
            for file in Samples/ParlaMint-"$parla"/**/ParlaMint-"$parla"_*; do
              echo "testing $file"
              # Map a derived file (-meta.tsv, .conllu, .txt, .vert) to its .xml
              # name and drop the leading Samples/ParlaMint-<parla>/ directory.
              xmlfile=$(echo "$file" | sed -E 's/(-meta\.tsv|\.conllu|\.txt|\.vert)$/.xml/;s/^Samples\/ParlaMint-[^\/]*\///')
              # Remove the file when neither corpus root file mentions it.
              cat Samples/ParlaMint-"$parla"/ParlaMint-"$parla"{,.ana}.xml | grep -Fq "$xmlfile" || git rm "$file"
            done
          done
      - name: Create Derived files and Validate ${{ needs.Changes.outputs.parla_changed }}
        uses: ./ParlaMint/.github/actions/ParlaMintValidate
        with:
          parlas: '${{ needs.Changes.outputs.parla_changed }}'
      - name: Commit new sample files ${{ needs.Changes.outputs.parla_changed }}
        if: ${{ success() }}
        env:
          PARLA_CHANGED: '${{ needs.Changes.outputs.parla_changed }}'
          PR_NUMBER: '${{ github.event.number }}'
        run: |
          cd "$GITHUB_WORKSPACE/ParlaMint"
          for parla in $(jq -r '.[]' <<< "$PARLA_CHANGED"); do
            # One `git add` per extension: a single call with several patterns
            # aborts and stages nothing as soon as one pattern matches no file.
            for ext in txt tsv conllu vert xml; do
              git add Samples/ParlaMint-"$parla"/ParlaMint-*."$ext" || echo "::warning:: $parla suppress fatal: pathspec '<FILE>' did not match any files"
            done
            git diff --name-only "Samples/ParlaMint-$parla"
          done
          git status
          # `git commit` exits non-zero when there is nothing to commit; treat
          # that case as success instead of failing the job.
          if git diff --quiet HEAD; then
            echo "::notice::No sample file changes to commit"
            exit 0
          fi
          # Comma-joined list, e.g. [CZ,SI]: the same text the shell produced
          # when the compact JSON array was spliced into the quoted message.
          parlas="$(jq -r 'join(",")' <<< "$PARLA_CHANGED")"
          git commit -a -m "action: generating ParlaMint-[$parlas] sample files with #$PR_NUMBER"
          git push