Skip to content

Commit

Permalink
Add basic file checks
Browse files Browse the repository at this point in the history
  • Loading branch information
p-goulart committed May 2, 2024
1 parent 5a9d600 commit 188e712
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 0 deletions.
12 changes: 12 additions & 0 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,20 @@ on:
workflow_dispatch: {}

jobs:
# Pre-build gate: checks out the repository and runs the file sanity-check
# scripts. The `build` job lists this job under `needs`, so it waits for it.
check_files:
runs-on: ubuntu-latest
steps:
- name: Check out code
uses: actions/checkout@v3

# Runs the script that checks the Hunspell .dic/.aff files for ISO-8859-1 encoding.
- name: Check Hunspell encoding
run: bash ./scripts/check_encoding.sh

# Runs the script that looks for files that do not end with a newline.
- name: Check tagger file newlines
run: bash ./scripts/check_newlines.sh
build:
runs-on: ubuntu-latest
needs: check_files
strategy:
matrix:
python-version: ["3.11"]
Expand Down
24 changes: 24 additions & 0 deletions scripts/check_encoding.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#!/usr/bin/env bash
# Shell script to check that every pt*.dic and pt*.aff file under ./data/spelling-dict/hunspell is encoded in ISO-8859-1

#######################################
# Print the `file` description of every pt*.dic and pt*.aff file.
# Arguments:
#   $1 - directory to search (optional; defaults to
#        ./data/spelling-dict/hunspell)
# Outputs:
#   One line of `file` output per matching file on stdout: all pt*.dic
#   matches first, then all pt*.aff matches.
#######################################
get_encoding() {
  local dict_dir=${1:-./data/spelling-dict/hunspell}
  # Declared local so the loop variable does not leak into the caller's scope.
  local ext
  for ext in dic aff; do
    # `-exec ... \;` runs `file` once per match, giving one report line each.
    find "${dict_dir}" -type f -name "pt*.${ext}" -exec file {} \;
  done
}

# Capture the `file` report once; check_encoding reads it from this global.
FILE_ENCODINGS="$(get_encoding)"

# Print every line of the FILE_ENCODINGS report that does not contain the
# "ISO-8859 text" marker; prints nothing when every line contains it.
check_encoding() {
  grep -v "ISO-8859 text" <<<"${FILE_ENCODINGS}"
}


# Decide the script's exit status from the FILE_ENCODINGS report:
#   exit 0 - at least one file was inspected and every one is ISO-8859 text
#   exit 1 - nothing could be inspected, or some file is not ISO-8859 text
if [[ -z "${FILE_ENCODINGS}" ]]; then
  # An empty report means no file was inspected (missing directory, no
  # matching files, or `file` unavailable); that must not count as a pass.
  echo "No .dic or .aff files could be inspected, so the encoding check cannot pass." >&2
  exit 1
fi

# Report lines lacking the ISO-8859 marker; empty when every file is fine.
non_iso_report=$(check_encoding)

if [[ -z "${non_iso_report}" ]]; then
  echo "All .dic and .aff files are encoded in ISO-8859-1, we're good!"
  exit 0
else
  # Diagnostics go to stderr, and only the offending files are listed.
  echo "Some .dic and .aff files are not encoded in ISO-8859-1, please fix this." >&2
  printf '%s\n' "${non_iso_report}" >&2
  exit 1
fi
18 changes: 18 additions & 0 deletions scripts/check_newlines.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#!/usr/bin/env bash

# Check for files that do not end with a newline

#######################################
# List the *.txt files that do not end with a newline character.
# Arguments:
#   $1 - directory to search (optional; defaults to ./data/src-dict)
# Outputs:
#   One path per offending file on stdout. Empty files are listed too,
#   because they have no final newline either.
#######################################
check_newlines() {
  local src_dir=${1:-./data/src-dict}
  local path
  # Loop in-process instead of going through `xargs -n1 bash -c`: GNU xargs
  # runs its command once even on empty input, so an xargs-based pipeline
  # calls tail on an empty path (and prints an error) when nothing matches.
  find "${src_dir}" -name "*.txt" -type f -print0 |
    while IFS= read -r -d '' path; do
      # `read` succeeds only when the last byte is a newline; on anything
      # else (or on an empty file) it hits EOF and fails, so the path prints.
      tail -c1 -- "${path}" | read -r _ || printf '%s\n' "${path}"
    done
}

# Collect the offending paths once, then report and set the exit status:
# 0 when check_newlines printed nothing, 1 otherwise.
NO_NEWLINE_FILES=$(check_newlines)

if [[ -z "${NO_NEWLINE_FILES}" ]]; then
  # The check is for a trailing newline character, not a blank last line.
  echo "All files end with a newline, which is good."
  exit 0
else
  # Diagnostics go to stderr so they stay separate from normal output.
  echo "Some files do not end with a newline:" >&2
  printf '%s\n' "${NO_NEWLINE_FILES}" >&2
  exit 1
fi

0 comments on commit 188e712

Please sign in to comment.