Add hyperlinks and paths validation. #25
Workflow file for this run
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
name: Check Paths and Hyperlinks

# Validates every Markdown link in the repository on pull requests to main:
#   * external http(s) links must answer with HTTP 200;
#   * relative links must resolve to a file that exists on main or on the
#     pull request's head branch.
on:
  pull_request:
    branches: [main]
    types: [opened, reopened, ready_for_review, synchronize]

jobs:
  check-the-validity-of-hyperlinks-in-README:
    runs-on: ubuntu-latest
    steps:
      - name: Clean Up Working Directory
        run: sudo rm -rf ${{github.workspace}}/*

      - name: Checkout Repo docs
        uses: actions/checkout@v4

      - name: Check the Validity of Hyperlinks
        run: |
          cd "${{github.workspace}}"
          fail="FALSE"
          # Links that failed both immediate attempts; retried again at the end.
          urls_line=()

          # Links that cannot be checked anonymously and are only reported.
          manual_links=(
            "https://intel.sharepoint.com/:v:/s/mlconsultingandsupport/EZa7vjON10ZCpMvE7U-SPMwBRXbVHqe1Ybsa-fmnXayNUA?e=f6FPsl"
            "https://intel.sharepoint.com/:v:/s/mlconsultingandsupport/ESMIcBseFTdIuqkoB7TZy6ABfwR9CkfV49TvTa1X_Jihkg?e=zMH7O7"
          )

          # Succeeds when $1 is one of the manually verified links.
          needs_manual_check() {
            local candidate
            for candidate in "${manual_links[@]}"; do
              if [[ "$candidate" == "$1" ]]; then
                return 0
              fi
            done
            return 1
          }

          # Prints the HTTP status code for the URL in $1, following redirects.
          # This step runs with errexit enabled, so a curl failure (DNS error,
          # refused connection, ...) would abort the script before the link is
          # reported; `|| true` lets curl's "000" status flow through instead.
          http_status() {
            curl -L -s -o /dev/null -w "%{http_code}" "$1" || true
          }

          # Extracts the URL from a grep match of the form `<file>:](<url>)`,
          # dropping an optional Markdown link title and a trailing `.git`.
          link_url() {
            echo "$1" | cut -d '(' -f2 | cut -d ')' -f1 | sed -E 's/[[:space:]].*$//; s/\.git$//'
          }

          # Extracts the repository-relative Markdown file path from a grep match.
          link_file() {
            echo "$1" | cut -d':' -f1 | cut -d'/' -f2-
          }

          # grep exits 1 when nothing matches; with pipefail and errexit active
          # that would fail the step, so an empty result is tolerated here.
          url_lines=$(grep -Eo '\]\(http[s]?://[^)]+\)' --include='*.md' -r . | grep -Ev 'linkedin' || true)
          if [ -n "$url_lines" ]; then
            # Read line by line so matches are neither word-split nor glob-expanded.
            while IFS= read -r url_line; do
              url=$(link_url "$url_line")
              path=$(link_file "$url_line")
              if needs_manual_check "$url"; then
                echo "Link $url from ${{github.workspace}}/$path need to be verified by a real person."
              else
                response=$(http_status "$url")
                if [[ "$response" != "200" ]]; then
                  echo "**********Validation failed, status code: $response, try again**********"
                  response_retry=$(http_status "$url")
                  if [[ "$response_retry" == "200" ]]; then
                    echo "*****Retry successful*****"
                  else
                    urls_line+=("$url_line")
                    echo "Status code: $response_retry, Link $url validation failed, will retry later."
                  fi
                fi
              fi
            done <<< "$url_lines"
          fi

          echo "**************Start Retry**************"
          for link in "${urls_line[@]}"; do
            url=$(link_url "$link")
            path=$(link_file "$link")
            # Up to five more attempts, three seconds apart, before giving up.
            attempt_num=1
            while [ "$attempt_num" -le 5 ]; do
              do_retry=$(http_status "$url")
              if [[ "$do_retry" == "200" ]]; then
                echo "$url Retry successful"
                break
              else
                echo "$url Validation failed, retrying..."
                attempt_num=$((attempt_num + 1))
                sleep 3
              fi
            done
            if [ "$attempt_num" -gt 5 ]; then
              echo "Invalid link from ${{github.workspace}}/$path: $url status code: $do_retry"
              fail="TRUE"
            fi
          done

          if [[ "$fail" == "TRUE" ]]; then
            exit 1
          else
            echo "All hyperlinks are valid."
          fi
        shell: bash

  check-the-validity-of-relative-path:
    runs-on: ubuntu-latest
    steps:
      - name: Clean up Working Directory
        run: sudo rm -rf ${{github.workspace}}/*

      - name: Checkout Repo docs
        uses: actions/checkout@v4

      - name: Checking Relative Path Validity
        # The head repository and branch names are chosen by the pull request
        # author. They are handed to the script as environment variables rather
        # than expanded into the script text, so they cannot inject commands.
        env:
          HEAD_REPO: ${{ github.event.pull_request.head.repo.full_name }}
          HEAD_REF: ${{ github.event.pull_request.head.ref }}
        run: |
          cd "${{github.workspace}}"
          fail="FALSE"

          # Prints the HTTP status code for a URL, following redirects. Extra
          # curl options (e.g. -I for a HEAD request) may precede the URL.
          # `|| true` keeps a curl failure from aborting the script under
          # errexit; curl still prints "000" in that case.
          http_status() {
            curl -L -s -o /dev/null -w "%{http_code}" "$@" || true
          }

          # Base URL of the pull request's head branch, used as a fallback for
          # files that do not exist on main yet.
          owner=${HEAD_REPO%%/*}
          if [ "$owner" != "opea-project" ]; then
            branch="https://github.com/$owner/docs/tree/$HEAD_REF"
          else
            branch="https://github.com/opea-project/docs/blob/$HEAD_REF"
          fi
          link_head="https://github.com/opea-project/docs/blob/main"

          # Canonical checkout root; stripped from resolved paths to obtain the
          # repository-relative part of each link target.
          workspace_root=$(realpath "${{github.workspace}}")

          # grep exits 1 when nothing matches; tolerate that under pipefail.
          png_lines=$(grep -Eo '\]\([^)]+\)' --include='*.md' -r . | grep -Ev 'http' | grep -Ev 'mailto' || true)
          if [ -n "$png_lines" ]; then
            # Read line by line so matches are neither word-split nor glob-expanded.
            while IFS= read -r png_line; do
              refer_path=$(echo "$png_line" | cut -d':' -f1 | cut -d'/' -f2-)
              png_path=$(echo "$png_line" | cut -d '(' -f2 | cut -d ')' -f1)
              if [[ "${png_path:0:1}" == "/" ]]; then
                # Repository-absolute link.
                check_path="${{github.workspace}}$png_path"
              elif [[ "${png_path:0:1}" == "#" ]]; then
                # Anchor inside the referring file itself.
                check_path="${{github.workspace}}/$refer_path$png_path"
              else
                # Link relative to the directory of the referring file.
                check_path="${{github.workspace}}/$(dirname "$refer_path")/$png_path"
              fi

              # Test realpath inside the condition: as a bare assignment its
              # failure would terminate the script before anything is reported.
              if ! real_path=$(realpath "$check_path"); then
                echo "Path $png_path in file ${{github.workspace}}/$refer_path does not exist"
                fail="TRUE"
              else
                repo_path=${real_path#"$workspace_root"}
                url=$link_head$repo_path
                response=$(http_status -I "$url")
                if [[ "$response" != "200" ]]; then
                  echo "**********Validation failed, status code: $response try again**********"
                  response_retry=$(http_status "$url")
                  if [[ "$response_retry" == "200" ]]; then
                    echo "*****Retry successful*****"
                  else
                    # Not on main: the target may be added by this pull request.
                    echo "Retry failed. Check branch $HEAD_REF"
                    url_dev=$branch$repo_path
                    response=$(http_status -I "$url_dev")
                    if [[ "$response" != "200" ]]; then
                      echo "**********Validation failed, status code: $response, try again**********"
                      response_retry=$(http_status "$url_dev")
                      if [[ "$response_retry" == "200" ]]; then
                        echo "*****Retry successful*****"
                      else
                        echo "Invalid path from ${{github.workspace}}/$refer_path: $png_path status code: $response_retry"
                        echo "$png_line"
                        fail="TRUE"
                      fi
                    else
                      echo "Check branch $HEAD_REF successfully."
                    fi
                  fi
                fi
              fi
            done <<< "$png_lines"
          fi

          if [[ "$fail" == "TRUE" ]]; then
            exit 1
          else
            echo "All relative links valid."
          fi
        shell: bash