# Add hyperlinks and paths validation. #25

# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
name: Check Paths and Hyperlinks

# Validates the links found in every Markdown file of a pull request that
# targets main:
#   * absolute http(s) links must answer with HTTP 200;
#   * relative links must resolve to a page on GitHub, either on the main
#     branch of opea-project/docs or on the head branch of the pull request.
on:
  pull_request:
    branches: [main]
    types: [opened, reopened, ready_for_review, synchronize]

jobs:
  check-the-validity-of-hyperlinks-in-README:
    runs-on: ubuntu-latest
    steps:
      - name: Clean Up Working Directory
        # `:?` makes the shell refuse to expand to a bare "/*" should the
        # variable ever be empty.
        run: sudo rm -rf "${GITHUB_WORKSPACE:?}"/*

      - name: Checkout Repo docs
        uses: actions/checkout@v4

      - name: Check the Validity of Hyperlinks
        shell: bash
        run: |
          # `shell: bash` runs this script with `-eo pipefail`, so any command
          # that is allowed to fail (grep without matches, curl against a dead
          # host) is guarded explicitly; otherwise the step would abort before
          # the retry logic and the diagnostics below get a chance to run.
          cd "$GITHUB_WORKSPACE"
          fail="FALSE"
          pending_links=()

          # Links that are not probed automatically; they are only reported so
          # that a person can verify them.
          manual_links=(
            "https://intel.sharepoint.com/:v:/s/mlconsultingandsupport/EZa7vjON10ZCpMvE7U-SPMwBRXbVHqe1Ybsa-fmnXayNUA?e=f6FPsl"
            "https://intel.sharepoint.com/:v:/s/mlconsultingandsupport/ESMIcBseFTdIuqkoB7TZy6ABfwR9CkfV49TvTa1X_Jihkg?e=zMH7O7"
          )

          # Print the HTTP status obtained for URL $1 after following
          # redirects; prints 000 when curl produced no status at all.
          http_status() {
            local code
            code=$(curl -L -s -o /dev/null -w "%{http_code}" "$1") || true
            printf '%s' "${code:-000}"
          }

          # Split one grep hit of the form "./dir/file.md:](https://host/x)"
          # into the globals $path (file, without the leading "./") and $url.
          parse_hit() {
            path=${1%%:*}
            path=${path#./}
            url=${1#*']('}
            url=${url%')'}
            url=${url%%[[:space:]]*}   # drop an optional link title
            url=${url%.git}
          }

          # Succeed when URL $1 is one of the manually verified links.
          is_manual_link() {
            local candidate
            for candidate in "${manual_links[@]}"; do
              if [[ "$candidate" == "$1" ]]; then
                return 0
              fi
            done
            return 1
          }

          # First pass: probe every link, retry once immediately, and queue the
          # links that are still failing for the slower retry pass.
          # Hits are read line by line so that paths or link text containing
          # spaces are not split into several bogus entries.
          while IFS= read -r hit; do
            parse_hit "$hit"
            if is_manual_link "$url"; then
              echo "Link $url from $GITHUB_WORKSPACE/$path need to be verified by a real person."
              continue
            fi
            status=$(http_status "$url")
            if [[ "$status" != "200" ]]; then
              echo "**********Validation failed, status code: $status, try again**********"
              status=$(http_status "$url")
              if [[ "$status" == "200" ]]; then
                echo "*****Retry successful*****"
              else
                pending_links+=("$hit")
                echo "Status code: $status, Link $url validation failed, will retry later."
              fi
            fi
          done < <(grep -Eo '\]\(http[s]?://[^)]+\)' --include='*.md' -r . | grep -Ev 'linkedin' || true)

          # Second pass: up to five further attempts, three seconds apart.
          echo "**************Start Retry**************"
          for hit in "${pending_links[@]}"; do
            parse_hit "$hit"
            recovered="FALSE"
            for attempt in 1 2 3 4 5; do
              status=$(http_status "$url")
              if [[ "$status" == "200" ]]; then
                echo "$url Retry successful"
                recovered="TRUE"
                break
              fi
              echo "$url Validation failed, retrying..."
              sleep 3
            done
            if [[ "$recovered" != "TRUE" ]]; then
              echo "Invalid link from $GITHUB_WORKSPACE/$path: $url status code: $status"
              fail="TRUE"
            fi
          done

          if [[ "$fail" == "TRUE" ]]; then
            exit 1
          fi
          echo "All hyperlinks are valid."

  check-the-validity-of-relative-path:
    runs-on: ubuntu-latest
    steps:
      - name: Clean up Working Directory
        # `:?` makes the shell refuse to expand to a bare "/*" should the
        # variable ever be empty.
        run: sudo rm -rf "${GITHUB_WORKSPACE:?}"/*

      - name: Checkout Repo docs
        uses: actions/checkout@v4

      - name: Checking Relative Path Validity
        shell: bash
        env:
          # Handed over through the environment instead of being expanded
          # inside the script text, so that a crafted branch or repository
          # name cannot inject shell commands.
          HEAD_REPO: ${{ github.event.pull_request.head.repo.full_name }}
          HEAD_REF: ${{ github.event.pull_request.head.ref }}
        run: |
          # `shell: bash` runs this script with `-eo pipefail`, so commands
          # that are allowed to fail (grep without matches, curl, realpath)
          # are guarded explicitly instead of inspecting `$?` afterwards.
          cd "$GITHUB_WORKSPACE"
          fail="FALSE"
          workspace=$(realpath "$GITHUB_WORKSPACE")
          # A relative link is accepted when it exists on main ...
          link_head="https://github.com/opea-project/docs/blob/main"
          # ... or on the head branch of the pull request (which may live in a
          # fork, whatever that fork is called).
          branch="https://github.com/$HEAD_REPO/blob/$HEAD_REF"

          # Print the HTTP status for the URL given as the last argument,
          # following redirects; leading arguments are extra curl options
          # (for example -I for a HEAD request). Prints 000 when curl
          # produced no status at all.
          http_status() {
            local code
            code=$(curl -L -s -o /dev/null -w "%{http_code}" "$@") || true
            printf '%s' "${code:-000}"
          }

          # Each hit looks like "./dir/file.md:](relative/target)".
          while IFS= read -r hit; do
            refer_path=${hit%%:*}
            refer_path=${refer_path#./}
            target=${hit#*']('}
            target=${target%')'}
            target=${target%%[[:space:]]*}   # drop an optional link title

            case "$target" in
              /*)   check_path="$workspace$target" ;;                          # repository-absolute
              '#'*) check_path="$workspace/$refer_path$target" ;;              # anchor in the same file
              *)    check_path="$workspace/$(dirname "$refer_path")/$target" ;; # relative to the file
            esac

            if ! real_path=$(realpath "$check_path"); then
              echo "Path $target in file $GITHUB_WORKSPACE/$refer_path does not exist"
              fail="TRUE"
              continue
            fi

            # Strip exactly the workspace prefix; cutting at the last "/docs"
            # would mangle any path that contains a nested docs directory.
            rel_path=${real_path#"$workspace"}

            # Main branch: HEAD request first, then one GET retry.
            url="$link_head$rel_path"
            status=$(http_status -I "$url")
            if [[ "$status" == "200" ]]; then
              continue
            fi
            echo "**********Validation failed, status code: $status try again**********"
            status=$(http_status "$url")
            if [[ "$status" == "200" ]]; then
              echo "*****Retry successful*****"
              continue
            fi

            # Pull-request branch: same HEAD-then-GET sequence.
            echo "Retry failed. Check branch $HEAD_REF"
            url_dev="$branch$rel_path"
            status=$(http_status -I "$url_dev")
            if [[ "$status" == "200" ]]; then
              echo "Check branch $HEAD_REF successfully."
              continue
            fi
            echo "**********Validation failed, status code: $status, try again**********"
            status=$(http_status "$url_dev")
            if [[ "$status" == "200" ]]; then
              echo "*****Retry successful*****"
            else
              echo "Invalid path from $GITHUB_WORKSPACE/$refer_path: $target status code: $status"
              echo "$hit"
              fail="TRUE"
            fi
          done < <(grep -Eo '\]\([^)]+\)' --include='*.md' -r . | grep -Ev 'http' | grep -Ev 'mailto' || true)

          if [[ "$fail" == "TRUE" ]]; then
            exit 1
          fi
          echo "All relative links valid."