Skip to content

Commit

Permalink
[FIX]: Handle broken links checker edge-cases (#2545)
Browse files Browse the repository at this point in the history
  • Loading branch information
Joaquín R. Montes authored Sep 28, 2023
1 parent 4089b64 commit 92b16c7
Show file tree
Hide file tree
Showing 12 changed files with 1,176 additions and 46 deletions.
10 changes: 5 additions & 5 deletions .github/actions/validate-docs-links/lib/index.js

Large diffs are not rendered by default.

462 changes: 462 additions & 0 deletions .github/actions/validate-docs-links/lib/licenses.txt

Large diffs are not rendered by default.

5 changes: 3 additions & 2 deletions .github/actions/validate-docs-links/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@
"devDependencies": {
"@types/github-slugger": "^1.3.0",
"@types/node": "^20.4.9",
"ts-node": "^10.9.1",
"@vercel/ncc": "0.34.0"
"@vercel/ncc": "0.34.0",
"ts-node": "^10.9.1"
},
"dependencies": {
"@actions/core": "^1.10.0",
Expand All @@ -27,6 +27,7 @@
"rehype-raw": "4.0.1",
"remark-parse": "7.0.1",
"remark-rehype": "5.0.0",
"remark-mdx": "^2.3.0",
"typescript": "^5.1.6",
"unified": "8.4.1",
"unist-util-visit": "2.0.0"
Expand Down
68 changes: 41 additions & 27 deletions .github/actions/validate-docs-links/src/checker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ const fs = require('fs/promises')
const path = require('path')
const unified = require('unified')
const markdown = require('remark-parse')
const remarkMdx = require('remark-mdx')
const remarkToRehype = require('remark-rehype')
const raw = require('rehype-raw')
const visit = require('unist-util-visit')
Expand All @@ -10,7 +11,7 @@ const GithubSlugger = require('github-slugger')
import type { Node, Data } from 'unist'
/**
* This script validates internal links in /docs including internal,
* hash, source and related links. It does not validate external links.
* hash, source and relative links. It does not validate external links.
* 1. Collects all .mdx files.
* 2. For each file, it extracts the content, metadata, and heading slugs.
* 3. It creates a document map to efficiently lookup documents by path.
Expand Down Expand Up @@ -38,7 +39,7 @@ interface Errors {
link: string[]
hash: string[]
source: string[]
related: string[]
relative: string[]
}

type ErrorType = Exclude<keyof Errors, 'doc'>
Expand All @@ -65,6 +66,7 @@ type FailureFunction = (message: string) => void

const RELATIVE_PATH = '/'
const EXCLUDED_HASHES: string[] = []
const EXCLUDED_PATHS: string[] = ['/movies.json']

const slugger = new GithubSlugger()

Expand Down Expand Up @@ -115,6 +117,7 @@ function getHeadingsFromMarkdownTree(tree: Node<Data>): string[] {
// Create a processor to parse MDX content
const markdownProcessor = unified()
.use(markdown)
.use(remarkMdx)
.use(remarkToRehype, { allowDangerousHTML: true })
.use(raw)
.use(function compiler() {
Expand Down Expand Up @@ -159,21 +162,15 @@ function validateInternalLink(errors: Errors, href: string): void {
// /docs/api/example#heading -> ["api/example", "heading"]
const [link, hash] = href.split('#')

if (EXCLUDED_PATHS.includes(link)) return

// check if doc page exists
const foundPage = documentMap.get(link.replace(/^\/+/, ''))


if (!foundPage) {
errors.link.push(href)
} else if (hash && !EXCLUDED_HASHES.includes(hash)) {
// TODO: Check if this block is still needed
// // Account for documents that pull their content from another document
// const foundPageSource = foundPage.source
// ? documentMap.get(foundPage.source)
// : undefined

// Check if the hash link points to an existing section within the document
// const hashFound = (foundPageSource || foundPage).headings.includes(hash)
const hashFound = foundPage.headings.includes(hash)

if (!hashFound) {
Expand Down Expand Up @@ -205,29 +202,46 @@ function traverseTreeAndValidateLinks(tree: any, doc: Document, setFailed: Failu
link: [],
hash: [],
source: [],
related: [],
relative: [],
}

try {
visit(tree, (node: any) => {
if (node.type === 'element' && node.tagName === 'a') {
const href = node.properties.href
// Matches markdown links like [text](link)
const linkRegex = /\[[^\[\]]+\]\([^\(\)]+\)/gm
// Matches all links that use some kind of protocol (e.g. http://, https://, mailto:, etc.)
const nonInternalLinkRegex = /^(?:[a-z+]+:)?\/\/|^[a-z]+:/i;

if (!href) return
function validateNodes (node: any, parse: boolean = false) {
// Handle links in custom components that were not correctly parsed
if (node.type === 'text' && linkRegex.test(node.value)) {
const customComponentTree = markdownProcessor.parse(node.value)
traverseRecursively(customComponentTree)
}

if (href.startsWith(RELATIVE_PATH)) {
validateInternalLink(errors, href)
} else if (href.startsWith('#')) {
validateHashLink(errors, href, doc)
}
if (node.type === 'element' && node.tagName === 'a' || node.type === 'link' || node.type === 'buttonlink') {
const href = node.properties?.href ?? node.url
if (!href) return

if (href.startsWith(RELATIVE_PATH)) {
validateInternalLink(errors, href)
} else if (href.startsWith('#')) {
validateHashLink(errors, href, doc)
} else if (!nonInternalLinkRegex.test(href)) {
errors.relative.push(href)
}
})
}
}

validateSourceLinks(doc, errors)
} catch (error) {
setFailed('Error traversing tree: ' + error)
function traverseRecursively (tree: any) {
try {
visit(tree, validateNodes)
validateSourceLinks(doc, errors)
} catch (error) {
setFailed('Error traversing tree: ' + error)
}
}

traverseRecursively(tree)

return errors
}

Expand Down Expand Up @@ -261,7 +275,7 @@ export async function validateAllInternalLinks(basePath: string, setFailed: Fail
link: [],
hash: [],
source: [],
related: [],
relative: [],
} as Errors
}
})
Expand All @@ -272,7 +286,7 @@ export async function validateAllInternalLinks(basePath: string, setFailed: Fail

let errorRows: string[] = []

const errorTypes: ErrorType[] = ['link', 'hash', 'source', 'related']
const errorTypes: ErrorType[] = ['link', 'hash', 'source', 'relative']
allErrors.forEach((errors) => {
const {
doc: { path: docPath },
Expand Down
Loading

0 comments on commit 92b16c7

Please sign in to comment.