[FIX]: Handle broken links checker edge-cases #2545

Merged 9 commits on Sep 28, 2023
10 changes: 5 additions & 5 deletions .github/actions/validate-docs-links/lib/index.js

Large diffs are not rendered by default.

462 changes: 462 additions & 0 deletions .github/actions/validate-docs-links/lib/licenses.txt

Large diffs are not rendered by default.

5 changes: 3 additions & 2 deletions .github/actions/validate-docs-links/package.json
@@ -16,8 +16,8 @@
"devDependencies": {
"@types/github-slugger": "^1.3.0",
"@types/node": "^20.4.9",
"ts-node": "^10.9.1",
"@vercel/ncc": "0.34.0"
"@vercel/ncc": "0.34.0",
"ts-node": "^10.9.1"
},
"dependencies": {
"@actions/core": "^1.10.0",
@@ -27,6 +27,7 @@
"rehype-raw": "4.0.1",
"remark-parse": "7.0.1",
"remark-rehype": "5.0.0",
"remark-mdx": "^2.3.0",
"typescript": "^5.1.6",
"unified": "8.4.1",
"unist-util-visit": "2.0.0"
68 changes: 41 additions & 27 deletions .github/actions/validate-docs-links/src/checker.ts
@@ -2,6 +2,7 @@ const fs = require('fs/promises')
const path = require('path')
const unified = require('unified')
const markdown = require('remark-parse')
const remarkMdx = require('remark-mdx')
const remarkToRehype = require('remark-rehype')
const raw = require('rehype-raw')
const visit = require('unist-util-visit')
@@ -10,7 +11,7 @@ const GithubSlugger = require('github-slugger')
import type { Node, Data } from 'unist'
/**
* This script validates internal links in /docs including internal,
* hash, source and related links. It does not validate external links.
* hash, source and relative links. It does not validate external links.
* 1. Collects all .mdx files.
* 2. For each file, it extracts the content, metadata, and heading slugs.
* 3. It creates a document map to efficiently lookup documents by path.
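To make step 3 concrete, here is a minimal TypeScript sketch of the document-map idea (illustrative only; `DocEntry`, `registerDoc`, and `lookupDoc` are invented names, but the key normalization mirrors the `link.replace(/^\/+/, '')` lookup that appears later in this diff):

```ts
// Minimal sketch: documents are stored under paths without a leading slash,
// so href lookups strip leading slashes before hitting the map.
type DocEntry = { path: string; headings: string[] }

const documentMap = new Map<string, DocEntry>()

function registerDoc(doc: DocEntry): void {
  documentMap.set(doc.path.replace(/^\/+/, ''), doc)
}

function lookupDoc(href: string): DocEntry | undefined {
  // "/docs/api/example#heading" -> "docs/api/example"
  const [link] = href.split('#')
  return documentMap.get(link.replace(/^\/+/, ''))
}
```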
@@ -38,7 +39,7 @@ interface Errors {
link: string[]
hash: string[]
source: string[]
related: string[]
relative: string[]
}

type ErrorType = Exclude<keyof Errors, 'doc'>
@@ -65,6 +66,7 @@ type FailureFunction = (message: string) => void

const RELATIVE_PATH = '/'
const EXCLUDED_HASHES: string[] = []
const EXCLUDED_PATHS: string[] = ['/movies.json']

const slugger = new GithubSlugger()

@@ -115,6 +117,7 @@ function getHeadingsFromMarkdownTree(tree: Node<Data>): string[] {
// Create a processor to parse MDX content
const markdownProcessor = unified()
.use(markdown)
.use(remarkMdx)
.use(remarkToRehype, { allowDangerousHTML: true })
.use(raw)
.use(function compiler() {
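For context, a rough, self-contained sketch of what adding `remark-mdx` to this chain enables: MDX syntax (JSX components and expressions) parses cleanly instead of breaking the tree, so link nodes can still be collected. This shows only the parse step with an illustrative `collectLinkUrls` helper; the action's real processor additionally runs `remark-rehype`, `rehype-raw`, and a pass-through compiler:

```ts
const unified = require('unified')
const markdown = require('remark-parse')
const remarkMdx = require('remark-mdx')
const visit = require('unist-util-visit')

// Parse an .mdx string and collect the URLs of its markdown links.
const processor = unified().use(markdown).use(remarkMdx)

function collectLinkUrls(mdxSource: string): string[] {
  const tree = processor.parse(mdxSource)
  const urls: string[] = []
  // In mdast, markdown links are `link` nodes carrying a `url` property.
  visit(tree, 'link', (node: any) => urls.push(node.url))
  return urls
}
```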
@@ -159,21 +162,15 @@ function validateInternalLink(errors: Errors, href: string): void {
// /docs/api/example#heading -> ["api/example", "heading"]
const [link, hash] = href.split('#')

if (EXCLUDED_PATHS.includes(link)) return

// check if doc page exists
const foundPage = documentMap.get(link.replace(/^\/+/, ''))


if (!foundPage) {
errors.link.push(href)
} else if (hash && !EXCLUDED_HASHES.includes(hash)) {
// TODO: Check if this block is still needed
// // Account for documents that pull their content from another document
// const foundPageSource = foundPage.source
// ? documentMap.get(foundPage.source)
// : undefined

// Check if the hash link points to an existing section within the document
// const hashFound = (foundPageSource || foundPage).headings.includes(hash)
const hashFound = foundPage.headings.includes(hash)

if (!hashFound) {
@@ -205,29 +202,46 @@ function traverseTreeAndValidateLinks(tree: any, doc: Document, setFailed: Failu
link: [],
hash: [],
source: [],
related: [],
relative: [],
}

try {
visit(tree, (node: any) => {
if (node.type === 'element' && node.tagName === 'a') {
const href = node.properties.href
// Matches markdown links like [text](link)
const linkRegex = /\[[^\[\]]+\]\([^\(\)]+\)/gm
// Matches all links that use some kind of protocol (e.g. http://, https://, mailto:, etc.)
const nonInternalLinkRegex = /^(?:[a-z+]+:)?\/\/|^[a-z]+:/i;

if (!href) return
function validateNodes (node: any, parse: boolean = false) {
// Handle links in custom components that were not correctly parsed
if (node.type === 'text' && linkRegex.test(node.value)) {
const customComponentTree = markdownProcessor.parse(node.value)
traverseRecursively(customComponentTree)
}

if (href.startsWith(RELATIVE_PATH)) {
validateInternalLink(errors, href)
} else if (href.startsWith('#')) {
validateHashLink(errors, href, doc)
}
if (node.type === 'element' && node.tagName === 'a' || node.type === 'link' || node.type === 'buttonlink') {
const href = node.properties?.href ?? node.url
if (!href) return

if (href.startsWith(RELATIVE_PATH)) {
validateInternalLink(errors, href)
} else if (href.startsWith('#')) {
validateHashLink(errors, href, doc)
} else if (!nonInternalLinkRegex.test(href)) {
errors.relative.push(href)
}
})
}
}

validateSourceLinks(doc, errors)
} catch (error) {
setFailed('Error traversing tree: ' + error)
function traverseRecursively (tree: any) {
try {
visit(tree, validateNodes)
validateSourceLinks(doc, errors)
} catch (error) {
setFailed('Error traversing tree: ' + error)
}
}

traverseRecursively(tree)

return errors
}
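The heart of the edge-case handling above is a single classification rule applied to every href, whether it comes from an HTML anchor, an mdast `link` node, or a `buttonlink` component. A hedged sketch of that rule (the regex is copied from the diff; `classifyHref` and `LinkKind` are illustrative names, not the action's exports):

```ts
// Matches links that use some kind of protocol (http://, https://, mailto:, etc.)
const nonInternalLinkRegex = /^(?:[a-z+]+:)?\/\/|^[a-z]+:/i

type LinkKind = 'internal' | 'hash' | 'external' | 'relative'

function classifyHref(href: string): LinkKind {
  if (href.startsWith('/')) return 'internal' // checked against the document map
  if (href.startsWith('#')) return 'hash' // checked against the current doc's headings
  if (nonInternalLinkRegex.test(href)) return 'external' // skipped by the checker
  return 'relative' // now reported as an error
}

// classifyHref('./setup') === 'relative'; classifyHref('https://nextjs.org') === 'external'
```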

@@ -261,7 +275,7 @@ export async function validateAllInternalLinks(basePath: string, setFailed: Fail
link: [],
hash: [],
source: [],
related: [],
relative: [],
} as Errors
}
})
@@ -272,7 +286,7 @@ export async function validateAllInternalLinks(basePath: string, setFailed: Fail

let errorRows: string[] = []

const errorTypes: ErrorType[] = ['link', 'hash', 'source', 'related']
const errorTypes: ErrorType[] = ['link', 'hash', 'source', 'relative']
allErrors.forEach((errors) => {
const {
doc: { path: docPath },
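For context, a hedged sketch of the reporting step this hunk feeds: each document's error lists are flattened into one row per broken link, grouped by the four error types. The `DocErrors` shape mirrors the diff's `Errors` interface, while `toErrorRows` and the row format are illustrative assumptions, not the action's actual output:

```ts
interface DocErrors {
  doc: { path: string }
  link: string[]
  hash: string[]
  source: string[]
  relative: string[]
}

type ErrorType = Exclude<keyof DocErrors, 'doc'>

// Flatten per-document error lists into printable rows.
function toErrorRows(allErrors: DocErrors[]): string[] {
  const errorTypes: ErrorType[] = ['link', 'hash', 'source', 'relative']
  const rows: string[] = []
  for (const errors of allErrors) {
    const {
      doc: { path: docPath },
    } = errors
    for (const type of errorTypes) {
      for (const href of errors[type]) {
        rows.push(`${docPath}\t${type}\t${href}`)
      }
    }
  }
  return rows
}
```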