From 0457368038be45cf5344063e98929f9cb4aec800 Mon Sep 17 00:00:00 2001 From: Patrice Chalin Date: Mon, 21 Oct 2024 11:35:50 -0400 Subject: [PATCH] [i18n, CI] How to deal with broken links in non-en pages when updating en pages (#5448) --- .cspell.yml | 2 + .htmltest.yml | 8 +- content/en/blog/_index.md | 6 +- content/en/docs/contributing/localization.md | 28 ++++ .../concepts/instrumentation/libraries.md | 4 + .../concepts/instrumentation/libraries.md | 4 + package.json | 9 +- scripts/htmltest-config.pl | 135 ++++++++++++++++++ scripts/htmltest-config.sh | 8 ++ 9 files changed, 196 insertions(+), 8 deletions(-) create mode 100755 scripts/htmltest-config.pl create mode 100755 scripts/htmltest-config.sh diff --git a/.cspell.yml b/.cspell.yml index 8e7b3da37da6..57d3c4fbc4f2 100644 --- a/.cspell.yml +++ b/.cspell.yml @@ -53,3 +53,5 @@ dictionaries: - softwareTerms # Other - companies +words: # Valid words across all locales + - htmltest diff --git a/.htmltest.yml b/.htmltest.yml index 69135fd32180..0d3a9386b8e2 100644 --- a/.htmltest.yml +++ b/.htmltest.yml @@ -7,9 +7,13 @@ IgnoreInternalEmptyHash: true # TODO: remove after resolution of https://github. CheckMailto: false TestFilesConcurrently: true IgnoreDirs: + # DO NOT EDIT! IgnoreDirs list is auto-generated from markdown file front matter. + # TODO drop next line after https://github.com/open-telemetry/opentelemetry.io/issues/5423 is fixed for ja pages: + - ^ja/docs/concepts/instrumentation/libraries/ + # TODO drop next line after https://github.com/open-telemetry/opentelemetry.io/issues/5423 is fixed for pt pages: + - ^pt/docs/concepts/instrumentation/libraries/ - ^blog/(\d+/)?page/\d+ - # TODO drop after https://github.com/open-telemetry/opentelemetry.io/issues/5423 is fixed: - - (ja|pt)/docs/concepts/instrumentation/libraries/ + # DO NOT EDIT! IgnoreDirs list is auto-generated from markdown file front matter. 
IgnoreInternalURLs: # list of paths IgnoreURLs: # list of regexs of paths or URLs to be ignored - ^/api$ diff --git a/content/en/blog/_index.md b/content/en/blog/_index.md index 022dd0feceae..7d0b3e2d57bf 100644 --- a/content/en/blog/_index.md +++ b/content/en/blog/_index.md @@ -1,7 +1,9 @@ --- title: Blog -menu: - main: { weight: 50 } +menu: { main: { weight: 50 } } redirects: [{ from: '', to: '2024/ 301!' }] outputs: [HTML, RSS] +htmltest: + IgnoreDirs: + - ^blog/(\d+/)?page/\d+ --- diff --git a/content/en/docs/contributing/localization.md b/content/en/docs/contributing/localization.md index f1a25fd11505..2d9d2e55673e 100644 --- a/content/en/docs/contributing/localization.md +++ b/content/en/docs/contributing/localization.md @@ -11,6 +11,34 @@ English is the default language, with US English as the default (implicit) local A growing number of other localizations are supported, as can be seen from the languages dropdown menu in the top nav. +## English language maintainer guidance + +### When link checking fails for non-English pages + +English is the default language of the OpenTelemetry website. After you add, +edit, or reorganize English language documentation, link checking may fail for +non-English pages. When this happens: + + + +- Do **not** fix the broken links. Each non-English page is associated with a + specific commit of the corresponding English page, as identified by the git + commit hash value of the `default_lang_commit` front matter key. +- Configure the link checker to ignore the non-English pages by adding the + following to the page's front matter, or to the closest common ancestor file, + when more than one page has link errors: + ```yaml + htmltest: + # TODO: remove the IgnoreDirs once broken links are fixed + IgnoreDirs: + - path-regex/to/non-en/directory/contain/files/to/ignore + - path-2-etc + ``` +- Run `npm run check:links` and include any updates to the `.htmltest.yml` + config file with your PR. 
+ + + ## Translation guidance We recommend that you follow the guidance offered in this section when diff --git a/content/ja/docs/concepts/instrumentation/libraries.md b/content/ja/docs/concepts/instrumentation/libraries.md index e7c89111f324..38d1ed962104 100644 --- a/content/ja/docs/concepts/instrumentation/libraries.md +++ b/content/ja/docs/concepts/instrumentation/libraries.md @@ -3,6 +3,10 @@ title: ライブラリ description: ライブラリにネイティブ計装を追加する方法を紹介します。 weight: 40 default_lang_commit: d8c5612 +htmltest: + IgnoreDirs: + # TODO drop next line after https://github.com/open-telemetry/opentelemetry.io/issues/5423 is fixed for ja pages: + - ^ja/docs/concepts/instrumentation/libraries/ --- OpenTelemetryは、多くのライブラリに[計装ライブラリ][instrumentation libraries]を提供していて、これは通常、ライブラリフックやモンキーパッチライブラリコードを通して行われます。 diff --git a/content/pt/docs/concepts/instrumentation/libraries.md b/content/pt/docs/concepts/instrumentation/libraries.md index 3410ddd032f4..f9dd1e39938d 100644 --- a/content/pt/docs/concepts/instrumentation/libraries.md +++ b/content/pt/docs/concepts/instrumentation/libraries.md @@ -3,6 +3,10 @@ title: Bibliotecas description: Aprenda como adicionar instrumentação nativa à sua biblioteca. weight: 40 default_lang_commit: a570a00c7a238ffe26528d7bfb20efdbaf939c39 +htmltest: + IgnoreDirs: + # TODO drop next line after https://github.com/open-telemetry/opentelemetry.io/issues/5423 is fixed for pt pages: + - ^pt/docs/concepts/instrumentation/libraries/ --- O OpenTelemetry fornece [bibliotecas de instrumentação][] para várias diff --git a/package.json b/package.json index 5684f3fa47c1..0beecdac83d4 100644 --- a/package.json +++ b/package.json @@ -1,5 +1,4 @@ { - "spelling": "cSpell:ignore docsy elemetry htmltest hugo loglevel netlify nowrap prebuild precheck preinstall postbuild postget refcache textlint -", "Notes": [ "The 'all' runs _all_ named scripts in sequence, even if one fails; and exits with failure in that case." 
], @@ -86,8 +85,8 @@ "prebuild:preview": "npm run _prebuild", "prebuild:production": "npm run _prebuild", "prebuild": "npm run _prebuild", - "precheck:links:internal": "npm run build", - "precheck:links": "npm run build", + "precheck:links:internal": "npm run build && npm run update:htmltest-config", + "precheck:links": "npm run build && npm run update:htmltest-config", "prefix:submodules": "npm run update:submodule", "prenetlify-build:production": "echo 'IgnoreTagAttribute: rel' >> .htmltest.yml", "prepare": "npm run seq -- get:submodule _prepare:docsy", @@ -102,6 +101,7 @@ "test-and-fix": "npm run seq -- check fix:dict fix:filenames", "test": "npm run check", "update:docsy-dep": "npm install --save-dev autoprefixer@latest postcss-cli@latest", + "update:htmltest-config": "scripts/htmltest-config.sh", "update:hugo": "npm install --save-dev --save-exact hugo-extended@latest", "update:hugo+": "npm run update:hugo && npm run update:docsy-dep", "update:netlify": "npm install --save-optional netlify-cli@latest", @@ -160,5 +160,6 @@ "prettier": { "proseWrap": "always", "singleQuote": true - } + }, + "spelling": "cSpell:ignore docsy elemetry htmltest hugo loglevel netlify nowrap postnetlify prebuild precheck preinstall postbuild postget refcache textlint -" }
diff --git a/scripts/htmltest-config.pl b/scripts/htmltest-config.pl
new file mode 100755
index 000000000000..ebbf0d61a911
--- /dev/null
+++ b/scripts/htmltest-config.pl
@@ -0,0 +1,176 @@
+#!/usr/bin/perl
+#
+# Regenerates the IgnoreDirs list of .htmltest.yml from the htmltest entries
+# found in the front matter of the markdown files given as arguments.
+
+use strict;
+use warnings;
+
+my $gD = 0; # Debug flag: when true, extracted config lines are printed.
+
+sub main {
+  my @ignore_dirs;
+
+  collect_htmltest_config_from_front_matter(\@ignore_dirs, @ARGV);
+  update_htmltest_config_file(\@ignore_dirs);
+}
+
+sub collect_htmltest_config_from_front_matter {
+  # Appends to @$ignore_dirs_ref the htmltest config entries extracted from
+  # the front matter of each of @files, in the order the files are given.
+  my ($ignore_dirs_ref, @files) = @_;
+
+  foreach my $file_path (@files) {
+    push @$ignore_dirs_ref, extract_htmltest_config($file_path);
+  }
+}
+
+sub extract_htmltest_config {
+  # Returns the list of IgnoreDirs entries and comment lines extracted from
+  # the htmltest config in the front matter of $file_path. Returns an empty
+  # list when the file has no htmltest config or when it is not recognized.
+  # Dies if the file cannot be opened.
+  my ($file_path) = @_;
+
+  open my $fh, '<', $file_path or die "Could not open '$file_path': $!";
+  my $content = do { local $/; <$fh> };
+  close $fh;
+
+  # Front matter must open the file. Anchoring prevents a pair of `---`
+  # thematic breaks in the page body from being read as front matter.
+  return unless $content =~ /\A\s*---\n(.*?)\n---/s;
+
+  my $front_matter = $1;
+  my @htmltest_config = _extract_htmltest_config($front_matter);
+  return unless @htmltest_config;
+
+  shift @htmltest_config; # Drop the `htmltest:` key line.
+
+  unless (@htmltest_config) {
+    warn "Warning: Failed to extract htmltest config from front matter in file '$file_path'.\n";
+    return;
+  }
+
+  # Comment lines may come before the first key, so test the first
+  # non-comment line rather than the first line.
+  my ($first_key_line) = grep { !/^#/ } @htmltest_config;
+  if (defined $first_key_line && $first_key_line =~ /^IgnoreDirs:/i) {
+    return _extract_ignore_dirs($file_path, @htmltest_config);
+  }
+
+  # TODO: Add support for `IgnoreURLs`.
+
+  warn "Warning: Unrecognized htmltest config from front matter in file '$file_path'.\n";
+  # Explicit empty return: otherwise the true value returned by `warn` would
+  # be handed back to the caller and end up as an IgnoreDirs entry.
+  return;
+}
+
+sub _extract_ignore_dirs {
+  # Returns the IgnoreDirs entries found in the given config lines, together
+  # with any comment lines (which start with `#`), in their original order.
+  # Warns about lines that are neither.
+  my ($file_path,
+    @ignore_dirs_config_lines # Can include comment lines
+  ) = @_;
+  my @config;
+
+  foreach my $line (@ignore_dirs_config_lines) {
+    next if $line =~ /^IgnoreDirs:\s*$/i;
+    if ($line =~ /^\s*(#.*)$/) {
+      # Anchored so that an entry containing `#` is not taken for a comment.
+      push @config, $1;
+    } elsif ($line =~ /^IgnoreDirs:\s*\[\s*(.*?)\s*\]/i) {
+      # Flow-style list: one entry per comma-separated item.
+      push @config, (split /\s*,\s*/, $1);
+    } elsif ($line =~ /^\s*-\s*(.*?)\s*$/) {
+      # Block-style item: a single entry, even if it contains commas (as in
+      # a `{1,2}` regex quantifier).
+      push @config, $1 if length $1;
+    } else {
+      warn "Warning: Unrecognized htmltest IgnoreDirs config from front matter in file '$file_path': $line\n";
+    }
+  }
+  return @config;
+}
+
+sub _extract_htmltest_config {
+  # Returns the `htmltest:` line of $front_matter followed by the lines
+  # nested under it (those starting with two or more whitespace characters),
+  # with leading whitespace trimmed away. Returns an empty list when there is
+  # no `htmltest:` line.
+
+  my ($front_matter) = @_;
+  my @lines = split /\n/, $front_matter;
+  my @htmltest_lines;
+  my $in_htmltest_section = 0;
+
+  foreach my $line (@lines) {
+    if ($line =~ /^htmltest:/) {
+      $in_htmltest_section = 1;
+      push @htmltest_lines, $line;
+    } elsif ($in_htmltest_section) {
+      if ($line =~ /^(\s{2,})(.*)$/) {
+        push @htmltest_lines, $2;
+        # `print`, not `printf`: $line must not be treated as a format.
+        print " > Config line: $line" if $gD;
+      } else {
+        last; # First line that is not nested ends the section.
+      }
+    }
+  }
+  return @htmltest_lines;
+}
+
+sub update_htmltest_config_file {
+  # Rewrites `.htmltest.yml` (relative to the current directory), replacing
+  # the lines under its `IgnoreDirs:` key with @$ignore_dirs_ref. Entries
+  # starting with `#` are written as comments, all others as list items.
+  # Dies if the file cannot be read or written.
+  my ($ignore_dirs_ref) = @_;
+  my $htmltest_config_path = '.htmltest.yml';
+  # NOTE(review): the leading whitespace of this message and of the prefixes
+  # below sets the YAML indentation; confirm it matches `.htmltest.yml`.
+  my $do_not_edit_msg = " # DO NOT EDIT! IgnoreDirs list is auto-generated from markdown file front matter.\n";
+
+  # Read config file as array of lines
+  open my $fh, '<', $htmltest_config_path or die "Could not open '$htmltest_config_path' for reading: $!";
+  my @lines = <$fh>;
+  close $fh;
+
+  # Replace the existing IgnoreDirs entries with the new ones
+  my $in_ignore_dirs = 0;
+  my $found_ignore_dirs = 0;
+  my @new_lines;
+  foreach my $line (@lines) {
+    if ($line =~ /^IgnoreDirs:/) {
+      push @new_lines, ($line, $do_not_edit_msg);
+      foreach my $ignore_dir (@$ignore_dirs_ref) {
+        my $prefix = $ignore_dir =~ /^#/ ? ' ' : ' - ';
+        push @new_lines, "$prefix$ignore_dir\n";
+      }
+      push @new_lines, $do_not_edit_msg;
+      $in_ignore_dirs = 1;
+      $found_ignore_dirs = 1;
+    } elsif ($in_ignore_dirs) {
+      # Drop the old entries: comment, list-item, and blank lines.
+      next if $line =~ /^\s*([#-]|$)/;
+      $in_ignore_dirs = 0;
+      push @new_lines, $line;
+    } else {
+      push @new_lines, $line;
+    }
+  }
+
+  unless ($found_ignore_dirs) {
+    warn "Warning: No IgnoreDirs key found in '$htmltest_config_path'; file left unchanged.\n";
+    return;
+  }
+
+  open my $fh_out, '>', $htmltest_config_path or die "Could not open '$htmltest_config_path' for writing: $!";
+  print $fh_out @new_lines;
+  # Buffered write errors only surface at close, so check it.
+  close $fh_out or die "Could not write '$htmltest_config_path': $!";
+}
+
+main();
diff --git a/scripts/htmltest-config.sh b/scripts/htmltest-config.sh
new file mode 100755
index 000000000000..02696c7c919c
--- /dev/null
+++ b/scripts/htmltest-config.sh
@@ -0,0 +1,17 @@
+#!/usr/bin/env bash
+#
+# We handle listing all markdown files here because it is more portable across
+# supported operating systems.
+
+SCRIPT_DIR=$(dirname "$0")
+
+# Read the paths into an array, one per line of `find` output, so that a path
+# containing spaces reaches the Perl script as a single argument.
+# NOTE(review): `find` order is filesystem-dependent, so the order of the
+# generated list may differ between machines; consider sorting.
+FILES=()
+while IFS= read -r file; do
+  FILES+=("$file")
+done < <(find content -name "*.md")
+
+exec "$SCRIPT_DIR/htmltest-config.pl" "${FILES[@]}"