From be54d6b5da1e94a30092e4ad7de548382a813edc Mon Sep 17 00:00:00 2001 From: Patrice Chalin Date: Fri, 9 Aug 2024 10:21:03 -0400 Subject: [PATCH] [infra] Move en words into separate word list, set up lists for es and pt (#5011) --- .cspell.yml | 159 +++-------------------- .cspell/en-words.txt | 138 ++++++++++++++++++++ .cspell/es-palabras.txt | 2 + .cspell/pt-palavras.txt | 0 content/es/_index.md | 1 - package.json | 2 +- scripts/normalize-cspell-front-matter.pl | 27 ++-- 7 files changed, 179 insertions(+), 150 deletions(-) create mode 100644 .cspell/en-words.txt create mode 100644 .cspell/es-palabras.txt create mode 100644 .cspell/pt-palavras.txt diff --git a/.cspell.yml b/.cspell.yml index 8ae286116398..1ea7437cfd1e 100644 --- a/.cspell.yml +++ b/.cspell.yml @@ -12,9 +12,6 @@ ignorePaths: - vendors.yaml - content/ja - content/zh -# words here are only listed for their spelling, if there is a certain way -# to write a word (e.g. OpenTelemetry vs Opentelemetry or cloud native vs -# cloud-native), edit the text-lint rules in .textlintrc.yml patterns: - name: CodeBlock pattern: | @@ -28,140 +25,26 @@ languageSettings: - languageId: markdown ignoreRegExpList: - CodeBlock +dictionaryDefinitions: + - name: en-words + path: .cspell/en-words.txt + - name: es-palabras + path: .cspell/es-palabras.txt + - name: pt-palavras + path: .cspell/pt-palavras.txt dictionaries: - [pt-br, es-es, companies, cpp, dotnet, golang, node, softwareTerms] -words: - - accountingservice - - actix - - adservice - - alibaba - - Alolita - - APAC - - appdynamics - - appender - - appenders - - aspecto - - autoconfiguration - - autoinstrumentation - - autoloaded - - autoloader - - autoloading - - backoff - - caml - - cartservice - - cassandra - - checkoutservice - - Chronosphere - - classpath - - cncf - - currencyservice - - daemonset - - datadog - - discoverability + # Natural languages + - es-es + - pt-br + # Local word lists + - en-words + - es-palabras + - pt-palavras + # Programming languages and software terms + - cpp - dotnet - - Dyla - - dynatrace - - emailservice - - EMEA - - erlang - - errorf - - featureflagservice - - frauddetectionservice - - frontendproxy - - github - - gitpod - - grafana - - Hausenblas - - hugo - - initializers - - instana - - istio - - jaeger - - jaegertracing - - javaagent - - javadoc - - jboss - - jdbc - - julia - - Juraci - - knative - - kotlin - - Kröhling - - kubecon - - kubernetes - - laravel - - lifecycles - - lightstep - - Loffay - - Mancuso - - microservices - - mongodb - - ndjson - - Neumann - - nginx - - ocaml - - opamp - - opencensus - - opensearch - - opentelemetry - - opentracing - - openzipkin - - OSTIF - - otel - - otel-comms - - otel-endusers - - otelcol - - otep - - otlp - - packagist - - pageinfo - - Paixão - - parentbased - - Pavol - - paymentservice - - postgresql - - Pranay - - Prateek - - prepper - - productcatalogservice - - prometheus - - proto - - protobuf - - quantile - - quantiles - - quarkus - - quoteservice - - recommendationservice - - redis - - relref - - Rexed - - Rynn - - semconv - - servlet - - Severin - - Sharma - - shippingservice - - Socha - - Stalnaker - - stdoutmetric - - Strimzi - - symfony - - tabpane - - textlint - - thanos - - tocstop - - tracecontext - - traceidratio - - traceloop - - traceparent - - traefik - - Trask - - uids - - unsampled - - unshallow - - upstreamed - - Villela - - wordpress - - WSGI - - zend - - zipkin + - golang + - node + - softwareTerms + # Other + - companies diff --git a/.cspell/en-words.txt b/.cspell/en-words.txt new file mode 100644 index 000000000000..d8157679f184 --- /dev/null +++ b/.cspell/en-words.txt @@ -0,0 +1,138 @@ +# Words listed here are only for their spelling. If there is a certain way to +# capitalize the word, add capitalization rules to text-lint rules in +# .textlintrc.yml + +accountingservice +actix +adservice +alibaba +Alolita +APAC +appdynamics +appender +appenders +aspecto +autoconfiguration +autoinstrumentation +autoloaded +autoloader +autoloading +backoff +caml +cartservice +cassandra +checkoutservice +Chronosphere +classpath +cncf +currencyservice +daemonset +datadog +discoverability +dotnet +Dyla +dynatrace +emailservice +EMEA +erlang +errorf +featureflagservice +frauddetectionservice +frontendproxy +github +gitpod +grafana +Hausenblas +hugo +initializers +instana +istio +jaeger +jaegertracing +javaagent +javadoc +jboss +jdbc +julia +Juraci +knative +kotlin +Kröhling +kubecon +kubernetes +laravel +lifecycles +lightstep +Loffay +Mancuso +microservices +mongodb +ndjson +Neumann +nginx +ocaml +opamp +opencensus +opensearch +opentelemetry +opentracing +openzipkin +OSTIF +otel +otel-comms +otel-endusers +otelcol +otep +otlp +packagist +pageinfo +Paixão +parentbased +Pavol +paymentservice +postgresql +Pranay +Prateek +prepper +productcatalogservice +prometheus +proto +protobuf +quantile +quantiles +quarkus +quoteservice +recommendationservice +redis +relref +Rexed +Rynn +semconv +servlet +Severin +Sharma +shippingservice +Socha +Stalnaker +stdoutmetric +Strimzi +symfony +tabpane +textlint +thanos +tocstop +tracecontext +traceidratio +traceloop +traceparent +traefik +Trask +uids +unsampled +unshallow +upstreamed +Villela +wordpress +WSGI +zend +zipkin diff --git a/.cspell/es-palabras.txt b/.cspell/es-palabras.txt new file mode 100644 index 000000000000..ff067e39be9d --- /dev/null +++ b/.cspell/es-palabras.txt @@ -0,0 +1,2 @@ +observabilidad +telemetría \ No newline at end of file diff --git a/.cspell/pt-palavras.txt b/.cspell/pt-palavras.txt new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/content/es/_index.md b/content/es/_index.md index 614be35fa10f..055348df93ce 100644 --- a/content/es/_index.md +++ b/content/es/_index.md @@ -9,7 +9,6 @@ developer_note: de imagen que contenga la palabra "background" en su nombre. show_banner: true default_lang_commit: 7ac35d6b429165bbe6c28bdd91feeae83fd35142 -cSpell:ignore: observabilidad telemetría ---
diff --git a/package.json b/package.json index 5804b4f39eb4..b737502604d4 100644 --- a/package.json +++ b/package.json @@ -61,7 +61,7 @@ "diff:check": "npm run _diff:check || (echo; echo 'WARNING: the files above have not been committed'; echo)", "diff:fail": "npm run _diff:check || (echo; echo 'ERROR: the files above have changed. Locally rerun `npm run test-and-fix` and commit changes'; echo; exit 1)", "fix:all": "npm run seq -- $(npm -s run _list:fix:*)", - "fix:dict": "find content layouts -name \"*.md\" -print0 | xargs -0 scripts/normalize-cspell-front-matter.pl", + "fix:dict": "find content/en layouts -name \"*.md\" -print0 | xargs -0 scripts/normalize-cspell-front-matter.pl", "fix:filenames": "npm run _rename-to-kebab-case", "fix:format": "npm run format", "fix:i18n:all": "scripts/check-i18n.sh -a -c HEAD", diff --git a/scripts/normalize-cspell-front-matter.pl b/scripts/normalize-cspell-front-matter.pl index 4f13dce1d120..929f3e3b181b 100755 --- a/scripts/normalize-cspell-front-matter.pl +++ b/scripts/normalize-cspell-front-matter.pl @@ -8,7 +8,7 @@ my $lineLenLimit = 79; my $last_file = ''; my $last_line = ''; -my %dictionary = getSiteWideDictWords('.cspell.yml', '.textlintrc.yml'); +my %dictionary = getSiteWideDictWords('.cspell/en-words.txt', '.textlintrc.yml'); while (<>) { if (/^\s*(spelling: |-\s*)?cSpell:ignore:?\s*(.*)$/ @@ -43,15 +43,16 @@ sub getSiteWideDictWords { my $dictionary_file = shift; my $textlintrc_file = shift; - my %dictionary = readYmlListOfWords('words', $dictionary_file); - my %textlintDictionary = readYmlListOfWords('terms', $textlintrc_file); + my %dictionary = readYmOrPlainlListOfWords('', $dictionary_file); + my %textlintDictionary = readYmOrPlainlListOfWords('terms', $textlintrc_file); # Merge dictionaries @dictionary{keys %textlintDictionary} = values %textlintDictionary; return %dictionary; } -sub readYmlListOfWords { +sub readYmOrPlainlListOfWords { + # Read plain list of words if $wordsFieldName is empty my $wordsFieldName = shift; my $file_path = shift; my $fh = FileHandle->new($file_path, "r") or die "Could not open file '$file_path': $!"; @@ -60,18 +61,24 @@ sub readYmlListOfWords { my %dictionary; my $indentation = ''; - my $in_terms = 0; + my $in_terms = $wordsFieldName eq '' ? 1 : 0; foreach my $line (@lines) { chomp $line; - if ($line =~ /^(\s*)$wordsFieldName:/) { - $indentation = $1 || ''; + next if $line =~ /^\s*#|^\s*$/; + # print "> $line\n" if $wordsFieldName; + + if ($wordsFieldName && $line =~ /^(\s*)$wordsFieldName:/) { + $indentation = "$1 - " || ''; $in_terms = 1; - # print STDOUT "Found terms!"; - } elsif ($line =~ /^$indentation - (\w[^\s]*)$/ && $in_terms) { + # print "> FOUND $wordsFieldName keyword\n" + } elsif ($line =~ /^$indentation(\w[^\s]*)$/ && $in_terms) { my $term = $1; $dictionary{$term} = 1 if $term; - } elsif ($line !~ /^ / && $in_terms) { + } elsif ($wordsFieldName && $line !~ /^ / && $in_terms) { $in_terms = 0; + # print "FINISHE word list\n" if $in_terms; + } else { + # print "OOPS LINE DID NOT MATCH\n" if $in_terms; } }