Skip to content

Commit

Permalink
MWPW-151548 Updates for FaaS to Marketo validation
Browse files Browse the repository at this point in the history
  • Loading branch information
Brandon32 committed Jun 4, 2024
1 parent 530a8b6 commit c952b92
Show file tree
Hide file tree
Showing 11 changed files with 4,192 additions and 31 deletions.
3,968 changes: 3,968 additions & 0 deletions bacom-validation/list.json

Large diffs are not rendered by default.

95 changes: 95 additions & 0 deletions bacom-validation/locales.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
[
"",
"ae_ar",
"ae_en",
"africa",
"ar",
"at",
"au",
"be_en",
"be_fr",
"be_nl",
"bg",
"br",
"ca_fr",
"ca",
"ch_de",
"ch_fr",
"ch_it",
"cl",
"cn",
"co",
"cr",
"cy_en",
"cz",
"de",
"dk",
"ec",
"ee",
"eg_ar",
"eg_en",
"el",
"es",
"fi",
"fr",
"gr_el",
"gr_en",
"gt",
"hk_en",
"hk_zh",
"hu",
"id_en",
"id_id",
"ie",
"il_en",
"il_he",
"in_hi",
"in",
"it",
"jp",
"kr",
"kw_ar",
"kw_en",
"la",
"langstore",
"lt",
"lu_de",
"lu_en",
"lu_fr",
"lv",
"mena_ar",
"mena_en",
"mt",
"mx",
"my_en",
"my_ms",
"ng",
"nl",
"no",
"nz",
"pe",
"ph_en",
"ph_fil",
"pl",
"pr",
"pt",
"qa_ar",
"qa_en",
"ro",
"ru",
"sa_ar",
"sa_en",
"se",
"sg",
"si",
"sk",
"th_en",
"th_th",
"tr",
"tw",
"ua",
"uk",
"vn_en",
"vn_vi",
"za"
]
32 changes: 32 additions & 0 deletions bacom-validation/readme.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# FaaS to Marketo Form Validation

Validation of markdown files for the BACOM project for the migration of FaaS to Marketo forms.

## Steps

### 1. Download Markdown

See the [Download Markdown Readme](../download-markdown/readme.md) for full details.

In the `download-markdown` directory, copy the `.env.example` file to `.env` and add your authorization token.

Run the markdown download script to download the markdown files from the specified domain and stage path.

```bash
node download-markdown/download-markdown.js 'bacom-validation' 'live' 'https://main--bacom--adobecom.hlx.live'
node download-markdown/download-markdown.js 'bacom-validation' 'page' 'https://main--bacom--adobecom.hlx.page'
```

Here we are downloading the markdown files from the `hlx.live` and `hlx.page` domains into the `bacom-validation/md/live` and `bacom-validation/md/page` directories, using the entries in the `output/list.json` file.

### 2. Validate Markdown

See the [Validation Readme](../validation/README.md) for full details.

Run the markdown validation script to validate the downloaded markdown files.

```bash
node validation/link-validator.js 'bacom-validation' 'live' 'page'
```

Here we are validating the markdown files by comparing the links in the `md/live` (source) and `md/page` (updated) directories for the entries in the `output/list.json` file.
1 change: 1 addition & 0 deletions download-markdown/.env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
AUTHORIZATION_TOKEN=your_token_here
42 changes: 35 additions & 7 deletions download-markdown/download-markdown.js
Original file line number Diff line number Diff line change
@@ -1,15 +1,19 @@
import path from 'path';
import fs from 'fs';
import dotenv from 'dotenv';
import { fetch, timeoutSignal, AbortError } from '@adobe/fetch';
import { saveToFile, entryToPath } from '../bulk-update/document-manager/document-manager.js';
import { localizeStageUrl } from '../bulk-update/bulk-update.js';

dotenv.config({ path: 'download-markdown/.env' });

/**
 * Pauses for the given amount of time.
 *
 * @param {number} milliseconds - How long to wait before the promise settles.
 * @returns {Promise<undefined>} A promise that resolves (with no value) once the timer fires.
 */
const delay = (milliseconds) => new Promise((done) => {
  setTimeout(() => done(), milliseconds);
});

// Module-level settings for the markdown downloader.
const ALLOW_SKIP = true; // Allow skipping files that already exist
const PAGE_DELAY = 500; // 500ms delay for fetching from hlx.page
const LIVE_DELAY = 0; // 0ms delay for fetching from live site
const TIMEOUT = 5000; // 5s timeout for fetching markdown
// Optional token read from the process environment (dotenv loads download-markdown/.env above).
// When set, it is sent as the Authorization header of each markdown fetch request.
const { AUTHORIZATION_TOKEN } = process.env;

/**
* Reads a JSON file from the specified directory.
Expand Down Expand Up @@ -39,7 +43,12 @@ export async function fetchMarkdown(url, fetchWaitMs, fetchFn = fetch) {
console.log(`Fetching markdown ${url}, delay ${fetchWaitMs}ms, timeout ${TIMEOUT}ms`);
await delay(fetchWaitMs); // Wait 500ms to avoid rate limiting, not needed for live.
const signal = timeoutSignal(TIMEOUT); // 5s timeout
const response = await fetchFn(url, { signal });
const headers = {};
if (AUTHORIZATION_TOKEN) {
headers.Authorization = AUTHORIZATION_TOKEN;
}

const response = await fetchFn(url, { signal, headers });

if (!response.ok) {
console.warn('Failed to fetch markdown.', response.status, response.statusText);
Expand Down Expand Up @@ -127,6 +136,10 @@ export function downloadMarkdown(folder, list, locales, siteURL, stagePath, fetc
});

fs.mkdirSync(folder, { recursive: true });
// save the list of entries to a file
saveToFile(path.join(folder, 'download-list.json'), JSON.stringify(stagedUrls.map(([, stageUrl]) => stageUrl), null, 2));
saveToFile(path.join(folder, 'preview.txt'), stagedUrls.map(([, stageUrl]) => stageUrl).join('\n'));

return downloadMDs(stagedUrls, folder, fetchFn);
}

Expand All @@ -139,20 +152,30 @@ export function downloadMarkdown(folder, list, locales, siteURL, stagePath, fetc
* @param {string} stagePath - The path to the staging environment.
* @returns {Promise<void>} A promise that resolves when the download process is complete.
*/
async function init(migrationDir, outputDir, siteUrl, stagePath = '') {
async function main(migrationDir, outputDir, siteUrl, stagePath = '') {
const list = readJsonFile('output/list.json', migrationDir);
const locales = readJsonFile('locales.json', migrationDir);

if (!list || !locales) {
console.error('Missing list or locales');
if (!list) {
console.error('Missing list');
process.exit(1);
}

if (!locales) {
console.error('Missing locales.json, continuing without localization');
}

if (!siteUrl) {
console.error('Missing siteUrl');
process.exit(1);
}

if (AUTHORIZATION_TOKEN) {
console.log('Using authorization token for fetching markdown');
} else {
console.log('No authorization token found, fetching markdown without token');
}

const markdownFolder = path.join(migrationDir, 'md', outputDir);
const failed = await downloadMarkdown(markdownFolder, list, locales, siteUrl, stagePath);

Expand All @@ -162,11 +185,16 @@ async function init(migrationDir, outputDir, siteUrl, stagePath = '') {
}
}

// example usage: node tools/download-markdown/download-markdown.js 'blog-test' 'uploaded' 'https://main--bacom-blog--adobecom.hlx.page' '/drafts/staged-content'
/**
* Run the markdown downloader
* Example usage: node download-markdown/download-markdown.js 'blog-test' 'uploaded' 'https://main--bacom-blog--adobecom.hlx.page' '/drafts/staged-content'
*/
if (import.meta.url === `file://${process.argv[1]}`) {
const args = process.argv.slice(2);
const [folder, outputDir, siteUrl, stagePath] = args;
// defaults for debugging
const DEFAULTS = ['blog-test', 'uploaded', 'https://main--bacom-blog--adobecom.hlx.page', '/drafts/staged-content'];
const [folder, outputDir, siteUrl, stagePath] = args.length ? args : DEFAULTS;

await init(folder, outputDir, siteUrl, stagePath);
await main(folder, outputDir, siteUrl, stagePath);
process.exit(0);
}
16 changes: 12 additions & 4 deletions download-markdown/readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,15 @@ Run the script with the following command:

```bash
node download-markdown/download-markdown.js <migration-dir> <output-dir> <domain> <stage-path>
# example:
node download-markdown/download-markdown.js 'blog-test' 'uploaded' 'https://main--bacom-blog--adobecom.hlx.page' '/drafts/staged-content'
```

Where:
* `<migration-dir>` is the directory containing the output/list.json and locales.json files.
* `<output-dir>` is the md directory where the markdown files will be saved.
* `<domain>` is the domain from which the markdown files will be fetched.
* `<stage-path>` is the path used to construct the full URLs for downloading the markdown files.
- `<migration-dir>` is the directory containing the output/list.json and locales.json files.
- `<output-dir>` is the subdirectory of `<migration-dir>/md` where the markdown files will be saved.
- `<domain>` is the domain from which the markdown files will be fetched.
- `<stage-path>` (optional, defaults to an empty path) is the path used to construct the full URLs for downloading the markdown files.

## Localization Support

Expand All @@ -40,6 +42,12 @@ The locale will be placed between the domain and stage path in the URL.

For example, if the stage path is '/drafts/staged-content' and the entry is 'de/entry', the URL is 'https://main--bacom-blog--adobecom.hlx.page/de/drafts/staged-content/entry.md'.

## Authentication Support

The dotenv library is used to load environment variables from the `download-markdown/.env` file.
Copy the `.env.example` file to `.env` and set the `AUTHORIZATION_TOKEN` variable in the `.env` file.
When the token is set, it is sent as the `Authorization` header of each markdown fetch request; when it is not set, the requests are sent without it.

## Download Process

The script downloads markdown files based on the provided domain, locale, stage path, and entry path.
Expand Down
12 changes: 12 additions & 0 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
"@adobe/helix-docx2md": "^1.5.2",
"@adobe/helix-html-pipeline": "^6.7.4",
"@adobe/helix-md2docx": "^2.1.38",
"dotenv": "^16.4.5",
"exceljs": "^4.4.0",
"fs-extra": "^11.2.0",
"glob": "^10.3.3",
Expand Down
28 changes: 14 additions & 14 deletions validation/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,35 +8,35 @@ The validator differs from the link validation done during bulk updating by chec

This tool is made to be used in conjunction with the `download-markdown` tool but can be used independently with any markdown.

## Prerequisites

Before running the script, ensure the following structure is set up in your migration directory:

1. A new or existing migration directory, e.g. `blog-test`.
2. An `output` folder containing a `list.json` file with an array of entries.
3. A `md` folder containing the source and updated markdown files.

## Usage

Run the validation script directly, passing the migration directory and the names of the source and updated folders:

```bash
node validation/link-validator.js {path to list.json} {path to md directory} {source} {updated}

example: node validation/link-validator.js './blog-test/output/list.json' './blog-test/md' 'source' 'updated'
node validation/link-validator.js <migration-dir> <source> <updated>
# example:
node validation/link-validator.js 'blog-test' 'source' 'updated'
```

Where:
- `{path to list.json}` is the path to the list.json file containing the list of paths to validate.
- `{path to md directory}` is the path to the directory containing the source and updated markdown files.
- `{source}` is the folder containing the source markdown files.
- `{updated}` is the folder containing the updated markdown files.
- `<migration-dir>` is the directory containing the output/list.json and locales.json files, and the `md` folder with the markdown files to compare.
- `<source>` is the folder containing the source markdown files.
- `<updated>` is the folder containing the updated markdown files.

## How it works

The script reads markdown files from the provided paths and extracts all the links from each file. It then compares the links from the source and updated files. If the links match exactly, it logs that they match. If they don't, it performs a deep comparison, logging the differences and any anomalies it detects.

The script generates two reports: a standard report and a deep comparison report. The standard report logs the overall results of the comparison, while the deep comparison report logs detailed information about each link and any anomalies detected.

## Prerequisites

Before running the script, ensure the following structure is set up in your migration directory:

1. A new or existing migration directory, e.g. `blog-test`.
2. An `output` folder containing a `list.json` file with an array of entries.
3. A `md` folder containing the source and updated markdown files.

## Reviewing the Reports

Expand Down
5 changes: 5 additions & 0 deletions validation/deep-compare.js
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,15 @@ export const DOUBLE_HASH = 'Double Hash';
export const HOST_MATCH = 'Host Match';
export const PATHNAME_MATCH = 'Pathname Match';
export const SEARCH_MATCH = 'Search Match';
export const MILO_TOOL = 'Milo Tool';

export const ANOMALY_EMPTY_LINK = 'Empty link';
export const ANOMALY_MISSING_LINK = 'Missing link';
export const ANOMALY_WHITESPACE_CORRUPTION = 'Whitespace corruption';
export const ANOMALY_ASCII_URL_CORRUPTION = 'ASCII URL corruption';
export const ANOMALY_MULTIPLE_HASHTAGS = 'Multiple hashtags';
export const ANOMALY_UNKNOWN = 'Unknown anomaly';
export const ANOMALY_MILO_TOOL = 'Milo tool update';

/**
* Calculates the Levenshtein distance between two strings.
Expand Down Expand Up @@ -126,6 +128,7 @@ export function observeUrl(oldUrl = '', newUrl = '') {
const validUrl = !!(oldUrlObj?.href && newUrlObj?.href);

observations[DOUBLE_HASH] = oldUrl?.match(/#/g)?.length > 1 || newUrl?.match(/#/g)?.length > 1;
observations[MILO_TOOL] = oldUrl?.match(/milo\.adobe\.com\/tools/g) || newUrl?.match(/milo\.adobe\.com\/tools/g);
observations[VALID_URL] = validUrl;

if (validUrl) {
Expand Down Expand Up @@ -183,6 +186,8 @@ export function detectAnomaly(observation) {
anomaly = ANOMALY_ASCII_URL_CORRUPTION;
} else if (url[DOUBLE_HASH]) {
anomaly = ANOMALY_MULTIPLE_HASHTAGS;
} else if (url[MILO_TOOL] && !url[MATCH]) {
anomaly = ANOMALY_MILO_TOOL;
} else {
anomaly = ANOMALY_UNKNOWN;
}
Expand Down
Loading

0 comments on commit c952b92

Please sign in to comment.