Skip to content

Commit

Permalink
refactor: add CLI
Browse files Browse the repository at this point in the history
  • Loading branch information
setchy committed Feb 11, 2024
1 parent a2048ef commit 42a1c61
Show file tree
Hide file tree
Showing 7 changed files with 86 additions and 27 deletions.
32 changes: 23 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,26 +25,40 @@ _Note: to use the CSV or JSON data formats you will need to use the GitHub RAW f

The datasets are refreshed every 6 months in alignment with Thoughtworks officially publishing their latest Volume.

The process for generating the CSVs has been automated.
The process for generating the CSVs and JSON volumes has been automated.

### /api-parser (experimental)
### /web-scraper (recommended)

The latest implementation which uses the public Thoughtworks Radar Search REST API.
An implementation which used a three-step process of parsing the public HTML content.

To execute, simply run `npm start`.
```
Usage: tech-radar-volumes [options]
### /web-scraper (recommended)
A CLI tool to fetch and process ThoughtWorks Tech Radar data
Options:
-l, --links fetch all radar blip page links from archive
-d, --data fetch detailed blip history from archive
-v, --volumes <type> generate CSV and JSON volumes (choices: "all", "csv", "json")
-h, --help display help for command
```

The original implementation which used a three-step process of parsing the public HTML content.
You can run this CLI via `pnpm start --help`

#### npm run generate:links
#### --links

This will extract _all_ blip links from https://thoughtworks.com/radar/search and place them into `data/links.json`.

#### npm run generate:data
#### --data

Using the contents of `data/links.json`, fetch each of the publication entries and place them into `data/master.json`.

### npm run generate:volumes
#### --volumes

Using the contents of `data/master.json`, generate CSV and JSON files for each publication/volume and place them into `volumes/*`.

### /api-parser (experimental)

The latest implementation which uses the public Thoughtworks Radar Search REST API.

To execute, simply run `npm start`.
6 changes: 2 additions & 4 deletions web-scraper/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
"@types/node": "20.11.17",
"@typescript-eslint/eslint-plugin": "6.21.0",
"@typescript-eslint/parser": "6.21.0",
"commander": "12.0.0",
"eslint": "8.56.0",
"eslint-config-prettier": "9.1.0",
"jsdom": "24.0.0",
Expand All @@ -25,10 +26,7 @@
"typescript": "5.3.3"
},
"scripts": {
"start": "run-s -l generate:*",
"generate:links": "ts-node src/links/index.ts",
"generate:data": "ts-node src/timeline/index.ts",
"generate:volumes": "ts-node src/files/index.ts",
"start": "ts-node src/index.ts",
"lint": "run-s -c -l eslint prettier",
"lint-fix": "run-s -c -l eslint:fix prettier-fix",
"eslint": "eslint . --ext .ts --config .eslintrc",
Expand Down
8 changes: 8 additions & 0 deletions web-scraper/pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 13 additions & 6 deletions web-scraper/src/files/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import {
import { escapeDescriptionHTML } from './utils';
import { getStatus, getVolumeFileName } from './utils';

function generateVolumes() {
export function generateVolumes(reportType: 'all' | 'csv' | 'json') {
const data = JSON.parse(fs.readFileSync(FILES.DATA.MASTER).toString());

const groupedByVolumes = _.groupBy(data, 'volume');
Expand All @@ -21,9 +21,18 @@ function generateVolumes() {
(entry) => entry.name.toLowerCase(),
]);

generateCSV(volume, sortedData);

generateJSON(volume, sortedData);
switch (reportType) {
case 'csv':
generateCSV(volume, sortedData);
break;
case 'json':
generateJSON(volume, sortedData);
break;
default:
generateCSV(volume, sortedData);
generateJSON(volume, sortedData);
break;
}
});
}

Expand Down Expand Up @@ -65,5 +74,3 @@ function generateJSON(volume: string, volumeData: any[]) {
console.log('Creating JSON file', jsonFilename);
fs.writeFileSync('../' + jsonFilename, JSON.stringify(jsonData, null, 4));
}

generateVolumes();
36 changes: 36 additions & 0 deletions web-scraper/src/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import { Command, Option } from 'commander';
import { extractRadarLinks } from './links';
import { generateVolumes } from './files';
import { generateMasterData } from './timeline';
const program = new Command();

// Declare the CLI surface: two boolean flags plus `--volumes`, whose value is
// restricted by commander to one of 'all' | 'csv' | 'json'.
program
  .name('tech-radar-volumes')
  .description('A CLI tool to fetch and process ThoughtWorks Tech Radar data')
  .option('-l, --links', 'fetch all radar blip page links from archive')
  .option('-d, --data', 'fetch detailed blip history from archive')
  .addOption(
    new Option(
      '-v, --volumes <type>',
      'generate CSV and JSON volumes',
    ).choices(['all', 'csv', 'json']),
  );

program.parse(process.argv);

/** Parsed command-line flags, mirroring the options declared on `program`. */
type CliOptions = {
  links?: boolean;
  data?: boolean;
  volumes?: 'all' | 'csv' | 'json';
};

const options: CliOptions = program.opts();

/**
 * Runs the requested pipeline steps in their dependency order:
 * links -> data -> volumes.
 *
 * - With no flags, every step runs and volumes are generated for 'all'
 *   formats.
 * - With one or more flags, only the selected steps run. Flags may be
 *   combined (e.g. `--links --data`); previously only the first matching flag
 *   was honoured and the remaining ones were silently ignored.
 *
 * Each asynchronous step is awaited before the next one starts, so a later
 * step never begins before the earlier one has finished.
 */
async function runCli(): Promise<void> {
  const runEveryStep = !options.links && !options.data && !options.volumes;

  if (runEveryStep || options.links) {
    console.log('fetching all radar blip page links from archive');
    await extractRadarLinks();
  }

  if (runEveryStep || options.data) {
    console.log('fetching detailed blip history from archive');
    await generateMasterData();
  }

  if (runEveryStep || options.volumes) {
    // `--volumes` is only undefined here when no flag at all was given.
    const reportType = options.volumes ?? 'all';
    console.log(`generating ${reportType} volumes`);
    generateVolumes(reportType);
  }
}

// Handle rejections explicitly instead of leaving floating promises: report
// the failure and terminate with a non-zero exit code so callers (shell
// scripts, CI) can detect it.
runCli().catch((error: unknown) => {
  console.error(error);
  process.exit(1);
});
2 changes: 0 additions & 2 deletions web-scraper/src/links/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,6 @@ export async function extractRadarLinks(): Promise<string[]> {
return uniqueLinks;
}

extractRadarLinks();

async function getTotalRecordCount(): Promise<number | null> {
return await page.evaluate(() => {
const spanElement = document.querySelector(
Expand Down
10 changes: 4 additions & 6 deletions web-scraper/src/timeline/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ import fs from 'fs';

let page: Page;

async function generateMasterData() {
const browser = await puppeteer.launch({ headless: 'new' });
export async function generateMasterData() {
const browser = await puppeteer.launch();
page = await browser.newPage();

const masterData: MasterData = {
Expand Down Expand Up @@ -85,8 +85,8 @@ export async function extractBlipTimeline(
'.hero-banner__overlay__container__title',
);
const blipName =
(await blipNameElement?.evaluate(
(element) => element.textContent?.trim(),
(await blipNameElement?.evaluate((element) =>
element.textContent?.trim(),
)) || '';

timelineEntries.forEach((blipPublicationHtml) => {
Expand Down Expand Up @@ -162,5 +162,3 @@ function calculateBlipMovements(blipMasterData: MasterData) {
}
}
}

generateMasterData();

0 comments on commit 42a1c61

Please sign in to comment.