-
Notifications
You must be signed in to change notification settings - Fork 29
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #5857 from guardian/mob/thrasher-tracker
Thrasher tracker
- Loading branch information
Showing
8 changed files
with
226 additions
and
9 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
/**
 * Fetch a URL, decode the response body as JSON and hand it to `parser`.
 *
 * @param url - Anything accepted as the first argument of `fetch`.
 * @param options - `headers` are optional request headers; `parser`
 * validates / converts the decoded JSON into `T` and may be async.
 * @returns The value produced by `parser`.
 * @throws Error when the response status is not OK, so that an error page
 * is reported as an HTTP failure rather than as a JSON syntax error.
 */
export const fetchJSON = async <T>(
  url: Parameters<typeof fetch>[0],
  {
    headers,
    parser,
  }: {
    headers?: HeadersInit;
    parser: (data: unknown) => T | Promise<T>;
  },
): Promise<T> => {
  const response = await fetch(url, { headers });

  if (!response.ok) {
    // Release the unread body before bailing out.
    await response.body?.cancel();
    throw new Error(
      `Failed to fetch ${response.url}: ${response.status} ${response.statusText}`,
    );
  }

  const data: unknown = await response.json();
  return parser(data);
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,179 @@ | ||
import { array, object, string } from 'https://deno.land/x/[email protected]/mod.ts'; | ||
import { fetchJSON } from './json.ts'; | ||
import prettyBytes from 'https://esm.sh/pretty-bytes'; | ||
import { octokit } from './github.ts'; | ||
|
||
// -- Constants -- // | ||
|
||
const fronts = ['uk', 'us', 'international', 'au'] as const; | ||
|
||
const regex = | ||
/(http|https):\/\/([\w_-]+(?:(?:\.[\w_-]+)+))([\w.,@?^=%&:\/~+#-]*[\w@?^=%&\/~+#-])/g; | ||
|
||
const frontSchema = object({ | ||
pressedPage: object({ | ||
collections: array( | ||
object({ | ||
id: string(), | ||
collectionType: string(), | ||
displayName: string(), | ||
curated: array( | ||
object({ | ||
enriched: object({ | ||
embedHtml: string().optional(), | ||
embedCss: string().optional(), | ||
embedJs: string().optional(), | ||
}), | ||
}), | ||
), | ||
}), | ||
), | ||
}), | ||
}); | ||
|
||
/** | ||
* We ignore these font extensions because browsers will only load | ||
* one font format at a time; `woff2` is the one we count. | ||
*/ | ||
const _fontsExtensions = ['woff', 'ttf']; | ||
|
||
const supportedResourceExtensions = [ | ||
'js', | ||
'png', | ||
'woff2', | ||
'gif', | ||
'jpg', | ||
'mp4', | ||
'css', | ||
]; | ||
|
||
// -- Methods -- // | ||
|
||
/**
 * Return the file extension (without the dot) of the last path segment of
 * `url`, or an empty string when that segment has no dot.
 *
 * Only the final segment is inspected, so a dot in a directory name
 * (e.g. `/v1.2/embed`) is not mistaken for an extension.
 */
const getExtension = (url: URL): string => {
  const fileName = url.pathname.split('/').pop() ?? '';
  const dotIndex = fileName.lastIndexOf('.');
  return dotIndex === -1 ? '' : fileName.slice(dotIndex + 1);
};
|
||
/** Whether the extension of `url` is listed in `supportedResourceExtensions`. */
const isSupportedResourceType = (url: URL): boolean => {
  const extension = getExtension(url);
  return supportedResourceExtensions.includes(extension);
};
|
||
/** Download `url` and return the size of its body in bytes. */
const getResourceSize = async (url: URL): Promise<number> => {
  const { size } = await (await fetch(url)).blob();
  return size;
};
|
||
/**
 * Measure every URL in parallel and pair each one with its size.
 * The result keeps the order of `urls`.
 */
const getThrasherResources = (urls: URL[]) => {
  const measure = async (url: URL) => ({
    url,
    size: await getResourceSize(url),
  });
  return Promise.all(urls.map((url) => measure(url)));
};
|
||
/**
 * Fetch the JSON of a front and measure every `fixed/thrasher` collection.
 *
 * For each thrasher the embed strings of its first curated item are scanned
 * for URLs; those with a supported extension are downloaded and their sizes
 * summed per extension.
 *
 * @param path - Front path appended to `https://theguardian.com/`.
 * @returns One entry per thrasher with its display name, the measured
 * resources, the embed size, the per-extension resource sizes and the total.
 */
const getFrontThrashers = async (path: string) => {
  const url = new URL(`https://theguardian.com/${path}.json?dcr`);
  const {
    pressedPage: { collections },
  } = await fetchJSON(url, { parser: frontSchema.parse });

  const thrashers = collections.filter(
    (collection) => collection.collectionType === 'fixed/thrasher',
  );

  // `map` rather than `flatMap`: the async callback returns a promise, which
  // `flatMap` cannot flatten, so it only ever behaved like `map`.
  return Promise.all(
    thrashers.map(async ({ displayName, curated }) => {
      // A thrasher with no curated item has no embeds; treat it as empty
      // instead of throwing while destructuring `curated[0]`.
      const embeds = Object.values(curated[0]?.enriched ?? {}).filter(
        (embed): embed is string => typeof embed === 'string',
      );

      const resourceUrls = embeds
        .flatMap((embed) =>
          [...embed.matchAll(regex)].map(([match]) => new URL(match)),
        )
        .filter(isSupportedResourceType);

      const resources = await getThrasherResources(resourceUrls);

      const embedSize = new Blob(embeds).size;

      // Sum of resource sizes keyed by file extension.
      const resourceSize = resources.reduce((map, { url, size }) => {
        const ext = getExtension(url);
        map.set(ext, (map.get(ext) ?? 0) + size);
        return map;
      }, new Map<string, number>());

      const totalSize =
        embedSize +
        [...resourceSize.values()].reduce((acc, next) => acc + next, 0);

      return {
        displayName,
        resources,
        embedSize,
        resourceSize,
        totalSize,
      };
    }),
  );
};
|
||
/**
 * Render the thrashers of one front as the lines of a markdown table,
 * largest total size first.
 *
 * @param data - Result of `getFrontThrashers`; it is not mutated.
 * @returns Header row, separator row, then one row per thrasher.
 */
const getTable = (data: Awaited<ReturnType<typeof getFrontThrashers>>) => {
  // A literal `|` inside a cell would split it into extra columns.
  const escapeCell = (text: string): string => text.replace(/\|/g, '\\|');

  const rows = [...data]
    .sort((a, b) => b.totalSize - a.totalSize)
    .map(({ displayName, embedSize, resourceSize, totalSize }) => {
      const resourcesCell = [...resourceSize.entries()]
        .sort(([, a], [, b]) => b - a)
        .map(
          ([resourceType, size]) =>
            `\`${resourceType}\`: ${prettyBytes(size)}`,
        )
        .join(', ');

      const cells = [
        escapeCell(displayName),
        prettyBytes(totalSize),
        prettyBytes(embedSize),
        resourcesCell,
      ];

      return '| ' + cells.join(' | ') + ' |';
    });

  return [
    '| Name | Total size | Embed size | Resources |',
    '| ---- | ---------- | ---------- | --------- |',
    ...rows,
  ];
};
|
||
// -- Script -- // | ||
|
||
// Build the markdown report: a title, then one heading and table per front.
const lines = ['# Largest thrashers on network fronts'];

for (const id of fronts) {
  const heading = `## [${id.toUpperCase()} Front](https://www.theguardian.com/${id}) `;
  lines.push('', '', heading, '');

  // Fronts are processed one at a time so sections keep the order of `fronts`.
  const data = await getFrontThrashers(id);
  lines.push(...getTable(data));
}

const body = lines.join('\n');

if (octokit) {
  // Publish the report by overwriting the body of the tracking issue.
  const response = await octokit.rest.issues.update({
    owner: 'guardian',
    repo: 'dotcom-rendering',
    issue_number: 5856,
    body,
  });

  console.log('Updated issue:', response.data.html_url);
} else {
  // No GitHub client available: print the report instead.
  console.log(body);
}

Deno.exit();