From d3d284012924e02161a610cf4ba1c3baf79d4321 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Anne=20L=27H=C3=B4te?= Date: Tue, 24 Dec 2024 16:26:17 +0100 Subject: [PATCH] feat(affiliations): Do not query FOSM to improve OpenAlex affiliations ... well ... --- .../openalex-affiliations/results/index.jsx | 6 +- client/src/utils/works.jsx | 71 +++++++++---------- server/src/routes/affiliations.routes.js | 18 ++--- 3 files changed, 43 insertions(+), 52 deletions(-) diff --git a/client/src/pages/openalex-affiliations/results/index.jsx b/client/src/pages/openalex-affiliations/results/index.jsx index ef3771b..6997468 100644 --- a/client/src/pages/openalex-affiliations/results/index.jsx +++ b/client/src/pages/openalex-affiliations/results/index.jsx @@ -24,7 +24,7 @@ import getFlagEmoji from '../../../utils/flags'; import { getRorData, isRor } from '../../../utils/ror'; import { normalize, removeDiacritics } from '../../../utils/strings'; import { getTagColor } from '../../../utils/tags'; -import { getAffiliations } from '../../../utils/works'; +import { getOpenAlexAffiliations } from '../../../utils/works'; import ExportErrorsButton from '../components/export-errors-button'; import SendFeedbackButton from '../components/send-feedback-button'; import ListView from './list-view'; @@ -83,9 +83,9 @@ export default function Affiliations() { ]; const { data, error, isFetched, isFetching, refetch } = useQuery({ - queryKey: ['affiliations', JSON.stringify(options)], + queryKey: ['openalex-affiliations', JSON.stringify(options)], // Search for works from affiliations for each affiliation strictly longer than 2 letters - queryFn: () => getAffiliations( + queryFn: () => getOpenAlexAffiliations( { ...options, affiliationStrings: options.affiliations diff --git a/client/src/utils/works.jsx b/client/src/utils/works.jsx index 9542f2c..2f58f17 100644 --- a/client/src/utils/works.jsx +++ b/client/src/utils/works.jsx @@ -36,40 +36,6 @@ const timeout = (time) => { return controller; }; -const getAffiliations = async (body, toast) => { - const response = await fetch(`${VITE_API}/affiliations`, { - body: JSON.stringify(body), - headers: { 'Content-Type': 'application/json' }, - method: 'POST', - signal: timeout(1200).signal, // 20 minutes - }); - if (!response.ok) { - throw new Error('Oops... FOSM API request did not work for works !'); - } - const { affiliations, warnings } = await response.json(); - const resAffiliations = await unzipAll(affiliations); - let warningMessage = ''; - if (warnings?.isMaxFosmReached) { - warningMessage = warningMessage.concat( - `More than ${warnings.maxFosmValue} publications found in French OSM, only the first ${warnings.maxFosmValue} were retrieved.\n`, - ); - } - if (warnings?.isMaxOpenalexReached) { - warningMessage = warningMessage.concat( - `More than ${warnings.maxOpenalexValue} publications found in OpenAlex, only the first ${warnings.maxOpenalexValue} were retrieved.\n`, - ); - } - if (warningMessage) { - toast({ - description: warningMessage, - id: 'tooManyPublications', - title: 'Too Many publications found', - toastType: 'error', - }); - } - return { affiliations: resAffiliations, warnings }; -}; - const getIdLink = (type, id) => { let prefix = null; switch (type) { @@ -97,7 +63,6 @@ const getIdLink = (type, id) => { }; const getMentions = async (options) => { - // TODO: Replace by useQuery const response = await fetch(`${VITE_API}/mentions`, { body: JSON.stringify(options), headers: { 'Content-Type': 'application/json' }, @@ -111,6 +76,40 @@ const getMentions = async (options) => { return mentions; }; +const getOpenAlexAffiliations = async (body, toast) => { + const response = await fetch(`${VITE_API}/openalex-affiliations`, { + body: JSON.stringify(body), + headers: { 'Content-Type': 'application/json' }, + method: 'POST', + signal: timeout(1200).signal, // 20 minutes + }); + if (!response.ok) { + throw new Error('Oops... FOSM API request did not work for works !'); + } + const { affiliations, warnings } = await response.json(); + const resAffiliations = await unzipAll(affiliations); + let warningMessage = ''; + if (warnings?.isMaxFosmReached) { + warningMessage = warningMessage.concat( + `More than ${warnings.maxFosmValue} publications found in French OSM, only the first ${warnings.maxFosmValue} were retrieved.\n`, + ); + } + if (warnings?.isMaxOpenalexReached) { + warningMessage = warningMessage.concat( + `More than ${warnings.maxOpenalexValue} publications found in OpenAlex, only the first ${warnings.maxOpenalexValue} were retrieved.\n`, + ); + } + if (warningMessage) { + toast({ + description: warningMessage, + id: 'tooManyPublications', + title: 'Too Many publications found', + toastType: 'error', + }); + } + return { affiliations: resAffiliations, warnings }; +}; + const getWorks = async (body, toast) => { const response = await fetch(`${VITE_API}/works`, { body: JSON.stringify(body), @@ -193,9 +192,9 @@ const renderButtonDataset = (selected, fn, label, icon) => ( ); export { - getAffiliations, getIdLink, getMentions, + getOpenAlexAffiliations, getWorks, normalizeName, range, diff --git a/server/src/routes/affiliations.routes.js b/server/src/routes/affiliations.routes.js index ef43b06..2e59225 100644 --- a/server/src/routes/affiliations.routes.js +++ b/server/src/routes/affiliations.routes.js @@ -3,11 +3,9 @@ import express from 'express'; import { getInstitutionIdFromRor } from '../utils/openalex'; import { getCache, saveCache } from '../utils/s3'; -import { chunkArray, countUniqueValues, range } from '../utils/utils'; +import { chunkArray, range } from '../utils/utils'; import { - datasetsType, deduplicateWorks, - getFosmWorks, getOpenAlexPublications, groupByAffiliations, } from '../utils/works'; @@ -49,9 +47,9 @@ const chunkAndCompress = (data) => { return Promise.all(chunks.map((c) => compressData(c))); }; -const getWorks = async ({ options, resetCache = false }) => { +const getOpenAlexAffiliations = async ({ options, resetCache = false }) => { const shasum = crypto.createHash('sha1'); - shasum.update(JSON.stringify({ ...options, type: 'affiliations' })); + shasum.update(JSON.stringify({ ...options, type: 'openalex-affiliations' })); const searchId = shasum.digest('hex'); const start = new Date(); const queryId = start @@ -82,7 +80,6 @@ const getWorks = async ({ options, resetCache = false }) => { options.rorExclusions.map((ror) => getInstitutionIdFromRor(ror)), ); const queries = []; - queries.push(getFosmWorks({ options })); const affiliationStringsChunks = chunkArray({ array: options.affiliationStrings, }); @@ -104,11 +101,6 @@ const getWorks = async ({ options, resetCache = false }) => { }); const responses = await Promise.all(queries); const warnings = {}; - const MAX_FOSM = Number(process.env.ES_MAX_SIZE); - if (MAX_FOSM > 0 && responses.length > 0 && responses[0].length >= MAX_FOSM) { - warnings.isMaxFosmReached = true; - warnings.maxFosmValue = MAX_FOSM; - } const MAX_OPENALEX = Number(process.env.OPENALEX_MAX_SIZE); if ( MAX_OPENALEX > 0 @@ -162,7 +154,7 @@ const getWorks = async ({ options, resetCache = false }) => { return result; }; -router.route('/affiliations').post(async (req, res) => { +router.route('/openalex-affiliations').post(async (req, res) => { try { const options = req?.body ?? {}; if (!options?.affiliationStrings && !options?.rors) { @@ -170,7 +162,7 @@ router.route('/affiliations').post(async (req, res) => { message: 'You must provide at least one affiliation string or RoR.', }); } else { - const compressedResult = await getWorks({ options }); + const compressedResult = await getOpenAlexAffiliations({ options }); res.status(200).json(compressedResult); } } catch (err) {