Skip to content

Commit

Permalink
feat(affiliations): Do not query FOSM to improve OpenAlex affiliations
Browse files Browse the repository at this point in the history
... well ...
  • Loading branch information
annelhote committed Dec 24, 2024
1 parent 0d030eb commit d3d2840
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 52 deletions.
6 changes: 3 additions & 3 deletions client/src/pages/openalex-affiliations/results/index.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ import getFlagEmoji from '../../../utils/flags';
import { getRorData, isRor } from '../../../utils/ror';
import { normalize, removeDiacritics } from '../../../utils/strings';
import { getTagColor } from '../../../utils/tags';
import { getAffiliations } from '../../../utils/works';
import { getOpenAlexAffiliations } from '../../../utils/works';
import ExportErrorsButton from '../components/export-errors-button';
import SendFeedbackButton from '../components/send-feedback-button';
import ListView from './list-view';
Expand Down Expand Up @@ -83,9 +83,9 @@ export default function Affiliations() {
];

const { data, error, isFetched, isFetching, refetch } = useQuery({
queryKey: ['affiliations', JSON.stringify(options)],
queryKey: ['openalex-affiliations', JSON.stringify(options)],
// Search for works from affiliations for each affiliation strictly longer than 2 letters
queryFn: () => getAffiliations(
queryFn: () => getOpenAlexAffiliations(
{
...options,
affiliationStrings: options.affiliations
Expand Down
71 changes: 35 additions & 36 deletions client/src/utils/works.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -36,40 +36,6 @@ const timeout = (time) => {
return controller;
};

/**
 * Query the affiliations endpoint of the API and unpack its answer.
 *
 * `body` is serialized as JSON and POSTed to `${VITE_API}/affiliations`. The
 * `affiliations` field of the response is passed through `unzipAll`, and when
 * the response flags that a maximum number of publications was reached, a
 * single error toast listing every reached limit is emitted.
 *
 * @param {object} body - Search options sent as the JSON request body.
 * @param {Function} toast - Notifier called with
 *   `{ description, id, title, toastType }` when at least one limit was reached.
 * @returns {Promise<{ affiliations: *, warnings: (object|undefined) }>} The
 *   unzipped affiliations together with the `warnings` object from the API.
 * @throws {Error} When the HTTP response is not OK.
 */
const getAffiliations = async (body, toast) => {
  const endpoint = `${VITE_API}/affiliations`;
  const requestInit = {
    body: JSON.stringify(body),
    headers: { 'Content-Type': 'application/json' },
    method: 'POST',
    signal: timeout(1200).signal, // 20 minutes
  };
  const response = await fetch(endpoint, requestInit);
  if (!response.ok) {
    throw new Error('Oops... FOSM API request did not work for works !');
  }

  const { affiliations: zippedAffiliations, warnings } = await response.json();
  const unzippedAffiliations = await unzipAll(zippedAffiliations);

  // Collect one line per data source whose result set was truncated.
  const notices = [];
  if (warnings?.isMaxFosmReached) {
    notices.push(
      `More than ${warnings.maxFosmValue} publications found in French OSM, only the first ${warnings.maxFosmValue} were retrieved.\n`,
    );
  }
  if (warnings?.isMaxOpenalexReached) {
    notices.push(
      `More than ${warnings.maxOpenalexValue} publications found in OpenAlex, only the first ${warnings.maxOpenalexValue} were retrieved.\n`,
    );
  }

  const description = notices.join('');
  if (description) {
    toast({
      description,
      id: 'tooManyPublications',
      title: 'Too Many publications found',
      toastType: 'error',
    });
  }

  return { affiliations: unzippedAffiliations, warnings };
};

const getIdLink = (type, id) => {
let prefix = null;
switch (type) {
Expand Down Expand Up @@ -97,7 +63,6 @@ const getIdLink = (type, id) => {
};

const getMentions = async (options) => {
// TODO: Replace by useQuery
const response = await fetch(`${VITE_API}/mentions`, {
body: JSON.stringify(options),
headers: { 'Content-Type': 'application/json' },
Expand All @@ -111,6 +76,40 @@ const getMentions = async (options) => {
return mentions;
};

/**
 * Retrieve the OpenAlex affiliations matching the given search options.
 *
 * Sends `body` as JSON in a POST request to
 * `${VITE_API}/openalex-affiliations`, passes the `affiliations` field of the
 * response through `unzipAll`, and reports any truncation warning through
 * `toast`.
 *
 * @param {object} body - Search options, serialized as the JSON request body.
 * @param {Function} [toast] - Optional notifier called with
 *   `{ description, id, title, toastType }` when the API reports that a
 *   maximum number of publications was reached. When omitted, warnings are
 *   only available through the returned `warnings` object.
 * @returns {Promise<{ affiliations: *, warnings: (object|undefined) }>} The
 *   unzipped affiliations and the raw `warnings` object returned by the API.
 * @throws {Error} When the HTTP response status is not OK.
 */
const getOpenAlexAffiliations = async (body, toast) => {
  const response = await fetch(`${VITE_API}/openalex-affiliations`, {
    body: JSON.stringify(body),
    headers: { 'Content-Type': 'application/json' },
    method: 'POST',
    signal: timeout(1200).signal, // 20 minutes
  });
  if (!response.ok) {
    // This endpoint is the OpenAlex affiliations one, so name it in the error
    // instead of the FOSM wording inherited from the works request.
    throw new Error('Oops... OpenAlex affiliations API request did not work !');
  }
  const { affiliations, warnings } = await response.json();
  const resAffiliations = await unzipAll(affiliations);

  // One line per limit flagged by the API. The FOSM branch is kept so a FOSM
  // cap is still surfaced if the API ever reports one.
  const notices = [];
  if (warnings?.isMaxFosmReached) {
    notices.push(
      `More than ${warnings.maxFosmValue} publications found in French OSM, only the first ${warnings.maxFosmValue} were retrieved.\n`,
    );
  }
  if (warnings?.isMaxOpenalexReached) {
    notices.push(
      `More than ${warnings.maxOpenalexValue} publications found in OpenAlex, only the first ${warnings.maxOpenalexValue} were retrieved.\n`,
    );
  }
  const warningMessage = notices.join('');
  if (warningMessage) {
    // `toast` is optional: a missing notifier must not discard a successful
    // (and potentially very long) request with a TypeError.
    toast?.({
      description: warningMessage,
      id: 'tooManyPublications',
      title: 'Too Many publications found',
      toastType: 'error',
    });
  }
  return { affiliations: resAffiliations, warnings };
};

const getWorks = async (body, toast) => {
const response = await fetch(`${VITE_API}/works`, {
body: JSON.stringify(body),
Expand Down Expand Up @@ -193,9 +192,9 @@ const renderButtonDataset = (selected, fn, label, icon) => (
);

export {
getAffiliations,
getIdLink,
getMentions,
getOpenAlexAffiliations,
getWorks,
normalizeName,
range,
Expand Down
18 changes: 5 additions & 13 deletions server/src/routes/affiliations.routes.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,9 @@ import express from 'express';

import { getInstitutionIdFromRor } from '../utils/openalex';
import { getCache, saveCache } from '../utils/s3';
import { chunkArray, countUniqueValues, range } from '../utils/utils';
import { chunkArray, range } from '../utils/utils';
import {
datasetsType,
deduplicateWorks,
getFosmWorks,
getOpenAlexPublications,
groupByAffiliations,
} from '../utils/works';
Expand Down Expand Up @@ -49,9 +47,9 @@ const chunkAndCompress = (data) => {
return Promise.all(chunks.map((c) => compressData(c)));
};

const getWorks = async ({ options, resetCache = false }) => {
const getOpenAlexAffiliations = async ({ options, resetCache = false }) => {
const shasum = crypto.createHash('sha1');
shasum.update(JSON.stringify({ ...options, type: 'affiliations' }));
shasum.update(JSON.stringify({ ...options, type: 'openalex-affiliations' }));
const searchId = shasum.digest('hex');
const start = new Date();
const queryId = start
Expand Down Expand Up @@ -82,7 +80,6 @@ const getWorks = async ({ options, resetCache = false }) => {
options.rorExclusions.map((ror) => getInstitutionIdFromRor(ror)),
);
const queries = [];
queries.push(getFosmWorks({ options }));
const affiliationStringsChunks = chunkArray({
array: options.affiliationStrings,
});
Expand All @@ -104,11 +101,6 @@ const getWorks = async ({ options, resetCache = false }) => {
});
const responses = await Promise.all(queries);
const warnings = {};
const MAX_FOSM = Number(process.env.ES_MAX_SIZE);
if (MAX_FOSM > 0 && responses.length > 0 && responses[0].length >= MAX_FOSM) {
warnings.isMaxFosmReached = true;
warnings.maxFosmValue = MAX_FOSM;
}
const MAX_OPENALEX = Number(process.env.OPENALEX_MAX_SIZE);
if (
MAX_OPENALEX > 0
Expand Down Expand Up @@ -162,15 +154,15 @@ const getWorks = async ({ options, resetCache = false }) => {
return result;
};

router.route('/affiliations').post(async (req, res) => {
router.route('/openalex-affiliations').post(async (req, res) => {
try {
const options = req?.body ?? {};
if (!options?.affiliationStrings && !options?.rors) {
res.status(400).json({
message: 'You must provide at least one affiliation string or RoR.',
});
} else {
const compressedResult = await getWorks({ options });
const compressedResult = await getOpenAlexAffiliations({ options });
res.status(200).json(compressedResult);
}
} catch (err) {
Expand Down

0 comments on commit d3d2840

Please sign in to comment.