diff --git a/server/src/routes/works.routes.js b/server/src/routes/works.routes.js index 937ef9ad..8492f14c 100644 --- a/server/src/routes/works.routes.js +++ b/server/src/routes/works.routes.js @@ -1,7 +1,6 @@ import express from 'express'; -import { groupByAffiliations } from '../utils/utils'; -import { deduplicateWorks, getFosmWorks, getOpenAlexPublications } from '../utils/works'; +import { deduplicateWorks, getFosmWorks, getOpenAlexPublications, groupByAffiliations } from '../utils/works'; const router = new express.Router(); @@ -21,43 +20,60 @@ router.route('/works') getFosmWorks({ options: { ...options, filter: { field: 'genre', value: 'dataset' } }, index: process.env.VITE_FOSM_DATASETS_INDEX }), ]); console.timeEnd(`0. Requests ${options.affiliations}`); - console.time(`1. Filter ${options.affiliations}`); - const data = {}; - data.publications = { - results: [ - ...responses[0].results.filter((result) => result.genre_raw !== 'dataset'), - ...responses[1].results, - ], - }; - data.datasets = { - results: [ - ...responses[0].results.filter((result) => result.genre_raw === 'dataset'), - ...responses[2].results, - ], - }; - console.timeEnd(`1. Filter ${options.affiliations}`); + console.time(`1. Concat ${options.affiliations}`); + const works = [ + ...responses[0], + ...responses[1], + ...responses[2], + ]; + console.timeEnd(`1. Concat ${options.affiliations}`); console.time(`2. Dedup ${options.affiliations}`); // Deduplicate publications by ids - data.publications.results = deduplicateWorks(data.publications.results); + const deduplicatedWorks = deduplicateWorks(works); console.timeEnd(`2. Dedup ${options.affiliations}`); + // Goup by affiliations + console.time(`3. GroupBy ${options.affiliations}`); + const uniqueAffiliations = groupByAffiliations({ options, works: deduplicatedWorks }); + console.timeEnd(`3. GroupBy ${options.affiliations}`); + // Sort between publications and datasets + console.time(`4. Sort ${options.affiliations}`); + const publications = []; + const datasets = []; + deduplicatedWorks.forEach((deduplicatedWork) => { + if ( + (deduplicatedWork.datasource.includes('fosm') && deduplicatedWork.genre_raw !== 'dataset') + || (deduplicatedWork.datasource.includes('openalex') && deduplicatedWork.type !== 'dataset') + ) { + publications.push(deduplicatedWork); + } else if ( + (deduplicatedWork.datasource.includes('fosm') && deduplicatedWork.genre_raw === 'dataset') + || (deduplicatedWork.datasource.includes('openalex') && deduplicatedWork.type === 'dataset') + ) { + datasets.push(deduplicatedWork); + } else { + console.log(`Work not sort : ${JSON.stringify(deduplicatedWork)}`); + } + }); + console.timeEnd(`4. Sort ${options.affiliations}`); // Compute distinct types & years for facet - console.time(`3. Facet ${options.affiliations}`); - data.publications.years = [...new Set( - data.publications.results.filter((publication) => !!publication?.year).map((publication) => Number(publication.year)), + console.time(`5. Facet ${options.affiliations}`); + const publicationsYears = [...new Set( + publications.filter((publication) => !!publication?.year).map((publication) => Number(publication.year)), )].sort((a, b) => b - a); - data.datasets.years = [...new Set( - data.datasets.results.filter((dataset) => !!dataset?.year).map((dataset) => Number(dataset.year)), + const datasetsYears = [...new Set( + datasets.filter((dataset) => !!dataset?.year).map((dataset) => Number(dataset.year)), )].sort((a, b) => b - a); - data.publications.types = [...new Set(data.publications.results.map((publication) => publication?.type))]; - data.datasets.types = [...new Set(data.datasets.results.map((dataset) => dataset?.type))]; - console.timeEnd(`3. Facet ${options.affiliations}`); - // Goup by affiliations - console.time(`4. GroupBy ${options.affiliations}`); - data.affiliations = groupByAffiliations({ ...data, options }); - console.timeEnd(`4. GroupBy ${options.affiliations}`); - console.time(`5. Serialization ${options.affiliations}`); - res.status(200).json(data); - console.timeEnd(`5. Serialization ${options.affiliations}`); + const publicationsTypes = [...new Set(publications.map((publication) => publication?.type))]; + const datasetsTypes = [...new Set(datasets.map((dataset) => dataset?.type))]; + console.timeEnd(`5. Facet ${options.affiliations}`); + // Build and serialize response + console.time(`6. Serialization ${options.affiliations}`); + res.status(200).json({ + affiliations: uniqueAffiliations, + datasets: { results: datasets, types: datasetsTypes, years: datasetsYears }, + publications: { results: publications, types: publicationsTypes, years: publicationsYears }, + }); + console.timeEnd(`6. Serialization ${options.affiliations}`); } } catch (err) { console.error(err); diff --git a/server/src/utils/utils.js b/server/src/utils/utils.js index f02397de..f7827e95 100644 --- a/server/src/utils/utils.js +++ b/server/src/utils/utils.js @@ -39,39 +39,9 @@ const normalizedName = (name) => name .replace(/\s+/g, ' ') .trim(); -const groupByAffiliations = ({ datasets, options, publications }) => { - const regexp = getRegexpFromOptions(options); - // Compute distinct affiliations of the undecided works - let allAffiliationsTmp = {}; - [...datasets.results, ...publications.results].forEach((work) => { - (work?.affiliations ?? []) - .forEach((affiliation) => { - const normalizedAffiliationName = normalizedName(affiliation); - if (!allAffiliationsTmp?.[normalizedAffiliationName]) { - // Check matches in affiliation name - let matches = affiliation?.match(regexp) ?? []; - // Normalize matched strings - matches = matches.map((match) => normalizedName(match)); - // Filter matches as unique - matches = [...new Set(matches)]; - allAffiliationsTmp[normalizedAffiliationName] = { - matches: matches.length, - name: affiliation, - nameHtml: affiliation.replace(regexp, '$&'), - works: [], - }; - } - allAffiliationsTmp[normalizedAffiliationName].works.push(work.id); - }); - }); - - allAffiliationsTmp = Object.values(allAffiliationsTmp) - .map((affiliation, index) => ({ ...affiliation, id: index.toString(), works: [...new Set(affiliation.works)], worksNumber: [...new Set(affiliation.works)].length })); - return allAffiliationsTmp; -}; - export { cleanId, - groupByAffiliations, + getRegexpFromOptions, + normalizedName, range, }; diff --git a/server/src/utils/works.js b/server/src/utils/works.js index 88805af0..d22031fd 100644 --- a/server/src/utils/works.js +++ b/server/src/utils/works.js @@ -1,4 +1,4 @@ -import { cleanId, range } from './utils'; +import { cleanId, getRegexpFromOptions, normalizedName, range } from './utils'; const VITE_OPENALEX_MAX_PAGE = Math.floor(process.env.VITE_OPENALEX_SIZE / process.env.VITE_OPENALEX_PER_PAGE); @@ -114,10 +114,7 @@ const getFosmWorks = async ({ options }) => { const years = range(startYear, endYear); const promises = years.map((year) => getFosmWorksByYear({ options: { ...options, year } })); const allResults = await Promise.all(promises); - return ({ - datasource: 'fosm', - results: allResults.flat(), - }); + return allResults.flat(); }; const getTypeFromOpenAlex = (type) => { @@ -210,14 +207,43 @@ const getOpenAlexPublications = async ({ options }) => { const years = range(startYear, endYear); const promises = years.map((year) => getOpenAlexPublicationsByYear({ ...options, year })); const allResults = await Promise.all(promises); - return ({ - datasource: 'openalex', - results: allResults.flat(), + return allResults.flat(); +}; + +const groupByAffiliations = ({ options, works }) => { + const regexp = getRegexpFromOptions(options); + // Compute distinct affiliations of works + let allAffiliationsTmp = {}; + works.forEach((work) => { + (work?.affiliations ?? []) + .forEach((affiliation) => { + const normalizedAffiliationName = normalizedName(affiliation); + if (!allAffiliationsTmp?.[normalizedAffiliationName]) { + // Check matches in affiliation name + let matches = affiliation?.match(regexp) ?? []; + // Normalize matched strings + matches = matches.map((match) => normalizedName(match)); + // Filter matches as unique + matches = [...new Set(matches)]; + allAffiliationsTmp[normalizedAffiliationName] = { + matches: matches.length, + name: affiliation, + nameHtml: affiliation.replace(regexp, '$&'), + works: [], + }; + } + allAffiliationsTmp[normalizedAffiliationName].works.push(work.id); + }); }); + + allAffiliationsTmp = Object.values(allAffiliationsTmp) + .map((affiliation, index) => ({ ...affiliation, id: index.toString(), works: [...new Set(affiliation.works)], worksNumber: [...new Set(affiliation.works)].length })); + return allAffiliationsTmp; }; export { deduplicateWorks, getFosmWorks, getOpenAlexPublications, + groupByAffiliations, };