From 2ad1e631ed02b0eecddf647a1d6f730efbf01ea5 Mon Sep 17 00:00:00 2001 From: eric Date: Wed, 21 Feb 2024 16:43:33 +0100 Subject: [PATCH] data linking with ror --- client/src/pages/worksView.jsx | 3 +++ client/src/utils/templates.jsx | 2 +- server/src/routes/works.routes.js | 42 ++++++++++++++++++------------- server/src/utils/utils.js | 14 +++++++++++ server/src/utils/works.js | 22 ++++++++++------ 5 files changed, 58 insertions(+), 25 deletions(-) diff --git a/client/src/pages/worksView.jsx b/client/src/pages/worksView.jsx index 8fc57cfa..15dce2cd 100644 --- a/client/src/pages/worksView.jsx +++ b/client/src/pages/worksView.jsx @@ -45,6 +45,9 @@ export default function WorksView({ + + + ); } diff --git a/client/src/utils/templates.jsx b/client/src/utils/templates.jsx index 29026fb3..f287edc7 100644 --- a/client/src/utils/templates.jsx +++ b/client/src/utils/templates.jsx @@ -18,7 +18,7 @@ const affiliationsTemplate = (rowData) => ( const allIdsTemplate = (rowData) => { let html = '
    '; rowData.allIds.forEach((id) => { - html += `
  • ${id.id_type}: `; + html += `
  • ${id.id_type}:
    `; const idLink = getIdLink(id.id_type, id.id_value); html += idLink ? `${id.id_value}` : `${id.id_value}`; html += '
  • '; diff --git a/server/src/routes/works.routes.js b/server/src/routes/works.routes.js index 0045c973..563e830b 100644 --- a/server/src/routes/works.routes.js +++ b/server/src/routes/works.routes.js @@ -16,32 +16,40 @@ router.route('/works') webSocketServer.broadcast(0); console.time(`1. Requests ${options}`); options.affiliationStrings = options.affiliationStrings.split(','); - if (options?.rors) { + if (options?.rors?.length > 0) { options.rors = options.rors.split(','); } options.datasets = options.datasets === 'true'; options.years = range(options.startYear, options.endYear); const optionsWithAffiliationStringsOnly = { - datasets: options.datasets, years: options.years, affiliationStrings: options.affiliationStrings, rors: [], + datasets: options.datasets, years: options.years, affiliationStrings: options.affiliationStrings.slice(0, 10), rors: [], }; - const optionsWithRorOnly = { - datasets: options.datasets, years: options.years, affiliationStrings: [], rors: options.rors, - }; - console.log('options00A', options); - console.log('options00', optionsWithAffiliationStringsOnly); - const responses = await Promise.all([ - getFosmWorks({ options }), - getOpenAlexPublications({ options: optionsWithAffiliationStringsOnly }), - getOpenAlexPublications({ options: optionsWithRorOnly }), - ]); + const queries = []; + queries.push(getFosmWorks({ options })); + queries.push(getOpenAlexPublications({ options: optionsWithAffiliationStringsOnly })); + if (options.rors?.length > 0) { + const optionsWithRorOnly = { + datasets: options.datasets, years: options.years, affiliationStrings: [], rors: options.rors, + }; + queries.push(getOpenAlexPublications({ options: optionsWithRorOnly })); + } + const responses = await Promise.all(queries); console.timeEnd(`1. Requests ${options}`); webSocketServer.broadcast(1); console.time(`2. Concat ${options}`); - const works = [ - ...responses[0], - ...responses[1], - ...responses[2], - ]; + let works = []; + if (options.rors?.length > 0) { + works = [ + ...responses[0], + ...responses[1], + ...responses[2], + ]; + } else { + works = [ + ...responses[0], + ...responses[1], + ]; + } console.timeEnd(`2. Concat ${options}`); webSocketServer.broadcast(2); console.time(`3. Dedup ${options}`); diff --git a/server/src/utils/utils.js b/server/src/utils/utils.js index 729d908a..0b23d1c3 100644 --- a/server/src/utils/utils.js +++ b/server/src/utils/utils.js @@ -8,6 +8,18 @@ const cleanId = (id) => ( : null ); +const intersectArrays = (array1, array2) => { + const res = array1.filter((value) => array2.includes(value)); + return res.length > 0; +}; + +const getAuthorOrcid = (elt) => { + const name = elt?.author?.name?.replace(',', ' ') || ''; + const orcid = elt?.author?.nameIdentifiers?.filter((ident) => ident.nameIdentifierScheme === 'ORCID')[0].nameIdentifier; + const res = name.concat(' ').concat(orcid); + return res; +}; + const countUniqueValues = ({ data = [], field }) => { const map = data .map((item) => item?.[field] ?? '') @@ -133,6 +145,8 @@ const range = (startYear, endYear = new Date().getFullYear()) => { export { cleanId, countUniqueValues, + getAuthorOrcid, + intersectArrays, range, removeDiacritics, }; diff --git a/server/src/utils/works.js b/server/src/utils/works.js index adf9e016..9c6e87c8 100644 --- a/server/src/utils/works.js +++ b/server/src/utils/works.js @@ -1,4 +1,4 @@ -import { cleanId, removeDiacritics } from './utils'; +import { cleanId, getAuthorOrcid, intersectArrays, removeDiacritics } from './utils'; const mergePublications = (publication1, publication2) => { // Any publication from FOSM is prioritized among others @@ -32,16 +32,18 @@ const getFosmQuery = (options, pit, searchAfter) => { options.affiliationStrings.forEach((affiliation) => { query.query.bool.should.push({ multi_match: { fields: affiliationsFields, query: `"${affiliation}"`, operator: 'and' } }); }); - options.rors.forEach((ror) => { - query.query.bool.should.push({ match: { rors: ror } }); - }); + if (options.rors?.length > 0) { + options.rors.forEach((ror) => { + query.query.bool.should.push({ match: { rors: ror } }); + }); + } query.query.bool.must.push({ range: { year: { gte: options.year, lte: options.year } } }); // Exclude files for Datacite query.query.bool.must_not.push({ terms: { genre: ['file', 'version', 'file_'] } }); query.query.bool.minimum_should_match = 1; query._source = [ - 'affiliations', 'authors', 'doi', 'external_ids', 'genre', 'genre_raw', 'hal_id', 'id', 'publisher', - 'publisher_dissemination', 'publisher_raw', 'title', 'year', + 'affiliations', 'authors', 'doi', 'external_ids', 'genre', 'genre_raw', 'hal_id', 'id', 'publisher', 'format', + 'publisher_dissemination', 'publisher_raw', 'title', 'year', 'fr_reasons_concat', 'fr_publications_linked', 'fr_authors_name', 'fr_authors_orcid', ]; query.sort = ['_shard_doc']; if (pit) { @@ -93,10 +95,16 @@ const getFosmWorksByYear = async ({ results = [], options, pit, searchAfter }) = authors: (result._source?.authors ?? []).map((author) => author.full_name), datasource: ['fosm'], id: cleanId(result._source?.doi ?? result._source?.hal_id ?? result._source.id), - publisher: result._source?.publisher_dissemination ?? result._source?.publisher ?? '', + publisher: result._source?.publisher_dissemination ?? result._source?.publisher ?? result._source?.publisher_raw ?? '', title: result._source.title, type: result._source?.genre_raw ?? result._source.genre, year: result?._source?.year?.toString() ?? '', + format: result?._source?.format?.toString() ?? '', + fr_reasons: result?._source?.fr_reasons_concat?.toString() ?? '', + fr_publications_linked: result?._source?.fr_publications_linked?.toString() ?? '', + fr_publications_linked: result?._source?.fr_publications_linked?.filter((el) => intersectArrays(el?.rors || [], options.rors)).map((el) => el.doi).toString() ?? '', + fr_authors_name: result?._source?.fr_authors_name?.filter((el) => intersectArrays(el?.rors || [], options.rors)).map((el) => el.author.name).toString() ?? '', + fr_authors_orcid: result?._source?.fr_authors_orcid?.filter((el) => intersectArrays(el?.rors || [], options.rors)).map((el) => getAuthorOrcid(el)).toString() ?? '', }))); if (hits.length > 0 && (Number(process.env.FOSM_MAX_SIZE) === 0 || results.length < Number(process.env.FOSM_MAX_SIZE))) { // eslint-disable-next-line no-param-reassign