Skip to content

Commit

Permalink
data linking with ror
Browse files Browse the repository at this point in the history
  • Loading branch information
ericjeangirard committed Feb 21, 2024
1 parent de554f0 commit 2ad1e63
Show file tree
Hide file tree
Showing 5 changed files with 58 additions and 25 deletions.
3 changes: 3 additions & 0 deletions client/src/pages/worksView.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,9 @@ export default function WorksView({
<Column field="affiliationsHtml" header="Affiliations" body={affiliationsTemplate} />
<Column field="authors" header="Authors" body={authorsTemplate} filter filterMatchMode="contains" showFilterMenu={false} filterPlaceholder="Search by author" style={{ minWidth: '200px' }} />
<Column field="title" header="Title" filter filterMatchMode="contains" showFilterMenu={false} filterPlaceholder="Search by title" style={{ minWidth: '10px' }} />
<Column field="fr_publications_linked" header="Linked Article" />
<Column field="fr_authors_orcid" header="My institution author ORCID" />
<Column field="fr_authors_name" header="My institution author name" />
</DataTable>
);
}
Expand Down
2 changes: 1 addition & 1 deletion client/src/utils/templates.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ const affiliationsTemplate = (rowData) => (
const allIdsTemplate = (rowData) => {
let html = '<ul>';
rowData.allIds.forEach((id) => {
html += `<li key="${id.id_value}">${id.id_type}: `;
html += `<li key="${id.id_value}">${id.id_type}:<br>`;
const idLink = getIdLink(id.id_type, id.id_value);
html += idLink ? `<a target="_blank" href="${idLink}">${id.id_value}</a>` : `<span>${id.id_value}</span>`;
html += '</li>';
Expand Down
42 changes: 25 additions & 17 deletions server/src/routes/works.routes.js
Original file line number Diff line number Diff line change
Expand Up @@ -16,32 +16,40 @@ router.route('/works')
webSocketServer.broadcast(0);
console.time(`1. Requests ${options}`);
options.affiliationStrings = options.affiliationStrings.split(',');
if (options?.rors) {
if (options?.rors?.length > 0) {
options.rors = options.rors.split(',');
}
options.datasets = options.datasets === 'true';
options.years = range(options.startYear, options.endYear);
const optionsWithAffiliationStringsOnly = {
datasets: options.datasets, years: options.years, affiliationStrings: options.affiliationStrings, rors: [],
datasets: options.datasets, years: options.years, affiliationStrings: options.affiliationStrings.slice(0, 10), rors: [],
};
const optionsWithRorOnly = {
datasets: options.datasets, years: options.years, affiliationStrings: [], rors: options.rors,
};
console.log('options00A', options);
console.log('options00', optionsWithAffiliationStringsOnly);
const responses = await Promise.all([
getFosmWorks({ options }),
getOpenAlexPublications({ options: optionsWithAffiliationStringsOnly }),
getOpenAlexPublications({ options: optionsWithRorOnly }),
]);
const queries = [];
queries.push(getFosmWorks({ options }));
queries.push(getOpenAlexPublications({ options: optionsWithAffiliationStringsOnly }));
if (options.rors?.length > 0) {
const optionsWithRorOnly = {
datasets: options.datasets, years: options.years, affiliationStrings: [], rors: options.rors,
};
queries.push(getOpenAlexPublications({ options: optionsWithRorOnly }));
}
const responses = await Promise.all(queries);
console.timeEnd(`1. Requests ${options}`);
webSocketServer.broadcast(1);
console.time(`2. Concat ${options}`);
const works = [
...responses[0],
...responses[1],
...responses[2],
];
let works = [];
if (options.rors?.length > 0) {
works = [
...responses[0],
...responses[1],
...responses[2],
];
} else {
works = [
...responses[0],
...responses[1],
];
}
console.timeEnd(`2. Concat ${options}`);
webSocketServer.broadcast(2);
console.time(`3. Dedup ${options}`);
Expand Down
14 changes: 14 additions & 0 deletions server/src/utils/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,18 @@ const cleanId = (id) => (
: null
);

/**
 * Tells whether two arrays have at least one element in common.
 *
 * Despite its name, this returns a boolean, not the intersection itself.
 * Elements are compared the way `Array.prototype.includes` compares them.
 *
 * @param {Array} [array1] - First list; `null`/`undefined` is treated as empty.
 * @param {Array} [array2] - Second list; `null`/`undefined` is treated as empty.
 * @returns {boolean} `true` when at least one value of `array1` is found in `array2`.
 */
const intersectArrays = (array1, array2) => {
  // Callers may pass an optional list that was never provided; a missing list
  // simply cannot intersect with anything, so do not throw on it.
  const candidates = array1 ?? [];
  const pool = array2 ?? [];
  // `some` stops at the first shared value instead of collecting every match.
  return candidates.some((value) => pool.includes(value));
};

/**
 * Builds a display string of the form "<author name> <ORCID>" for an entry.
 *
 * The first comma of the author name is replaced by a space. The ORCID is the
 * `nameIdentifier` of the first item in `author.nameIdentifiers` whose
 * `nameIdentifierScheme` is exactly 'ORCID'.
 *
 * @param {object} [elt] - Entry carrying an optional `author` object with
 *   `name` and `nameIdentifiers` properties.
 * @returns {string} "<name> <orcid>" when an ORCID is found; otherwise the
 *   name alone (an empty string when the name is missing too).
 */
const getAuthorOrcid = (elt) => {
  const name = elt?.author?.name?.replace(',', ' ') || '';
  // `find` + optional chaining: an author without any ORCID identifier must
  // not throw, and must not leak the text "undefined" into the result.
  const orcid = elt?.author?.nameIdentifiers
    ?.find((ident) => ident?.nameIdentifierScheme === 'ORCID')
    ?.nameIdentifier ?? '';
  return orcid === '' ? name : `${name} ${orcid}`;
};

const countUniqueValues = ({ data = [], field }) => {
const map = data
.map((item) => item?.[field] ?? '')
Expand Down Expand Up @@ -133,6 +145,8 @@ const range = (startYear, endYear = new Date().getFullYear()) => {
export {
cleanId,
countUniqueValues,
getAuthorOrcid,
intersectArrays,
range,
removeDiacritics,
};
22 changes: 15 additions & 7 deletions server/src/utils/works.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { cleanId, removeDiacritics } from './utils';
import { cleanId, getAuthorOrcid, intersectArrays, removeDiacritics } from './utils';

const mergePublications = (publication1, publication2) => {
// Any publication from FOSM is prioritized among others
Expand Down Expand Up @@ -32,16 +32,18 @@ const getFosmQuery = (options, pit, searchAfter) => {
options.affiliationStrings.forEach((affiliation) => {
query.query.bool.should.push({ multi_match: { fields: affiliationsFields, query: `"${affiliation}"`, operator: 'and' } });
});
options.rors.forEach((ror) => {
query.query.bool.should.push({ match: { rors: ror } });
});
if (options.rors?.length > 0) {
options.rors.forEach((ror) => {
query.query.bool.should.push({ match: { rors: ror } });
});
}
query.query.bool.must.push({ range: { year: { gte: options.year, lte: options.year } } });
// Exclude files for Datacite
query.query.bool.must_not.push({ terms: { genre: ['file', 'version', 'file_'] } });
query.query.bool.minimum_should_match = 1;
query._source = [
'affiliations', 'authors', 'doi', 'external_ids', 'genre', 'genre_raw', 'hal_id', 'id', 'publisher',
'publisher_dissemination', 'publisher_raw', 'title', 'year',
'affiliations', 'authors', 'doi', 'external_ids', 'genre', 'genre_raw', 'hal_id', 'id', 'publisher', 'format',
'publisher_dissemination', 'publisher_raw', 'title', 'year', 'fr_reasons_concat', 'fr_publications_linked', 'fr_authors_name', 'fr_authors_orcid',
];
query.sort = ['_shard_doc'];
if (pit) {
Expand Down Expand Up @@ -93,10 +95,16 @@ const getFosmWorksByYear = async ({ results = [], options, pit, searchAfter }) =
authors: (result._source?.authors ?? []).map((author) => author.full_name),
datasource: ['fosm'],
id: cleanId(result._source?.doi ?? result._source?.hal_id ?? result._source.id),
publisher: result._source?.publisher_dissemination ?? result._source?.publisher ?? '',
publisher: result._source?.publisher_dissemination ?? result._source?.publisher ?? result._source?.publisher_raw ?? '',
title: result._source.title,
type: result._source?.genre_raw ?? result._source.genre,
year: result?._source?.year?.toString() ?? '',
format: result?._source?.format?.toString() ?? '',
fr_reasons: result?._source?.fr_reasons_concat?.toString() ?? '',
fr_publications_linked: result?._source?.fr_publications_linked?.toString() ?? '',
fr_publications_linked: result?._source?.fr_publications_linked?.filter((el) => intersectArrays(el?.rors || [], options.rors)).map((el) => el.doi).toString() ?? '',
fr_authors_name: result?._source?.fr_authors_name?.filter((el) => intersectArrays(el?.rors || [], options.rors)).map((el) => el.author.name).toString() ?? '',
fr_authors_orcid: result?._source?.fr_authors_orcid?.filter((el) => intersectArrays(el?.rors || [], options.rors)).map((el) => getAuthorOrcid(el)).toString() ?? '',
})));
if (hits.length > 0 && (Number(process.env.FOSM_MAX_SIZE) === 0 || results.length < Number(process.env.FOSM_MAX_SIZE))) {
// eslint-disable-next-line no-param-reassign
Expand Down

0 comments on commit 2ad1e63

Please sign in to comment.