Skip to content

Commit

Permalink
feat(query): Option to query only datasets, close #48
Browse files (browse the repository at this point in the history)
  • Loading branch information
annelhote committed Dec 26, 2023
1 parent a870f85 commit 08124ab
Show file tree
Hide file tree
Showing 5 changed files with 42 additions and 22 deletions.
2 changes: 1 addition & 1 deletion client/src/pages/affiliationsTab.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ export default function AffiliationsTab({ affiliations, selectedAffiliations, se
<Row gutters>
<Col n="2">
<TextInput
label="Search affiliations on name"
label="Filter affiliations on name"
onChange={(e) => setFilteredAffiliationName(e.target.value)}
value={filteredAffiliationName}
/>
Expand Down
24 changes: 20 additions & 4 deletions client/src/pages/filters.jsx
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import {
Button,
Checkbox,
CheckboxGroup,
Col,
Row,
Select,
Expand All @@ -23,12 +25,14 @@ export default function Filters({ sendQuery }) {
if (searchParams.size === 0) {
setSearchParams({
affiliations: [],
datasets: false,
endYear: '2021',
startYear: '2021',
});
} else {
setCurrentSearchParams({
affiliations: searchParams.getAll('affiliations'),
datasets: searchParams.get('datasets') === 'true',
endYear: searchParams.get('endYear'),
startYear: searchParams.get('startYear'),
});
Expand Down Expand Up @@ -56,15 +60,15 @@ export default function Filters({ sendQuery }) {
<Col n="5">
<TagInput
hint="Press ENTER to search for several terms / expressions. If several, an OR operator is used."
label="Affiliation raw name"
label="Affiliation name"
message={message}
messageType={messageType}
onTagsChange={(affiliations) => setSearchParams({ ...currentSearchParams, affiliations })}
tags={currentSearchParams.affiliations}
onInputHandler={setOnInputAffiliationsHandler}
/>
</Col>
<Col n="2">
<Col n="1">
<Select
hint="&nbsp;"
label="Start year"
Expand All @@ -73,7 +77,7 @@ export default function Filters({ sendQuery }) {
onChange={(e) => setSearchParams({ ...currentSearchParams, startYear: e.target.value })}
/>
</Col>
<Col n="2">
<Col n="1">
<Select
hint="&nbsp;"
label="End year"
Expand All @@ -82,7 +86,19 @@ export default function Filters({ sendQuery }) {
onChange={(e) => setSearchParams({ ...currentSearchParams, endYear: e.target.value })}
/>
</Col>
<Col>
<Col n="2">
<CheckboxGroup
hint="&nbsp;"
legend="&nbsp;"
>
<Checkbox
label="Search for datasets only"
checked={currentSearchParams.datasets}
onChange={(e) => setSearchParams({ ...currentSearchParams, datasets: e.target.checked })}
/>
</CheckboxGroup>
</Col>
<Col n="2">
<Button
icon="ri-search-line"
onClick={checkAndSendQuery}
Expand Down
2 changes: 1 addition & 1 deletion client/src/pages/index.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ export default function Home() {
{isFetching && (
<PageSpinner />
)}
{!isFetching && allPublications.length > 0 && (
{!isFetching && (allAffiliations.length > 0 || allDatasets.length > 0 || allPublications.length > 0) && (
<Tabs defaultActiveTab={0}>
<Tab label="Grouped affiliations of works">
<AffiliationsTab
Expand Down
29 changes: 13 additions & 16 deletions server/src/routes/works.routes.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,9 @@ router.route('/works')
if (!options?.affiliations) {
res.status(400).json({ message: 'You must provide at least one affiliation.' });
} else {
options.affiliations = options.affiliations.split(',');
console.time(`0. Requests ${options.affiliations}`);
options.affiliations = options.affiliations.split(',');
options.datasets = options.datasets === 'true';
options.years = range(options.startYear, options.endYear);
const responses = await Promise.all([
getFosmWorks({ options }),
Expand All @@ -37,22 +38,18 @@ router.route('/works')
// Sort between publications and datasets
console.time(`4. Sort works ${options.affiliations}`);
const publications = [];
const datasets = [];
let datasets = [];
const deduplicatedWorksLength = deduplicatedWorks.length;
for (let i = 0; i < deduplicatedWorksLength; i += 1) {
const deduplicatedWork = deduplicatedWorks[i];
if (
(deduplicatedWork.datasource.includes('fosm') && deduplicatedWork.type !== 'dataset')
|| (deduplicatedWork.datasource.includes('openalex') && deduplicatedWork.type !== 'dataset')
) {
publications.push(deduplicatedWork);
} else if (
(deduplicatedWork.datasource.includes('fosm') && deduplicatedWork.type === 'dataset')
|| (deduplicatedWork.datasource.includes('openalex') && deduplicatedWork.type === 'dataset')
) {
datasets.push(deduplicatedWork);
} else {
console.error(`Work not sorted : ${JSON.stringify(deduplicatedWork)}`);
if (options.datasets) {
datasets = deduplicatedWorks;
} else {
for (let i = 0; i < deduplicatedWorksLength; i += 1) {
const deduplicatedWork = deduplicatedWorks[i];
if (deduplicatedWork.type !== 'dataset') {
publications.push(deduplicatedWork);
} else {
datasets.push(deduplicatedWork);
}
}
}
console.timeEnd(`4. Sort works ${options.affiliations}`);
Expand Down
7 changes: 7 additions & 0 deletions server/src/utils/works.js
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ const getFosmQuery = (options, pit, searchAfter) => {
query.query.bool.should.push({ multi_match: { fields: affiliationsFields, query: `"${affiliation}"`, operator: 'and' } });
});
query.query.bool.must.push({ range: { year: { gte: options.year, lte: options.year } } });
// Exclude files for Datacite
query.query.bool.must_not.push({ term: { genre: 'file' } });
query.query.bool.minimum_should_match = 1;
query._source = ['affiliations', 'authors', 'doi', 'external_ids', 'genre', 'genre_raw', 'hal_id', 'id', 'journal_name', 'title', 'year'];
Expand All @@ -44,6 +45,9 @@ const getFosmQuery = (options, pit, searchAfter) => {
query.search_after = searchAfter;
query.track_total_hits = false;
}
if (options.datasets) {
query.query.bool.must.push({ term: { genre_raw: 'dataset' } });
}
return query;
};

Expand Down Expand Up @@ -165,6 +169,9 @@ const getOpenAlexPublicationsByYear = (options, cursor = '*', previousResponse =
if (options.affiliations.length) {
url += `,raw_affiliation_string.search:(${options.affiliations.map((aff) => `"${aff}"`).join(' OR ')})`;
}
if (options.datasets) {
url += ',type:dataset';
}
if (process?.env?.OPENALEX_KEY) {
url += `&api_key=${process.env.OPENALEX_KEY}`;
} else {
Expand Down

0 comments on commit 08124ab

Please sign in to comment.