-
Notifications
You must be signed in to change notification settings - Fork 48
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
List datasets + files from core + staging sources #700
Changes from 11 commits
a3d863f
94c100e
07c0796
d48d0e6
c547a0c
02fe52f
eed3a2d
f13a7cd
99c2c83
8cb84fe
8940561
cac3780
a00b8e1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
import { CollectedSources } from '../../sources/index.js'; | ||
import { splitPrefixIntoParts } from '../../utils/prefix.js'; | ||
import { BadRequest } from '../../httpErrors.js'; | ||
|
||
/** | ||
* Handler for /charon/getAvailable/V2 requests | ||
* | ||
* WIP: The getAvailable API design should be flexible enough to access | ||
* resources across the nextstrain ecosystem. This is a more flexible | ||
* definition of resource than our Resource class as this may include | ||
* intermediate files (sequences, alignments, metadata) which are not | ||
* fetched by Auspice. | ||
* | ||
* The entire set of these resources is large, and this API hasn't been | ||
* tested for performance. Currently the set is held in memory and | ||
* filtering operations are performed for each request. | ||
Comment on lines
+14
to
+16
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For prototyping this is fine, for sure, but I think we should consider other implementations here (SQLite) before we go much further. |
||
* | ||
* Note: we can set properties in layers of middleware, or in this handler | ||
* since all of these properties are _only_ used by this handler | ||
* it makes sense to place them here. If they become more widespread | ||
* then it's easy to lift them to middleware and expose the data on context. | ||
* | ||
* The response is always JSON, which our frontend(s) will parse and display. | ||
* The Accept header is not considered, however we could expand this to use | ||
* `contentTypesProvided` and `contentTypesConsumed` to behave return different | ||
* formats accordingly. | ||
* | ||
*/ | ||
const getAvailableV2 = async (req, res) => { | ||
|
||
const filterOptions = parseFilterOptions(req); | ||
const availableResources = (new CollectedSources(req.user)) | ||
.resources(filterOptions) | ||
|
||
const available = Object.fromEntries( | ||
Array.from(availableResources) | ||
.map(([resourceType, resources]) => | ||
[resourceType, resources.map((resource) => { | ||
const data = { | ||
name: resource.name, | ||
url: resource.nextstrainUrl(), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't know what |
||
lastUpdated: resource.lastUpdated(), | ||
} | ||
if (filterOptions.showVersions) { | ||
data.versions = resource.versions.map((v) => | ||
[resource.lastUpdated(v), resource.nextstrainUrl(v)]) | ||
} | ||
return data; | ||
})]) | ||
) | ||
|
||
const WARNING = "This API is currently for internal use only. The request and response schemas will change." | ||
res.json({WARNING, ...available}); | ||
} | ||
|
||
export { | ||
getAvailableV2, | ||
}; | ||
|
||
|
||
const ALLOWED_TYPES = ['all', 'dataset', 'narrative', 'file']; | ||
|
||
function parseFilterOptions(req) { | ||
/** Currently we only consider a few filtering options, see the comment in `CollectedSources()` | ||
* for more details on the eventual aims here. | ||
* The current (WIP!) design encodes prefix as a query param, but if we decide that most requests | ||
* will only specify this option (e.g. sensible defaults are chosen / inferred for other options) | ||
* then it's perhaps more natural to encode this in the API path itself | ||
*/ | ||
const {source, prefixParts, isNarrative} = req.query?.prefix?.toString() ? | ||
splitPrefixIntoParts(req.query.prefix) : | ||
{source: false, prefixParts: [], isNarrative: undefined}; | ||
|
||
/* A single empty string is valid, but isn't desirable for filtering purposes */ | ||
if (prefixParts.length===1 && prefixParts[0]==='') prefixParts.pop(); | ||
|
||
const showVersions = req.query?.versions!==undefined; // test for presence of query parameter | ||
|
||
let resourceType = req.query?.type || 'all'; | ||
if (!ALLOWED_TYPES.includes(resourceType)) { | ||
throw new BadRequest("Specified resource type is not allowed"); | ||
} | ||
if (resourceType==='all' && isNarrative) { | ||
resourceType='narrative'; | ||
} | ||
if (isNarrative && resourceType!=='narrative') { | ||
throw new BadRequest("Contradiction in requested resource types between prefix + specified type") | ||
} | ||
|
||
return { | ||
source, | ||
showVersions, | ||
prefixParts, | ||
resourceType | ||
} | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -60,7 +60,7 @@ const Groups = (() => { | |
updateKnownDatasetsAndNarratives, | ||
1000 * 60 * 60 * 6 // 6 hours | ||
); | ||
updateKnownDatasetsAndNarratives(); // initial collection on server start | ||
// updateKnownDatasetsAndNarratives(); // initial collection on server start | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What's the need for this temporary |
||
|
||
class MetaGroupSource { | ||
constructor(user) { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
/** | ||
* CollectedResources represents a set of resources linked by being conceptually | ||
* "the same". Sameness often means all available versions of the same filename | ||
* but could also encompass different filenames, e.g. X and X_YYYY-MM-DD would | ||
* be grouped together. | ||
* | ||
* The representation of resources themselves (`objects`) varies, but current implementations | ||
* use plain objects or Maps. Methods to interact with the objects are defined by a | ||
* subclass of `CollectedResources` which allows this flexibility in underlying data | ||
* structure. | ||
* | ||
* There is a strong conceptual similarity between objects and Resource classes. | ||
* Fundamentally Resource (sub-)classes are designed to map a nextstrain.org URL | ||
* to the URLs which represent the associated files, however here we are going the other | ||
* way: we want to map _files_ to their nextstrain.org URLs, where available. | ||
* This includes concepts which Resources can't yet represent because they don't | ||
* (yet) have associated nextstrain.org URLs -- concepts such as S3 object version. | ||
* (GitHub-sourced files have similar issues where the branch (or commit) info is | ||
* encoded in the Source, not the Resource.) | ||
* While we can monkeypatch the instances, I think it's better to extend the Resource | ||
* (sub-)classes if we want to go in that direction. | ||
* | ||
* Note that our RBAC authorization for resources currently uses ae policy from the | ||
* underlying source + tags from the appropriate Resource instance; with the tags coming | ||
* from the underling source + the resource type (currently Narrative | Dataset). | ||
* Currently, if a user is allowed to access the source then they are allowed read access | ||
* to all assets, so we don't perform any authorization at the resource level here. | ||
* If this changes, or we want to do it for completness, we could easily extend the | ||
* `authorized` function to set a policy for CollectedResources (via the associated source) | ||
* and define `authzTags` for CollectedResources as applicable. | ||
*/ | ||
class CollectedResources { | ||
constructor(source, objects) { | ||
/** | ||
* Each entry in resources must define at least 'key', 'lastUpdated' and 'type' | ||
* All other keys are implementation dependent as they will be parsed by the | ||
* `nextstrainUrl` method on the respective source. | ||
*/ | ||
this._source = source; | ||
[this._name, this._resourceType] = this.constructor.objectName(objects[0]) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Basing the properties of a collection on its first element is a common source of bugs (e.g. when that first element is non-representative of the collection as a whole). It would be better to be explicit. |
||
this._versions = objects | ||
this._versions.sort((o) => this.lastUpdated(o)) | ||
} | ||
|
||
get name() {return this._name} | ||
|
||
get versions() {return this._versions;} | ||
|
||
get prefixParts() { | ||
/* potentially a generalised getter could be implemented here rather than via subclasses. | ||
The Resource class implements similar methods (see docstring above). */ | ||
throw new Error("get prefixParts() must be implemented by a subclass") | ||
} | ||
|
||
static objectName(object) { // eslint-disable-line no-unused-vars | ||
throw new Error("static objectName(object) must be implemented by a subclass") | ||
} | ||
|
||
lastUpdated(version=this._versions[0]) { // eslint-disable-line no-unused-vars | ||
throw new Error("lastUpdated(object) must be implemented by a subclass") | ||
} | ||
|
||
/** | ||
* Filter currently returns a boolean, but this may be expanded upon once filtering | ||
* is actually implemented | ||
*/ | ||
filter() { | ||
throw new Error("filter(args) must be implemented by a subclass") | ||
} | ||
|
||
nextstrainUrl(version=this._versions[0]) { // eslint-disable-line no-unused-vars | ||
throw new Error("nextstrainUrl(object) must be implemented by a subclass") | ||
} | ||
} | ||
|
||
export { | ||
CollectedResources | ||
}; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why is this endpoint part of the Charon API, which is tightly coupled with Auspice, instead of elsewhere?