-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Petab download and upload #6
Changes from 7 commits
a3b0572
7c47eb3
591f876
a9bca1a
dbcbfb7
62d5ef5
f906e75
573eb3c
c8b0566
431edfb
bcd2619
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
package life.qbic.io; | ||
|
||
import java.io.BufferedReader; | ||
import java.io.File; | ||
import java.io.FileReader; | ||
import java.io.IOException; | ||
import java.nio.charset.Charset; | ||
import java.nio.charset.StandardCharsets; | ||
import java.nio.file.Files; | ||
import java.nio.file.Path; | ||
import java.nio.file.Paths; | ||
import java.util.ArrayList; | ||
import java.util.List; | ||
import java.util.Objects; | ||
import life.qbic.model.petab.PetabMetadata; | ||
|
||
public class PetabParser { | ||
|
||
private final String META_INFO_YAML = "metaInformation.yaml"; | ||
|
||
public PetabMetadata parse(String dataPath) { | ||
|
||
File directory = new File(dataPath); | ||
List<String> sourcePetabReferences = new ArrayList<>(); | ||
|
||
File yaml = findYaml(directory); | ||
Comment on lines
+24
to
+27
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. NIT: Maybe having a distinct exit condition makes sense? If a yaml was provided but is empty, this currently will return a PetabMetadata object with empty metadata, which could be confusing 🤔 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Currently only parent references are stored in the metadata and those can be empty, which is checked in the respective command. |
||
if (yaml != null) { | ||
BufferedReader reader = null; | ||
try { | ||
reader = new BufferedReader(new FileReader(yaml)); | ||
boolean inIDBlock = false; | ||
while (true) { | ||
String line = reader.readLine(); | ||
if (line == null) { | ||
break; | ||
} | ||
if(inIDBlock && line.contains(":")) { | ||
inIDBlock = false; | ||
} | ||
if(inIDBlock) { | ||
String[] tokens = line.split("-"); | ||
if(tokens.length == 3) { | ||
String datasetCode = tokens[1].strip()+"-"+tokens[2].strip(); | ||
sourcePetabReferences.add(datasetCode); | ||
} | ||
} | ||
if (line.contains("openBISSourceIds:")) { | ||
inIDBlock = true; | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this section would benefit greatly from a small refactor into distinct methods that outline why the steps are taken for each if clause. Additionally these methods can more easily specify what happens if the input is malformed (token length != 3, why is the inIdBlock set to true even though it could be previously set to false etc.) |
||
} | ||
reader.close(); | ||
} catch (IOException e) { | ||
throw new RuntimeException(e); | ||
} | ||
} | ||
|
||
return new PetabMetadata(sourcePetabReferences); | ||
} | ||
|
||
public void addDatasetId(String outputPath, String datasetCode) throws IOException { | ||
|
||
Path path = Paths.get(Objects.requireNonNull(findYaml(new File(outputPath))).getPath()); | ||
Charset charset = StandardCharsets.UTF_8; | ||
|
||
String idInLine = "openBISId:(.*)?(\\r\\n|[\\r\\n])"; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Since you reuse the |
||
|
||
String content = Files.readString(path, charset); | ||
content = content.replaceAll(idInLine, "openBISId: "+datasetCode+"\n"); | ||
Files.write(path, content.getBytes(charset)); | ||
} | ||
|
||
private File findYaml(File directory) { | ||
for (File file : Objects.requireNonNull(directory.listFiles())) { | ||
if (file.isFile() && file.getName().equals(META_INFO_YAML)) { | ||
return file; | ||
} | ||
if (file.isDirectory()) { | ||
return findYaml(file); | ||
} | ||
} | ||
System.out.println(META_INFO_YAML + " not found"); | ||
return null; | ||
Comment on lines
+93
to
+94
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. NIT: This currently does not catch the case on what happens if the user provides multiple "metaInformation.yaml" by mistake. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. good point, I think the most top level yaml with that name should be considered the correct one, as it is now |
||
} | ||
|
||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,8 +8,8 @@ | |
|
||
// main command with format specifiers for the usage help message | ||
@Command(name = "openbis-scripts", | ||
subcommands = { SampleHierarchyCommand.class, FindDatasetsCommand.class, | ||
UploadDatasetCommand.class, SpaceStatisticsCommand.class }, | ||
subcommands = { SampleHierarchyCommand.class, FindDatasetsCommand.class, DownloadPetabCommand.class, | ||
UploadPetabResultCommand.class, UploadDatasetCommand.class, SpaceStatisticsCommand.class }, | ||
description = "A client software for querying openBIS.", | ||
Comment on lines
10
to
13
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would it make sense to give this command a more specific naming scheme? |
||
mixinStandardHelpOptions = true, versionProvider = ManifestVersionProvider.class) | ||
public class CommandLineOptions { | ||
|
This file was deleted.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
package life.qbic.io.commandline; | ||
|
||
import ch.ethz.sis.openbis.generic.OpenBIS; | ||
import ch.ethz.sis.openbis.generic.asapi.v3.dto.dataset.DataSet; | ||
import java.io.IOException; | ||
import java.util.Collections; | ||
import java.util.List; | ||
import java.util.Optional; | ||
import life.qbic.App; | ||
import life.qbic.io.PetabParser; | ||
import life.qbic.model.DatasetWithProperties; | ||
import life.qbic.model.download.OpenbisConnector; | ||
import picocli.CommandLine.Command; | ||
import picocli.CommandLine.Mixin; | ||
import picocli.CommandLine.Parameters; | ||
|
||
@Command(name = "download-petab", | ||
description = "Downloads PEtab dataset and stores some additional information from openbis in the petab.yaml") | ||
public class DownloadPetabCommand implements Runnable { | ||
|
||
@Parameters(arity = "1", paramLabel = "dataset id", description = "The code of the dataset to download. Can be found via list-data.") | ||
private String datasetCode; | ||
@Parameters(arity = "1", paramLabel = "download path", description = "The local path where to store the downloaded data") | ||
private String outputPath; | ||
@Mixin | ||
AuthenticationOptions auth = new AuthenticationOptions(); | ||
|
||
@Override | ||
public void run() { | ||
OpenBIS authentication = App.loginToOpenBIS(auth.getPassword(), auth.getUser(), auth.getAS(), auth.getDSS()); | ||
OpenbisConnector openbis = new OpenbisConnector(authentication); | ||
|
||
List<DataSet> datasets = openbis.findDataSets(Collections.singletonList(datasetCode)); | ||
|
||
if(datasets.isEmpty()) { | ||
System.out.println(datasetCode+" not found"); | ||
return; | ||
} | ||
DatasetWithProperties result = new DatasetWithProperties(datasets.get(0)); | ||
Optional<String> patientID = openbis.findPropertyInSampleHierarchy("PATIENT_DKFZ_ID", | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can this be assumed as always present? Maybe it makes sense to move this into a constant at the top of the class? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this is currently not used, we need more feedback first |
||
result.getExperiment().getIdentifier()); | ||
patientID.ifPresent(s -> result.addProperty("patientID", s)); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If multiple properties will be set a distinct mapping could also make sense down the line (via an Enum e.g.) |
||
|
||
System.out.println("Found dataset, downloading."); | ||
System.out.println(); | ||
|
||
openbis.downloadDataset(outputPath, datasetCode); | ||
|
||
PetabParser parser = new PetabParser(); | ||
try { | ||
System.out.println("Adding dataset identifier to metaInformation.yaml."); | ||
parser.addDatasetId(outputPath, datasetCode); | ||
} catch (IOException e) { | ||
System.out.println("Could not add dataset identifier."); | ||
throw new RuntimeException(e); | ||
} | ||
System.out.println("Done"); | ||
} | ||
|
||
} |
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
@@ -0,0 +1,79 @@ | ||||||
package life.qbic.io.commandline; | ||||||
|
||||||
import ch.ethz.sis.openbis.generic.OpenBIS; | ||||||
import ch.ethz.sis.openbis.generic.asapi.v3.dto.dataset.id.DataSetPermId; | ||||||
import java.io.File; | ||||||
import java.nio.file.Path; | ||||||
import java.util.ArrayList; | ||||||
import java.util.List; | ||||||
import life.qbic.App; | ||||||
import life.qbic.io.PetabParser; | ||||||
import life.qbic.model.download.OpenbisConnector; | ||||||
import picocli.CommandLine.Command; | ||||||
import picocli.CommandLine.Mixin; | ||||||
import picocli.CommandLine.Option; | ||||||
import picocli.CommandLine.Parameters; | ||||||
|
||||||
@Command(name = "upload-petab-result", | ||||||
description = "uploads a petab based on other PETab downloaded from openbis and attaches it to a provided experiment and any datasets referenced in the PETab metadata.") | ||||||
public class UploadPetabResultCommand implements Runnable { | ||||||
|
||||||
@Parameters(arity = "1", paramLabel = "file/folder", description = "The path to the file or folder to upload") | ||||||
private String dataPath; | ||||||
@Parameters(arity = "1", paramLabel = "experiment ID", description = "The full identifier of the experiment the data should be attached to. " | ||||||
+ "The identifier must be of the format: /space/project/experiment") | ||||||
private String experimentID; | ||||||
//@Option(arity = "1..*", paramLabel = "<parent_datasets>", description = "Optional list of dataset codes to act" | ||||||
// + " as parents for the upload. E.g. when this dataset has been generated using these datasets as input.", names = {"-pa", "--parents"}) | ||||||
Comment on lines
+25
to
+26
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why was this commented out? Can this be removed? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. the ids are now provided via the yaml file, but there might be feedback that this option is better |
||||||
private List<String> parents = new ArrayList<>(); | ||||||
@Mixin | ||||||
AuthenticationOptions auth = new AuthenticationOptions(); | ||||||
|
||||||
private OpenbisConnector openbis; | ||||||
private PetabParser petabParser = new PetabParser(); | ||||||
|
||||||
@Override | ||||||
public void run() { | ||||||
OpenBIS authentication = App.loginToOpenBIS(auth.getPassword(), auth.getUser(), auth.getAS(), auth.getDSS()); | ||||||
openbis = new OpenbisConnector(authentication); | ||||||
|
||||||
if(!pathValid(dataPath)) { | ||||||
System.out.printf("Path %s could not be found%n", dataPath); | ||||||
return; | ||||||
} | ||||||
if(!experimentExists(experimentID)) { | ||||||
System.out.printf("Experiment %s could not be found%n", experimentID); | ||||||
return; | ||||||
} | ||||||
System.out.println("Looking for reference datasets in metaInformation.yaml..."); | ||||||
parents = petabParser.parse(dataPath).getSourcePetabReferences(); | ||||||
if(parents.isEmpty()) { | ||||||
System.out.println("No reference datasets found. Did you set the openBISSourceIds property?"); | ||||||
} else { | ||||||
System.out.println("Found reference ids: "+String.join(", ",parents)); | ||||||
} | ||||||
if(!datasetsExist(parents)) { | ||||||
System.out.printf("One or more datasets %s could not be found%n", parents); | ||||||
return; | ||||||
} | ||||||
System.out.println(); | ||||||
System.out.println("Reference datasets found, uploading dataset..."); | ||||||
System.out.println(); | ||||||
//TODO copy and remove source references here | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You still have stuff "ToDo" ;)
Suggested change
|
||||||
DataSetPermId result = openbis.registerDataset(Path.of(dataPath), experimentID, parents); | ||||||
System.out.printf("Dataset %s was successfully created%n", result.getPermId()); | ||||||
} | ||||||
|
||||||
private boolean datasetsExist(List<String> datasetCodes) { | ||||||
return openbis.findDataSets(datasetCodes).size() == datasetCodes.size(); | ||||||
} | ||||||
|
||||||
private boolean experimentExists(String experimentID) { | ||||||
return openbis.experimentExists(experimentID); | ||||||
} | ||||||
|
||||||
private boolean pathValid(String dataPath) { | ||||||
return new File(dataPath).exists(); | ||||||
} | ||||||
|
||||||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
package life.qbic.model.petab; | ||
|
||
import com.fasterxml.jackson.annotation.JsonProperty; | ||
import java.util.List; | ||
|
||
public class Arguments { | ||
@JsonProperty | ||
List<String> housekeeperObservableIds; | ||
|
||
@Override | ||
public String toString() { | ||
return "Arguments{" + | ||
"housekeeperObservableIds=" + housekeeperObservableIds + | ||
'}'; | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
package life.qbic.model.petab; | ||
|
||
|
||
import com.fasterxml.jackson.annotation.JsonProperty; | ||
|
||
public class CellCountInfo { | ||
@JsonProperty | ||
double seeded; | ||
@JsonProperty | ||
String ncellsCount; | ||
@JsonProperty | ||
String unit; | ||
|
||
@Override | ||
public String toString() { | ||
return "CellCountInfo{" + | ||
"seeded=" + seeded + | ||
", ncellsCount='" + ncellsCount + '\'' + | ||
", unit='" + unit + '\'' + | ||
'}'; | ||
} | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
package life.qbic.model.petab; | ||
|
||
|
||
import com.fasterxml.jackson.annotation.JsonProperty; | ||
|
||
public class ConditionWithUnit { | ||
@JsonProperty | ||
String name; | ||
@JsonProperty | ||
String unit; | ||
|
||
public ConditionWithUnit() {} | ||
|
||
public ConditionWithUnit(String name, String unit) { | ||
this.name = name; | ||
this.unit = unit; | ||
} | ||
|
||
@Override | ||
public String toString() { | ||
return "ConditionWithUnit{" + | ||
"name='" + name + '\'' + | ||
", unit='" + unit + '\'' + | ||
'}'; | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
MAJOR NIT: Could this be provided case insensitive? I forsee that this file will be provides as metainformation.yaml instead of metaInformation.yaml
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
good point, I will compare case insensitive