Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Integration of CDK ExhaustiveFragmenter #133

Open
wants to merge 11 commits into
base: production
Choose a base branch
from
Open
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import de.unijena.cheminf.mortar.message.Message;
import de.unijena.cheminf.mortar.model.data.FragmentDataModel;
import de.unijena.cheminf.mortar.model.data.MoleculeDataModel;
import de.unijena.cheminf.mortar.model.fragmentation.algorithm.CDKExhaustiveFragmenter;
import de.unijena.cheminf.mortar.model.fragmentation.algorithm.ErtlFunctionalGroupsFinderFragmenter;
import de.unijena.cheminf.mortar.model.fragmentation.algorithm.IMoleculeFragmenter;
import de.unijena.cheminf.mortar.model.fragmentation.algorithm.ScaffoldGeneratorFragmenter;
Expand Down Expand Up @@ -187,6 +188,11 @@ public class FragmentationService {
*/
private final IMoleculeFragmenter scaffoldGF;
//
/**
* Exhaustive fragmenter
*/
private final IMoleculeFragmenter cdkEF;
//
/**
* SettingsContainer to hold settings.
*/
Expand All @@ -206,13 +212,15 @@ public class FragmentationService {
*/
public FragmentationService(SettingsContainer aSettingsContainer) {
//Note: Every fragmenter class should only be added once to the array or there will be problems with setting persistence!
this.fragmenters = new IMoleculeFragmenter[3];
this.fragmenters = new IMoleculeFragmenter[4];
this.ertlFGF = new ErtlFunctionalGroupsFinderFragmenter();
this.fragmenters[0] = this.ertlFGF;
this.sugarRUF = new SugarRemovalUtilityFragmenter();
this.fragmenters[1] = this.sugarRUF;
this.scaffoldGF = new ScaffoldGeneratorFragmenter();
this.fragmenters[2] = this.scaffoldGF;
this.cdkEF = new CDKExhaustiveFragmenter();
this.fragmenters[3] = this.cdkEF;
//
Objects.requireNonNull(aSettingsContainer, "aSettingsContainer must not be null");
this.settingsContainer = aSettingsContainer;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,283 @@
/*
* MORTAR - MOlecule fRagmenTAtion fRamework
* Copyright (C) 2024 Felix Baensch, Jonas Schaub ([email protected], [email protected])
*
* Source code is available at <https://github.com/FelixBaensch/MORTAR>
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/

package de.unijena.cheminf.mortar.model.fragmentation.algorithm;

import de.unijena.cheminf.mortar.gui.util.GuiUtil;
import de.unijena.cheminf.mortar.message.Message;
import de.unijena.cheminf.mortar.model.io.Importer;
import de.unijena.cheminf.mortar.model.util.BasicDefinitions;
import de.unijena.cheminf.mortar.model.util.SimpleIDisplayEnumConstantProperty;

import javafx.beans.property.Property;
import javafx.beans.property.SimpleIntegerProperty;

import org.openscience.cdk.DefaultChemObjectBuilder;
import org.openscience.cdk.fragment.ExhaustiveFragmenter;
import org.openscience.cdk.interfaces.IAtomContainer;
import org.openscience.cdk.smiles.SmilesParser;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
* Wrapper class that makes the
* <a href="https://cdk.github.io/cdk/latest/docs/api/org/openscience/cdk/fragment/ExhaustiveFragmenter.html">
* exhaustive fragmentation
* </a>
* from the CDK, available for MORTAR.
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

unnecessary comma

*
* @author Tom Weiß
* @version 1.0.0.0
*/
public class CDKExhaustiveFragmenter implements IMoleculeFragmenter {
//<editor-fold desc="Private static final variables">
/**
* The default value for the minimum fragment size used for the fragmentation.
*/
private static final int DEFAULT_MINIMUM_FRAGMENT_SIZE = 6;
//
/**
* The name of the algorithm used for fragmentation.
*/
private static final String ALGORITHM_NAME = "Exhaustive Fragmenter";
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These two should be public.

//</editor-fold>
//
//<editor-fold desc="Private final variables">
/**
* The minimum size of the returned fragments. This size consists of all atoms, that are connected by more than
* a single bond or have more than one single bond.
*/
private final SimpleIntegerProperty minimumFragmentSize;
//
/**
* All settings of this fragmenter, encapsulated in JavaFX properties for binding in GUI.
*/
private final List<Property<?>> settings;
//
/**
* Map to store pairs of {@literal <setting name, tooltip text>}.
*/
private final HashMap<String, String> settingNameTooltipTextMap;
//
/**
* Map to store pairs of {@literal <setting name, display name>}.
*/
private final HashMap<String, String> settingNameDisplayNameMap;
//
/**
* Instance of ExhaustiveFragmenter class to fragment a molecule.
*/
private final ExhaustiveFragmenter cdkEFInstance;
//
/**
* Logger of this class.
*/
private static final Logger LOGGER = Logger.getLogger(CDKExhaustiveFragmenter.class.getName());
//</editor-fold>
//
//<editor-fold desc="Constructor">
/**
* Constructor, all settings are initialised with their default values as declared in the respective public constants.
*/
public CDKExhaustiveFragmenter() {
int tmpNumberOfSettings = 2;
this.settingNameTooltipTextMap = new HashMap<>(tmpNumberOfSettings,
BasicDefinitions.DEFAULT_HASH_COLLECTION_LOAD_FACTOR);
this.settingNameDisplayNameMap = new HashMap<>(tmpNumberOfSettings,
BasicDefinitions.DEFAULT_HASH_COLLECTION_LOAD_FACTOR);
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please use CollectionUtil.calculateInitialHashCollectionCapacity() or HashMap.newHashMap().

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see now that you only have one setting but are initialising the maps with a capacity of 2, ok. But for the future, please use the ways I indicated.

this.cdkEFInstance = new ExhaustiveFragmenter();
this.minimumFragmentSize = new SimpleIntegerProperty(this,
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would put -Setting at the end of this variable or -Preference. Otherwise, one might expect an integer here.

"Minimum Size for the returned fragments",
DEFAULT_MINIMUM_FRAGMENT_SIZE) {
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just as instance variables are indicated by the this statement, static variables are indicated by putting the class name upfront. So "CDKExhaustiveFragementer.DEFAULT_MINIMUM_FRAGMENT_SIZE".

@Override
public void set(int newValue) {
if (newValue > 0) {
try {
//throws IllegalArgumentException
CDKExhaustiveFragmenter.this.cdkEFInstance.setMinimumFragmentSize(newValue);
} catch (IllegalArgumentException anException) {
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think the ExhaustiveFragmenter method throws that exception, so you don't need try-catch here.

CDKExhaustiveFragmenter.LOGGER.log(Level.WARNING, anException.toString(), anException);
GuiUtil.guiExceptionAlert(Message.get("Fragmenter.IllegalSettingValue.Title"),
Message.get("Fragmenter.IllegalSettingValue.Header"),
anException.toString(),
anException);
//re-throws the exception to properly reset the binding
throw anException;
}
super.set(newValue);
}
else {
IllegalArgumentException anException = new IllegalArgumentException("The minimum fragment size can not be zero");
CDKExhaustiveFragmenter.LOGGER.log(Level.WARNING, anException.toString(), anException);
GuiUtil.guiExceptionAlert(Message.get("Fragmenter.IllegalSettingValue.Title"),
Message.get("Fragmenter.IllegalSettingValue.Header"),
anException.toString(),
anException);
//re-throws the exception to properly reset the binding
throw anException;
}
}
};
this.settingNameTooltipTextMap.put(this.minimumFragmentSize.getName(),
Message.get("CDKExhaustiveFragmenter.minFragmentSize.tooltip"));
this.settingNameDisplayNameMap.put(this.minimumFragmentSize.getName(),
Message.get("CDKExhaustiveFragmenter.minFragmentSize.displayName"));
this.settings = new ArrayList<>(tmpNumberOfSettings);
this.settings.add(this.minimumFragmentSize);
}
//</editor-fold>
//
//<editor-fold desc="Public properties get">
/**
* Returns the minimum fragment size currently set.
*
* @return the currently set minimum fragment size.
*/
public int getMinimumFragmentSize() {
return this.minimumFragmentSize.get();
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add a getter for the property also.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And a setter is missing.

}
//</editor-fold>
//
//<editor-fold desc="IMoleculeFragmenter methods">
@Override
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add an empty line, the code fold is not working properly, otherwise (because of the @OverRide below it)

public List<Property<?>> settingsProperties() {
return this.settings;
}

@Override
public Map<String, String> getSettingNameToTooltipTextMap() {
return this.settingNameTooltipTextMap;
}

@Override
public Map<String, String> getSettingNameToDisplayNameMap() {
return this.settingNameDisplayNameMap;
}

@Override
public String getFragmentationAlgorithmName() {
return CDKExhaustiveFragmenter.ALGORITHM_NAME;
}

@Override
public String getFragmentationAlgorithmDisplayName() {
return Message.get("CDKExhaustiveFragmenter.displayName");
}

@Override
public FragmentSaturationOption getFragmentSaturationSetting() {
//TODO: there is currently no possibility to implement saturation settings for the exhaustive fragmenter.
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you describe this in more detail here like you did below?
And the way to do it here would be to throw an UnsupportedOperationException in these three methods. But please test whether this impacts running MORTAR. Since you do not add the property to the list of settings, we should be fine.

return null;
}

@Override
public SimpleIDisplayEnumConstantProperty fragmentSaturationSettingProperty() {
//TODO: there is currently no possibility to implement saturation settings for the exhaustive fragmenter.
return null;
}

@Override
public void setFragmentSaturationSetting(FragmentSaturationOption anOption) throws NullPointerException {
//TODO: there is currently no possibility to implement saturation settings for the exhaustive fragmenter.
}

@Override
public IMoleculeFragmenter copy() {
CDKExhaustiveFragmenter tmpCopy = new CDKExhaustiveFragmenter();
tmpCopy.minimumFragmentSize.set(this.minimumFragmentSize.get());
return tmpCopy;
}

@Override
public void restoreDefaultSettings() {
this.minimumFragmentSize.set(CDKExhaustiveFragmenter.DEFAULT_MINIMUM_FRAGMENT_SIZE);
}

@Override
public List<IAtomContainer> fragmentMolecule(IAtomContainer aMolecule) throws NullPointerException, IllegalArgumentException, CloneNotSupportedException {
//<editor-fold desc="Parameter tests">
Objects.requireNonNull(aMolecule, "Given molecule is null.");
boolean tmpCanBeFragmented = this.canBeFragmented(aMolecule);
if (!tmpCanBeFragmented) {
throw new IllegalArgumentException("Given molecule cannot be fragmented but should be filtered or preprocessed first.");
}
//</editor-fold>
IAtomContainer tmpMoleculeClone = aMolecule.clone();
List<IAtomContainer> tmpFragments = new ArrayList<>(0);
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Think of a more suiting default initial size, like maybe the number of bonds in the molecule times two?

try {
SmilesParser tmpSmilesParser = new SmilesParser(DefaultChemObjectBuilder.getInstance());
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please use SilentChemObjectBuilder

this.cdkEFInstance.generateFragments(tmpMoleculeClone);
// there is also an option to extract atom containers directly with getFragmentsAsContainers but this oversaturates
// fragments described in this issue https://github.com/cdk/cdk/issues/1119.
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good that you put this here but please flag it with TODO:

List<String> tmpSmiles = new ArrayList<>(List.of(this.cdkEFInstance.getFragments()));
for (String smile : tmpSmiles) {
tmpFragments.add(tmpSmilesParser.parseSmiles(smile));
}

} catch (Exception anException) {
throw new IllegalArgumentException("An error occurred during fragmentation: " + anException.toString() + " Molecule Name: " + aMolecule.getProperty(Importer.MOLECULE_NAME_PROPERTY_KEY));
}
return tmpFragments;
}

@Override
public boolean shouldBeFiltered(IAtomContainer aMolecule) {
return (Objects.isNull(aMolecule) || aMolecule.isEmpty());
}

@Override
public boolean shouldBePreprocessed(IAtomContainer aMolecule) throws NullPointerException {
Objects.requireNonNull(aMolecule, "Given molecule is null.");
return false;
}

@Override
public boolean canBeFragmented(IAtomContainer aMolecule) throws NullPointerException {
Objects.requireNonNull(aMolecule, "Given molecule is null.");
boolean tmpShouldBeFiltered = this.shouldBeFiltered(aMolecule);
boolean tmpShouldBePreprocessed = this.shouldBePreprocessed(aMolecule);
return !(tmpShouldBeFiltered || tmpShouldBePreprocessed);
}

@Override
public IAtomContainer applyPreprocessing(IAtomContainer aMolecule) throws NullPointerException, IllegalArgumentException, CloneNotSupportedException {
Objects.requireNonNull(aMolecule, "Given molecule is null.");
boolean tmpShouldBeFiltered = this.shouldBeFiltered(aMolecule);
if (tmpShouldBeFiltered) {
throw new IllegalArgumentException("The given molecule cannot be preprocessed but should be filtered.");
}
if (!this.shouldBePreprocessed(aMolecule)) {
return aMolecule.clone();
}
return aMolecule.clone();
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The if statement is unnecessary. I guess you wanted to leave room between the if statement and the return statement for actual preprocessing should you need to add it later? If yes, please add a comment, at least. Or simply remove the if.

}
//</editor-fold>
}
Original file line number Diff line number Diff line change
Expand Up @@ -388,6 +388,10 @@ ErtlFunctionalGroupsFinderFragmenter.EFGFFragmenterReturnedFragmentsOption.OnlyA
ErtlFunctionalGroupsFinderFragmenter.EFGFFragmenterReturnedFragmentsOption.OnlyAlkanes.tooltip = return only the non-functional-group alkane fragments of a molecule after fragmentation
ErtlFunctionalGroupsFinderFragmenter.EFGFFragmenterReturnedFragmentsOption.All.displayName = functional groups and alkanes
ErtlFunctionalGroupsFinderFragmenter.EFGFFragmenterReturnedFragmentsOption.All.tooltip = return both, functional groups and alkane fragments, after fragmentation
##CDKExhaustiveFragmenter##
CDKExhaustiveFragmenter.displayName = Exhaustive Fragmenter
CDKExhaustiveFragmenter.minFragmentSize.tooltip = Defines the minimum number of atoms (inclusive) that are required for one Fragment. The number of atoms consists of all atoms, that are connected by more than one single bond or have more than one single bond.
CDKExhaustiveFragmenter.minFragmentSize.displayName = Minimum fragment size setting
##SugarRemovalUtilityFragmenter##
SugarRemovalUtilityFragmenter.displayName = Sugar Removal Utility
SugarRemovalUtilityFragmenter.returnedFragmentsSetting.tooltip = Defines which fragments should be returned, sugar moieties, the aglycone, or both
Expand Down
Loading
Loading