Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Integration of CDK ExhaustiveFragmenter #133

Open
wants to merge 11 commits into
base: production
Choose a base branch
from
Open
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import de.unijena.cheminf.mortar.message.Message;
import de.unijena.cheminf.mortar.model.data.FragmentDataModel;
import de.unijena.cheminf.mortar.model.data.MoleculeDataModel;
import de.unijena.cheminf.mortar.model.fragmentation.algorithm.CDKExhaustiveFragmenter;
import de.unijena.cheminf.mortar.model.fragmentation.algorithm.ErtlFunctionalGroupsFinderFragmenter;
import de.unijena.cheminf.mortar.model.fragmentation.algorithm.IMoleculeFragmenter;
import de.unijena.cheminf.mortar.model.fragmentation.algorithm.ScaffoldGeneratorFragmenter;
Expand Down Expand Up @@ -187,6 +188,11 @@ public class FragmentationService {
*/
private final IMoleculeFragmenter scaffoldGF;
//
/**
* Exhaustive fragmenter
*/
private final IMoleculeFragmenter cdkEF;
//
/**
* SettingsContainer to hold settings.
*/
Expand All @@ -206,13 +212,15 @@ public class FragmentationService {
*/
public FragmentationService(SettingsContainer aSettingsContainer) {
//Note: Every fragmenter class should only be added once to the array or there will be problems with setting persistence!
this.fragmenters = new IMoleculeFragmenter[3];
this.fragmenters = new IMoleculeFragmenter[4];
this.ertlFGF = new ErtlFunctionalGroupsFinderFragmenter();
this.fragmenters[0] = this.ertlFGF;
this.sugarRUF = new SugarRemovalUtilityFragmenter();
this.fragmenters[1] = this.sugarRUF;
this.scaffoldGF = new ScaffoldGeneratorFragmenter();
this.fragmenters[2] = this.scaffoldGF;
this.cdkEF = new CDKExhaustiveFragmenter();
this.fragmenters[3] = this.cdkEF;
//
Objects.requireNonNull(aSettingsContainer, "aSettingsContainer must not be null");
this.settingsContainer = aSettingsContainer;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,302 @@
/*
* MORTAR - MOlecule fRagmenTAtion fRamework
* Copyright (C) 2024 Felix Baensch, Jonas Schaub ([email protected], [email protected])
*
* Source code is available at <https://github.com/FelixBaensch/MORTAR>
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/

package de.unijena.cheminf.mortar.model.fragmentation.algorithm;

import de.unijena.cheminf.mortar.gui.util.GuiUtil;
import de.unijena.cheminf.mortar.message.Message;
import de.unijena.cheminf.mortar.model.io.Importer;
import de.unijena.cheminf.mortar.model.util.BasicDefinitions;
import de.unijena.cheminf.mortar.model.util.CollectionUtil;
import de.unijena.cheminf.mortar.model.util.SimpleIDisplayEnumConstantProperty;

import javafx.beans.property.Property;
import javafx.beans.property.SimpleIntegerProperty;

import org.openscience.cdk.fragment.ExhaustiveFragmenter;
import org.openscience.cdk.interfaces.IAtomContainer;
import org.openscience.cdk.silent.SilentChemObjectBuilder;
import org.openscience.cdk.smiles.SmilesParser;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
* Wrapper class that makes the
* <a href="https://cdk.github.io/cdk/latest/docs/api/org/openscience/cdk/fragment/ExhaustiveFragmenter.html">
* exhaustive fragmentation
* </a>
* from the CDK available for MORTAR. It has a performance of O(n!) where n is the number of splittable bonds. Splittable
* bonds are defined as non-ring, single bonds that are connected to at least one other atom, that is <b>not</b> an
* implicit hydrogen.
*
* @author Tom Weiß
* @version 1.0.0.0
*/
public class CDKExhaustiveFragmenter implements IMoleculeFragmenter {
//<editor-fold desc="Public static final variables">
/**
* The default value for the minimum fragment size used for the fragmentation.
*/
public static final int DEFAULT_MINIMUM_FRAGMENT_SIZE = 6;
//
/**
* The name of the algorithm used for fragmentation.
*/
public static final String ALGORITHM_NAME = "Exhaustive Fragmenter";
//</editor-fold>
//
//<editor-fold desc="Private final variables">
/**
* The minimum size of the returned fragments. This size consists of all atoms, that are connected by more than
* a single bond or have more than one single bond.
*/
private final SimpleIntegerProperty minimumFragmentSizeSetting;
//
/**
* All settings of this fragmenter, encapsulated in JavaFX properties for binding in GUI.
*/
private final List<Property<?>> settings;
//
/**
* Map to store pairs of {@literal <setting name, tooltip text>}.
*/
private final HashMap<String, String> settingNameTooltipTextMap;
//
/**
* Map to store pairs of {@literal <setting name, display name>}.
*/
private final HashMap<String, String> settingNameDisplayNameMap;
//
/**
* Instance of ExhaustiveFragmenter class to fragment a molecule.
*/
private final ExhaustiveFragmenter cdkEFInstance;
//
/**
* Logger of this class.
*/
private static final Logger LOGGER = Logger.getLogger(CDKExhaustiveFragmenter.class.getName());
//</editor-fold>
//
//<editor-fold desc="Constructor">
/**
* Constructor, all settings are initialised with their default values as declared in the respective public constants.
*/
public CDKExhaustiveFragmenter() {
int tmpNumberOfSettingsForTooltipMapSize = 1;
int tmpInitialCapacityForSettingNameTooltipTextMap = CollectionUtil.calculateInitialHashCollectionCapacity(
tmpNumberOfSettingsForTooltipMapSize,
BasicDefinitions.DEFAULT_HASH_COLLECTION_LOAD_FACTOR);
this.settingNameTooltipTextMap = new HashMap<>(tmpInitialCapacityForSettingNameTooltipTextMap,
BasicDefinitions.DEFAULT_HASH_COLLECTION_LOAD_FACTOR);
this.settingNameDisplayNameMap = new HashMap<>(tmpInitialCapacityForSettingNameTooltipTextMap,
BasicDefinitions.DEFAULT_HASH_COLLECTION_LOAD_FACTOR);
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please use CollectionUtil.calculateInitialHashCollectionCapacity() or HashMap.newHashMap().

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see now that you only have one setting but are initialising the maps with a capacity of 2, ok. But for the future, please use the ways I indicated.

this.cdkEFInstance = new ExhaustiveFragmenter();
this.minimumFragmentSizeSetting = new SimpleIntegerProperty(this,
"Minimum Size for the returned fragments",
CDKExhaustiveFragmenter.DEFAULT_MINIMUM_FRAGMENT_SIZE) {
@Override
public void set(int newValue) {
if (newValue > 0) {
CDKExhaustiveFragmenter.this.cdkEFInstance.setMinimumFragmentSize(newValue);
super.set(newValue);
}
else {
IllegalArgumentException anException = new IllegalArgumentException("The minimum fragment size can not be zero");
CDKExhaustiveFragmenter.LOGGER.log(Level.WARNING, anException.toString(), anException);
GuiUtil.guiExceptionAlert(Message.get("Fragmenter.IllegalSettingValue.Title"),
Message.get("Fragmenter.IllegalSettingValue.Header"),
anException.toString(),
anException);
//re-throws the exception to properly reset the binding
throw anException;
}
}
};
this.settingNameTooltipTextMap.put(this.minimumFragmentSizeSetting.getName(),
Message.get("CDKExhaustiveFragmenter.minFragmentSize.tooltip"));
this.settingNameDisplayNameMap.put(this.minimumFragmentSizeSetting.getName(),
Message.get("CDKExhaustiveFragmenter.minFragmentSize.displayName"));
this.settings = new ArrayList<>(tmpNumberOfSettingsForTooltipMapSize);
this.settings.add(this.minimumFragmentSizeSetting);
}
//</editor-fold>
//
//<editor-fold desc="Public properties get">
/**
* Returns the setting for the minimum fragment size.
*
* @return the setting for the minimum fragment size.
*/
public SimpleIntegerProperty getMinimumFragmentSizeSetting() {
return this.minimumFragmentSizeSetting;
}
/**
* Returns the minimum fragment size currently set.
*
* @return the currently set minimum fragment size.
*/
public int getMinimumFragmentSize() {
return this.minimumFragmentSizeSetting.get();
}
//</editor-fold>
//
//<editor-fold desc="Public properties set">

/**
* Returns the minimum fragment size currently set.
*
* @param minimumFragmentSize the new minimum fragment size.
*/
public void setMinimumFragmentSize(int minimumFragmentSize) {
this.minimumFragmentSizeSetting.set(minimumFragmentSize);
}
//</editor-fold>
//
//<editor-fold desc="IMoleculeFragmenter methods">

@Override
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add an empty line, the code fold is not working properly, otherwise (because of the @OverRide below it)

public List<Property<?>> settingsProperties() {
return this.settings;
}

@Override
public Map<String, String> getSettingNameToTooltipTextMap() {
return this.settingNameTooltipTextMap;
}

@Override
public Map<String, String> getSettingNameToDisplayNameMap() {
return this.settingNameDisplayNameMap;
}

@Override
public String getFragmentationAlgorithmName() {
return CDKExhaustiveFragmenter.ALGORITHM_NAME;
}

@Override
public String getFragmentationAlgorithmDisplayName() {
return Message.get("CDKExhaustiveFragmenter.displayName");
}

@Override
public FragmentSaturationOption getFragmentSaturationSetting() throws UnsupportedOperationException {
//TODO: there is currently no possibility to implement saturation settings for the exhaustive fragmenter.
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you describe this in more detail here like you did below?
And the way to do it here would be to throw an UnsupportedOperationException in these three methods. But please test whether this impacts running MORTAR. Since you do not add the property to the list of settings, we should be fine.

// Because the exhaustive fragmenter in the CDK saturates the fragments by default and is not configurable.
throw new UnsupportedOperationException("The saturation is currently not configurable for the " + CDKExhaustiveFragmenter.ALGORITHM_NAME);
}

@Override
public SimpleIDisplayEnumConstantProperty fragmentSaturationSettingProperty() throws UnsupportedOperationException {
//TODO: there is currently no possibility to implement saturation settings for the exhaustive fragmenter.
// Because the exhaustive fragmenter in the CDK saturates the fragments by default and is not configurable.
throw new UnsupportedOperationException("The saturation is currently not configurable for the " + CDKExhaustiveFragmenter.ALGORITHM_NAME);
}

@Override
public void setFragmentSaturationSetting(FragmentSaturationOption anOption) throws UnsupportedOperationException {
//TODO: there is currently no possibility to implement saturation settings for the exhaustive fragmenter.
// Because the exhaustive fragmenter in the CDK saturates the fragments by default and is not configurable.
throw new UnsupportedOperationException("The saturation is currently not configurable for the " + CDKExhaustiveFragmenter.ALGORITHM_NAME);
}

@Override
public IMoleculeFragmenter copy() {
CDKExhaustiveFragmenter tmpCopy = new CDKExhaustiveFragmenter();
tmpCopy.minimumFragmentSizeSetting.set(this.minimumFragmentSizeSetting.get());
return tmpCopy;
}

@Override
public void restoreDefaultSettings() {
this.minimumFragmentSizeSetting.set(CDKExhaustiveFragmenter.DEFAULT_MINIMUM_FRAGMENT_SIZE);
}

@Override
public List<IAtomContainer> fragmentMolecule(IAtomContainer aMolecule) throws NullPointerException, IllegalArgumentException, CloneNotSupportedException {
//<editor-fold desc="Parameter tests">
Objects.requireNonNull(aMolecule, "Given molecule is null.");
boolean tmpCanBeFragmented = this.canBeFragmented(aMolecule);
if (!tmpCanBeFragmented) {
throw new IllegalArgumentException("Given molecule cannot be fragmented but should be filtered or preprocessed first.");
}
//</editor-fold>
IAtomContainer tmpMoleculeClone = aMolecule.clone();
// a rough estimation of the number of unique fragments produced by this fragmenter.
int fragmentListSizeEstimation = tmpMoleculeClone.getAtomCount() / 2;
List<IAtomContainer> tmpFragments = new ArrayList<>(fragmentListSizeEstimation);
try {
SmilesParser tmpSmilesParser = new SmilesParser(SilentChemObjectBuilder.getInstance());
this.cdkEFInstance.generateFragments(tmpMoleculeClone);
// TODO: there is also an option to extract atom containers directly with getFragmentsAsContainers but this
// oversaturates fragments described in this issue https://github.com/cdk/cdk/issues/1119.
List<String> tmpSmiles = new ArrayList<>(List.of(this.cdkEFInstance.getFragments()));
for (String smile : tmpSmiles) {
tmpFragments.add(tmpSmilesParser.parseSmiles(smile));
}

} catch (Exception anException) {
throw new IllegalArgumentException("An error occurred during fragmentation: " + anException.toString() + " Molecule Name: " + aMolecule.getProperty(Importer.MOLECULE_NAME_PROPERTY_KEY));
}
return tmpFragments;
}

@Override
public boolean shouldBeFiltered(IAtomContainer aMolecule) {
return (Objects.isNull(aMolecule) || aMolecule.isEmpty());
}

@Override
public boolean shouldBePreprocessed(IAtomContainer aMolecule) throws NullPointerException {
Objects.requireNonNull(aMolecule, "Given molecule is null.");
return false;
}

@Override
public boolean canBeFragmented(IAtomContainer aMolecule) throws NullPointerException {
Objects.requireNonNull(aMolecule, "Given molecule is null.");
boolean tmpShouldBeFiltered = this.shouldBeFiltered(aMolecule);
boolean tmpShouldBePreprocessed = this.shouldBePreprocessed(aMolecule);
return !(tmpShouldBeFiltered || tmpShouldBePreprocessed);
}

@Override
public IAtomContainer applyPreprocessing(IAtomContainer aMolecule) throws NullPointerException, IllegalArgumentException, CloneNotSupportedException {
Objects.requireNonNull(aMolecule, "Given molecule is null.");
boolean tmpShouldBeFiltered = this.shouldBeFiltered(aMolecule);
if (tmpShouldBeFiltered) {
throw new IllegalArgumentException("The given molecule cannot be preprocessed but should be filtered.");
}
return aMolecule.clone();
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The if statement is unnecessary. I guess you wanted to leave room between the if statement and the return statement for actual preprocessing should you need to add it later? If yes, please add a comment, at least. Or simply remove the if.

}
//</editor-fold>
}
Original file line number Diff line number Diff line change
Expand Up @@ -388,6 +388,10 @@ ErtlFunctionalGroupsFinderFragmenter.EFGFFragmenterReturnedFragmentsOption.OnlyA
ErtlFunctionalGroupsFinderFragmenter.EFGFFragmenterReturnedFragmentsOption.OnlyAlkanes.tooltip = return only the non-functional-group alkane fragments of a molecule after fragmentation
ErtlFunctionalGroupsFinderFragmenter.EFGFFragmenterReturnedFragmentsOption.All.displayName = functional groups and alkanes
ErtlFunctionalGroupsFinderFragmenter.EFGFFragmenterReturnedFragmentsOption.All.tooltip = return both, functional groups and alkane fragments, after fragmentation
##CDKExhaustiveFragmenter##
CDKExhaustiveFragmenter.displayName = Exhaustive Fragmenter
CDKExhaustiveFragmenter.minFragmentSize.tooltip = Defines the minimum number of atoms (inclusive) that are required for one Fragment. The number of atoms consists of all atoms, that are connected by more than one single bond or have more than one single bond.
CDKExhaustiveFragmenter.minFragmentSize.displayName = Minimum fragment size setting
##SugarRemovalUtilityFragmenter##
SugarRemovalUtilityFragmenter.displayName = Sugar Removal Utility
SugarRemovalUtilityFragmenter.returnedFragmentsSetting.tooltip = Defines which fragments should be returned, sugar moieties, the aglycone, or both
Expand Down
Loading
Loading