diff --git a/License-header/License-header.txt b/License-header/License-header.txt index 70139b7..d1b5969 100644 --- a/License-header/License-header.txt +++ b/License-header/License-header.txt @@ -2,7 +2,7 @@ * ErtlFunctionalGroupsFinder for CDK * Copyright (c) $today.year Sebastian Fritsch, Stefan Neumann, Jonas Schaub, Christoph Steinbeck, and Achim Zielesny * - * Source code is available at + * Source code is available at * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties index 69a9715..a595206 100644 --- a/gradle/wrapper/gradle-wrapper.properties +++ b/gradle/wrapper/gradle-wrapper.properties @@ -1,5 +1,5 @@ distributionBase=GRADLE_USER_HOME distributionPath=wrapper/dists -distributionUrl=https\://services.gradle.org/distributions/gradle-7.1-bin.zip +distributionUrl=https\://services.gradle.org/distributions/gradle-8.5-bin.zip zipStoreBase=GRADLE_USER_HOME zipStorePath=wrapper/dists diff --git a/src/main/java/de/ibci/ertlfxgroupsfinder/performancetest/ErtlFunctionalGroupsFinderPerformanceTest.java b/src/main/java/de/ibci/ertlfxgroupsfinder/performancetest/ErtlFunctionalGroupsFinderPerformanceTest.java index 5ee5f8d..61e0cbb 100644 --- a/src/main/java/de/ibci/ertlfxgroupsfinder/performancetest/ErtlFunctionalGroupsFinderPerformanceTest.java +++ b/src/main/java/de/ibci/ertlfxgroupsfinder/performancetest/ErtlFunctionalGroupsFinderPerformanceTest.java @@ -1,9 +1,8 @@ -/** - * Performance test for +/* * ErtlFunctionalGroupsFinder for CDK - * Copyright (C) 2019 Jonas Schaub + * Copyright (c) 2023 Sebastian Fritsch, Stefan Neumann, Jonas Schaub, Christoph Steinbeck, and Achim Zielesny * - * Source code is available at + * Source code is available at * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License @@ -18,8 +17,27 @@ * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ + package de.ibci.ertlfxgroupsfinder.performancetest; +import org.openscience.cdk.CDKConstants; +import org.openscience.cdk.aromaticity.Aromaticity; +import org.openscience.cdk.aromaticity.ElectronDonation; +import org.openscience.cdk.atomtype.CDKAtomTypeMatcher; +import org.openscience.cdk.exception.CDKException; +import org.openscience.cdk.graph.ConnectivityChecker; +import org.openscience.cdk.graph.CycleFinder; +import org.openscience.cdk.graph.Cycles; +import org.openscience.cdk.interfaces.IAtom; +import org.openscience.cdk.interfaces.IAtomContainer; +import org.openscience.cdk.interfaces.IAtomContainerSet; +import org.openscience.cdk.interfaces.IAtomType; +import org.openscience.cdk.io.iterator.IteratingSDFReader; +import org.openscience.cdk.silent.SilentChemObjectBuilder; +import org.openscience.cdk.tools.CDKHydrogenAdder; +import org.openscience.cdk.tools.manipulator.AtomContainerManipulator; +import org.openscience.cdk.tools.manipulator.AtomTypeManipulator; + import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; @@ -39,88 +57,71 @@ import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; -import org.openscience.cdk.CDKConstants; -import org.openscience.cdk.aromaticity.Aromaticity; -import org.openscience.cdk.aromaticity.ElectronDonation; -import org.openscience.cdk.atomtype.CDKAtomTypeMatcher; -import org.openscience.cdk.exception.CDKException; -import org.openscience.cdk.graph.ConnectivityChecker; -import org.openscience.cdk.graph.CycleFinder; -import org.openscience.cdk.graph.Cycles; -import org.openscience.cdk.interfaces.IAtom; -import org.openscience.cdk.interfaces.IAtomContainer; -import org.openscience.cdk.interfaces.IAtomContainerSet; -import org.openscience.cdk.interfaces.IAtomType; -import org.openscience.cdk.io.iterator.IteratingSDFReader; -import org.openscience.cdk.silent.SilentChemObjectBuilder; -import org.openscience.cdk.tools.CDKHydrogenAdder; -import org.openscience.cdk.tools.manipulator.AtomContainerManipulator; -import org.openscience.cdk.tools.manipulator.AtomTypeManipulator; /** - * An application for testing the performance of the ErtlFunctionalGroupsFinder.find() method under parallelization on - * multiple threads. - * + * An application for testing the performance of the ErtlFunctionalGroupsFinder.find() method under parallelization on + * multiple threads. + * * @author Jonas Schaub * @version 1.0.0.1 */ public class ErtlFunctionalGroupsFinderPerformanceTest { - + // /** * Name of file for logging occurred exceptions */ private static final String EXCEPTIONS_LOG_FILE_NAME = "Exceptions_Log.txt"; - + /** * Name of file for writing results */ private static final String RESULTS_FILE_NAME = "Results.txt"; - + /** * All allowed atomic numbers to pass to the ErtlFunctionalGroupsFinder; * String will be split and resulting integers passed to a set */ private static final String NON_METALLIC_ATOMIC_NUMBERS = "1,2,6,7,8,9,10,15,16,17,18,34,35,36,53,54,86"; // - + // /** * All allowed atomic numbers to pass to the ErtlFunctionalGroupsFinder as a set of integers (will be parsed from * NON_METALLIC_ATOMIC_NUMBERS) */ private Set nonMetallicAtomicNumbersSet; - + /** * The working directory (the jar-file's directory) */ private String workingPath; - + /** * The given number of different threads to use */ private int numberOfThreadsToUse; - + /** * All molecules loaded from the SD file */ private IAtomContainer[] moleculesArray; - + /** * The aromaticity model in use */ private Aromaticity aromaticityModel; // - + // /** * Instantiates and starts the application. It first loads all molecules from a given SD file into memory and then - * distributes them equally on the given number of different threads to use. It measures the time it takes for all - * threads to complete the extraction of functional groups using the ErtlFunctionalGroupsFinder. It exits the system - * if an unexpected exception occurs that prevents the application from working, e.g. an IllegalArgumentException + * distributes them equally on the given number of different threads to use. It measures the time it takes for all + * threads to complete the extraction of functional groups using the ErtlFunctionalGroupsFinder. It exits the system + * if an unexpected exception occurs that prevents the application from working, e.g. an IllegalArgumentException * (will be logged to a file, not printed on the console). * - * @param anArgs the command line arguments, anArgs[0] must be the name of the SD file to load (must be located in + * @param anArgs the command line arguments, anArgs[0] must be the name of the SD file to load (must be located in * the same directory as the application's JAR file) and anArgs[1] must be the number of different threads to use * @throws java.io.IOException if the constructor is unable to open a text file for logging occurred exceptions */ @@ -128,7 +129,7 @@ public ErtlFunctionalGroupsFinderPerformanceTest(String[] anArgs) throws IOExcep this.workingPath = (new File("").getAbsoluteFile().getAbsolutePath()) + File.separator; LocalDateTime tmpDateTime = LocalDateTime.now(); String tmpTimeStamp = tmpDateTime.format(DateTimeFormatter.ofPattern("uuuu_MM_dd_HH_mm")); - File tmpExceptionsLogFile = new File(this.workingPath + File tmpExceptionsLogFile = new File(this.workingPath + ErtlFunctionalGroupsFinderPerformanceTest.EXCEPTIONS_LOG_FILE_NAME); FileWriter tmpExceptionsLogFileWriter = new FileWriter(tmpExceptionsLogFile, true); PrintWriter tmpExceptionsPrintWriter = new PrintWriter(tmpExceptionsLogFileWriter); @@ -165,7 +166,7 @@ public ErtlFunctionalGroupsFinderPerformanceTest(String[] anArgs) throws IOExcep tmpMetalNumbersInt[i] = Integer.parseInt(tmpMetalNumbersStrings[i]); } this.nonMetallicAtomicNumbersSet = new HashSet(Arrays.asList(tmpMetalNumbersInt)); - File tmpResultsLogFile = new File(this.workingPath + File tmpResultsLogFile = new File(this.workingPath + ErtlFunctionalGroupsFinderPerformanceTest.RESULTS_FILE_NAME); FileWriter tmpResultsLogFileWriter = new FileWriter(tmpResultsLogFile, true); PrintWriter tmpResultsPrintWriter = new PrintWriter(tmpResultsLogFileWriter); @@ -185,7 +186,7 @@ public ErtlFunctionalGroupsFinderPerformanceTest(String[] anArgs) throws IOExcep tmpMolecule = this.applyFiltersAndPreprocessing(tmpMolecule); tmpMoleculesList.add(tmpMolecule); } catch (Exception anException) { - /*If an IllegalArgumentException is thrown in applyFiltersAndPreprocessing (meaning that the molecule + /*If an IllegalArgumentException is thrown in applyFiltersAndPreprocessing (meaning that the molecule should be filtered) the molecule is skipped by catching this exception*/ } } @@ -240,7 +241,7 @@ public ErtlFunctionalGroupsFinderPerformanceTest(String[] anArgs) throws IOExcep } } // - + // /** * Performs all preprocessing needed for the ErtlFunctionalGroupsFinder and throws an IllegalArgumentException @@ -284,7 +285,7 @@ private IAtomContainer applyFiltersAndPreprocessing(IAtomContainer aMolecule) th this.aromaticityModel.apply(aMolecule); return aMolecule; } - + /** * Appends the given exception's stack trace to a log file. * @@ -296,8 +297,8 @@ private void appendToLogfile(Exception anException) { } PrintWriter tmpPrintWriter = null; try { - FileWriter tmpFileWriter = new FileWriter(this.workingPath - + ErtlFunctionalGroupsFinderPerformanceTest.EXCEPTIONS_LOG_FILE_NAME, + FileWriter tmpFileWriter = new FileWriter(this.workingPath + + ErtlFunctionalGroupsFinderPerformanceTest.EXCEPTIONS_LOG_FILE_NAME, true); tmpPrintWriter = new PrintWriter(tmpFileWriter); StringWriter tmpStringWriter = new StringWriter(); diff --git a/src/main/java/de/ibci/ertlfxgroupsfinder/performancetest/ExtractFunctionalGroupsTask.java b/src/main/java/de/ibci/ertlfxgroupsfinder/performancetest/ExtractFunctionalGroupsTask.java index 110529b..8823d21 100644 --- a/src/main/java/de/ibci/ertlfxgroupsfinder/performancetest/ExtractFunctionalGroupsTask.java +++ b/src/main/java/de/ibci/ertlfxgroupsfinder/performancetest/ExtractFunctionalGroupsTask.java @@ -1,9 +1,8 @@ -/** - * Performance test for +/* * ErtlFunctionalGroupsFinder for CDK - * Copyright (C) 2019 Jonas Schaub + * Copyright (c) 2023 Sebastian Fritsch, Stefan Neumann, Jonas Schaub, Christoph Steinbeck, and Achim Zielesny * - * Source code is available at + * Source code is available at * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License @@ -18,39 +17,41 @@ * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ + package de.ibci.ertlfxgroupsfinder.performancetest; -import java.util.concurrent.Callable; import org.openscience.cdk.interfaces.IAtomContainer; import org.openscience.cdk.tools.ErtlFunctionalGroupsFinder; +import java.util.concurrent.Callable; + /** - * A Callable thread that extracts functional groups from all molecules in a given array using the + * A Callable thread that extracts functional groups from all molecules in a given array using the * ErtlFunctionalGroupsFinder class. - * + * * @author Jonas Schaub */ public class ExtractFunctionalGroupsTask implements Callable { private final IAtomContainer[] moleculesArray; - + private final ErtlFunctionalGroupsFinder ertlFinder; - + /** * Instantiates the thread. - * - * @param aListOfMolecules atom containers should meet the ErtlFunctionalGroupsFinder's input specifications but + * + * @param aListOfMolecules atom containers should meet the ErtlFunctionalGroupsFinder's input specifications but * any occurring exception will be caught */ public ExtractFunctionalGroupsTask(IAtomContainer[] aListOfMolecules) { this.moleculesArray = aListOfMolecules; this.ertlFinder = new ErtlFunctionalGroupsFinder(); } - + /** - * Applies the ErtlFunctionalGroupsFinder.find(IAtomContainer container, boolean clone) method on all given + * Applies the ErtlFunctionalGroupsFinder.find(IAtomContainer container, boolean clone) method on all given * molecules (parameter clone = false) and counts the occurring exceptions. - * + * * @return the number of occurred exceptions * @throws Exception if unable to compute a result (copied from doc in Callable interface) */ @@ -66,5 +67,5 @@ public Integer call() throws Exception { } return tmpExceptionsCounter; } - + } diff --git a/src/main/java/de/ibci/ertlfxgroupsfinder/performancetest/Main.java b/src/main/java/de/ibci/ertlfxgroupsfinder/performancetest/Main.java index 5fa4d88..1d9bb77 100644 --- a/src/main/java/de/ibci/ertlfxgroupsfinder/performancetest/Main.java +++ b/src/main/java/de/ibci/ertlfxgroupsfinder/performancetest/Main.java @@ -1,9 +1,8 @@ -/** - * Performance test for +/* * ErtlFunctionalGroupsFinder for CDK - * Copyright (C) 2019 Jonas Schaub + * Copyright (c) 2023 Sebastian Fritsch, Stefan Neumann, Jonas Schaub, Christoph Steinbeck, and Achim Zielesny * - * Source code is available at + * Source code is available at * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License @@ -18,11 +17,12 @@ * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ + package de.ibci.ertlfxgroupsfinder.performancetest; /** * Main class for starting application. - * + * * @author Jonas Schaub */ public class Main { @@ -32,9 +32,9 @@ private Main() { } /** - * Starts the application. Command line arguments must be the name of an SD-file to read (must be located in the + * Starts the application. Command line arguments must be the name of an SD-file to read (must be located in the * same directory as the application's .jar file) and the number of different threads to use for calculation. - * + * * @param args the command line arguments */ public static void main(String[] args) { @@ -45,7 +45,5 @@ public static void main(String[] args) { System.exit(1); } } - -} - +} diff --git a/src/main/java/org/openscience/cdk/tools/ErtlFunctionalGroupsFinder.java b/src/main/java/org/openscience/cdk/tools/ErtlFunctionalGroupsFinder.java index 0426e5b..93b1941 100644 --- a/src/main/java/org/openscience/cdk/tools/ErtlFunctionalGroupsFinder.java +++ b/src/main/java/org/openscience/cdk/tools/ErtlFunctionalGroupsFinder.java @@ -1,8 +1,8 @@ -/** +/* * ErtlFunctionalGroupsFinder for CDK - * Copyright (C) 2022 Sebastian Fritsch + * Copyright (c) 2023 Sebastian Fritsch, Stefan Neumann, Jonas Schaub, Christoph Steinbeck, and Achim Zielesny * - * Source code is available at + * Source code is available at * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License @@ -17,6 +17,7 @@ * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ + package org.openscience.cdk.tools; import org.openscience.cdk.graph.ConnectedComponents; @@ -51,10 +52,10 @@ * @version 1.0.0.1 */ public class ErtlFunctionalGroupsFinder { - - private static ILoggingTool log = LoggingToolFactory.createLoggingTool(ErtlFunctionalGroupsFinder.class); - private final static String CARBONYL_C_MARKER = "Carbonyl-C"; - private final Set nonmetalAtomicNumbers; + + private static ILoggingTool log = LoggingToolFactory.createLoggingTool(ErtlFunctionalGroupsFinder.class); + private static final String CARBONYL_C_MARKER = "Carbonyl-C"; + private final Set nonmetalAtomicNumbers; private final Mode mode; private EdgeToBondMap bondMap; private int[][] adjList; @@ -62,152 +63,152 @@ public class ErtlFunctionalGroupsFinder { private HashMap aromaticHeteroAtoms; // key: atom idx, value: isInGroup private Map> environmentsMap; - /** - * Defines the working mode. - */ - public static enum Mode{ - /** - * Default mode including the generalization step. - */ - DEFAULT, - /** - * Skips the generalization step. Functional groups will keep their full "environment". - */ - NO_GENERALIZATION; - } - - private enum EnvironmentCalCType { C_AROMATIC, C_ALIPHATIC }; - - /** - * Describes one carbon atom in the environment of a marked atom. It can either be aromatic - * or aliphatic and also contains a clone of its connecting bond. - */ - private class EnvironmentalC{ - private EnvironmentCalCType type; - private int bondIndex; - private IBond.Order bondOrder; - private IBond.Stereo bondStereo; - private boolean[] bondFlags; - - public EnvironmentalC(EnvironmentCalCType type, IBond bond, int indexInBond) { - this.type = type; - - bondIndex = indexInBond; - bondOrder = bond.getOrder(); - bondStereo = bond.getStereo(); - bondFlags = bond.getFlags(); - } - - public EnvironmentCalCType getType() { - return type; - } - - public IBond createBond(IAtom targetAtom, IAtom cAtom) { - IBond bond = targetAtom.getBuilder().newInstance(IBond.class); - if(bondIndex == 0) { - bond.setAtoms(new IAtom[] {cAtom, targetAtom}); - } - else { - bond.setAtoms(new IAtom[] {targetAtom, cAtom}); - } - bond.setOrder(bondOrder); - bond.setStereo(bondStereo); - bond.setFlags(bondFlags); - - return bond; - } - } - + /** + * Defines the working mode. + */ + public static enum Mode{ + /** + * Default mode including the generalization step. + */ + DEFAULT, + /** + * Skips the generalization step. Functional groups will keep their full "environment". + */ + NO_GENERALIZATION; + } + + private enum EnvironmentCalCType { C_AROMATIC, C_ALIPHATIC }; + + /** + * Describes one carbon atom in the environment of a marked atom. It can either be aromatic + * or aliphatic and also contains a clone of its connecting bond. + */ + private class EnvironmentalC{ + private EnvironmentCalCType type; + private int bondIndex; + private IBond.Order bondOrder; + private IBond.Stereo bondStereo; + private boolean[] bondFlags; + + public EnvironmentalC(EnvironmentCalCType type, IBond bond, int indexInBond) { + this.type = type; + + bondIndex = indexInBond; + bondOrder = bond.getOrder(); + bondStereo = bond.getStereo(); + bondFlags = bond.getFlags(); + } + + public EnvironmentCalCType getType() { + return type; + } + + public IBond createBond(IAtom targetAtom, IAtom cAtom) { + IBond bond = targetAtom.getBuilder().newInstance(IBond.class); + if(bondIndex == 0) { + bond.setAtoms(new IAtom[] {cAtom, targetAtom}); + } + else { + bond.setAtoms(new IAtom[] {targetAtom, cAtom}); + } + bond.setOrder(bondOrder); + bond.setStereo(bondStereo); + bond.setFlags(bondFlags); + + return bond; + } + } + /** * Default constructor for ErtlFunctionalGroupsFinder. */ public ErtlFunctionalGroupsFinder() { - this(Mode.DEFAULT); + this(Mode.DEFAULT); } - + /** * Constructor for ErtlFunctionalGroupsFinder. - * + * * @param mode working mode (see {@code ErtlFunctionalGroupsFinder.Mode}). */ public ErtlFunctionalGroupsFinder(Mode mode) { - this.mode = mode; - - // init non-metal and non-metalloid atom numbers - nonmetalAtomicNumbers = Set.of(1, 2, 6, 7, 8, 9, 10, 15, 16, 17, 18, 34, 35, 36, 53, 54, 86); //ImmutableSet.of(1, 2, 6, 7, 8, 9, 10, 15, 16, 17, 18, 34, 35, 36, 53, 54, 86); + this.mode = mode; + + // init non-metal and non-metalloid atom numbers + nonmetalAtomicNumbers = Set.of(1, 2, 6, 7, 8, 9, 10, 15, 16, 17, 18, 34, 35, 36, 53, 54, 86); //ImmutableSet.of(1, 2, 6, 7, 8, 9, 10, 15, 16, 17, 18, 34, 35, 36, 53, 54, 86); } - /** - * Find all functional groups contained in a molecule. - * - * NOTE: The input must consist of one connected structure and may not contain charged atoms, metals or metalloids. - * - * @param container the molecule which contains the functional groups (may not contain charged atoms, metals, - * metalloids or unconnected components!) - * @return a list with all functional groups found in the molecule. - */ - public List find(IAtomContainer container){ - return find(container, true); - } - /** * Find all functional groups contained in a molecule. - * - * NOTE: The input must consist of one connected structure and may not contain charged atoms, metals or metalloids. - * + * + * NOTE: The input must consist of one connected structure and may not contain charged atoms, metals or metalloids. + * * @param container the molecule which contains the functional groups (may not contain charged atoms, metals, - * metalloids or unconnected components!) - * @param clone Use 'false' to reuse the input container's bonds and atoms in the extraction of the functional - * groups. This may speed up the extraction and lower the memory consumption for processing large - * amounts of data but corrupts the original input container. - * Use 'true' to work with a clone and leave the input container intact (default). + * metalloids or unconnected components!) + * @return a list with all functional groups found in the molecule. + */ + public List find(IAtomContainer container){ + return find(container, true); + } + + /** + * Find all functional groups contained in a molecule. + * + * NOTE: The input must consist of one connected structure and may not contain charged atoms, metals or metalloids. + * + * @param container the molecule which contains the functional groups (may not contain charged atoms, metals, + * metalloids or unconnected components!) + * @param clone Use 'false' to reuse the input container's bonds and atoms in the extraction of the functional + * groups. This may speed up the extraction and lower the memory consumption for processing large + * amounts of data but corrupts the original input container. + * Use 'true' to work with a clone and leave the input container intact (default). * @return a list with all functional groups found in the molecule. */ public List find(IAtomContainer container, boolean clone){ - // work with a clone? - IAtomContainer mol; - if(clone){ - try { - mol = container.clone(); - } catch (CloneNotSupportedException e) { - throw new IllegalStateException("Atom container could not be cloned"); - } - } - else{ - mol = container; - } - - // init GraphUtil & EdgeToBondMap - bondMap = EdgeToBondMap.withSpaceFor(mol); - adjList = GraphUtil.toAdjList(mol, bondMap); - - checkConstraints(mol); - - // atom marking - markAtoms(mol); - - // extract raw groups - List groups = extractGroups(mol); - - // handle environment - if(mode == Mode.DEFAULT) { - expandGeneralizedEnvironments(groups); - } - else if (mode == Mode.NO_GENERALIZATION) { - expandFullEnvironments(groups); - } - else { - throw new IllegalStateException("Unknown mode."); - } - - // clear fields - bondMap = null; - adjList = null; - markedAtoms = null; - aromaticHeteroAtoms = null; - environmentsMap = null; - - return groups; + // work with a clone? + IAtomContainer mol; + if(clone){ + try { + mol = container.clone(); + } catch (CloneNotSupportedException e) { + throw new IllegalStateException("Atom container could not be cloned"); + } + } + else{ + mol = container; + } + + // init GraphUtil & EdgeToBondMap + bondMap = EdgeToBondMap.withSpaceFor(mol); + adjList = GraphUtil.toAdjList(mol, bondMap); + + checkConstraints(mol); + + // atom marking + markAtoms(mol); + + // extract raw groups + List groups = extractGroups(mol); + + // handle environment + if(mode == Mode.DEFAULT) { + expandGeneralizedEnvironments(groups); + } + else if (mode == Mode.NO_GENERALIZATION) { + expandFullEnvironments(groups); + } + else { + throw new IllegalStateException("Unknown mode."); + } + + // clear fields + bondMap = null; + adjList = null; + markedAtoms = null; + aromaticHeteroAtoms = null; + environmentsMap = null; + + return groups; } /** @@ -216,569 +217,569 @@ else if (mode == Mode.NO_GENERALIZATION) { * @param molecule Molecule with atoms to mark */ private void markAtoms(IAtomContainer molecule) { - if(isDbg()) log.debug("########## Starting search for atoms to mark ... ##########"); - - // store marked atoms - markedAtoms = new HashSet(molecule.getAtomCount()); //Sets.newHashSetWithExpectedSize(molecule.getAtomCount()); - // store aromatic heteroatoms - aromaticHeteroAtoms = new HashMap<>(); - - for(int idx = 0; idx < molecule.getAtomCount(); idx++) { - // skip atoms that already got marked in a previous iteration - if(markedAtoms.contains(idx)) { - continue; - } - IAtom cAtom = molecule.getAtom(idx); - // skip aromatic atoms but add them to set - if(cAtom.isAromatic()) { - if(isHeteroatom(cAtom)) { - aromaticHeteroAtoms.put(idx, false); - } - continue; - } - - int atomicNr = cAtom.getAtomicNumber(); - - // if C... - if(atomicNr == 6) { - boolean isMarked = false; // to detect if foor loop ran with or without marking the C atom - int oNSCounter = 0; // count for the number of connected O, N & S atoms - for(int connectedIdx : adjList[idx]) { - IAtom connectedAtom = molecule.getAtom(connectedIdx); - IBond connectedBond = bondMap.get(idx, connectedIdx); - - // if connected to Heteroatom or C in aliphatic double or triple bond... [CONDITIONS 2.1 & 2.2] - if(connectedAtom.getAtomicNumber() != 1 && ((connectedBond.getOrder() == Order.DOUBLE - || connectedBond.getOrder() == Order.TRIPLE) && !connectedBond.isAromatic())) { - - // set the connected atom as marked - if(markedAtoms.add(connectedIdx)) { - String connectedAtomCondition = connectedAtom.getAtomicNumber() == 6 ? "2.1/2.2" : "1"; - if(isDbg()) log.debug(String.format("Marking Atom #%d (%s) - Met condition %s", - connectedIdx, connectedAtom.getSymbol(), connectedAtomCondition)); - } - - // set the current atom as marked and break out of connected atoms - if(isDbg()) log.debug(String.format("Marking Atom #%d (%s) - Met condition 2.1/2.2", - idx, cAtom.getSymbol())); - isMarked = true; - - // but check for carbonyl-C before break - if(connectedAtom.getAtomicNumber() == 8 && connectedBond.getOrder() == Order.DOUBLE - && adjList[idx].length == 3) { - if(isDbg()) log.debug(" - was flagged as Carbonly-C"); - cAtom.setProperty(CARBONYL_C_MARKER, true); - } - - break; - } - // if connected to O/N/S in single bond... - else if((connectedAtom.getAtomicNumber() == 7 - || connectedAtom.getAtomicNumber() == 8 - || connectedAtom.getAtomicNumber() == 16) - && connectedBond.getOrder() == Order.SINGLE){ - // if connected O/N/S is not aromatic... - if(!connectedAtom.isAromatic()) { - // set the connected O/N/S atom as marked - if(isDbg()) log.debug(String.format("Marking Atom #%d (%s) - Met condition 1", - connectedIdx, connectedAtom.getSymbol())); - markedAtoms.add(connectedIdx); - - // if "acetal C" (2+ O/N/S in single bonds connected to sp3-C)... [CONDITION 2.3] - boolean isAllSingleBonds = true; - for(int connectedInSphere2Idx : adjList[connectedIdx]) { - IBond sphere2Bond = bondMap.get(connectedIdx, connectedInSphere2Idx); - if(sphere2Bond.getOrder() != Order.SINGLE) { - isAllSingleBonds = false; - break; - } - } - if(isAllSingleBonds) { - oNSCounter++; - if(oNSCounter > 1 && adjList[idx].length + cAtom.getImplicitHydrogenCount() == 4) { - // set as marked and break out of connected atoms - if(isDbg()) log.debug(String.format("Marking Atom #%d (%s) - Met condition 2.3", - idx, cAtom.getSymbol())); - isMarked = true; - break; - } - } - } - // if part of oxirane, aziridine and thiirane ring... [CONDITION 2.4] - for(int connectedInSphere2Idx : adjList[connectedIdx]) { - IAtom connectedInSphere2Atom = molecule.getAtom(connectedInSphere2Idx); - if(connectedInSphere2Atom.getAtomicNumber() == 6) { - for(int connectedInSphere3Idx : adjList[connectedInSphere2Idx]) { - IAtom connectedInSphere3Atom = molecule.getAtom(connectedInSphere3Idx); - if(connectedInSphere3Atom.equals(cAtom)) { - // set connected atoms as marked - if(isDbg()) log.debug(String.format("Marking Atom #%d (%s) - Met condition 2.4", - connectedInSphere2Idx, connectedInSphere2Atom.getSymbol())); - if(isDbg()) log.debug(String.format("Marking Atom #%d (%s) - Met condition 2.4", - connectedInSphere3Idx, connectedInSphere3Atom.getSymbol())); - markedAtoms.add(connectedInSphere2Idx); - markedAtoms.add(connectedInSphere3Idx); - // set current atom as marked and break out of connected atoms - if(isDbg()) log.debug(String.format("Marking Atom #%d (%s) - Met condition 2.4", - idx, cAtom.getSymbol())); - isMarked = true; - break; - } - } - } - } - } - } - if(isMarked) { - markedAtoms.add(idx); - continue; - } - // if none of the conditions 2.X apply, we have an unmarked C (not relevant here) - } - // if H... - else if (atomicNr == 1){ - // convert to implicit H - IAtom connectedAtom; - try { - connectedAtom = molecule.getAtom(adjList[idx][0]); - } - catch(ArrayIndexOutOfBoundsException e) { - break; - } - - - if(connectedAtom.getImplicitHydrogenCount() == null) { - connectedAtom.setImplicitHydrogenCount(1); - } - else { - connectedAtom.setImplicitHydrogenCount(connectedAtom.getImplicitHydrogenCount() + 1); - } - continue; - } - // if heteroatom... (CONDITION 1) - else { - if(isDbg()) log.debug(String.format("Marking Atom #%d (%s) - Met condition 1", idx, cAtom.getSymbol())); - markedAtoms.add(idx); - continue; - } - } - if(isDbg()) log.debug(String.format("########## End of search. Marked %d/%d atoms. ##########", markedAtoms.size(), molecule.getAtomCount())); + if(isDbg()) log.debug("########## Starting search for atoms to mark ... ##########"); + + // store marked atoms + markedAtoms = new HashSet(molecule.getAtomCount()); //Sets.newHashSetWithExpectedSize(molecule.getAtomCount()); + // store aromatic heteroatoms + aromaticHeteroAtoms = new HashMap<>(); + + for(int idx = 0; idx < molecule.getAtomCount(); idx++) { + // skip atoms that already got marked in a previous iteration + if(markedAtoms.contains(idx)) { + continue; + } + IAtom cAtom = molecule.getAtom(idx); + // skip aromatic atoms but add them to set + if(cAtom.isAromatic()) { + if(isHeteroatom(cAtom)) { + aromaticHeteroAtoms.put(idx, false); + } + continue; + } + + int atomicNr = cAtom.getAtomicNumber(); + + // if C... + if(atomicNr == 6) { + boolean isMarked = false; // to detect if foor loop ran with or without marking the C atom + int oNSCounter = 0; // count for the number of connected O, N & S atoms + for(int connectedIdx : adjList[idx]) { + IAtom connectedAtom = molecule.getAtom(connectedIdx); + IBond connectedBond = bondMap.get(idx, connectedIdx); + + // if connected to Heteroatom or C in aliphatic double or triple bond... [CONDITIONS 2.1 & 2.2] + if(connectedAtom.getAtomicNumber() != 1 && ((connectedBond.getOrder() == Order.DOUBLE + || connectedBond.getOrder() == Order.TRIPLE) && !connectedBond.isAromatic())) { + + // set the connected atom as marked + if(markedAtoms.add(connectedIdx)) { + String connectedAtomCondition = connectedAtom.getAtomicNumber() == 6 ? "2.1/2.2" : "1"; + if(isDbg()) log.debug(String.format("Marking Atom #%d (%s) - Met condition %s", + connectedIdx, connectedAtom.getSymbol(), connectedAtomCondition)); + } + + // set the current atom as marked and break out of connected atoms + if(isDbg()) log.debug(String.format("Marking Atom #%d (%s) - Met condition 2.1/2.2", + idx, cAtom.getSymbol())); + isMarked = true; + + // but check for carbonyl-C before break + if(connectedAtom.getAtomicNumber() == 8 && connectedBond.getOrder() == Order.DOUBLE + && adjList[idx].length == 3) { + if(isDbg()) log.debug(" - was flagged as Carbonly-C"); + cAtom.setProperty(CARBONYL_C_MARKER, true); + } + + break; + } + // if connected to O/N/S in single bond... + else if((connectedAtom.getAtomicNumber() == 7 + || connectedAtom.getAtomicNumber() == 8 + || connectedAtom.getAtomicNumber() == 16) + && connectedBond.getOrder() == Order.SINGLE){ + // if connected O/N/S is not aromatic... + if(!connectedAtom.isAromatic()) { + // set the connected O/N/S atom as marked + if(isDbg()) log.debug(String.format("Marking Atom #%d (%s) - Met condition 1", + connectedIdx, connectedAtom.getSymbol())); + markedAtoms.add(connectedIdx); + + // if "acetal C" (2+ O/N/S in single bonds connected to sp3-C)... [CONDITION 2.3] + boolean isAllSingleBonds = true; + for(int connectedInSphere2Idx : adjList[connectedIdx]) { + IBond sphere2Bond = bondMap.get(connectedIdx, connectedInSphere2Idx); + if(sphere2Bond.getOrder() != Order.SINGLE) { + isAllSingleBonds = false; + break; + } + } + if(isAllSingleBonds) { + oNSCounter++; + if(oNSCounter > 1 && adjList[idx].length + cAtom.getImplicitHydrogenCount() == 4) { + // set as marked and break out of connected atoms + if(isDbg()) log.debug(String.format("Marking Atom #%d (%s) - Met condition 2.3", + idx, cAtom.getSymbol())); + isMarked = true; + break; + } + } + } + // if part of oxirane, aziridine and thiirane ring... [CONDITION 2.4] + for(int connectedInSphere2Idx : adjList[connectedIdx]) { + IAtom connectedInSphere2Atom = molecule.getAtom(connectedInSphere2Idx); + if(connectedInSphere2Atom.getAtomicNumber() == 6) { + for(int connectedInSphere3Idx : adjList[connectedInSphere2Idx]) { + IAtom connectedInSphere3Atom = molecule.getAtom(connectedInSphere3Idx); + if(connectedInSphere3Atom.equals(cAtom)) { + // set connected atoms as marked + if(isDbg()) log.debug(String.format("Marking Atom #%d (%s) - Met condition 2.4", + connectedInSphere2Idx, connectedInSphere2Atom.getSymbol())); + if(isDbg()) log.debug(String.format("Marking Atom #%d (%s) - Met condition 2.4", + connectedInSphere3Idx, connectedInSphere3Atom.getSymbol())); + markedAtoms.add(connectedInSphere2Idx); + markedAtoms.add(connectedInSphere3Idx); + // set current atom as marked and break out of connected atoms + if(isDbg()) log.debug(String.format("Marking Atom #%d (%s) - Met condition 2.4", + idx, cAtom.getSymbol())); + isMarked = true; + break; + } + } + } + } + } + } + if(isMarked) { + markedAtoms.add(idx); + continue; + } + // if none of the conditions 2.X apply, we have an unmarked C (not relevant here) + } + // if H... + else if (atomicNr == 1){ + // convert to implicit H + IAtom connectedAtom; + try { + connectedAtom = molecule.getAtom(adjList[idx][0]); + } + catch(ArrayIndexOutOfBoundsException e) { + break; + } + + + if(connectedAtom.getImplicitHydrogenCount() == null) { + connectedAtom.setImplicitHydrogenCount(1); + } + else { + connectedAtom.setImplicitHydrogenCount(connectedAtom.getImplicitHydrogenCount() + 1); + } + continue; + } + // if heteroatom... (CONDITION 1) + else { + if(isDbg()) log.debug(String.format("Marking Atom #%d (%s) - Met condition 1", idx, cAtom.getSymbol())); + markedAtoms.add(idx); + continue; + } + } + if(isDbg()) log.debug(String.format("########## End of search. Marked %d/%d atoms. ##########", markedAtoms.size(), molecule.getAtomCount())); } /** * Searches the molecule for groups of connected marked atoms and extracts each as a new functional group. * The extraction process includes marked atom's "environments". Connected H's are captured implicitly. - * + * * @param molecule the molecule which contains the functional groups * @return a list of all functional groups (including "environments") extracted from the molecule */ private List extractGroups(IAtomContainer molecule) { - if(isDbg()) log.debug("########## Starting identification & extraction of functional groups... ##########"); - - environmentsMap = new HashMap>(molecule.getAtomCount());//Maps.newHashMapWithExpectedSize(molecule.getAtomCount()); - int[] atomIdxToFGMap = new int[molecule.getAtomCount()]; - Arrays.fill(atomIdxToFGMap, -1); - int fGroupIdx = -1; - - while(!markedAtoms.isEmpty()) { - // search for another functional group - fGroupIdx++; - - // get next markedAtom as the starting node for the search - int beginIdx = markedAtoms.iterator().next(); - if(isDbg()) log.debug(String.format("Searching new functional group from atom #%d (%s)...", beginIdx, molecule.getAtom(beginIdx).getSymbol())); - - // do a BFS from there - Queue queue = new ArrayDeque<>(); - queue.add(beginIdx); - - while(!queue.isEmpty()) { - int currentIdx = queue.poll(); - - // we are only interested in marked atoms that are not yet included in a group - if(!markedAtoms.contains(currentIdx)){ - continue; - } - - // if it isn't... - IAtom currentAtom = molecule.getAtom(currentIdx); - if(isDbg()) log.debug(String.format(" visiting marked atom: #%d (%s)", currentIdx, currentAtom.getSymbol())); - - // add its index to the functional group - atomIdxToFGMap[currentIdx] = fGroupIdx; - // also scratch the index from markedAtoms - markedAtoms.remove(currentIdx); - - // and take look at the connected atoms - List currentEnvironment = new ArrayList<>(); - for(int connectedIdx : adjList[currentIdx]) { - // add connected marked atoms to queue - if(markedAtoms.contains(connectedIdx)) { - queue.add(connectedIdx); - continue; - } - - // ignore already handled connected atoms - if(atomIdxToFGMap[connectedIdx] >= 0){ - continue; - } - - // add unmarked connected aromatic heteroatoms - IAtom connectedAtom = molecule.getAtom(connectedIdx); - if(isHeteroatom(connectedAtom) && connectedAtom.isAromatic()) { - if(isDbg()) log.debug(" added connected aromatic heteroatom " + connectedAtom.getSymbol()); - atomIdxToFGMap[connectedIdx] = fGroupIdx; - // note that this aromatic heteroatom has been added to a group - aromaticHeteroAtoms.put(connectedIdx, true); - } - - // add unmarked connected atoms to current marked atom's environment - IBond connectedBond = bondMap.get(currentIdx, connectedIdx); - - EnvironmentCalCType type; - if (connectedAtom.getAtomicNumber() == 6) { - if(connectedAtom.isAromatic()) - type = EnvironmentCalCType.C_AROMATIC; - else - type = EnvironmentCalCType.C_ALIPHATIC; - } - else { - // aromatic heteroatom, so just ignore - continue; - } - currentEnvironment.add(new EnvironmentalC(type, connectedBond, connectedBond.getBegin() == connectedAtom ? 0 : 1)); - } - environmentsMap.put(currentAtom, currentEnvironment); - - // debug logging - if(isDbg()) { - int cAromCount = 0, cAliphCount = 0; - for(EnvironmentalC comp : currentEnvironment) { - if(comp.getType() == EnvironmentCalCType.C_AROMATIC) - cAromCount++; - else if(comp.getType() == EnvironmentCalCType.C_ALIPHATIC) - cAliphCount++; - } - log.debug(String.format(" logged marked atom's environment: C_ar:%d, C_al:%d (and %d implicit hydrogens)", cAromCount, cAliphCount, currentAtom.getImplicitHydrogenCount())); - } - } - - if(isDbg()) log.debug(" search completed."); - } - - // also create FG for lone aromatic heteroatoms, not connected to a FG yet. - for(int atomIdx : aromaticHeteroAtoms.keySet()) { - if(!aromaticHeteroAtoms.get(atomIdx)) { - fGroupIdx++; - atomIdxToFGMap[atomIdx] = fGroupIdx; - if(isDbg()) log.debug("Created FG for lone aromatic heteroatom: " + molecule.getAtom(atomIdx).getSymbol()); - } - } - - List fGs = partitionIntoGroups(molecule, atomIdxToFGMap, fGroupIdx + 1); - - if(isDbg()) log.debug(String.format("########## Found & extracted %d functional groups. ##########", fGroupIdx + 1)); - return fGs; + if(isDbg()) log.debug("########## Starting identification & extraction of functional groups... ##########"); + + environmentsMap = new HashMap>(molecule.getAtomCount());//Maps.newHashMapWithExpectedSize(molecule.getAtomCount()); + int[] atomIdxToFGMap = new int[molecule.getAtomCount()]; + Arrays.fill(atomIdxToFGMap, -1); + int fGroupIdx = -1; + + while(!markedAtoms.isEmpty()) { + // search for another functional group + fGroupIdx++; + + // get next markedAtom as the starting node for the search + int beginIdx = markedAtoms.iterator().next(); + if(isDbg()) log.debug(String.format("Searching new functional group from atom #%d (%s)...", beginIdx, molecule.getAtom(beginIdx).getSymbol())); + + // do a BFS from there + Queue queue = new ArrayDeque<>(); + queue.add(beginIdx); + + while(!queue.isEmpty()) { + int currentIdx = queue.poll(); + + // we are only interested in marked atoms that are not yet included in a group + if(!markedAtoms.contains(currentIdx)){ + continue; + } + + // if it isn't... + IAtom currentAtom = molecule.getAtom(currentIdx); + if(isDbg()) log.debug(String.format(" visiting marked atom: #%d (%s)", currentIdx, currentAtom.getSymbol())); + + // add its index to the functional group + atomIdxToFGMap[currentIdx] = fGroupIdx; + // also scratch the index from markedAtoms + markedAtoms.remove(currentIdx); + + // and take look at the connected atoms + List currentEnvironment = new ArrayList<>(); + for(int connectedIdx : adjList[currentIdx]) { + // add connected marked atoms to queue + if(markedAtoms.contains(connectedIdx)) { + queue.add(connectedIdx); + continue; + } + + // ignore already handled connected atoms + if(atomIdxToFGMap[connectedIdx] >= 0){ + continue; + } + + // add unmarked connected aromatic heteroatoms + IAtom connectedAtom = molecule.getAtom(connectedIdx); + if(isHeteroatom(connectedAtom) && connectedAtom.isAromatic()) { + if(isDbg()) log.debug(" added connected aromatic heteroatom " + connectedAtom.getSymbol()); + atomIdxToFGMap[connectedIdx] = fGroupIdx; + // note that this aromatic heteroatom has been added to a group + aromaticHeteroAtoms.put(connectedIdx, true); + } + + // add unmarked connected atoms to current marked atom's environment + IBond connectedBond = bondMap.get(currentIdx, connectedIdx); + + EnvironmentCalCType type; + if (connectedAtom.getAtomicNumber() == 6) { + if(connectedAtom.isAromatic()) + type = EnvironmentCalCType.C_AROMATIC; + else + type = EnvironmentCalCType.C_ALIPHATIC; + } + else { + // aromatic heteroatom, so just ignore + continue; + } + currentEnvironment.add(new EnvironmentalC(type, connectedBond, connectedBond.getBegin() == connectedAtom ? 0 : 1)); + } + environmentsMap.put(currentAtom, currentEnvironment); + + // debug logging + if(isDbg()) { + int cAromCount = 0, cAliphCount = 0; + for(EnvironmentalC comp : currentEnvironment) { + if(comp.getType() == EnvironmentCalCType.C_AROMATIC) + cAromCount++; + else if(comp.getType() == EnvironmentCalCType.C_ALIPHATIC) + cAliphCount++; + } + log.debug(String.format(" logged marked atom's environment: C_ar:%d, C_al:%d (and %d implicit hydrogens)", cAromCount, cAliphCount, currentAtom.getImplicitHydrogenCount())); + } + } + + if(isDbg()) log.debug(" search completed."); + } + + // also create FG for lone aromatic heteroatoms, not connected to a FG yet. + for(int atomIdx : aromaticHeteroAtoms.keySet()) { + if(!aromaticHeteroAtoms.get(atomIdx)) { + fGroupIdx++; + atomIdxToFGMap[atomIdx] = fGroupIdx; + if(isDbg()) log.debug("Created FG for lone aromatic heteroatom: " + molecule.getAtom(atomIdx).getSymbol()); + } + } + + List fGs = partitionIntoGroups(molecule, atomIdxToFGMap, fGroupIdx + 1); + + if(isDbg()) log.debug(String.format("########## Found & extracted %d functional groups. ##########", fGroupIdx + 1)); + return fGs; } /** - * Generalizes the full environments of functional groups, providing a good balance between preserving + * Generalizes the full environments of functional groups, providing a good balance between preserving * meaningful detail and generalization. - * + * * @param fGroups the list of functional groups including "environments" */ private void expandGeneralizedEnvironments(List fGroups){ - if(isDbg()) log.debug("########## Starting generalization of functional groups... ##########"); - - for(IAtomContainer fGroup : fGroups) { - int atomCount = fGroup.getAtomCount(); - - if(isDbg()) log.debug(String.format("Generalizing functional group (%d atoms)...", atomCount)); - - // prechecking for special cases... - if(fGroup.getAtomCount() == 1) { - IAtom atom = fGroup.getAtom(0); - List environment = environmentsMap.get(atom); - - if(environment != null) { - int envCCount = environment.size(); - - // for H2N-C_env & HO-C_env -> do not replace H & C_env by R! - if((atom.getAtomicNumber() == 8 && envCCount == 1) - || (atom.getAtomicNumber() == 7 && envCCount == 1)){ - if(isDbg()) log.debug(String.format(" - found single atomic N or O FG with one env. C. Expanding environment...", atom.getSymbol())); - expandEnvironment(atom, fGroup); - - int hCount = atom.getImplicitHydrogenCount(); - if(hCount != 0) { - if(isDbg()) log.debug(String.format(" - adding %d hydrogens...", hCount)); - addHydrogens(atom, hCount, fGroup); - atom.setImplicitHydrogenCount(0); - } - continue; - } - // for HN-(C_env)-C_env & HS-C_env -> do not replace H by R! (only C_env!) - if((atom.getAtomicNumber() == 7 && envCCount == 2) - || (atom.getAtomicNumber() == 16 && envCCount == 1)) { - if(isDbg()) log.debug(" - found sec. amine or simple thiol"); - int hCount = atom.getImplicitHydrogenCount(); - if(hCount != 0) { - if(isDbg()) log.debug(String.format(" - adding %d hydrogens...", hCount)); - addHydrogens(atom, hCount, fGroup); - atom.setImplicitHydrogenCount(0); - } - if(isDbg()) log.debug(" - expanding environment..."); - expandEnvironmentGeneralized(atom, fGroup); - continue; - } - } - else if(isHeteroatom(atom)) { - int rAtomCount = atom.getValency(); - Integer hCount = atom.getImplicitHydrogenCount(); - if(hCount != null && hCount != 0) { - atom.setImplicitHydrogenCount(0); - } - String atomTypeName = atom.getAtomTypeName(); - if(isDbg()) log.debug(String.format(" - found single aromatic heteroatom (%s, Atomtype %s). Adding %d R-Atoms...", atom.getSymbol(), atomTypeName, rAtomCount)); - addRAtoms(atom, rAtomCount, fGroup); - continue; - } - } - - // get atoms to process - List fGroupAtoms = new ArrayList(fGroup.getAtomCount());//Lists.newArrayList(fGroup.atoms()); - fGroup.atoms().forEach(fGroupAtoms::add); - - // process atoms... - for(IAtom atom : fGroupAtoms) { - List environment = environmentsMap.get(atom); - - if(environment == null) { - if(atom.getImplicitHydrogenCount() != 0) { - atom.setImplicitHydrogenCount(0); - } - int rAtomCount = atom.getValency() - 1; - if(isDbg()) log.debug(String.format(" - found connected aromatic heteroatom (%s). Adding %d R-Atoms...", atom.getSymbol(), rAtomCount)); - addRAtoms(atom, rAtomCount, fGroup); - } - - // processing carbons... - if(atom.getAtomicNumber() == 6) { - if(atom.getProperty(CARBONYL_C_MARKER) == null) { - if(atom.getImplicitHydrogenCount() != 0) { - atom.setImplicitHydrogenCount(0); - } - if(isDbg()) log.debug(" - ignoring environment for marked carbon atom"); - continue; - } - else { - if(isDbg()) log.debug(" - found carbonyl-carbon. Expanding environment..."); - expandEnvironmentGeneralized(atom, fGroup); - continue; - } - } - // processing heteroatoms... - else { - if(isDbg()) log.debug(String.format(" - found heteroatom (%s). Expanding environment...", atom.getSymbol())); - expandEnvironmentGeneralized(atom, fGroup); - continue; - } - } - } - - if(isDbg()) log.debug("########## Generalization of functional groups completed. ##########"); + if(isDbg()) log.debug("########## Starting generalization of functional groups... ##########"); + + for(IAtomContainer fGroup : fGroups) { + int atomCount = fGroup.getAtomCount(); + + if(isDbg()) log.debug(String.format("Generalizing functional group (%d atoms)...", atomCount)); + + // prechecking for special cases... + if(fGroup.getAtomCount() == 1) { + IAtom atom = fGroup.getAtom(0); + List environment = environmentsMap.get(atom); + + if(environment != null) { + int envCCount = environment.size(); + + // for H2N-C_env & HO-C_env -> do not replace H & C_env by R! + if((atom.getAtomicNumber() == 8 && envCCount == 1) + || (atom.getAtomicNumber() == 7 && envCCount == 1)){ + if(isDbg()) log.debug(String.format(" - found single atomic N or O FG with one env. C. Expanding environment...", atom.getSymbol())); + expandEnvironment(atom, fGroup); + + int hCount = atom.getImplicitHydrogenCount(); + if(hCount != 0) { + if(isDbg()) log.debug(String.format(" - adding %d hydrogens...", hCount)); + addHydrogens(atom, hCount, fGroup); + atom.setImplicitHydrogenCount(0); + } + continue; + } + // for HN-(C_env)-C_env & HS-C_env -> do not replace H by R! (only C_env!) + if((atom.getAtomicNumber() == 7 && envCCount == 2) + || (atom.getAtomicNumber() == 16 && envCCount == 1)) { + if(isDbg()) log.debug(" - found sec. amine or simple thiol"); + int hCount = atom.getImplicitHydrogenCount(); + if(hCount != 0) { + if(isDbg()) log.debug(String.format(" - adding %d hydrogens...", hCount)); + addHydrogens(atom, hCount, fGroup); + atom.setImplicitHydrogenCount(0); + } + if(isDbg()) log.debug(" - expanding environment..."); + expandEnvironmentGeneralized(atom, fGroup); + continue; + } + } + else if(isHeteroatom(atom)) { + int rAtomCount = atom.getValency(); + Integer hCount = atom.getImplicitHydrogenCount(); + if(hCount != null && hCount != 0) { + atom.setImplicitHydrogenCount(0); + } + String atomTypeName = atom.getAtomTypeName(); + if(isDbg()) log.debug(String.format(" - found single aromatic heteroatom (%s, Atomtype %s). Adding %d R-Atoms...", atom.getSymbol(), atomTypeName, rAtomCount)); + addRAtoms(atom, rAtomCount, fGroup); + continue; + } + } + + // get atoms to process + List fGroupAtoms = new ArrayList(fGroup.getAtomCount());//Lists.newArrayList(fGroup.atoms()); + fGroup.atoms().forEach(fGroupAtoms::add); + + // process atoms... + for(IAtom atom : fGroupAtoms) { + List environment = environmentsMap.get(atom); + + if(environment == null) { + if(atom.getImplicitHydrogenCount() != 0) { + atom.setImplicitHydrogenCount(0); + } + int rAtomCount = atom.getValency() - 1; + if(isDbg()) log.debug(String.format(" - found connected aromatic heteroatom (%s). Adding %d R-Atoms...", atom.getSymbol(), rAtomCount)); + addRAtoms(atom, rAtomCount, fGroup); + } + + // processing carbons... + if(atom.getAtomicNumber() == 6) { + if(atom.getProperty(CARBONYL_C_MARKER) == null) { + if(atom.getImplicitHydrogenCount() != 0) { + atom.setImplicitHydrogenCount(0); + } + if(isDbg()) log.debug(" - ignoring environment for marked carbon atom"); + continue; + } + else { + if(isDbg()) log.debug(" - found carbonyl-carbon. Expanding environment..."); + expandEnvironmentGeneralized(atom, fGroup); + continue; + } + } + // processing heteroatoms... + else { + if(isDbg()) log.debug(String.format(" - found heteroatom (%s). Expanding environment...", atom.getSymbol())); + expandEnvironmentGeneralized(atom, fGroup); + continue; + } + } + } + + if(isDbg()) log.debug("########## Generalization of functional groups completed. ##########"); } - /** - * Expands the full environments of functional groups, converted into atoms and bonds. - * - * @param fGroups the list of functional groups including "environments" - */ - private void expandFullEnvironments(List fGroups) { - if(isDbg()) log.debug("########## Starting expansion of full environments for functional groups... ##########"); - - for(IAtomContainer fGroup : fGroups) { - int atomCount = fGroup.getAtomCount(); - if(isDbg()) log.debug(String.format("Expanding environment on functional group (%d atoms)...", atomCount)); - - for(int i = 0; i < atomCount; i++) { - IAtom atom = fGroup.getAtom(i); - - if(isDbg()) log.debug(String.format(" - Atom #%d:% - Expanding environment...", i)); - expandEnvironment(atom, fGroup); - - int hCount = atom.getImplicitHydrogenCount(); - if(hCount != 0) { - if(isDbg()) log.debug(String.format(" - adding %d hydrogens...", hCount)); - addHydrogens(atom, hCount, fGroup); - atom.setImplicitHydrogenCount(0); - } - } - } - - if(isDbg()) log.debug("########## Expansion of full environments for functional groups completed. ##########"); + /** + * Expands the full environments of functional groups, converted into atoms and bonds. + * + * @param fGroups the list of functional groups including "environments" + */ + private void expandFullEnvironments(List fGroups) { + if(isDbg()) log.debug("########## Starting expansion of full environments for functional groups... ##########"); + + for(IAtomContainer fGroup : fGroups) { + int atomCount = fGroup.getAtomCount(); + if(isDbg()) log.debug(String.format("Expanding environment on functional group (%d atoms)...", atomCount)); + + for(int i = 0; i < atomCount; i++) { + IAtom atom = fGroup.getAtom(i); + + if(isDbg()) log.debug(String.format(" - Atom #%d:% - Expanding environment...", i)); + expandEnvironment(atom, fGroup); + + int hCount = atom.getImplicitHydrogenCount(); + if(hCount != 0) { + if(isDbg()) log.debug(String.format(" - adding %d hydrogens...", hCount)); + addHydrogens(atom, hCount, fGroup); + atom.setImplicitHydrogenCount(0); + } + } + } + + if(isDbg()) log.debug("########## Expansion of full environments for functional groups completed. ##########"); } - + private void expandEnvironment(IAtom atom, IAtomContainer container) { - List environment = environmentsMap.get(atom); - - if(environment == null || environment.isEmpty()) { - if(isDbg()) log.debug(" found no environment to expand."); - return; - } - - int cAromCount = 0, cAliphCount = 0; - for(EnvironmentalC envC : environment) { - IAtom cAtom = atom.getBuilder().newInstance(IAtom.class, "C"); + List environment = environmentsMap.get(atom); + + if(environment == null || environment.isEmpty()) { + if(isDbg()) log.debug(" found no environment to expand."); + return; + } + + int cAromCount = 0, cAliphCount = 0; + for(EnvironmentalC envC : environment) { + IAtom cAtom = atom.getBuilder().newInstance(IAtom.class, "C"); cAtom.setAtomTypeName("C"); cAtom.setImplicitHydrogenCount(0); - if(envC.getType() == EnvironmentCalCType.C_AROMATIC) { - cAtom.setIsAromatic(true); - cAromCount++; - } - else { - cAliphCount++; - } - - IBond bond = envC.createBond(atom, cAtom); - - container.addAtom(cAtom); - container.addBond(bond); - } - - if(isDbg()) log.debug(String.format(" expanded environment: %dx C_ar and %dx C_al", cAromCount, cAliphCount)); + if(envC.getType() == EnvironmentCalCType.C_AROMATIC) { + cAtom.setIsAromatic(true); + cAromCount++; + } + else { + cAliphCount++; + } + + IBond bond = envC.createBond(atom, cAtom); + + container.addAtom(cAtom); + container.addBond(bond); + } + + if(isDbg()) log.debug(String.format(" expanded environment: %dx C_ar and %dx C_al", cAromCount, cAliphCount)); } - + // only call this on marked heteroatoms / carbonyl-C's! private void expandEnvironmentGeneralized(IAtom atom, IAtomContainer container) { - - List environment = environmentsMap.get(atom); - - if(environment == null) { - if(isDbg()) log.debug(" found no environment to expand."); - return; - } - - int rAtomCount = environment.size(); - int rAtomsForCCount = rAtomCount; - if(atom.getAtomicNumber() == 8 && atom.getImplicitHydrogenCount() == 1) { - addHydrogens(atom, 1, container); - atom.setImplicitHydrogenCount(0); - if(isDbg()) log.debug(" expanded hydrogen on connected OH-Group"); - } - else if(isHeteroatom(atom)) rAtomCount += atom.getImplicitHydrogenCount(); - addRAtoms(atom, rAtomCount, container); - - if(atom.getImplicitHydrogenCount() != 0) { - atom.setImplicitHydrogenCount(0); - } - - if(isDbg()) log.debug(String.format(" expanded environment: %dx R-atom (incl. %d for H replacement)", rAtomCount, rAtomCount - rAtomsForCCount)); + + List environment = environmentsMap.get(atom); + + if(environment == null) { + if(isDbg()) log.debug(" found no environment to expand."); + return; + } + + int rAtomCount = environment.size(); + int rAtomsForCCount = rAtomCount; + if(atom.getAtomicNumber() == 8 && atom.getImplicitHydrogenCount() == 1) { + addHydrogens(atom, 1, container); + atom.setImplicitHydrogenCount(0); + if(isDbg()) log.debug(" expanded hydrogen on connected OH-Group"); + } + else if(isHeteroatom(atom)) rAtomCount += atom.getImplicitHydrogenCount(); + addRAtoms(atom, rAtomCount, container); + + if(atom.getImplicitHydrogenCount() != 0) { + atom.setImplicitHydrogenCount(0); + } + + if(isDbg()) log.debug(String.format(" expanded environment: %dx R-atom (incl. %d for H replacement)", rAtomCount, rAtomCount - rAtomsForCCount)); } - + private static final boolean isHeteroatom(IAtom atom) { - int atomicNr = atom.getAtomicNumber(); - return atomicNr != 1 && atomicNr != 6; + int atomicNr = atom.getAtomicNumber(); + return atomicNr != 1 && atomicNr != 6; } - + private final boolean isNonmetal(IAtom atom) { - return nonmetalAtomicNumbers.contains(atom.getAtomicNumber()); + return nonmetalAtomicNumbers.contains(atom.getAtomicNumber()); } - + private void addHydrogens(IAtom atom, int number, IAtomContainer container) { - for(int i = 0; i < number; i++) { - IAtom hydrogen = atom.getBuilder().newInstance(IAtom.class, "H"); + for(int i = 0; i < number; i++) { + IAtom hydrogen = atom.getBuilder().newInstance(IAtom.class, "H"); hydrogen.setAtomTypeName("H"); hydrogen.setImplicitHydrogenCount(0); - + container.addAtom(hydrogen); container.addBond(atom.getBuilder().newInstance(IBond.class, atom, hydrogen, Order.SINGLE)); - } + } } - + private void addRAtoms(IAtom atom, int number, IAtomContainer container) { - for(int i = 0; i < number; i++) { - IPseudoAtom rAtom = atom.getBuilder().newInstance(IPseudoAtom.class, "R"); - rAtom.setAttachPointNum(1); - rAtom.setImplicitHydrogenCount(0); - - container.addAtom(rAtom); - container.addBond(atom.getBuilder().newInstance(IBond.class, atom, rAtom, Order.SINGLE)); - } + for(int i = 0; i < number; i++) { + IPseudoAtom rAtom = atom.getBuilder().newInstance(IPseudoAtom.class, "R"); + rAtom.setAttachPointNum(1); + rAtom.setImplicitHydrogenCount(0); + + container.addAtom(rAtom); + container.addBond(atom.getBuilder().newInstance(IBond.class, atom, rAtom, Order.SINGLE)); + } } - + private List partitionIntoGroups(IAtomContainer sourceContainer, int[] atomIdxToFGMap, int fGroupCount) { - List groups = new ArrayList<>(fGroupCount); - for(int i = 0; i < fGroupCount; i++) { - groups.add(sourceContainer.getBuilder().newInstance(IAtomContainer.class)); - } - - Map atomtoFGMap = new HashMap(sourceContainer.getAtomCount());//Maps.newHashMapWithExpectedSize(sourceContainer.getAtomCount()); - - // atoms - for(int atomIdx = 0; atomIdx < sourceContainer.getAtomCount(); atomIdx++) { - int fGroupId = atomIdxToFGMap[atomIdx]; - - if(fGroupId == -1) { - continue; - } - - IAtom atom = sourceContainer.getAtom(atomIdx); - IAtomContainer myGroup = groups.get(fGroupId); - myGroup.addAtom(atom); - atomtoFGMap.put(atom, myGroup); - } - - // bonds - for(IBond bond : sourceContainer.bonds()) { - IAtomContainer beginGroup = atomtoFGMap.get(bond.getBegin()); - IAtomContainer endGroup = atomtoFGMap.get(bond.getEnd()); - - if(beginGroup == null || endGroup == null || beginGroup != endGroup) - continue; - - beginGroup.addBond(bond); - } - - // single electrons - for (ISingleElectron electron : sourceContainer.singleElectrons()) { - IAtomContainer group = atomtoFGMap.get(electron.getAtom()); - if(group != null) - group.addSingleElectron(electron); - } - - // lone pairs + List groups = new ArrayList<>(fGroupCount); + for(int i = 0; i < fGroupCount; i++) { + groups.add(sourceContainer.getBuilder().newInstance(IAtomContainer.class)); + } + + Map atomtoFGMap = new HashMap(sourceContainer.getAtomCount());//Maps.newHashMapWithExpectedSize(sourceContainer.getAtomCount()); + + // atoms + for(int atomIdx = 0; atomIdx < sourceContainer.getAtomCount(); atomIdx++) { + int fGroupId = atomIdxToFGMap[atomIdx]; + + if(fGroupId == -1) { + continue; + } + + IAtom atom = sourceContainer.getAtom(atomIdx); + IAtomContainer myGroup = groups.get(fGroupId); + myGroup.addAtom(atom); + atomtoFGMap.put(atom, myGroup); + } + + // bonds + for(IBond bond : sourceContainer.bonds()) { + IAtomContainer beginGroup = atomtoFGMap.get(bond.getBegin()); + IAtomContainer endGroup = atomtoFGMap.get(bond.getEnd()); + + if(beginGroup == null || endGroup == null || beginGroup != endGroup) + continue; + + beginGroup.addBond(bond); + } + + // single electrons + for (ISingleElectron electron : sourceContainer.singleElectrons()) { + IAtomContainer group = atomtoFGMap.get(electron.getAtom()); + if(group != null) + group.addSingleElectron(electron); + } + + // lone pairs for (ILonePair lonePair : sourceContainer.lonePairs()) { - IAtomContainer group = atomtoFGMap.get(lonePair.getAtom()); - if(group != null) - group.addLonePair(lonePair); + IAtomContainer group = atomtoFGMap.get(lonePair.getAtom()); + if(group != null) + group.addLonePair(lonePair); } - - return groups; + + return groups; } - + private boolean isDbg() { - return log.isDebugEnabled(); + return log.isDebugEnabled(); } - + private boolean checkConstraints(IAtomContainer molecule) { - for(IAtom atom : molecule.atoms()) { - if(atom.getFormalCharge() != null && atom.getFormalCharge() != 0) { - throw new IllegalArgumentException("Input molecule must not contain any charges."); - } - if(!isNonmetal(atom)) { - throw new IllegalArgumentException("Input molecule must not contain metals or metalloids."); - } - if(atom.getImplicitHydrogenCount() == null) { - atom.setImplicitHydrogenCount(0); - } - } - - ConnectedComponents cc = new ConnectedComponents(adjList); - if(cc.nComponents() != 1) { - throw new IllegalArgumentException("Input molecule must consist of only a single connected stucture."); - } - - return true; + for(IAtom atom : molecule.atoms()) { + if(atom.getFormalCharge() != null && atom.getFormalCharge() != 0) { + throw new IllegalArgumentException("Input molecule must not contain any charges."); + } + if(!isNonmetal(atom)) { + throw new IllegalArgumentException("Input molecule must not contain metals or metalloids."); + } + if(atom.getImplicitHydrogenCount() == null) { + atom.setImplicitHydrogenCount(0); + } + } + + ConnectedComponents cc = new ConnectedComponents(adjList); + if(cc.nComponents() != 1) { + throw new IllegalArgumentException("Input molecule must consist of only a single connected stucture."); + } + + return true; } -} \ No newline at end of file +} diff --git a/src/main/java/org/openscience/cdk/tools/ErtlFunctionalGroupsFinderUtility.java b/src/main/java/org/openscience/cdk/tools/ErtlFunctionalGroupsFinderUtility.java index c7b6dd3..1439a60 100644 --- a/src/main/java/org/openscience/cdk/tools/ErtlFunctionalGroupsFinderUtility.java +++ b/src/main/java/org/openscience/cdk/tools/ErtlFunctionalGroupsFinderUtility.java @@ -1,22 +1,23 @@ /* - * Utilities for ErtlFunctionalGroupsFinder for CDK - * Copyright (C) 2022 Jonas Schaub - * - * Source code is available at - * + * ErtlFunctionalGroupsFinder for CDK + * Copyright (c) 2023 Sebastian Fritsch, Stefan Neumann, Jonas Schaub, Christoph Steinbeck, and Achim Zielesny + * + * Source code is available at + * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License * as published by the Free Software Foundation; either version 2.1 * of the License, or (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * + * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ + package org.openscience.cdk.tools; import org.openscience.cdk.CDKConstants; @@ -997,11 +998,11 @@ private static String getIDForLogging(IAtomContainer aMolecule) throws NullPoint String tmpCdkTitle = aMolecule.getProperty(CDKConstants.TITLE); String tmpTitle = aMolecule.getTitle(); String tmpID = aMolecule.getID(); - if (!Objects.isNull(tmpCdkTitle) && !tmpCdkTitle.equals("")) { + if (!Objects.isNull(tmpCdkTitle) && !tmpCdkTitle.isEmpty()) { return "CDK title: " + tmpCdkTitle; - } else if (!Objects.isNull(tmpTitle) && !tmpTitle.equals("")) { + } else if (!Objects.isNull(tmpTitle) && !tmpTitle.isEmpty()) { return "Title: " + tmpTitle; - } else if (!Objects.isNull(tmpID) && !tmpID.equals("")) { + } else if (!Objects.isNull(tmpID) && !tmpID.isEmpty()) { return "ID: " + tmpID; } else { return "No title or id could be determined."; diff --git a/src/test/java/org/openscience/cdk/tools/test/ErtlFunctionalGroupsFinderEvaluationTest.java b/src/test/java/org/openscience/cdk/tools/test/ErtlFunctionalGroupsFinderEvaluationTest.java index 35bdb66..6353746 100644 --- a/src/test/java/org/openscience/cdk/tools/test/ErtlFunctionalGroupsFinderEvaluationTest.java +++ b/src/test/java/org/openscience/cdk/tools/test/ErtlFunctionalGroupsFinderEvaluationTest.java @@ -1,9 +1,8 @@ -/** - * Evaluation tests for +/* * ErtlFunctionalGroupsFinder for CDK - * Copyright (C) 2019 Jonas Schaub + * Copyright (c) 2023 Sebastian Fritsch, Stefan Neumann, Jonas Schaub, Christoph Steinbeck, and Achim Zielesny * - * Source code is available at + * Source code is available at * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License @@ -18,23 +17,8 @@ * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ -package org.openscience.cdk.tools.test; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileNotFoundException; -import java.io.FileWriter; -import java.io.IOException; -import java.io.PrintWriter; -import java.time.LocalDateTime; -import java.time.format.DateTimeFormatter; -import java.util.Arrays; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.LinkedList; -import java.util.List; -import java.util.Set; +package org.openscience.cdk.tools.test; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Assumptions; @@ -63,81 +47,97 @@ import org.openscience.cdk.smiles.SmiFlavor; import org.openscience.cdk.smiles.SmilesGenerator; import org.openscience.cdk.smiles.SmilesParser; +import org.openscience.cdk.tools.CDKHydrogenAdder; +import org.openscience.cdk.tools.ErtlFunctionalGroupsFinder; import org.openscience.cdk.tools.ErtlFunctionalGroupsFinder.Mode; import org.openscience.cdk.tools.manipulator.AtomContainerManipulator; import org.openscience.cdk.tools.manipulator.AtomTypeManipulator; -import org.openscience.cdk.tools.ErtlFunctionalGroupsFinder; -import org.openscience.cdk.tools.CDKHydrogenAdder; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.FileWriter; +import java.io.IOException; +import java.io.PrintWriter; +import java.time.LocalDateTime; +import java.time.format.DateTimeFormatter; +import java.util.Arrays; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; +import java.util.Set; /** * This test class can be used to read an SD file containing chemical structures, to extract their functional groups using - * the ErtlFunctionalGroupsFinder with different settings (i.e. electron donation model and cycle finder algorithm), and write + * the ErtlFunctionalGroupsFinder with different settings (i.e. electron donation model and cycle finder algorithm), and write * the functional groups with their associated frequency under the given settings in this SD file to a CSV file. *

* To run correctly the constant SD_FILE_PATH must be set to where to find the specific file on the local system. *

* All written files will be placed in a new folder in the same directory as the read SD file. *

- * Note for addition of new tests: Only one SD file should be analyzed per test method (since some mechanisms work under + * Note for addition of new tests: Only one SD file should be analyzed per test method (since some mechanisms work under * that assumption). - * + * * @author Jonas Schaub * @version 1.0.0.1 */ public class ErtlFunctionalGroupsFinderEvaluationTest { - + // - + // /** * Path to SD file that should be analyzed */ private static final String SD_FILE_PATH = "...\\ChEBI_lite_3star_subset.sdf"; - + /** * Directory for output files; Will be created as sub-folder in the working directory (the directory of the read SD file) */ private static final String OUTPUT_FOLDER_FROM_WORKING_DIRECTORY = "ErtlFunctionalGroupsFinderEvaluationTest_Output"; - + /** * Format of the time stamp addition to all written output files */ private static final String DATE_TIME_FORMAT_PATTERN = "uuuu_MM_dd_HH_mm"; - + /** * Separator for file name segments (test identifier, file name, time stamp) */ private static final String FILE_NAME_ADDITION_SEPERATOR = "_"; - + // /** * Identifier string for SD file electron donation test that counts multiples */ private static final String ELECTRON_DONATION_TEST_IDENTIFIER = "ElectronDonationTest"; - + /** * Identifier string for SD file electron donation test that does not count multiples per molecule */ private static final String ELECTRON_DONATION_NO_MULTIPLES_TEST_IDENTIFIER = "ElectrondDonationNoMultiplesTest"; - + /** * Identifier string for test of different CycleFinder settings */ private static final String CYCLE_FINDER_TEST_IDENTIFIER = "CycleFinderTest"; // - + // /** * Name of file for logging occurred exceptions with causing molecules */ private static final String EXCEPTIONS_LOG_FILE_NAME = "Exceptions_Log"; - + /** * File type of exceptions log file */ private static final String EXCEPTIONS_LOG_FILE_TYPE = ".txt"; - + /** * First lines in the exceptions log file */ @@ -147,322 +147,322 @@ public class ErtlFunctionalGroupsFinderEvaluationTest { + "You need to be looking at an earlier stack trace to see the details.)" + System.getProperty("line.separator"); // - + // /** * Name of file for logging filtered molecules */ private static final String FILTERED_MOLECULES_FILE_NAME = "Filtered_Molecules"; - + /** * File type of filtered molecules log file */ private static final String FILTERED_MOLECULES_FILE_TYPE = ".txt"; - + /** * First line in the filtered molecules log file */ private static final String FILTERED_MOLECULES_FILE_HEADER = "Following molecules were filtered:\n"; // - + // /** * Name of file for writing resulting functional groups and frequencies to (output file) */ private static final String OUTPUT_FILE_NAME = "Functional_Groups_Frequencies"; - + /** * File type of output file */ private static final String OUTPUT_FILE_TYPE = ".csv"; - + /** * Key for the output file's header under which to store the unique SMILES code of a functional group */ private static final String SMILES_CODE_KEY = "smiles"; - + /** * Key for the output file's header under which to store the pseudo SMILES code of a functional group */ private static final String PSEUDO_SMILES_CODE_KEY = "pseudoSmiles"; - + /** * Key for the output file's header under which to store the hash code of a functional group */ private static final String HASH_CODE_KEY = "hashCode"; - + /** - * Key for the output file's header under which to store the frequency of a functional group when using the + * Key for the output file's header under which to store the frequency of a functional group when using the * cdk electron donation model (and internally for the master HashMap's inner maps) */ private static final String CDK_MODEL_SETTINGS_KEY = "cdk"; - + /** - * Key for the output file's header under which to store the frequency of a functional group when using the + * Key for the output file's header under which to store the frequency of a functional group when using the * daylight electron donation model (and internally for the master HashMap's inner maps) */ private static final String DAYLIGHT_MODEL_SETTINGS_KEY = "daylight"; - + /** - * Key for the output file's header under which to store the frequency of a functional group when using the + * Key for the output file's header under which to store the frequency of a functional group when using the * pi bonds electron donation model (and internally for the master HashMap's inner maps) */ private static final String PIBONDS_MODEL_SETTINGS_KEY = "piBonds"; - + /** - * Key for the output file's header under which to store the frequency of a functional group when using the + * Key for the output file's header under which to store the frequency of a functional group when using the * cdk allowing exocyclic electron donation model (and internally for the master HashMap's inner maps) */ private static final String CDK_EXOCYCLIC_MODEL_SETTINGS_KEY = "cdkExocyclic"; - + /** - * Key for the output file's header under which to store the frequency of a functional group when using the + * Key for the output file's header under which to store the frequency of a functional group when using the * cdk legacy aromaticity model (and internally for the master HashMap's inner maps) */ private static final String CDK_LEGACY_MODEL_SETTINGS_KEY = "cdkLegacy"; - + /** - * This string will be added to an original settings key when using the ErtlFunctionalGroupsFinder with activated + * This string will be added to an original settings key when using the ErtlFunctionalGroupsFinder with activated * generalization */ private static final String GENERALIZATION_SETTINGS_KEY_ADDITION = "Generalized"; - + /** - * Key for the master HashMap's inner maps under which to store the ChEBI or ChEMBL id/name or CDK title of an exemplary + * Key for the master HashMap's inner maps under which to store the ChEBI or ChEMBL id/name or CDK title of an exemplary * molecule that contains this functional group */ private static final String MOLECULE_OF_ORIGIN_KEY = "origin"; - + /** * Separator for the output file's values */ private static final String OUTPUT_FILE_SEPERATOR = ","; - + /** - * Placeholder String for every functional group's SMILES code whose real SMILES representation could not be + * Placeholder String for every functional group's SMILES code whose real SMILES representation could not be * generated */ private static final String SMILES_CODE_PLACEHOLDER = "[SMILES code could not be created]"; - + /** * Placeholder String for 'parent' molecules' IDs, assigned in case they could not be read */ private static final String MOLECULE_OF_ORIGIN_ID_PLACEHOLDER = "[no id for molecule of origin]"; // // - + // /** * Key for setting an IAtomContainer's property that specifies if the molecule consisted of one or more unconnected * structures and the biggest of these structures was selected in preprocessing */ private static final String BIGGEST_FRAGMENT_SELECTED_PROPERTY_KEY = "biggest_fragment_was_selected"; - + /** * Key for setting an IAtomContainer's property that specifies if the molecule contained charged atoms and these * charges were neutralized in preprocessing */ private static final String CHARGES_NEUTRALIZED_PROPERTY_KEY = "charges_were_neutralized"; - + /** * Key for setting an IAtomContainers property that specifies if the molecule must be filtered */ private static final String MOLECULE_MUST_BE_FILTERED_PROPERTY_KEY = "molecule_must_be_filtered"; - + /** * Key for setting an IAtomContainers property that specifies why the molecule was or has to be filtered */ private static final String CAUSE_FOR_FILTERING_PROPERTY_KEY = "filtering_cause"; - + /** * Message specifying that the atom or bond count of a molecule is zero */ private static final String ATOM_OR_BOND_COUNT_ZERO = "Atom or bond count 0"; - + /** * Message specifying that a molecule contains not allowed atomic numbers */ private static final String FORBIDDEN_ATOMIC_NUMBER = "Contains one or more metal, metalloid or \"R\" atoms"; - + /** - * All allowed atomic numbers to pass to the ErtlFunctionalGroupsFinder; + * All allowed atomic numbers to pass to the ErtlFunctionalGroupsFinder; * String will be split and resulting integers passed to a set */ private static final String NON_METALLIC_ATOMIC_NUMBERS = "1,2,6,7,8,9,10,15,16,17,18,34,35,36,53,54,86"; - + /** - * This string will be added to an original settings key (only for exception logging) when a molecule consists of two or + * This string will be added to an original settings key (only for exception logging) when a molecule consists of two or * more unconnected structures and the biggest one is chosen for analysis in the preprocessing */ private static final String FRAGMENT_SELECTED_SETTINGS_KEY_ADDITION = "(biggest fragment selected)"; - + /** * This string will be added to an original settings key (only for exception logging) when a molecule contains charged atoms * and these charges are neutralized in the preprocessing */ private static final String NEUTRALIZED_SETTINGS_KEY_ADDITION = "(neutralized)"; // - + // /** * Pseudo SMILES representation of an aromatic C atom */ private static final String PSEUDO_SMILES_AROMATIC_CARBON = "C*"; - + /** * Pseudo SMILES representation of an aromatic N atom */ private static final String PSEUDO_SMILES_AROMATIC_NITROGEN = "N*"; - + /** * Pseudo SMILES representation of an aromatic S atom */ private static final String PSEUDO_SMILES_AROMATIC_SULPHUR = "S*"; - + /** * Pseudo SMILES representation of an aromatic O atom */ private static final String PSEUDO_SMILES_AROMATIC_OXYGEN = "O*"; - + /** * Pseudo SMILES representation of an aromatic Se atom */ private static final String PSEUDO_SMILES_AROMATIC_SELENIUM = "Se*"; - + /** * Pseudo SMILES representation of an aromatic P atom */ private static final String PSEUDO_SMILES_AROMATIC_PHOSPHOR = "P*"; - + /** * Pseudo SMILES representation of an undefined pseudo atom */ private static final String PSEUDO_SMILES_R_ATOM = "R"; // - + /** * Type of the generated SMILES codes */ private static final int SMILES_GENERATOR_OUTPUT_MODE = SmiFlavor.Unique; - + /** - * Initial capacity of the master HashMap (where all data is written to); May be adjusted when analyzing larger + * Initial capacity of the master HashMap (where all data is written to); May be adjusted when analyzing larger * SD files */ private static final int MASTER_HASHMAP_INITIAL_CAPACITY = 1000; - + /** - * Initial capacity of the master HashMap's inner maps that store the frequencies for different settings for a + * Initial capacity of the master HashMap's inner maps that store the frequencies for different settings for a * specific functional group */ private static final int INNER_HASHMAPS_INITIAL_CAPACITY = 20; - + /** * Load factor of the master HashMap */ private static final float MASTER_HASHMAP_LOAD_FACTOR = 0.9f; // - + // /** * Directory for all produced files; It will be the directory where th SD file that is analyzed was loaded from */ private String outputDirectory; - + /** * Counts all occurring exceptions in one test */ private int exceptionsCounter; - + /** - * True if the filtered molecules were logged in the filtered molecules log file; This is only necessary in the + * True if the filtered molecules were logged in the filtered molecules log file; This is only necessary in the * first iteration since the applied filters are the same in every iteration (assuming that in a single test * only one SD file is analyzed) */ private boolean areFilteredMoleculesLogged; - + /** * True if all operations in initialize() were successful and the test is able to run */ private boolean isTestAbleToRun; - + /** - * True if the currently running test requires files to be loaded and files to log exceptions or filtered molecules + * True if the currently running test requires files to be loaded and files to log exceptions or filtered molecules * or the produced results and therefore the PrintWriter objects have been initialized */ private boolean areFileOperationsActivated; - + /** * PrintWriter for logging exceptions */ private PrintWriter exceptionsPrintWriter; - + /** * PrintWriter for logging filtered molecules */ private PrintWriter filteredMoleculesPrintWriter; - + /** * PrintWriter for writing the resulting functional groups and frequencies */ private PrintWriter dataOutputPrintWriter; - + /** * SmilesGenerator for generating SMILES and pseudo SMILES representations */ private SmilesGenerator smilesGenerator; - + /** * MoleculeHashGenerator for easy assessment whether a functional group was already entered into the master HashMap */ private MoleculeHashGenerator molHashGenerator; - + /** * Instance of the ErtlFunctionalGroupsFinder with generalization turned off */ private ErtlFunctionalGroupsFinder ertlFGFinderGenOff; - + /** * Instance of the ErtlFunctionalGroupsFinder with generalization turned on */ private ErtlFunctionalGroupsFinder ertlFGFinderGenOn; - + /** - * Master HashMap for storing results; Its keys are the hash codes produced by the MoleculeHashGenerator for the - * functional groups and its values are inner HashMaps that hold the (pseudo) SMILES representation of a functional + * Master HashMap for storing results; Its keys are the hash codes produced by the MoleculeHashGenerator for the + * functional groups and its values are inner HashMaps that hold the (pseudo) SMILES representation of a functional * group and its frequencies for different settings as String-Object pairs, plus an exemplary molecule of origin */ private HashMap masterHashMap; - + /** * A map that gives a certain element symbol for a placeholder atom marking a specific aromatic atom in pseudo SMILES * creation */ private HashMap pseudoSmilesAromaticElementToPlaceholderElementMap; - + /** - * A map that gives the pseudo SMILES representation for a specific placeholder element from + * A map that gives the pseudo SMILES representation for a specific placeholder element from * pseudoSmilesAromaticElementToPlaceholderElementMap */ private HashMap pseudoSmilesPlaceholderElementToPseudoSmilesSymbolMap; - + /** * A list for storing all used settings keys in a test */ private List settingsKeysList; - + /** - * All allowed atomic numbers to pass to the ErtlFunctionalGroupsFinder as a set of integers (will be parsed from + * All allowed atomic numbers to pass to the ErtlFunctionalGroupsFinder as a set of integers (will be parsed from * NON_METALLIC_ATOMIC_NUMBERS) */ private Set nonMetallicAtomicNumbersSet; // - + // /** * Constructor *

- * Note: it does not initialize any class variables (except 5) because that would be unnecessary when it is called by a + * Note: it does not initialize any class variables (except 5) because that would be unnecessary when it is called by a * test method inherited from CDKTestCase; these initializations are done by initialize(). */ public ErtlFunctionalGroupsFinderEvaluationTest() { @@ -474,12 +474,12 @@ public ErtlFunctionalGroupsFinderEvaluationTest() { this.dataOutputPrintWriter = null; } // - + // - + // /** - * Test for analyzing molecules in an SD file for all four different electron donation models supplied by the cdk: + * Test for analyzing molecules in an SD file for all four different electron donation models supplied by the cdk: * daylight, cdk, piBonds, cdkAllowingExocyclic and the aromaticity model cdkLegacy. *

* (Functional groups occurring multiple times in the same molecule are counted multiple times) @@ -488,13 +488,13 @@ public ErtlFunctionalGroupsFinderEvaluationTest() { */ @Test public void testElectronDonationDependency() throws Exception { - this.analyzeElectronDonationDependency(ErtlFunctionalGroupsFinderEvaluationTest.SD_FILE_PATH, - ErtlFunctionalGroupsFinderEvaluationTest.ELECTRON_DONATION_TEST_IDENTIFIER, + this.analyzeElectronDonationDependency(ErtlFunctionalGroupsFinderEvaluationTest.SD_FILE_PATH, + ErtlFunctionalGroupsFinderEvaluationTest.ELECTRON_DONATION_TEST_IDENTIFIER, true); } - + /** - * Test for analyzing molecules in an SD file for all four different electron donation models supplied by the cdk: + * Test for analyzing molecules in an SD file for all four different electron donation models supplied by the cdk: * daylight, cdk, piBonds, cdkAllowingExocyclic and the aromaticity model cdkLegacy. *

* Difference to testElectronDonationDependency(): If the same functional group occurs multiple times in the same molecule @@ -504,13 +504,13 @@ public void testElectronDonationDependency() throws Exception { */ @Test public void testElectronDonationDependencyNoMultiples() throws Exception { - this.analyzeElectronDonationDependency(ErtlFunctionalGroupsFinderEvaluationTest.SD_FILE_PATH, - ErtlFunctionalGroupsFinderEvaluationTest.ELECTRON_DONATION_NO_MULTIPLES_TEST_IDENTIFIER, + this.analyzeElectronDonationDependency(ErtlFunctionalGroupsFinderEvaluationTest.SD_FILE_PATH, + ErtlFunctionalGroupsFinderEvaluationTest.ELECTRON_DONATION_NO_MULTIPLES_TEST_IDENTIFIER, false); } - + /** - * Test for analyzing molecules in an SD file for six different CycleFinder settings supplied by the cdk: all(), + * Test for analyzing molecules in an SD file for six different CycleFinder settings supplied by the cdk: all(), * vertexShort(), relevant(), essential(), tripleShort() and cdkAromaticSet(). *

* (Functional groups occurring multiple times in the same molecule are counted multiple times) @@ -519,10 +519,10 @@ public void testElectronDonationDependencyNoMultiples() throws Exception { */ @Test public void testCycleFinderDependency() throws Exception { - this.initializeWithFileOperations(ErtlFunctionalGroupsFinderEvaluationTest.SD_FILE_PATH, + this.initializeWithFileOperations(ErtlFunctionalGroupsFinderEvaluationTest.SD_FILE_PATH, ErtlFunctionalGroupsFinderEvaluationTest.CYCLE_FINDER_TEST_IDENTIFIER); Assumptions.assumeTrue(this.isTestAbleToRun); - + System.out.println("\nLoading file with path: " + ErtlFunctionalGroupsFinderEvaluationTest.SD_FILE_PATH); File tmpSDFile = new File(ErtlFunctionalGroupsFinderEvaluationTest.SD_FILE_PATH); int tmpRequiredNumberOfReaders = 6; @@ -538,23 +538,23 @@ public void testCycleFinderDependency() throws Exception { Assumptions.assumeTrue(false); return; } - + Aromaticity tmpDaylightModelAll = new Aromaticity(ElectronDonation.daylight(), Cycles.all()); Aromaticity tmpDaylightModelVertexShort = new Aromaticity(ElectronDonation.daylight(), Cycles.vertexShort()); Aromaticity tmpDaylightModelRelevant = new Aromaticity(ElectronDonation.daylight(), Cycles.relevant()); Aromaticity tmpDaylightModelEssential = new Aromaticity(ElectronDonation.daylight(), Cycles.essential()); Aromaticity tmpDaylightModelTripleShort = new Aromaticity(ElectronDonation.daylight(), Cycles.tripletShort()); Aromaticity tmpDaylightModelCdkAromaticSet = new Aromaticity(ElectronDonation.daylight(), Cycles.cdkAromaticSet()); - + boolean tmpAreMultiplesCounted = false; - + this.calculateAbsoluteFGFrequencies(tmpReaders[0], "all", tmpDaylightModelAll, tmpAreMultiplesCounted); this.calculateAbsoluteFGFrequencies(tmpReaders[1], "vertexShort", tmpDaylightModelVertexShort, tmpAreMultiplesCounted); this.calculateAbsoluteFGFrequencies(tmpReaders[2], "relevant", tmpDaylightModelRelevant, tmpAreMultiplesCounted); this.calculateAbsoluteFGFrequencies(tmpReaders[3], "essential", tmpDaylightModelEssential, tmpAreMultiplesCounted); this.calculateAbsoluteFGFrequencies(tmpReaders[4], "tripletShort", tmpDaylightModelTripleShort, tmpAreMultiplesCounted); this.calculateAbsoluteFGFrequencies(tmpReaders[5], "cdkAromaticSet", tmpDaylightModelCdkAromaticSet, tmpAreMultiplesCounted); - + System.out.println("\nAll analyses are done!"); for (IteratingSDFReader tmpReader : tmpReaders) { tmpReader.close(); @@ -563,11 +563,11 @@ public void testCycleFinderDependency() throws Exception { System.out.println("\nFinished!"); System.out.println("\nNumber of occured exceptions: " + this.exceptionsCounter); } - + /** * Testing the ErtlFunctionalGroupsFinder.find() method's performance on the given SD file. - * - * @throws java.lang.Exception if initializeWithFileOperations() throws an exception or the IteratingSDFReader + * + * @throws java.lang.Exception if initializeWithFileOperations() throws an exception or the IteratingSDFReader * can not be closed or an unexpectedException occurs */ @Test @@ -593,7 +593,7 @@ public void testPerformance() throws Exception { return; } List tmpMoleculesList = new LinkedList<>(); - Aromaticity tmpCdkLegacyModel = new Aromaticity(ElectronDonation.daylight(), + Aromaticity tmpCdkLegacyModel = new Aromaticity(ElectronDonation.daylight(), Cycles.or(Cycles.all(), Cycles.vertexShort())); while (tmpReader.hasNext()) { try { @@ -621,7 +621,7 @@ public void testPerformance() throws Exception { System.out.println("\nExtraction of functional groups from these molecules took " + (tmpEndTime - tmpStartTime) + " ms.\n"); } // - + // /** * Test for correct MoleculeHashGenerator settings/performance on some examples. @@ -632,7 +632,7 @@ public void testPerformance() throws Exception { public void testMoleculeHashGeneratorSettings() throws Exception { this.initialize(false, ""); SmilesParser tmpSmilesParser = new SmilesParser(SilentChemObjectBuilder.getInstance()); - + /*Chebi70986, Chebi16238 and Chebi57692 all contain the same functional group with pseudo SMILES code "O=C1N=C(C(=NR)C(=O)N1R)N(R)R", but different hybridizations in the resulting atom containers. But their hash codes should be the same under the given settings. This is tested exemplary for many similar cases*/ @@ -656,7 +656,7 @@ public void testMoleculeHashGeneratorSettings() throws Exception { Assertions.assertEquals(tmpHashCode1.longValue(), tmpHashCode2.longValue()); } } - + /*Functional groups like the tertiary amine or the hydroxyl group appear with aromatic and non-aromatic central atoms. These two cases should be discrimated by the MoleculeHashGenerator under the given settings*/ String tmpTertiaryAmineSmiles = "*N(*)*"; @@ -675,7 +675,7 @@ public void testMoleculeHashGeneratorSettings() throws Exception { tmpAtom.setIsAromatic(true); } Assertions.assertNotEquals(this.molHashGenerator.generate(tmpAromMol), this.molHashGenerator.generate(tmpNonAromMol)); - + /*The following are examples of different (unique!) SMILES codes representing the same functional groups. They should be assigned the same hash code*/ HashMap tmpEquivalentSmilesMap = new HashMap<>(20); @@ -697,44 +697,44 @@ public void testMoleculeHashGeneratorSettings() throws Exception { Assertions.assertEquals(this.molHashGenerator.generate(tmpKeyMol), this.molHashGenerator.generate(tmpValueMol)); } } - + /** * Test for correct preprocessing (neutralization of charges and selection of biggest fragment). - * + * * @throws Exception if initialize() throws an exception or a SMILES code can not be parsed into a molecule */ @Test public void testPreprocessing() throws Exception { this.initialize(false, ""); String tmpSmiles = "CC[O-].C"; - SmilesParser tmpSmilesParser = new SmilesParser(SilentChemObjectBuilder.getInstance()); + SmilesParser tmpSmilesParser = new SmilesParser(SilentChemObjectBuilder.getInstance()); IAtomContainer tmpMol = tmpSmilesParser.parseSmiles(tmpSmiles); tmpMol = this.applyFiltersAndPreprocessing(tmpMol); - SmilesGenerator tmpGenerator = SmilesGenerator.unique(); - Assertions.assertEquals("OCC", tmpGenerator.create(tmpMol)); + SmilesGenerator tmpGenerator = SmilesGenerator.unique(); + Assertions.assertEquals("OCC", tmpGenerator.create(tmpMol)); } // - + // - + // /** - * Analyzes molecules in an SD file for all four different electron donation models supplied by the cdk: + * Analyzes molecules in an SD file for all four different electron donation models supplied by the cdk: * daylight, cdk, piBonds, cdkAllowingExocyclic and the aromaticity model cdkLegacy. - * + * * @param anSDFilePath absolute path of the SD file to analyze - * @param aTestIdentifier a folder with this name will be created in the output directory and it will be added to + * @param aTestIdentifier a folder with this name will be created in the output directory and it will be added to * the output and log files' names for association of test and files; may be null or empty - * @param anAreMultiplesCounted if false, functional groups that occur multiple times in the same molecule will + * @param anAreMultiplesCounted if false, functional groups that occur multiple times in the same molecule will * only be counted once * @throws java.lang.Exception if initializeWithFileOperations() throws an exception or an unexpected exception occurs */ - private void analyzeElectronDonationDependency(String anSDFilePath, - String aTestIdentifier, + private void analyzeElectronDonationDependency(String anSDFilePath, + String aTestIdentifier, boolean anAreMultiplesCounted) throws Exception { this.initializeWithFileOperations(anSDFilePath, aTestIdentifier); Assumptions.assumeTrue(this.isTestAbleToRun); - + System.out.println("\nLoading file with path: " + anSDFilePath); File tmpSDFile = new File(anSDFilePath); int tmpRequiredNumberOfReaders = 5; @@ -752,24 +752,24 @@ private void analyzeElectronDonationDependency(String anSDFilePath, } //If the 'all' CycleFinder produces an Intractable exception the 'vertexShort' CycleFinder is used CycleFinder tmpCycleFinder = Cycles.or(Cycles.all(), Cycles.vertexShort()); - + Aromaticity tmpDaylightModel = new Aromaticity(ElectronDonation.daylight(), tmpCycleFinder); Aromaticity tmpCdkModel = new Aromaticity(ElectronDonation.cdk(), tmpCycleFinder); Aromaticity tmpPiBondsModel = new Aromaticity(ElectronDonation.piBonds(), tmpCycleFinder); Aromaticity tmpCdkAllowingExocyclicModel = new Aromaticity(ElectronDonation.cdkAllowingExocyclic(), tmpCycleFinder); Aromaticity tmpCDKLegacyModel = Aromaticity.cdkLegacy(); - - this.calculateAbsoluteFGFrequencies(tmpReaders[0], + + this.calculateAbsoluteFGFrequencies(tmpReaders[0], ErtlFunctionalGroupsFinderEvaluationTest.DAYLIGHT_MODEL_SETTINGS_KEY, tmpDaylightModel, anAreMultiplesCounted); - this.calculateAbsoluteFGFrequencies(tmpReaders[1], + this.calculateAbsoluteFGFrequencies(tmpReaders[1], ErtlFunctionalGroupsFinderEvaluationTest.CDK_MODEL_SETTINGS_KEY, tmpCdkModel, anAreMultiplesCounted); - this.calculateAbsoluteFGFrequencies(tmpReaders[2], + this.calculateAbsoluteFGFrequencies(tmpReaders[2], ErtlFunctionalGroupsFinderEvaluationTest.PIBONDS_MODEL_SETTINGS_KEY, tmpPiBondsModel, anAreMultiplesCounted); - this.calculateAbsoluteFGFrequencies(tmpReaders[3], + this.calculateAbsoluteFGFrequencies(tmpReaders[3], ErtlFunctionalGroupsFinderEvaluationTest.CDK_EXOCYCLIC_MODEL_SETTINGS_KEY, tmpCdkAllowingExocyclicModel, anAreMultiplesCounted); - this.calculateAbsoluteFGFrequencies(tmpReaders[4], + this.calculateAbsoluteFGFrequencies(tmpReaders[4], ErtlFunctionalGroupsFinderEvaluationTest.CDK_LEGACY_MODEL_SETTINGS_KEY, tmpCDKLegacyModel, anAreMultiplesCounted); - + System.out.println("\nAll analyses are done!"); for (IteratingSDFReader tmpReader : tmpReaders) { tmpReader.close(); @@ -778,12 +778,12 @@ private void analyzeElectronDonationDependency(String anSDFilePath, System.out.println("\nFinished!"); System.out.println("\nNumber of occured exceptions: " + this.exceptionsCounter); } - + /** - * Initializes all class variables except the working directory and the PrintWriter instances. This method should be + * Initializes all class variables except the working directory and the PrintWriter instances. This method should be * called directly when a test does not require any of the specific file operations like logging or reading an SD file. - * - * @param aShouldPrintHeader true, if this method is called directly (not by initializeWithFileOperations()) and + * + * @param aShouldPrintHeader true, if this method is called directly (not by initializeWithFileOperations()) and * should print on the console that a new test was started * @param aTestIdentifier if aShouldPrintHeader is true this ID will be printed to the console */ @@ -797,7 +797,7 @@ private void initialize(boolean aShouldPrintHeader, String aTestIdentifier) { this.molHashGenerator = new HashGeneratorMaker() .depth(8) .elemental() - //following line is used instead of .orbital() because the atom hybridizations take more information into + //following line is used instead of .orbital() because the atom hybridizations take more information into //account than the bond order sum but that is not required here //Note: This works here because the ErtlFunctionalGroupsFinder extracts the relevant atoms and bonds only //resulting in incomplete valences that can be used here in this way @@ -806,7 +806,7 @@ private void initialize(boolean aShouldPrintHeader, String aTestIdentifier) { .molecular(); this.ertlFGFinderGenOff = new ErtlFunctionalGroupsFinder(Mode.NO_GENERALIZATION); this.ertlFGFinderGenOn = new ErtlFunctionalGroupsFinder(Mode.DEFAULT); - this.masterHashMap = new HashMap(ErtlFunctionalGroupsFinderEvaluationTest.MASTER_HASHMAP_INITIAL_CAPACITY, + this.masterHashMap = new HashMap(ErtlFunctionalGroupsFinderEvaluationTest.MASTER_HASHMAP_INITIAL_CAPACITY, ErtlFunctionalGroupsFinderEvaluationTest.MASTER_HASHMAP_LOAD_FACTOR); this.settingsKeysList = new LinkedList<>(); this.exceptionsCounter = 0; @@ -826,19 +826,19 @@ private void initialize(boolean aShouldPrintHeader, String aTestIdentifier) { this.pseudoSmilesAromaticElementToPlaceholderElementMap.put("P", "Pm"); this.pseudoSmilesAromaticElementToPlaceholderElementMap.put("R", "Es"); this.pseudoSmilesPlaceholderElementToPseudoSmilesSymbolMap = new HashMap<>(10, 1); - this.pseudoSmilesPlaceholderElementToPseudoSmilesSymbolMap.put("Es", + this.pseudoSmilesPlaceholderElementToPseudoSmilesSymbolMap.put("Es", ErtlFunctionalGroupsFinderEvaluationTest.PSEUDO_SMILES_R_ATOM); - this.pseudoSmilesPlaceholderElementToPseudoSmilesSymbolMap.put("Pm", + this.pseudoSmilesPlaceholderElementToPseudoSmilesSymbolMap.put("Pm", ErtlFunctionalGroupsFinderEvaluationTest.PSEUDO_SMILES_AROMATIC_PHOSPHOR); - this.pseudoSmilesPlaceholderElementToPseudoSmilesSymbolMap.put("Sc", + this.pseudoSmilesPlaceholderElementToPseudoSmilesSymbolMap.put("Sc", ErtlFunctionalGroupsFinderEvaluationTest.PSEUDO_SMILES_AROMATIC_SELENIUM); - this.pseudoSmilesPlaceholderElementToPseudoSmilesSymbolMap.put("Os", + this.pseudoSmilesPlaceholderElementToPseudoSmilesSymbolMap.put("Os", ErtlFunctionalGroupsFinderEvaluationTest.PSEUDO_SMILES_AROMATIC_OXYGEN); this.pseudoSmilesPlaceholderElementToPseudoSmilesSymbolMap.put("Sm", ErtlFunctionalGroupsFinderEvaluationTest.PSEUDO_SMILES_AROMATIC_SULPHUR); - this.pseudoSmilesPlaceholderElementToPseudoSmilesSymbolMap.put("Nd", + this.pseudoSmilesPlaceholderElementToPseudoSmilesSymbolMap.put("Nd", ErtlFunctionalGroupsFinderEvaluationTest.PSEUDO_SMILES_AROMATIC_NITROGEN); - this.pseudoSmilesPlaceholderElementToPseudoSmilesSymbolMap.put("Ce", + this.pseudoSmilesPlaceholderElementToPseudoSmilesSymbolMap.put("Ce", ErtlFunctionalGroupsFinderEvaluationTest.PSEUDO_SMILES_AROMATIC_CARBON); this.isTestAbleToRun = true; if (aShouldPrintHeader) { @@ -847,13 +847,13 @@ private void initialize(boolean aShouldPrintHeader, String aTestIdentifier) { } /** * Initializes all class variables and determines the output directory. - * - * @param anSDFilePath absolute path of the SD file to analyze for a quick pre-check if it is present and the test + * + * @param anSDFilePath absolute path of the SD file to analyze for a quick pre-check if it is present and the test * is therefore meant to run; may be empty but not null - * @param aTestIdentifier a folder with this name will be created in the output directory and it will be added to + * @param aTestIdentifier a folder with this name will be created in the output directory and it will be added to * the output and log files' names for association of test and files; may be null or empty - * @throws java.lang.Exception if one the FileWriter instances can not be instantiated, more than - * Integer.MAX-VALUE tests are to be run this minute (error in the naming of output files), aPathOfSDFile is null or + * @throws java.lang.Exception if one the FileWriter instances can not be instantiated, more than + * Integer.MAX-VALUE tests are to be run this minute (error in the naming of output files), aPathOfSDFile is null or * an unexpected exception occurs. */ private void initializeWithFileOperations(String anSDFilePath, String aTestIdentifier) throws Exception { @@ -872,9 +872,9 @@ private void initializeWithFileOperations(String anSDFilePath, String aTestIdent } //Determine the output directory String tmpOutputRootDirectory = tmpSDFile.getAbsoluteFile().getParent() + File.separator; - this.outputDirectory = tmpOutputRootDirectory - + ErtlFunctionalGroupsFinderEvaluationTest.OUTPUT_FOLDER_FROM_WORKING_DIRECTORY - + File.separator + this.outputDirectory = tmpOutputRootDirectory + + ErtlFunctionalGroupsFinderEvaluationTest.OUTPUT_FOLDER_FROM_WORKING_DIRECTORY + + File.separator + aTestIdentifier; File tmpOutputDirectoryFile = new File(this.outputDirectory); if (!tmpOutputDirectoryFile.exists()) { @@ -886,11 +886,11 @@ private void initializeWithFileOperations(String anSDFilePath, String aTestIdent String tmpDateTimeAddition = tmpDateTime.format(DateTimeFormatter.ofPattern( ErtlFunctionalGroupsFinderEvaluationTest.DATE_TIME_FORMAT_PATTERN)); //Set up exceptions log file - File tmpExceptionsLogFile = new File(this.outputDirectory + File.separator + aTestIdentifier - + ErtlFunctionalGroupsFinderEvaluationTest.FILE_NAME_ADDITION_SEPERATOR - + ErtlFunctionalGroupsFinderEvaluationTest.EXCEPTIONS_LOG_FILE_NAME - + ErtlFunctionalGroupsFinderEvaluationTest.FILE_NAME_ADDITION_SEPERATOR - + tmpDateTimeAddition + File tmpExceptionsLogFile = new File(this.outputDirectory + File.separator + aTestIdentifier + + ErtlFunctionalGroupsFinderEvaluationTest.FILE_NAME_ADDITION_SEPERATOR + + ErtlFunctionalGroupsFinderEvaluationTest.EXCEPTIONS_LOG_FILE_NAME + + ErtlFunctionalGroupsFinderEvaluationTest.FILE_NAME_ADDITION_SEPERATOR + + tmpDateTimeAddition + ErtlFunctionalGroupsFinderEvaluationTest.EXCEPTIONS_LOG_FILE_TYPE); int tmpFilesInThisMinuteCounter = 1; boolean tmpNumberAddedToFileName = false; @@ -898,12 +898,12 @@ private void initializeWithFileOperations(String anSDFilePath, String aTestIdent if (tmpExceptionsLogFile.exists()) { tmpNumberAddedToFileName = true; while (tmpFilesInThisMinuteCounter <= Integer.MAX_VALUE) { - tmpExceptionsLogFile = new File(this.outputDirectory + File.separator + aTestIdentifier - + ErtlFunctionalGroupsFinderEvaluationTest.FILE_NAME_ADDITION_SEPERATOR - + ErtlFunctionalGroupsFinderEvaluationTest.EXCEPTIONS_LOG_FILE_NAME - + ErtlFunctionalGroupsFinderEvaluationTest.FILE_NAME_ADDITION_SEPERATOR - + tmpDateTimeAddition - + "(" + tmpFilesInThisMinuteCounter + ")" + tmpExceptionsLogFile = new File(this.outputDirectory + File.separator + aTestIdentifier + + ErtlFunctionalGroupsFinderEvaluationTest.FILE_NAME_ADDITION_SEPERATOR + + ErtlFunctionalGroupsFinderEvaluationTest.EXCEPTIONS_LOG_FILE_NAME + + ErtlFunctionalGroupsFinderEvaluationTest.FILE_NAME_ADDITION_SEPERATOR + + tmpDateTimeAddition + + "(" + tmpFilesInThisMinuteCounter + ")" + ErtlFunctionalGroupsFinderEvaluationTest.EXCEPTIONS_LOG_FILE_TYPE); if (!tmpExceptionsLogFile.exists()) { break; @@ -923,18 +923,18 @@ private void initializeWithFileOperations(String anSDFilePath, String aTestIdent //Set up filtered molecules log file File tmpFilteredMoleculesFile; if (tmpNumberAddedToFileName) { - tmpFilteredMoleculesFile = new File(this.outputDirectory+ File.separator + aTestIdentifier - + ErtlFunctionalGroupsFinderEvaluationTest.FILE_NAME_ADDITION_SEPERATOR - + ErtlFunctionalGroupsFinderEvaluationTest.FILTERED_MOLECULES_FILE_NAME - + ErtlFunctionalGroupsFinderEvaluationTest.FILE_NAME_ADDITION_SEPERATOR - + tmpDateTimeAddition + "(" + tmpFilesInThisMinuteCounter + ")" + tmpFilteredMoleculesFile = new File(this.outputDirectory+ File.separator + aTestIdentifier + + ErtlFunctionalGroupsFinderEvaluationTest.FILE_NAME_ADDITION_SEPERATOR + + ErtlFunctionalGroupsFinderEvaluationTest.FILTERED_MOLECULES_FILE_NAME + + ErtlFunctionalGroupsFinderEvaluationTest.FILE_NAME_ADDITION_SEPERATOR + + tmpDateTimeAddition + "(" + tmpFilesInThisMinuteCounter + ")" + ErtlFunctionalGroupsFinderEvaluationTest.FILTERED_MOLECULES_FILE_TYPE); } else { - tmpFilteredMoleculesFile = new File(this.outputDirectory+ File.separator + aTestIdentifier - + ErtlFunctionalGroupsFinderEvaluationTest.FILE_NAME_ADDITION_SEPERATOR - + ErtlFunctionalGroupsFinderEvaluationTest.FILTERED_MOLECULES_FILE_NAME - + ErtlFunctionalGroupsFinderEvaluationTest.FILE_NAME_ADDITION_SEPERATOR - + tmpDateTimeAddition + tmpFilteredMoleculesFile = new File(this.outputDirectory+ File.separator + aTestIdentifier + + ErtlFunctionalGroupsFinderEvaluationTest.FILE_NAME_ADDITION_SEPERATOR + + ErtlFunctionalGroupsFinderEvaluationTest.FILTERED_MOLECULES_FILE_NAME + + ErtlFunctionalGroupsFinderEvaluationTest.FILE_NAME_ADDITION_SEPERATOR + + tmpDateTimeAddition + ErtlFunctionalGroupsFinderEvaluationTest.FILTERED_MOLECULES_FILE_TYPE); } FileWriter tmpFilteredMoleculesFileWriter = new FileWriter(tmpFilteredMoleculesFile); @@ -945,19 +945,19 @@ private void initializeWithFileOperations(String anSDFilePath, String aTestIdent //Set up output file File tmpOutputFile; if (tmpNumberAddedToFileName) { - tmpOutputFile = new File(this.outputDirectory+ File.separator + aTestIdentifier - + ErtlFunctionalGroupsFinderEvaluationTest.FILE_NAME_ADDITION_SEPERATOR - + ErtlFunctionalGroupsFinderEvaluationTest.OUTPUT_FILE_NAME - + ErtlFunctionalGroupsFinderEvaluationTest.FILE_NAME_ADDITION_SEPERATOR - + tmpDateTimeAddition - + "(" + tmpFilesInThisMinuteCounter + ")" + tmpOutputFile = new File(this.outputDirectory+ File.separator + aTestIdentifier + + ErtlFunctionalGroupsFinderEvaluationTest.FILE_NAME_ADDITION_SEPERATOR + + ErtlFunctionalGroupsFinderEvaluationTest.OUTPUT_FILE_NAME + + ErtlFunctionalGroupsFinderEvaluationTest.FILE_NAME_ADDITION_SEPERATOR + + tmpDateTimeAddition + + "(" + tmpFilesInThisMinuteCounter + ")" + ErtlFunctionalGroupsFinderEvaluationTest.OUTPUT_FILE_TYPE); } else { - tmpOutputFile = new File(this.outputDirectory+ File.separator + aTestIdentifier + tmpOutputFile = new File(this.outputDirectory+ File.separator + aTestIdentifier + ErtlFunctionalGroupsFinderEvaluationTest.FILE_NAME_ADDITION_SEPERATOR - + ErtlFunctionalGroupsFinderEvaluationTest.OUTPUT_FILE_NAME - + ErtlFunctionalGroupsFinderEvaluationTest.FILE_NAME_ADDITION_SEPERATOR - + tmpDateTimeAddition + + ErtlFunctionalGroupsFinderEvaluationTest.OUTPUT_FILE_NAME + + ErtlFunctionalGroupsFinderEvaluationTest.FILE_NAME_ADDITION_SEPERATOR + + tmpDateTimeAddition + ErtlFunctionalGroupsFinderEvaluationTest.OUTPUT_FILE_TYPE); } FileWriter tmpOutputFileWriter = new FileWriter(tmpOutputFile); @@ -966,24 +966,24 @@ private void initializeWithFileOperations(String anSDFilePath, String aTestIdent this.areFileOperationsActivated = true; this.initialize(false, aTestIdentifier); } - + /** - * Does one iteration of loading molecules from an IteratingSDFReader, applying the given aromaticity model, + * Does one iteration of loading molecules from an IteratingSDFReader, applying the given aromaticity model, * extracting their functional groups (with and without generalization) and adding the results to the master HashMap. * Exceptions caused by read-in molecules are caught and logged. - * + * * @param aReader to load the molecules to be screened from an SD file * @param aSettingsKey resulting functional groups will be added to the inner maps of the master HasMap under this key * @param anAromaticity to apply to the molecules - * @param anAreMultiplesCounted if false, functional groups that occur multiple times in the same molecule will + * @param anAreMultiplesCounted if false, functional groups that occur multiple times in the same molecule will * only be counted once */ private void calculateAbsoluteFGFrequencies( - IteratingSDFReader aReader, - String aSettingsKey, - Aromaticity anAromaticity, + IteratingSDFReader aReader, + String aSettingsKey, + Aromaticity anAromaticity, boolean anAreMultiplesCounted) { - + System.out.println("\nAnalyzing database using specified settings: " + aSettingsKey); // int tmpMoleculesCounter = 0; //total number of molecules successfully loaded from the Sd file @@ -1007,7 +1007,7 @@ private void calculateAbsoluteFGFrequencies( String tmpSettingsKeyForLogging = aSettingsKey; IAtomContainer tmpOriginalMolecule = null; try { - /*Note: A molecule can be supposed to be filtered for more than one of the named reasons but only the first + /*Note: A molecule can be supposed to be filtered for more than one of the named reasons but only the first tested reason will be named as cause*/ String tmpCauseForFiltering; //Remove s-groups before cloning, they cause trouble in atomContainer.clone(); see cdk api CDKConstants.CTAB_SGROUPS @@ -1063,13 +1063,13 @@ private void calculateAbsoluteFGFrequencies( } if (!(tmpFunctionalGroups == null || tmpFunctionalGroups.isEmpty())) { //If a molecule does not have FGs without generalization it does not have FGs with generalization either - this.enterFunctionalGroupsIntoMasterMap(tmpFunctionalGroups, - aSettingsKey, - anAreMultiplesCounted, + this.enterFunctionalGroupsIntoMasterMap(tmpFunctionalGroups, + aSettingsKey, + anAreMultiplesCounted, tmpOriginalMolecule); - this.enterFunctionalGroupsIntoMasterMap(tmpFunctionalGroupsGeneralized, + this.enterFunctionalGroupsIntoMasterMap(tmpFunctionalGroupsGeneralized, aSettingsKey + ErtlFunctionalGroupsFinderEvaluationTest.GENERALIZATION_SETTINGS_KEY_ADDITION, - anAreMultiplesCounted, + anAreMultiplesCounted, tmpOriginalMolecule); } else { tmpNoFunctionalGroupsCounter++; @@ -1095,23 +1095,23 @@ private void calculateAbsoluteFGFrequencies( System.out.println(tmpValidMoleculesCounter + " molecules were valid."); System.out.println(tmpNoFunctionalGroupsCounter + " molecules contained no functional groups."); System.out.println(this.masterHashMap.size() + " different functional groups were detected so far."); - + } - + /** - * Combines all filtering and preprocessing steps. Molecules will be filtered if they contain elements with a not allowed atomic + * Combines all filtering and preprocessing steps. Molecules will be filtered if they contain elements with a not allowed atomic * number (metal, metalloid or 'R' atoms) or if their atom or bond count is zero. If the molecule should be filtered - * the IAtomContainer property MOLECULE_MUST_BE_FILTERED_PROPERTY_KEY will be set to true and the + * the IAtomContainer property MOLECULE_MUST_BE_FILTERED_PROPERTY_KEY will be set to true and the * CAUSE_FOR_FILTERING_PROPERTY_KEY will give the cause for the filtering. *

* The preprocessing consists of neutralizing any charges in the molecule and selecting the biggest fragment for * further processing if the molecule consists of one or more unconnected structures. If any of these cases apply * the IAtomContainer properties CHARGES_NEUTRALIZED_PROPERTY_KEY and BIGGEST_FRAGMENT_SELECTED_PROPERTY_KEY will be * set accordingly. - * + * * @param aMolecule the molecule to be processed * @return the processed molecule - * @throws CDKException if AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms() or neutralizeCharges() + * @throws CDKException if AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms() or neutralizeCharges() * throws a CDKException */ private IAtomContainer applyFiltersAndPreprocessing(IAtomContainer aMolecule) throws CDKException { @@ -1121,15 +1121,15 @@ private IAtomContainer applyFiltersAndPreprocessing(IAtomContainer aMolecule) th aMolecule.setProperty(ErtlFunctionalGroupsFinderEvaluationTest.BIGGEST_FRAGMENT_SELECTED_PROPERTY_KEY, false); if (this.isAtomOrBondCountZero(aMolecule)) { aMolecule.setProperty(ErtlFunctionalGroupsFinderEvaluationTest.MOLECULE_MUST_BE_FILTERED_PROPERTY_KEY, true); - aMolecule.setProperty(ErtlFunctionalGroupsFinderEvaluationTest.CAUSE_FOR_FILTERING_PROPERTY_KEY, + aMolecule.setProperty(ErtlFunctionalGroupsFinderEvaluationTest.CAUSE_FOR_FILTERING_PROPERTY_KEY, ErtlFunctionalGroupsFinderEvaluationTest.ATOM_OR_BOND_COUNT_ZERO); return aMolecule; } - /*Remove s-groups before cloning (will be done by the ErtlFunctionalGroupsFinder.find() method), + /*Remove s-groups before cloning (will be done by the ErtlFunctionalGroupsFinder.find() method), they cause trouble in atomContainer.clone(); see cdk api CDKConstants.CTAB_SGROUPS.*/ aMolecule.removeProperty(CDKConstants.CTAB_SGROUPS); AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(aMolecule); - //Preprocessing: From structures containing two or more unconnected structures (e.g. ions) + //Preprocessing: From structures containing two or more unconnected structures (e.g. ions) //choose the largest structure for analysis if (this.isStructureUnconnected(aMolecule)) { aMolecule = this.selectBiggestFragment(aMolecule); @@ -1138,7 +1138,7 @@ private IAtomContainer applyFiltersAndPreprocessing(IAtomContainer aMolecule) th //Filter molecules containing metals, metalloids or "R" atoms if (this.areMetallicOrMetalloidAtomsInMolecule(aMolecule)) { aMolecule.setProperty(ErtlFunctionalGroupsFinderEvaluationTest.MOLECULE_MUST_BE_FILTERED_PROPERTY_KEY, true); - aMolecule.setProperty(ErtlFunctionalGroupsFinderEvaluationTest.CAUSE_FOR_FILTERING_PROPERTY_KEY, + aMolecule.setProperty(ErtlFunctionalGroupsFinderEvaluationTest.CAUSE_FOR_FILTERING_PROPERTY_KEY, ErtlFunctionalGroupsFinderEvaluationTest.FORBIDDEN_ATOMIC_NUMBER); return aMolecule; } @@ -1149,20 +1149,20 @@ private IAtomContainer applyFiltersAndPreprocessing(IAtomContainer aMolecule) th } return aMolecule; } - + /** * Returns true, if the atom or bond count of the molecule is zero. This is a cause for filtering the molecule. - * + * * @param aMolecule the molecule to be tested * @return true, if the atom or bond count of the molecule is zero */ private boolean isAtomOrBondCountZero(IAtomContainer aMolecule) { return aMolecule.getAtomCount() == 0 || aMolecule.getBondCount() == 0; } - + /** * Returns true, if a not allowed atomic number is detected in the molecule. This is a cause for filtering the molecule. - * + * * @param aMolecule the molecule to be tested * @return true, if the molecule contains a not allowed element */ @@ -1174,22 +1174,22 @@ private boolean areMetallicOrMetalloidAtomsInMolecule(IAtomContainer aMolecule) } return false; } - + /** * Returns true, if the molecule consists of two or more unconnected structures. - * + * * @param aMolecule the molecule to be tested * @return true, if the molecule consists of two or more unconnected structures */ private boolean isStructureUnconnected(IAtomContainer aMolecule) { return (!ConnectivityChecker.isConnected(aMolecule)); } - + /** - * Returns the biggest unconnected fragment/structure of the given molecule. To pre-check if the molecule has - * two or more unconnected structures use isStructureConnected(). All set properties of aMolecule will be copied to + * Returns the biggest unconnected fragment/structure of the given molecule. To pre-check if the molecule has + * two or more unconnected structures use isStructureConnected(). All set properties of aMolecule will be copied to * the returned IAtomContainer. - * + * * @param aMolecule the molecule whose biggest fragment should be found * @return the biggest unconnected fragment/structure of the given molecule */ @@ -1204,10 +1204,10 @@ private IAtomContainer selectBiggestFragment(IAtomContainer aMolecule) { tmpBiggestFragment.setProperties(aMolecule.getProperties()); return tmpBiggestFragment; } - + /** * Returns true, if the molecule contains charged atoms. - * + * * @param aMolecule the molecule to be tested * @return true, if the molecule contains charged atoms */ @@ -1219,14 +1219,14 @@ private boolean isMoleculeCharged(IAtomContainer aMolecule) { } return false; } - + /** - * Neutralizes all non-zero charges in the given molecule. To pre-check if the molecule has charged atoms use + * Neutralizes all non-zero charges in the given molecule. To pre-check if the molecule has charged atoms use * isMoleculeCharged(). - * + * * @param aMolecule the molecule to be neutralized * @return the same IAtomContainer instance as aMolecule but with neutralized charges - * @throws CDKException if CDKAtomTypeMatcher.findMatchingAtomType() or CDKHydrogenAdder.addImplicitHydrogens + * @throws CDKException if CDKAtomTypeMatcher.findMatchingAtomType() or CDKHydrogenAdder.addImplicitHydrogens * throws a CDKException */ private IAtomContainer neutralizeCharges(IAtomContainer aMolecule) throws CDKException { @@ -1244,25 +1244,25 @@ private IAtomContainer neutralizeCharges(IAtomContainer aMolecule) throws CDKExc } return aMolecule; } - + /** - * Inserts a list of IAtomContainers (the functional groups of one molecule) into the master HashMap. If the - * functional group is already inserted (with this settings key) its frequency for the given settings key is raised + * Inserts a list of IAtomContainers (the functional groups of one molecule) into the master HashMap. If the + * functional group is already inserted (with this settings key) its frequency for the given settings key is raised * by one or else a new inner HashMap will be created for it. - * + * * @param aFunctionalGroupsList the functional groups of one molecule to be inserted * @param aSettingsKey will be the key of the inner HashMap inside the master HashMap - * @param anAreMultiplesCounted if false, functional groups that occur multiple times in aFunctionalGroupsList will + * @param anAreMultiplesCounted if false, functional groups that occur multiple times in aFunctionalGroupsList will * only be entered once into the master Hashmap - * @param anFGContainingMolecule the molecule from which the functional groups originated; will be added to the + * @param anFGContainingMolecule the molecule from which the functional groups originated; will be added to the * master Hashmap */ private void enterFunctionalGroupsIntoMasterMap( - List aFunctionalGroupsList, - String aSettingsKey, + List aFunctionalGroupsList, + String aSettingsKey, boolean anAreMultiplesCounted, IAtomContainer anFGContainingMolecule) { - + if (!this.settingsKeysList.contains(aSettingsKey)) { this.settingsKeysList.add(aSettingsKey); } @@ -1310,15 +1310,15 @@ private void enterFunctionalGroupsIntoMasterMap( this.masterHashMap.put(tmpHashCode, tmpNewInnerMap); } tmpAlreadyEnteredFGsForThisMol.add(tmpHashCode); - } + } } - + /** - * Writes all frequency data with the respective hash code, SMILES code, pseudo SMILES code and the ChEBI or ChEMBL - * id or CDK title of an exemplary molecule that contains this functional group for all functional groups in the + * Writes all frequency data with the respective hash code, SMILES code, pseudo SMILES code and the ChEBI or ChEMBL + * id or CDK title of an exemplary molecule that contains this functional group for all functional groups in the * master HashMap to the output file. *

- * Note: The IAtomContainer object stored in the master HashMap's inner maps are cloned in this method for pseudo + * Note: The IAtomContainer object stored in the master HashMap's inner maps are cloned in this method for pseudo * SMILES creation. And all PrintWriter instances will be closed. */ private void saveData() { @@ -1328,15 +1328,15 @@ private void saveData() { } System.out.println("\nWriting to file..."); //Writing the output file's header - String tmpFileHeader = ErtlFunctionalGroupsFinderEvaluationTest.HASH_CODE_KEY - + ErtlFunctionalGroupsFinderEvaluationTest.OUTPUT_FILE_SEPERATOR - + ErtlFunctionalGroupsFinderEvaluationTest.PSEUDO_SMILES_CODE_KEY - + ErtlFunctionalGroupsFinderEvaluationTest.OUTPUT_FILE_SEPERATOR + String tmpFileHeader = ErtlFunctionalGroupsFinderEvaluationTest.HASH_CODE_KEY + + ErtlFunctionalGroupsFinderEvaluationTest.OUTPUT_FILE_SEPERATOR + + ErtlFunctionalGroupsFinderEvaluationTest.PSEUDO_SMILES_CODE_KEY + + ErtlFunctionalGroupsFinderEvaluationTest.OUTPUT_FILE_SEPERATOR + ErtlFunctionalGroupsFinderEvaluationTest.SMILES_CODE_KEY; for (String tmpSettingsKey : this.settingsKeysList) { tmpFileHeader += ErtlFunctionalGroupsFinderEvaluationTest.OUTPUT_FILE_SEPERATOR + tmpSettingsKey; } - tmpFileHeader += ErtlFunctionalGroupsFinderEvaluationTest.OUTPUT_FILE_SEPERATOR + tmpFileHeader += ErtlFunctionalGroupsFinderEvaluationTest.OUTPUT_FILE_SEPERATOR + ErtlFunctionalGroupsFinderEvaluationTest.MOLECULE_OF_ORIGIN_KEY; this.dataOutputPrintWriter.println(tmpFileHeader); this.dataOutputPrintWriter.flush(); @@ -1348,16 +1348,16 @@ private void saveData() { String tmpSmilesCode = (String) tmpInnerMap.get(ErtlFunctionalGroupsFinderEvaluationTest.SMILES_CODE_KEY); String tmpPseudoSmilesCode = (String) tmpInnerMap.get(ErtlFunctionalGroupsFinderEvaluationTest.PSEUDO_SMILES_CODE_KEY); //Writing the record for this functional group - String tmpRecord = tmpHashCode - + ErtlFunctionalGroupsFinderEvaluationTest.OUTPUT_FILE_SEPERATOR - + tmpPseudoSmilesCode - + ErtlFunctionalGroupsFinderEvaluationTest.OUTPUT_FILE_SEPERATOR + String tmpRecord = tmpHashCode + + ErtlFunctionalGroupsFinderEvaluationTest.OUTPUT_FILE_SEPERATOR + + tmpPseudoSmilesCode + + ErtlFunctionalGroupsFinderEvaluationTest.OUTPUT_FILE_SEPERATOR + tmpSmilesCode; for (String tmpSettingsKey : this.settingsKeysList) { if (tmpInnerMap.get(tmpSettingsKey) == null) { tmpInnerMap.put(tmpSettingsKey, 0); } - tmpRecord += ErtlFunctionalGroupsFinderEvaluationTest.OUTPUT_FILE_SEPERATOR + tmpRecord += ErtlFunctionalGroupsFinderEvaluationTest.OUTPUT_FILE_SEPERATOR + tmpInnerMap.get(tmpSettingsKey); } IAtomContainer tmpMoleculeOfOrigin = (IAtomContainer)tmpInnerMap.get( @@ -1372,7 +1372,7 @@ private void saveData() { } else if (tmpCdkTitle != null) { tmpRecord += ErtlFunctionalGroupsFinderEvaluationTest.OUTPUT_FILE_SEPERATOR + tmpCdkTitle; } else { - tmpRecord += ErtlFunctionalGroupsFinderEvaluationTest.OUTPUT_FILE_SEPERATOR + tmpRecord += ErtlFunctionalGroupsFinderEvaluationTest.OUTPUT_FILE_SEPERATOR + ErtlFunctionalGroupsFinderEvaluationTest.MOLECULE_OF_ORIGIN_ID_PLACEHOLDER; } this.dataOutputPrintWriter.println(tmpRecord); @@ -1383,15 +1383,15 @@ private void saveData() { this.exceptionsPrintWriter.close(); this.filteredMoleculesPrintWriter.close(); } - + /** - * Returns the Pseudo SMILES code of the given molecule. Pseudo atoms are represented by 'R' atoms and aromatic + * Returns the Pseudo SMILES code of the given molecule. Pseudo atoms are represented by 'R' atoms and aromatic * (C, S, O, P, Se, N) atoms will be marked by *. - * + * * @param aMolecule the molecule to be represented by the Pseudo SMILES string * @return the Pseudo SMILES representation of the given molecule * @throws CDKException if SmilesGenerator.create() throws a CDKException - * @throws CloneNotSupportedException if IAtomContainer.clone() throws a CloneNotSupportedException when + * @throws CloneNotSupportedException if IAtomContainer.clone() throws a CloneNotSupportedException when * invoked on aMolecule */ private String getPseudoSmilesCode(IAtomContainer aMolecule) throws CDKException, CloneNotSupportedException { @@ -1399,7 +1399,7 @@ private String getPseudoSmilesCode(IAtomContainer aMolecule) throws CDKException for (IAtom tmpAtom : tmpMolecule.atoms()) { if (tmpAtom.isAromatic()) { IAtom tmpReplacementAtom = null; - if (tmpAtom.getSymbol() != null + if (tmpAtom.getSymbol() != null && this.pseudoSmilesAromaticElementToPlaceholderElementMap.containsKey(tmpAtom.getSymbol())) { tmpReplacementAtom = new Atom(this.pseudoSmilesAromaticElementToPlaceholderElementMap.get(tmpAtom.getSymbol())); } @@ -1411,7 +1411,7 @@ private String getPseudoSmilesCode(IAtomContainer aMolecule) throws CDKException } } tmpAtom.setIsAromatic(false); - if (tmpAtom instanceof IPseudoAtom && "R".equals(((IPseudoAtom)tmpAtom).getLabel())) { + if (tmpAtom instanceof IPseudoAtom && "R".equals(((IPseudoAtom)tmpAtom).getLabel())) { //second condition: see creation of R atoms in ErtlFunctionalGroupsFinder IAtom tmpReplacementAtom = new Atom(this.pseudoSmilesAromaticElementToPlaceholderElementMap.get("R")); Integer tmpImplicitHydrogenCount = tmpAtom.getImplicitHydrogenCount(); @@ -1425,18 +1425,18 @@ private String getPseudoSmilesCode(IAtomContainer aMolecule) throws CDKException } String tmpPseudoSmilesCode = this.smilesGenerator.create(tmpMolecule); for (String tmpPlaceholderElementSymbol : this.pseudoSmilesPlaceholderElementToPseudoSmilesSymbolMap.keySet()) { - tmpPseudoSmilesCode = tmpPseudoSmilesCode.replaceAll("(\\[" + tmpPlaceholderElementSymbol + "\\])", + tmpPseudoSmilesCode = tmpPseudoSmilesCode.replaceAll("(\\[" + tmpPlaceholderElementSymbol + "\\])", this.pseudoSmilesPlaceholderElementToPseudoSmilesSymbolMap.get(tmpPlaceholderElementSymbol)) - .replaceAll("(" + tmpPlaceholderElementSymbol + ")", + .replaceAll("(" + tmpPlaceholderElementSymbol + ")", this.pseudoSmilesPlaceholderElementToPseudoSmilesSymbolMap.get(tmpPlaceholderElementSymbol)); } return tmpPseudoSmilesCode; } - + /** - * Logs molecules that are filtered from the SD file to the filtered molecules file with SMILES code, ChEBI name + * Logs molecules that are filtered from the SD file to the filtered molecules file with SMILES code, ChEBI name * and id or ChEMBL id or CDK title and why they were filtered. - * + * * @param aMolecule the filtered molecule to be logged * @param aCounter the number of filtered molecules so far (will be written to file) * @param aCause why this molecule was filtered @@ -1451,7 +1451,7 @@ private void logFilteredMolecule(IAtomContainer aMolecule, int aCounter, String try { this.filteredMoleculesPrintWriter.println("SMILES code: " + this.smilesGenerator.create(aMolecule)); } catch (CDKException | NullPointerException anException){ - this.filteredMoleculesPrintWriter.println("SMILES code: " + this.filteredMoleculesPrintWriter.println("SMILES code: " + ErtlFunctionalGroupsFinderEvaluationTest.SMILES_CODE_PLACEHOLDER); } String tmpChebiName = aMolecule.getProperty("ChEBI Name"); @@ -1469,13 +1469,13 @@ private void logFilteredMolecule(IAtomContainer aMolecule, int aCounter, String this.filteredMoleculesPrintWriter.println("Cause: " + aCause); this.filteredMoleculesPrintWriter.flush(); } - + /** - * Logs molecules that raised exceptions somewhere in the processing to the exceptions log file with exception + * Logs molecules that raised exceptions somewhere in the processing to the exceptions log file with exception * message and stack trace, SMILES code ChEBI name and id or ChEMBL id or CDK title. - * + * * @param anException the exception caused by the molecule - * @param aSettingsKey a string representation of the settings tested in the current iteration, + * @param aSettingsKey a string representation of the settings tested in the current iteration, * e.g. the aromaticity model * @param aMolecule the exception-causing molecule */ @@ -1486,13 +1486,13 @@ private void logException(Exception anException, String aSettingsKey, IAtomConta } this.exceptionsCounter++; this.exceptionsPrintWriter.println(); - this.exceptionsPrintWriter.println(this.exceptionsCounter + ". " + anException.getClass() + ": " + this.exceptionsPrintWriter.println(this.exceptionsCounter + ". " + anException.getClass() + ": " + anException.getLocalizedMessage()); this.exceptionsPrintWriter.println("Settings key: " + aSettingsKey); try { this.exceptionsPrintWriter.println("SMILES code: " + this.smilesGenerator.create(aMolecule)); } catch (CDKException | NullPointerException aNewException){ - this.exceptionsPrintWriter.println("SMILES code: " + this.exceptionsPrintWriter.println("SMILES code: " + ErtlFunctionalGroupsFinderEvaluationTest.SMILES_CODE_PLACEHOLDER); } String tmpChebiName = aMolecule.getProperty("ChEBI Name"); @@ -1522,7 +1522,7 @@ private void logException(Exception anException, String aSettingsKey, IAtomConta * @see AtomEncoder */ enum CustomAtomEncoder implements AtomEncoder { - + /** * Encode if an atom is aromatic or not. This specification is necessary to distinguish functional groups with * aromatic environments and those without. For example: [H]O[C] and [H]OC* (pseudo SMILES codes) should be @@ -1531,7 +1531,7 @@ enum CustomAtomEncoder implements AtomEncoder { * @see IAtom#isAromatic() */ AROMATICITY { - + /** *{@inheritDoc} */ diff --git a/src/test/java/org/openscience/cdk/tools/test/ErtlFunctionalGroupsFinderTest.java b/src/test/java/org/openscience/cdk/tools/test/ErtlFunctionalGroupsFinderTest.java index f4ca63a..c4eedde 100644 --- a/src/test/java/org/openscience/cdk/tools/test/ErtlFunctionalGroupsFinderTest.java +++ b/src/test/java/org/openscience/cdk/tools/test/ErtlFunctionalGroupsFinderTest.java @@ -1,8 +1,8 @@ -/** +/* * ErtlFunctionalGroupsFinder for CDK - * Copyright (C) 2022 Sebastian Fritsch + * Copyright (c) 2023 Sebastian Fritsch, Stefan Neumann, Jonas Schaub, Christoph Steinbeck, and Achim Zielesny * - * Source code is available at + * Source code is available at * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License @@ -17,6 +17,7 @@ * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ + package org.openscience.cdk.tools.test; import org.junit.jupiter.api.Assertions; @@ -38,8 +39,8 @@ import org.openscience.cdk.isomorphism.VentoFoggia; import org.openscience.cdk.silent.SilentChemObjectBuilder; import org.openscience.cdk.smiles.SmilesParser; -import org.openscience.cdk.tools.manipulator.AtomContainerManipulator; import org.openscience.cdk.tools.ErtlFunctionalGroupsFinder; +import org.openscience.cdk.tools.manipulator.AtomContainerManipulator; import java.util.LinkedList; import java.util.List; @@ -56,218 +57,218 @@ public class ErtlFunctionalGroupsFinderTest { public ErtlFunctionalGroupsFinderTest() { super(); } - + @Test public void testFind1() throws Exception { - String moleculeSmiles = "Cc1cc(C)nc(NS(=O)(=O)c2ccc(N)cc2)n1"; - String[] expectedFGs = new String[] {"[R]N([R])S(=O)(=O)[R]", "[c]N(H)H", "NarR3", "NarR3"}; - testFind(moleculeSmiles, expectedFGs); + String moleculeSmiles = "Cc1cc(C)nc(NS(=O)(=O)c2ccc(N)cc2)n1"; + String[] expectedFGs = new String[] {"[R]N([R])S(=O)(=O)[R]", "[c]N(H)H", "NarR3", "NarR3"}; + testFind(moleculeSmiles, expectedFGs); } - + @Test public void testFind2() throws Exception{ - String moleculeSmiles = "NC(=N)c1ccc(\\\\C=C\\\\c2ccc(cc2O)C(=N)N)cc1"; - String[] expectedFGs = new String[] {"[R]N=C-N([R])[R]", "[C]=[C]", "[c]OH", "[R]N=C-N([R])[R]"}; - testFind(moleculeSmiles, expectedFGs); + String moleculeSmiles = "NC(=N)c1ccc(\\\\C=C\\\\c2ccc(cc2O)C(=N)N)cc1"; + String[] expectedFGs = new String[] {"[R]N=C-N([R])[R]", "[C]=[C]", "[c]OH", "[R]N=C-N([R])[R]"}; + testFind(moleculeSmiles, expectedFGs); + } + + @Test + public void testFind3() throws Exception { + String moleculeSmiles = "CC(=O)Nc1nnc(s1)S(=O)(=O)N"; + String[] expectedFGs = new String[] {"[R]N([R])C(=O)[R]", "[R]S(=O)(=O)N([R])[R]", "NarR3", "NarR3", "SarR2"}; + testFind(moleculeSmiles, expectedFGs); + } + + @Test + public void testFind4() throws Exception { + String moleculeSmiles = "NS(=O)(=O)c1cc2c(NCNS2(=O)=O)cc1Cl"; + String[] expectedFGs = new String[] {"[R]S(=O)(=O)N([R])[R]", "[R]S(=O)(=O)N([R])[C]N([R])[R]", "[R]Cl"}; + testFind(moleculeSmiles, expectedFGs); + } + + @Test + public void testFind5() throws Exception { + String moleculeSmiles = "CNC1=Nc2ccc(Cl)cc2C(=N(=O)C1)c3ccccc3"; + String[] expectedFGs = new String[] {"[R]N([R])[C]=N[R]", "[R]Cl", "[R]N(=O)=[C]"}; + testFind(moleculeSmiles, expectedFGs); + } + + @Test + public void testFind6() throws Exception { + String moleculeSmiles = "Cc1onc(c2ccccc2)c1C(=O)N[C@H]3[C@H]4SC(C)(C)[C@@H](N4C3=O)C(=O)O"; + String[] expectedFGs = new String[] {"O=C([R])N([R])[R]", "O=C([R])N([R])[C]S[R]", "O=C([R])OH", "OarR2", "NarR3"}; + testFind(moleculeSmiles, expectedFGs); + } + + @Test + public void testFind7() throws Exception { + String moleculeSmiles = "Clc1ccccc1C2=NCC(=O)Nc3ccc(cc23)N(=O)=O"; + String[] expectedFGs = new String[] {"[R]Cl", "[R]N=[C]", "[R]C(=O)N([R])[R]", "O=N([R])=O"}; + testFind(moleculeSmiles, expectedFGs); + } + + @Test + public void testFind8() throws Exception { + String moleculeSmiles = "COc1cc(cc(C(=O)NCC2CCCN2CC=C)c1OC)S(=O)(=O)N"; + String[] expectedFGs = new String[] {"[R]O[R]", "[R]N([R])C(=O)[R]", "N([R])([R])[R]", "[C]=[C]", "[R]O[R]", "[R]S(=O)(=O)N([R])[R]"}; + testFind(moleculeSmiles, expectedFGs); + } + + @Test + public void testFind9() throws Exception { + String moleculeSmiles = "Cc1ccc(Cl)c(Nc2ccccc2C(=O)O)c1Cl"; + String[] expectedFGs = new String[] {"[R]Cl", "[R]N(H)[R]", "O=C(OH)[R]", "[R]Cl"}; + testFind(moleculeSmiles, expectedFGs); + } + + @Test + public void testFind10() throws Exception { + String moleculeSmiles = "Clc1ccc2Oc3ccccc3N=C(N4CCNCC4)c2c1"; + String[] expectedFGs = new String[] {"[R]Cl", "[R]O[R]", "[R]N([R])[C]=N[R]", "[R]N([H])[R]"}; + testFind(moleculeSmiles, expectedFGs); + } + + @Test + public void testFind11() throws Exception { + String moleculeSmiles = "FC(F)(F)CN1C(=O)CN=C(c2ccccc2)c3cc(Cl)ccc13"; + String[] expectedFGs = new String[] {"[R]F", "[R]F", "[R]F", "O=C([R])N([R])[R]", "[R]N=[C]", "[R]Cl"}; + testFind(moleculeSmiles, expectedFGs); + } + + @Test + public void testFind12() throws Exception { + String moleculeSmiles = "OC[C@H]1O[C@H](C[C@@H]1O)n2cnc3[C@H](O)CNC=Nc23";; + String[] expectedFGs = new String[] {"[C]O[H]", "[R]O[R]", "[C]OH", "[C]OH", "[R]N=CN([R])[R]", "NarR3", "NarR3"}; + testFind(moleculeSmiles, expectedFGs); + } + + @Test + public void testFind13() throws Exception { + String moleculeSmiles = "CCN[C@H]1C[C@H](C)S(=O)(=O)c2sc(cc12)S(=O)(=O)N"; + String[] expectedFGs = new String[] {"[R]N([R])H", "O=S(=O)([R])[R]", "[R]S(=O)(=O)N([R])[R]", "SarR2"}; + testFind(moleculeSmiles, expectedFGs); + } + + @Test + public void testFind14() throws Exception { + String moleculeSmiles = "C[C@@H](O)[C@@H]1[C@H]2[C@@H](C)C(=C(N2C1=O)C(=O)O)S[C@@H]3CN[C@@H](C3)C(=O)N(C)C"; + String[] expectedFGs = new String[] {"[C]O[H]", "O=C([R])N([R])C(C(=O)(OH))=[C]S[R]", "[R]N(H)[R]", "[R]N([R])C([R])=O"}; + testFind(moleculeSmiles, expectedFGs); + } + + @Test + public void testFind15() throws Exception { + String moleculeSmiles = "C[C@@H]1CN(C[C@H](C)N1)c2c(F)c(N)c3C(=O)C(=CN(C4CC4)c3c2F)C(=O)O"; + String[] expectedFGs = new String[] {"[R]N([R])[R]", "[R]N([H])[R]", "[R]F", "[c]N(H)H", "[c]=O", "[R]F", "[R]C(=O)OH", "NarR3"}; + testFind(moleculeSmiles, expectedFGs); + } + + @Test + public void testFind16() throws Exception { + String moleculeSmiles = "CC(=CCC1C(=O)N(N(C1=O)c2ccccc2)c3ccccc3)C"; + String[] expectedFGs = new String[] {"[C]=[C]", "[R]C(=O)N([R])N([R])C(=O)[R]"}; + testFind(moleculeSmiles, expectedFGs); + } + + @Test + public void testFind17() throws Exception { + String moleculeSmiles = "Clc1ccc2N=C3NC(=O)CN3Cc2c1Cl"; + String[] expectedFGs = new String[] {"Cl[R]", "[R]N=C(N([R])[R])N([R])C(=O)[R]", "Cl[R]"}; + testFind(moleculeSmiles, expectedFGs); + } + + @Test + public void testFind18() throws Exception { + String moleculeSmiles = "CC(=O)N[C@@H]1[C@@H](NC(=N)N)C=C(O[C@H]1[C@H](O)[C@H](O)CO)C(=O)O"; + String[] expectedFGs = new String[] {"[R]N([R])C(=O)[R]", "[R]N([R])C(=N[R])N([R])[R]", "O=C(OH)C(=[C])O[R]" , "[C]OH", "[C]OH", "[C]OH"}; + testFind(moleculeSmiles, expectedFGs); + } + + @Test + public void testFind19() throws Exception { + String moleculeSmiles = "C[C@H](O)[C@H](O)[C@H]1CNc2nc(N)nc(O)c2N1"; + String[] expectedFGs = new String[] {"[C]OH", "[C]OH", "[R]N(H)[R]" , "[c]N(H)H", "[c]OH", "[R]N(H)[R]", "NarR3", "NarR3"}; + testFind(moleculeSmiles, expectedFGs); + } + + @Test + public void testFind20() throws Exception { + String moleculeSmiles = "N[C@@H]1CCCCN(C1)c2c(Cl)cc3C(=O)C(=CN(C4CC4)c3c2Cl)C(=O)O"; + String[] expectedFGs = new String[] {"[C]N([H])[H]", "[R]N([R])[R]", "[R]Cl" , "[c]=O", "[R]Cl", "[R]C(=O)OH", "NarR3"}; + testFind(moleculeSmiles, expectedFGs); + } + + private void testFind(String moleculeSmiles, String[] fGStrings) throws Exception { + testFind(moleculeSmiles, fGStrings, new Aromaticity(ElectronDonation.daylight(), Cycles.all())); + } + + private void testFind(String moleculeSmiles, String[] fGStrings, Aromaticity aromaticity) throws Exception { + // prepare input + SmilesParser smilesParser = new SmilesParser(SilentChemObjectBuilder.getInstance()); + IAtomContainer mol = smilesParser.parseSmiles(moleculeSmiles); + AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(mol); + aromaticity.apply(mol); + + // find functional groups + ErtlFunctionalGroupsFinder fgFinder = new ErtlFunctionalGroupsFinder(); + List fGs = fgFinder.find(mol); + + // get expected groups + List expectedFGs = new LinkedList<>(); + for (String fGString : fGStrings) { + expectedFGs.add(buildFunctionalGroup(fGString)); + } + + // compare + this.assertIsomorphism(expectedFGs, fGs); } - - @Test - public void testFind3() throws Exception { - String moleculeSmiles = "CC(=O)Nc1nnc(s1)S(=O)(=O)N"; - String[] expectedFGs = new String[] {"[R]N([R])C(=O)[R]", "[R]S(=O)(=O)N([R])[R]", "NarR3", "NarR3", "SarR2"}; - testFind(moleculeSmiles, expectedFGs); - } - - @Test - public void testFind4() throws Exception { - String moleculeSmiles = "NS(=O)(=O)c1cc2c(NCNS2(=O)=O)cc1Cl"; - String[] expectedFGs = new String[] {"[R]S(=O)(=O)N([R])[R]", "[R]S(=O)(=O)N([R])[C]N([R])[R]", "[R]Cl"}; - testFind(moleculeSmiles, expectedFGs); - } - - @Test - public void testFind5() throws Exception { - String moleculeSmiles = "CNC1=Nc2ccc(Cl)cc2C(=N(=O)C1)c3ccccc3"; - String[] expectedFGs = new String[] {"[R]N([R])[C]=N[R]", "[R]Cl", "[R]N(=O)=[C]"}; - testFind(moleculeSmiles, expectedFGs); - } - - @Test - public void testFind6() throws Exception { - String moleculeSmiles = "Cc1onc(c2ccccc2)c1C(=O)N[C@H]3[C@H]4SC(C)(C)[C@@H](N4C3=O)C(=O)O"; - String[] expectedFGs = new String[] {"O=C([R])N([R])[R]", "O=C([R])N([R])[C]S[R]", "O=C([R])OH", "OarR2", "NarR3"}; - testFind(moleculeSmiles, expectedFGs); - } - - @Test - public void testFind7() throws Exception { - String moleculeSmiles = "Clc1ccccc1C2=NCC(=O)Nc3ccc(cc23)N(=O)=O"; - String[] expectedFGs = new String[] {"[R]Cl", "[R]N=[C]", "[R]C(=O)N([R])[R]", "O=N([R])=O"}; - testFind(moleculeSmiles, expectedFGs); - } - - @Test - public void testFind8() throws Exception { - String moleculeSmiles = "COc1cc(cc(C(=O)NCC2CCCN2CC=C)c1OC)S(=O)(=O)N"; - String[] expectedFGs = new String[] {"[R]O[R]", "[R]N([R])C(=O)[R]", "N([R])([R])[R]", "[C]=[C]", "[R]O[R]", "[R]S(=O)(=O)N([R])[R]"}; - testFind(moleculeSmiles, expectedFGs); - } - - @Test - public void testFind9() throws Exception { - String moleculeSmiles = "Cc1ccc(Cl)c(Nc2ccccc2C(=O)O)c1Cl"; - String[] expectedFGs = new String[] {"[R]Cl", "[R]N(H)[R]", "O=C(OH)[R]", "[R]Cl"}; - testFind(moleculeSmiles, expectedFGs); - } - - @Test - public void testFind10() throws Exception { - String moleculeSmiles = "Clc1ccc2Oc3ccccc3N=C(N4CCNCC4)c2c1"; - String[] expectedFGs = new String[] {"[R]Cl", "[R]O[R]", "[R]N([R])[C]=N[R]", "[R]N([H])[R]"}; - testFind(moleculeSmiles, expectedFGs); - } - - @Test - public void testFind11() throws Exception { - String moleculeSmiles = "FC(F)(F)CN1C(=O)CN=C(c2ccccc2)c3cc(Cl)ccc13"; - String[] expectedFGs = new String[] {"[R]F", "[R]F", "[R]F", "O=C([R])N([R])[R]", "[R]N=[C]", "[R]Cl"}; - testFind(moleculeSmiles, expectedFGs); - } - - @Test - public void testFind12() throws Exception { - String moleculeSmiles = "OC[C@H]1O[C@H](C[C@@H]1O)n2cnc3[C@H](O)CNC=Nc23";; - String[] expectedFGs = new String[] {"[C]O[H]", "[R]O[R]", "[C]OH", "[C]OH", "[R]N=CN([R])[R]", "NarR3", "NarR3"}; - testFind(moleculeSmiles, expectedFGs); - } - - @Test - public void testFind13() throws Exception { - String moleculeSmiles = "CCN[C@H]1C[C@H](C)S(=O)(=O)c2sc(cc12)S(=O)(=O)N"; - String[] expectedFGs = new String[] {"[R]N([R])H", "O=S(=O)([R])[R]", "[R]S(=O)(=O)N([R])[R]", "SarR2"}; - testFind(moleculeSmiles, expectedFGs); - } - - @Test - public void testFind14() throws Exception { - String moleculeSmiles = "C[C@@H](O)[C@@H]1[C@H]2[C@@H](C)C(=C(N2C1=O)C(=O)O)S[C@@H]3CN[C@@H](C3)C(=O)N(C)C"; - String[] expectedFGs = new String[] {"[C]O[H]", "O=C([R])N([R])C(C(=O)(OH))=[C]S[R]", "[R]N(H)[R]", "[R]N([R])C([R])=O"}; - testFind(moleculeSmiles, expectedFGs); - } - - @Test - public void testFind15() throws Exception { - String moleculeSmiles = "C[C@@H]1CN(C[C@H](C)N1)c2c(F)c(N)c3C(=O)C(=CN(C4CC4)c3c2F)C(=O)O"; - String[] expectedFGs = new String[] {"[R]N([R])[R]", "[R]N([H])[R]", "[R]F", "[c]N(H)H", "[c]=O", "[R]F", "[R]C(=O)OH", "NarR3"}; - testFind(moleculeSmiles, expectedFGs); - } - - @Test - public void testFind16() throws Exception { - String moleculeSmiles = "CC(=CCC1C(=O)N(N(C1=O)c2ccccc2)c3ccccc3)C"; - String[] expectedFGs = new String[] {"[C]=[C]", "[R]C(=O)N([R])N([R])C(=O)[R]"}; - testFind(moleculeSmiles, expectedFGs); - } - - @Test - public void testFind17() throws Exception { - String moleculeSmiles = "Clc1ccc2N=C3NC(=O)CN3Cc2c1Cl"; - String[] expectedFGs = new String[] {"Cl[R]", "[R]N=C(N([R])[R])N([R])C(=O)[R]", "Cl[R]"}; - testFind(moleculeSmiles, expectedFGs); - } - - @Test - public void testFind18() throws Exception { - String moleculeSmiles = "CC(=O)N[C@@H]1[C@@H](NC(=N)N)C=C(O[C@H]1[C@H](O)[C@H](O)CO)C(=O)O"; - String[] expectedFGs = new String[] {"[R]N([R])C(=O)[R]", "[R]N([R])C(=N[R])N([R])[R]", "O=C(OH)C(=[C])O[R]" , "[C]OH", "[C]OH", "[C]OH"}; - testFind(moleculeSmiles, expectedFGs); - } - - @Test - public void testFind19() throws Exception { - String moleculeSmiles = "C[C@H](O)[C@H](O)[C@H]1CNc2nc(N)nc(O)c2N1"; - String[] expectedFGs = new String[] {"[C]OH", "[C]OH", "[R]N(H)[R]" , "[c]N(H)H", "[c]OH", "[R]N(H)[R]", "NarR3", "NarR3"}; - testFind(moleculeSmiles, expectedFGs); - } - - @Test - public void testFind20() throws Exception { - String moleculeSmiles = "N[C@@H]1CCCCN(C1)c2c(Cl)cc3C(=O)C(=CN(C4CC4)c3c2Cl)C(=O)O"; - String[] expectedFGs = new String[] {"[C]N([H])[H]", "[R]N([R])[R]", "[R]Cl" , "[c]=O", "[R]Cl", "[R]C(=O)OH", "NarR3"}; - testFind(moleculeSmiles, expectedFGs); - } - - private void testFind(String moleculeSmiles, String[] fGStrings) throws Exception { - testFind(moleculeSmiles, fGStrings, new Aromaticity(ElectronDonation.daylight(), Cycles.all())); - } - - private void testFind(String moleculeSmiles, String[] fGStrings, Aromaticity aromaticity) throws Exception { - // prepare input - SmilesParser smilesParser = new SmilesParser(SilentChemObjectBuilder.getInstance()); - IAtomContainer mol = smilesParser.parseSmiles(moleculeSmiles); - AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(mol); - aromaticity.apply(mol); - - // find functional groups - ErtlFunctionalGroupsFinder fgFinder = new ErtlFunctionalGroupsFinder(); - List fGs = fgFinder.find(mol); - - // get expected groups - List expectedFGs = new LinkedList<>(); - for (String fGString : fGStrings) { - expectedFGs.add(buildFunctionalGroup(fGString)); - } - - // compare - this.assertIsomorphism(expectedFGs, fGs); - } /** * NOTE: actual and expected functional groups must be in the same order! - * + * * @param expectedFGs list of expected functional groups - * @param actualFGs list of actual functional groups + * @param actualFGs list of actual functional groups * @throws Exception if anything does not work as planned */ private void assertIsomorphism(List expectedFGs, List actualFGs) { - Assertions.assertEquals(expectedFGs.size(), actualFGs.size(), - "Number of functional groups does not match the expected number of groups"); - - for(int i = 0; i < expectedFGs.size(); i++) { - IAtomContainer cExp = expectedFGs.get(i); - IAtomContainer cAct = actualFGs.get(i); - - Assertions.assertEquals(cExp.getAtomCount(), cAct.getAtomCount(), - "Groups #" + i + ": different atom count"); - Assertions.assertEquals(cExp.getBondCount(), cAct.getBondCount(), - "Groups #" + i + ": different bond count"); - - Pattern pattern = VentoFoggia.findIdentical(cExp); - - Assertions.assertTrue(pattern.matches(cAct), "Groups #" + i + ": not isomorph"); - - Mappings mappings = pattern.matchAll(cAct); - - Map atomMap = mappings.toAtomMap().iterator().next(); - for (Map.Entry e : atomMap.entrySet()) { - IAtom atomExp = e.getKey(); - IAtom atomAct = e.getValue(); - Assertions.assertEquals(atomExp.isAromatic(), atomAct.isAromatic(), - "Groups #" + i + ": Atom aromaticity does not match" - + atomAct.getSymbol() + atomAct.isAromatic() + atomExp.getSymbol() - + atomExp.isAromatic()); - } - - Map bondMap = mappings.toBondMap().iterator().next(); - for (Map.Entry e : bondMap.entrySet()) { - IBond bondExp = e.getKey(); - IBond bondAct = e.getValue(); - Assertions.assertEquals(bondExp.isAromatic(), bondAct.isAromatic(), - "Groups #" + i + ": Bond aromaticity does not match"); - } - } + Assertions.assertEquals(expectedFGs.size(), actualFGs.size(), + "Number of functional groups does not match the expected number of groups"); + + for(int i = 0; i < expectedFGs.size(); i++) { + IAtomContainer cExp = expectedFGs.get(i); + IAtomContainer cAct = actualFGs.get(i); + + Assertions.assertEquals(cExp.getAtomCount(), cAct.getAtomCount(), + "Groups #" + i + ": different atom count"); + Assertions.assertEquals(cExp.getBondCount(), cAct.getBondCount(), + "Groups #" + i + ": different bond count"); + + Pattern pattern = VentoFoggia.findIdentical(cExp); + + Assertions.assertTrue(pattern.matches(cAct), "Groups #" + i + ": not isomorph"); + + Mappings mappings = pattern.matchAll(cAct); + + Map atomMap = mappings.toAtomMap().iterator().next(); + for (Map.Entry e : atomMap.entrySet()) { + IAtom atomExp = e.getKey(); + IAtom atomAct = e.getValue(); + Assertions.assertEquals(atomExp.isAromatic(), atomAct.isAromatic(), + "Groups #" + i + ": Atom aromaticity does not match" + + atomAct.getSymbol() + atomAct.isAromatic() + atomExp.getSymbol() + + atomExp.isAromatic()); + } + + Map bondMap = mappings.toBondMap().iterator().next(); + for (Map.Entry e : bondMap.entrySet()) { + IBond bondExp = e.getKey(); + IBond bondAct = e.getValue(); + Assertions.assertEquals(bondExp.isAromatic(), bondAct.isAromatic(), + "Groups #" + i + ": Bond aromaticity does not match"); + } + } } - + private IAtomContainer buildFunctionalGroup(String string) { IAtom a1, a2, a3, a4, a5, a6, a7, a8, a9; IBond b1, b2, b3, b4, b5, b6, b7, b8, b9; @@ -277,44 +278,44 @@ private IAtomContainer buildFunctionalGroup(String string) { // custom templates switch(string) { case "NarR3": - a1 = builder.newInstance(IPseudoAtom.class, "R"); - a2 = builder.newInstance(IPseudoAtom.class, "R"); - a3 = builder.newInstance(IPseudoAtom.class, "R"); + a1 = builder.newInstance(IPseudoAtom.class, "R"); + a2 = builder.newInstance(IPseudoAtom.class, "R"); + a3 = builder.newInstance(IPseudoAtom.class, "R"); a4 = builder.newInstance(IAtom.class, "N"); a4.setIsAromatic(true); - + b1 = builder.newInstance(IBond.class, a1, a4, Order.SINGLE); b2 = builder.newInstance(IBond.class, a2, a4, Order.SINGLE); b3 = builder.newInstance(IBond.class, a3, a4, Order.SINGLE); - + container = new AtomContainer(); container.setAtoms(new IAtom[] {a1, a2, a3, a4}); container.setBonds(new IBond[] {b1, b2, b3}); return container; - + case "SarR2": - a1 = builder.newInstance(IPseudoAtom.class, "R"); - a2 = builder.newInstance(IPseudoAtom.class, "R"); + a1 = builder.newInstance(IPseudoAtom.class, "R"); + a2 = builder.newInstance(IPseudoAtom.class, "R"); a3 = builder.newInstance(IAtom.class, "S"); a3.setIsAromatic(true); - + b1 = builder.newInstance(IBond.class, a1, a3, Order.SINGLE); b2 = builder.newInstance(IBond.class, a2, a3, Order.SINGLE); - + container = new AtomContainer(); container.setAtoms(new IAtom[] {a1, a2, a3}); container.setBonds(new IBond[] {b1, b2}); return container; - + case "OarR2": - a1 = builder.newInstance(IPseudoAtom.class, "R"); - a2 = builder.newInstance(IPseudoAtom.class, "R"); + a1 = builder.newInstance(IPseudoAtom.class, "R"); + a2 = builder.newInstance(IPseudoAtom.class, "R"); a3 = builder.newInstance(IAtom.class, "O"); a3.setIsAromatic(true); - + b1 = builder.newInstance(IBond.class, a1, a3, Order.SINGLE); b2 = builder.newInstance(IBond.class, a2, a3, Order.SINGLE); - + container = new AtomContainer(); container.setAtoms(new IAtom[] {a1, a2, a3}); container.setBonds(new IBond[] {b1, b2}); @@ -322,29 +323,29 @@ private IAtomContainer buildFunctionalGroup(String string) { // smiles default: - try { - SmilesParser smilesParser = new SmilesParser(SilentChemObjectBuilder.getInstance()); - try { - if(string.equals("[c]=O")) - smilesParser.kekulise(false); - container = smilesParser.parseSmiles(string); - } - catch(InvalidSmilesException e) { - smilesParser.kekulise(false); - container = smilesParser.parseSmiles(string); - } - + try { + SmilesParser smilesParser = new SmilesParser(SilentChemObjectBuilder.getInstance()); + try { + if(string.equals("[c]=O")) + smilesParser.kekulise(false); + container = smilesParser.parseSmiles(string); + } + catch(InvalidSmilesException e) { + smilesParser.kekulise(false); + container = smilesParser.parseSmiles(string); + } + for(IAtom a : container.atoms()) { - if(a instanceof PseudoAtom) { - a.setSymbol("R"); - } + if(a instanceof PseudoAtom) { + a.setSymbol("R"); + } } return container; - } - catch(InvalidSmilesException e) { - throw new IllegalArgumentException("Input string '" + string + " could not be found as a template " + - "and is not a valid SMILES string."); - } + } + catch(InvalidSmilesException e) { + throw new IllegalArgumentException("Input string '" + string + " could not be found as a template " + + "and is not a valid SMILES string."); + } } } -} \ No newline at end of file +} diff --git a/src/test/java/org/openscience/cdk/tools/test/ErtlFunctionalGroupsFinderUtilityTest.java b/src/test/java/org/openscience/cdk/tools/test/ErtlFunctionalGroupsFinderUtilityTest.java index 591c376..7e85486 100644 --- a/src/test/java/org/openscience/cdk/tools/test/ErtlFunctionalGroupsFinderUtilityTest.java +++ b/src/test/java/org/openscience/cdk/tools/test/ErtlFunctionalGroupsFinderUtilityTest.java @@ -1,22 +1,23 @@ /* - * Test of Utilities for ErtlFunctionalGroupsFinder for CDK - * Copyright (C) 2022 Jonas Schaub - * - * Source code is available at - * + * ErtlFunctionalGroupsFinder for CDK + * Copyright (c) 2023 Sebastian Fritsch, Stefan Neumann, Jonas Schaub, Christoph Steinbeck, and Achim Zielesny + * + * Source code is available at + * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License * as published by the Free Software Foundation; either version 2.1 * of the License, or (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * + * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ + package org.openscience.cdk.tools.test; import org.junit.jupiter.api.Assertions;