diff --git a/tool/fragment/src/main/java/org/openscience/cdk/fragment/ExhaustiveFragmenter.java b/tool/fragment/src/main/java/org/openscience/cdk/fragment/ExhaustiveFragmenter.java
index af248b8e9f..65bffc4acd 100644
--- a/tool/fragment/src/main/java/org/openscience/cdk/fragment/ExhaustiveFragmenter.java
+++ b/tool/fragment/src/main/java/org/openscience/cdk/fragment/ExhaustiveFragmenter.java
@@ -1,4 +1,5 @@
-/* Copyright (C) 2010 Rajarshi Guha
+/* Copyright (C) 2025 Rajarshi Guha
+ * Tom Weiß
*
* Contact: cdk-devel@lists.sourceforge.net
*
@@ -22,185 +23,938 @@
*/
package org.openscience.cdk.fragment;
-import org.openscience.cdk.aromaticity.Aromaticity;
import org.openscience.cdk.exception.CDKException;
-import org.openscience.cdk.graph.SpanningTree;
import org.openscience.cdk.interfaces.IAtom;
import org.openscience.cdk.interfaces.IAtomContainer;
import org.openscience.cdk.interfaces.IBond;
-import org.openscience.cdk.interfaces.IRingSet;
+import org.openscience.cdk.interfaces.IChemObject;
+import org.openscience.cdk.interfaces.IPseudoAtom;
+import org.openscience.cdk.interfaces.IStereoElement;
+import org.openscience.cdk.ringsearch.RingSearch;
+import org.openscience.cdk.smiles.SmiFlavor;
import org.openscience.cdk.smiles.SmilesGenerator;
-import org.openscience.cdk.tools.CDKHydrogenAdder;
import org.openscience.cdk.tools.ILoggingTool;
import org.openscience.cdk.tools.LoggingToolFactory;
-import org.openscience.cdk.tools.manipulator.AtomContainerManipulator;
+import java.util.ArrayDeque;
import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Deque;
import java.util.HashMap;
+import java.util.HashSet;
import java.util.List;
import java.util.Map;
+import java.util.Set;
/**
- * Generate fragments exhaustively.
- *
- * This fragmentation scheme simply breaks single non-ring bonds. By default
- * fragments smaller than 6 atoms in size are not considered, but this can be
- * changed by the user. Side chains are retained.
+ * Performs exhaustive fragmentation of molecules by breaking single non-ring,
+ * non-terminal bonds in all combinations. If it is not possible to generate
+ * fragments, an empty list is returned. A bond is non-terminal if each of
+ * its two atoms has at least one further bond to another heavy atom.
+ *
+ * By default:
+ *
+ * - Fragments smaller than 6 atoms (excluding implicit hydrogen) are not
+ * returned.
+ * - Fragments are returned with open valences, where a bond has been split.
+ * - The fragmentation splits at a maximum tree depth of 27, meaning that
+ * at most 27 bonds are split simultaneously.
+ * - The SMILES code of the fragments is generated with {@link SmiFlavor#Unique}
+ * and {@link SmiFlavor#UseAromaticSymbols}.
+ * - Stereo information is disregarded
+ *
+ * Users can modify these settings, with the exception that the maximum
+ * tree depth cannot be higher than 31 (Java's limitation caused by
+ * integer indexing).
+ *
+ * Warning on preservation of stereo information: This process
+ * is not reliable and can lead to incorrect stereochemistry in the resulting
+ * fragments. When a chiral center is broken during fragmentation, the new
+ * fragment may be incorrectly assigned as chiral even if it is not
+ * anymore because some of its substituents are now equal.
+ *
+ * Example: the chiral molecule {@code CC[C@@H](Cl)CCCC} will, among others,
+ * produce the fragment {@code CC[C@@H](Cl)CC}, where the stereo configuration
+ * is preserved although the structure is no longer chiral.
+ *
+ *
+ * Note on Stereochemistry and SMILES:
+ * For stereochemical information to be included in the SMILES strings
+ * returned by {@link #getFragments()}, the `smilesGenerator` used by this
+ * fragmenter must be configured with the {@link SmiFlavor#Stereo} flag.
+ * If the flag is not set, the SMILES will not contain stereochemistry,
+ * even if this setting is enabled and the underlying {@code IAtomContainer}
+ * objects have stereo elements.
+ *
+ *
+ * Fragment Deduplication:
+ * The `ExhaustiveFragmenter` uses unique SMILES strings for internal
+ * deduplication of generated fragments. This means that after a fragment is
+ * generated, its unique SMILES representation is computed (using the default or
+ * user specified {@link SmilesGenerator}). Be aware that stereo information is
+ * only copied and checked for deduplication if
+ * {@link ExhaustiveFragmenter#setPreserveStereo} is set to true and the
+ * specified {@link SmilesGenerator} has {@link SmiFlavor#Stereo}. If a fragment
+ * with the same canonical SMILES has already been generated and stored, the new
+ * fragment is considered a duplicate and is not added to the results.
+ *
+ * This deduplication strategy is particularly important when considering the
+ * {@link Saturation} setting:
+ *
+ * - If fragments are {@link Saturation#HYDROGEN_SATURATED_FRAGMENTS}, the
+ * saturation process might lead to a canonical SMILES that is identical to a
+ * fragment obtained via a different bond cleavage, or a fragment that appears
+ * different due to explicit hydrogen representation but becomes identical when
+ * canonicalized.
+ * - For example, an unsaturated fragment like `[CH]1CCCCC1` (cyclohexyl
+ * radical) might deduplicate with a saturated `C1CCCCC1` (cyclohexane) if
+ * `HYDROGEN_SATURATED_FRAGMENTS` is enabled and both forms canonicalize to the
+ * same SMILES depending on the exact SMILES generator and atom properties.
+ * - It is crucial to understand that the uniqueness is based solely on the
+ * canonical SMILES string, not on the exact atom-by-atom identity or origin
+ * within the original molecule.
+ *
+ *
+ * Example Usage:
+ *
{@code
+ * // By default, returns unsaturated fragments with a minimum size of 6 atoms
+ * ExhaustiveFragmenter fragmenter = new ExhaustiveFragmenter();
+ * SmilesParser smiParser = new SmilesParser(SilentChemObjectBuilder.getInstance());
+ * // Cyclopentylbenzene
+ * IAtomContainer mol = smiParser.parseSmiles("C1CCCC1C1=CC=CC=C1");
+ * fragmenter.generateFragments(mol);
+ *
+ * // Retrieve SMILES representations of fragments
+ * String[] smilesFragments = fragmenter.getFragments();
+ * // Example Result (depending on exact fragmentation points and min size):
+ * // "[C]1=CC=CC=C1"
+ *
+ * // Retrieve AtomContainer representations of fragments
+ * IAtomContainer[] atomContainerFragments = fragmenter.getFragmentsAsContainers();
+ *
+ * // Example: Configuring for hydrogen-saturated fragments with a minimum size of 5
+ * ExhaustiveFragmenter saturatedFragmenter = new ExhaustiveFragmenter(
+ * 5,
+ * ExhaustiveFragmenter.Saturation.HYDROGEN_SATURATED_FRAGMENTS
+ * );
+ * saturatedFragmenter.generateFragments(mol);
+ * String[] saturatedSmilesFragments = saturatedFragmenter.getFragments();
+ * // "C1CCCC1", "C1=CC=CC=C1"
+ * }
*
* @author Rajarshi Guha
+ * @author Tom Weiß
+ * @cdk.module fragment
* @cdk.keyword fragment
*/
public class ExhaustiveFragmenter implements IFragmenter {
- private static final int DEFAULT_MIN_FRAG_SIZE = 6;
+ /**
+ * Specifies how the open valences of generated fragments are treated:
+ * saturated with hydrogens, saturated with R atoms, or left unsaturated.
+ */
+ public enum Saturation {
+ /**
+ * Fragments will be returned in their saturated form
+ * (implicit hydrogen atoms added).
+ */
+ HYDROGEN_SATURATED_FRAGMENTS,
+
+ /**
+ * Fragments will be saturated with R atoms.
+ */
+ R_SATURATED_FRAGMENTS,
+
+ /**
+ * Fragments will be returned in their unsaturated form
+ * (no additional hydrogen atoms). The unsaturated atoms are the atoms
+ * of the split bonds.
+ */
+ UNSATURATED_FRAGMENTS
+ }
+
+ private static final int DEFAULT_MIN_FRAG_SIZE = 6;
+ private static final Saturation DEFAULT_SATURATION =
+ Saturation.UNSATURATED_FRAGMENTS;
+ private static final SmilesGenerator DEFAULT_SMILES_GENERATOR =
+ new SmilesGenerator(
+ SmiFlavor.Unique | SmiFlavor.UseAromaticSymbols
+ );
+ // assuming each fragment is unique (as if there was no deduplication)
+ // 27 would be the maximum tree depth to hold all fragments in the
+ // hashmap.
+ private static final int DEFAULT_INCLUSIVE_MAX_TREE_DEPTH = 27;
+ private static final boolean DEFAULT_COPY_STEREO_INFO = false;
- final Map fragMap;
- final SmilesGenerator smilesGenerator;
- String[] fragments = null;
- int minFragSize;
- private static final ILoggingTool logger = LoggingToolFactory
- .createLoggingTool(ExhaustiveFragmenter.class);
+ private Map fragMap;
+ private final SmilesGenerator smilesGenerator;
+ private int inclusiveMaxTreeDepth;
+ private int minFragSize;
+ private Saturation saturationSetting;
+ private boolean preserveStereo;
+ private static final ILoggingTool logger =
+ LoggingToolFactory.createLoggingTool(ExhaustiveFragmenter.class);
/**
- * Instantiate fragmenter with default minimum fragment size.
+ * Constructs an ExhaustiveFragmenter with the default settings:
+ *
+ * - Minimum fragment size: 6 atoms (excluding implicit hydrogen)
+ * - {@link Saturation#UNSATURATED_FRAGMENTS}
+ * - Default {@link SmilesGenerator}
+ * ({@link SmiFlavor#Unique} | {@link SmiFlavor#UseAromaticSymbols})
+ * - inclusive maximum tree depth of 27
+ * - stereo information is not preserved
+ *
*/
public ExhaustiveFragmenter() {
- this(DEFAULT_MIN_FRAG_SIZE);
+ this(
+ DEFAULT_SMILES_GENERATOR,
+ DEFAULT_MIN_FRAG_SIZE,
+ DEFAULT_SATURATION,
+ DEFAULT_INCLUSIVE_MAX_TREE_DEPTH,
+ DEFAULT_COPY_STEREO_INFO
+ );
}
/**
- * Instantiate fragmenter with user specified minimum fragment size.
+ * Constructs an ExhaustiveFragmenter with a user-defined minimum fragment
+ * size and saturation setting. Uses the default {@link SmilesGenerator} and
+ * default inclusive maximum tree depth of 27. Stereo information is not
+ * preserved.
*
- * @param minFragSize the minimum fragment size desired
+ * @param minFragSize minimum number of atoms in a valid fragment
+ * (excluding implicit hydrogen).
+ * @param saturationSetting determines whether fragments should be saturated
+ * (with hydrogens or R-atoms) or unsaturated.
+ */
+ public ExhaustiveFragmenter(int minFragSize, Saturation saturationSetting) {
+ this(
+ DEFAULT_SMILES_GENERATOR,
+ minFragSize,
+ saturationSetting,
+ DEFAULT_INCLUSIVE_MAX_TREE_DEPTH,
+ DEFAULT_COPY_STEREO_INFO
+ );
+ }
+
+ /**
+ * Constructs an ExhaustiveFragmenter with a user-defined minimum fragment
+ * size. Saturation defaults to {@link Saturation#UNSATURATED_FRAGMENTS}.
+ * Uses the default {@link SmilesGenerator} and the default
+ * inclusive maximum tree depth of 27. Stereo information is not
+ * preserved.
+ *
+ * @param minFragSize minimum number of atoms in a valid fragment
+ * (excluding implicit hydrogen).
*/
public ExhaustiveFragmenter(int minFragSize) {
- this.minFragSize = minFragSize;
- fragMap = new HashMap<>();
- smilesGenerator = SmilesGenerator.unique().aromatic();
+ this(
+ DEFAULT_SMILES_GENERATOR,
+ minFragSize,
+ DEFAULT_SATURATION,
+ DEFAULT_INCLUSIVE_MAX_TREE_DEPTH,
+ DEFAULT_COPY_STEREO_INFO
+ );
+ }
+
+ /**
+ * Constructs an ExhaustiveFragmenter with a user-defined saturation setting.
+ * Minimum fragment size defaults to 6. Uses the default {@link SmilesGenerator}
+ * and the default inclusive maximum tree depth of 27. Stereo information is
+ * not preserved.
+ *
+ * @param saturation how open valences should be treated after the
+ * fragmentation.
+ */
+ public ExhaustiveFragmenter(Saturation saturation) {
+ this(
+ DEFAULT_SMILES_GENERATOR,
+ DEFAULT_MIN_FRAG_SIZE,
+ saturation,
+ DEFAULT_INCLUSIVE_MAX_TREE_DEPTH,
+ DEFAULT_COPY_STEREO_INFO
+ );
+ }
+
+ /**
+ * Constructs an ExhaustiveFragmenter with a user-provided
+ * {@link SmilesGenerator} and user defined:
+ *
+ * - minimum fragment size
+ * - inclusive max tree depth
+ * - saturation setting
+ * - preservation of stereochemistry information
+ *
+ *
+ * @param smilesGenerator the {@link SmilesGenerator} instance to use for
+ * creating SMILES strings
+ * for fragment deduplication and retrieval.
+ * @param minFragSize minimum number of atoms in a valid fragment
+ * (excluding implicit hydrogen).
+ * @param saturationSetting determines whether fragments should be saturated
+ * (with hydrogens or R-atoms) or unsaturated.
+ * @param inclusiveMaxTreeDepth represents the maximum number of bonds that
+ * will be split for a fragmentation.
+ * @param preserveStereo signals whether to attempt to copy stereochemical
+ * information from the original molecule to the
+ * generated fragments. Warning: This
+ * process is not reliable and can lead to incorrect
+ * stereochemistry in the resulting fragments.
+ */
+ public ExhaustiveFragmenter(
+ SmilesGenerator smilesGenerator,
+ int minFragSize,
+ Saturation saturationSetting,
+ int inclusiveMaxTreeDepth,
+ boolean preserveStereo
+ ) {
+ if (saturationSetting == null) {
+ throw new NullPointerException(
+ "The given SaturationSetting can not be null"
+ );
+ }
+ this.saturationSetting = saturationSetting;
+ if (smilesGenerator == null) {
+ throw new NullPointerException(
+ "The given SmilesGenerator can not be null"
+ );
+ }
+ this.smilesGenerator = smilesGenerator;
+ this.preserveStereo = preserveStereo;
+ this.setInclusiveMaxTreeDepth(inclusiveMaxTreeDepth);
+ this.setMinimumFragmentSize(minFragSize);
+ this.fragMap = null;
}
/**
- * Set the minimum fragment size.
+ * Sets the minimum allowed fragment size. This has to be greater than zero.
*
- * @param minFragSize the smallest size fragment that will be returned
+ * @param minFragSize minimum number of atoms in a valid fragment.
*/
public void setMinimumFragmentSize(int minFragSize) {
+ if (minFragSize <= 0) {
+ throw new IllegalArgumentException(
+ "Minimum fragment size must be a positive integer (>= 1)" +
+ " Provided: " + minFragSize
+ );
+ }
this.minFragSize = minFragSize;
}
/**
- * Generate fragments for the input molecule.
+ * Sets whether fragments should be saturated or unsaturated.
+ *
+ * @param saturationSetting the saturation mode for generated fragments.
+ */
+ public void setSaturationSetting(Saturation saturationSetting) {
+ if (saturationSetting == null) {
+ throw new NullPointerException(
+ "The given SaturationSetting can not be null"
+ );
+ }
+ this.saturationSetting = saturationSetting;
+ }
+
+ /**
+ * Sets the maximum number of bonds that can be simultaneously split in a
+ * single fragmentation event.
+ *
+ * Must be within the range {@code 0 < inclusiveMaxTreeDepth < 32}. This
+ * limit is important due to the combinatorial explosion of fragments
+ * (which scales with 2^n, where n is the number of splittable bonds) and
+ * Java's use of 32-bit integers for indexing. Setting a lower limit can
+ * help manage computational resources for larger molecules.
+ *
+ *
+ * @param inclusiveMaxTreeDepth the inclusive maximum number of bonds that
+ * can be split at once in one atom container.
+ */
+ public void setInclusiveMaxTreeDepth(int inclusiveMaxTreeDepth) {
+ if (inclusiveMaxTreeDepth <= 0 || inclusiveMaxTreeDepth >= 32) {
+ throw new IllegalArgumentException(
+ "Inclusive max tree depth must be grater then zero and " +
+ "smaller then 32. Provided: " + inclusiveMaxTreeDepth
+ );
+ }
+ this.inclusiveMaxTreeDepth = inclusiveMaxTreeDepth;
+ }
+
+ /**
+ * Sets whether stereochemical information from the original molecule should
+ * be copied to the generated fragments.
+ *
+ *
+ * Warning: The copying process is not reliable and can
+ * result in fragments with incorrect stereochemistry. This method copies
+ * elements based on the presence of atoms and bonds, but it does not perform
+ * a chemical validation check on the resulting fragment.
+ * For example, a chiral center might be copied even if the new fragment does
+ * not contain the minimum four different substituents required for chirality.
+ * Use caution and consider a separate validation step.
+ *
+ *
+ * Note on Stereochemistry and SMILES:
+ * For stereochemical information to be included in the SMILES strings
+ * returned by {@link #getFragments()}, the `smilesGenerator` used by this
+ * fragmenter must be configured with the {@link SmiFlavor#Stereo} flag.
+ * If the flag is not set, the SMILES will not contain stereochemistry,
+ * even if this setting is enabled and the underlying `IAtomContainer` objects
+ * have stereo elements.
+ *
+ *
+ * @param preserve {@code true} to enable attempting to copy
+ * stereo information; {@code false} otherwise.
+ */
+ public void setPreserveStereo(boolean preserve) {
+ this.preserveStereo = preserve;
+ }
+
+ /**
+ * Generates fragments for the given molecule.
+ * The generated fragments are stored internally and can be retrieved via:
+ * - {@link #getFragments()} (SMILES representation)
+ * - {@link #getFragmentsAsContainers()} (IAtomContainer representation)
*
- * @param atomContainer The input molecule.
+ * @param atomContainer the input molecule.
+ * @throws CDKException if fragmentation encounters an error.
*/
@Override
public void generateFragments(IAtomContainer atomContainer) throws CDKException {
- fragMap.clear();
+ if (this.fragMap != null) {
+ this.fragMap.clear();
+ }
run(atomContainer);
}
- private List run(IAtomContainer atomContainer) throws CDKException {
+ /**
+ * Splits the molecule at all possible combinations of splittable bonds and
+ * saturates the open valences of the resulting fragments according to the
+ * {@link ExhaustiveFragmenter#saturationSetting}. Only non-ring and
+ * non-terminal single bonds are considered for splitting.
+ *
+ * @param atomContainer the molecule to be split.
+ * @throws CDKException if an error occurs during hydrogen addition or atom
+ * type perception.
+ */
+ private void run(IAtomContainer atomContainer) throws CDKException {
+ if (atomContainer == null) {
+ throw new NullPointerException("No molecule provided");
+ }
+
+ // Return early if the molecule has fewer than 3 bonds, fewer atoms than
+ // the minimum fragment size, or is empty (no meaningful splits possible)
+ if (atomContainer.getBondCount() < 3 ||
+ atomContainer.getAtomCount() < this.minFragSize ||
+ atomContainer.isEmpty()) {
+ this.fragMap = new HashMap<>(0);
+ return;
+ }
+
+ // Retrieve bonds that are eligible for splitting
+ IBond[] splittableBonds = getSplittableBonds(atomContainer);
- ArrayList fragments = new ArrayList<>();
+ // If no splittable bonds are found, return early
+ if (splittableBonds.length == 0) {
+ logger.debug("no splittable bonds found");
+ this.fragMap = new HashMap<>(0);
+ return;
+ }
+ if (splittableBonds.length > this.inclusiveMaxTreeDepth) {
+ logger.debug(
+ "Got " + splittableBonds.length + " splittable bonds" +
+ " but only " + this.inclusiveMaxTreeDepth + " tree depth. " +
+ "This means only a maximum of " + this.inclusiveMaxTreeDepth +
+ " bonds can be split at once during a fragmentation step"
+ );
+ }
+ logger.debug("Got " + splittableBonds.length + " splittable bonds");
+
+ // Compute the number of possible bond subsets (excluding the empty set):
+ // 2^n - 1
+ int numberOfIterations = (1 << splittableBonds.length) - 1;
+
+ // Store indices of splittable bonds for subset generation
+ int[] splittableBondIndices = new int[splittableBonds.length];
+ for (int i = 0; i < splittableBonds.length; i++) {
+ splittableBondIndices[i] = splittableBonds[i].getIndex();
+ }
+
+ this.fragMap = new HashMap<>(numberOfIterations);
+
+ // Iterate over all non-empty subsets of splittable bonds
+ for (int i = 1; i <= numberOfIterations; i++) {
+ int[] subset = generateSubset(i, splittableBondIndices);
+ int subsetSize = subset.length;
+
+ // Skip subsets exceeding the allowed depth
+ if (subsetSize > this.inclusiveMaxTreeDepth) {
+ continue;
+ }
- if (atomContainer.getBondCount() < 3) return fragments;
- List splitableBonds = getSplitableBonds(atomContainer);
- if (splitableBonds.size() == 0) return fragments;
- logger.debug("Got " + splitableBonds.size() + " splittable bonds");
+ // Convert subset indices back to bond objects
+ IBond[] bondsToSplit = new IBond[subsetSize];
+ for (int j = 0; j < subsetSize; j++) {
+ bondsToSplit[j] = atomContainer.getBond(subset[j]);
+ }
+
+ // Split the molecule and retrieve the resulting fragments
+ IAtomContainer[] parts = splitBondsWithCopy(
+ atomContainer, bondsToSplit
+ );
- String tmpSmiles;
- for (IBond bond : splitableBonds) {
- List parts = FragmentUtils.splitMolecule(atomContainer, bond);
- // make sure we don't add the same fragment twice
+ // Process each fragment
for (IAtomContainer partContainer : parts) {
- AtomContainerManipulator.clearAtomConfigurations(partContainer);
- for (IAtom atom : partContainer.atoms())
- atom.setImplicitHydrogenCount(null);
- AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(partContainer);
- CDKHydrogenAdder.getInstance(partContainer.getBuilder()).addImplicitHydrogens(partContainer);
- Aromaticity.cdkLegacy().apply(partContainer);
- tmpSmiles = smilesGenerator.create(partContainer);
- if (partContainer.getAtomCount() >= minFragSize && !fragMap.containsKey(tmpSmiles)) {
- fragments.add(partContainer);
- fragMap.put(tmpSmiles, partContainer);
+
+ // Generate a unique SMILES representation of the fragment
+ String tmpSmiles = this.smilesGenerator.create(partContainer);
+
+ int numberOfAtoms = 0;
+ for (IAtom atom : partContainer.atoms()) {
+
+ if (atom instanceof IPseudoAtom) {
+ continue;
+ }
+ numberOfAtoms++;
}
- }
- }
- // try and partition the fragments
- List tmp = new ArrayList<>(fragments);
- for (IAtomContainer fragment : fragments) {
- if (fragment.getBondCount() < 3 || fragment.getAtomCount() < minFragSize) continue;
- if (getSplitableBonds(fragment).size() == 0) continue;
-
- List frags = run(fragment);
- if (frags.size() == 0) continue;
-
- for (IAtomContainer frag : frags) {
- if (frag.getBondCount() < 3) continue;
- AtomContainerManipulator.clearAtomConfigurations(frag);
- for (IAtom atom : frag.atoms())
- atom.setImplicitHydrogenCount(null);
- AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(frag);
- CDKHydrogenAdder.getInstance(frag.getBuilder()).addImplicitHydrogens(frag);
- Aromaticity.cdkLegacy().apply(frag);
- tmpSmiles = smilesGenerator.create(frag);
- if (frag.getAtomCount() >= minFragSize && !fragMap.containsKey(tmpSmiles)) {
- tmp.add(frag);
- fragMap.put(tmpSmiles, frag);
+ // Store the fragment if it meets the size requirement and is
+ // unique
+ if (numberOfAtoms >= minFragSize) {
+ if (fragMap.size() == Integer.MAX_VALUE) {
+ throw new ArithmeticException(
+ "The maximum size to store the current amount of " +
+ "molecules would exceed the maximum value for an " +
+ "integer and overflow"
+ );
+ }
+ fragMap.putIfAbsent(tmpSmiles, partContainer);
}
}
}
- fragments = new ArrayList<>(tmp);
- return fragments;
}
- private List getSplitableBonds(IAtomContainer atomContainer) throws CDKException {
+ /**
+ * Detects and returns the bonds, which will be split by an exhaustive
+ * fragmentation. This method is especially useful to determine if it is
+ * even possible to split a specific molecule exhaustively. The number of
+ * bond subsets to evaluate is 2^n - 1, with n being the number of splittable bonds.
+ * It is impossible to generate all possible fragment combinations for a molecule
+ * with more than 31 splittable bonds, as this would exceed the maximum tree depth
+ * of 31 due to the combinatorial explosion. For molecules with more than 31
+ * splittable bonds, the fragmentation will still occur, but it will be limited
+ * to a maximum of {@code inclusiveMaxTreeDepth} bonds per fragmentation step.
+ * To guard against this, check the number of splittable bonds first, e.g.:
+ *
+ * {@code
+ * IBond[] splittableBonds = ExhaustiveFragmenter.getSplittableBonds(mol);
+ * if (splittableBonds.length > Integer.SIZE - 1) {
+ * // handle the case, where it is impossible to entirely split the
+ * // molecule
+ * }}
+ *
+ *
+ * @param atomContainer the container which contains the molecule in question.
+ * @return the bonds which would be split by the exhaustive fragmentation.
+ */
+ public static IBond[] getSplittableBonds(IAtomContainer atomContainer) {
+ if (atomContainer == null) {
+ throw new NullPointerException("The atom container must not be null");
+ }
+ if (atomContainer.isEmpty()) {
+ throw new IllegalArgumentException("The atom container must contain " +
+ "an actual molecule");
+ }
+
// do ring detection
- SpanningTree spanningTree = new SpanningTree(atomContainer);
- IRingSet allRings = spanningTree.getAllRings();
+ RingSearch ringSearch = new RingSearch(atomContainer);
+ IAtomContainer allRingsContainer = ringSearch.ringFragments();
- // find the splitable bonds
- ArrayList splitableBonds = new ArrayList<>();
+ // find the splittable bonds
+ ArrayList splittableBondSet = new ArrayList<>(
+ atomContainer.getBondCount() / 3
+ );
for (IBond bond : atomContainer.bonds()) {
- boolean isInRing = false;
- boolean isTerminal = false;
-
- // lets see if it's in a ring
- IRingSet rings = allRings.getRings(bond);
- if (rings.getAtomContainerCount() != 0) isInRing = true;
-
- // lets see if it is a terminal bond
- for (IAtom atom : bond.atoms()) {
- if (atomContainer.getConnectedBondsCount(atom) == 1) {
- isTerminal = true;
- break;
+
+ // only single bonds are candidates for splitting
+ if (bond.getOrder() == IBond.Order.SINGLE) {
+ boolean isInRing = false;
+ boolean isTerminal = false;
+
+ // lets see if it's in a ring
+ if (allRingsContainer.contains(bond)) isInRing = true;
+
+ // lets see if it is a terminal bond
+ for (IAtom atom : bond.atoms()) {
+ if (atomContainer.getConnectedBondsCount(atom) == 1) {
+ isTerminal = true;
+ break;
+ }
}
+
+ if (!(isInRing || isTerminal)) splittableBondSet.add(bond);
+ }
+ }
+ return splittableBondSet.toArray(new IBond[0]);
+ }
+
+ /**
+ * Generates a subset from the given array `nums`, determined by the binary
+ * representation of `index`. Each bit in `index` indicates whether the
+ * corresponding element in `nums` is included in the subset. The order of
+ * elements does not matter (i.e., `[1, 2]` and `[2, 1]` are equivalent).
+ *
+ * The total number of possible subsets is (2^n) - 1, where `n` is the
+ * length of `nums`. Subsets are generated using bitwise operations, where
+ * each `1` bit in `index` selects the corresponding element from `nums`.
+ *
+ * Example output for `nums = [1, 2, 3]`:
+ *
+ * index = 1 → [1]
+ * index = 2 → [2]
+ * index = 3 → [1, 2]
+ * index = 4 → [3]
+ * index = 5 → [1, 3]
+ * index = 6 → [2, 3]
+ * index = 7 → [1, 2, 3]
+ *
+ *
+ * Example bitwise selection for `index = 5` (`101` in binary):
+ *
+ * index (binary) nums result
+ * 1 → 1 → [1]
+ * 0 → 2
+ * 1 → 3 → [1, 3]
+ *
+ *
+ * @param index an integer whose binary representation determines the subset
+ * elements. A `1` bit at position `j` means `nums[j]` is
+ * included.
+ * @param nums the array from which to generate subsets. Duplicate values
+ * in `nums` may result in duplicate subset entries.
+ * @return An array containing the subset corresponding to `index`.
+ */
+ protected static int[] generateSubset(int index, int[] nums) {
+ // allocate subset array based on the number of 1-bits in index.
+ int[] subset = new int[Integer.bitCount(index)];
+ int subsetIndex = 0;
+
+ // process using bit manipulation - only iterate through set bits
+ while (index != 0) {
+ // find position of lowest set bit
+ int lowestBitPos = Integer.numberOfTrailingZeros(index);
+
+ // add the corresponding element from nums if within bounds
+ if (lowestBitPos < nums.length) {
+ subset[subsetIndex] = nums[lowestBitPos];
+ subsetIndex++;
}
- if (!(isInRing || isTerminal)) splitableBonds.add(bond);
+ // Clear the lowest set bit and continue
+ index = index & (index - 1);
+ }
+
+ return subset;
+ }
+
+ /**
+ * Add pseudo ("R") atoms to an atom in a molecule.
+ *
+ * @param atom the atom to add the pseudo atoms to
+ * @param rcount the number of pseudo atoms to add
+ * @param mol the molecule the atom belongs to
+ */
+ private void addRAtoms(IAtom atom, int rcount, IAtomContainer mol) {
+ for (int i = 0; i < rcount; i++) {
+ IPseudoAtom tmpRAtom = atom.getBuilder().newInstance(
+ IPseudoAtom.class, "R"
+ );
+ tmpRAtom.setAttachPointNum(1);
+ tmpRAtom.setImplicitHydrogenCount(0);
+ mol.addAtom(tmpRAtom);
+ mol.addBond(atom.getBuilder().newInstance(
+ IBond.class,
+ atom, tmpRAtom,
+ IBond.Order.SINGLE
+ ));
}
- return splitableBonds;
+ }
+
+ /**
+ * Creates a copy of an atom and adds it to the specified atom container.
+ *
+ * @param originalAtom the atom to be copied.
+ * @param atomContainer the destination container where the copied atom will
+ * be added.
+ * @return A new atom with the same properties as `originalAtom`, added to
+ * `atomContainer`.
+ */
+ private static IAtom copyAtom(IAtom originalAtom, IAtomContainer atomContainer) {
+ IAtom copiedAtom = atomContainer.newAtom(originalAtom.getAtomicNumber(),
+ originalAtom.getImplicitHydrogenCount());
+ copiedAtom.setIsAromatic(originalAtom.isAromatic());
+ copiedAtom.setValency(originalAtom.getValency());
+ copiedAtom.setAtomTypeName(originalAtom.getAtomTypeName());
+ copiedAtom.setFormalCharge(originalAtom.getFormalCharge());
+ return copiedAtom;
+ }
+
+ /**
+ * Creates a copy of a bond and adds it to the specified atom container.
+ *
+ * @param cpyCurrentAtom atom in the new atom container that is connected by
+ * the bond to be copied.
+ * @param cpyNbor the neighbour of `cpyCurrentAtom` that is connected by the
+ * bond one wants to copy.
+ * @param origBond the bond in the original molecule.
+ * @param atomContainer the new atom container to which the bond is to
+ * be copied.
+ * @return The bond in the new atom container.
+ */
+ private static IBond copyBond(
+ IAtom cpyCurrentAtom,
+ IAtom cpyNbor,
+ IBond origBond,
+ IAtomContainer atomContainer
+ ) {
+ IBond cpyBond = atomContainer.newBond(
+ cpyCurrentAtom,
+ cpyNbor,
+ origBond.getOrder());
+ cpyBond.setDisplay(origBond.getDisplay());
+ cpyBond.setIsAromatic(origBond.isAromatic());
+ // Setting is in ring is possible here because we always detect rings
+ // in the process of detecting the splittable bonds.
+ cpyBond.setIsInRing(origBond.isInRing());
+ return cpyBond;
+ }
+
+ /**
+ * Copies a subset of stereochemical information from a source molecule
+ * to a new fragment.
+ *
+ *
+ * This method iterates through the stereochemical elements of the original
+ * molecule (e.g., chiral centers, cis/trans bonds) and copies only those
+ * that are fully contained within the new fragment. A stereochemical element
+ * is considered fully contained if all of its defining atoms and bonds
+ * are present in the fragment, based on the provided atom and bond maps.
+ *
+ * Warning: This process is not reliable and can lead to
+ * incorrect stereochemistry in the fragment. When a chiral
+ * center is broken during fragmentation, the new fragment may be
+ * incorrectly assigned as chiral even if it is not anymore because some of
+ * its substituents are now equal.
+ *
+ * Example: the chiral molecule {@code CC[C@@H](Cl)CCCC} will, among others,
+ * produce the fragment {@code CC[C@@H](Cl)CC}, where the stereo configuration
+ * is preserved although the structure is no longer chiral.
+ *
+ *
+ * @param origMol the original molecule containing the stereochemical
+ * information.
+ * @param fragmentContainer the new fragment where the stereochemical
+ * information will be added.
+ * @param origToCpyAtomMap a mapping of atoms from the original molecule to
+ * their corresponding atoms in the new fragment.
+ * @param origToCpyBondMap a mapping of bonds from the original molecule to
+ * their corresponding bonds in the new fragment.
+ */
+    private void attemptCopyStereoInformation(
+            IAtomContainer origMol,
+            IAtomContainer fragmentContainer,
+            Map<IAtom, IAtom> origToCpyAtomMap,
+            Map<IBond, IBond> origToCpyBondMap
+    ) {
+        // A stereo element is transferred only if its focus and every one of
+        // its carriers have a counterpart in the new fragment.
+        for (IStereoElement<?, ?> elem : origMol.stereoElements()) {
+            final IChemObject origFocus = elem.getFocus();
+            // a focus that is neither an atom nor a bond is treated as present
+            boolean focusIsPresent = true;
+            if (origFocus instanceof IAtom) {
+                focusIsPresent = origToCpyAtomMap.containsKey(origFocus);
+            } else if (origFocus instanceof IBond) {
+                focusIsPresent = origToCpyBondMap.containsKey(origFocus);
+            }
+            if (!focusIsPresent) {
+                // no need to inspect the carriers, the element cannot be mapped
+                continue;
+            }
+
+            boolean carriersArePresent = true;
+            for (IChemObject carrier : elem.getCarriers()) {
+                if (carrier instanceof IAtom) {
+                    if (!origToCpyAtomMap.containsKey(carrier)) {
+                        carriersArePresent = false;
+                        break;
+                    }
+                } else if (carrier instanceof IBond) {
+                    if (!origToCpyBondMap.containsKey(carrier)) {
+                        carriersArePresent = false;
+                        break;
+                    }
+                }
+            }
+
+            if (carriersArePresent) {
+                // map() re-expresses the element in terms of the copied atoms and bonds
+                fragmentContainer.addStereoElement(elem.map(origToCpyAtomMap, origToCpyBondMap));
+            }
+        }
+    }
+
+    /**
+     * Splits a molecule into multiple fragments by removing the specified
+     * bonds and copying every resulting connected component into its own
+     * container. Atoms that lost a bond are saturated as configured via
+     * {@link #saturationSetting}; stereo elements are copied on a best-effort
+     * basis when {@code preserveStereo} is set.
+     *
+     * @param origMol      the molecule to be split.
+     * @param bondsToSplit the bonds that should be removed to create
+     *                     separate fragments.
+     * @return An array of copied molecular fragments resulting from the split.
+     */
+    private IAtomContainer[] splitBondsWithCopy(
+            IAtomContainer origMol,
+            IBond[] bondsToSplit
+    ) {
+        // hash set for a constant-time "is this bond cut?" lookup
+        Set<IBond> bondsToSplitSet = new HashSet<>(Arrays.asList(bondsToSplit));
+        final int atomCount = origMol.getAtomCount();
+        boolean[] visitedOriginalAtoms = new boolean[atomCount];
+        List<IAtomContainer> fragmentList = new ArrayList<>(bondsToSplit.length + 1);
+
+        for (int i = 0; i < atomCount; i++) {
+            // i already is the index of the candidate start atom, so no
+            // indexOf lookup is required here
+            if (visitedOriginalAtoms[i]) {
+                continue;
+            }
+            IAtom currPotentialStartAtom = origMol.getAtom(i);
+            IAtomContainer fragmentContainer =
+                    origMol.getBuilder().newInstance(IAtomContainer.class);
+            Map<IAtom, IAtom> origToCpyAtomMap = new HashMap<>(
+                    (int) Math.ceil(atomCount / 0.75)
+            );
+            Map<IBond, IBond> origToCpyBondMap = new HashMap<>(
+                    (int) Math.ceil(origMol.getBondCount() / 0.75)
+            );
+            Deque<IAtom> dfsStack = new ArrayDeque<>();
+            // number of cut bonds per copied atom of the fragment being built
+            Map<IAtom, Integer> splitCountsCpyAtoms = new HashMap<>();
+
+            dfsStack.push(currPotentialStartAtom);
+            visitedOriginalAtoms[i] = true;
+            IAtom cpyStartAtom = copyAtom(currPotentialStartAtom, fragmentContainer);
+            origToCpyAtomMap.put(currPotentialStartAtom, cpyStartAtom);
+
+            // depth-first traversal that never crosses a bond marked for splitting
+            while (!dfsStack.isEmpty()) {
+                IAtom origCurrAtom = dfsStack.pop();
+                IAtom cpyCurrentAtom = origToCpyAtomMap.get(origCurrAtom);
+
+                for (IBond origBond : origMol.getConnectedBondsList(origCurrAtom)) {
+                    if (bondsToSplitSet.contains(origBond)) {
+                        // This bond is being cut. origCurrAtom belongs to the
+                        // fragment being built, so count the cut for its copy.
+                        splitCountsCpyAtoms.merge(cpyCurrentAtom, 1, Integer::sum);
+                        continue;
+                    }
+
+                    IAtom origNbor = origBond.getOther(origCurrAtom);
+                    IAtom cpyNbor = origToCpyAtomMap.get(origNbor);
+                    if (cpyNbor == null) {
+                        // first visit of the neighbour: copy atom and bond, then descend
+                        visitedOriginalAtoms[origMol.indexOf(origNbor)] = true;
+                        cpyNbor = copyAtom(origNbor, fragmentContainer);
+                        origToCpyAtomMap.put(origNbor, cpyNbor);
+                        IBond cpyBond = copyBond(
+                                cpyCurrentAtom,
+                                cpyNbor,
+                                origBond,
+                                fragmentContainer
+                        );
+                        origToCpyBondMap.put(origBond, cpyBond);
+                        dfsStack.push(origNbor);
+                    } else if (fragmentContainer.getBond(cpyCurrentAtom, cpyNbor) == null) {
+                        // neighbour already copied (ring closure or reverse
+                        // direction): add the bond only if not already present
+                        IBond cpyBond = copyBond(
+                                cpyCurrentAtom,
+                                cpyNbor,
+                                origBond,
+                                fragmentContainer
+                        );
+                        origToCpyBondMap.put(origBond, cpyBond);
+                    }
+                }
+            }
+
+            // Apply saturation logic based on the number of cut bonds per atom
+            if (this.saturationSetting != Saturation.UNSATURATED_FRAGMENTS) {
+                for (Map.Entry<IAtom, Integer> entry : splitCountsCpyAtoms.entrySet()) {
+                    IAtom atom = entry.getKey();
+                    int bondsCutCount = entry.getValue();
+
+                    switch (this.saturationSetting) {
+                        case HYDROGEN_SATURATED_FRAGMENTS:
+                            // one implicit hydrogen per removed bond; an unset count is treated as 0
+                            Integer currImplHCount = atom.getImplicitHydrogenCount();
+                            int newImplHCount =
+                                    (currImplHCount == null ? 0 : currImplHCount) + bondsCutCount;
+                            atom.setImplicitHydrogenCount(newImplHCount);
+                            break;
+                        case R_SATURATED_FRAGMENTS:
+                            addRAtoms(atom, bondsCutCount, fragmentContainer);
+                            break;
+                        default:
+                            throw new UnsupportedOperationException(
+                                    "no treatment defined yet for this new enum constant"
+                            );
+                    }
+                }
+            }
+            if (this.preserveStereo) {
+                attemptCopyStereoInformation(
+                        origMol,
+                        fragmentContainer,
+                        origToCpyAtomMap,
+                        origToCpyBondMap
+                );
+            }
+            fragmentList.add(fragmentContainer);
+        }
+        return fragmentList.toArray(new IAtomContainer[0]);
     }
/**
* Get the fragments generated as SMILES strings.
+ *
+ * Note on Stereochemistry:
+ * Stereochemistry information will only be included in the returned SMILES
+ * strings if the {@code SmilesGenerator} used by this fragmenter was configured
+ * with the {@link SmiFlavor#Stereo} flag.
+ *
*
* @return a String[] of the fragments.
+ * @throws NullPointerException if the fragment map is {@code null}, i.e. no
+ * fragments have been generated yet.
*/
@Override
public String[] getFragments() {
+ if (fragMap == null) {
+ throw new NullPointerException("It is mandatory to generate " +
+ "fragments before getting them");
+ }
return (new ArrayList<>(fragMap.keySet())).toArray(new String[0]);
}
/**
- * Get the fragments generated as {@link IAtomContainer} objects..
+ * Get the fragments generated as {@link IAtomContainer} objects.
*
* @return a IAtomContainer[] of the fragments.
+ * @throws NullPointerException if the fragment map is {@code null}, i.e. no
+ * fragments have been generated yet.
*/
@Override
public IAtomContainer[] getFragmentsAsContainers() {
+ if (fragMap == null) {
+ throw new NullPointerException("It is mandatory to generate " +
+ "fragments before getting them");
+ }
return (new ArrayList<>(fragMap.values())).toArray(new IAtomContainer[0]);
}
diff --git a/tool/fragment/src/test/java/org/openscience/cdk/fragment/ExhaustiveFragmenterTest.java b/tool/fragment/src/test/java/org/openscience/cdk/fragment/ExhaustiveFragmenterTest.java
index 960cbd2f7a..d58cbc4e96 100644
--- a/tool/fragment/src/test/java/org/openscience/cdk/fragment/ExhaustiveFragmenterTest.java
+++ b/tool/fragment/src/test/java/org/openscience/cdk/fragment/ExhaustiveFragmenterTest.java
@@ -1,5 +1,5 @@
-/*
- * Copyright (C) 2010 Rajarshi Guha
+/* Copyright (C) 2025 Rajarshi Guha
+ * Tom Weiß
*
* Contact: cdk-devel@lists.sourceforge.net
*
@@ -22,112 +22,947 @@
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
-import org.openscience.cdk.test.CDKTestCase;
-import org.openscience.cdk.DefaultChemObjectBuilder;
+import org.openscience.cdk.exception.CDKException;
import org.openscience.cdk.interfaces.IAtomContainer;
+import org.openscience.cdk.interfaces.IBond;
+import org.openscience.cdk.silent.SilentChemObjectBuilder;
+import org.openscience.cdk.smiles.SmiFlavor;
+import org.openscience.cdk.smiles.SmilesGenerator;
import org.openscience.cdk.smiles.SmilesParser;
+import org.openscience.cdk.test.CDKTestCase;
import java.util.Arrays;
-import java.util.List;
+import java.util.HashSet;
+import java.util.Set;
+import java.util.stream.Collectors;
-import static org.hamcrest.CoreMatchers.hasItems;
-import static org.hamcrest.CoreMatchers.is;
+import static org.junit.jupiter.api.Assertions.fail;
/**
* Test exhaustive fragmenter.
+ * This test class covers various scenarios for the {@link ExhaustiveFragmenter},
+ * including different saturation settings:
+ *
+ * - {@link org.openscience.cdk.fragment.ExhaustiveFragmenter.Saturation#UNSATURATED_FRAGMENTS}
+ * - {@link org.openscience.cdk.fragment.ExhaustiveFragmenter.Saturation#HYDROGEN_SATURATED_FRAGMENTS}
+ * - {@link org.openscience.cdk.fragment.ExhaustiveFragmenter.Saturation#R_SATURATED_FRAGMENTS}
+ *
+ * and minimum fragment size.
*
+ * @see ExhaustiveFragmenter
*/
class ExhaustiveFragmenterTest extends CDKTestCase {
-
- private static ExhaustiveFragmenter fragmenter;
- private static SmilesParser smilesParser;
+ private static SmilesParser smilesParser;
@BeforeAll
static void setup() {
- fragmenter = new ExhaustiveFragmenter();
- smilesParser = new SmilesParser(DefaultChemObjectBuilder.getInstance());
+ smilesParser = new SmilesParser(SilentChemObjectBuilder.getInstance());
}
+ // --- Unsaturated Fragments Tests ---
+
+ /**
+ * Tests that a simple linear alkane (propane) with no splittable bonds
+ * yields no fragments when using the unsaturated setting.
+ */
@Test
- void testEF1() throws Exception {
+ void testEF1Unsaturated() throws Exception {
IAtomContainer mol = smilesParser.parseSmiles("CCC");
- fragmenter.generateFragments(mol);
- String[] frags = fragmenter.getFragments();
+ ExhaustiveFragmenter unsaturatedFragmenter = new ExhaustiveFragmenter(ExhaustiveFragmenter.Saturation.UNSATURATED_FRAGMENTS);
+ unsaturatedFragmenter.generateFragments(mol);
+ String[] frags = unsaturatedFragmenter.getFragments();
Assertions.assertEquals(0, frags.length);
}
+ /**
+ * Tests that a simple cycloalkane (cyclopentane) with no non-ring, non-terminal bonds
+ * yields no fragments when using the unsaturated setting.
+ */
@Test
- void testEF2() throws Exception {
+ void testEF2Unsaturated() throws Exception {
IAtomContainer mol = smilesParser.parseSmiles("C1CCCC1");
- fragmenter.generateFragments(mol);
- String[] frags = fragmenter.getFragments();
+ ExhaustiveFragmenter unsaturatedFragmenter = new ExhaustiveFragmenter(ExhaustiveFragmenter.Saturation.UNSATURATED_FRAGMENTS);
+ unsaturatedFragmenter.generateFragments(mol);
+ String[] frags = unsaturatedFragmenter.getFragments();
Assertions.assertEquals(0, frags.length);
}
+ /**
+ * Tests fragmentation of ethylcyclohexane with unsaturated fragments.
+ * Expects "[CH]1CCCCC1" as a fragment, representing the cyclohexyl radical.
+ */
@Test
- void testEF3() throws Exception {
+ void testEF3Unsaturated() throws Exception {
IAtomContainer mol = smilesParser.parseSmiles("C1CCCCC1CC");
- fragmenter.generateFragments(mol);
- String[] frags = fragmenter.getFragments();
- org.hamcrest.MatcherAssert.assertThat(frags, is(new String[]{"C1CCCCC1"}));
+ ExhaustiveFragmenter unsaturatedFragmenter = new ExhaustiveFragmenter(ExhaustiveFragmenter.Saturation.UNSATURATED_FRAGMENTS);
+ unsaturatedFragmenter.generateFragments(mol);
+ String[] frags = unsaturatedFragmenter.getFragments();
+ Assertions.assertArrayEquals(new String[]{"[CH]1CCCCC1"}, frags);
}
+ /**
+ * Tests fragmentation of ethylbenzene with unsaturated fragments.
+ * Expects "[c]1ccccc1" as a fragment, representing the phenyl radical.
+ */
@Test
- void testEF4() throws Exception {
+ void testEF4Unsaturated() throws Exception {
IAtomContainer mol = smilesParser.parseSmiles("c1ccccc1CC");
- fragmenter.generateFragments(mol);
- String[] frags = fragmenter.getFragments();
+ ExhaustiveFragmenter unsaturatedFragmenter = new ExhaustiveFragmenter(ExhaustiveFragmenter.Saturation.UNSATURATED_FRAGMENTS);
+ unsaturatedFragmenter.generateFragments(mol);
+ String[] frags = unsaturatedFragmenter.getFragments();
Assertions.assertNotNull(frags);
- org.hamcrest.MatcherAssert.assertThat(frags, is(new String[]{"c1ccccc1"}));
+ Assertions.assertArrayEquals(new String[]{"[c]1ccccc1"}, frags);
}
+ /**
+ * Tests fragmentation of diphenylmethane with unsaturated fragments.
+ * Expects "[CH2]c1ccccc1" (benzyl radical) and "[c]1ccccc1" (phenyl radical).
+ */
@Test
- void testEF5() throws Exception {
+ void testEF5Unsaturated() throws Exception {
IAtomContainer mol = smilesParser.parseSmiles("c1ccccc1Cc1ccccc1");
- fragmenter.generateFragments(mol);
- String[] frags = fragmenter.getFragments();
+ ExhaustiveFragmenter unsaturatedFragmenter = new ExhaustiveFragmenter(ExhaustiveFragmenter.Saturation.UNSATURATED_FRAGMENTS);
+ unsaturatedFragmenter.generateFragments(mol);
+ String[] frags = unsaturatedFragmenter.getFragments();
Assertions.assertNotNull(frags);
- org.hamcrest.MatcherAssert.assertThat(Arrays.asList(frags), hasItems("c1ccc(cc1)C", "c1ccccc1"));
- Assertions.assertNotNull(fragmenter.getFragmentsAsContainers());
- Assertions.assertEquals(2, fragmenter.getFragmentsAsContainers().length);
-
+ assertFragsContain(
+ new String[]{
+ "[CH2]c1ccccc1",
+ "[c]1ccccc1"
+ }, frags
+ );
+ Assertions.assertNotNull(unsaturatedFragmenter.getFragmentsAsContainers());
+ Assertions.assertEquals(2, unsaturatedFragmenter.getFragmentsAsContainers().length);
}
+ /**
+ * Tests fragmentation of biphenyl with unsaturated fragments.
+ * Expects only "[c]1ccccc1" as the fragment.
+ */
@Test
- void testEF6() throws Exception {
+ void testEF6Unsaturated() throws Exception {
IAtomContainer mol = smilesParser.parseSmiles("c1ccccc1c1ccccc1");
- fragmenter.generateFragments(mol);
- String[] frags = fragmenter.getFragments();
+ ExhaustiveFragmenter unsaturatedFragmenter = new ExhaustiveFragmenter(ExhaustiveFragmenter.Saturation.UNSATURATED_FRAGMENTS);
+ unsaturatedFragmenter.generateFragments(mol);
+ String[] frags = unsaturatedFragmenter.getFragments();
Assertions.assertNotNull(frags);
- org.hamcrest.MatcherAssert.assertThat(frags, is(new String[]{"c1ccccc1"}));
+ Assertions.assertArrayEquals(new String[]{"[c]1ccccc1"}, frags);
- Assertions.assertNotNull(fragmenter.getFragmentsAsContainers());
- Assertions.assertEquals(1, fragmenter.getFragmentsAsContainers().length);
+ Assertions.assertNotNull(unsaturatedFragmenter.getFragmentsAsContainers());
+ Assertions.assertEquals(1, unsaturatedFragmenter.getFragmentsAsContainers().length);
}
+ /**
+ * Tests a complex molecule with unsaturated fragments.
+ * Expected fragments include phenyl and various complex radical fragments.
+ * Note: The number of fragments (26) is higher than the saturated version (25)
+ * because unsaturated fragments explicitly show radical centers, which can lead to
+ * unique SMILES for fragments that would be canonicalized identically when saturated
+ * due to differences in hydrogen counts or explicit radical representation.
+ */
@Test
- void testEF7() throws Exception {
+ void testEF7Unsaturated() throws Exception {
IAtomContainer mol = smilesParser.parseSmiles("C1(c2ccccc2)(CC(CC1)CCc1ccccc1)CC1C=CC=C1");
- fragmenter.generateFragments(mol);
- List frags = Arrays.asList(fragmenter.getFragments());
+ ExhaustiveFragmenter unsaturatedFragmenter = new ExhaustiveFragmenter(ExhaustiveFragmenter.Saturation.UNSATURATED_FRAGMENTS);
+ unsaturatedFragmenter.generateFragments(mol);
+ String[] frags = unsaturatedFragmenter.getFragments();
+ Assertions.assertNotNull(frags);
+ // There is one additional fragment compared to the saturated version because of the following two fragments:
+ // [C]1CCC([CH2])C1
+ // [CH2][C]1C[CH]CC1
+ // These fragments differ only in the number of hydrogens bonded to their respective carbon atoms, so they
+ // would show up as a single fragment if saturated.
+ Assertions.assertEquals(26, frags.length);
+
+ Assertions.assertNotNull(unsaturatedFragmenter.getFragmentsAsContainers());
+ Assertions.assertEquals(26, unsaturatedFragmenter.getFragmentsAsContainers().length);
+
+ assertFragsContain(
+ new String[]{
+ "[c]1ccccc1",
+ "[CH2]CC1CCC(c2ccccc2)(CC3C=CC=C3)C1",
+ "[CH2]C1CCC([CH2])(c2ccccc2)C1"
+ }, frags
+ );
+ }
+
+ // --- Hydrogen-Saturated Fragments Tests ---
+
+ /**
+ * Tests that a simple linear alkane (propane) with no splittable bonds
+ * yields no fragments when using the hydrogen-saturated setting.
+ */
+ @Test
+ void testEF1Saturated() throws Exception {
+ IAtomContainer mol = smilesParser.parseSmiles("CCC");
+ ExhaustiveFragmenter fragmenterSaturated = new ExhaustiveFragmenter(ExhaustiveFragmenter.Saturation.HYDROGEN_SATURATED_FRAGMENTS);
+ fragmenterSaturated.generateFragments(mol);
+ String[] frags = fragmenterSaturated.getFragments();
+ Assertions.assertEquals(0, frags.length);
+ }
+
+ /**
+ * Tests that a simple cycloalkane (cyclopentane) with no non-ring, non-terminal bonds
+ * yields no fragments when using the hydrogen-saturated setting.
+ */
+ @Test
+ void testEF2Saturated() throws Exception {
+ IAtomContainer mol = smilesParser.parseSmiles("C1CCCC1");
+ ExhaustiveFragmenter saturatedFragmenter = new ExhaustiveFragmenter(ExhaustiveFragmenter.Saturation.HYDROGEN_SATURATED_FRAGMENTS);
+ saturatedFragmenter.generateFragments(mol);
+ String[] frags = saturatedFragmenter.getFragments();
+ Assertions.assertEquals(0, frags.length);
+ }
+
+ /**
+ * Tests fragmentation of ethylcyclohexane with hydrogen-saturated fragments.
+ * Expects "C1CCCCC1" as a fragment, representing cyclohexane.
+ */
+ @Test
+ void testEF3Saturated() throws Exception {
+ IAtomContainer mol = smilesParser.parseSmiles("C1CCCCC1CC");
+ ExhaustiveFragmenter saturatedFragmenter = new ExhaustiveFragmenter(ExhaustiveFragmenter.Saturation.HYDROGEN_SATURATED_FRAGMENTS);
+ saturatedFragmenter.generateFragments(mol);
+ String[] frags = saturatedFragmenter.getFragments();
+ Assertions.assertArrayEquals(new String[]{"C1CCCCC1"}, frags);
+ }
+
+ /**
+ * Tests fragmentation of ethylbenzene with hydrogen-saturated fragments.
+ * Expects "c1ccccc1" as a fragment, representing benzene.
+ */
+ @Test
+ void testEF4Saturated() throws Exception {
+ IAtomContainer mol = smilesParser.parseSmiles("c1ccccc1CC");
+ ExhaustiveFragmenter saturatedFragmenter = new ExhaustiveFragmenter(ExhaustiveFragmenter.Saturation.HYDROGEN_SATURATED_FRAGMENTS);
+ saturatedFragmenter.generateFragments(mol);
+ String[] frags = saturatedFragmenter.getFragments();
+ Assertions.assertNotNull(frags);
+ Assertions.assertArrayEquals(new String[]{"c1ccccc1"}, frags);
+ }
+
+ /**
+ * Tests fragmentation of diphenylmethane with hydrogen-saturated fragments.
+ * Expects "c1ccc(cc1)C" (toluene) and "c1ccccc1" (benzene).
+ * Note: "c1ccc(cc1)C" might also be canonicalized as "Cc1ccccc1".
+ */
+ @Test
+ void testEF5Saturated() throws Exception {
+ IAtomContainer mol = smilesParser.parseSmiles("c1ccccc1Cc1ccccc1");
+ ExhaustiveFragmenter saturatedFragmenter = new ExhaustiveFragmenter(ExhaustiveFragmenter.Saturation.HYDROGEN_SATURATED_FRAGMENTS);
+ saturatedFragmenter.generateFragments(mol);
+ String[] frags = saturatedFragmenter.getFragments();
+ Assertions.assertNotNull(frags);
+ assertFragsContain(
+ new String[]{
+ "c1ccc(cc1)C",
+ "c1ccccc1"
+ }, frags
+ );
+ Assertions.assertNotNull(saturatedFragmenter.getFragmentsAsContainers());
+ Assertions.assertEquals(2, saturatedFragmenter.getFragmentsAsContainers().length);
+ }
+
+ /**
+ * Tests fragmentation of biphenyl with hydrogen-saturated fragments.
+ * Expects only "c1ccccc1" (benzene) as the fragment.
+ */
+ @Test
+ void testEF6Saturated() throws Exception {
+ IAtomContainer mol = smilesParser.parseSmiles("c1ccccc1c1ccccc1");
+ ExhaustiveFragmenter saturatedFragmenter = new ExhaustiveFragmenter(ExhaustiveFragmenter.Saturation.HYDROGEN_SATURATED_FRAGMENTS);
+ saturatedFragmenter.generateFragments(mol);
+ String[] frags = saturatedFragmenter.getFragments();
Assertions.assertNotNull(frags);
- Assertions.assertEquals(25, frags.size());
+ Assertions.assertArrayEquals(new String[]{"c1ccccc1"}, frags);
- Assertions.assertNotNull(fragmenter.getFragmentsAsContainers());
- Assertions.assertEquals(25, fragmenter.getFragmentsAsContainers().length);
+ Assertions.assertNotNull(saturatedFragmenter.getFragmentsAsContainers());
+ Assertions.assertEquals(1, saturatedFragmenter.getFragmentsAsContainers().length);
+ }
- org.hamcrest.MatcherAssert.assertThat(frags, hasItems("c1ccccc1", "c1ccc(cc1)C2(CCC(CC)C2)CC3C=CC=C3", "c1ccc(cc1)C2(C)CCC(C)C2"));
+ /**
+ * Tests a complex molecule with hydrogen-saturated fragments.
+ * Expected fragments include benzene and various complex saturated fragments.
+ * Compared to the unsaturated version, some fragments might canonicalize to the same SMILES
+ * after saturation, resulting in a slightly lower count (25 vs 26).
+ */
+ @Test
+ void testEF7Saturated() throws Exception {
+ IAtomContainer mol = smilesParser.parseSmiles("C1(c2ccccc2)(CC(CC1)CCc1ccccc1)CC1C=CC=C1");
+ ExhaustiveFragmenter saturatedFragmenter = new ExhaustiveFragmenter(ExhaustiveFragmenter.Saturation.HYDROGEN_SATURATED_FRAGMENTS);
+ saturatedFragmenter.generateFragments(mol);
+ String[] frags = saturatedFragmenter.getFragments();
+ Assertions.assertNotNull(frags);
+ Assertions.assertEquals(25, frags.length);
+
+ Assertions.assertNotNull(saturatedFragmenter.getFragmentsAsContainers());
+ Assertions.assertEquals(25, saturatedFragmenter.getFragmentsAsContainers().length);
+
+ assertFragsContain(
+ new String[]{
+ "c1ccccc1",
+ "c1ccc(cc1)C2(CCC(CC)C2)CC3C=CC=C3",
+ "c1ccc(cc1)C2(C)CCC(C)C2"
+ }, frags
+ );
}
+ // --- R-Group Saturated Fragments Tests ---
+
+ /**
+ * Tests fragmentation of ethylcyclohexane with R-group saturated fragments.
+ * Expects "*C1CCCCC1" as a fragment, representing the cyclohexyl group with an R-atom.
+ */
+ @Test
+ void testEF3RestSaturated() throws Exception {
+ IAtomContainer mol = smilesParser.parseSmiles("C1CCCCC1CC");
+ ExhaustiveFragmenter rSaturatedFragmenter = new ExhaustiveFragmenter(ExhaustiveFragmenter.Saturation.R_SATURATED_FRAGMENTS);
+ rSaturatedFragmenter.generateFragments(mol);
+ String[] frags = rSaturatedFragmenter.getFragments();
+ Assertions.assertNotNull(frags);
+ Assertions.assertArrayEquals(new String[]{"*C1CCCCC1"}, frags);
+ }
+
+ /**
+ * Tests fragmentation of diphenylmethane with R-group saturated fragments.
+ * Expects "*c1ccccc1" (phenyl with R-atom) and "*Cc1ccccc1" (benzyl with R-atom).
+ */
+ @Test
+ void testEF5RestSaturated() throws Exception {
+ IAtomContainer mol = smilesParser.parseSmiles("c1ccccc1Cc1ccccc1");
+ ExhaustiveFragmenter rSaturatedFragmenter = new ExhaustiveFragmenter(ExhaustiveFragmenter.Saturation.R_SATURATED_FRAGMENTS);
+ rSaturatedFragmenter.generateFragments(mol);
+ String[] frags = rSaturatedFragmenter.getFragments();
+ Assertions.assertNotNull(frags);
+ assertFragsContain(
+ new String[]{
+ "*c1ccccc1",
+ "*Cc1ccccc1"
+ }, frags
+ );
+ Assertions.assertEquals(2, rSaturatedFragmenter.getFragmentsAsContainers().length);
+ }
+
+ /**
+ * Tests fragmentation of biphenyl with R-group saturated fragments.
+ * Expects only "*c1ccccc1" as the fragment.
+ */
+ @Test
+ void testEF6RestSaturated() throws Exception {
+ IAtomContainer mol = smilesParser.parseSmiles("c1ccccc1c1ccccc1");
+ ExhaustiveFragmenter rSaturatedFragmenter = new ExhaustiveFragmenter(ExhaustiveFragmenter.Saturation.R_SATURATED_FRAGMENTS);
+ rSaturatedFragmenter.generateFragments(mol);
+ String[] frags = rSaturatedFragmenter.getFragments();
+ Assertions.assertNotNull(frags);
+ Assertions.assertArrayEquals(new String[]{"*c1ccccc1"}, frags);
+ Assertions.assertEquals(1, rSaturatedFragmenter.getFragmentsAsContainers().length);
+ }
+
+ /**
+ * Tests a complex molecule with R-group saturated fragments.
+ * The number of fragments can differ from hydrogen-saturated or unsaturated versions
+ * due to the R-group affecting the size of the fragments.
+ */
+ @Test
+ void testEF7RestSaturated() throws Exception {
+ IAtomContainer mol = smilesParser.parseSmiles("C1(c2ccccc2)(CC(CC1)CCc1ccccc1)CC1C=CC=C1");
+ ExhaustiveFragmenter fragmenterRSaturated = new ExhaustiveFragmenter(ExhaustiveFragmenter.Saturation.R_SATURATED_FRAGMENTS);
+ fragmenterRSaturated.generateFragments(mol);
+ String[] frags = fragmenterRSaturated.getFragments();
+ Assertions.assertNotNull(frags);
+ // Needs to have the same number of fragments as the unsaturated version.
+ Assertions.assertEquals(26, fragmenterRSaturated.getFragmentsAsContainers().length);
+ assertFragsContain(
+ new String[]{
+ "*c1ccccc1",
+ "*C1CCC(c2ccccc2)(CC3C=CC=C3)C1",
+ "*C1CCC(*)(c2ccccc2)C1"
+ }, frags
+ );
+ }
+
+ // --- General Fragmenter Tests ---
+
+ /**
+ * Tests the minimum fragment size setting.
+ * With a minimum size of 6, only the larger ring (cyclohexane) should be returned
+ * from a molecule composed of a cyclopentane and a cyclohexane connected by a single bond.
+ */
@Test
void testMinSize() throws Exception {
IAtomContainer mol = smilesParser.parseSmiles("C1CCCC1C2CCCCC2");
- fragmenter.setMinimumFragmentSize(6);
- fragmenter.generateFragments(mol);
- String[] frags = fragmenter.getFragments();
+ ExhaustiveFragmenter fragmenterSaturated = new ExhaustiveFragmenter(ExhaustiveFragmenter.Saturation.HYDROGEN_SATURATED_FRAGMENTS);
+ fragmenterSaturated.setMinimumFragmentSize(6);
+ fragmenterSaturated.generateFragments(mol);
+ String[] frags = fragmenterSaturated.getFragments();
Assertions.assertNotNull(frags);
Assertions.assertEquals(1, frags.length);
- Assertions.assertTrue(frags[0].equals("C1CCCCC1"));
+ Assertions.assertEquals("C1CCCCC1", frags[0]);
+ }
+
+ /**
+ * Tests that lowering the minimum fragment size allows smaller fragments to be returned.
+ * For "C1CCCC1C2CCCCC2", setting min size to 5 should yield both rings.
+ */
+ @Test
+ void testMinSizeLowered() throws Exception {
+ IAtomContainer mol = smilesParser.parseSmiles("C1CCCC1C2CCCCC2");
+ ExhaustiveFragmenter saturatedFragmenter = new ExhaustiveFragmenter(ExhaustiveFragmenter.Saturation.HYDROGEN_SATURATED_FRAGMENTS);
+ saturatedFragmenter.setMinimumFragmentSize(5);
+ saturatedFragmenter.generateFragments(mol);
+ String[] frags = saturatedFragmenter.getFragments();
+ Assertions.assertNotNull(frags);
+ Assertions.assertEquals(2, frags.length);
+ assertFragsContain(
+ new String[]{
+ "C1CCCCC1",
+ "C1CCCC1"
+ }, frags
+ );
+ }
+
+ /**
+ * Verifies that the SMILES representations obtained from fragments match
+ * the SMILES generated directly from their corresponding {@link IAtomContainer} objects.
+ */
+ @Test
+ void testEqualityOfSmilesAndContainers() throws Exception {
+ SmilesGenerator smilesGenerator = new SmilesGenerator(SmiFlavor.UseAromaticSymbols | SmiFlavor.Unique);
+ IAtomContainer mol = smilesParser.parseSmiles("c1ccccc1CC(N)C(=O)O"); // Phenylalanine
+ ExhaustiveFragmenter saturatedFragmenter = new ExhaustiveFragmenter(ExhaustiveFragmenter.Saturation.HYDROGEN_SATURATED_FRAGMENTS);
+ saturatedFragmenter.generateFragments(mol);
+ String[] smilesFrags = saturatedFragmenter.getFragments();
+ IAtomContainer[] containerFrags = saturatedFragmenter.getFragmentsAsContainers();
+ for (IAtomContainer frag : containerFrags) {
+ assertFragsContain(
+ new String[]{
+ smilesGenerator.create(frag)
+ }, smilesFrags
+ );
+ }
+ }
+
+ /**
+ * Tests the {@link ExhaustiveFragmenter#getSplittableBonds(IAtomContainer)} method
+ * for a linear alkane (propane), which should have no splittable bonds.
+ */
+ @Test
+ void testGetSplittableBondsLinearMolecule() throws Exception {
+ IAtomContainer mol = smilesParser.parseSmiles("CCC"); // Propane
+ IBond[] splittableBonds = ExhaustiveFragmenter.getSplittableBonds(mol);
+ Assertions.assertEquals(0, splittableBonds.length);
+ }
+
+ /**
+ * Tests the {@link ExhaustiveFragmenter#getSplittableBonds(IAtomContainer)} method
+ * for a cyclic alkane (cyclopentane), which should have no splittable bonds (all bonds are in a ring).
+ */
+ @Test
+ void testGetSplittableBondsCyclicMolecule() throws Exception {
+ IAtomContainer mol = smilesParser.parseSmiles("C1CCCC1"); // Cyclopentane
+ IBond[] splittableBonds = ExhaustiveFragmenter.getSplittableBonds(mol);
+ Assertions.assertEquals(0, splittableBonds.length);
+ }
+
+ /**
+ * Tests the {@link ExhaustiveFragmenter#getSplittableBonds(IAtomContainer)} method
+ * for ethylbenzene, which should have one splittable bond (the bond between the phenyl and ethyl groups).
+ */
+ @Test
+ void testGetSplittableBondsBenzeneWithSideChain() throws Exception {
+ IAtomContainer mol = smilesParser.parseSmiles("c1ccccc1CC"); // Ethylbenzene
+ IBond[] splittableBonds = ExhaustiveFragmenter.getSplittableBonds(mol);
+ Assertions.assertEquals(1, splittableBonds.length);
+ }
+
+ /**
+ * Tests the {@link ExhaustiveFragmenter#getSplittableBonds(IAtomContainer)} method
+ * for biphenyl, which should have one splittable bond (the bond connecting the two phenyl rings).
+ */
+ @Test
+ void testGetSplittableBondsBiphenyl() throws Exception {
+ IAtomContainer mol = smilesParser.parseSmiles("c1ccccc1c1ccccc1"); // Biphenyl
+ IBond[] splittableBonds = ExhaustiveFragmenter.getSplittableBonds(mol);
+ Assertions.assertEquals(1, splittableBonds.length);
+ }
+
+ /**
+ * Tests the internal helper method `generateSubset` which creates subsets
+ * based on the bit representation of an index.
+ * This ensures the combinatorial generation of bond subsets works correctly.
+ */
+ @Test
+ void testGenerateSubset() {
+ int[] nums = new int[]{10, 20, 30, 40};
+
+ // index = 1 (0001) -> {nums[0]}
+ Assertions.assertArrayEquals(
+ new int[]{10},
+ ExhaustiveFragmenter.generateSubset(1, nums)
+ );
+
+ // index = 2 (0010) -> {nums[1]}
+ Assertions.assertArrayEquals(
+ new int[]{20},
+ ExhaustiveFragmenter.generateSubset(2, nums)
+ );
+
+ // index = 3 (0011) -> {nums[0], nums[1]}
+ Assertions.assertArrayEquals(
+ new int[]{10, 20},
+ ExhaustiveFragmenter.generateSubset(3, nums)
+ );
+
+ // index = 4 (0100) -> {nums[2]}
+ Assertions.assertArrayEquals(
+ new int[]{30},
+ ExhaustiveFragmenter.generateSubset(4, nums)
+ );
+
+ // index = 5 (0101) -> {nums[0], nums[2]}
+ Assertions.assertArrayEquals(
+ new int[]{10, 30},
+ ExhaustiveFragmenter.generateSubset(5, nums)
+ );
+
+ // index = 7 (0111) -> {nums[0], nums[1], nums[2]}
+ Assertions.assertArrayEquals(
+ new int[]{10, 20, 30},
+ ExhaustiveFragmenter.generateSubset(7, nums)
+ );
+
+ // index = 15 (1111) -> {nums[0], nums[1], nums[2], nums[3]}
+ Assertions.assertArrayEquals(
+ new int[]{10, 20, 30, 40},
+ ExhaustiveFragmenter.generateSubset(15, nums)
+ );
+ }
+
+ /**
+ * Tests the functionality of providing a custom SmilesGenerator to the ExhaustiveFragmenter.
+ * This test uses a SmilesGenerator that does NOT use aromatic symbols, expecting kekulized SMILES.
+ */
+ @Test
+ void testCustomSmilesGenerator() throws Exception {
+ SmilesGenerator customSmilesGen = new SmilesGenerator(SmiFlavor.Unique); // No SmiFlavor.UseAromaticSymbols
+ ExhaustiveFragmenter customFragmenter = new ExhaustiveFragmenter(
+ customSmilesGen,
+ 6,
+ ExhaustiveFragmenter.Saturation.HYDROGEN_SATURATED_FRAGMENTS,
+ Integer.SIZE - 1,
+ false
+ );
+ IAtomContainer mol = smilesParser.parseSmiles("c1ccccc1Cc1ccccc1"); // Diphenylmethane
+ customFragmenter.generateFragments(mol);
+ String[] frags = customFragmenter.getFragments();
+
+ Assertions.assertNotNull(frags);
+ assertFragsContain(
+ new String[]{
+ "C=1C=CC=CC1",
+ "C=1C=CC(=CC1)C"
+ }, frags
+ );
+ Assertions.assertEquals(2, frags.length);
+ }
+
+ /**
+ * Tests the setInclusiveMaxTreeDepth method using 1,4-dibutylbenzene.
+ * By varying `inclusiveMaxTreeDepth`, we can observe how the number of generated fragments changes.
+ *
+ *
+ * Molecule: 1,4-dibutylbenzene (CCCCc1ccc(CCCC)cc1)
+ * Splittable bonds: 6 (the three C-C bonds for each butyl chain, from the ring until the second last C-atom).
+ * Fragmenter setup: minFragSize = 4 (to include butyl and benzene fragments), hydrogen-saturated fragments.
+ *
+ * Expected fragments for different inclusiveMaxTreeDepth settings:
+ *
+ * 1. inclusiveMaxTreeDepth = 1 (allows 1 cut at a time):
+ * - Expected unique fragments: 4
+ *
+ * 2. inclusiveMaxTreeDepth = 2 (allows up to 2 simultaneous cuts):
+ * - Considers all subsets of splittable bonds of size 1 and 2
+ * - Expected unique fragments: 10
+ *
+ * 3. inclusiveMaxTreeDepth = 3 (allows up to 3 simultaneous cuts):
+ * - Considers all subsets of splittable bonds of size 1, 2 and 3
+ * - Includes fragments from 1-cut operations, plus fragments from 2-cut
+ * and 3-cut operations:
+ * - Expected unique fragments: 10
+ *
+ */
+ @Test
+ void testSetInclusiveMaxTreeDepth() throws Exception {
+ IAtomContainer mol = smilesParser.parseSmiles("CCCCc1ccc(CCCC)cc1");
+
+ // Define a standard SmilesGenerator for fragmenter instantiation
+ SmilesGenerator standardSmilesGen = new SmilesGenerator(
+ SmiFlavor.Unique | SmiFlavor.UseAromaticSymbols
+ );
+
+ ExhaustiveFragmenter localFragmenter;
+
+ localFragmenter = new ExhaustiveFragmenter(
+ standardSmilesGen,
+ 4,
+ ExhaustiveFragmenter.Saturation.HYDROGEN_SATURATED_FRAGMENTS,
+ Integer.SIZE - 1,
+ false
+ );
+ localFragmenter.setInclusiveMaxTreeDepth(1);
+ localFragmenter.generateFragments(mol);
+ String[] fragsDepth1 = localFragmenter.getFragments();
+ Assertions.assertEquals(4, fragsDepth1.length,
+ "Expected 4 fragments when inclusiveMaxTreeDepth is 1 (allows 1 cuts) for 1,4-dibutylbenzene");
+ assertFragsContain(
+ new String[]{
+ "c1cc(ccc1C)CCCC",
+ "c1ccc(cc1)CCCC",
+ "c1cc(ccc1CC)CCCC",
+ "CCCC"
+ }, fragsDepth1
+ );
+
+ localFragmenter = new ExhaustiveFragmenter(
+ standardSmilesGen,
+ 4,
+ ExhaustiveFragmenter.Saturation.HYDROGEN_SATURATED_FRAGMENTS,
+ Integer.SIZE - 1,
+ false
+ );
+ localFragmenter.setInclusiveMaxTreeDepth(2);
+ localFragmenter.generateFragments(mol);
+ String[] fragsDepth2 = localFragmenter.getFragments();
+ Assertions.assertEquals(10, fragsDepth2.length,
+ "Expected 10 fragments when inclusiveMaxTreeDepth is 2 (allows up to 2 cut)");
+ assertFragsContain(
+ new String[]{
+ "c1ccc(cc1)C",
+ "c1ccc(cc1)CC",
+ "c1ccc(cc1)CCCC",
+ "c1cc(ccc1C)C",
+ "c1cc(ccc1C)CC",
+ "c1cc(ccc1C)CCCC",
+ "c1cc(ccc1CC)CC",
+ "c1cc(ccc1CC)CCCC",
+ "c1ccccc1",
+ "CCCC"
+ }, fragsDepth2
+ );
+
+ localFragmenter = new ExhaustiveFragmenter(
+ standardSmilesGen,
+ 4,
+ ExhaustiveFragmenter.Saturation.HYDROGEN_SATURATED_FRAGMENTS,
+ Integer.SIZE - 1,
+ false
+ );
+ localFragmenter.setInclusiveMaxTreeDepth(3);
+ localFragmenter.generateFragments(mol);
+ String[] fragsDepth3 = localFragmenter.getFragments();
+ Assertions.assertEquals(10, fragsDepth3.length,
+ "Expected 10 fragments when inclusiveMaxTreeDepth is 3 (allows up to 3 cuts), same as max 2 cuts");
+ assertFragsContain(
+ new String[]{
+ "c1ccc(cc1)C",
+ "c1ccc(cc1)CC",
+ "c1ccc(cc1)CCCC",
+ "c1cc(ccc1C)C",
+ "c1cc(ccc1C)CC",
+ "c1cc(ccc1C)CCCC",
+ "c1cc(ccc1CC)CC",
+ "c1cc(ccc1CC)CCCC",
+ "c1ccccc1",
+ "CCCC"
+ }, fragsDepth3
+ );
+ }
+
+ /**
+  * Checks that double bonds are not split: fragmenting {@code C1CCCCC1=CCC}
+  * with hydrogen saturation must yield a fragment that still carries the
+  * exocyclic double bond ({@code C=C1CCCCC1}).
+  *
+  * @throws CDKException if parsing or fragmentation fails
+  */
+ @Test
+ void testDoubleBondIssue() throws CDKException {
+     IAtomContainer alkylidene = smilesParser.parseSmiles("C1CCCCC1=CCC");
+     ExhaustiveFragmenter fragmenter = new ExhaustiveFragmenter(
+         ExhaustiveFragmenter.Saturation.HYDROGEN_SATURATED_FRAGMENTS
+     );
+     fragmenter.generateFragments(alkylidene);
+
+     String[] mustContain = {"C=C1CCCCC1"};
+     assertFragsContain(mustContain, fragmenter.getFragments());
+ }
+
+ // --- Complementary Molecule Tests ---
+
+ /**
+  * Runs the exhaustive fragmentation on a disconnected structure (sodium
+  * edetate: the edetate anion plus four separate sodium cations) and checks
+  * that a selection of hydrogen-saturated fragments is produced.
+  *
+  * @throws Exception if parsing or fragmentation fails
+  */
+ @Test
+ void testDisconnectedMolecules() throws Exception {
+     // Sodium edetate
+     String sodiumEdetate =
+         "C(CN(CC(=O)[O-])CC(=O)[O-])N(CC(=O)[O-])CC(=O)[O-].[Na+].[Na+].[Na+].[Na+]";
+     IAtomContainer salt = smilesParser.parseSmiles(sodiumEdetate);
+
+     ExhaustiveFragmenter fragmenter = new ExhaustiveFragmenter(
+         ExhaustiveFragmenter.Saturation.HYDROGEN_SATURATED_FRAGMENTS
+     );
+     fragmenter.generateFragments(salt);
+
+     String[] mustContain = {
+         "O=C([O-])CNCCNCC(=O)[O-]",
+         "O=C([O-])CNCC(=O)[O-]",
+         "O=C([O-])CN(C)CCN(C)C",
+         "O=C([O-])CNCCNC",
+         "O=C([O-])CN(CC(=O)[O-])CC"
+     };
+     assertFragsContain(mustContain, fragmenter.getFragments());
+ }
+
+ /**
+  * Fragments a larger molecule (PubChem CID 118705975) with hydrogen
+  * saturation and checks that a selection of expected fragments is present.
+  *
+  * @throws Exception if anything goes wrong
+  */
+ @Test
+ void testBigMolecule1() throws Exception {
+     // This test uses its own parser built on the silent builder
+     SmilesParser localParser = new SmilesParser(SilentChemObjectBuilder.getInstance());
+     // PubChem CID 118705975
+     IAtomContainer bigMolecule = localParser.parseSmiles("CC1=C(C(=CC=C1)NC2=CC=CC=C2C" +
+         "(=O)NC(CCS(=O)C)C(=O)NC(C)C3=CC=C(C=C3)F)C");
+
+     ExhaustiveFragmenter fragmenter = new ExhaustiveFragmenter(
+         ExhaustiveFragmenter.Saturation.HYDROGEN_SATURATED_FRAGMENTS
+     );
+     fragmenter.generateFragments(bigMolecule);
+
+     String[] mustContain = {
+         "O=C(NCC)CCC",
+         "NC=1C=CC=CC1",
+         "O=C(N)CCCS(=O)C",
+         "FC=1C=CC(=CC1)C(N)C"
+     };
+     assertFragsContain(mustContain, fragmenter.getFragments());
+ }
+
+ /**
+ * Testing a molecule with 31 splittable bonds (takes extremely long, maybe days)
+ *
+ * @throws Exception if anything goes wrong
+ */
+ // @Test
+ void testMaxSplittableBonds() throws Exception {
+ SmilesParser smiPar = new SmilesParser(SilentChemObjectBuilder.getInstance());
+ IAtomContainer mol = smiPar.parseSmiles("C[C@]12CC[C@](CC1C3=CC(=O)C4[C@]5(CCC(C(C5CC[C@]4([C@@]3(CC2)C)C)(C)C)" +
+ "OC6C(C(C(C(O6)C(=O)N[C@H](CCC(=O)OC)C(=O)OC)O)O)OC7C(C(C(C(O7)C(=O)N[C@H](CCC(=O)OC)C(=O)OC)O)O)O)C)(C)C" +
+ "(=O)N[C@H](CCC(=O)OC)C(=O)OC"); // Pubchem CID 16396833
+ ExhaustiveFragmenter saturatedFragmenter = new ExhaustiveFragmenter(ExhaustiveFragmenter.Saturation.HYDROGEN_SATURATED_FRAGMENTS);
+ saturatedFragmenter.generateFragments(mol);
+ String[] frags = saturatedFragmenter.getFragments();
+ assertFragsContain(
+ new String[]{
+ "O=CCNC(=O)c1ccccc1",
+ "O=C(N)CNC(=O)c1ccccc1N",
+ "O=C(NC)c1ccccc1N",
+ "O=C(NCCC)c1ccccc1N",
+ "O=CCNC(=O)c1ccccc1Nc2cccc(c2C)C",
+ "O=C(N)CCCS(=O)C",
+ "O=C(N)C(NC(=O)c1ccccc1)CCS(=O)C"
+ }, frags
+ );
+ }
+
+ /**
+  * Runs the example molecule from the Javadoc of {@link ExhaustiveFragmenter}
+  * ({@code C1CCCC1C1=CC=CC=C1}) through two configurations: the no-argument
+  * fragmenter, and one with a minimal fragment size of 5 and hydrogen
+  * saturation. Each run works on its own clone of the parsed molecule.
+  *
+  * @throws Exception if anything goes wrong
+  */
+ @Test
+ void testExampleUsage() throws Exception {
+     SmilesParser exampleParser = new SmilesParser(SilentChemObjectBuilder.getInstance());
+     IAtomContainer example = exampleParser.parseSmiles("C1CCCC1C1=CC=CC=C1");
+
+     // Run 1: default settings
+     ExhaustiveFragmenter withDefaults = new ExhaustiveFragmenter();
+     withDefaults.generateFragments(example.clone());
+     String[] defaultResult = withDefaults.getFragments();
+     String[] expectedDefault = {"[C]1=CC=CC=C1"};
+     assertFragsContain(expectedDefault, defaultResult);
+
+     // Run 2: minimal fragment size of 5 and hydrogen saturation
+     ExhaustiveFragmenter withCustomSettings = new ExhaustiveFragmenter(
+         5,
+         ExhaustiveFragmenter.Saturation.HYDROGEN_SATURATED_FRAGMENTS
+     );
+     withCustomSettings.generateFragments(example.clone());
+     String[] customResult = withCustomSettings.getFragments();
+     String[] expectedCustom = {
+         "C1CCCC1",
+         "C1=CC=CC=C1"
+     };
+     assertFragsContain(expectedCustom, customResult);
+ }
+
+ /**
+  * Ensures that double-bond stereochemistry (the E/Z markers '/' and '\' in
+  * SMILES) survives fragmentation, both in the SMILES returned by the
+  * fragmenter and in SMILES regenerated from the fragment containers.
+  *
+  * @throws Exception if parsing, fragmentation or SMILES generation fails
+  */
+ @Test
+ void testStereoChemistryCopied() throws Exception {
+     int stereoFlavor = SmiFlavor.UseAromaticSymbols | SmiFlavor.Stereo;
+     SmilesGenerator stereoGenerator = new SmilesGenerator(stereoFlavor);
+
+     IAtomContainer alkene = smilesParser.parseSmiles("CC[C@H](F)C/C=C/C");
+     ExhaustiveFragmenter stereoFragmenter = new ExhaustiveFragmenter(
+         stereoGenerator,
+         6,
+         ExhaustiveFragmenter.Saturation.HYDROGEN_SATURATED_FRAGMENTS,
+         31,
+         true
+     );
+     stereoFragmenter.generateFragments(alkene);
+
+     String[] fragmentSmiles = stereoFragmenter.getFragments();
+     IAtomContainer[] fragmentContainers = stereoFragmenter.getFragmentsAsContainers();
+
+     Assertions.assertNotNull(fragmentSmiles);
+     Assertions.assertNotNull(fragmentContainers);
+     Assertions.assertEquals(fragmentSmiles.length, fragmentContainers.length,
+         "Number of SMILES fragments and container fragments must match");
+
+     // Re-create SMILES from the containers with the same generator
+     String[] regeneratedSmiles = new String[fragmentSmiles.length];
+     int slot = 0;
+     while (slot < regeneratedSmiles.length) {
+         regeneratedSmiles[slot] = stereoGenerator.create(fragmentContainers[slot]);
+         slot++;
+     }
+
+     // The fragment keeping the '/C=C/' markers must appear in both views
+     assertFragsContain(new String[]{"C(F)C/C=C/C"}, fragmentSmiles);
+     assertFragsContain(new String[]{"C(F)C/C=C/C"}, regeneratedSmiles);
+ }
+
+ /**
+  * Ensures that tetrahedral stereochemistry (the '@' chirality markers in
+  * SMILES) is carried over to fragments that still contain the same chiral
+  * center.
+  *
+  * @throws Exception if parsing or fragmentation fails
+  */
+ @Test
+ void testTetrahdralStereoChemistryCopied() throws Exception {
+     int stereoFlavor = SmiFlavor.UseAromaticSymbols | SmiFlavor.Stereo;
+     SmilesGenerator stereoGenerator = new SmilesGenerator(stereoFlavor);
+
+     IAtomContainer chiralAlcohol = smilesParser.parseSmiles("[C@@H](Cl)(O)CCCCCC");
+     ExhaustiveFragmenter stereoFragmenter = new ExhaustiveFragmenter(
+         stereoGenerator,
+         6,
+         ExhaustiveFragmenter.Saturation.HYDROGEN_SATURATED_FRAGMENTS,
+         31,
+         true
+     );
+     stereoFragmenter.generateFragments(chiralAlcohol);
+
+     String[] mustContain = {
+         "[C@@H](Cl)(O)CCC",
+         "CCCCCC",
+         "[C@@H](Cl)(O)CCCC"
+     };
+     assertFragsContain(mustContain, stereoFragmenter.getFragments());
+ }
+
+ /**
+  * Pins down a known bug: the stereo information of a chiral center is also
+  * copied to fragments in which that center is no longer chiral. This happens
+  * when fragmentation leaves the center with two identical substituents,
+  * which by definition removes its chirality.
+  *
+  * @throws Exception if parsing or fragmentation fails
+  */
+ @Test
+ void testTetrahedralStereoChemistryFalselyCopied() throws Exception {
+     int stereoFlavor = SmiFlavor.UseAromaticSymbols | SmiFlavor.Stereo;
+     SmilesGenerator stereoGenerator = new SmilesGenerator(stereoFlavor);
+
+     IAtomContainer chiralChloride = smilesParser.parseSmiles("CC[C@@H](Cl)CCCC");
+     ExhaustiveFragmenter stereoFragmenter = new ExhaustiveFragmenter(
+         stereoGenerator,
+         6,
+         ExhaustiveFragmenter.Saturation.HYDROGEN_SATURATED_FRAGMENTS,
+         31,
+         true
+     );
+     stereoFragmenter.generateFragments(chiralChloride);
+
+     String[] currentOutput = {
+         "C(Cl)CCCC",
+         // The chemically correct representation would be CCC(Cl)CC
+         // instead of:
+         "CC[C@@H](Cl)CC"
+     };
+     assertFragsContain(currentOutput, stereoFragmenter.getFragments());
+ }
+
+ // --- Utility ---
+
+ /**
+ * Asserts that an array of strings contains all the expected elements,
+ * allowing for additional, unexpected elements in the actual array.
+ *
+ *
+ * This assertion is useful for verifying that a collection contains a
+ * specific subset of items. It fails only if an expected element is
+ * missing from the actual array. The failure message will list both
+ * missing elements and any extra, unexpected elements found.
+ *
+ *
+ * @param expected The {@code String} array containing the elements that are
+ * expected to be present in the {@code actual} array.
+ * @param actual The {@code String} array containing the elements to be
+ * tested against the {@code expected} array.
+ */
+ private static void assertFragsContain(
+ String[] expected,
+ String[] actual
+ ) {
+
+ Set expectedSet = new HashSet<>(Arrays.asList(expected));
+ Set actualSet = new HashSet<>(Arrays.asList(actual));
+
+ Set missing = expectedSet.stream()
+ .filter(item -> !actualSet.contains(item))
+ .collect(Collectors.toSet());
+
+ Set extra = actualSet.stream()
+ .filter(item -> !expectedSet.contains(item))
+ .collect(Collectors.toSet());
+
+ if (!missing.isEmpty()) {
+ StringBuilder failureMessage = new StringBuilder();
+
+ failureMessage.append("Expected but not found: ").append(missing).append("\n");
+ if (!extra.isEmpty()) {
+ failureMessage.append("Found but not expected: ").append(extra).append("\n");
+ }
+
+ fail(failureMessage.toString());
+ }
}
}