Skip to content

Commit

Permalink
Merge pull request #294 from seoklab/jnooree/issue-293
Browse files Browse the repository at this point in the history
fix(core,algo): improve aromaticity detection
  • Loading branch information
jnooree authored Apr 11, 2024
2 parents e3116b8 + 60961e0 commit 3cb9514
Show file tree
Hide file tree
Showing 6 changed files with 183 additions and 66 deletions.
2 changes: 2 additions & 0 deletions include/nuri/core/molecule.h
Original file line number Diff line number Diff line change
Expand Up @@ -2301,6 +2301,8 @@ namespace internal {
extern int nonbonding_electrons(const AtomData &data, int total_valence);

extern int count_pi_e(Molecule::Atom atom, int total_valence);

extern int aromatic_pi_e(Molecule::Atom atom, int total_valence);
} // namespace internal

/**
Expand Down
2 changes: 1 addition & 1 deletion src/algo/guess_2d.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ namespace {
return;

sum_pi_e +=
internal::count_pi_e(atom, internal::sum_bond_order(atom, false));
internal::aromatic_pi_e(atom, internal::sum_bond_order(atom, false));
}

int test = sum_pi_e % 4 - 2;
Expand Down
140 changes: 90 additions & 50 deletions src/core/molecule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
#include "nuri/utils.h"

namespace nuri {
using internal::count_pi_e;
using internal::aromatic_pi_e;
using internal::effective_element_or_element;
using internal::from_degree;
using internal::nonbonding_electrons;
Expand Down Expand Up @@ -521,18 +521,20 @@ namespace internal {
<< " aromatic bonds";

if (num_aromatic == 1) {
ABSL_LOG(INFO) << "Atom with single aromatic bond; assuming double bond "
ABSL_LOG(INFO) << "Atom with single aromatic bond; assuming single bond "
"for bond order calculation";
} else if (num_aromatic > 3) {
return sum_order + 1;
}

if (num_aromatic > 3) {
// Aromatic atom with >= 4 aromatic bonds is very unlikely;
// just log it and fall through
ABSL_LOG(WARNING) << "Cannot correctly determine total bond order for "
"aromatic atom with more than 4 aromatic bonds";
}

// The logic here:
// - for 1 aromatic bond, assume it's a double bond (e.g. carboxylate C-O
// bond has "aromatic" bond order in some Mol2 files) = 2
// - for 1 aromatic bond, assume it's a single bond
// - for 2 aromatic bonds, each will contribute 1.5 to the total bond
// order (e.g. benzene) = 3
// - for 3 aromatic bonds, assume 2, 1, 1 for the bond orders (this is
Expand Down Expand Up @@ -589,6 +591,34 @@ namespace {
}
}
}

bool path_can_conjugate(const BondData &prev, Molecule::Neighbor curr,
const absl::FixedArray<int> &valences) {
auto src = curr.src(), dst = curr.dst();

if (prev.order() == constants::kSingleBond) {
if (curr.edge_data().order() == constants::kSingleBond
&& src.data().atomic_number() != 0
&& dst.data().atomic_number() != 0) {
// Single - single bond -> conjugated if curr has lone pair and
// next doesn't, or vice versa, or any of them is dummy
int src_nbe = nonbonding_electrons(src.data(), valences[src.id()]),
dst_nbe = nonbonding_electrons(dst.data(), valences[dst.id()]);

if ((src_nbe > 0 && dst_nbe > 0) || (src_nbe <= 0 && dst_nbe <= 0))
return false;
}
} else if (curr.edge_data().order() != constants::kSingleBond
&& (prev.order() != constants::kAromaticBond
|| curr.edge_data().order() != constants::kAromaticBond)) {
// Aromatic - aromatic bond -> conjugated
// double~triple - double~triple bond -> allene-like
// aromatic - double~triple bond -> not conjugated (erroneous?)
return false;
}

return true;
}
} // namespace

bool MoleculeSanitizer::sanitize_conjugated() {
Expand Down Expand Up @@ -619,31 +649,8 @@ bool MoleculeSanitizer::sanitize_conjugated() {

if (prev_bond == nullptr) {
prev_bond = &nei.edge_data();
} else {
if (prev_bond->order() == constants::kSingleBond) {
if (nei.edge_data().order() == constants::kSingleBond
&& src.data().atomic_number() != 0
&& dst.data().atomic_number() != 0) {
// Single - single bond -> conjugated if curr has lone pair and
// next doesn't, or vice versa, or any of them is dummy
const int src_nbe =
nonbonding_electrons(src.data(), valences_[curr]),
dst_nbe =
nonbonding_electrons(dst.data(), valences_[dst.id()]);
if ((src_nbe > 0 && dst_nbe > 0)
|| (src_nbe <= 0 && dst_nbe <= 0)) {
continue;
}
}
} else if (nei.edge_data().order() != constants::kSingleBond
&& (prev_bond->order() != constants::kAromaticBond
|| nei.edge_data().order()
!= constants::kAromaticBond)) {
// Aromatic - aromatic bond -> conjugated
// double~triple - double~triple bond -> allene-like
// aromatic - double~triple bond -> not conjugated (erroneous?)
continue;
}
} else if (!path_can_conjugate(*prev_bond, nei, valences_)) {
continue;
}

self(self, dst.id(), &nei.edge_data());
Expand Down Expand Up @@ -729,6 +736,18 @@ namespace internal {
return pie_estimate;
}

int aromatic_pi_e(Molecule::Atom atom, int total_valence) {
if (std::any_of(atom.begin(), atom.end(), [](Molecule::Neighbor nei) {
return !nei.edge_data().is_ring_bond()
&& nei.edge_data().order() > constants::kSingleBond;
})) {
// Exocyclic multiple bond, don't contribute to pi electrons
return 0;
}

return count_pi_e(atom, total_valence);
}

int steric_number(const int total_degree, const int nb_electrons) {
const int lone_pairs = nb_electrons / 2;
return total_degree + lone_pairs;
Expand All @@ -754,38 +773,59 @@ namespace {
return nei.edge_data().order()
== constants::kDoubleBond;
})
< 2
// No exocyclic high-order bonds
&& std::all_of(atom.begin(), atom.end(), [](Molecule::Neighbor nei) {
return nei.edge_data().is_ring_bond()
|| nei.edge_data().order() == constants::kSingleBond;
});
< 2;
}

bool is_ring_aromatic(const Molecule &mol, const std::vector<int> &ring,
const int pi_e_sum) {
return pi_e_sum % 4 == 2
// Dummy atoms are always allowed in aromatic rings
|| std::any_of(ring.begin(), ring.end(), [&](int id) {
return mol.atom(id).data().atomic_number() == 0;
});
const std::vector<int> &pi_es,
const absl::FixedArray<int> &valences) {
const int pi_e_sum = std::accumulate(pi_es.begin(), pi_es.end(), 0);
bool initial = pi_e_sum % 4 == 2
// Dummy atoms are always allowed in aromatic rings
|| std::any_of(ring.begin(), ring.end(), [&](int id) {
return mol.atom(id).data().atomic_number() == 0;
});
if (!initial)
return false;

const BondData *prev_data = nullptr;

for (int i = 0; i < ring.size(); ++i) {
const int src = ring[i],
dst = ring[(i + 1) % static_cast<int>(ring.size())];

auto curr = mol.find_neighbor(src, dst);
ABSL_DCHECK(!curr.end());

if (prev_data != nullptr
&& !path_can_conjugate(*prev_data, *curr, valences))
return false;

prev_data = &curr->edge_data();
}

return true;
}

void mark_aromatic_ring(Molecule &mol, const std::vector<int> &ring,
const absl::flat_hash_map<int, int> &pi_e) {
int pi_e_sum = 0;
const absl::flat_hash_map<int, int> &pi_e,
const absl::FixedArray<int> &valences) {
std::vector<int> ring_pi_e;
ring_pi_e.reserve(ring.size());

for (int i = 0; i < ring.size(); ++i) {
auto it = pi_e.find(ring[i]);
if (it == pi_e.end()) {
return;
}
pi_e_sum += it->second;

int ne = it->second;
ring_pi_e.push_back(ne);
}

const bool this_aromatic = is_ring_aromatic(mol, ring, pi_e_sum);
if (!this_aromatic) {
const bool this_aromatic = is_ring_aromatic(mol, ring, ring_pi_e, valences);
if (!this_aromatic)
return;
}

for (int i = 0; i < ring.size(); ++i) {
const int src = ring[i],
Expand All @@ -804,13 +844,13 @@ namespace {

for (auto atom: mol) {
if (is_aromatic_candidate(atom)) {
pi_e.insert({ atom.id(), count_pi_e(atom, valences[atom.id()]) });
pi_e.insert({ atom.id(), aromatic_pi_e(atom, valences[atom.id()]) });
}
}

auto mark_aromatic_for = [&](const std::vector<std::vector<int>> &rs) {
for (const std::vector<int> &ring: rs) {
mark_aromatic_ring(mol, ring, pi_e);
mark_aromatic_ring(mol, ring, pi_e, valences);
}
};

Expand Down
28 changes: 14 additions & 14 deletions src/fmt/smiles.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ constexpr auto update_hydrogen = [](auto &ctx) {
const int last_idx = get_last_idx(ctx),
h_count = char_to_int(x3::_attr(ctx).value_or('1'));

ABSL_DLOG(INFO) << "Adding " << h_count << " hydrogens to atom " << last_idx;
ABSL_DVLOG(3) << "Adding " << h_count << " hydrogens to atom " << last_idx;
mutator.mol().atom(last_idx).data().set_implicit_hydrogens(h_count);
};

Expand All @@ -178,7 +178,7 @@ void set_charge(Ctx &ctx, bool positive, int abs_charge) {
charge = positive ? abs_charge : -abs_charge;
mutator.mol().atom(last_idx).data().set_formal_charge(charge);

ABSL_DLOG(INFO) << "Setting charge of atom " << last_idx << " to " << charge;
ABSL_DVLOG(3) << "Setting charge of atom " << last_idx << " to " << charge;
}

constexpr auto update_charge_number = [](auto &ctx) {
Expand Down Expand Up @@ -228,8 +228,8 @@ bool add_bond(MoleculeMutator &mutator, ImplicitAromatics &aromatics,

BondData bond_data;

// Automatic bond
if (bond_repr == '\0') {
// Automatic bond or up/down bond
if (bond_repr == '\0' || bond_repr == '\\' || bond_repr == '/') {
const AtomData &last_atom_data = mutator.mol().atom(prev).data(),
&atom_data = mutator.mol().atom(curr).data();
bond_data.order() = last_atom_data.is_aromatic() && atom_data.is_aromatic()
Expand All @@ -239,8 +239,8 @@ bool add_bond(MoleculeMutator &mutator, ImplicitAromatics &aromatics,
bond_data.order() = char_to_bond(bond_repr);
}

ABSL_DLOG(INFO) << "Trying to add bond " << prev << " -> " << curr << ": "
<< bond_data.order();
ABSL_DVLOG(3) << "Trying to add bond " << prev << " -> " << curr << ": "
<< bond_data.order();

auto [it, success] = mutator.add_bond(prev, curr, bond_data);
if (bond_repr == '\0' && bond_data.order() == constants::kAromaticBond)
Expand All @@ -255,8 +255,8 @@ int add_atom(Ctx &ctx, const Element *elem, bool aromatic) {
const int idx = mutator.add_atom(AtomData(*elem));
mutator.mol().atom(idx).data().set_aromatic(aromatic);

ABSL_DLOG(INFO) << "Adding " << (aromatic ? "aromatic " : "") << "atom "
<< idx << " (" << elem->symbol() << ')';
ABSL_DVLOG(3) << "Adding " << (aromatic ? "aromatic " : "") << "atom " << idx
<< " (" << elem->symbol() << ')';

const int last_idx = get_last_idx(ctx);
const char last_bond_data = x3::get<last_bond_data_tag>(ctx);
Expand Down Expand Up @@ -310,8 +310,8 @@ constexpr auto bracket_atom_adder(bool is_aromatic) {
constexpr auto set_chirality = [](auto &ctx) {
const int idx = get_last_idx(ctx);
x3::get<chirality_map_tag>(ctx).get()[idx] = x3::_attr(ctx);
ABSL_DLOG(INFO) << "Setting chirality of atom " << idx << " to "
<< static_cast<int>(x3::_attr(ctx));
ABSL_DVLOG(3) << "Setting chirality of atom " << idx << " to "
<< static_cast<int>(x3::_attr(ctx));
};

constexpr auto set_atom_class = [](auto &) {
Expand All @@ -327,12 +327,12 @@ const auto bracket_atom = //

constexpr auto set_last_bond_data = [](auto &ctx) {
x3::get<last_bond_data_tag>(ctx).get() = x3::_attr(ctx);
ABSL_DLOG(INFO) << "Setting last bond data to " << x3::_attr(ctx);
ABSL_DVLOG(3) << "Setting last bond data to " << x3::_attr(ctx);
};

constexpr auto set_last_bond_auto = [](auto &ctx) {
x3::get<last_bond_data_tag>(ctx).get() = '\0';
ABSL_DLOG(INFO) << "Setting last bond data to auto";
ABSL_DVLOG(3) << "Setting last bond data to auto";
};

const auto bond = x3::char_("-=#$:/\\")[set_last_bond_data];
Expand All @@ -352,8 +352,8 @@ void handle_ring(Ctx &ctx, int ring_idx) {

// New ring index, nothing to do.
if (is_new) {
ABSL_DLOG(INFO) << "Adding ring index " << ring_idx
<< ", src: " << current_idx << ", " << bond_data;
ABSL_DVLOG(3) << "Adding ring index " << ring_idx
<< ", src: " << current_idx << ", " << bond_data;
return;
}

Expand Down
2 changes: 1 addition & 1 deletion test/algo/guess_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -910,7 +910,7 @@ TEST(GuessSelectedMolecules, GPC) {
const AtomData &data = mol.atom(i).data();
EXPECT_EQ(data.hybridization(), constants::kSP2);
EXPECT_TRUE(data.is_aromatic());
EXPECT_EQ(data.implicit_hydrogens(), i == 38 || i == 39);
EXPECT_EQ(data.implicit_hydrogens(), i == 38 || i == 39) << i;
}
EXPECT_EQ(mol.atom(34).data().hybridization(), constants::kTerminal);
EXPECT_EQ(mol.atom(34).data().implicit_hydrogens(), 0);
Expand Down
Loading

0 comments on commit 3cb9514

Please sign in to comment.