From 8403c56cc5545e12cf83a5cbe4f64bf4041e1284 Mon Sep 17 00:00:00 2001 From: Michael Taylor Date: Thu, 17 Oct 2024 17:12:53 -0600 Subject: [PATCH] Adding Solvent Workflow --- .gitignore | 4 +- architector/complex_construction.py | 11 +- architector/io_process_input.py | 10 +- development/dipole_conformers.ipynb | 301 +++ .../solubility_dev/Soldb_workflow_dev.ipynb | 1782 +++++++++++++++++ development/solubility_dev/gbsa_opt.txt | 2 + development/solubility_dev/xtb_solvent.py | 101 + 7 files changed, 2201 insertions(+), 10 deletions(-) create mode 100644 development/dipole_conformers.ipynb create mode 100644 development/solubility_dev/Soldb_workflow_dev.ipynb create mode 100644 development/solubility_dev/gbsa_opt.txt create mode 100644 development/solubility_dev/xtb_solvent.py diff --git a/.gitignore b/.gitignore index bbf65f7..f283f81 100644 --- a/.gitignore +++ b/.gitignore @@ -8,4 +8,6 @@ *.traj *__pycache__* **/.vscode/ -meta_production_sample.pkl \ No newline at end of file +meta_production_sample.pkl +development/**/*.csv +development/**/*.mol2 \ No newline at end of file diff --git a/architector/complex_construction.py b/architector/complex_construction.py index 40b43e4..1f236f5 100644 --- a/architector/complex_construction.py +++ b/architector/complex_construction.py @@ -487,7 +487,7 @@ def complex_driver(inputDict1): ligandDict = newligDict else: # Generate from scratch ligandDict = {} # - + coreCoordList = core_geo_class.geometry_dict[coreType] # Assign con atoms based on all ligands @@ -540,7 +540,7 @@ def complex_driver(inputDict1): out_energies.append(complexClass.calculator.energy) if inputDict['parameters']['return_only_1']: break - if not isinstance(complexClass, bool): # Catch cases where no conformation generated. + if not isinstance(complexClass, bool): # Catch cases where no conformation generated. order = np.argsort(out_energies) for ind, j in enumerate(order[0:inputDict[ 'parameters']['n_conformers']]): @@ -585,7 +585,7 @@ def complex_driver(inputDict1): return conf_dict,inputDict,core_preprocess_time,symmetry_preprocess_time,int_time1 else: return {},inputDict,0,0,0 - + def build_complex_driver(inputDict1): """build_complex_driver overall driver building of the complex @@ -787,12 +787,13 @@ def build_complex(inputDict): keys.append(key) vals.append(val) order = np.argsort(xtb_energies) - for j,i in enumerate(order): + for j, i in enumerate(order): if tmp_inputDict['parameters']['add_secondary_shell_species'] and \ (j < tmp_inputDict['parameters'][ 'secondary_shell_n_conformers']): if tmp_inputDict['parameters']['debug']: - print('Starting secondary shell addition on {} of {}!'.format(j+1,len(order))) + print('Starting secondary shell addition on {} of {}!'.format(j+1, + len(order))) print('Normally adds a chunk of time to generation.') # Use the docking function to add species specified in inputDict/parameters mol_plus_species, species_list = \ diff --git a/architector/io_process_input.py b/architector/io_process_input.py index 874d970..6f0f7bb 100644 --- a/architector/io_process_input.py +++ b/architector/io_process_input.py @@ -1094,12 +1094,13 @@ def inparse(inputDict): if metal in io_ptable.metal_charge_dict: outparams['metal_ox'] = io_ptable.metal_charge_dict[metal] else: # Pull lowest positive "main" oxidation state from mendeleev - elem = mendeleev.__dict__[newinpDict['core']['metal']] + elem = mendeleev.element(newinpDict['core']['metal']) outparams['metal_ox'] = [x.oxidation_state for x in elem._oxidation_states if (x.category == 'main') and (x.oxidation_state > 0)][0] if outparams['metal_spin'] is None: if outparams['metal_ox'] != io_ptable.metal_charge_dict.get(metal,100): # Calculate from mendeleev reference - Generally aufbau. - outparams['metal_spin'] = mendeleev.__dict__[newinpDict['core']['metal']].ec.ionize(outparams['metal_ox']).unpaired_electrons() + outparams['metal_spin'] = mendeleev.element( + newinpDict['core']['metal']).ec.ionize(outparams['metal_ox']).unpaired_electrons() else: # Otherwise use refdict. outparams['metal_spin'] = io_ptable.metal_spin_dict[metal] @@ -1434,12 +1435,13 @@ def inparse_2D(inputDict): if metal in io_ptable.metal_charge_dict: outparams['metal_ox'] = io_ptable.metal_charge_dict[metal] else: # Pull lowest positive "main" oxidation state from mendeleev - elem = mendeleev.__dict__[newinpDict['core']['metal']] + elem = mendeleev.element(newinpDict['core']['metal']) outparams['metal_ox'] = [x.oxidation_state for x in elem._oxidation_states if (x.category == 'main') and (x.oxidation_state > 0)][0] if outparams['metal_spin'] is None: if outparams['metal_ox'] != io_ptable.metal_charge_dict.get(metal,100): # Calculate from mendeleev reference - Generally aufbau. - outparams['metal_spin'] = mendeleev.__dict__[newinpDict['core']['metal']].ec.ionize(outparams['metal_ox']).unpaired_electrons() + outparams['metal_spin'] = mendeleev.element( + newinpDict['core']['metal']).ec.ionize(outparams['metal_ox']).unpaired_electrons() else: # Otherwise use refdict. outparams['metal_spin'] = io_ptable.metal_spin_dict[metal] diff --git a/development/dipole_conformers.ipynb b/development/dipole_conformers.ipynb new file mode 100644 index 0000000..d7c6f45 --- /dev/null +++ b/development/dipole_conformers.ipynb @@ -0,0 +1,301 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "from architector.io_obabel import generate_obmol_conformers\n", + "from architector.io_calc import CalcExecutor\n", + "from architector import view_structures, convert_io_molecule\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "example_smi = '[O-]CCCCCCCC'" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "application/3dmoljs_load.v0": "
\n

3Dmol.js failed to load for some reason. Please check your browser console for error messages.

\n
\n", + "text/html": [ + "
\n", + "

3Dmol.js failed to load for some reason. Please check your browser console for error messages.

\n", + "
\n", + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "view_structures(example_smi)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[0;31mSignature:\u001b[0m\n", + "\u001b[0mgenerate_obmol_conformers\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\u001b[0m\n", + "\u001b[0;34m\u001b[0m \u001b[0mstructure\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", + "\u001b[0;34m\u001b[0m \u001b[0mrmsd_cutoff\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0.4\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", + "\u001b[0;34m\u001b[0m \u001b[0mconf_cutoff\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m3000\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", + "\u001b[0;34m\u001b[0m \u001b[0menergy_cutoff\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m50.0\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", + "\u001b[0;34m\u001b[0m \u001b[0mconfab_verbose\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", + "\u001b[0;34m\u001b[0m \u001b[0moutput_format\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'mol2'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", + "\u001b[0;34m\u001b[0m \u001b[0mneutralize\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", + "\u001b[0;34m\u001b[0m \u001b[0mfunctionalizations\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", + "\u001b[0;34m\u001b[0m \u001b[0mfix_m_neighbors\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", + "\u001b[0;34m\u001b[0m \u001b[0mreturn_energies\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", + "\u001b[0;34m\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mDocstring:\u001b[0m\n", + "generate_obmol_conformers \n", + "generate conformers with openbabel for given smiles\n", + "using confab conformer generation routine\n", + "O'Boyle NM, Vandermeersch T, Flynn CJ, Maguire AR, Hutchison GR. Confab\n", + "- Systematic generation of diverse low-energy conformers.\n", + "Journal of Cheminformatics.\n", + "2011;3:8. doi:10.1186/1758-2946-3-8.\n", + "\n", + "Parameters\n", + "----------\n", + "structure : str/obmol/architector molecule\n", + " structure to generate conformers\n", + "rmsd_cutoff : float, optional\n", + " cutoff for how similar conformers, by default 0.4\n", + "conf_cutoff : int, optional\n", + " total number of conformers to generate, by default 3000\n", + "energy_cutoff : float, optional\n", + " how similar in energy, by default 50.0\n", + "confab_verbose : bool, optional\n", + " give more detailed output, by default False\n", + "output_format : str, optional\n", + " which format to output , by default 'mol2'\n", + "neutralize : bool, optional\n", + " neutralize smiles?, by default False\n", + "functionalizations : dict, optional\n", + " add functionalizations?, by default None\n", + "fix_m_neighbors : bool, optional\n", + " Fix the metal and it's neighbors for conformer generation?, by default True\n", + "return_energies : bool, optional\n", + " return the FF energies in addition to the conformers generated\n", + "\n", + "Returns\n", + "-------\n", + "output_strings : list (str)\n", + " list of conformers generated as whatever format desired\n", + "output_energies : list (float)\n", + " forcefield energies\n", + "\u001b[0;31mFile:\u001b[0m ~/software/Architector/architector/io_obabel.py\n", + "\u001b[0;31mType:\u001b[0m function" + ] + } + ], + "source": [ + "generate_obmol_conformers?" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "..tot conformations = 729\n", + "..tot confs tested = 729\n", + "..below energy threshold = 416\n" + ] + } + ], + "source": [ + "inmol = convert_io_molecule(example_smi)\n", + "confs, energies = generate_obmol_conformers(inmol, return_energies=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [], + "source": [ + "maxn = 50\n", + "energies = np.array(energies)\n", + "confs = np.array(confs)\n", + "inds = np.argsort(energies)[0:50]\n", + "energies = energies[inds]\n", + "confs = confs[inds]" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [], + "source": [ + "for conf in confs:\n", + " mol = convert_io_molecule(conf)\n", + " mol.charge = inmol.charge\n", + " mol.uhf = inmol.uhf\n", + " mol = CalcExecutor(mol, store_results=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [], + "source": [ + "mol = convert_io_molecule(confs[0])\n", + "mol.charge = inmol.charge\n", + "mol.uhf = inmol.uhf\n", + "out = CalcExecutor(mol, store_results=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'energy': np.float64(-816.9000416777939),\n", + " 'free_energy': np.float64(-816.9000416777939),\n", + " 'forces': array([[ 0.28378842, 0.33477263, -0.75373227],\n", + " [-1.04049172, -0.55685275, 1.27703926],\n", + " [ 1.00277457, -0.49240235, 0.99423132],\n", + " [ 0.20456854, 0.15037447, -0.24977243],\n", + " [-0.23821607, -0.20648925, 0.38906024],\n", + " [ 0.34777745, 0.14522854, -0.36219241],\n", + " [-0.34144358, -0.00278862, 0.36040958],\n", + " [-0.05424816, -0.0015257 , -0.46478714],\n", + " [ 0.13792136, -0.0827629 , 0.20812258],\n", + " [ 0.09290374, -0.84759769, -0.98914122],\n", + " [ 0.01585683, 1.24588224, 0.14252203],\n", + " [-0.01605829, 0.08062343, -0.14438943],\n", + " [-0.01462221, 0.15192424, -0.25797314],\n", + " [-0.00521853, -0.12286551, 0.04843276],\n", + " [-0.1023895 , 0.00972636, 0.06939511],\n", + " [ 0.10096792, 0.11171518, -0.03414113],\n", + " [ 0.03253288, -0.01964668, -0.14813087],\n", + " [-0.02911639, -0.10555022, 0.05531861],\n", + " [-0.11324737, 0.0228215 , 0.09956204],\n", + " [ 0.05171199, 0.02541033, -0.05329201],\n", + " [ 0.04721392, 0.05026935, -0.1182356 ],\n", + " [-0.03805434, 0.00324511, 0.04355195],\n", + " [-0.13303325, -0.00172634, 0.08583243],\n", + " [ 0.01991344, 0.20775976, -0.01064227],\n", + " [-0.16217188, -0.02017217, -0.00191085],\n", + " [-0.04961978, -0.07937296, -0.18513718]]),\n", + " 'charges': array([-0.78905706, 0.18155775, -0.05978337, -0.05090621, -0.05102407,\n", + " -0.05303006, -0.05555973, -0.04932335, -0.09548127, -0.11107204,\n", + " -0.11205044, -0.00201463, -0.00399092, 0.00368538, 0.00316856,\n", + " 0.0165721 , 0.01619308, 0.01894226, 0.01878787, 0.02355866,\n", + " 0.02375079, 0.0234871 , 0.02383364, 0.02903448, 0.02282579,\n", + " 0.0278957 ]),\n", + " 'dipole': array([ 0.6491453 , -2.51155028, -0.12032556])}" + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "out.results" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "base", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/development/solubility_dev/Soldb_workflow_dev.ipynb b/development/solubility_dev/Soldb_workflow_dev.ipynb new file mode 100644 index 0000000..65cbcf8 --- /dev/null +++ b/development/solubility_dev/Soldb_workflow_dev.ipynb @@ -0,0 +1,1782 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "8ce90290-eb42-479a-984a-9053f49b476b", + "metadata": {}, + "source": [ + "Download AqSolDb v1:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "9ca328a6-e28d-4098-907f-f62ce2c43386", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " % Total % Received % Xferd Average Speed Time Time Time Current\n", + " Dload Upload Total Spent Left Speed\n", + "100 1144k 100 1144k 0 0 1114k 0 0:00:01 0:00:01 --:--:-- 1115k\n" + ] + } + ], + "source": [ + "!curl https://www.amdlab.nl/database/AqSolDB/data/AqSolDB_v1.0_min.csv > AqSolDB_v1.0_min.csv" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "ee331cc4-7983-4f02-842e-ac66e46bae17", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "from tqdm import tqdm\n", + "from architector.io_obabel import get_obmol_smiles, get_OBMol_coords_anums_graph\n", + "import architector.io_ptable as io_ptable\n", + "from architector import convert_io_molecule, view_structures\n", + "from architector.io_conformers import obmol_conformers\n", + "from architector.io_obabel import generate_obmol_conformers\n", + "from architector.io_calc import CalcExecutor\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "305af5a7-3841-4b62-83ca-11f1cf0cfa83", + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_csv('AqSolDB_v1.0_min.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "48be044d-026a-4398-9517-f327bad15785", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
IDNameInChIKeySMILESSolubility
0A-3N,N,N-trimethyloctadecan-1-aminium bromideSZEMGTQCPRNXEG-UHFFFAOYSA-M[Br-].CCCCCCCCCCCCCCCCCC[N+](C)(C)C-3.616127
1A-4Benzo[cd]indol-2(1H)-oneGPYLCFQEKPUWLD-UHFFFAOYSA-NO=C1Nc2cccc3cccc1c23-3.254767
2A-54-chlorobenzaldehydeAVPYQKSLYISFPO-UHFFFAOYSA-NClc1ccc(C=O)cc1-2.177078
3A-8zinc bis[2-hydroxy-3,5-bis(1-phenylethyl)benzo...XTUPUYCJWKHGSW-UHFFFAOYSA-L[Zn++].CC(c1ccccc1)c2cc(C(C)c3ccccc3)c(O)c(c2)...-3.924409
4A-94-({4-[bis(oxiran-2-ylmethyl)amino]phenyl}meth...FAUAZXVRLVIARB-UHFFFAOYSA-NC1OC1CN(CC2CO2)c3ccc(Cc4ccc(cc4)N(CC5CO5)CC6CO...-4.662065
..................
9977I-84tetracaineGKCBAIGFKIBETG-UHFFFAOYSA-NC(c1ccc(cc1)NCCCC)(=O)OCCN(C)C-3.010000
9978I-85tetracyclineOFVLGDICTFRJMM-WESIUVDSSA-NOC1=C(C(C2=C(O)[C@@](C(C(C(N)=O)=C(O)[C@H]3N(C...-2.930000
9979I-86thymolMGSRCZKZVOBKFT-UHFFFAOYSA-Nc1(cc(ccc1C(C)C)C)O-2.190000
9980I-93verapamilSGTNSNPWRIOYBX-UHFFFAOYSA-NCOc1ccc(CCN(C)CCCC(C#N)(C(C)C)c2ccc(OC)c(OC)c2...-3.980000
9981I-94warfarinPJVWKTKQMONHTI-UHFFFAOYSA-NCC(=O)CC(c1ccccc1)c1c(O)c2ccccc2oc1=O-4.780000
\n", + "

9982 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " ID Name \\\n", + "0 A-3 N,N,N-trimethyloctadecan-1-aminium bromide \n", + "1 A-4 Benzo[cd]indol-2(1H)-one \n", + "2 A-5 4-chlorobenzaldehyde \n", + "3 A-8 zinc bis[2-hydroxy-3,5-bis(1-phenylethyl)benzo... \n", + "4 A-9 4-({4-[bis(oxiran-2-ylmethyl)amino]phenyl}meth... \n", + "... ... ... \n", + "9977 I-84 tetracaine \n", + "9978 I-85 tetracycline \n", + "9979 I-86 thymol \n", + "9980 I-93 verapamil \n", + "9981 I-94 warfarin \n", + "\n", + " InChIKey \\\n", + "0 SZEMGTQCPRNXEG-UHFFFAOYSA-M \n", + "1 GPYLCFQEKPUWLD-UHFFFAOYSA-N \n", + "2 AVPYQKSLYISFPO-UHFFFAOYSA-N \n", + "3 XTUPUYCJWKHGSW-UHFFFAOYSA-L \n", + "4 FAUAZXVRLVIARB-UHFFFAOYSA-N \n", + "... ... \n", + "9977 GKCBAIGFKIBETG-UHFFFAOYSA-N \n", + "9978 OFVLGDICTFRJMM-WESIUVDSSA-N \n", + "9979 MGSRCZKZVOBKFT-UHFFFAOYSA-N \n", + "9980 SGTNSNPWRIOYBX-UHFFFAOYSA-N \n", + "9981 PJVWKTKQMONHTI-UHFFFAOYSA-N \n", + "\n", + " SMILES Solubility \n", + "0 [Br-].CCCCCCCCCCCCCCCCCC[N+](C)(C)C -3.616127 \n", + "1 O=C1Nc2cccc3cccc1c23 -3.254767 \n", + "2 Clc1ccc(C=O)cc1 -2.177078 \n", + "3 [Zn++].CC(c1ccccc1)c2cc(C(C)c3ccccc3)c(O)c(c2)... -3.924409 \n", + "4 C1OC1CN(CC2CO2)c3ccc(Cc4ccc(cc4)N(CC5CO5)CC6CO... -4.662065 \n", + "... ... ... \n", + "9977 C(c1ccc(cc1)NCCCC)(=O)OCCN(C)C -3.010000 \n", + "9978 OC1=C(C(C2=C(O)[C@@](C(C(C(N)=O)=C(O)[C@H]3N(C... -2.930000 \n", + "9979 c1(cc(ccc1C(C)C)C)O -2.190000 \n", + "9980 COc1ccc(CCN(C)CCCC(C#N)(C(C)C)c2ccc(OC)c(OC)c2... -3.980000 \n", + "9981 CC(=O)CC(c1ccccc1)c1c(O)c2ccccc2oc1=O -4.780000 \n", + "\n", + "[9982 rows x 5 columns]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "2ead2917-a732-496d-a2bc-276272b82437", + "metadata": {}, + "outputs": [], + "source": [ + "f1 = (df.SMILES.str.count('\\.') < 1) # Remove any disjoint components" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "644b040b-a189-454b-8b70-87cf60935a5d", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|█████████████████████████████████████████████████████████████████████████████████████████████| 9982/9982 [00:02<00:00, 3715.64it/s]\n" + ] + } + ], + "source": [ + "f2 = pd.Series([True]*df.shape[0])\n", + "for i,row in tqdm(df.iterrows(), total=df.shape[0]):\n", + " obmol = get_obmol_smiles(row['SMILES'], build=False)\n", + " _ , syms, _ = get_OBMol_coords_anums_graph(obmol, get_types=True)\n", + " mets = [x for x in syms if x in io_ptable.all_metals]\n", + " if len(mets) > 0:\n", + " f2[i] = False" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "dcc56629-4007-4a03-b490-bef96370871d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(8797, 5)\n" + ] + } + ], + "source": [ + "fdf = df[(f2) & (f1)]\n", + "print(fdf.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "475134ef-ea63-4b37-9091-e7b4aec9c83f", + "metadata": {}, + "outputs": [], + "source": [ + "def gen_confs(smi, totaln=50):\n", + " inmol = convert_io_molecule(smi)\n", + " confs, energies = generate_obmol_conformers(inmol, return_energies=True, conf_cutoff=totaln*100)\n", + " energies = np.array(energies)\n", + " confs = np.array(confs)\n", + " inds = np.argsort(energies)[0:totaln]\n", + " energies = energies[inds]\n", + " confs = confs[inds]\n", + " out_confs = []\n", + " for i,conf in enumerate(confs):\n", + " tmol = convert_io_molecule(conf)\n", + " tmol.charge = inmol.charge\n", + " tmol.uhf = 0\n", + " tmol.xtb_charge = inmol.charge\n", + " tmol.xtb_uhf = 0\n", + " tconf = tmol.write_mol2('UFF_Energy={}'.format(energies[i]), writestring=True)\n", + " out_confs.append(tconf)\n", + " return out_confs" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "8f8e60b5-ea5d-4d07-bdd7-0fb46e84da9e", + "metadata": {}, + "outputs": [], + "source": [ + "confs = gen_confs('[O-]CCCCC')" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "df4ba26f-4813-4290-bf97-894cebd90802", + "metadata": {}, + "outputs": [ + { + "data": { + "application/3dmoljs_load.v0": "
\n

3Dmol.js failed to load for some reason. Please check your browser console for error messages.

\n
\n", + "text/html": [ + "
\n", + "

3Dmol.js failed to load for some reason. Please check your browser console for error messages.

\n", + "
\n", + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "view_structures(confs[0:10])" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "690e90bb-c8a3-4b3d-98d0-9b957ab90cd5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "25" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(confs)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "e8507261-31d6-46e5-85d7-7aaa1c84a628", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "@MOLECULE\n", + "UFF_Energy=0.1829190280226131 Charge: -1 Unpaired_Electrons: 0 XTB_Unpaired_Electrons: 0 XTB_Charge: -1\n", + " 17 16 1 0 0\n", + "SMALL\n", + "NoCharges\n", + "****\n", + "Generated from Architector\n", + "\n", + "@ATOM\n", + " 1 O1 2.6249 -2.5973 0.0093 O.3 1 RES1 0.0000\n", + " 2 C1 2.2331 -2.0752 -1.1426 C.3 1 RES1 0.0000\n", + " 3 C2 2.7656 -0.6177 -1.2188 C.3 1 RES1 0.0000\n", + " 4 C3 2.4129 0.1674 -2.4944 C.3 1 RES1 0.0000\n", + " 5 C4 2.9453 1.6018 -2.5533 C.3 1 RES1 0.0000\n", + " 6 C5 2.5685 2.3312 -3.8332 C.3 1 RES1 0.0000\n", + " 7 H1 2.5385 -2.5650 -2.0788 H 1 RES1 0.0000\n", + " 8 H2 1.1482 -1.9782 -1.3111 H 1 RES1 0.0000\n", + " 9 H3 2.3738 -0.0682 -0.3529 H 1 RES1 0.0000\n", + " 10 H4 3.8584 -0.6074 -1.0950 H 1 RES1 0.0000\n", + " 11 H5 2.7994 -0.3887 -3.3567 H 1 RES1 0.0000\n", + " 12 H6 1.3234 0.1946 -2.6179 H 1 RES1 0.0000\n", + " 13 H7 2.5332 2.1590 -1.7028 H 1 RES1 0.0000\n", + " 14 H8 4.0328 1.6128 -2.4295 H 1 RES1 0.0000\n", + " 15 H9 1.4815 2.3771 -3.9510 H 1 RES1 0.0000\n", + " 16 H10 2.9489 3.3577 -3.8057 H 1 RES1 0.0000\n", + " 17 H11 2.9927 1.8457 -4.7164 H 1 RES1 0.0000\n", + "@BOND\n", + " 1 1 2 1\n", + " 2 2 3 1\n", + " 3 2 7 1\n", + " 4 2 8 1\n", + " 5 3 4 1\n", + " 6 3 9 1\n", + " 7 3 10 1\n", + " 8 4 5 1\n", + " 9 4 11 1\n", + " 10 4 12 1\n", + " 11 5 6 1\n", + " 12 5 13 1\n", + " 13 5 14 1\n", + " 14 6 15 1\n", + " 15 6 16 1\n", + " 16 6 17 1\n", + "@SUBSTRUCTURE\n", + " 1 RES1 17 GROUP 0 **** **** 0 \n", + "\n" + ] + } + ], + "source": [ + "print(confs[0])" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "5910242a-03f6-4649-84e2-f89d019b2ad0", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Confomers: 50\n" + ] + }, + { + "data": { + "application/3dmoljs_load.v0": "
\n

3Dmol.js failed to load for some reason. Please check your browser console for error messages.

\n
\n", + "text/html": [ + "
\n", + "

3Dmol.js failed to load for some reason. Please check your browser console for error messages.

\n", + "
\n", + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/3dmoljs_load.v0": "
\n

3Dmol.js failed to load for some reason. Please check your browser console for error messages.

\n
\n", + "text/html": [ + "
\n", + "

3Dmol.js failed to load for some reason. Please check your browser console for error messages.

\n", + "
\n", + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "ind = 12\n", + "confs = gen_confs(fdf.iloc[ind]['SMILES'])\n", + "print('Confomers:', len(confs))\n", + "view_structures(fdf.iloc[ind]['SMILES'])\n", + "view_structures(confs[0:4])" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "id": "c8bea742-88d4-4fef-8b9b-53ccd0f24aa2", + "metadata": {}, + "outputs": [], + "source": [ + "def evalulate_conf_xtb(conf):\n", + " import os\n", + " os.environ[\"OMP_NUM_THREADS\"] = \"1\"\n", + " os.environ[\"OPENBLAS_NUM_THREADS\"] = \"1\"\n", + " os.environ[\"MKL_NUM_THREADS\"] = \"1\"\n", + " os.environ[\"VECLIB_MAXIMUM_THREADS\"] = \"1\"\n", + " os.environ[\"NUMEXPR_NUM_THREADS\"] = \"1\"\n", + " from architector.io_calc import CalcExecutor\n", + " from xtb_solvent import xtb_solv_params\n", + " solvent='water'\n", + " out = CalcExecutor(conf,\n", + " method='GFN2-xTB',\n", + " store_results=True,\n", + " relax=True,\n", + " fmax=0.05,\n", + " xtb_solvent=solvent)\n", + " if out.successful:\n", + " results = out.results\n", + " results['xtb_mol2'] = out.mol.write_mol2('GFn2-XTB_relax', \n", + " writestring=True)\n", + " # xtb_sa_eval_dict = xtb_solv_params(results['xtb_mol2'], solvent=solvent)\n", + " # if isinstance(xtb_sa_eval_dict, dict):\n", + " # results.update(xtb_sa_eval_dict)\n", + " # else:\n", + " # return None\n", + " return results\n", + " else:\n", + " return None" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "id": "fed40e14", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "normal termination of xtb\n", + "Note: The following floating-point exceptions are signalling: IEEE_UNDERFLOW_FLAG\n" + ] + } + ], + "source": [ + "out = evalulate_conf_xtb(confs[0])" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "id": "bffd364a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['energy', 'free_energy', 'forces', 'dipole', 'charges', 'xtb_mol2', 'sas', 'born_radii'])" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "out.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "id": "400ed6e3-e8f6-423c-ac4b-84c894adda4a", + "metadata": {}, + "outputs": [], + "source": [ + "## Just hangs for me.\n", + "# from executorlib import Executor\n", + "\n", + "# with Executor(max_workers=12, cores_per_worker=1, threads_per_core=1) as exe:\n", + "# out_results = list(tqdm(exe.map(evalulate_conf_xtb, confs), total=len(confs)))" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "id": "78d97781-f708-44c6-9283-5e6595adae94", + "metadata": {}, + "outputs": [], + "source": [ + "from concurrent.futures import ProcessPoolExecutor\n", + "import multiprocessing as mp" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "id": "2fc8e44e-a07b-4b1e-86b1-0b0420f9cbc5", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|███████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:05<00:00, 9.61it/s]\n" + ] + } + ], + "source": [ + "with ProcessPoolExecutor(\n", + " max_workers=12,\n", + " mp_context=mp.get_context('fork')) as exe:\n", + " out_results = list(tqdm(exe.map(evalulate_conf_xtb, confs), total=len(confs)))" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "id": "a5ea4ad3-0118-4b43-9ba5-229246098411", + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.DataFrame(out_results)" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "id": "48a253f9-1f71-45cb-841f-45225c72d292", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
energyfree_energyforcesdipolechargesxtb_mol2
0-941.953893-941.953893[[-0.0064054274805771675, 0.002893010498177088...[-0.04072509722616722, 0.4271721600245919, 0.5...[-0.11478628082344275, 0.003135417180573183, -...@<TRIPOS>MOLECULE\\nGFn2-XTB_relax Charge: 0 Un...
1-941.950280-941.950280[[-0.005747968896342007, 0.0018745948789265158...[-0.17535441597366486, -0.4851559094552098, 0....[-0.11412520265988979, 0.0030248534750732248, ...@<TRIPOS>MOLECULE\\nGFn2-XTB_relax Charge: 0 Un...
2-942.042879-942.042879[[-0.004074275762437106, -0.004648667982582211...[-0.04347311031771758, -0.537465498185844, 0.1...[-0.11308169713123725, 0.0046844825432522524, ...@<TRIPOS>MOLECULE\\nGFn2-XTB_relax Charge: 0 Un...
3-941.917303-941.917303[[0.02297184960918795, -0.007096980853435831, ...[-0.1081611768128786, 0.5700338757740078, 0.39...[-0.11282084473184698, 0.00343528545446857, -0...@<TRIPOS>MOLECULE\\nGFn2-XTB_relax Charge: 0 Un...
4-941.925803-941.925803[[-0.00950542960255489, 0.0013225729101183279,...[0.03999807451346173, 0.5137888912027536, 0.43...[-0.1133431929759389, 0.004619124125817553, -0...@<TRIPOS>MOLECULE\\nGFn2-XTB_relax Charge: 0 Un...
5-941.944995-941.944995[[-0.012649020593404663, 0.008891172917475326,...[-0.02019514472401081, 0.3534161701592894, 0.5...[-0.1132901094327726, 0.0038520755462167766, -...@<TRIPOS>MOLECULE\\nGFn2-XTB_relax Charge: 0 Un...
6-941.963922-941.963922[[-0.004040395285581353, -0.005494595557095002...[-0.2760326399408978, -0.6226984118173071, 0.2...[-0.1132794690839442, 0.005506370801200339, -0...@<TRIPOS>MOLECULE\\nGFn2-XTB_relax Charge: 0 Un...
7-941.933859-941.933859[[-0.03113600670791028, -0.012523592246384135,...[-0.4646939981518977, -0.4543851187179526, 0.2...[-0.11455749486667728, 0.004402608688685528, -...@<TRIPOS>MOLECULE\\nGFn2-XTB_relax Charge: 0 Un...
8-941.933840-941.933840[[-0.0023090121795046, -0.0012468292679679693,...[-0.11902841857768293, 0.37839388841507415, 0....[-0.11360419866355351, 0.0037736931269920365, ...@<TRIPOS>MOLECULE\\nGFn2-XTB_relax Charge: 0 Un...
9-941.942060-941.942060[[-0.005573365585241426, 0.016333912964534444,...[0.008244584381401365, 0.3509699766971826, 0.3...[-0.11019970991046024, 0.005848684216261016, -...@<TRIPOS>MOLECULE\\nGFn2-XTB_relax Charge: 0 Un...
10-941.982895-941.982895[[-0.005789551710469486, -0.005415001277539138...[-0.20080321600305756, -0.441855094329486, 0.2...[-0.11285473497350985, 0.005063549018848312, -...@<TRIPOS>MOLECULE\\nGFn2-XTB_relax Charge: 0 Un...
11-941.989306-941.989306[[0.0023797569165056494, 0.0014927387150377607...[-0.34217775743938655, 0.206008261524908, 0.39...[-0.11232483049930286, 0.0042552854505437665, ...@<TRIPOS>MOLECULE\\nGFn2-XTB_relax Charge: 0 Un...
12-941.996077-941.996077[[0.007872942006244004, -0.015973658216846945,...[-0.03057891685648984, -0.6035728943770056, 0....[-0.11466408844107442, 0.0030596302840158046, ...@<TRIPOS>MOLECULE\\nGFn2-XTB_relax Charge: 0 Un...
13-941.961751-941.961751[[0.013873978777933609, 0.008714346279801942, ...[-0.3090836179001432, -0.5710604396804403, 0.2...[-0.11457207094463301, 0.004317772363820485, -...@<TRIPOS>MOLECULE\\nGFn2-XTB_relax Charge: 0 Un...
14-941.945587-941.945587[[0.02330201120535815, 0.0037549411116072928, ...[-0.06403308012802437, 0.4502260235518645, 0.3...[-0.11190094981249321, 0.006234969573618007, -...@<TRIPOS>MOLECULE\\nGFn2-XTB_relax Charge: 0 Un...
15-941.959734-941.959734[[-0.004274421525671059, 0.00933781864063549, ...[-0.2996062613744575, 0.42448345272549765, 0.4...[-0.11448587052718476, 0.00301700806848354, -0...@<TRIPOS>MOLECULE\\nGFn2-XTB_relax Charge: 0 Un...
16-941.957871-941.957871[[0.029044434923186434, -0.014841328242279772,...[-0.20993014063235144, 0.49726264633227535, 0....[-0.11264407067753646, 0.0032690517505117173, ...@<TRIPOS>MOLECULE\\nGFn2-XTB_relax Charge: 0 Un...
17-941.975852-941.975852[[0.013154518965379634, 0.00377022015669898, 0...[0.008486763537757912, 0.4417631514538985, 0.4...[-0.11406889911063403, 0.003555211048615295, -...@<TRIPOS>MOLECULE\\nGFn2-XTB_relax Charge: 0 Un...
18-942.008965-942.008965[[-0.007688946886930841, 0.0037896335380984384...[0.10234664997307748, -0.5114196914304929, 0.2...[-0.11205578255935358, 0.004674101182466722, -...@<TRIPOS>MOLECULE\\nGFn2-XTB_relax Charge: 0 Un...
19-941.966669-941.966669[[0.0026953435096534087, 0.002979370969931447,...[-0.2988712089214222, 0.4570317276286429, -0.1...[-0.11239806409192919, 0.0021378138634558785, ...@<TRIPOS>MOLECULE\\nGFn2-XTB_relax Charge: 0 Un...
20-941.976303-941.976303[[0.00949072281355, -0.003257001332871327, 0.0...[-0.4087391157241313, 0.3738489962543335, 0.38...[-0.11341018058416302, 0.0010303001999083239, ...@<TRIPOS>MOLECULE\\nGFn2-XTB_relax Charge: 0 Un...
21-941.957536-941.957536[[-0.006712128253738265, 0.003174343250744261,...[-0.07689631278551505, 0.49246974693425255, 0....[-0.11285616336687668, 0.0006248046019975695, ...@<TRIPOS>MOLECULE\\nGFn2-XTB_relax Charge: 0 Un...
22-941.986535-941.986535[[0.0031198718917791464, 0.0035276424727782303...[-0.13607065699767962, 0.32826054550023237, -0...[-0.11352561367974287, 0.0022344351214790142, ...@<TRIPOS>MOLECULE\\nGFn2-XTB_relax Charge: 0 Un...
23-941.968502-941.968502[[-0.0008081220351732268, -0.02263822517147316...[-0.272093398576091, 0.4600825022722129, -0.12...[-0.11395220340664092, 0.00478164559167335, -0...@<TRIPOS>MOLECULE\\nGFn2-XTB_relax Charge: 0 Un...
24-941.944514-941.944514[[-0.013292161034476373, 0.006266857420767831,...[-0.19348582923642033, 0.4954191617800937, -0....[-0.11329557958925107, 0.004670431551598034, -...@<TRIPOS>MOLECULE\\nGFn2-XTB_relax Charge: 0 Un...
25-941.967755-941.967755[[0.018707547630603883, -0.006985174117220028,...[-0.24485392634771252, 0.37875702621492124, 0....[-0.11473579876678727, 0.0038344852950510933, ...@<TRIPOS>MOLECULE\\nGFn2-XTB_relax Charge: 0 Un...
26-941.981779-941.981779[[0.0042593208766267395, -0.001137467886214728...[0.3710727954892336, 0.5068447243139468, 0.433...[-0.11302409334121935, 0.005889123355889356, -...@<TRIPOS>MOLECULE\\nGFn2-XTB_relax Charge: 0 Un...
27-941.937742-941.937742[[-0.004415312469214524, -0.020941742452419677...[-0.33549841731401703, -0.21726389396929238, -...[-0.11295230012463539, 0.003917668476299971, -...@<TRIPOS>MOLECULE\\nGFn2-XTB_relax Charge: 0 Un...
28-941.959590-941.959590[[-0.0019702566455652728, -0.00723399908969859...[-0.1692058793516984, 0.4845564511321144, -0.2...[-0.11328248251225162, 0.0031808024696459575, ...@<TRIPOS>MOLECULE\\nGFn2-XTB_relax Charge: 0 Un...
29-941.932717-941.932717[[-0.028987095684595784, -0.002520984421162864...[0.11355148006057446, 0.31523511055311776, 0.5...[-0.11290935710169636, 0.0052879522534911155, ...@<TRIPOS>MOLECULE\\nGFn2-XTB_relax Charge: 0 Un...
30-942.005847-942.005847[[-0.029631153712220554, 0.007592473178661552,...[0.017330046307516635, -0.3929130878068432, -0...[-0.11221807413984114, 0.004229490258218435, -...@<TRIPOS>MOLECULE\\nGFn2-XTB_relax Charge: 0 Un...
31-941.901757-941.901757[[0.004824352437003144, -0.009401487785090024,...[0.25575055726103113, 0.40874344166617377, 0.3...[-0.11242996946527903, -0.0002510390197548451,...@<TRIPOS>MOLECULE\\nGFn2-XTB_relax Charge: 0 Un...
32-941.950690-941.950690[[0.0026851207587491373, 0.0068610564945188945...[-0.2803754786065773, -0.5334886680474346, 0.3...[-0.1143846573356333, 0.0007603854631822171, -...@<TRIPOS>MOLECULE\\nGFn2-XTB_relax Charge: 0 Un...
33-941.968843-941.968843[[-0.01867505201993493, 0.003181643012599281, ...[-0.15301724594136062, 0.2790231605693987, 0.4...[-0.11036408143591933, 0.004562874819385346, -...@<TRIPOS>MOLECULE\\nGFn2-XTB_relax Charge: 0 Un...
34-941.960774-941.960774[[-0.011877644788936905, -0.026954376709123075...[-0.180940180910476, 0.5516790344836181, -0.03...[-0.11294364578269431, 0.0058543182899778445, ...@<TRIPOS>MOLECULE\\nGFn2-XTB_relax Charge: 0 Un...
35-942.014285-942.014285[[0.0022976800395958846, -0.013589687592119109...[-0.1233117303197784, 0.404802148964335, -0.22...[-0.11307602877487552, 0.002501638750332616, -...@<TRIPOS>MOLECULE\\nGFn2-XTB_relax Charge: 0 Un...
36-941.903403-941.903403[[-0.02331996214296517, 0.010685002632797171, ...[-0.332922798967579, -0.12642438605821282, -0....[-0.11343353190285174, 0.0043780086253094735, ...@<TRIPOS>MOLECULE\\nGFn2-XTB_relax Charge: 0 Un...
37-942.007395-942.007395[[0.0037780741836213076, 0.01902316925118944, ...[-0.1858311274812901, -0.21594710678209902, -0...[-0.11177213363081451, 0.004453577860408647, -...@<TRIPOS>MOLECULE\\nGFn2-XTB_relax Charge: 0 Un...
38-941.988840-941.988840[[-0.03240830104540279, -0.02253494700723065, ...[-0.2555268008384709, -0.029942606439405283, -...[-0.11609808215104198, 0.006123767778125511, -...@<TRIPOS>MOLECULE\\nGFn2-XTB_relax Charge: 0 Un...
39-941.969572-941.969572[[0.0040911268112533246, 0.012365663907514207,...[-0.2663932517587651, 0.26697139093640154, -0....[-0.11165253467512878, 0.0049063381096841885, ...@<TRIPOS>MOLECULE\\nGFn2-XTB_relax Charge: 0 Un...
40-941.927235-941.927235[[-0.008174874431749588, 0.013413745987587647,...[0.20904164456672533, 0.448837039860279, 0.397...[-0.1145075396193811, 0.0021868780484034234, -...@<TRIPOS>MOLECULE\\nGFn2-XTB_relax Charge: 0 Un...
41-942.000749-942.000749[[0.01962134024504779, 0.003156185634012357, 0...[-0.06965001336782749, -0.3691016573623966, -0...[-0.11276189668287563, 0.004253667859406118, -...@<TRIPOS>MOLECULE\\nGFn2-XTB_relax Charge: 0 Un...
42-941.971101-941.971101[[-0.004151267084641824, 0.00915523136618795, ...[-0.4321532218356113, 0.20033685859622585, 0.1...[-0.11322831382126233, 0.004611301130298395, -...@<TRIPOS>MOLECULE\\nGFn2-XTB_relax Charge: 0 Un...
43-941.939776-941.939776[[-0.010520437363048455, 0.003726169169612492,...[0.06603613307825389, 0.40741966296805665, -0....[-0.11132501957504222, 0.005441943149804271, -...@<TRIPOS>MOLECULE\\nGFn2-XTB_relax Charge: 0 Un...
44-941.953197-941.953197[[0.0018059801146283656, -0.00335369257013824,...[-0.20444341850718695, -0.4359319005681523, -0...[-0.11333333683408459, 0.0009756690332999446, ...@<TRIPOS>MOLECULE\\nGFn2-XTB_relax Charge: 0 Un...
45-941.946527-941.946527[[0.014795988019366456, 0.02189946451324612, 0...[-0.3175890531027094, 0.43342801047633495, -0....[-0.11427698722596516, 0.0023814808037803115, ...@<TRIPOS>MOLECULE\\nGFn2-XTB_relax Charge: 0 Un...
46-941.940041-941.940041[[0.00015349676460309794, -0.00903777215247311...[0.019740685879562958, 0.6193679712236572, 0.4...[-0.11393793263361515, 0.006143554757771022, -...@<TRIPOS>MOLECULE\\nGFn2-XTB_relax Charge: 0 Un...
47-941.976173-941.976173[[-0.00358151778159043, 0.01569987890645837, 0...[-0.4206374798811715, 0.3549809044674297, 0.01...[-0.11447556030989668, 0.001317522791494885, -...@<TRIPOS>MOLECULE\\nGFn2-XTB_relax Charge: 0 Un...
48-941.952127-941.952127[[-0.0028454505148780237, 0.014495838430332444...[-0.030667465591407494, -0.3920030947808661, -...[-0.11077041313482294, 0.0063098981597411175, ...@<TRIPOS>MOLECULE\\nGFn2-XTB_relax Charge: 0 Un...
49-941.940078-941.940078[[0.005687163462374582, 0.009315960485936454, ...[-0.423702477923642, 0.1986383126846553, 0.494...[-0.11355494951655856, 0.001975819011750174, -...@<TRIPOS>MOLECULE\\nGFn2-XTB_relax Charge: 0 Un...
\n", + "
" + ], + "text/plain": [ + " energy free_energy \\\n", + "0 -941.953893 -941.953893 \n", + "1 -941.950280 -941.950280 \n", + "2 -942.042879 -942.042879 \n", + "3 -941.917303 -941.917303 \n", + "4 -941.925803 -941.925803 \n", + "5 -941.944995 -941.944995 \n", + "6 -941.963922 -941.963922 \n", + "7 -941.933859 -941.933859 \n", + "8 -941.933840 -941.933840 \n", + "9 -941.942060 -941.942060 \n", + "10 -941.982895 -941.982895 \n", + "11 -941.989306 -941.989306 \n", + "12 -941.996077 -941.996077 \n", + "13 -941.961751 -941.961751 \n", + "14 -941.945587 -941.945587 \n", + "15 -941.959734 -941.959734 \n", + "16 -941.957871 -941.957871 \n", + "17 -941.975852 -941.975852 \n", + "18 -942.008965 -942.008965 \n", + "19 -941.966669 -941.966669 \n", + "20 -941.976303 -941.976303 \n", + "21 -941.957536 -941.957536 \n", + "22 -941.986535 -941.986535 \n", + "23 -941.968502 -941.968502 \n", + "24 -941.944514 -941.944514 \n", + "25 -941.967755 -941.967755 \n", + "26 -941.981779 -941.981779 \n", + "27 -941.937742 -941.937742 \n", + "28 -941.959590 -941.959590 \n", + "29 -941.932717 -941.932717 \n", + "30 -942.005847 -942.005847 \n", + "31 -941.901757 -941.901757 \n", + "32 -941.950690 -941.950690 \n", + "33 -941.968843 -941.968843 \n", + "34 -941.960774 -941.960774 \n", + "35 -942.014285 -942.014285 \n", + "36 -941.903403 -941.903403 \n", + "37 -942.007395 -942.007395 \n", + "38 -941.988840 -941.988840 \n", + "39 -941.969572 -941.969572 \n", + "40 -941.927235 -941.927235 \n", + "41 -942.000749 -942.000749 \n", + "42 -941.971101 -941.971101 \n", + "43 -941.939776 -941.939776 \n", + "44 -941.953197 -941.953197 \n", + "45 -941.946527 -941.946527 \n", + "46 -941.940041 -941.940041 \n", + "47 -941.976173 -941.976173 \n", + "48 -941.952127 -941.952127 \n", + "49 -941.940078 -941.940078 \n", + "\n", + " forces \\\n", + "0 [[-0.0064054274805771675, 0.002893010498177088... \n", + "1 [[-0.005747968896342007, 0.0018745948789265158... \n", + "2 [[-0.004074275762437106, -0.004648667982582211... \n", + "3 [[0.02297184960918795, -0.007096980853435831, ... \n", + "4 [[-0.00950542960255489, 0.0013225729101183279,... \n", + "5 [[-0.012649020593404663, 0.008891172917475326,... \n", + "6 [[-0.004040395285581353, -0.005494595557095002... \n", + "7 [[-0.03113600670791028, -0.012523592246384135,... \n", + "8 [[-0.0023090121795046, -0.0012468292679679693,... \n", + "9 [[-0.005573365585241426, 0.016333912964534444,... \n", + "10 [[-0.005789551710469486, -0.005415001277539138... \n", + "11 [[0.0023797569165056494, 0.0014927387150377607... \n", + "12 [[0.007872942006244004, -0.015973658216846945,... \n", + "13 [[0.013873978777933609, 0.008714346279801942, ... \n", + "14 [[0.02330201120535815, 0.0037549411116072928, ... \n", + "15 [[-0.004274421525671059, 0.00933781864063549, ... \n", + "16 [[0.029044434923186434, -0.014841328242279772,... \n", + "17 [[0.013154518965379634, 0.00377022015669898, 0... \n", + "18 [[-0.007688946886930841, 0.0037896335380984384... \n", + "19 [[0.0026953435096534087, 0.002979370969931447,... \n", + "20 [[0.00949072281355, -0.003257001332871327, 0.0... \n", + "21 [[-0.006712128253738265, 0.003174343250744261,... \n", + "22 [[0.0031198718917791464, 0.0035276424727782303... \n", + "23 [[-0.0008081220351732268, -0.02263822517147316... \n", + "24 [[-0.013292161034476373, 0.006266857420767831,... \n", + "25 [[0.018707547630603883, -0.006985174117220028,... \n", + "26 [[0.0042593208766267395, -0.001137467886214728... \n", + "27 [[-0.004415312469214524, -0.020941742452419677... \n", + "28 [[-0.0019702566455652728, -0.00723399908969859... \n", + "29 [[-0.028987095684595784, -0.002520984421162864... \n", + "30 [[-0.029631153712220554, 0.007592473178661552,... \n", + "31 [[0.004824352437003144, -0.009401487785090024,... \n", + "32 [[0.0026851207587491373, 0.0068610564945188945... \n", + "33 [[-0.01867505201993493, 0.003181643012599281, ... \n", + "34 [[-0.011877644788936905, -0.026954376709123075... \n", + "35 [[0.0022976800395958846, -0.013589687592119109... \n", + "36 [[-0.02331996214296517, 0.010685002632797171, ... \n", + "37 [[0.0037780741836213076, 0.01902316925118944, ... \n", + "38 [[-0.03240830104540279, -0.02253494700723065, ... \n", + "39 [[0.0040911268112533246, 0.012365663907514207,... \n", + "40 [[-0.008174874431749588, 0.013413745987587647,... \n", + "41 [[0.01962134024504779, 0.003156185634012357, 0... \n", + "42 [[-0.004151267084641824, 0.00915523136618795, ... \n", + "43 [[-0.010520437363048455, 0.003726169169612492,... \n", + "44 [[0.0018059801146283656, -0.00335369257013824,... \n", + "45 [[0.014795988019366456, 0.02189946451324612, 0... \n", + "46 [[0.00015349676460309794, -0.00903777215247311... \n", + "47 [[-0.00358151778159043, 0.01569987890645837, 0... \n", + "48 [[-0.0028454505148780237, 0.014495838430332444... \n", + "49 [[0.005687163462374582, 0.009315960485936454, ... \n", + "\n", + " dipole \\\n", + "0 [-0.04072509722616722, 0.4271721600245919, 0.5... \n", + "1 [-0.17535441597366486, -0.4851559094552098, 0.... \n", + "2 [-0.04347311031771758, -0.537465498185844, 0.1... \n", + "3 [-0.1081611768128786, 0.5700338757740078, 0.39... \n", + "4 [0.03999807451346173, 0.5137888912027536, 0.43... \n", + "5 [-0.02019514472401081, 0.3534161701592894, 0.5... \n", + "6 [-0.2760326399408978, -0.6226984118173071, 0.2... \n", + "7 [-0.4646939981518977, -0.4543851187179526, 0.2... \n", + "8 [-0.11902841857768293, 0.37839388841507415, 0.... \n", + "9 [0.008244584381401365, 0.3509699766971826, 0.3... \n", + "10 [-0.20080321600305756, -0.441855094329486, 0.2... \n", + "11 [-0.34217775743938655, 0.206008261524908, 0.39... \n", + "12 [-0.03057891685648984, -0.6035728943770056, 0.... \n", + "13 [-0.3090836179001432, -0.5710604396804403, 0.2... \n", + "14 [-0.06403308012802437, 0.4502260235518645, 0.3... \n", + "15 [-0.2996062613744575, 0.42448345272549765, 0.4... \n", + "16 [-0.20993014063235144, 0.49726264633227535, 0.... \n", + "17 [0.008486763537757912, 0.4417631514538985, 0.4... \n", + "18 [0.10234664997307748, -0.5114196914304929, 0.2... \n", + "19 [-0.2988712089214222, 0.4570317276286429, -0.1... \n", + "20 [-0.4087391157241313, 0.3738489962543335, 0.38... \n", + "21 [-0.07689631278551505, 0.49246974693425255, 0.... \n", + "22 [-0.13607065699767962, 0.32826054550023237, -0... \n", + "23 [-0.272093398576091, 0.4600825022722129, -0.12... \n", + "24 [-0.19348582923642033, 0.4954191617800937, -0.... \n", + "25 [-0.24485392634771252, 0.37875702621492124, 0.... \n", + "26 [0.3710727954892336, 0.5068447243139468, 0.433... \n", + "27 [-0.33549841731401703, -0.21726389396929238, -... \n", + "28 [-0.1692058793516984, 0.4845564511321144, -0.2... \n", + "29 [0.11355148006057446, 0.31523511055311776, 0.5... \n", + "30 [0.017330046307516635, -0.3929130878068432, -0... \n", + "31 [0.25575055726103113, 0.40874344166617377, 0.3... \n", + "32 [-0.2803754786065773, -0.5334886680474346, 0.3... \n", + "33 [-0.15301724594136062, 0.2790231605693987, 0.4... \n", + "34 [-0.180940180910476, 0.5516790344836181, -0.03... \n", + "35 [-0.1233117303197784, 0.404802148964335, -0.22... \n", + "36 [-0.332922798967579, -0.12642438605821282, -0.... \n", + "37 [-0.1858311274812901, -0.21594710678209902, -0... \n", + "38 [-0.2555268008384709, -0.029942606439405283, -... \n", + "39 [-0.2663932517587651, 0.26697139093640154, -0.... \n", + "40 [0.20904164456672533, 0.448837039860279, 0.397... \n", + "41 [-0.06965001336782749, -0.3691016573623966, -0... \n", + "42 [-0.4321532218356113, 0.20033685859622585, 0.1... \n", + "43 [0.06603613307825389, 0.40741966296805665, -0.... \n", + "44 [-0.20444341850718695, -0.4359319005681523, -0... \n", + "45 [-0.3175890531027094, 0.43342801047633495, -0.... \n", + "46 [0.019740685879562958, 0.6193679712236572, 0.4... \n", + "47 [-0.4206374798811715, 0.3549809044674297, 0.01... \n", + "48 [-0.030667465591407494, -0.3920030947808661, -... \n", + "49 [-0.423702477923642, 0.1986383126846553, 0.494... \n", + "\n", + " charges \\\n", + "0 [-0.11478628082344275, 0.003135417180573183, -... \n", + "1 [-0.11412520265988979, 0.0030248534750732248, ... \n", + "2 [-0.11308169713123725, 0.0046844825432522524, ... \n", + "3 [-0.11282084473184698, 0.00343528545446857, -0... \n", + "4 [-0.1133431929759389, 0.004619124125817553, -0... \n", + "5 [-0.1132901094327726, 0.0038520755462167766, -... \n", + "6 [-0.1132794690839442, 0.005506370801200339, -0... \n", + "7 [-0.11455749486667728, 0.004402608688685528, -... \n", + "8 [-0.11360419866355351, 0.0037736931269920365, ... \n", + "9 [-0.11019970991046024, 0.005848684216261016, -... \n", + "10 [-0.11285473497350985, 0.005063549018848312, -... \n", + "11 [-0.11232483049930286, 0.0042552854505437665, ... \n", + "12 [-0.11466408844107442, 0.0030596302840158046, ... \n", + "13 [-0.11457207094463301, 0.004317772363820485, -... \n", + "14 [-0.11190094981249321, 0.006234969573618007, -... \n", + "15 [-0.11448587052718476, 0.00301700806848354, -0... \n", + "16 [-0.11264407067753646, 0.0032690517505117173, ... \n", + "17 [-0.11406889911063403, 0.003555211048615295, -... \n", + "18 [-0.11205578255935358, 0.004674101182466722, -... \n", + "19 [-0.11239806409192919, 0.0021378138634558785, ... \n", + "20 [-0.11341018058416302, 0.0010303001999083239, ... \n", + "21 [-0.11285616336687668, 0.0006248046019975695, ... \n", + "22 [-0.11352561367974287, 0.0022344351214790142, ... \n", + "23 [-0.11395220340664092, 0.00478164559167335, -0... \n", + "24 [-0.11329557958925107, 0.004670431551598034, -... \n", + "25 [-0.11473579876678727, 0.0038344852950510933, ... \n", + "26 [-0.11302409334121935, 0.005889123355889356, -... \n", + "27 [-0.11295230012463539, 0.003917668476299971, -... \n", + "28 [-0.11328248251225162, 0.0031808024696459575, ... \n", + "29 [-0.11290935710169636, 0.0052879522534911155, ... \n", + "30 [-0.11221807413984114, 0.004229490258218435, -... \n", + "31 [-0.11242996946527903, -0.0002510390197548451,... \n", + "32 [-0.1143846573356333, 0.0007603854631822171, -... \n", + "33 [-0.11036408143591933, 0.004562874819385346, -... \n", + "34 [-0.11294364578269431, 0.0058543182899778445, ... \n", + "35 [-0.11307602877487552, 0.002501638750332616, -... \n", + "36 [-0.11343353190285174, 0.0043780086253094735, ... \n", + "37 [-0.11177213363081451, 0.004453577860408647, -... \n", + "38 [-0.11609808215104198, 0.006123767778125511, -... \n", + "39 [-0.11165253467512878, 0.0049063381096841885, ... \n", + "40 [-0.1145075396193811, 0.0021868780484034234, -... \n", + "41 [-0.11276189668287563, 0.004253667859406118, -... \n", + "42 [-0.11322831382126233, 0.004611301130298395, -... \n", + "43 [-0.11132501957504222, 0.005441943149804271, -... \n", + "44 [-0.11333333683408459, 0.0009756690332999446, ... \n", + "45 [-0.11427698722596516, 0.0023814808037803115, ... \n", + "46 [-0.11393793263361515, 0.006143554757771022, -... \n", + "47 [-0.11447556030989668, 0.001317522791494885, -... \n", + "48 [-0.11077041313482294, 0.0063098981597411175, ... \n", + "49 [-0.11355494951655856, 0.001975819011750174, -... \n", + "\n", + " xtb_mol2 \n", + "0 @MOLECULE\\nGFn2-XTB_relax Charge: 0 Un... \n", + "1 @MOLECULE\\nGFn2-XTB_relax Charge: 0 Un... \n", + "2 @MOLECULE\\nGFn2-XTB_relax Charge: 0 Un... \n", + "3 @MOLECULE\\nGFn2-XTB_relax Charge: 0 Un... \n", + "4 @MOLECULE\\nGFn2-XTB_relax Charge: 0 Un... \n", + "5 @MOLECULE\\nGFn2-XTB_relax Charge: 0 Un... \n", + "6 @MOLECULE\\nGFn2-XTB_relax Charge: 0 Un... \n", + "7 @MOLECULE\\nGFn2-XTB_relax Charge: 0 Un... \n", + "8 @MOLECULE\\nGFn2-XTB_relax Charge: 0 Un... \n", + "9 @MOLECULE\\nGFn2-XTB_relax Charge: 0 Un... \n", + "10 @MOLECULE\\nGFn2-XTB_relax Charge: 0 Un... \n", + "11 @MOLECULE\\nGFn2-XTB_relax Charge: 0 Un... \n", + "12 @MOLECULE\\nGFn2-XTB_relax Charge: 0 Un... \n", + "13 @MOLECULE\\nGFn2-XTB_relax Charge: 0 Un... \n", + "14 @MOLECULE\\nGFn2-XTB_relax Charge: 0 Un... \n", + "15 @MOLECULE\\nGFn2-XTB_relax Charge: 0 Un... \n", + "16 @MOLECULE\\nGFn2-XTB_relax Charge: 0 Un... \n", + "17 @MOLECULE\\nGFn2-XTB_relax Charge: 0 Un... \n", + "18 @MOLECULE\\nGFn2-XTB_relax Charge: 0 Un... \n", + "19 @MOLECULE\\nGFn2-XTB_relax Charge: 0 Un... \n", + "20 @MOLECULE\\nGFn2-XTB_relax Charge: 0 Un... \n", + "21 @MOLECULE\\nGFn2-XTB_relax Charge: 0 Un... \n", + "22 @MOLECULE\\nGFn2-XTB_relax Charge: 0 Un... \n", + "23 @MOLECULE\\nGFn2-XTB_relax Charge: 0 Un... \n", + "24 @MOLECULE\\nGFn2-XTB_relax Charge: 0 Un... \n", + "25 @MOLECULE\\nGFn2-XTB_relax Charge: 0 Un... \n", + "26 @MOLECULE\\nGFn2-XTB_relax Charge: 0 Un... \n", + "27 @MOLECULE\\nGFn2-XTB_relax Charge: 0 Un... \n", + "28 @MOLECULE\\nGFn2-XTB_relax Charge: 0 Un... \n", + "29 @MOLECULE\\nGFn2-XTB_relax Charge: 0 Un... \n", + "30 @MOLECULE\\nGFn2-XTB_relax Charge: 0 Un... \n", + "31 @MOLECULE\\nGFn2-XTB_relax Charge: 0 Un... \n", + "32 @MOLECULE\\nGFn2-XTB_relax Charge: 0 Un... \n", + "33 @MOLECULE\\nGFn2-XTB_relax Charge: 0 Un... \n", + "34 @MOLECULE\\nGFn2-XTB_relax Charge: 0 Un... \n", + "35 @MOLECULE\\nGFn2-XTB_relax Charge: 0 Un... \n", + "36 @MOLECULE\\nGFn2-XTB_relax Charge: 0 Un... \n", + "37 @MOLECULE\\nGFn2-XTB_relax Charge: 0 Un... \n", + "38 @MOLECULE\\nGFn2-XTB_relax Charge: 0 Un... \n", + "39 @MOLECULE\\nGFn2-XTB_relax Charge: 0 Un... \n", + "40 @MOLECULE\\nGFn2-XTB_relax Charge: 0 Un... \n", + "41 @MOLECULE\\nGFn2-XTB_relax Charge: 0 Un... \n", + "42 @MOLECULE\\nGFn2-XTB_relax Charge: 0 Un... \n", + "43 @MOLECULE\\nGFn2-XTB_relax Charge: 0 Un... \n", + "44 @MOLECULE\\nGFn2-XTB_relax Charge: 0 Un... \n", + "45 @MOLECULE\\nGFn2-XTB_relax Charge: 0 Un... \n", + "46 @MOLECULE\\nGFn2-XTB_relax Charge: 0 Un... \n", + "47 @MOLECULE\\nGFn2-XTB_relax Charge: 0 Un... \n", + "48 @MOLECULE\\nGFn2-XTB_relax Charge: 0 Un... \n", + "49 @MOLECULE\\nGFn2-XTB_relax Charge: 0 Un... " + ] + }, + "execution_count": 63, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "id": "ce54d7db-9a98-42e1-acd6-0c63691f4e17", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
IDNameInChIKeySMILESSolubility
1A-4Benzo[cd]indol-2(1H)-oneGPYLCFQEKPUWLD-UHFFFAOYSA-NO=C1Nc2cccc3cccc1c23-3.254767
2A-54-chlorobenzaldehydeAVPYQKSLYISFPO-UHFFFAOYSA-NClc1ccc(C=O)cc1-2.177078
4A-94-({4-[bis(oxiran-2-ylmethyl)amino]phenyl}meth...FAUAZXVRLVIARB-UHFFFAOYSA-NC1OC1CN(CC2CO2)c3ccc(Cc4ccc(cc4)N(CC5CO5)CC6CO...-4.662065
5A-10vinyltolueneJZHGRUMIRATHIU-UHFFFAOYSA-NCc1cccc(C=C)c1-3.123150
6A-113-(3-ethylcyclopentyl)propanoic acidWVRFSLWCFASCIS-UHFFFAOYSA-NCCC1CCC(CCC(O)=O)C1-3.286116
..................
9977I-84tetracaineGKCBAIGFKIBETG-UHFFFAOYSA-NC(c1ccc(cc1)NCCCC)(=O)OCCN(C)C-3.010000
9978I-85tetracyclineOFVLGDICTFRJMM-WESIUVDSSA-NOC1=C(C(C2=C(O)[C@@](C(C(C(N)=O)=C(O)[C@H]3N(C...-2.930000
9979I-86thymolMGSRCZKZVOBKFT-UHFFFAOYSA-Nc1(cc(ccc1C(C)C)C)O-2.190000
9980I-93verapamilSGTNSNPWRIOYBX-UHFFFAOYSA-NCOc1ccc(CCN(C)CCCC(C#N)(C(C)C)c2ccc(OC)c(OC)c2...-3.980000
9981I-94warfarinPJVWKTKQMONHTI-UHFFFAOYSA-NCC(=O)CC(c1ccccc1)c1c(O)c2ccccc2oc1=O-4.780000
\n", + "

8797 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " ID Name \\\n", + "1 A-4 Benzo[cd]indol-2(1H)-one \n", + "2 A-5 4-chlorobenzaldehyde \n", + "4 A-9 4-({4-[bis(oxiran-2-ylmethyl)amino]phenyl}meth... \n", + "5 A-10 vinyltoluene \n", + "6 A-11 3-(3-ethylcyclopentyl)propanoic acid \n", + "... ... ... \n", + "9977 I-84 tetracaine \n", + "9978 I-85 tetracycline \n", + "9979 I-86 thymol \n", + "9980 I-93 verapamil \n", + "9981 I-94 warfarin \n", + "\n", + " InChIKey \\\n", + "1 GPYLCFQEKPUWLD-UHFFFAOYSA-N \n", + "2 AVPYQKSLYISFPO-UHFFFAOYSA-N \n", + "4 FAUAZXVRLVIARB-UHFFFAOYSA-N \n", + "5 JZHGRUMIRATHIU-UHFFFAOYSA-N \n", + "6 WVRFSLWCFASCIS-UHFFFAOYSA-N \n", + "... ... \n", + "9977 GKCBAIGFKIBETG-UHFFFAOYSA-N \n", + "9978 OFVLGDICTFRJMM-WESIUVDSSA-N \n", + "9979 MGSRCZKZVOBKFT-UHFFFAOYSA-N \n", + "9980 SGTNSNPWRIOYBX-UHFFFAOYSA-N \n", + "9981 PJVWKTKQMONHTI-UHFFFAOYSA-N \n", + "\n", + " SMILES Solubility \n", + "1 O=C1Nc2cccc3cccc1c23 -3.254767 \n", + "2 Clc1ccc(C=O)cc1 -2.177078 \n", + "4 C1OC1CN(CC2CO2)c3ccc(Cc4ccc(cc4)N(CC5CO5)CC6CO... -4.662065 \n", + "5 Cc1cccc(C=C)c1 -3.123150 \n", + "6 CCC1CCC(CCC(O)=O)C1 -3.286116 \n", + "... ... ... \n", + "9977 C(c1ccc(cc1)NCCCC)(=O)OCCN(C)C -3.010000 \n", + "9978 OC1=C(C(C2=C(O)[C@@](C(C(C(N)=O)=C(O)[C@H]3N(C... -2.930000 \n", + "9979 c1(cc(ccc1C(C)C)C)O -2.190000 \n", + "9980 COc1ccc(CCN(C)CCCC(C#N)(C(C)C)c2ccc(OC)c(OC)c2... -3.980000 \n", + "9981 CC(=O)CC(c1ccccc1)c1c(O)c2ccccc2oc1=O -4.780000 \n", + "\n", + "[8797 rows x 5 columns]" + ] + }, + "execution_count": 64, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fdf" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "03dc381d-ee77-476a-a624-4c60fbd95363", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/development/solubility_dev/gbsa_opt.txt b/development/solubility_dev/gbsa_opt.txt new file mode 100644 index 0000000..1dd4d7b --- /dev/null +++ b/development/solubility_dev/gbsa_opt.txt @@ -0,0 +1,2 @@ +$write + gbsa=true diff --git a/development/solubility_dev/xtb_solvent.py b/development/solubility_dev/xtb_solvent.py new file mode 100644 index 0000000..5cc7d9c --- /dev/null +++ b/development/solubility_dev/xtb_solvent.py @@ -0,0 +1,101 @@ +import architector.arch_context_manage as arch_context_manage +import architector.io_molecule as io_molecule +import shutil +import subprocess as sub +import numpy as np + + +def read_solv_params(file): + with open(file, "r") as file1: + lines = file1.readlines() + solvent_area_start_key = "generalized Born model for continuum solvation" + sovlent_area_end_key = "total SASA" + start = False + sas = [] + born_radii = [] + for line in lines: + if solvent_area_start_key in line: + start = True + elif sovlent_area_end_key in line: + break + elif start and (len(line.strip().split()) == 6): + sline = line.strip().split() + if sline[1].isnumeric(): + sas.append(float(sline[4])) + born_radii.append(float(sline[3])) + else: + pass + outdict = {"sas": np.array(sas), "born_radii": np.array(born_radii)} + return outdict + + +def xtb_solv_params(structure, solvent="water"): + """ + Take in a structure, evaluate with xtb to SA surface area + + Parameters + ---------- + structure : mol2str + structure passsed + solvent : str, optional + whether to use a solvent for conformer evalulation, default 'none' + + Returns + ---------- + results : dict + Compiled XTB results + """ + + # Convert smiles to xyz string + + xtbPath = shutil.which("xtb") + + mol = io_molecule.convert_io_molecule(structure) + if mol.xtb_charge is None: + mol.detect_charge_spin() + mol_charge = mol.xtb_charge + + mol.swap_actinide() + + even_odd_electrons = ( + np.sum([atom.number for atom in mol.ase_atoms]) - mol_charge + ) % 2 + if mol.xtb_uhf is not None: + uhf = mol.xtb_uhf + else: + uhf = 0 # Set spin to LS by default + if (even_odd_electrons == 1) and (uhf == 0): + uhf = 1 + elif (even_odd_electrons == 1) and (uhf < 7) and (uhf % 2 == 0): + uhf += 1 + elif (even_odd_electrons == 1) and (uhf >= 7) and (uhf % 2 == 0): + uhf -= 1 + if (even_odd_electrons == 0) and (uhf % 2 == 1): + uhf = uhf - 1 + elif (even_odd_electrons == 1) and (uhf % 2 == 0): + uhf = uhf + 1 + + mol_charge = int(mol_charge) # Ensure integers + uhf = int(uhf) + xyzstr = io_molecule.convert_ase_xyz(mol.ase_atoms) + + with arch_context_manage.make_temp_directory() as _: + # Write xyz file + with open("structure.xyz", "w") as outFile: + outFile.write(xyzstr) + + with open("solv_options.txt", "w") as file1: + file1.write("$write\n") + file1.write(" gbsa=true\n") + + # Run xtb + execStr = "{} structure.xyz --chrg {} --uhf {} --alpb {} -P 1 -I solv_options.txt> output.xtb".format( + xtbPath, int(mol_charge), int(uhf), solvent + ) + + sub.run(execStr, shell=True, check=True) + + # Read conformers from file + result_dict = read_solv_params("output.xtb") + + return result_dict