RDKit
Open-source cheminformatics and machine learning.
ChemTransforms.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2006-2012 Greg Landrum
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 #ifndef _RD_CHEMTRANSFORMS_H__
11 #define _RD_CHEMTRANSFORMS_H__
12 
13 #include <boost/smart_ptr.hpp>
14 #include <vector>
15 #include <iostream>
16 
17 #include "MolFragmenter.h"
18 
19 namespace RDKit {
20 class ROMol;
21 typedef boost::shared_ptr<ROMol> ROMOL_SPTR;
22 
23 //! \brief Returns a copy of an ROMol with the atoms and bonds that
24 //! match a pattern removed.
25 /*!
26  \param mol the ROMol of interest
27  \param query the query ROMol
28  \param onlyFrags if this is set, atoms will only be removed if
29  the entire fragment in which they are found is
30  matched by the query.
31 
32  \return a copy of \c mol with the matching atoms and bonds (if any)
33  removed.
34 */
35 ROMol *deleteSubstructs(const ROMol &mol, const ROMol &query,
36  bool onlyFrags = false);
37 
38 //! \brief Returns a list of copies of an ROMol with the atoms and bonds that
39 //! match a pattern replaced with the atoms contained in another molecule.
40 /*!
41  Bonds are created between the joining atom in the existing molecule
42  and the atoms in the new molecule. So, using SMILES instead of molecules:
43  replaceSubstructs('OC(=O)NCCNC(=O)O','C(=O)O','[X]') ->
44  ['[X]NCCNC(=O)O','OC(=O)NCCN[X]']
45  replaceSubstructs('OC(=O)NCCNC(=O)O','C(=O)O','[X]',true) ->
46  ['[X]NCCN[X]']
47  Chains should be handled "correctly":
48  replaceSubstructs('CC(=O)C','C(=O)','[X]') ->
49  ['C[X]C']
50  As should rings:
51  replaceSubstructs('C1C(=O)C1','C(=O)','[X]') ->
52  ['C1[X]C1']
53  And higher order branches:
54  replaceSubstructs('CC(=O)(C)C','C(=O)','[X]') ->
55  ['C[X](C)C']
56  Note that the client is responsible for making sure that the
57  resulting molecule actually makes sense - this function does not
58  perform sanitization.
59 
60  \param mol the ROMol of interest
61  \param query the query ROMol
62  \param replacement the ROMol to be inserted
63  \param replaceAll if this is true, only a single result, with all
64  occurances
65  of the substructure replaced, will be returned.
66  \param replacementConnectionPoint index of the atom in the replacement
67  that
68  the bond should made to
69 
70  \return a vector of pointers to copies of \c mol with the matching atoms
71  and bonds (if any) replaced
72 
73 */
74 std::vector<ROMOL_SPTR> replaceSubstructs(
75  const ROMol &mol, const ROMol &query, const ROMol &replacement,
76  bool replaceAll = false, unsigned int replacementConnectionPoint = 0);
77 
78 //! \brief Returns a copy of an ROMol with the atoms and bonds that
79 //! don't fall within a substructure match removed.
80 //!
81 //! dummy atoms are left to indicate attachment points.
82 //!
83 /*!
84  \param mol the ROMol of interest
85  \param coreQuery a query ROMol to be used to match the core
86 
87  \return a copy of \c mol with the non-matching atoms and bonds (if any)
88  removed and dummies at the connection points.
89 */
90 ROMol *replaceSidechains(const ROMol &mol, const ROMol &coreQuery);
91 
92 //! \brief Returns a copy of an ROMol with the atoms and bonds that
93 //! do fall within a substructure match removed.
94 //!
95 //! dummy atoms are left to indicate attachment points.
96 //!
97 /*!
98  Note that this is essentially identical to the replaceSidechains function,
99  except we
100  invert the query and replace the atoms that *do* match the query.
101 
102  \param mol - the ROMol of interest
103  \param coreQuery - a query ROMol to be used to match the core
104  \param replaceDummies - if set, atoms matching dummies in the core will also
105  be replaced
106  \param labelByIndex - if set, the dummy atoms at attachment points are
107  labelled with the
108  index+1 of the corresponding atom in the core
109  \param requireDummyMatch - if set, only side chains that are connected to
110  atoms in
111  the core that have attached dummies will be
112  considered.
113  Molecules that have sidechains that are attached
114  at other points will be rejected (NULL returned).
115 
116  \return a copy of \c mol with the non-matching atoms and bonds (if any)
117  removed and dummies at the connection points. The client is
118  responsible
119  for deleting this molecule. If the core query is not matched, NULL
120  is returned.
121 */
122 ROMol *replaceCore(const ROMol &mol, const ROMol &coreQuery,
123  bool replaceDummies = true, bool labelByIndex = false,
124  bool requireDummyMatch = false);
125 
126 //! \brief Carries out a Murcko decomposition on the molecule provided
127 //!
128 /*!
129 
130  \param mol - the ROMol of interest
131 
132  \return a new ROMol with the Murcko scaffold
133  The client is responsible for deleting this molecule.
134 */
135 ROMol *MurckoDecompose(const ROMol &mol);
136 
137 //! \brief Combined two molecules to create a new one
138 //!
139 /*!
140 
141  \param mol1 - the first ROMol to be combined
142  \param mol2 - the second ROMol to be combined
143  \param offset - a constant offset to be added to every
144  atom position in mol2
145 
146  \return a new ROMol with the two molecules combined.
147  The new molecule has not been sanitized.
148  The client is responsible for deleting this molecule.
149 */
150 ROMol *combineMols(const ROMol &mol1, const ROMol &mol2,
151  RDGeom::Point3D offset = RDGeom::Point3D(0, 0, 0));
152 
153 //! \brief Adds named recursive queries to a molecule's atoms based on atom
154 // labels
155 //!
156 /*!
157 
158  \param mol - the molecule to be modified
159  \param queries - the dictionary of named queries to add
160  \param propName - the atom property to use to get query names
161  \param reactantLabels - to store pairs of (atom index, query string)
162 
163 
164  NOTES:
165  - existing query information, if present, will be supplemented (AND logic)
166  - non-query atoms will be replaced with query atoms using only the query
167  logic
168  - query names can be present as comma separated lists, they will then
169  be combined using OR logic.
170  - throws a KeyErrorException if a particular query name is not present
171  in \c queries
172 
173 */
175  ROMol &mol, const std::map<std::string, ROMOL_SPTR> &queries,
176  const std::string &propName,
177  std::vector<std::pair<unsigned int, std::string> > *reactantLabels = NULL);
178 
179 //! \brief parses a query definition file and sets up a set of definitions
180 //! suitable for use by addRecursiveQueries()
181 /*!
182 
183  \param filename - the name of the file to be read
184  \param queryDefs - the dictionary of named queries (return value)
185  \param standardize - if true, query names will be converted to lower
186  case
187  \param delimiter - the line delimiter in the file
188  \param comment - text used to recognize comment lines
189  \param nameColumn - column with the names of queries
190  \param smartsColumn - column with the SMARTS definitions of the queries
191 
192 */
193 void parseQueryDefFile(const std::string &filename,
194  std::map<std::string, ROMOL_SPTR> &queryDefs,
195  bool standardize = true,
196  const std::string &delimiter = "\t",
197  const std::string &comment = "//",
198  unsigned int nameColumn = 0,
199  unsigned int smartsColumn = 1);
200 //! \overload
201 void parseQueryDefFile(std::istream *inStream,
202  std::map<std::string, ROMOL_SPTR> &queryDefs,
203  bool standardize = true,
204  const std::string &delimiter = "\t",
205  const std::string &comment = "//",
206  unsigned int nameColumn = 0,
207  unsigned int smartsColumn = 1);
208 //! \brief equivalent to parseQueryDefFile() but the query definitions are
209 // explicitly passed in
210 void parseQueryDefText(const std::string &queryDefText,
211  std::map<std::string, ROMOL_SPTR> &queryDefs,
212  bool standardize = true,
213  const std::string &delimiter = "\t",
214  const std::string &comment = "//",
215  unsigned int nameColumn = 0,
216  unsigned int smartsColumn = 1);
217 }
218 
219 #endif
ROMol * combineMols(const ROMol &mol1, const ROMol &mol2, RDGeom::Point3D offset=RDGeom::Point3D(0, 0, 0))
Combined two molecules to create a new one.
void addRecursiveQueries(ROMol &mol, const std::map< std::string, ROMOL_SPTR > &queries, const std::string &propName, std::vector< std::pair< unsigned int, std::string > > *reactantLabels=NULL)
Adds named recursive queries to a molecule&#39;s atoms based on atom.
ROMol is a molecule class that is intended to have a fixed topology.
Definition: ROMol.h:102
ROMol * replaceCore(const ROMol &mol, const ROMol &coreQuery, bool replaceDummies=true, bool labelByIndex=false, bool requireDummyMatch=false)
Returns a copy of an ROMol with the atoms and bonds that do fall within a substructure match removed...
void parseQueryDefFile(const std::string &filename, std::map< std::string, ROMOL_SPTR > &queryDefs, bool standardize=true, const std::string &delimiter="\, const std::string &comment="//", unsigned int nameColumn=0, unsigned int smartsColumn=1)
parses a query definition file and sets up a set of definitions suitable for use by addRecursiveQueri...
ROMol * MurckoDecompose(const ROMol &mol)
Carries out a Murcko decomposition on the molecule provided.
ROMol * deleteSubstructs(const ROMol &mol, const ROMol &query, bool onlyFrags=false)
Returns a copy of an ROMol with the atoms and bonds that match a pattern removed. ...
boost::shared_ptr< ROMol > ROMOL_SPTR
Includes a bunch of functionality for handling Atom and Bond queries.
Definition: Atom.h:28
std::vector< ROMOL_SPTR > replaceSubstructs(const ROMol &mol, const ROMol &query, const ROMol &replacement, bool replaceAll=false, unsigned int replacementConnectionPoint=0)
Returns a list of copies of an ROMol with the atoms and bonds that match a pattern replaced with the ...
void parseQueryDefText(const std::string &queryDefText, std::map< std::string, ROMOL_SPTR > &queryDefs, bool standardize=true, const std::string &delimiter="\, const std::string &comment="//", unsigned int nameColumn=0, unsigned int smartsColumn=1)
equivalent to parseQueryDefFile() but the query definitions are
ROMol * replaceSidechains(const ROMol &mol, const ROMol &coreQuery)
Returns a copy of an ROMol with the atoms and bonds that don&#39;t fall within a substructure match remov...