moleculegraph

  1from .general_utils import *
  2from .molecule_utils import *
  3
  4import numpy as np
  5import csv
  6import re
  7# from rapidfuzz.string_metric import normalized_levenshtein
  8import json
  9
 10"""
 11moleculegraph
 12
 13A graph representation for molecules and everything else for data mapping
 14"""
 15
 16__version__ = "0.0.0"
 17__author__ = "Maximilian Fleck"
 18__credits__ = "ITT Uni Stuttgart"
 19
 20class molecule:
 21    """
 22    simple undirected graph representation for molecules and more.
 23    
 24    write code about molecules without the need to care about them.
 25    
 26    syntax only. Bring your own semantics.
 27    
 28    Main results are:
 29    - distance matrix of the molecule.
 30    - atom, bond, angle, torsion lists.
 31
 32    YOU can use them to:
 33    - map force-field information onto your molecule.
 34    - generate inputs for templating trajectories and coordinates
 35    - use group contribution approaches
 36    - process coordinates from the PubChem-API
 37
 38    Furthermore:
 39    - this graph-representation is quite general and not limited to molecules.
 40    - ...you might use it to represent your favorite baking recipes and optimize them.    
 41    
 42    """
 43
 44    def __init__(self, molstring, splitter=split_molstring, get_syntax=get_syntax_from_numbers,
 45                 branch_point_to_last=False, ring_point_to_first=False,
 46                ):
 47        """
 48        initializes molecules based on graph-list representation.
 49        
 50        The term index (or idx) always refers to a character in the string, 
 51        be it semantic or syntactic. Semantics and syntax of the string are 
 52        no longer important once the bond list and thus the actual graph is 
 53        created. Then there are only semantic objects, i.e. atoms.
 54        Thus, all numbers occuring in bond, angle or torsional lists or in
 55        distance matrices refer to atomic numbers. Indexes and syntactic lists 
 56        are only important for advanced applications. For example, generating
 57        strings with the same meaning. If both options are available they are
 58        are marked with index/indexes/idx and number/numbers. For example:
 59        ring_root_numbers and ring_root_indexes.
 60
 61        Parameters:
 62        - mol:
 63            - string representation of a molecule.
 64        - splitter:
 65            - function that splits input into syntactic/semantic elements
 66            - returns np.array with all elements
 67            - default: split_molstring from molecule_utils
 68            - you can pass your own function :)
 69        - get_syntax:
 70            - function that generates atom and functional descriptors from 
 71              syntactic/semantic elements
 72            - returns two np.arrays:
 73                -ff: functions array
 74                    - branches pointing forward f > 0
 75                    - rings pointing backward f < 0
 76                    - beads without function f = 0 
 77                -nn: atoms array
 78                    - contains atom numbers
 79                    - functions are -1
 80            - default: get_syntax_from_numbers from molecule_utils   
 81            - you can pass your own function :)
 82        - branch_point_to_last:
 83            - boolean, False 
 84            - comes into play when branch points outside molecule
 85            - if True branch will point to last atom
 86        - ring_point_to_first:
 87            - boolean, False
 88            - comes into play when ring points outside molecule
 89            - if True ring will point to first atom
 90        
 91        Returns:
 92            nothing
 93        """
 94        
 95        self.branch_point_to_last= branch_point_to_last
 96        """boolean if True branch that points outside molecule will point to last atom"""
 97        self.ring_point_to_first = ring_point_to_first
 98        """boolean if True ring that points outside molecule will point to first atom"""
 99        
100        mol           = np.array( splitter(molstring) )
101        self.molecule = np.array( splitter(molstring) )
102        """array representation of the molecule i.e. all keys/words/whatever in a list"""
103        
104        syntactic_elements = get_syntax(mol)
105        
106        self.f = np.array( syntactic_elements[0] ).astype(int)  # shows function
107        """array with all functions. 
108        - branches pointing forward f > 0
109        - rings pointing backward f < 0
110        - beads without function f = 0
111        """
112        self.n = np.array( syntactic_elements[1] ).astype(int)  # shows atomnumber
113        """array with atom numbers. branches and rings n = -1 as they are no atoms"""        
114        self.i = np.arange(len(mol))  # shows index
115        """array with indexes. atoms, branches and rings get an index"""         
116        self.len = len(self.i)
117        """number of all elements i.e. atoms, branches and rings"""       
118
119        # contains indexes of atoms
120        # index of atom no n: self.atom_indexes[n]
121        self.atom_indexes = self.i[self.f == 0]
122        """array with indexes of all atoms"""
123        self.atom_names = self.molecule[self.f == 0]
124        """array with names of all atoms"""
125        self.atom_keys = np.array( [ "["+x+"]" for x in self.atom_names ] )
126        """array with keys of all atoms"""        
127        self.atom_number = len(self.atom_indexes)
128        """number of all atoms"""
129        self.atom_numbers = np.arange(self.atom_number)
130        """array with numbers of all atoms"""
131        
132        # housekeeping for get_neighbour_list (will be called later)
133        # (the following things are generated when calling get_neighbour_list)
134        self.idx_neighbour_list = np.array([])
135        """array with indexes of all neighboured/bonded atoms"""
136        self.neighbour_list = np.array([])
137        """array with numbers of all neighboured/bonded atoms"""
138        self.ring_root_indexes = np.array([])
139        """array with indexes of all atoms rooting a ring"""
140       
141        
142        # housekeeping for get_distance_matrix (will be called later)
143        # (the following things are generated when calling get_distance_matrix)
144        self.distance_matrix = np.array([])
145        """distance matrix of all atoms in the molecule"""
146        self.bond_lists = np.array([])
147        """nested array with bond lists 
148        - starting from atoms separeted by one bond bond_lists[0]
149        - then atoms separeted by two bonds bond_lists[1] 
150        - ...
151        """
152        self.idx_bond_list = np.array([])
153        """array with indexes of all neighboured/bonded atoms"""
154        self.bond_list = np.array([])  
155        """array with numbers of all neighboured/bonded atoms"""
156        self.bond_keys  = np.array([])
157        """array with keys of all neighboured/bonded atoms"""
158        self.angle_list = np.array([])
159        """array of numbers of all angle forming atoms (separated by 2 bonds)"""
160        self.angle_names = np.array([])
161        """array of names of all angle forming atoms (separated by 2 bonds)"""
162        self.angle_keys = np.array([])
163        """array of keys of all angle forming atoms (separated by 2 bonds)"""
164        self.torsion_list = np.array([])
165        """array of numbers of all dihedral forming atoms (separated by 3 bonds)"""
166        self.torsion_names = np.array([])
167        """array of names of all dihedral forming atoms (separated by 3 bonds)"""
168        self.torsion_keys = np.array([])        
169        """array of keys of all dihedral forming atoms (separated by 3 bonds)"""
170
171        # get bond list
172        self.get_neighbour_list()        
173        
174        # get branch ends
175        n, count = np.unique(self.bond_list, return_counts=True)
176        pp = np.squeeze(np.where(count == 1))
177        if np.sum(pp) > 0:
178            self.branch_end_numbers = n[pp]
179            self.branch_end_indexes = self.atom_indexes[self.branch_end_numbers]
180        else:
181            self.branch_end_numbers = np.empty(0)
182            self.branch_end_indexes = np.empty(0)
183        # get ring closures
184        self.ring_close_indexes = np.squeeze(np.where(self.f < 0)) - 1
185        """array with indexes of all atoms closing a ring"""
186        if np.sum(self.ring_close_indexes.shape) > 0:
187            self.ring_close_numbers = self.n[self.ring_close_indexes]
188            """array with atom numbers of all atoms closing a ring"""
189            self.ring_root_numbers = self.n[self.ring_root_indexes]
190            """array with atom numbers of all atoms rooting a ring"""
191        else:
192            self.ring_close_numbers = np.empty(0)
193            self.ring_root_numbers = np.empty(0)
194
195        # get distance matrix
196        self.get_distance_matrix()
197
198        dummy = unique_sort(self.atom_names, return_inverse=True)
199        (
200            self.unique_atom_keys,
201            self.unique_atom_indexes,
202            self.unique_atom_inverse,
203        ) = dummy
204        self.unique_atom_names = self.atom_names[self.unique_atom_indexes]
205        """array with unique atom names"""
206        self.unique_atom_numbers = self.atom_numbers[self.unique_atom_indexes]
207        """array with numbers of unique atom names"""
208        assert np.array_equal(
209            self.unique_atom_keys[self.unique_atom_inverse], self.atom_names
210        ), "unique atom in inverse doesnt work"
211
212        dummy = unique_sort(self.bond_keys, return_inverse=True)
213        self.unique_bond_keys = dummy[0]
214        """array with unique bond keys"""
215        self.unique_bond_indexes = dummy[1]
216        """array with unique bond indexes"""
217        self.unique_bond_inverse = dummy[2]
218        """array to invert unique bonds"""
219
220        self.unique_bond_names = self.bond_names[self.unique_bond_indexes]
221        """array with unique bond names"""
222        self.unique_bond_numbers = self.bond_list[self.unique_bond_indexes]
223        """array with numbers of unique bond names"""
224        if len(self.bond_keys) > 0:
225            assert np.array_equal(
226                self.unique_bond_keys[self.unique_bond_inverse], self.bond_keys
227            ), "unique bond in inverse doesnt work"
228
229        dummy = unique_sort(self.angle_keys, return_inverse=True)
230        self.unique_angle_keys = dummy[0]
231        """array with unique angle keys"""
232        self.unique_angle_indexes = dummy[1]
233        """array with unique angle indexes"""
234        self.unique_angle_inverse = dummy[2]
235        """array to invert unique angles"""
236
237        self.unique_angle_names = self.angle_names[self.unique_angle_indexes]
238        """array with unique angle names"""
239        self.unique_angle_numbers = self.angle_list[self.unique_angle_indexes]
240        """array with numbers of unique angle names"""
241        if len(self.angle_keys) > 0:
242            assert np.array_equal(
243                self.unique_angle_keys[self.unique_angle_inverse], self.angle_keys
244            ), "unique angle in inverse doesnt work"
245
246        dummy = unique_sort(self.torsion_keys, return_inverse=True)
247        self.unique_torsion_keys = dummy[0]
248        """array with unique torsion keys"""
249        self.unique_torsion_indexes = dummy[1]
250        """array with unique torsion indexes"""
251        self.unique_torsion_inverse = dummy[2]
252        """array to invert unique torsions"""
253        
254        self.unique_torsion_names = self.torsion_names[self.unique_torsion_indexes]
255        """array with unique torsion names"""
256        self.unique_torsion_numbers = self.torsion_list[self.unique_torsion_indexes]
257        """array with numbers of unique torsion names"""
258        if len(self.torsion_keys) > 0:
259            assert np.array_equal(
260                self.unique_torsion_keys[self.unique_torsion_inverse], self.torsion_keys
261            ), "unique angle in inverse doesnt work"
262
263        self.unique_atom_pair_names = []
264        self.unique_atom_pair_keys = []
265        for i, a0 in enumerate(self.unique_atom_names):
266            for j, a1 in enumerate(self.unique_atom_names):
267                if i > j:
268                    self.unique_atom_pair_names.append(sort_force_fields([a0, a1]))
269                    self.unique_atom_pair_keys.append(
270                        make_graph(self.unique_atom_pair_names[-1])
271                    )
272        self.unique_atom_pair_names = np.array(self.unique_atom_pair_names)
273        """array with unique atom pair names. use them for combining rules."""
274        self.unique_atom_pair_keys = np.array(self.unique_atom_pair_keys)
275        """array with unique atom pair keys. use them for combining rules."""
276        return
277
278    def reinit_mol(self,mol):
279        """
280        reinitializes moleculeclass
281        
282        """
283        
284        self.molstring = mol
285        if isinstance(mol, str):
286            mol = mol[1:-1].split("][")
287        
288        self.molecule = np.array(mol)
289        """array representation of the molecule i.e. all keys/words/whatever in a list"""
290        self.f = np.zeros(len(mol))  # shows function
291        """array with all functions. 
292        - branches pointing forward f > 0
293        - rings pointing backward f < 0
294        - beads without function f = 0
295        """
296        self.n = -1 * np.ones(len(mol))  # shows atomnumber
297        """array with atom numbers. branches and rings n = -1 as they are no atoms"""        
298        self.i = np.arange(len(mol))  # shows index
299        """array with indexes. atoms, branches and rings get an index"""         
300        self.len = len(self.i)
301        """number of all elements i.e. atoms, branches and rings"""       
302        n = 0
303        for i, m in enumerate(mol):
304            if m[0] == "b":
305                self.f[i] = int(m[1:])
306            elif m[0] == "r":
307                # dum = -int(m[1:])
308                self.f[i] = -int(m[1:])  # self.get_atom_index()
309            else:
310                self.n[i] = n
311                n += 1    
312        return    
313    
314    
315    def get_neighbour_list(self):
316        """
317        generates a neighbour list from an initialized molecule
318
319        Parameters: None
320
321        Returns:
322        - neighbour_list:
323            - list of neighboured atoms containing their atom numbers.
324        - idx_neighbour_list:
325            - list of neighboured atoms containing their atom indexes.
326        """ 
327
328        idx_neighbour_list = []
329        idx_branch_ends = np.empty(0)
330        idx_branch_starts = np.empty(0)
331        idx_ring_close = np.empty(0)
332        idx_branch_funs = np.empty(0)
333
334        branches = np.zeros(len(self.i))
335        branch_origins = np.zeros(len(self.i))
336        branch_count = 1
337
338        if self.atom_number < 2:
339            self.idx_neighbour_list = np.array([])
340            self.neighbour_list = np.array([])
341            self.idx_bond_list = np.array([])
342            self.bond_list = np.array([])
343            return np.array([]), np.array([])
344
345        """
346        find branches and assign numbers.
347        get main path through molecule.
348        build molecule from this...
349        """
350        inv_f = self.f[::-1]
351        branch_no = 1
352        inv_f_p = np.atleast_1d(np.squeeze(np.where(inv_f != 0)))
353        inv_branches = inv_f.copy()
354        inv_branches[inv_branches != 0] = -1
355        for i, ifi in zip(inv_f_p, inv_f[inv_f_p]):
356            if ifi < 0:  # ring
357                inv_branches[i] = -1
358            if ifi > 0:  # branch :)
359                pp = np.atleast_1d(np.squeeze(np.where(inv_branches[:i] == 0)))
360                if len(pp) > ifi:
361                    p_branch = pp[-int(ifi) :]
362                    inv_branches[p_branch] = branch_no
363                    inv_branches[i] = branch_no
364                    branch_no += 1
365        branches = reenumerate_branches(inv_branches[::-1])
366        # print(branches)
367
368        main_path_p = np.where(branches == 0)
369        main_path_i = self.i[main_path_p]
370
371        # get bond list of main path
372        bond_list = bond_list_from_simple_path(main_path_i)
373        # print( "branches",branches )
374
375        """
376        build bond list
377        """
378        f_p = np.atleast_1d(np.squeeze(np.where(self.f != 0)))
379        self.ring_root_indexes = []
380        # print("f_p , self.f[f_p]", f_p, self.f[f_p])
381        for i, fi in zip(f_p, self.f[f_p]):
382            #print(branches)
383            if fi < 0:
384                idx = np.max(
385                    get_diff_in_bond_lists(f_p[f_p < i], np.arange(i))
386                )  # index of last atom
387                subgraph = graph_from_bonds(bond_list)
388                path_to_main = get_shortest_path(subgraph, 0, idx)
389
390                bond_list_to_main = bond_list_from_simple_path(path_to_main)
391                graph_to_main = graph_from_bonds(bond_list_to_main)
392                root = get_root(graph_to_main, idx, int(np.abs(fi)))
393                branches[i] = branches[root] # connect ring to acompanying branch
394                if root < 0 and self.ring_point_to_first:
395                    if fi+root >= self.min_ring_size:
396                        root = np.min(self.atom_indexes)
397                        bond_list = np.concatenate([bond_list, [[root, idx]]])
398                        self.ring_root_indexes.append(root)                    
399                        print("root of ring outside molecule. point to first atom.")
400                    else:
401                        print( "ring size smaller:",self.min_ring_size,". ignore")
402                elif root < 0:
403                    print("root of ring outside molecule. ignore.")
404                else:
405                    bond_list = np.concatenate([bond_list, [[root, idx]]])
406                    self.ring_root_indexes.append(root)
407            elif fi > 0:
408                link = get_branch_root(branches[i], branches)
409                if not self.is_atom(link):
410                    link = self.get_last_atom_idx(link)
411                branch = branches[i]
412                p_subchain_i = np.atleast_1d(np.squeeze(np.where(branches == branch)))[
413                    1:
414                ]
415                if len(p_subchain_i) < fi and self.branch_point_to_last:
416                    link = np.max(self.atom_indexes)
417                    p_subchain_i = np.concatenate([[link], p_subchain_i])
418                    subchain_bond_list = bond_list_from_simple_path(p_subchain_i)
419                    bond_list = np.concatenate([bond_list, subchain_bond_list])                    
420                    print("end of branch outside molecule. point to last atom.")
421                elif len(p_subchain_i) < fi:
422                    print("end of branch outside molecule. ignore.")
423                else:
424                    p_subchain_i = np.concatenate([[link], p_subchain_i])
425                    subchain_bond_list = bond_list_from_simple_path(p_subchain_i)
426                    bond_list = np.concatenate([bond_list, subchain_bond_list])
427            else:
428                print("fi = 0... this should not happen :P")
429        #print("fin branches", branches)
430        self.idx_neighbour_list = bond_list
431        self.idx_bond_list = bond_list
432        self.ring_root_indexes = np.array(self.ring_root_indexes)
433        
434        self.neighbour_list = np.vectorize(self.get_atom_no)(self.idx_neighbour_list)
435        self.bond_list = self.neighbour_list
436        return self.neighbour_list, self.idx_neighbour_list
437
438    def get_bond_matrix(self):
439        """
440        generates bond matrix from an initialized molecule
441
442        Parameters: None
443
444        Returns:
445        - bond_matrix:
446            - bond matrix of you molecule
447        """
448        self.bond_matrix = get_bond_matrix(self.neighbour_list, self.atom_number)
449        return self.bond_matrix.copy()
450
451    def get_distance_matrix(self):
452        """
453        generates distance matrix from an initialized molecule.
454        Furthermore different angle and torsion lists are generated.
455
456        Parameters: None
457        
458        Returns:
459        - distance_matrix:
460            - distance matrix of you molecule
461        """
462        self.distance_matrix, self.bond_lists = get_distance_matrix(
463            self.neighbour_list, self.atom_number
464        )
465        self.bond_list = self.bond_lists[0]
466        self.bond_names = np.array(
467            [sort_force_fields(self.atom_names[a]) for a in self.bond_list]
468        )
469        self.bond_keys = np.array([make_graph(t) for t in self.bond_names])
470        if len(self.bond_lists) > 1:
471            self.angle_list = np.array(self.bond_lists[1])
472            self.angle_names = np.array(
473                [sort_force_fields(self.atom_names[a]) for a in self.angle_list]
474            )
475            self.angle_keys = np.array([make_graph(t) for t in self.angle_names])
476        else:
477            self.angle_list = np.array([])
478            self.angle_names = np.array([])
479            self.angle_keys = np.array([])
480        if len(self.bond_lists) > 2:
481            self.torsion_list = np.array(self.bond_lists[2])
482            self.torsion_names = np.array(
483                [sort_force_fields(self.atom_names[a]) for a in self.torsion_list]
484            )
485            self.torsion_keys = np.array([make_graph(t) for t in self.torsion_names])
486        else:
487            self.torsion_list = np.array([])
488            self.torsion_names = np.array([])
489            self.torsion_keys = np.array([])
490        return self.distance_matrix.copy()
491    
492    def is_atom(self,idx):
493        """
494        checks if index is an atom.
495        - Number: atom number
496        - Index:  index in list representation of molecule.
497        Number and index are differing due to functionals in the list.
498
499        Parameters:
500        - idx:
501            - idx of the atom
502            
503        Returns:
504        - boolean
505        """
506        if idx in self.atom_indexes:
507            return True
508        else:
509            return False
510
511    def get_atom_index(self, no):
512        """
513        returns index of atom.
514        - Number: atom number
515        - Index:  index in list representation of molecule.
516        Number and index are differing due to functionals in the list.
517
518        Parameters:
519        - no:
520           - number of the atom
521           
522        Returns:
523        - index of atom number "no"
524        """
525        return int(self.atom_indexes[int(no)])
526
527    def get_atom_no(self, idx):
528        """
529        returns index of atom.
530        - Number: atom number
531        - Index:  index in list representation of molecule.
532        Number and index are differing due to functionals in the list.
533
534        Parameters:
535        - idx:
536            - idx of the atom
537            
538        Returns:
539        - number of atom with index
540        """
541        idx = int(idx)
542        if self.f[idx] >= 0:
543            no = int(np.squeeze(np.where(self.atom_indexes == idx)))
544            assert no == self.n[idx], "atom numbers dont work,idx"
545            return no
546        else:
547            assert self.n[idx] == -1, "atom numbers dont work, n=-1"
548            print("Error in get_atom_no: not an atom",idx)
549            return False
550
551    def get_next_atom_idx(self, idx):
552        """
553        returns index of the next atom after index idx.
554        Index:  index in list representation of molecule.
555        Number and index are differing due to functionals in the list.
556
557        Parameters:
558        idx:
559            - index in list representation of molecule
560            
561        Returns:
562        - index of the next atom after index idx
563        """
564        return int(np.min(self.atom_indexes[(self.atom_indexes - idx) > 0]))
565
566    def get_last_atom_idx(self, idx):
567        """
568        returns index of the atom before index idx.
569        Index:  index in list representation of molecule.
570        Number and index are differing due to functionals in the list.
571
572        Parameters:
573        idx:
574            - index in list representation of molecule
575            
576        Returns:
577        - index of the atom before index idx
578        """
579        return int(np.max(self.atom_indexes[(self.atom_indexes - idx) < 0]))
580
581    def get_next_atom_no(self, idx):
582        """
583        returns number of the atom after index idx.
584        Index:  index in list representation of molecule.
585        Number: atom number
586        Number and index are differing due to functionals in the list.
587
588        Parameters:
589        - idx:
590            - index in list representation of molecule
591            
592        Returns:
593        - number of the atom after index idx
594        """
595        return int(self.n[self.get_next_atom_idx(idx)])
596
597    def get_last_atom_no(self, idx):
598        """
599        returns number of the atom before index idx.
600        Index:  index in list representation of molecule.
601        Number: atom number
602        Number and index are differing due to functionals in the list.
603
604        Parameters:
605            self:
606                - initialized molecule.
607            idx:
608                - index in list representation of molecule
609                
610        Returns:
611            number of the atom before index idx
612        """
613        return int(self.n[self.get_last_atom_idx(idx)])
614
615    def map_molecule_via_atom_names(self, data):
616        """
617        maps information from a dictionary onto the molecule.
618        Use this for molecule information i.e. coordinates, group contributions,...
619
620        Parameters:
621        - data:
622            - dictionary with: keys == atom names !!!
623            
624        Returns:
625        - mapped data
626        """
627        return [*map(data.get, self.atom_names)]
628
629    def map_unique_via_atom_names(self, data):
630        """
631        Maps information from a dictionary onto unique elements of the molecule.
632        Preserves the order of the original list.
633        Use this for force-field information i.e. potentials,...
634
635        Parameters:
636        - data:
637            - dictionary with: keys == atom names !!!
638            
639        Returns:
640        - mapped unique data
641        """
642        print(
643            "map_unique_via_atom_names is outtdated... dont use\n shouldnt cause errors btw"
644        )
645        return [*map(data.get, self.unique_atom_names)]
646
647    def map_molecule(self, goal, data):
648        """
649        maps information from a dictionary onto a goal.
650        Use this for molecule information i.e. coordinates, group contributions,...
651
652
653        Parameters:
654        - goal:
655            - list of keys
656        - data:
657            - dictionary with keys in goal
658            
659        Returns:
660        - mapped data
661        """
662        return [*map(data.get, goal)]
663
664    def map_unique(self, goal, data):
665        """
666        Maps information from a dictionary onto unique elements a goal.
667        Preserves the order of the original list.
668        Use this for force-field information i.e. potentials,...
669
670        Parameters:
671        - goal:
672            - list of keys
673        - data:
674            - dictionary with keys in goal
675            
676        Returns:
677        - mapped unique data
678        """
679        print("map_unique is outtdated... dont use\n shouldnt cause errors btw")
680        return [*map(data.get, unique_sort(goal))]
681
682    def map_nested(self, goal, data):
683        """
684        Maps information from a dictionary onto nested elements of a goal.
685        Preserves the order of the original list.
686        Use this for advanced force-field information i.e. mixing rules,...
687
688        same as:
689        [ self.map_unique( m, data) for m in goal ]
690
691        Parameters:
692        - goal:
693            -with list list of keys
694        - data:
695            -dictionary with keys in goal
696            
697        Returns:
698        - mapped unique data
699        """
700        return [[*map(data.get, m)] for m in goal]
701
702    def get_distance(self, x, y):
703        """
704        Returns distance between atoms no x and y
705        0: same atom i.e. x==y
706        1: bond
707        2: angle
708        3: torsion
709
710        Parameters:
711        - x,y:
712            - atom numbers
713            
714        Returns:
715        - distance
716        """
717        return self.distance_matrix[int(x)][int(y)]
718
719    def get_sorted_key(self, x):
720        """
721        Sorts a list alphabetically.
722        Very important for dictionary keys in the molecule class!!!
723
724        Parameters:
725        - x:
726            - string list to sort
727            
728        Returns:
729        - key (string) and sorted list
730        """
731        x_sort = sort_force_fields(x)
732        return make_graph(x_sort), x_sort
733
734    def visualize(self, saveto=""):
735        """
736        plots and visualizes molecule
737        
738        Parameters:
739        - saveto:
740            - optional, location to save pic of graph to,
741            - add file format at the end ;)
742            
743        Returns:
744            nothing
745        """
746        if self.atom_numbers.size > 1:
747            plot_graph(self.atom_names, self.bond_list, saveto=saveto)
748        else:
749            print("one bead only")
750        return
751    
752    
753  
class molecule:
 21class molecule:
 22    """
 23    simple undirected graph representation for molecules and more.
 24    
 25    write code about molecules without the need to care about them.
 26    
 27    syntax only. Bring your own semantics.
 28    
 29    Main results are:
 30    - distance matrix of the molecule.
 31    - atom, bond, angle, torsion lists.
 32
 33    YOU can use them to:
 34    - map force-field information onto your molecule.
 35    - generate inputs for templating trajectories and coordinates
 36    - use group contribution approaches
 37    - process coordinates from the PubChem-API
 38
 39    Furthermore:
 40    - this graph-representation is quite general and not limited to molecules.
 41    - ...you might use it to represent your favorite baking recipes and optimize them.    
 42    
 43    """
 44
 45    def __init__(self, molstring, splitter=split_molstring, get_syntax=get_syntax_from_numbers,
 46                 branch_point_to_last=False, ring_point_to_first=False,
 47                ):
 48        """
 49        initializes molecules based on graph-list representation.
 50        
 51        The term index (or idx) always refers to a character in the string, 
 52        be it semantic or syntactic. Semantics and syntax of the string are 
 53        no longer important once the bond list and thus the actual graph is 
 54        created. Then there are only semantic objects, i.e. atoms.
 55        Thus, all numbers occuring in bond, angle or torsional lists or in
 56        distance matrices refer to atomic numbers. Indexes and syntactic lists 
 57        are only important for advanced applications. For example, generating
 58        strings with the same meaning. If both options are available they are
 59        are marked with index/indexes/idx and number/numbers. For example:
 60        ring_root_numbers and ring_root_indexes.
 61
 62        Parameters:
 63        - mol:
 64            - string representation of a molecule.
 65        - splitter:
 66            - function that splits input into syntactic/semantic elements
 67            - returns np.array with all elements
 68            - default: split_molstring from molecule_utils
 69            - you can pass your own function :)
 70        - get_syntax:
 71            - function that generates atom and functional descriptors from 
 72              syntactic/semantic elements
 73            - returns two np.arrays:
 74                -ff: functions array
 75                    - branches pointing forward f > 0
 76                    - rings pointing backward f < 0
 77                    - beads without function f = 0 
 78                -nn: atoms array
 79                    - contains atom numbers
 80                    - functions are -1
 81            - default: get_syntax_from_numbers from molecule_utils   
 82            - you can pass your own function :)
 83        - branch_point_to_last:
 84            - boolean, False 
 85            - comes into play when branch points outside molecule
 86            - if True branch will point to last atom
 87        - ring_point_to_first:
 88            - boolean, False
 89            - comes into play when ring points outside molecule
 90            - if True ring will point to first atom
 91        
 92        Returns:
 93            nothing
 94        """
 95        
 96        self.branch_point_to_last= branch_point_to_last
 97        """boolean if True branch that points outside molecule will point to last atom"""
 98        self.ring_point_to_first = ring_point_to_first
 99        """boolean if True ring that points outside molecule will point to first atom"""
100        
101        mol           = np.array( splitter(molstring) )
102        self.molecule = np.array( splitter(molstring) )
103        """array representation of the molecule i.e. all keys/words/whatever in a list"""
104        
105        syntactic_elements = get_syntax(mol)
106        
107        self.f = np.array( syntactic_elements[0] ).astype(int)  # shows function
108        """array with all functions. 
109        - branches pointing forward f > 0
110        - rings pointing backward f < 0
111        - beads without function f = 0
112        """
113        self.n = np.array( syntactic_elements[1] ).astype(int)  # shows atomnumber
114        """array with atom numbers. branches and rings n = -1 as they are no atoms"""        
115        self.i = np.arange(len(mol))  # shows index
116        """array with indexes. atoms, branches and rings get an index"""         
117        self.len = len(self.i)
118        """number of all elements i.e. atoms, branches and rings"""       
119
120        # contains indexes of atoms
121        # index of atom no n: self.atom_indexes[n]
122        self.atom_indexes = self.i[self.f == 0]
123        """array with indexes of all atoms"""
124        self.atom_names = self.molecule[self.f == 0]
125        """array with names of all atoms"""
126        self.atom_keys = np.array( [ "["+x+"]" for x in self.atom_names ] )
127        """array with keys of all atoms"""        
128        self.atom_number = len(self.atom_indexes)
129        """number of all atoms"""
130        self.atom_numbers = np.arange(self.atom_number)
131        """array with numbers of all atoms"""
132        
133        # housekeeping for get_neighbour_list (will be called later)
134        # (the following things are generated when calling get_neighbour_list)
135        self.idx_neighbour_list = np.array([])
136        """array with indexes of all neighboured/bonded atoms"""
137        self.neighbour_list = np.array([])
138        """array with numbers of all neighboured/bonded atoms"""
139        self.ring_root_indexes = np.array([])
140        """array with indexes of all atoms rooting a ring"""
141       
142        
143        # housekeeping for get_distance_matrix (will be called later)
144        # (the following things are generated when calling get_distance_matrix)
145        self.distance_matrix = np.array([])
146        """distance matrix of all atoms in the molecule"""
147        self.bond_lists = np.array([])
148        """nested array with bond lists 
149        - starting from atoms separeted by one bond bond_lists[0]
150        - then atoms separeted by two bonds bond_lists[1] 
151        - ...
152        """
153        self.idx_bond_list = np.array([])
154        """array with indexes of all neighboured/bonded atoms"""
155        self.bond_list = np.array([])  
156        """array with numbers of all neighboured/bonded atoms"""
157        self.bond_keys  = np.array([])
158        """array with keys of all neighboured/bonded atoms"""
159        self.angle_list = np.array([])
160        """array of numbers of all angle forming atoms (separated by 2 bonds)"""
161        self.angle_names = np.array([])
162        """array of names of all angle forming atoms (separated by 2 bonds)"""
163        self.angle_keys = np.array([])
164        """array of keys of all angle forming atoms (separated by 2 bonds)"""
165        self.torsion_list = np.array([])
166        """array of numbers of all dihedral forming atoms (separated by 3 bonds)"""
167        self.torsion_names = np.array([])
168        """array of names of all dihedral forming atoms (separated by 3 bonds)"""
169        self.torsion_keys = np.array([])        
170        """array of keys of all dihedral forming atoms (separated by 3 bonds)"""
171
172        # get bond list
173        self.get_neighbour_list()        
174        
175        # get branch ends
176        n, count = np.unique(self.bond_list, return_counts=True)
177        pp = np.squeeze(np.where(count == 1))
178        if np.sum(pp) > 0:
179            self.branch_end_numbers = n[pp]
180            self.branch_end_indexes = self.atom_indexes[self.branch_end_numbers]
181        else:
182            self.branch_end_numbers = np.empty(0)
183            self.branch_end_indexes = np.empty(0)
184        # get ring closures
185        self.ring_close_indexes = np.squeeze(np.where(self.f < 0)) - 1
186        """array with indexes of all atoms closing a ring"""
187        if np.sum(self.ring_close_indexes.shape) > 0:
188            self.ring_close_numbers = self.n[self.ring_close_indexes]
189            """array with atom numbers of all atoms closing a ring"""
190            self.ring_root_numbers = self.n[self.ring_root_indexes]
191            """array with atom numbers of all atoms rooting a ring"""
192        else:
193            self.ring_close_numbers = np.empty(0)
194            self.ring_root_numbers = np.empty(0)
195
196        # get distance matrix
197        self.get_distance_matrix()
198
199        dummy = unique_sort(self.atom_names, return_inverse=True)
200        (
201            self.unique_atom_keys,
202            self.unique_atom_indexes,
203            self.unique_atom_inverse,
204        ) = dummy
205        self.unique_atom_names = self.atom_names[self.unique_atom_indexes]
206        """array with unique atom names"""
207        self.unique_atom_numbers = self.atom_numbers[self.unique_atom_indexes]
208        """array with numbers of unique atom names"""
209        assert np.array_equal(
210            self.unique_atom_keys[self.unique_atom_inverse], self.atom_names
211        ), "unique atom in inverse doesnt work"
212
213        dummy = unique_sort(self.bond_keys, return_inverse=True)
214        self.unique_bond_keys = dummy[0]
215        """array with unique bond keys"""
216        self.unique_bond_indexes = dummy[1]
217        """array with unique bond indexes"""
218        self.unique_bond_inverse = dummy[2]
219        """array to invert unique bonds"""
220
221        self.unique_bond_names = self.bond_names[self.unique_bond_indexes]
222        """array with unique bond names"""
223        self.unique_bond_numbers = self.bond_list[self.unique_bond_indexes]
224        """array with numbers of unique bond names"""
225        if len(self.bond_keys) > 0:
226            assert np.array_equal(
227                self.unique_bond_keys[self.unique_bond_inverse], self.bond_keys
228            ), "unique bond in inverse doesnt work"
229
230        dummy = unique_sort(self.angle_keys, return_inverse=True)
231        self.unique_angle_keys = dummy[0]
232        """array with unique angle keys"""
233        self.unique_angle_indexes = dummy[1]
234        """array with unique angle indexes"""
235        self.unique_angle_inverse = dummy[2]
236        """array to invert unique angles"""
237
238        self.unique_angle_names = self.angle_names[self.unique_angle_indexes]
239        """array with unique angle names"""
240        self.unique_angle_numbers = self.angle_list[self.unique_angle_indexes]
241        """array with numbers of unique angle names"""
242        if len(self.angle_keys) > 0:
243            assert np.array_equal(
244                self.unique_angle_keys[self.unique_angle_inverse], self.angle_keys
245            ), "unique angle in inverse doesnt work"
246
247        dummy = unique_sort(self.torsion_keys, return_inverse=True)
248        self.unique_torsion_keys = dummy[0]
249        """array with unique torsion keys"""
250        self.unique_torsion_indexes = dummy[1]
251        """array with unique torsion indexes"""
252        self.unique_torsion_inverse = dummy[2]
253        """array to invert unique torsions"""
254        
255        self.unique_torsion_names = self.torsion_names[self.unique_torsion_indexes]
256        """array with unique torsion names"""
257        self.unique_torsion_numbers = self.torsion_list[self.unique_torsion_indexes]
258        """array with numbers of unique torsion names"""
259        if len(self.torsion_keys) > 0:
260            assert np.array_equal(
261                self.unique_torsion_keys[self.unique_torsion_inverse], self.torsion_keys
262            ), "unique angle in inverse doesnt work"
263
264        self.unique_atom_pair_names = []
265        self.unique_atom_pair_keys = []
266        for i, a0 in enumerate(self.unique_atom_names):
267            for j, a1 in enumerate(self.unique_atom_names):
268                if i > j:
269                    self.unique_atom_pair_names.append(sort_force_fields([a0, a1]))
270                    self.unique_atom_pair_keys.append(
271                        make_graph(self.unique_atom_pair_names[-1])
272                    )
273        self.unique_atom_pair_names = np.array(self.unique_atom_pair_names)
274        """array with unique atom pair names. use them for combining rules."""
275        self.unique_atom_pair_keys = np.array(self.unique_atom_pair_keys)
276        """array with unique atom pair keys. use them for combining rules."""
277        return
278
279    def reinit_mol(self,mol):
280        """
281        reinitializes moleculeclass
282        
283        """
284        
285        self.molstring = mol
286        if isinstance(mol, str):
287            mol = mol[1:-1].split("][")
288        
289        self.molecule = np.array(mol)
290        """array representation of the molecule i.e. all keys/words/whatever in a list"""
291        self.f = np.zeros(len(mol))  # shows function
292        """array with all functions. 
293        - branches pointing forward f > 0
294        - rings pointing backward f < 0
295        - beads without function f = 0
296        """
297        self.n = -1 * np.ones(len(mol))  # shows atomnumber
298        """array with atom numbers. branches and rings n = -1 as they are no atoms"""        
299        self.i = np.arange(len(mol))  # shows index
300        """array with indexes. atoms, branches and rings get an index"""         
301        self.len = len(self.i)
302        """number of all elements i.e. atoms, branches and rings"""       
303        n = 0
304        for i, m in enumerate(mol):
305            if m[0] == "b":
306                self.f[i] = int(m[1:])
307            elif m[0] == "r":
308                # dum = -int(m[1:])
309                self.f[i] = -int(m[1:])  # self.get_atom_index()
310            else:
311                self.n[i] = n
312                n += 1    
313        return    
314    
315    
316    def get_neighbour_list(self):
317        """
318        generates a neighbour list from an initialized molecule
319
320        Parameters: None
321
322        Returns:
323        - neighbour_list:
324            - list of neighboured atoms containing their atom numbers.
325        - idx_neighbour_list:
326            - list of neighboured atoms containing their atom indexes.
327        """ 
328
329        idx_neighbour_list = []
330        idx_branch_ends = np.empty(0)
331        idx_branch_starts = np.empty(0)
332        idx_ring_close = np.empty(0)
333        idx_branch_funs = np.empty(0)
334
335        branches = np.zeros(len(self.i))
336        branch_origins = np.zeros(len(self.i))
337        branch_count = 1
338
339        if self.atom_number < 2:
340            self.idx_neighbour_list = np.array([])
341            self.neighbour_list = np.array([])
342            self.idx_bond_list = np.array([])
343            self.bond_list = np.array([])
344            return np.array([]), np.array([])
345
346        """
347        find branches and assign numbers.
348        get main path through molecule.
349        build molecule from this...
350        """
351        inv_f = self.f[::-1]
352        branch_no = 1
353        inv_f_p = np.atleast_1d(np.squeeze(np.where(inv_f != 0)))
354        inv_branches = inv_f.copy()
355        inv_branches[inv_branches != 0] = -1
356        for i, ifi in zip(inv_f_p, inv_f[inv_f_p]):
357            if ifi < 0:  # ring
358                inv_branches[i] = -1
359            if ifi > 0:  # branch :)
360                pp = np.atleast_1d(np.squeeze(np.where(inv_branches[:i] == 0)))
361                if len(pp) > ifi:
362                    p_branch = pp[-int(ifi) :]
363                    inv_branches[p_branch] = branch_no
364                    inv_branches[i] = branch_no
365                    branch_no += 1
366        branches = reenumerate_branches(inv_branches[::-1])
367        # print(branches)
368
369        main_path_p = np.where(branches == 0)
370        main_path_i = self.i[main_path_p]
371
372        # get bond list of main path
373        bond_list = bond_list_from_simple_path(main_path_i)
374        # print( "branches",branches )
375
376        """
377        build bond list
378        """
379        f_p = np.atleast_1d(np.squeeze(np.where(self.f != 0)))
380        self.ring_root_indexes = []
381        # print("f_p , self.f[f_p]", f_p, self.f[f_p])
382        for i, fi in zip(f_p, self.f[f_p]):
383            #print(branches)
384            if fi < 0:
385                idx = np.max(
386                    get_diff_in_bond_lists(f_p[f_p < i], np.arange(i))
387                )  # index of last atom
388                subgraph = graph_from_bonds(bond_list)
389                path_to_main = get_shortest_path(subgraph, 0, idx)
390
391                bond_list_to_main = bond_list_from_simple_path(path_to_main)
392                graph_to_main = graph_from_bonds(bond_list_to_main)
393                root = get_root(graph_to_main, idx, int(np.abs(fi)))
394                branches[i] = branches[root] # connect ring to acompanying branch
395                if root < 0 and self.ring_point_to_first:
396                    if fi+root >= self.min_ring_size:
397                        root = np.min(self.atom_indexes)
398                        bond_list = np.concatenate([bond_list, [[root, idx]]])
399                        self.ring_root_indexes.append(root)                    
400                        print("root of ring outside molecule. point to first atom.")
401                    else:
402                        print( "ring size smaller:",self.min_ring_size,". ignore")
403                elif root < 0:
404                    print("root of ring outside molecule. ignore.")
405                else:
406                    bond_list = np.concatenate([bond_list, [[root, idx]]])
407                    self.ring_root_indexes.append(root)
408            elif fi > 0:
409                link = get_branch_root(branches[i], branches)
410                if not self.is_atom(link):
411                    link = self.get_last_atom_idx(link)
412                branch = branches[i]
413                p_subchain_i = np.atleast_1d(np.squeeze(np.where(branches == branch)))[
414                    1:
415                ]
416                if len(p_subchain_i) < fi and self.branch_point_to_last:
417                    link = np.max(self.atom_indexes)
418                    p_subchain_i = np.concatenate([[link], p_subchain_i])
419                    subchain_bond_list = bond_list_from_simple_path(p_subchain_i)
420                    bond_list = np.concatenate([bond_list, subchain_bond_list])                    
421                    print("end of branch outside molecule. point to last atom.")
422                elif len(p_subchain_i) < fi:
423                    print("end of branch outside molecule. ignore.")
424                else:
425                    p_subchain_i = np.concatenate([[link], p_subchain_i])
426                    subchain_bond_list = bond_list_from_simple_path(p_subchain_i)
427                    bond_list = np.concatenate([bond_list, subchain_bond_list])
428            else:
429                print("fi = 0... this should not happen :P")
430        #print("fin branches", branches)
431        self.idx_neighbour_list = bond_list
432        self.idx_bond_list = bond_list
433        self.ring_root_indexes = np.array(self.ring_root_indexes)
434        
435        self.neighbour_list = np.vectorize(self.get_atom_no)(self.idx_neighbour_list)
436        self.bond_list = self.neighbour_list
437        return self.neighbour_list, self.idx_neighbour_list
438
439    def get_bond_matrix(self):
440        """
441        generates bond matrix from an initialized molecule
442
443        Parameters: None
444
445        Returns:
446        - bond_matrix:
447            - bond matrix of you molecule
448        """
449        self.bond_matrix = get_bond_matrix(self.neighbour_list, self.atom_number)
450        return self.bond_matrix.copy()
451
452    def get_distance_matrix(self):
453        """
454        generates distance matrix from an initialized molecule.
455        Furthermore different angle and torsion lists are generated.
456
457        Parameters: None
458        
459        Returns:
460        - distance_matrix:
461            - distance matrix of you molecule
462        """
463        self.distance_matrix, self.bond_lists = get_distance_matrix(
464            self.neighbour_list, self.atom_number
465        )
466        self.bond_list = self.bond_lists[0]
467        self.bond_names = np.array(
468            [sort_force_fields(self.atom_names[a]) for a in self.bond_list]
469        )
470        self.bond_keys = np.array([make_graph(t) for t in self.bond_names])
471        if len(self.bond_lists) > 1:
472            self.angle_list = np.array(self.bond_lists[1])
473            self.angle_names = np.array(
474                [sort_force_fields(self.atom_names[a]) for a in self.angle_list]
475            )
476            self.angle_keys = np.array([make_graph(t) for t in self.angle_names])
477        else:
478            self.angle_list = np.array([])
479            self.angle_names = np.array([])
480            self.angle_keys = np.array([])
481        if len(self.bond_lists) > 2:
482            self.torsion_list = np.array(self.bond_lists[2])
483            self.torsion_names = np.array(
484                [sort_force_fields(self.atom_names[a]) for a in self.torsion_list]
485            )
486            self.torsion_keys = np.array([make_graph(t) for t in self.torsion_names])
487        else:
488            self.torsion_list = np.array([])
489            self.torsion_names = np.array([])
490            self.torsion_keys = np.array([])
491        return self.distance_matrix.copy()
492    
493    def is_atom(self,idx):
494        """
495        checks if index is an atom.
496        - Number: atom number
497        - Index:  index in list representation of molecule.
498        Number and index are differing due to functionals in the list.
499
500        Parameters:
501        - idx:
502            - idx of the atom
503            
504        Returns:
505        - boolean
506        """
507        if idx in self.atom_indexes:
508            return True
509        else:
510            return False
511
512    def get_atom_index(self, no):
513        """
514        returns index of atom.
515        - Number: atom number
516        - Index:  index in list representation of molecule.
517        Number and index are differing due to functionals in the list.
518
519        Parameters:
520        - no:
521           - number of the atom
522           
523        Returns:
524        - index of atom number "no"
525        """
526        return int(self.atom_indexes[int(no)])
527
528    def get_atom_no(self, idx):
529        """
530        returns index of atom.
531        - Number: atom number
532        - Index:  index in list representation of molecule.
533        Number and index are differing due to functionals in the list.
534
535        Parameters:
536        - idx:
537            - idx of the atom
538            
539        Returns:
540        - number of atom with index
541        """
542        idx = int(idx)
543        if self.f[idx] >= 0:
544            no = int(np.squeeze(np.where(self.atom_indexes == idx)))
545            assert no == self.n[idx], "atom numbers dont work,idx"
546            return no
547        else:
548            assert self.n[idx] == -1, "atom numbers dont work, n=-1"
549            print("Error in get_atom_no: not an atom",idx)
550            return False
551
552    def get_next_atom_idx(self, idx):
553        """
554        returns index of the next atom after index idx.
555        Index:  index in list representation of molecule.
556        Number and index are differing due to functionals in the list.
557
558        Parameters:
559        idx:
560            - index in list representation of molecule
561            
562        Returns:
563        - index of the next atom after index idx
564        """
565        return int(np.min(self.atom_indexes[(self.atom_indexes - idx) > 0]))
566
567    def get_last_atom_idx(self, idx):
568        """
569        returns index of the atom before index idx.
570        Index:  index in list representation of molecule.
571        Number and index are differing due to functionals in the list.
572
573        Parameters:
574        idx:
575            - index in list representation of molecule
576            
577        Returns:
578        - index of the atom before index idx
579        """
580        return int(np.max(self.atom_indexes[(self.atom_indexes - idx) < 0]))
581
582    def get_next_atom_no(self, idx):
583        """
584        returns number of the atom after index idx.
585        Index:  index in list representation of molecule.
586        Number: atom number
587        Number and index are differing due to functionals in the list.
588
589        Parameters:
590        - idx:
591            - index in list representation of molecule
592            
593        Returns:
594        - number of the atom after index idx
595        """
596        return int(self.n[self.get_next_atom_idx(idx)])
597
598    def get_last_atom_no(self, idx):
599        """
600        returns number of the atom before index idx.
601        Index:  index in list representation of molecule.
602        Number: atom number
603        Number and index are differing due to functionals in the list.
604
605        Parameters:
606            self:
607                - initialized molecule.
608            idx:
609                - index in list representation of molecule
610                
611        Returns:
612            number of the atom before index idx
613        """
614        return int(self.n[self.get_last_atom_idx(idx)])
615
616    def map_molecule_via_atom_names(self, data):
617        """
618        maps information from a dictionary onto the molecule.
619        Use this for molecule information i.e. coordinates, group contributions,...
620
621        Parameters:
622        - data:
623            - dictionary with: keys == atom names !!!
624            
625        Returns:
626        - mapped data
627        """
628        return [*map(data.get, self.atom_names)]
629
630    def map_unique_via_atom_names(self, data):
631        """
632        Maps information from a dictionary onto unique elements of the molecule.
633        Preserves the order of the original list.
634        Use this for force-field information i.e. potentials,...
635
636        Parameters:
637        - data:
638            - dictionary with: keys == atom names !!!
639            
640        Returns:
641        - mapped unique data
642        """
643        print(
644            "map_unique_via_atom_names is outtdated... dont use\n shouldnt cause errors btw"
645        )
646        return [*map(data.get, self.unique_atom_names)]
647
648    def map_molecule(self, goal, data):
649        """
650        maps information from a dictionary onto a goal.
651        Use this for molecule information i.e. coordinates, group contributions,...
652
653
654        Parameters:
655        - goal:
656            - list of keys
657        - data:
658            - dictionary with keys in goal
659            
660        Returns:
661        - mapped data
662        """
663        return [*map(data.get, goal)]
664
665    def map_unique(self, goal, data):
666        """
667        Maps information from a dictionary onto unique elements a goal.
668        Preserves the order of the original list.
669        Use this for force-field information i.e. potentials,...
670
671        Parameters:
672        - goal:
673            - list of keys
674        - data:
675            - dictionary with keys in goal
676            
677        Returns:
678        - mapped unique data
679        """
680        print("map_unique is outtdated... dont use\n shouldnt cause errors btw")
681        return [*map(data.get, unique_sort(goal))]
682
683    def map_nested(self, goal, data):
684        """
685        Maps information from a dictionary onto nested elements of a goal.
686        Preserves the order of the original list.
687        Use this for advanced force-field information i.e. mixing rules,...
688
689        same as:
690        [ self.map_unique( m, data) for m in goal ]
691
692        Parameters:
693        - goal:
694            -with list list of keys
695        - data:
696            -dictionary with keys in goal
697            
698        Returns:
699        - mapped unique data
700        """
701        return [[*map(data.get, m)] for m in goal]
702
703    def get_distance(self, x, y):
704        """
705        Returns distance between atoms no x and y
706        0: same atom i.e. x==y
707        1: bond
708        2: angle
709        3: torsion
710
711        Parameters:
712        - x,y:
713            - atom numbers
714            
715        Returns:
716        - distance
717        """
718        return self.distance_matrix[int(x)][int(y)]
719
720    def get_sorted_key(self, x):
721        """
722        Sorts a list alphabetically.
723        Very important for dictionary keys in the molecule class!!!
724
725        Parameters:
726        - x:
727            - string list to sort
728            
729        Returns:
730        - key (string) and sorted list
731        """
732        x_sort = sort_force_fields(x)
733        return make_graph(x_sort), x_sort
734
735    def visualize(self, saveto=""):
736        """
737        plots and visualizes molecule
738        
739        Parameters:
740        - saveto:
741            - optional, location to save pic of graph to,
742            - add file format at the end ;)
743            
744        Returns:
745            nothing
746        """
747        if self.atom_numbers.size > 1:
748            plot_graph(self.atom_names, self.bond_list, saveto=saveto)
749        else:
750            print("one bead only")
751        return

simple undirected graph representation for molecules and more.

write code about molecules without the need to care about them.

syntax only. Bring your own semantics.

Main results are:

  • distance matrix of the molecule.
  • atom, bond, angle, torsion lists.

YOU can use them to:

  • map force-field information onto your molecule.
  • generate inputs for templating trajectories and coordinates
  • use group contribution approaches
  • process coordinates from the PubChem-API

Furthermore:

  • this graph-representation is quite general and not limited to molecules.
  • ...you might use it to represent your favorite baking recipes and optimize them.
molecule( molstring, splitter=<function split_molstring>, get_syntax=<function get_syntax_from_numbers>, branch_point_to_last=False, ring_point_to_first=False)
 45    def __init__(self, molstring, splitter=split_molstring, get_syntax=get_syntax_from_numbers,
 46                 branch_point_to_last=False, ring_point_to_first=False,
 47                ):
 48        """
 49        initializes molecules based on graph-list representation.
 50        
 51        The term index (or idx) always refers to a character in the string, 
 52        be it semantic or syntactic. Semantics and syntax of the string are 
 53        no longer important once the bond list and thus the actual graph is 
 54        created. Then there are only semantic objects, i.e. atoms.
 55        Thus, all numbers occuring in bond, angle or torsional lists or in
 56        distance matrices refer to atomic numbers. Indexes and syntactic lists 
 57        are only important for advanced applications. For example, generating
 58        strings with the same meaning. If both options are available they are
 59        are marked with index/indexes/idx and number/numbers. For example:
 60        ring_root_numbers and ring_root_indexes.
 61
 62        Parameters:
 63        - mol:
 64            - string representation of a molecule.
 65        - splitter:
 66            - function that splits input into syntactic/semantic elements
 67            - returns np.array with all elements
 68            - default: split_molstring from molecule_utils
 69            - you can pass your own function :)
 70        - get_syntax:
 71            - function that generates atom and functional descriptors from 
 72              syntactic/semantic elements
 73            - returns two np.arrays:
 74                -ff: functions array
 75                    - branches pointing forward f > 0
 76                    - rings pointing backward f < 0
 77                    - beads without function f = 0 
 78                -nn: atoms array
 79                    - contains atom numbers
 80                    - functions are -1
 81            - default: get_syntax_from_numbers from molecule_utils   
 82            - you can pass your own function :)
 83        - branch_point_to_last:
 84            - boolean, False 
 85            - comes into play when branch points outside molecule
 86            - if True branch will point to last atom
 87        - ring_point_to_first:
 88            - boolean, False
 89            - comes into play when ring points outside molecule
 90            - if True ring will point to first atom
 91        
 92        Returns:
 93            nothing
 94        """
 95        
 96        self.branch_point_to_last= branch_point_to_last
 97        """boolean if True branch that points outside molecule will point to last atom"""
 98        self.ring_point_to_first = ring_point_to_first
 99        """boolean if True ring that points outside molecule will point to first atom"""
100        
101        mol           = np.array( splitter(molstring) )
102        self.molecule = np.array( splitter(molstring) )
103        """array representation of the molecule i.e. all keys/words/whatever in a list"""
104        
105        syntactic_elements = get_syntax(mol)
106        
107        self.f = np.array( syntactic_elements[0] ).astype(int)  # shows function
108        """array with all functions. 
109        - branches pointing forward f > 0
110        - rings pointing backward f < 0
111        - beads without function f = 0
112        """
113        self.n = np.array( syntactic_elements[1] ).astype(int)  # shows atomnumber
114        """array with atom numbers. branches and rings n = -1 as they are no atoms"""        
115        self.i = np.arange(len(mol))  # shows index
116        """array with indexes. atoms, branches and rings get an index"""         
117        self.len = len(self.i)
118        """number of all elements i.e. atoms, branches and rings"""       
119
120        # contains indexes of atoms
121        # index of atom no n: self.atom_indexes[n]
122        self.atom_indexes = self.i[self.f == 0]
123        """array with indexes of all atoms"""
124        self.atom_names = self.molecule[self.f == 0]
125        """array with names of all atoms"""
126        self.atom_keys = np.array( [ "["+x+"]" for x in self.atom_names ] )
127        """array with keys of all atoms"""        
128        self.atom_number = len(self.atom_indexes)
129        """number of all atoms"""
130        self.atom_numbers = np.arange(self.atom_number)
131        """array with numbers of all atoms"""
132        
133        # housekeeping for get_neighbour_list (will be called later)
134        # (the following things are generated when calling get_neighbour_list)
135        self.idx_neighbour_list = np.array([])
136        """array with indexes of all neighboured/bonded atoms"""
137        self.neighbour_list = np.array([])
138        """array with numbers of all neighboured/bonded atoms"""
139        self.ring_root_indexes = np.array([])
140        """array with indexes of all atoms rooting a ring"""
141       
142        
143        # housekeeping for get_distance_matrix (will be called later)
144        # (the following things are generated when calling get_distance_matrix)
145        self.distance_matrix = np.array([])
146        """distance matrix of all atoms in the molecule"""
147        self.bond_lists = np.array([])
148        """nested array with bond lists 
149        - starting from atoms separeted by one bond bond_lists[0]
150        - then atoms separeted by two bonds bond_lists[1] 
151        - ...
152        """
153        self.idx_bond_list = np.array([])
154        """array with indexes of all neighboured/bonded atoms"""
155        self.bond_list = np.array([])  
156        """array with numbers of all neighboured/bonded atoms"""
157        self.bond_keys  = np.array([])
158        """array with keys of all neighboured/bonded atoms"""
159        self.angle_list = np.array([])
160        """array of numbers of all angle forming atoms (separated by 2 bonds)"""
161        self.angle_names = np.array([])
162        """array of names of all angle forming atoms (separated by 2 bonds)"""
163        self.angle_keys = np.array([])
164        """array of keys of all angle forming atoms (separated by 2 bonds)"""
165        self.torsion_list = np.array([])
166        """array of numbers of all dihedral forming atoms (separated by 3 bonds)"""
167        self.torsion_names = np.array([])
168        """array of names of all dihedral forming atoms (separated by 3 bonds)"""
169        self.torsion_keys = np.array([])        
170        """array of keys of all dihedral forming atoms (separated by 3 bonds)"""
171
172        # get bond list
173        self.get_neighbour_list()        
174        
175        # get branch ends
176        n, count = np.unique(self.bond_list, return_counts=True)
177        pp = np.squeeze(np.where(count == 1))
178        if np.sum(pp) > 0:
179            self.branch_end_numbers = n[pp]
180            self.branch_end_indexes = self.atom_indexes[self.branch_end_numbers]
181        else:
182            self.branch_end_numbers = np.empty(0)
183            self.branch_end_indexes = np.empty(0)
184        # get ring closures
185        self.ring_close_indexes = np.squeeze(np.where(self.f < 0)) - 1
186        """array with indexes of all atoms closing a ring"""
187        if np.sum(self.ring_close_indexes.shape) > 0:
188            self.ring_close_numbers = self.n[self.ring_close_indexes]
189            """array with atom numbers of all atoms closing a ring"""
190            self.ring_root_numbers = self.n[self.ring_root_indexes]
191            """array with atom numbers of all atoms rooting a ring"""
192        else:
193            self.ring_close_numbers = np.empty(0)
194            self.ring_root_numbers = np.empty(0)
195
196        # get distance matrix
197        self.get_distance_matrix()
198
199        dummy = unique_sort(self.atom_names, return_inverse=True)
200        (
201            self.unique_atom_keys,
202            self.unique_atom_indexes,
203            self.unique_atom_inverse,
204        ) = dummy
205        self.unique_atom_names = self.atom_names[self.unique_atom_indexes]
206        """array with unique atom names"""
207        self.unique_atom_numbers = self.atom_numbers[self.unique_atom_indexes]
208        """array with numbers of unique atom names"""
209        assert np.array_equal(
210            self.unique_atom_keys[self.unique_atom_inverse], self.atom_names
211        ), "unique atom in inverse doesnt work"
212
213        dummy = unique_sort(self.bond_keys, return_inverse=True)
214        self.unique_bond_keys = dummy[0]
215        """array with unique bond keys"""
216        self.unique_bond_indexes = dummy[1]
217        """array with unique bond indexes"""
218        self.unique_bond_inverse = dummy[2]
219        """array to invert unique bonds"""
220
221        self.unique_bond_names = self.bond_names[self.unique_bond_indexes]
222        """array with unique bond names"""
223        self.unique_bond_numbers = self.bond_list[self.unique_bond_indexes]
224        """array with numbers of unique bond names"""
225        if len(self.bond_keys) > 0:
226            assert np.array_equal(
227                self.unique_bond_keys[self.unique_bond_inverse], self.bond_keys
228            ), "unique bond in inverse doesnt work"
229
230        dummy = unique_sort(self.angle_keys, return_inverse=True)
231        self.unique_angle_keys = dummy[0]
232        """array with unique angle keys"""
233        self.unique_angle_indexes = dummy[1]
234        """array with unique angle indexes"""
235        self.unique_angle_inverse = dummy[2]
236        """array to invert unique angles"""
237
238        self.unique_angle_names = self.angle_names[self.unique_angle_indexes]
239        """array with unique angle names"""
240        self.unique_angle_numbers = self.angle_list[self.unique_angle_indexes]
241        """array with numbers of unique angle names"""
242        if len(self.angle_keys) > 0:
243            assert np.array_equal(
244                self.unique_angle_keys[self.unique_angle_inverse], self.angle_keys
245            ), "unique angle in inverse doesnt work"
246
247        dummy = unique_sort(self.torsion_keys, return_inverse=True)
248        self.unique_torsion_keys = dummy[0]
249        """array with unique torsion keys"""
250        self.unique_torsion_indexes = dummy[1]
251        """array with unique torsion indexes"""
252        self.unique_torsion_inverse = dummy[2]
253        """array to invert unique torsions"""
254        
255        self.unique_torsion_names = self.torsion_names[self.unique_torsion_indexes]
256        """array with unique torsion names"""
257        self.unique_torsion_numbers = self.torsion_list[self.unique_torsion_indexes]
258        """array with numbers of unique torsion names"""
259        if len(self.torsion_keys) > 0:
260            assert np.array_equal(
261                self.unique_torsion_keys[self.unique_torsion_inverse], self.torsion_keys
262            ), "unique angle in inverse doesnt work"
263
264        self.unique_atom_pair_names = []
265        self.unique_atom_pair_keys = []
266        for i, a0 in enumerate(self.unique_atom_names):
267            for j, a1 in enumerate(self.unique_atom_names):
268                if i > j:
269                    self.unique_atom_pair_names.append(sort_force_fields([a0, a1]))
270                    self.unique_atom_pair_keys.append(
271                        make_graph(self.unique_atom_pair_names[-1])
272                    )
273        self.unique_atom_pair_names = np.array(self.unique_atom_pair_names)
274        """array with unique atom pair names. use them for combining rules."""
275        self.unique_atom_pair_keys = np.array(self.unique_atom_pair_keys)
276        """array with unique atom pair keys. use them for combining rules."""
277        return

initializes molecules based on graph-list representation.

The term index (or idx) always refers to a character in the string, be it semantic or syntactic. Semantics and syntax of the string are no longer important once the bond list and thus the actual graph is created. Then there are only semantic objects, i.e. atoms. Thus, all numbers occuring in bond, angle or torsional lists or in distance matrices refer to atomic numbers. Indexes and syntactic lists are only important for advanced applications. For example, generating strings with the same meaning. If both options are available they are are marked with index/indexes/idx and number/numbers. For example: ring_root_numbers and ring_root_indexes.

Parameters:

  • mol:
    • string representation of a molecule.
  • splitter:
    • function that splits input into syntactic/semantic elements
    • returns np.array with all elements
    • default: split_molstring from molecule_utils
    • you can pass your own function :)
  • get_syntax:
    • function that generates atom and functional descriptors from syntactic/semantic elements
    • returns two np.arrays: -ff: functions array - branches pointing forward f > 0 - rings pointing backward f < 0 - beads without function f = 0 -nn: atoms array - contains atom numbers - functions are -1
    • default: get_syntax_from_numbers from molecule_utils
    • you can pass your own function :)
  • branch_point_to_last:
    • boolean, False
    • comes into play when branch points outside molecule
    • if True branch will point to last atom
  • ring_point_to_first:
    • boolean, False
    • comes into play when ring points outside molecule
    • if True ring will point to first atom

Returns: nothing

branch_point_to_last

boolean if True branch that points outside molecule will point to last atom

ring_point_to_first

boolean if True ring that points outside molecule will point to first atom

molecule

array representation of the molecule i.e. all keys/words/whatever in a list

f

array with all functions.

  • branches pointing forward f > 0
  • rings pointing backward f < 0
  • beads without function f = 0
n

array with atom numbers. branches and rings n = -1 as they are no atoms

i

array with indexes. atoms, branches and rings get an index

len

number of all elements i.e. atoms, branches and rings

atom_indexes

array with indexes of all atoms

atom_names

array with names of all atoms

atom_keys

array with keys of all atoms

atom_number

number of all atoms

atom_numbers

array with numbers of all atoms

idx_neighbour_list

array with indexes of all neighboured/bonded atoms

neighbour_list

array with numbers of all neighboured/bonded atoms

ring_root_indexes

array with indexes of all atoms rooting a ring

distance_matrix

distance matrix of all atoms in the molecule

bond_lists

nested array with bond lists

  • starting from atoms separeted by one bond bond_lists[0]
  • then atoms separeted by two bonds bond_lists[1]
  • ...
idx_bond_list

array with indexes of all neighboured/bonded atoms

bond_list

array with numbers of all neighboured/bonded atoms

bond_keys

array with keys of all neighboured/bonded atoms

angle_list

array of numbers of all angle forming atoms (separated by 2 bonds)

angle_names

array of names of all angle forming atoms (separated by 2 bonds)

angle_keys

array of keys of all angle forming atoms (separated by 2 bonds)

torsion_list

array of numbers of all dihedral forming atoms (separated by 3 bonds)

torsion_names

array of names of all dihedral forming atoms (separated by 3 bonds)

torsion_keys

array of keys of all dihedral forming atoms (separated by 3 bonds)

ring_close_indexes

array with indexes of all atoms closing a ring

unique_atom_names

array with unique atom names

unique_atom_numbers

array with numbers of unique atom names

unique_bond_keys

array with unique bond keys

unique_bond_indexes

array with unique bond indexes

unique_bond_inverse

array to invert unique bonds

unique_bond_names

array with unique bond names

unique_bond_numbers

array with numbers of unique bond names

unique_angle_keys

array with unique angle keys

unique_angle_indexes

array with unique angle indexes

unique_angle_inverse

array to invert unique angles

unique_angle_names

array with unique angle names

unique_angle_numbers

array with numbers of unique angle names

unique_torsion_keys

array with unique torsion keys

unique_torsion_indexes

array with unique torsion indexes

unique_torsion_inverse

array to invert unique torsions

unique_torsion_names

array with unique torsion names

unique_torsion_numbers

array with numbers of unique torsion names

unique_atom_pair_names

array with unique atom pair names. use them for combining rules.

unique_atom_pair_keys

array with unique atom pair keys. use them for combining rules.

def reinit_mol(self, mol):
279    def reinit_mol(self,mol):
280        """
281        reinitializes moleculeclass
282        
283        """
284        
285        self.molstring = mol
286        if isinstance(mol, str):
287            mol = mol[1:-1].split("][")
288        
289        self.molecule = np.array(mol)
290        """array representation of the molecule i.e. all keys/words/whatever in a list"""
291        self.f = np.zeros(len(mol))  # shows function
292        """array with all functions. 
293        - branches pointing forward f > 0
294        - rings pointing backward f < 0
295        - beads without function f = 0
296        """
297        self.n = -1 * np.ones(len(mol))  # shows atomnumber
298        """array with atom numbers. branches and rings n = -1 as they are no atoms"""        
299        self.i = np.arange(len(mol))  # shows index
300        """array with indexes. atoms, branches and rings get an index"""         
301        self.len = len(self.i)
302        """number of all elements i.e. atoms, branches and rings"""       
303        n = 0
304        for i, m in enumerate(mol):
305            if m[0] == "b":
306                self.f[i] = int(m[1:])
307            elif m[0] == "r":
308                # dum = -int(m[1:])
309                self.f[i] = -int(m[1:])  # self.get_atom_index()
310            else:
311                self.n[i] = n
312                n += 1    
313        return    

reinitializes moleculeclass

def get_neighbour_list(self):
316    def get_neighbour_list(self):
317        """
318        generates a neighbour list from an initialized molecule
319
320        Parameters: None
321
322        Returns:
323        - neighbour_list:
324            - list of neighboured atoms containing their atom numbers.
325        - idx_neighbour_list:
326            - list of neighboured atoms containing their atom indexes.
327        """ 
328
329        idx_neighbour_list = []
330        idx_branch_ends = np.empty(0)
331        idx_branch_starts = np.empty(0)
332        idx_ring_close = np.empty(0)
333        idx_branch_funs = np.empty(0)
334
335        branches = np.zeros(len(self.i))
336        branch_origins = np.zeros(len(self.i))
337        branch_count = 1
338
339        if self.atom_number < 2:
340            self.idx_neighbour_list = np.array([])
341            self.neighbour_list = np.array([])
342            self.idx_bond_list = np.array([])
343            self.bond_list = np.array([])
344            return np.array([]), np.array([])
345
346        """
347        find branches and assign numbers.
348        get main path through molecule.
349        build molecule from this...
350        """
351        inv_f = self.f[::-1]
352        branch_no = 1
353        inv_f_p = np.atleast_1d(np.squeeze(np.where(inv_f != 0)))
354        inv_branches = inv_f.copy()
355        inv_branches[inv_branches != 0] = -1
356        for i, ifi in zip(inv_f_p, inv_f[inv_f_p]):
357            if ifi < 0:  # ring
358                inv_branches[i] = -1
359            if ifi > 0:  # branch :)
360                pp = np.atleast_1d(np.squeeze(np.where(inv_branches[:i] == 0)))
361                if len(pp) > ifi:
362                    p_branch = pp[-int(ifi) :]
363                    inv_branches[p_branch] = branch_no
364                    inv_branches[i] = branch_no
365                    branch_no += 1
366        branches = reenumerate_branches(inv_branches[::-1])
367        # print(branches)
368
369        main_path_p = np.where(branches == 0)
370        main_path_i = self.i[main_path_p]
371
372        # get bond list of main path
373        bond_list = bond_list_from_simple_path(main_path_i)
374        # print( "branches",branches )
375
376        """
377        build bond list
378        """
379        f_p = np.atleast_1d(np.squeeze(np.where(self.f != 0)))
380        self.ring_root_indexes = []
381        # print("f_p , self.f[f_p]", f_p, self.f[f_p])
382        for i, fi in zip(f_p, self.f[f_p]):
383            #print(branches)
384            if fi < 0:
385                idx = np.max(
386                    get_diff_in_bond_lists(f_p[f_p < i], np.arange(i))
387                )  # index of last atom
388                subgraph = graph_from_bonds(bond_list)
389                path_to_main = get_shortest_path(subgraph, 0, idx)
390
391                bond_list_to_main = bond_list_from_simple_path(path_to_main)
392                graph_to_main = graph_from_bonds(bond_list_to_main)
393                root = get_root(graph_to_main, idx, int(np.abs(fi)))
394                branches[i] = branches[root] # connect ring to acompanying branch
395                if root < 0 and self.ring_point_to_first:
396                    if fi+root >= self.min_ring_size:
397                        root = np.min(self.atom_indexes)
398                        bond_list = np.concatenate([bond_list, [[root, idx]]])
399                        self.ring_root_indexes.append(root)                    
400                        print("root of ring outside molecule. point to first atom.")
401                    else:
402                        print( "ring size smaller:",self.min_ring_size,". ignore")
403                elif root < 0:
404                    print("root of ring outside molecule. ignore.")
405                else:
406                    bond_list = np.concatenate([bond_list, [[root, idx]]])
407                    self.ring_root_indexes.append(root)
408            elif fi > 0:
409                link = get_branch_root(branches[i], branches)
410                if not self.is_atom(link):
411                    link = self.get_last_atom_idx(link)
412                branch = branches[i]
413                p_subchain_i = np.atleast_1d(np.squeeze(np.where(branches == branch)))[
414                    1:
415                ]
416                if len(p_subchain_i) < fi and self.branch_point_to_last:
417                    link = np.max(self.atom_indexes)
418                    p_subchain_i = np.concatenate([[link], p_subchain_i])
419                    subchain_bond_list = bond_list_from_simple_path(p_subchain_i)
420                    bond_list = np.concatenate([bond_list, subchain_bond_list])                    
421                    print("end of branch outside molecule. point to last atom.")
422                elif len(p_subchain_i) < fi:
423                    print("end of branch outside molecule. ignore.")
424                else:
425                    p_subchain_i = np.concatenate([[link], p_subchain_i])
426                    subchain_bond_list = bond_list_from_simple_path(p_subchain_i)
427                    bond_list = np.concatenate([bond_list, subchain_bond_list])
428            else:
429                print("fi = 0... this should not happen :P")
430        #print("fin branches", branches)
431        self.idx_neighbour_list = bond_list
432        self.idx_bond_list = bond_list
433        self.ring_root_indexes = np.array(self.ring_root_indexes)
434        
435        self.neighbour_list = np.vectorize(self.get_atom_no)(self.idx_neighbour_list)
436        self.bond_list = self.neighbour_list
437        return self.neighbour_list, self.idx_neighbour_list

generates a neighbour list from an initialized molecule

Parameters: None

Returns:

  • neighbour_list:
    • list of neighboured atoms containing their atom numbers.
  • idx_neighbour_list:
    • list of neighboured atoms containing their atom indexes.
def get_bond_matrix(self):
439    def get_bond_matrix(self):
440        """
441        generates bond matrix from an initialized molecule
442
443        Parameters: None
444
445        Returns:
446        - bond_matrix:
447            - bond matrix of you molecule
448        """
449        self.bond_matrix = get_bond_matrix(self.neighbour_list, self.atom_number)
450        return self.bond_matrix.copy()

generates bond matrix from an initialized molecule

Parameters: None

Returns:

  • bond_matrix:
    • bond matrix of you molecule
def get_distance_matrix(self):
452    def get_distance_matrix(self):
453        """
454        generates distance matrix from an initialized molecule.
455        Furthermore different angle and torsion lists are generated.
456
457        Parameters: None
458        
459        Returns:
460        - distance_matrix:
461            - distance matrix of you molecule
462        """
463        self.distance_matrix, self.bond_lists = get_distance_matrix(
464            self.neighbour_list, self.atom_number
465        )
466        self.bond_list = self.bond_lists[0]
467        self.bond_names = np.array(
468            [sort_force_fields(self.atom_names[a]) for a in self.bond_list]
469        )
470        self.bond_keys = np.array([make_graph(t) for t in self.bond_names])
471        if len(self.bond_lists) > 1:
472            self.angle_list = np.array(self.bond_lists[1])
473            self.angle_names = np.array(
474                [sort_force_fields(self.atom_names[a]) for a in self.angle_list]
475            )
476            self.angle_keys = np.array([make_graph(t) for t in self.angle_names])
477        else:
478            self.angle_list = np.array([])
479            self.angle_names = np.array([])
480            self.angle_keys = np.array([])
481        if len(self.bond_lists) > 2:
482            self.torsion_list = np.array(self.bond_lists[2])
483            self.torsion_names = np.array(
484                [sort_force_fields(self.atom_names[a]) for a in self.torsion_list]
485            )
486            self.torsion_keys = np.array([make_graph(t) for t in self.torsion_names])
487        else:
488            self.torsion_list = np.array([])
489            self.torsion_names = np.array([])
490            self.torsion_keys = np.array([])
491        return self.distance_matrix.copy()

generates distance matrix from an initialized molecule. Furthermore different angle and torsion lists are generated.

Parameters: None

Returns:

  • distance_matrix:
    • distance matrix of you molecule
def is_atom(self, idx):
493    def is_atom(self,idx):
494        """
495        checks if index is an atom.
496        - Number: atom number
497        - Index:  index in list representation of molecule.
498        Number and index are differing due to functionals in the list.
499
500        Parameters:
501        - idx:
502            - idx of the atom
503            
504        Returns:
505        - boolean
506        """
507        if idx in self.atom_indexes:
508            return True
509        else:
510            return False

checks if index is an atom.

  • Number: atom number
  • Index: index in list representation of molecule. Number and index are differing due to functionals in the list.

Parameters:

  • idx:
    • idx of the atom

Returns:

  • boolean
def get_atom_index(self, no):
512    def get_atom_index(self, no):
513        """
514        returns index of atom.
515        - Number: atom number
516        - Index:  index in list representation of molecule.
517        Number and index are differing due to functionals in the list.
518
519        Parameters:
520        - no:
521           - number of the atom
522           
523        Returns:
524        - index of atom number "no"
525        """
526        return int(self.atom_indexes[int(no)])

returns index of atom.

  • Number: atom number
  • Index: index in list representation of molecule. Number and index are differing due to functionals in the list.

Parameters:

  • no:
    • number of the atom

Returns:

  • index of atom number "no"
def get_atom_no(self, idx):
528    def get_atom_no(self, idx):
529        """
530        returns index of atom.
531        - Number: atom number
532        - Index:  index in list representation of molecule.
533        Number and index are differing due to functionals in the list.
534
535        Parameters:
536        - idx:
537            - idx of the atom
538            
539        Returns:
540        - number of atom with index
541        """
542        idx = int(idx)
543        if self.f[idx] >= 0:
544            no = int(np.squeeze(np.where(self.atom_indexes == idx)))
545            assert no == self.n[idx], "atom numbers dont work,idx"
546            return no
547        else:
548            assert self.n[idx] == -1, "atom numbers dont work, n=-1"
549            print("Error in get_atom_no: not an atom",idx)
550            return False

returns index of atom.

  • Number: atom number
  • Index: index in list representation of molecule. Number and index are differing due to functionals in the list.

Parameters:

  • idx:
    • idx of the atom

Returns:

  • number of atom with index
def get_next_atom_idx(self, idx):
552    def get_next_atom_idx(self, idx):
553        """
554        returns index of the next atom after index idx.
555        Index:  index in list representation of molecule.
556        Number and index are differing due to functionals in the list.
557
558        Parameters:
559        idx:
560            - index in list representation of molecule
561            
562        Returns:
563        - index of the next atom after index idx
564        """
565        return int(np.min(self.atom_indexes[(self.atom_indexes - idx) > 0]))

returns index of the next atom after index idx. Index: index in list representation of molecule. Number and index are differing due to functionals in the list.

Parameters: idx: - index in list representation of molecule

Returns:

  • index of the next atom after index idx
def get_last_atom_idx(self, idx):
567    def get_last_atom_idx(self, idx):
568        """
569        returns index of the atom before index idx.
570        Index:  index in list representation of molecule.
571        Number and index are differing due to functionals in the list.
572
573        Parameters:
574        idx:
575            - index in list representation of molecule
576            
577        Returns:
578        - index of the atom before index idx
579        """
580        return int(np.max(self.atom_indexes[(self.atom_indexes - idx) < 0]))

returns index of the atom before index idx. Index: index in list representation of molecule. Number and index are differing due to functionals in the list.

Parameters: idx: - index in list representation of molecule

Returns:

  • index of the atom before index idx
def get_next_atom_no(self, idx):
582    def get_next_atom_no(self, idx):
583        """
584        returns number of the atom after index idx.
585        Index:  index in list representation of molecule.
586        Number: atom number
587        Number and index are differing due to functionals in the list.
588
589        Parameters:
590        - idx:
591            - index in list representation of molecule
592            
593        Returns:
594        - number of the atom after index idx
595        """
596        return int(self.n[self.get_next_atom_idx(idx)])

returns number of the atom after index idx. Index: index in list representation of molecule. Number: atom number Number and index are differing due to functionals in the list.

Parameters:

  • idx:
    • index in list representation of molecule

Returns:

  • number of the atom after index idx
def get_last_atom_no(self, idx):
598    def get_last_atom_no(self, idx):
599        """
600        returns number of the atom before index idx.
601        Index:  index in list representation of molecule.
602        Number: atom number
603        Number and index are differing due to functionals in the list.
604
605        Parameters:
606            self:
607                - initialized molecule.
608            idx:
609                - index in list representation of molecule
610                
611        Returns:
612            number of the atom before index idx
613        """
614        return int(self.n[self.get_last_atom_idx(idx)])

returns number of the atom before index idx. Index: index in list representation of molecule. Number: atom number Number and index are differing due to functionals in the list.

Parameters: self: - initialized molecule. idx: - index in list representation of molecule

Returns: number of the atom before index idx

def map_molecule_via_atom_names(self, data):
616    def map_molecule_via_atom_names(self, data):
617        """
618        maps information from a dictionary onto the molecule.
619        Use this for molecule information i.e. coordinates, group contributions,...
620
621        Parameters:
622        - data:
623            - dictionary with: keys == atom names !!!
624            
625        Returns:
626        - mapped data
627        """
628        return [*map(data.get, self.atom_names)]

maps information from a dictionary onto the molecule. Use this for molecule information i.e. coordinates, group contributions,...

Parameters:

  • data:
    • dictionary with: keys == atom names !!!

Returns:

  • mapped data
def map_unique_via_atom_names(self, data):
630    def map_unique_via_atom_names(self, data):
631        """
632        Maps information from a dictionary onto unique elements of the molecule.
633        Preserves the order of the original list.
634        Use this for force-field information i.e. potentials,...
635
636        Parameters:
637        - data:
638            - dictionary with: keys == atom names !!!
639            
640        Returns:
641        - mapped unique data
642        """
643        print(
644            "map_unique_via_atom_names is outtdated... dont use\n shouldnt cause errors btw"
645        )
646        return [*map(data.get, self.unique_atom_names)]

Maps information from a dictionary onto unique elements of the molecule. Preserves the order of the original list. Use this for force-field information i.e. potentials,...

Parameters:

  • data:
    • dictionary with: keys == atom names !!!

Returns:

  • mapped unique data
def map_molecule(self, goal, data):
648    def map_molecule(self, goal, data):
649        """
650        maps information from a dictionary onto a goal.
651        Use this for molecule information i.e. coordinates, group contributions,...
652
653
654        Parameters:
655        - goal:
656            - list of keys
657        - data:
658            - dictionary with keys in goal
659            
660        Returns:
661        - mapped data
662        """
663        return [*map(data.get, goal)]

maps information from a dictionary onto a goal. Use this for molecule information i.e. coordinates, group contributions,...

Parameters:

  • goal:
    • list of keys
  • data:
    • dictionary with keys in goal

Returns:

  • mapped data
def map_unique(self, goal, data):
665    def map_unique(self, goal, data):
666        """
667        Maps information from a dictionary onto unique elements a goal.
668        Preserves the order of the original list.
669        Use this for force-field information i.e. potentials,...
670
671        Parameters:
672        - goal:
673            - list of keys
674        - data:
675            - dictionary with keys in goal
676            
677        Returns:
678        - mapped unique data
679        """
680        print("map_unique is outtdated... dont use\n shouldnt cause errors btw")
681        return [*map(data.get, unique_sort(goal))]

Maps information from a dictionary onto unique elements a goal. Preserves the order of the original list. Use this for force-field information i.e. potentials,...

Parameters:

  • goal:
    • list of keys
  • data:
    • dictionary with keys in goal

Returns:

  • mapped unique data
def map_nested(self, goal, data):
683    def map_nested(self, goal, data):
684        """
685        Maps information from a dictionary onto nested elements of a goal.
686        Preserves the order of the original list.
687        Use this for advanced force-field information i.e. mixing rules,...
688
689        same as:
690        [ self.map_unique( m, data) for m in goal ]
691
692        Parameters:
693        - goal:
694            -with list list of keys
695        - data:
696            -dictionary with keys in goal
697            
698        Returns:
699        - mapped unique data
700        """
701        return [[*map(data.get, m)] for m in goal]

Maps information from a dictionary onto nested elements of a goal. Preserves the order of the original list. Use this for advanced force-field information i.e. mixing rules,...

same as: [ self.map_unique( m, data) for m in goal ]

Parameters:

  • goal: -with list list of keys
  • data: -dictionary with keys in goal

Returns:

  • mapped unique data
def get_distance(self, x, y):
703    def get_distance(self, x, y):
704        """
705        Returns distance between atoms no x and y
706        0: same atom i.e. x==y
707        1: bond
708        2: angle
709        3: torsion
710
711        Parameters:
712        - x,y:
713            - atom numbers
714            
715        Returns:
716        - distance
717        """
718        return self.distance_matrix[int(x)][int(y)]

Returns distance between atoms no x and y 0: same atom i.e. x==y 1: bond 2: angle 3: torsion

Parameters:

  • x,y:
    • atom numbers

Returns:

  • distance
def get_sorted_key(self, x):
720    def get_sorted_key(self, x):
721        """
722        Sorts a list alphabetically.
723        Very important for dictionary keys in the molecule class!!!
724
725        Parameters:
726        - x:
727            - string list to sort
728            
729        Returns:
730        - key (string) and sorted list
731        """
732        x_sort = sort_force_fields(x)
733        return make_graph(x_sort), x_sort

Sorts a list alphabetically. Very important for dictionary keys in the molecule class!!!

Parameters:

  • x:
    • string list to sort

Returns:

  • key (string) and sorted list
def visualize(self, saveto=''):
735    def visualize(self, saveto=""):
736        """
737        plots and visualizes molecule
738        
739        Parameters:
740        - saveto:
741            - optional, location to save pic of graph to,
742            - add file format at the end ;)
743            
744        Returns:
745            nothing
746        """
747        if self.atom_numbers.size > 1:
748            plot_graph(self.atom_names, self.bond_list, saveto=saveto)
749        else:
750            print("one bead only")
751        return

plots and visualizes molecule

Parameters:

  • saveto:
    • optional, location to save pic of graph to,
    • add file format at the end ;)

Returns: nothing