moleculegraph
1from .general_utils import * 2from .molecule_utils import * 3 4import numpy as np 5import csv 6import re 7# from rapidfuzz.string_metric import normalized_levenshtein 8import json 9 10""" 11moleculegraph 12 13A graph representation for molecules and everything else for data mapping 14""" 15 16__version__ = "0.0.0" 17__author__ = "Maximilian Fleck" 18__credits__ = "ITT Uni Stuttgart" 19 20class molecule: 21 """ 22 simple undirected graph representation for molecules and more. 23 24 write code about molecules without the need to care about them. 25 26 syntax only. Bring your own semantics. 27 28 Main results are: 29 - distance matrix of the molecule. 30 - atom, bond, angle, torsion lists. 31 32 YOU can use them to: 33 - map force-field information onto your molecule. 34 - generate inputs for templating trajectories and coordinates 35 - use group contribution approaches 36 - process coordinates from the PubChem-API 37 38 Furthermore: 39 - this graph-representation is quite general and not limited to molecules. 40 - ...you might use it to represent your favorite baking recipes and optimize them. 41 42 """ 43 44 def __init__(self, molstring, splitter=split_molstring, get_syntax=get_syntax_from_numbers, 45 branch_point_to_last=False, ring_point_to_first=False, 46 ): 47 """ 48 initializes molecules based on graph-list representation. 49 50 The term index (or idx) always refers to a character in the string, 51 be it semantic or syntactic. Semantics and syntax of the string are 52 no longer important once the bond list and thus the actual graph is 53 created. Then there are only semantic objects, i.e. atoms. 54 Thus, all numbers occuring in bond, angle or torsional lists or in 55 distance matrices refer to atomic numbers. Indexes and syntactic lists 56 are only important for advanced applications. For example, generating 57 strings with the same meaning. If both options are available they are 58 are marked with index/indexes/idx and number/numbers. For example: 59 ring_root_numbers and ring_root_indexes. 60 61 Parameters: 62 - mol: 63 - string representation of a molecule. 64 - splitter: 65 - function that splits input into syntactic/semantic elements 66 - returns np.array with all elements 67 - default: split_molstring from molecule_utils 68 - you can pass your own function :) 69 - get_syntax: 70 - function that generates atom and functional descriptors from 71 syntactic/semantic elements 72 - returns two np.arrays: 73 -ff: functions array 74 - branches pointing forward f > 0 75 - rings pointing backward f < 0 76 - beads without function f = 0 77 -nn: atoms array 78 - contains atom numbers 79 - functions are -1 80 - default: get_syntax_from_numbers from molecule_utils 81 - you can pass your own function :) 82 - branch_point_to_last: 83 - boolean, False 84 - comes into play when branch points outside molecule 85 - if True branch will point to last atom 86 - ring_point_to_first: 87 - boolean, False 88 - comes into play when ring points outside molecule 89 - if True ring will point to first atom 90 91 Returns: 92 nothing 93 """ 94 95 self.branch_point_to_last= branch_point_to_last 96 """boolean if True branch that points outside molecule will point to last atom""" 97 self.ring_point_to_first = ring_point_to_first 98 """boolean if True ring that points outside molecule will point to first atom""" 99 100 mol = np.array( splitter(molstring) ) 101 self.molecule = np.array( splitter(molstring) ) 102 """array representation of the molecule i.e. all keys/words/whatever in a list""" 103 104 syntactic_elements = get_syntax(mol) 105 106 self.f = np.array( syntactic_elements[0] ).astype(int) # shows function 107 """array with all functions. 108 - branches pointing forward f > 0 109 - rings pointing backward f < 0 110 - beads without function f = 0 111 """ 112 self.n = np.array( syntactic_elements[1] ).astype(int) # shows atomnumber 113 """array with atom numbers. branches and rings n = -1 as they are no atoms""" 114 self.i = np.arange(len(mol)) # shows index 115 """array with indexes. atoms, branches and rings get an index""" 116 self.len = len(self.i) 117 """number of all elements i.e. atoms, branches and rings""" 118 119 # contains indexes of atoms 120 # index of atom no n: self.atom_indexes[n] 121 self.atom_indexes = self.i[self.f == 0] 122 """array with indexes of all atoms""" 123 self.atom_names = self.molecule[self.f == 0] 124 """array with names of all atoms""" 125 self.atom_keys = np.array( [ "["+x+"]" for x in self.atom_names ] ) 126 """array with keys of all atoms""" 127 self.atom_number = len(self.atom_indexes) 128 """number of all atoms""" 129 self.atom_numbers = np.arange(self.atom_number) 130 """array with numbers of all atoms""" 131 132 # housekeeping for get_neighbour_list (will be called later) 133 # (the following things are generated when calling get_neighbour_list) 134 self.idx_neighbour_list = np.array([]) 135 """array with indexes of all neighboured/bonded atoms""" 136 self.neighbour_list = np.array([]) 137 """array with numbers of all neighboured/bonded atoms""" 138 self.ring_root_indexes = np.array([]) 139 """array with indexes of all atoms rooting a ring""" 140 141 142 # housekeeping for get_distance_matrix (will be called later) 143 # (the following things are generated when calling get_distance_matrix) 144 self.distance_matrix = np.array([]) 145 """distance matrix of all atoms in the molecule""" 146 self.bond_lists = np.array([]) 147 """nested array with bond lists 148 - starting from atoms separeted by one bond bond_lists[0] 149 - then atoms separeted by two bonds bond_lists[1] 150 - ... 151 """ 152 self.idx_bond_list = np.array([]) 153 """array with indexes of all neighboured/bonded atoms""" 154 self.bond_list = np.array([]) 155 """array with numbers of all neighboured/bonded atoms""" 156 self.bond_keys = np.array([]) 157 """array with keys of all neighboured/bonded atoms""" 158 self.angle_list = np.array([]) 159 """array of numbers of all angle forming atoms (separated by 2 bonds)""" 160 self.angle_names = np.array([]) 161 """array of names of all angle forming atoms (separated by 2 bonds)""" 162 self.angle_keys = np.array([]) 163 """array of keys of all angle forming atoms (separated by 2 bonds)""" 164 self.torsion_list = np.array([]) 165 """array of numbers of all dihedral forming atoms (separated by 3 bonds)""" 166 self.torsion_names = np.array([]) 167 """array of names of all dihedral forming atoms (separated by 3 bonds)""" 168 self.torsion_keys = np.array([]) 169 """array of keys of all dihedral forming atoms (separated by 3 bonds)""" 170 171 # get bond list 172 self.get_neighbour_list() 173 174 # get branch ends 175 n, count = np.unique(self.bond_list, return_counts=True) 176 pp = np.squeeze(np.where(count == 1)) 177 if np.sum(pp) > 0: 178 self.branch_end_numbers = n[pp] 179 self.branch_end_indexes = self.atom_indexes[self.branch_end_numbers] 180 else: 181 self.branch_end_numbers = np.empty(0) 182 self.branch_end_indexes = np.empty(0) 183 # get ring closures 184 self.ring_close_indexes = np.squeeze(np.where(self.f < 0)) - 1 185 """array with indexes of all atoms closing a ring""" 186 if np.sum(self.ring_close_indexes.shape) > 0: 187 self.ring_close_numbers = self.n[self.ring_close_indexes] 188 """array with atom numbers of all atoms closing a ring""" 189 self.ring_root_numbers = self.n[self.ring_root_indexes] 190 """array with atom numbers of all atoms rooting a ring""" 191 else: 192 self.ring_close_numbers = np.empty(0) 193 self.ring_root_numbers = np.empty(0) 194 195 # get distance matrix 196 self.get_distance_matrix() 197 198 dummy = unique_sort(self.atom_names, return_inverse=True) 199 ( 200 self.unique_atom_keys, 201 self.unique_atom_indexes, 202 self.unique_atom_inverse, 203 ) = dummy 204 self.unique_atom_names = self.atom_names[self.unique_atom_indexes] 205 """array with unique atom names""" 206 self.unique_atom_numbers = self.atom_numbers[self.unique_atom_indexes] 207 """array with numbers of unique atom names""" 208 assert np.array_equal( 209 self.unique_atom_keys[self.unique_atom_inverse], self.atom_names 210 ), "unique atom in inverse doesnt work" 211 212 dummy = unique_sort(self.bond_keys, return_inverse=True) 213 self.unique_bond_keys = dummy[0] 214 """array with unique bond keys""" 215 self.unique_bond_indexes = dummy[1] 216 """array with unique bond indexes""" 217 self.unique_bond_inverse = dummy[2] 218 """array to invert unique bonds""" 219 220 self.unique_bond_names = self.bond_names[self.unique_bond_indexes] 221 """array with unique bond names""" 222 self.unique_bond_numbers = self.bond_list[self.unique_bond_indexes] 223 """array with numbers of unique bond names""" 224 if len(self.bond_keys) > 0: 225 assert np.array_equal( 226 self.unique_bond_keys[self.unique_bond_inverse], self.bond_keys 227 ), "unique bond in inverse doesnt work" 228 229 dummy = unique_sort(self.angle_keys, return_inverse=True) 230 self.unique_angle_keys = dummy[0] 231 """array with unique angle keys""" 232 self.unique_angle_indexes = dummy[1] 233 """array with unique angle indexes""" 234 self.unique_angle_inverse = dummy[2] 235 """array to invert unique angles""" 236 237 self.unique_angle_names = self.angle_names[self.unique_angle_indexes] 238 """array with unique angle names""" 239 self.unique_angle_numbers = self.angle_list[self.unique_angle_indexes] 240 """array with numbers of unique angle names""" 241 if len(self.angle_keys) > 0: 242 assert np.array_equal( 243 self.unique_angle_keys[self.unique_angle_inverse], self.angle_keys 244 ), "unique angle in inverse doesnt work" 245 246 dummy = unique_sort(self.torsion_keys, return_inverse=True) 247 self.unique_torsion_keys = dummy[0] 248 """array with unique torsion keys""" 249 self.unique_torsion_indexes = dummy[1] 250 """array with unique torsion indexes""" 251 self.unique_torsion_inverse = dummy[2] 252 """array to invert unique torsions""" 253 254 self.unique_torsion_names = self.torsion_names[self.unique_torsion_indexes] 255 """array with unique torsion names""" 256 self.unique_torsion_numbers = self.torsion_list[self.unique_torsion_indexes] 257 """array with numbers of unique torsion names""" 258 if len(self.torsion_keys) > 0: 259 assert np.array_equal( 260 self.unique_torsion_keys[self.unique_torsion_inverse], self.torsion_keys 261 ), "unique angle in inverse doesnt work" 262 263 self.unique_atom_pair_names = [] 264 self.unique_atom_pair_keys = [] 265 for i, a0 in enumerate(self.unique_atom_names): 266 for j, a1 in enumerate(self.unique_atom_names): 267 if i > j: 268 self.unique_atom_pair_names.append(sort_force_fields([a0, a1])) 269 self.unique_atom_pair_keys.append( 270 make_graph(self.unique_atom_pair_names[-1]) 271 ) 272 self.unique_atom_pair_names = np.array(self.unique_atom_pair_names) 273 """array with unique atom pair names. use them for combining rules.""" 274 self.unique_atom_pair_keys = np.array(self.unique_atom_pair_keys) 275 """array with unique atom pair keys. use them for combining rules.""" 276 return 277 278 def reinit_mol(self,mol): 279 """ 280 reinitializes moleculeclass 281 282 """ 283 284 self.molstring = mol 285 if isinstance(mol, str): 286 mol = mol[1:-1].split("][") 287 288 self.molecule = np.array(mol) 289 """array representation of the molecule i.e. all keys/words/whatever in a list""" 290 self.f = np.zeros(len(mol)) # shows function 291 """array with all functions. 292 - branches pointing forward f > 0 293 - rings pointing backward f < 0 294 - beads without function f = 0 295 """ 296 self.n = -1 * np.ones(len(mol)) # shows atomnumber 297 """array with atom numbers. branches and rings n = -1 as they are no atoms""" 298 self.i = np.arange(len(mol)) # shows index 299 """array with indexes. atoms, branches and rings get an index""" 300 self.len = len(self.i) 301 """number of all elements i.e. atoms, branches and rings""" 302 n = 0 303 for i, m in enumerate(mol): 304 if m[0] == "b": 305 self.f[i] = int(m[1:]) 306 elif m[0] == "r": 307 # dum = -int(m[1:]) 308 self.f[i] = -int(m[1:]) # self.get_atom_index() 309 else: 310 self.n[i] = n 311 n += 1 312 return 313 314 315 def get_neighbour_list(self): 316 """ 317 generates a neighbour list from an initialized molecule 318 319 Parameters: None 320 321 Returns: 322 - neighbour_list: 323 - list of neighboured atoms containing their atom numbers. 324 - idx_neighbour_list: 325 - list of neighboured atoms containing their atom indexes. 326 """ 327 328 idx_neighbour_list = [] 329 idx_branch_ends = np.empty(0) 330 idx_branch_starts = np.empty(0) 331 idx_ring_close = np.empty(0) 332 idx_branch_funs = np.empty(0) 333 334 branches = np.zeros(len(self.i)) 335 branch_origins = np.zeros(len(self.i)) 336 branch_count = 1 337 338 if self.atom_number < 2: 339 self.idx_neighbour_list = np.array([]) 340 self.neighbour_list = np.array([]) 341 self.idx_bond_list = np.array([]) 342 self.bond_list = np.array([]) 343 return np.array([]), np.array([]) 344 345 """ 346 find branches and assign numbers. 347 get main path through molecule. 348 build molecule from this... 349 """ 350 inv_f = self.f[::-1] 351 branch_no = 1 352 inv_f_p = np.atleast_1d(np.squeeze(np.where(inv_f != 0))) 353 inv_branches = inv_f.copy() 354 inv_branches[inv_branches != 0] = -1 355 for i, ifi in zip(inv_f_p, inv_f[inv_f_p]): 356 if ifi < 0: # ring 357 inv_branches[i] = -1 358 if ifi > 0: # branch :) 359 pp = np.atleast_1d(np.squeeze(np.where(inv_branches[:i] == 0))) 360 if len(pp) > ifi: 361 p_branch = pp[-int(ifi) :] 362 inv_branches[p_branch] = branch_no 363 inv_branches[i] = branch_no 364 branch_no += 1 365 branches = reenumerate_branches(inv_branches[::-1]) 366 # print(branches) 367 368 main_path_p = np.where(branches == 0) 369 main_path_i = self.i[main_path_p] 370 371 # get bond list of main path 372 bond_list = bond_list_from_simple_path(main_path_i) 373 # print( "branches",branches ) 374 375 """ 376 build bond list 377 """ 378 f_p = np.atleast_1d(np.squeeze(np.where(self.f != 0))) 379 self.ring_root_indexes = [] 380 # print("f_p , self.f[f_p]", f_p, self.f[f_p]) 381 for i, fi in zip(f_p, self.f[f_p]): 382 #print(branches) 383 if fi < 0: 384 idx = np.max( 385 get_diff_in_bond_lists(f_p[f_p < i], np.arange(i)) 386 ) # index of last atom 387 subgraph = graph_from_bonds(bond_list) 388 path_to_main = get_shortest_path(subgraph, 0, idx) 389 390 bond_list_to_main = bond_list_from_simple_path(path_to_main) 391 graph_to_main = graph_from_bonds(bond_list_to_main) 392 root = get_root(graph_to_main, idx, int(np.abs(fi))) 393 branches[i] = branches[root] # connect ring to acompanying branch 394 if root < 0 and self.ring_point_to_first: 395 if fi+root >= self.min_ring_size: 396 root = np.min(self.atom_indexes) 397 bond_list = np.concatenate([bond_list, [[root, idx]]]) 398 self.ring_root_indexes.append(root) 399 print("root of ring outside molecule. point to first atom.") 400 else: 401 print( "ring size smaller:",self.min_ring_size,". ignore") 402 elif root < 0: 403 print("root of ring outside molecule. ignore.") 404 else: 405 bond_list = np.concatenate([bond_list, [[root, idx]]]) 406 self.ring_root_indexes.append(root) 407 elif fi > 0: 408 link = get_branch_root(branches[i], branches) 409 if not self.is_atom(link): 410 link = self.get_last_atom_idx(link) 411 branch = branches[i] 412 p_subchain_i = np.atleast_1d(np.squeeze(np.where(branches == branch)))[ 413 1: 414 ] 415 if len(p_subchain_i) < fi and self.branch_point_to_last: 416 link = np.max(self.atom_indexes) 417 p_subchain_i = np.concatenate([[link], p_subchain_i]) 418 subchain_bond_list = bond_list_from_simple_path(p_subchain_i) 419 bond_list = np.concatenate([bond_list, subchain_bond_list]) 420 print("end of branch outside molecule. point to last atom.") 421 elif len(p_subchain_i) < fi: 422 print("end of branch outside molecule. ignore.") 423 else: 424 p_subchain_i = np.concatenate([[link], p_subchain_i]) 425 subchain_bond_list = bond_list_from_simple_path(p_subchain_i) 426 bond_list = np.concatenate([bond_list, subchain_bond_list]) 427 else: 428 print("fi = 0... this should not happen :P") 429 #print("fin branches", branches) 430 self.idx_neighbour_list = bond_list 431 self.idx_bond_list = bond_list 432 self.ring_root_indexes = np.array(self.ring_root_indexes) 433 434 self.neighbour_list = np.vectorize(self.get_atom_no)(self.idx_neighbour_list) 435 self.bond_list = self.neighbour_list 436 return self.neighbour_list, self.idx_neighbour_list 437 438 def get_bond_matrix(self): 439 """ 440 generates bond matrix from an initialized molecule 441 442 Parameters: None 443 444 Returns: 445 - bond_matrix: 446 - bond matrix of you molecule 447 """ 448 self.bond_matrix = get_bond_matrix(self.neighbour_list, self.atom_number) 449 return self.bond_matrix.copy() 450 451 def get_distance_matrix(self): 452 """ 453 generates distance matrix from an initialized molecule. 454 Furthermore different angle and torsion lists are generated. 455 456 Parameters: None 457 458 Returns: 459 - distance_matrix: 460 - distance matrix of you molecule 461 """ 462 self.distance_matrix, self.bond_lists = get_distance_matrix( 463 self.neighbour_list, self.atom_number 464 ) 465 self.bond_list = self.bond_lists[0] 466 self.bond_names = np.array( 467 [sort_force_fields(self.atom_names[a]) for a in self.bond_list] 468 ) 469 self.bond_keys = np.array([make_graph(t) for t in self.bond_names]) 470 if len(self.bond_lists) > 1: 471 self.angle_list = np.array(self.bond_lists[1]) 472 self.angle_names = np.array( 473 [sort_force_fields(self.atom_names[a]) for a in self.angle_list] 474 ) 475 self.angle_keys = np.array([make_graph(t) for t in self.angle_names]) 476 else: 477 self.angle_list = np.array([]) 478 self.angle_names = np.array([]) 479 self.angle_keys = np.array([]) 480 if len(self.bond_lists) > 2: 481 self.torsion_list = np.array(self.bond_lists[2]) 482 self.torsion_names = np.array( 483 [sort_force_fields(self.atom_names[a]) for a in self.torsion_list] 484 ) 485 self.torsion_keys = np.array([make_graph(t) for t in self.torsion_names]) 486 else: 487 self.torsion_list = np.array([]) 488 self.torsion_names = np.array([]) 489 self.torsion_keys = np.array([]) 490 return self.distance_matrix.copy() 491 492 def is_atom(self,idx): 493 """ 494 checks if index is an atom. 495 - Number: atom number 496 - Index: index in list representation of molecule. 497 Number and index are differing due to functionals in the list. 498 499 Parameters: 500 - idx: 501 - idx of the atom 502 503 Returns: 504 - boolean 505 """ 506 if idx in self.atom_indexes: 507 return True 508 else: 509 return False 510 511 def get_atom_index(self, no): 512 """ 513 returns index of atom. 514 - Number: atom number 515 - Index: index in list representation of molecule. 516 Number and index are differing due to functionals in the list. 517 518 Parameters: 519 - no: 520 - number of the atom 521 522 Returns: 523 - index of atom number "no" 524 """ 525 return int(self.atom_indexes[int(no)]) 526 527 def get_atom_no(self, idx): 528 """ 529 returns index of atom. 530 - Number: atom number 531 - Index: index in list representation of molecule. 532 Number and index are differing due to functionals in the list. 533 534 Parameters: 535 - idx: 536 - idx of the atom 537 538 Returns: 539 - number of atom with index 540 """ 541 idx = int(idx) 542 if self.f[idx] >= 0: 543 no = int(np.squeeze(np.where(self.atom_indexes == idx))) 544 assert no == self.n[idx], "atom numbers dont work,idx" 545 return no 546 else: 547 assert self.n[idx] == -1, "atom numbers dont work, n=-1" 548 print("Error in get_atom_no: not an atom",idx) 549 return False 550 551 def get_next_atom_idx(self, idx): 552 """ 553 returns index of the next atom after index idx. 554 Index: index in list representation of molecule. 555 Number and index are differing due to functionals in the list. 556 557 Parameters: 558 idx: 559 - index in list representation of molecule 560 561 Returns: 562 - index of the next atom after index idx 563 """ 564 return int(np.min(self.atom_indexes[(self.atom_indexes - idx) > 0])) 565 566 def get_last_atom_idx(self, idx): 567 """ 568 returns index of the atom before index idx. 569 Index: index in list representation of molecule. 570 Number and index are differing due to functionals in the list. 571 572 Parameters: 573 idx: 574 - index in list representation of molecule 575 576 Returns: 577 - index of the atom before index idx 578 """ 579 return int(np.max(self.atom_indexes[(self.atom_indexes - idx) < 0])) 580 581 def get_next_atom_no(self, idx): 582 """ 583 returns number of the atom after index idx. 584 Index: index in list representation of molecule. 585 Number: atom number 586 Number and index are differing due to functionals in the list. 587 588 Parameters: 589 - idx: 590 - index in list representation of molecule 591 592 Returns: 593 - number of the atom after index idx 594 """ 595 return int(self.n[self.get_next_atom_idx(idx)]) 596 597 def get_last_atom_no(self, idx): 598 """ 599 returns number of the atom before index idx. 600 Index: index in list representation of molecule. 601 Number: atom number 602 Number and index are differing due to functionals in the list. 603 604 Parameters: 605 self: 606 - initialized molecule. 607 idx: 608 - index in list representation of molecule 609 610 Returns: 611 number of the atom before index idx 612 """ 613 return int(self.n[self.get_last_atom_idx(idx)]) 614 615 def map_molecule_via_atom_names(self, data): 616 """ 617 maps information from a dictionary onto the molecule. 618 Use this for molecule information i.e. coordinates, group contributions,... 619 620 Parameters: 621 - data: 622 - dictionary with: keys == atom names !!! 623 624 Returns: 625 - mapped data 626 """ 627 return [*map(data.get, self.atom_names)] 628 629 def map_unique_via_atom_names(self, data): 630 """ 631 Maps information from a dictionary onto unique elements of the molecule. 632 Preserves the order of the original list. 633 Use this for force-field information i.e. potentials,... 634 635 Parameters: 636 - data: 637 - dictionary with: keys == atom names !!! 638 639 Returns: 640 - mapped unique data 641 """ 642 print( 643 "map_unique_via_atom_names is outtdated... dont use\n shouldnt cause errors btw" 644 ) 645 return [*map(data.get, self.unique_atom_names)] 646 647 def map_molecule(self, goal, data): 648 """ 649 maps information from a dictionary onto a goal. 650 Use this for molecule information i.e. coordinates, group contributions,... 651 652 653 Parameters: 654 - goal: 655 - list of keys 656 - data: 657 - dictionary with keys in goal 658 659 Returns: 660 - mapped data 661 """ 662 return [*map(data.get, goal)] 663 664 def map_unique(self, goal, data): 665 """ 666 Maps information from a dictionary onto unique elements a goal. 667 Preserves the order of the original list. 668 Use this for force-field information i.e. potentials,... 669 670 Parameters: 671 - goal: 672 - list of keys 673 - data: 674 - dictionary with keys in goal 675 676 Returns: 677 - mapped unique data 678 """ 679 print("map_unique is outtdated... dont use\n shouldnt cause errors btw") 680 return [*map(data.get, unique_sort(goal))] 681 682 def map_nested(self, goal, data): 683 """ 684 Maps information from a dictionary onto nested elements of a goal. 685 Preserves the order of the original list. 686 Use this for advanced force-field information i.e. mixing rules,... 687 688 same as: 689 [ self.map_unique( m, data) for m in goal ] 690 691 Parameters: 692 - goal: 693 -with list list of keys 694 - data: 695 -dictionary with keys in goal 696 697 Returns: 698 - mapped unique data 699 """ 700 return [[*map(data.get, m)] for m in goal] 701 702 def get_distance(self, x, y): 703 """ 704 Returns distance between atoms no x and y 705 0: same atom i.e. x==y 706 1: bond 707 2: angle 708 3: torsion 709 710 Parameters: 711 - x,y: 712 - atom numbers 713 714 Returns: 715 - distance 716 """ 717 return self.distance_matrix[int(x)][int(y)] 718 719 def get_sorted_key(self, x): 720 """ 721 Sorts a list alphabetically. 722 Very important for dictionary keys in the molecule class!!! 723 724 Parameters: 725 - x: 726 - string list to sort 727 728 Returns: 729 - key (string) and sorted list 730 """ 731 x_sort = sort_force_fields(x) 732 return make_graph(x_sort), x_sort 733 734 def visualize(self, saveto=""): 735 """ 736 plots and visualizes molecule 737 738 Parameters: 739 - saveto: 740 - optional, location to save pic of graph to, 741 - add file format at the end ;) 742 743 Returns: 744 nothing 745 """ 746 if self.atom_numbers.size > 1: 747 plot_graph(self.atom_names, self.bond_list, saveto=saveto) 748 else: 749 print("one bead only") 750 return 751 752 753
21class molecule: 22 """ 23 simple undirected graph representation for molecules and more. 24 25 write code about molecules without the need to care about them. 26 27 syntax only. Bring your own semantics. 28 29 Main results are: 30 - distance matrix of the molecule. 31 - atom, bond, angle, torsion lists. 32 33 YOU can use them to: 34 - map force-field information onto your molecule. 35 - generate inputs for templating trajectories and coordinates 36 - use group contribution approaches 37 - process coordinates from the PubChem-API 38 39 Furthermore: 40 - this graph-representation is quite general and not limited to molecules. 41 - ...you might use it to represent your favorite baking recipes and optimize them. 42 43 """ 44 45 def __init__(self, molstring, splitter=split_molstring, get_syntax=get_syntax_from_numbers, 46 branch_point_to_last=False, ring_point_to_first=False, 47 ): 48 """ 49 initializes molecules based on graph-list representation. 50 51 The term index (or idx) always refers to a character in the string, 52 be it semantic or syntactic. Semantics and syntax of the string are 53 no longer important once the bond list and thus the actual graph is 54 created. Then there are only semantic objects, i.e. atoms. 55 Thus, all numbers occuring in bond, angle or torsional lists or in 56 distance matrices refer to atomic numbers. Indexes and syntactic lists 57 are only important for advanced applications. For example, generating 58 strings with the same meaning. If both options are available they are 59 are marked with index/indexes/idx and number/numbers. For example: 60 ring_root_numbers and ring_root_indexes. 61 62 Parameters: 63 - mol: 64 - string representation of a molecule. 65 - splitter: 66 - function that splits input into syntactic/semantic elements 67 - returns np.array with all elements 68 - default: split_molstring from molecule_utils 69 - you can pass your own function :) 70 - get_syntax: 71 - function that generates atom and functional descriptors from 72 syntactic/semantic elements 73 - returns two np.arrays: 74 -ff: functions array 75 - branches pointing forward f > 0 76 - rings pointing backward f < 0 77 - beads without function f = 0 78 -nn: atoms array 79 - contains atom numbers 80 - functions are -1 81 - default: get_syntax_from_numbers from molecule_utils 82 - you can pass your own function :) 83 - branch_point_to_last: 84 - boolean, False 85 - comes into play when branch points outside molecule 86 - if True branch will point to last atom 87 - ring_point_to_first: 88 - boolean, False 89 - comes into play when ring points outside molecule 90 - if True ring will point to first atom 91 92 Returns: 93 nothing 94 """ 95 96 self.branch_point_to_last= branch_point_to_last 97 """boolean if True branch that points outside molecule will point to last atom""" 98 self.ring_point_to_first = ring_point_to_first 99 """boolean if True ring that points outside molecule will point to first atom""" 100 101 mol = np.array( splitter(molstring) ) 102 self.molecule = np.array( splitter(molstring) ) 103 """array representation of the molecule i.e. all keys/words/whatever in a list""" 104 105 syntactic_elements = get_syntax(mol) 106 107 self.f = np.array( syntactic_elements[0] ).astype(int) # shows function 108 """array with all functions. 109 - branches pointing forward f > 0 110 - rings pointing backward f < 0 111 - beads without function f = 0 112 """ 113 self.n = np.array( syntactic_elements[1] ).astype(int) # shows atomnumber 114 """array with atom numbers. branches and rings n = -1 as they are no atoms""" 115 self.i = np.arange(len(mol)) # shows index 116 """array with indexes. atoms, branches and rings get an index""" 117 self.len = len(self.i) 118 """number of all elements i.e. atoms, branches and rings""" 119 120 # contains indexes of atoms 121 # index of atom no n: self.atom_indexes[n] 122 self.atom_indexes = self.i[self.f == 0] 123 """array with indexes of all atoms""" 124 self.atom_names = self.molecule[self.f == 0] 125 """array with names of all atoms""" 126 self.atom_keys = np.array( [ "["+x+"]" for x in self.atom_names ] ) 127 """array with keys of all atoms""" 128 self.atom_number = len(self.atom_indexes) 129 """number of all atoms""" 130 self.atom_numbers = np.arange(self.atom_number) 131 """array with numbers of all atoms""" 132 133 # housekeeping for get_neighbour_list (will be called later) 134 # (the following things are generated when calling get_neighbour_list) 135 self.idx_neighbour_list = np.array([]) 136 """array with indexes of all neighboured/bonded atoms""" 137 self.neighbour_list = np.array([]) 138 """array with numbers of all neighboured/bonded atoms""" 139 self.ring_root_indexes = np.array([]) 140 """array with indexes of all atoms rooting a ring""" 141 142 143 # housekeeping for get_distance_matrix (will be called later) 144 # (the following things are generated when calling get_distance_matrix) 145 self.distance_matrix = np.array([]) 146 """distance matrix of all atoms in the molecule""" 147 self.bond_lists = np.array([]) 148 """nested array with bond lists 149 - starting from atoms separeted by one bond bond_lists[0] 150 - then atoms separeted by two bonds bond_lists[1] 151 - ... 152 """ 153 self.idx_bond_list = np.array([]) 154 """array with indexes of all neighboured/bonded atoms""" 155 self.bond_list = np.array([]) 156 """array with numbers of all neighboured/bonded atoms""" 157 self.bond_keys = np.array([]) 158 """array with keys of all neighboured/bonded atoms""" 159 self.angle_list = np.array([]) 160 """array of numbers of all angle forming atoms (separated by 2 bonds)""" 161 self.angle_names = np.array([]) 162 """array of names of all angle forming atoms (separated by 2 bonds)""" 163 self.angle_keys = np.array([]) 164 """array of keys of all angle forming atoms (separated by 2 bonds)""" 165 self.torsion_list = np.array([]) 166 """array of numbers of all dihedral forming atoms (separated by 3 bonds)""" 167 self.torsion_names = np.array([]) 168 """array of names of all dihedral forming atoms (separated by 3 bonds)""" 169 self.torsion_keys = np.array([]) 170 """array of keys of all dihedral forming atoms (separated by 3 bonds)""" 171 172 # get bond list 173 self.get_neighbour_list() 174 175 # get branch ends 176 n, count = np.unique(self.bond_list, return_counts=True) 177 pp = np.squeeze(np.where(count == 1)) 178 if np.sum(pp) > 0: 179 self.branch_end_numbers = n[pp] 180 self.branch_end_indexes = self.atom_indexes[self.branch_end_numbers] 181 else: 182 self.branch_end_numbers = np.empty(0) 183 self.branch_end_indexes = np.empty(0) 184 # get ring closures 185 self.ring_close_indexes = np.squeeze(np.where(self.f < 0)) - 1 186 """array with indexes of all atoms closing a ring""" 187 if np.sum(self.ring_close_indexes.shape) > 0: 188 self.ring_close_numbers = self.n[self.ring_close_indexes] 189 """array with atom numbers of all atoms closing a ring""" 190 self.ring_root_numbers = self.n[self.ring_root_indexes] 191 """array with atom numbers of all atoms rooting a ring""" 192 else: 193 self.ring_close_numbers = np.empty(0) 194 self.ring_root_numbers = np.empty(0) 195 196 # get distance matrix 197 self.get_distance_matrix() 198 199 dummy = unique_sort(self.atom_names, return_inverse=True) 200 ( 201 self.unique_atom_keys, 202 self.unique_atom_indexes, 203 self.unique_atom_inverse, 204 ) = dummy 205 self.unique_atom_names = self.atom_names[self.unique_atom_indexes] 206 """array with unique atom names""" 207 self.unique_atom_numbers = self.atom_numbers[self.unique_atom_indexes] 208 """array with numbers of unique atom names""" 209 assert np.array_equal( 210 self.unique_atom_keys[self.unique_atom_inverse], self.atom_names 211 ), "unique atom in inverse doesnt work" 212 213 dummy = unique_sort(self.bond_keys, return_inverse=True) 214 self.unique_bond_keys = dummy[0] 215 """array with unique bond keys""" 216 self.unique_bond_indexes = dummy[1] 217 """array with unique bond indexes""" 218 self.unique_bond_inverse = dummy[2] 219 """array to invert unique bonds""" 220 221 self.unique_bond_names = self.bond_names[self.unique_bond_indexes] 222 """array with unique bond names""" 223 self.unique_bond_numbers = self.bond_list[self.unique_bond_indexes] 224 """array with numbers of unique bond names""" 225 if len(self.bond_keys) > 0: 226 assert np.array_equal( 227 self.unique_bond_keys[self.unique_bond_inverse], self.bond_keys 228 ), "unique bond in inverse doesnt work" 229 230 dummy = unique_sort(self.angle_keys, return_inverse=True) 231 self.unique_angle_keys = dummy[0] 232 """array with unique angle keys""" 233 self.unique_angle_indexes = dummy[1] 234 """array with unique angle indexes""" 235 self.unique_angle_inverse = dummy[2] 236 """array to invert unique angles""" 237 238 self.unique_angle_names = self.angle_names[self.unique_angle_indexes] 239 """array with unique angle names""" 240 self.unique_angle_numbers = self.angle_list[self.unique_angle_indexes] 241 """array with numbers of unique angle names""" 242 if len(self.angle_keys) > 0: 243 assert np.array_equal( 244 self.unique_angle_keys[self.unique_angle_inverse], self.angle_keys 245 ), "unique angle in inverse doesnt work" 246 247 dummy = unique_sort(self.torsion_keys, return_inverse=True) 248 self.unique_torsion_keys = dummy[0] 249 """array with unique torsion keys""" 250 self.unique_torsion_indexes = dummy[1] 251 """array with unique torsion indexes""" 252 self.unique_torsion_inverse = dummy[2] 253 """array to invert unique torsions""" 254 255 self.unique_torsion_names = self.torsion_names[self.unique_torsion_indexes] 256 """array with unique torsion names""" 257 self.unique_torsion_numbers = self.torsion_list[self.unique_torsion_indexes] 258 """array with numbers of unique torsion names""" 259 if len(self.torsion_keys) > 0: 260 assert np.array_equal( 261 self.unique_torsion_keys[self.unique_torsion_inverse], self.torsion_keys 262 ), "unique angle in inverse doesnt work" 263 264 self.unique_atom_pair_names = [] 265 self.unique_atom_pair_keys = [] 266 for i, a0 in enumerate(self.unique_atom_names): 267 for j, a1 in enumerate(self.unique_atom_names): 268 if i > j: 269 self.unique_atom_pair_names.append(sort_force_fields([a0, a1])) 270 self.unique_atom_pair_keys.append( 271 make_graph(self.unique_atom_pair_names[-1]) 272 ) 273 self.unique_atom_pair_names = np.array(self.unique_atom_pair_names) 274 """array with unique atom pair names. use them for combining rules.""" 275 self.unique_atom_pair_keys = np.array(self.unique_atom_pair_keys) 276 """array with unique atom pair keys. use them for combining rules.""" 277 return 278 279 def reinit_mol(self,mol): 280 """ 281 reinitializes moleculeclass 282 283 """ 284 285 self.molstring = mol 286 if isinstance(mol, str): 287 mol = mol[1:-1].split("][") 288 289 self.molecule = np.array(mol) 290 """array representation of the molecule i.e. all keys/words/whatever in a list""" 291 self.f = np.zeros(len(mol)) # shows function 292 """array with all functions. 293 - branches pointing forward f > 0 294 - rings pointing backward f < 0 295 - beads without function f = 0 296 """ 297 self.n = -1 * np.ones(len(mol)) # shows atomnumber 298 """array with atom numbers. branches and rings n = -1 as they are no atoms""" 299 self.i = np.arange(len(mol)) # shows index 300 """array with indexes. atoms, branches and rings get an index""" 301 self.len = len(self.i) 302 """number of all elements i.e. atoms, branches and rings""" 303 n = 0 304 for i, m in enumerate(mol): 305 if m[0] == "b": 306 self.f[i] = int(m[1:]) 307 elif m[0] == "r": 308 # dum = -int(m[1:]) 309 self.f[i] = -int(m[1:]) # self.get_atom_index() 310 else: 311 self.n[i] = n 312 n += 1 313 return 314 315 316 def get_neighbour_list(self): 317 """ 318 generates a neighbour list from an initialized molecule 319 320 Parameters: None 321 322 Returns: 323 - neighbour_list: 324 - list of neighboured atoms containing their atom numbers. 325 - idx_neighbour_list: 326 - list of neighboured atoms containing their atom indexes. 327 """ 328 329 idx_neighbour_list = [] 330 idx_branch_ends = np.empty(0) 331 idx_branch_starts = np.empty(0) 332 idx_ring_close = np.empty(0) 333 idx_branch_funs = np.empty(0) 334 335 branches = np.zeros(len(self.i)) 336 branch_origins = np.zeros(len(self.i)) 337 branch_count = 1 338 339 if self.atom_number < 2: 340 self.idx_neighbour_list = np.array([]) 341 self.neighbour_list = np.array([]) 342 self.idx_bond_list = np.array([]) 343 self.bond_list = np.array([]) 344 return np.array([]), np.array([]) 345 346 """ 347 find branches and assign numbers. 348 get main path through molecule. 349 build molecule from this... 350 """ 351 inv_f = self.f[::-1] 352 branch_no = 1 353 inv_f_p = np.atleast_1d(np.squeeze(np.where(inv_f != 0))) 354 inv_branches = inv_f.copy() 355 inv_branches[inv_branches != 0] = -1 356 for i, ifi in zip(inv_f_p, inv_f[inv_f_p]): 357 if ifi < 0: # ring 358 inv_branches[i] = -1 359 if ifi > 0: # branch :) 360 pp = np.atleast_1d(np.squeeze(np.where(inv_branches[:i] == 0))) 361 if len(pp) > ifi: 362 p_branch = pp[-int(ifi) :] 363 inv_branches[p_branch] = branch_no 364 inv_branches[i] = branch_no 365 branch_no += 1 366 branches = reenumerate_branches(inv_branches[::-1]) 367 # print(branches) 368 369 main_path_p = np.where(branches == 0) 370 main_path_i = self.i[main_path_p] 371 372 # get bond list of main path 373 bond_list = bond_list_from_simple_path(main_path_i) 374 # print( "branches",branches ) 375 376 """ 377 build bond list 378 """ 379 f_p = np.atleast_1d(np.squeeze(np.where(self.f != 0))) 380 self.ring_root_indexes = [] 381 # print("f_p , self.f[f_p]", f_p, self.f[f_p]) 382 for i, fi in zip(f_p, self.f[f_p]): 383 #print(branches) 384 if fi < 0: 385 idx = np.max( 386 get_diff_in_bond_lists(f_p[f_p < i], np.arange(i)) 387 ) # index of last atom 388 subgraph = graph_from_bonds(bond_list) 389 path_to_main = get_shortest_path(subgraph, 0, idx) 390 391 bond_list_to_main = bond_list_from_simple_path(path_to_main) 392 graph_to_main = graph_from_bonds(bond_list_to_main) 393 root = get_root(graph_to_main, idx, int(np.abs(fi))) 394 branches[i] = branches[root] # connect ring to acompanying branch 395 if root < 0 and self.ring_point_to_first: 396 if fi+root >= self.min_ring_size: 397 root = np.min(self.atom_indexes) 398 bond_list = np.concatenate([bond_list, [[root, idx]]]) 399 self.ring_root_indexes.append(root) 400 print("root of ring outside molecule. point to first atom.") 401 else: 402 print( "ring size smaller:",self.min_ring_size,". ignore") 403 elif root < 0: 404 print("root of ring outside molecule. ignore.") 405 else: 406 bond_list = np.concatenate([bond_list, [[root, idx]]]) 407 self.ring_root_indexes.append(root) 408 elif fi > 0: 409 link = get_branch_root(branches[i], branches) 410 if not self.is_atom(link): 411 link = self.get_last_atom_idx(link) 412 branch = branches[i] 413 p_subchain_i = np.atleast_1d(np.squeeze(np.where(branches == branch)))[ 414 1: 415 ] 416 if len(p_subchain_i) < fi and self.branch_point_to_last: 417 link = np.max(self.atom_indexes) 418 p_subchain_i = np.concatenate([[link], p_subchain_i]) 419 subchain_bond_list = bond_list_from_simple_path(p_subchain_i) 420 bond_list = np.concatenate([bond_list, subchain_bond_list]) 421 print("end of branch outside molecule. point to last atom.") 422 elif len(p_subchain_i) < fi: 423 print("end of branch outside molecule. ignore.") 424 else: 425 p_subchain_i = np.concatenate([[link], p_subchain_i]) 426 subchain_bond_list = bond_list_from_simple_path(p_subchain_i) 427 bond_list = np.concatenate([bond_list, subchain_bond_list]) 428 else: 429 print("fi = 0... this should not happen :P") 430 #print("fin branches", branches) 431 self.idx_neighbour_list = bond_list 432 self.idx_bond_list = bond_list 433 self.ring_root_indexes = np.array(self.ring_root_indexes) 434 435 self.neighbour_list = np.vectorize(self.get_atom_no)(self.idx_neighbour_list) 436 self.bond_list = self.neighbour_list 437 return self.neighbour_list, self.idx_neighbour_list 438 439 def get_bond_matrix(self): 440 """ 441 generates bond matrix from an initialized molecule 442 443 Parameters: None 444 445 Returns: 446 - bond_matrix: 447 - bond matrix of you molecule 448 """ 449 self.bond_matrix = get_bond_matrix(self.neighbour_list, self.atom_number) 450 return self.bond_matrix.copy() 451 452 def get_distance_matrix(self): 453 """ 454 generates distance matrix from an initialized molecule. 455 Furthermore different angle and torsion lists are generated. 456 457 Parameters: None 458 459 Returns: 460 - distance_matrix: 461 - distance matrix of you molecule 462 """ 463 self.distance_matrix, self.bond_lists = get_distance_matrix( 464 self.neighbour_list, self.atom_number 465 ) 466 self.bond_list = self.bond_lists[0] 467 self.bond_names = np.array( 468 [sort_force_fields(self.atom_names[a]) for a in self.bond_list] 469 ) 470 self.bond_keys = np.array([make_graph(t) for t in self.bond_names]) 471 if len(self.bond_lists) > 1: 472 self.angle_list = np.array(self.bond_lists[1]) 473 self.angle_names = np.array( 474 [sort_force_fields(self.atom_names[a]) for a in self.angle_list] 475 ) 476 self.angle_keys = np.array([make_graph(t) for t in self.angle_names]) 477 else: 478 self.angle_list = np.array([]) 479 self.angle_names = np.array([]) 480 self.angle_keys = np.array([]) 481 if len(self.bond_lists) > 2: 482 self.torsion_list = np.array(self.bond_lists[2]) 483 self.torsion_names = np.array( 484 [sort_force_fields(self.atom_names[a]) for a in self.torsion_list] 485 ) 486 self.torsion_keys = np.array([make_graph(t) for t in self.torsion_names]) 487 else: 488 self.torsion_list = np.array([]) 489 self.torsion_names = np.array([]) 490 self.torsion_keys = np.array([]) 491 return self.distance_matrix.copy() 492 493 def is_atom(self,idx): 494 """ 495 checks if index is an atom. 496 - Number: atom number 497 - Index: index in list representation of molecule. 498 Number and index are differing due to functionals in the list. 499 500 Parameters: 501 - idx: 502 - idx of the atom 503 504 Returns: 505 - boolean 506 """ 507 if idx in self.atom_indexes: 508 return True 509 else: 510 return False 511 512 def get_atom_index(self, no): 513 """ 514 returns index of atom. 515 - Number: atom number 516 - Index: index in list representation of molecule. 517 Number and index are differing due to functionals in the list. 518 519 Parameters: 520 - no: 521 - number of the atom 522 523 Returns: 524 - index of atom number "no" 525 """ 526 return int(self.atom_indexes[int(no)]) 527 528 def get_atom_no(self, idx): 529 """ 530 returns index of atom. 531 - Number: atom number 532 - Index: index in list representation of molecule. 533 Number and index are differing due to functionals in the list. 534 535 Parameters: 536 - idx: 537 - idx of the atom 538 539 Returns: 540 - number of atom with index 541 """ 542 idx = int(idx) 543 if self.f[idx] >= 0: 544 no = int(np.squeeze(np.where(self.atom_indexes == idx))) 545 assert no == self.n[idx], "atom numbers dont work,idx" 546 return no 547 else: 548 assert self.n[idx] == -1, "atom numbers dont work, n=-1" 549 print("Error in get_atom_no: not an atom",idx) 550 return False 551 552 def get_next_atom_idx(self, idx): 553 """ 554 returns index of the next atom after index idx. 555 Index: index in list representation of molecule. 556 Number and index are differing due to functionals in the list. 557 558 Parameters: 559 idx: 560 - index in list representation of molecule 561 562 Returns: 563 - index of the next atom after index idx 564 """ 565 return int(np.min(self.atom_indexes[(self.atom_indexes - idx) > 0])) 566 567 def get_last_atom_idx(self, idx): 568 """ 569 returns index of the atom before index idx. 570 Index: index in list representation of molecule. 571 Number and index are differing due to functionals in the list. 572 573 Parameters: 574 idx: 575 - index in list representation of molecule 576 577 Returns: 578 - index of the atom before index idx 579 """ 580 return int(np.max(self.atom_indexes[(self.atom_indexes - idx) < 0])) 581 582 def get_next_atom_no(self, idx): 583 """ 584 returns number of the atom after index idx. 585 Index: index in list representation of molecule. 586 Number: atom number 587 Number and index are differing due to functionals in the list. 588 589 Parameters: 590 - idx: 591 - index in list representation of molecule 592 593 Returns: 594 - number of the atom after index idx 595 """ 596 return int(self.n[self.get_next_atom_idx(idx)]) 597 598 def get_last_atom_no(self, idx): 599 """ 600 returns number of the atom before index idx. 601 Index: index in list representation of molecule. 602 Number: atom number 603 Number and index are differing due to functionals in the list. 604 605 Parameters: 606 self: 607 - initialized molecule. 608 idx: 609 - index in list representation of molecule 610 611 Returns: 612 number of the atom before index idx 613 """ 614 return int(self.n[self.get_last_atom_idx(idx)]) 615 616 def map_molecule_via_atom_names(self, data): 617 """ 618 maps information from a dictionary onto the molecule. 619 Use this for molecule information i.e. coordinates, group contributions,... 620 621 Parameters: 622 - data: 623 - dictionary with: keys == atom names !!! 624 625 Returns: 626 - mapped data 627 """ 628 return [*map(data.get, self.atom_names)] 629 630 def map_unique_via_atom_names(self, data): 631 """ 632 Maps information from a dictionary onto unique elements of the molecule. 633 Preserves the order of the original list. 634 Use this for force-field information i.e. potentials,... 635 636 Parameters: 637 - data: 638 - dictionary with: keys == atom names !!! 639 640 Returns: 641 - mapped unique data 642 """ 643 print( 644 "map_unique_via_atom_names is outtdated... dont use\n shouldnt cause errors btw" 645 ) 646 return [*map(data.get, self.unique_atom_names)] 647 648 def map_molecule(self, goal, data): 649 """ 650 maps information from a dictionary onto a goal. 651 Use this for molecule information i.e. coordinates, group contributions,... 652 653 654 Parameters: 655 - goal: 656 - list of keys 657 - data: 658 - dictionary with keys in goal 659 660 Returns: 661 - mapped data 662 """ 663 return [*map(data.get, goal)] 664 665 def map_unique(self, goal, data): 666 """ 667 Maps information from a dictionary onto unique elements a goal. 668 Preserves the order of the original list. 669 Use this for force-field information i.e. potentials,... 670 671 Parameters: 672 - goal: 673 - list of keys 674 - data: 675 - dictionary with keys in goal 676 677 Returns: 678 - mapped unique data 679 """ 680 print("map_unique is outtdated... dont use\n shouldnt cause errors btw") 681 return [*map(data.get, unique_sort(goal))] 682 683 def map_nested(self, goal, data): 684 """ 685 Maps information from a dictionary onto nested elements of a goal. 686 Preserves the order of the original list. 687 Use this for advanced force-field information i.e. mixing rules,... 688 689 same as: 690 [ self.map_unique( m, data) for m in goal ] 691 692 Parameters: 693 - goal: 694 -with list list of keys 695 - data: 696 -dictionary with keys in goal 697 698 Returns: 699 - mapped unique data 700 """ 701 return [[*map(data.get, m)] for m in goal] 702 703 def get_distance(self, x, y): 704 """ 705 Returns distance between atoms no x and y 706 0: same atom i.e. x==y 707 1: bond 708 2: angle 709 3: torsion 710 711 Parameters: 712 - x,y: 713 - atom numbers 714 715 Returns: 716 - distance 717 """ 718 return self.distance_matrix[int(x)][int(y)] 719 720 def get_sorted_key(self, x): 721 """ 722 Sorts a list alphabetically. 723 Very important for dictionary keys in the molecule class!!! 724 725 Parameters: 726 - x: 727 - string list to sort 728 729 Returns: 730 - key (string) and sorted list 731 """ 732 x_sort = sort_force_fields(x) 733 return make_graph(x_sort), x_sort 734 735 def visualize(self, saveto=""): 736 """ 737 plots and visualizes molecule 738 739 Parameters: 740 - saveto: 741 - optional, location to save pic of graph to, 742 - add file format at the end ;) 743 744 Returns: 745 nothing 746 """ 747 if self.atom_numbers.size > 1: 748 plot_graph(self.atom_names, self.bond_list, saveto=saveto) 749 else: 750 print("one bead only") 751 return
simple undirected graph representation for molecules and more.
write code about molecules without the need to care about them.
syntax only. Bring your own semantics.
Main results are:
- distance matrix of the molecule.
- atom, bond, angle, torsion lists.
YOU can use them to:
- map force-field information onto your molecule.
- generate inputs for templating trajectories and coordinates
- use group contribution approaches
- process coordinates from the PubChem-API
Furthermore:
- this graph-representation is quite general and not limited to molecules.
- ...you might use it to represent your favorite baking recipes and optimize them.
45 def __init__(self, molstring, splitter=split_molstring, get_syntax=get_syntax_from_numbers, 46 branch_point_to_last=False, ring_point_to_first=False, 47 ): 48 """ 49 initializes molecules based on graph-list representation. 50 51 The term index (or idx) always refers to a character in the string, 52 be it semantic or syntactic. Semantics and syntax of the string are 53 no longer important once the bond list and thus the actual graph is 54 created. Then there are only semantic objects, i.e. atoms. 55 Thus, all numbers occuring in bond, angle or torsional lists or in 56 distance matrices refer to atomic numbers. Indexes and syntactic lists 57 are only important for advanced applications. For example, generating 58 strings with the same meaning. If both options are available they are 59 are marked with index/indexes/idx and number/numbers. For example: 60 ring_root_numbers and ring_root_indexes. 61 62 Parameters: 63 - mol: 64 - string representation of a molecule. 65 - splitter: 66 - function that splits input into syntactic/semantic elements 67 - returns np.array with all elements 68 - default: split_molstring from molecule_utils 69 - you can pass your own function :) 70 - get_syntax: 71 - function that generates atom and functional descriptors from 72 syntactic/semantic elements 73 - returns two np.arrays: 74 -ff: functions array 75 - branches pointing forward f > 0 76 - rings pointing backward f < 0 77 - beads without function f = 0 78 -nn: atoms array 79 - contains atom numbers 80 - functions are -1 81 - default: get_syntax_from_numbers from molecule_utils 82 - you can pass your own function :) 83 - branch_point_to_last: 84 - boolean, False 85 - comes into play when branch points outside molecule 86 - if True branch will point to last atom 87 - ring_point_to_first: 88 - boolean, False 89 - comes into play when ring points outside molecule 90 - if True ring will point to first atom 91 92 Returns: 93 nothing 94 """ 95 96 self.branch_point_to_last= branch_point_to_last 97 """boolean if True branch that points outside molecule will point to last atom""" 98 self.ring_point_to_first = ring_point_to_first 99 """boolean if True ring that points outside molecule will point to first atom""" 100 101 mol = np.array( splitter(molstring) ) 102 self.molecule = np.array( splitter(molstring) ) 103 """array representation of the molecule i.e. all keys/words/whatever in a list""" 104 105 syntactic_elements = get_syntax(mol) 106 107 self.f = np.array( syntactic_elements[0] ).astype(int) # shows function 108 """array with all functions. 109 - branches pointing forward f > 0 110 - rings pointing backward f < 0 111 - beads without function f = 0 112 """ 113 self.n = np.array( syntactic_elements[1] ).astype(int) # shows atomnumber 114 """array with atom numbers. branches and rings n = -1 as they are no atoms""" 115 self.i = np.arange(len(mol)) # shows index 116 """array with indexes. atoms, branches and rings get an index""" 117 self.len = len(self.i) 118 """number of all elements i.e. atoms, branches and rings""" 119 120 # contains indexes of atoms 121 # index of atom no n: self.atom_indexes[n] 122 self.atom_indexes = self.i[self.f == 0] 123 """array with indexes of all atoms""" 124 self.atom_names = self.molecule[self.f == 0] 125 """array with names of all atoms""" 126 self.atom_keys = np.array( [ "["+x+"]" for x in self.atom_names ] ) 127 """array with keys of all atoms""" 128 self.atom_number = len(self.atom_indexes) 129 """number of all atoms""" 130 self.atom_numbers = np.arange(self.atom_number) 131 """array with numbers of all atoms""" 132 133 # housekeeping for get_neighbour_list (will be called later) 134 # (the following things are generated when calling get_neighbour_list) 135 self.idx_neighbour_list = np.array([]) 136 """array with indexes of all neighboured/bonded atoms""" 137 self.neighbour_list = np.array([]) 138 """array with numbers of all neighboured/bonded atoms""" 139 self.ring_root_indexes = np.array([]) 140 """array with indexes of all atoms rooting a ring""" 141 142 143 # housekeeping for get_distance_matrix (will be called later) 144 # (the following things are generated when calling get_distance_matrix) 145 self.distance_matrix = np.array([]) 146 """distance matrix of all atoms in the molecule""" 147 self.bond_lists = np.array([]) 148 """nested array with bond lists 149 - starting from atoms separeted by one bond bond_lists[0] 150 - then atoms separeted by two bonds bond_lists[1] 151 - ... 152 """ 153 self.idx_bond_list = np.array([]) 154 """array with indexes of all neighboured/bonded atoms""" 155 self.bond_list = np.array([]) 156 """array with numbers of all neighboured/bonded atoms""" 157 self.bond_keys = np.array([]) 158 """array with keys of all neighboured/bonded atoms""" 159 self.angle_list = np.array([]) 160 """array of numbers of all angle forming atoms (separated by 2 bonds)""" 161 self.angle_names = np.array([]) 162 """array of names of all angle forming atoms (separated by 2 bonds)""" 163 self.angle_keys = np.array([]) 164 """array of keys of all angle forming atoms (separated by 2 bonds)""" 165 self.torsion_list = np.array([]) 166 """array of numbers of all dihedral forming atoms (separated by 3 bonds)""" 167 self.torsion_names = np.array([]) 168 """array of names of all dihedral forming atoms (separated by 3 bonds)""" 169 self.torsion_keys = np.array([]) 170 """array of keys of all dihedral forming atoms (separated by 3 bonds)""" 171 172 # get bond list 173 self.get_neighbour_list() 174 175 # get branch ends 176 n, count = np.unique(self.bond_list, return_counts=True) 177 pp = np.squeeze(np.where(count == 1)) 178 if np.sum(pp) > 0: 179 self.branch_end_numbers = n[pp] 180 self.branch_end_indexes = self.atom_indexes[self.branch_end_numbers] 181 else: 182 self.branch_end_numbers = np.empty(0) 183 self.branch_end_indexes = np.empty(0) 184 # get ring closures 185 self.ring_close_indexes = np.squeeze(np.where(self.f < 0)) - 1 186 """array with indexes of all atoms closing a ring""" 187 if np.sum(self.ring_close_indexes.shape) > 0: 188 self.ring_close_numbers = self.n[self.ring_close_indexes] 189 """array with atom numbers of all atoms closing a ring""" 190 self.ring_root_numbers = self.n[self.ring_root_indexes] 191 """array with atom numbers of all atoms rooting a ring""" 192 else: 193 self.ring_close_numbers = np.empty(0) 194 self.ring_root_numbers = np.empty(0) 195 196 # get distance matrix 197 self.get_distance_matrix() 198 199 dummy = unique_sort(self.atom_names, return_inverse=True) 200 ( 201 self.unique_atom_keys, 202 self.unique_atom_indexes, 203 self.unique_atom_inverse, 204 ) = dummy 205 self.unique_atom_names = self.atom_names[self.unique_atom_indexes] 206 """array with unique atom names""" 207 self.unique_atom_numbers = self.atom_numbers[self.unique_atom_indexes] 208 """array with numbers of unique atom names""" 209 assert np.array_equal( 210 self.unique_atom_keys[self.unique_atom_inverse], self.atom_names 211 ), "unique atom in inverse doesnt work" 212 213 dummy = unique_sort(self.bond_keys, return_inverse=True) 214 self.unique_bond_keys = dummy[0] 215 """array with unique bond keys""" 216 self.unique_bond_indexes = dummy[1] 217 """array with unique bond indexes""" 218 self.unique_bond_inverse = dummy[2] 219 """array to invert unique bonds""" 220 221 self.unique_bond_names = self.bond_names[self.unique_bond_indexes] 222 """array with unique bond names""" 223 self.unique_bond_numbers = self.bond_list[self.unique_bond_indexes] 224 """array with numbers of unique bond names""" 225 if len(self.bond_keys) > 0: 226 assert np.array_equal( 227 self.unique_bond_keys[self.unique_bond_inverse], self.bond_keys 228 ), "unique bond in inverse doesnt work" 229 230 dummy = unique_sort(self.angle_keys, return_inverse=True) 231 self.unique_angle_keys = dummy[0] 232 """array with unique angle keys""" 233 self.unique_angle_indexes = dummy[1] 234 """array with unique angle indexes""" 235 self.unique_angle_inverse = dummy[2] 236 """array to invert unique angles""" 237 238 self.unique_angle_names = self.angle_names[self.unique_angle_indexes] 239 """array with unique angle names""" 240 self.unique_angle_numbers = self.angle_list[self.unique_angle_indexes] 241 """array with numbers of unique angle names""" 242 if len(self.angle_keys) > 0: 243 assert np.array_equal( 244 self.unique_angle_keys[self.unique_angle_inverse], self.angle_keys 245 ), "unique angle in inverse doesnt work" 246 247 dummy = unique_sort(self.torsion_keys, return_inverse=True) 248 self.unique_torsion_keys = dummy[0] 249 """array with unique torsion keys""" 250 self.unique_torsion_indexes = dummy[1] 251 """array with unique torsion indexes""" 252 self.unique_torsion_inverse = dummy[2] 253 """array to invert unique torsions""" 254 255 self.unique_torsion_names = self.torsion_names[self.unique_torsion_indexes] 256 """array with unique torsion names""" 257 self.unique_torsion_numbers = self.torsion_list[self.unique_torsion_indexes] 258 """array with numbers of unique torsion names""" 259 if len(self.torsion_keys) > 0: 260 assert np.array_equal( 261 self.unique_torsion_keys[self.unique_torsion_inverse], self.torsion_keys 262 ), "unique angle in inverse doesnt work" 263 264 self.unique_atom_pair_names = [] 265 self.unique_atom_pair_keys = [] 266 for i, a0 in enumerate(self.unique_atom_names): 267 for j, a1 in enumerate(self.unique_atom_names): 268 if i > j: 269 self.unique_atom_pair_names.append(sort_force_fields([a0, a1])) 270 self.unique_atom_pair_keys.append( 271 make_graph(self.unique_atom_pair_names[-1]) 272 ) 273 self.unique_atom_pair_names = np.array(self.unique_atom_pair_names) 274 """array with unique atom pair names. use them for combining rules.""" 275 self.unique_atom_pair_keys = np.array(self.unique_atom_pair_keys) 276 """array with unique atom pair keys. use them for combining rules.""" 277 return
initializes molecules based on graph-list representation.
The term index (or idx) always refers to a character in the string, be it semantic or syntactic. Semantics and syntax of the string are no longer important once the bond list and thus the actual graph is created. Then there are only semantic objects, i.e. atoms. Thus, all numbers occuring in bond, angle or torsional lists or in distance matrices refer to atomic numbers. Indexes and syntactic lists are only important for advanced applications. For example, generating strings with the same meaning. If both options are available they are are marked with index/indexes/idx and number/numbers. For example: ring_root_numbers and ring_root_indexes.
Parameters:
- mol:
- string representation of a molecule.
- splitter:
- function that splits input into syntactic/semantic elements
- returns np.array with all elements
- default: split_molstring from molecule_utils
- you can pass your own function :)
- get_syntax:
- function that generates atom and functional descriptors from syntactic/semantic elements
- returns two np.arrays: -ff: functions array - branches pointing forward f > 0 - rings pointing backward f < 0 - beads without function f = 0 -nn: atoms array - contains atom numbers - functions are -1
- default: get_syntax_from_numbers from molecule_utils
- you can pass your own function :)
- branch_point_to_last:
- boolean, False
- comes into play when branch points outside molecule
- if True branch will point to last atom
- ring_point_to_first:
- boolean, False
- comes into play when ring points outside molecule
- if True ring will point to first atom
Returns: nothing
array with all functions.
- branches pointing forward f > 0
- rings pointing backward f < 0
- beads without function f = 0
nested array with bond lists
- starting from atoms separeted by one bond bond_lists[0]
- then atoms separeted by two bonds bond_lists[1]
- ...
279 def reinit_mol(self,mol): 280 """ 281 reinitializes moleculeclass 282 283 """ 284 285 self.molstring = mol 286 if isinstance(mol, str): 287 mol = mol[1:-1].split("][") 288 289 self.molecule = np.array(mol) 290 """array representation of the molecule i.e. all keys/words/whatever in a list""" 291 self.f = np.zeros(len(mol)) # shows function 292 """array with all functions. 293 - branches pointing forward f > 0 294 - rings pointing backward f < 0 295 - beads without function f = 0 296 """ 297 self.n = -1 * np.ones(len(mol)) # shows atomnumber 298 """array with atom numbers. branches and rings n = -1 as they are no atoms""" 299 self.i = np.arange(len(mol)) # shows index 300 """array with indexes. atoms, branches and rings get an index""" 301 self.len = len(self.i) 302 """number of all elements i.e. atoms, branches and rings""" 303 n = 0 304 for i, m in enumerate(mol): 305 if m[0] == "b": 306 self.f[i] = int(m[1:]) 307 elif m[0] == "r": 308 # dum = -int(m[1:]) 309 self.f[i] = -int(m[1:]) # self.get_atom_index() 310 else: 311 self.n[i] = n 312 n += 1 313 return
reinitializes moleculeclass
316 def get_neighbour_list(self): 317 """ 318 generates a neighbour list from an initialized molecule 319 320 Parameters: None 321 322 Returns: 323 - neighbour_list: 324 - list of neighboured atoms containing their atom numbers. 325 - idx_neighbour_list: 326 - list of neighboured atoms containing their atom indexes. 327 """ 328 329 idx_neighbour_list = [] 330 idx_branch_ends = np.empty(0) 331 idx_branch_starts = np.empty(0) 332 idx_ring_close = np.empty(0) 333 idx_branch_funs = np.empty(0) 334 335 branches = np.zeros(len(self.i)) 336 branch_origins = np.zeros(len(self.i)) 337 branch_count = 1 338 339 if self.atom_number < 2: 340 self.idx_neighbour_list = np.array([]) 341 self.neighbour_list = np.array([]) 342 self.idx_bond_list = np.array([]) 343 self.bond_list = np.array([]) 344 return np.array([]), np.array([]) 345 346 """ 347 find branches and assign numbers. 348 get main path through molecule. 349 build molecule from this... 350 """ 351 inv_f = self.f[::-1] 352 branch_no = 1 353 inv_f_p = np.atleast_1d(np.squeeze(np.where(inv_f != 0))) 354 inv_branches = inv_f.copy() 355 inv_branches[inv_branches != 0] = -1 356 for i, ifi in zip(inv_f_p, inv_f[inv_f_p]): 357 if ifi < 0: # ring 358 inv_branches[i] = -1 359 if ifi > 0: # branch :) 360 pp = np.atleast_1d(np.squeeze(np.where(inv_branches[:i] == 0))) 361 if len(pp) > ifi: 362 p_branch = pp[-int(ifi) :] 363 inv_branches[p_branch] = branch_no 364 inv_branches[i] = branch_no 365 branch_no += 1 366 branches = reenumerate_branches(inv_branches[::-1]) 367 # print(branches) 368 369 main_path_p = np.where(branches == 0) 370 main_path_i = self.i[main_path_p] 371 372 # get bond list of main path 373 bond_list = bond_list_from_simple_path(main_path_i) 374 # print( "branches",branches ) 375 376 """ 377 build bond list 378 """ 379 f_p = np.atleast_1d(np.squeeze(np.where(self.f != 0))) 380 self.ring_root_indexes = [] 381 # print("f_p , self.f[f_p]", f_p, self.f[f_p]) 382 for i, fi in zip(f_p, self.f[f_p]): 383 #print(branches) 384 if fi < 0: 385 idx = np.max( 386 get_diff_in_bond_lists(f_p[f_p < i], np.arange(i)) 387 ) # index of last atom 388 subgraph = graph_from_bonds(bond_list) 389 path_to_main = get_shortest_path(subgraph, 0, idx) 390 391 bond_list_to_main = bond_list_from_simple_path(path_to_main) 392 graph_to_main = graph_from_bonds(bond_list_to_main) 393 root = get_root(graph_to_main, idx, int(np.abs(fi))) 394 branches[i] = branches[root] # connect ring to acompanying branch 395 if root < 0 and self.ring_point_to_first: 396 if fi+root >= self.min_ring_size: 397 root = np.min(self.atom_indexes) 398 bond_list = np.concatenate([bond_list, [[root, idx]]]) 399 self.ring_root_indexes.append(root) 400 print("root of ring outside molecule. point to first atom.") 401 else: 402 print( "ring size smaller:",self.min_ring_size,". ignore") 403 elif root < 0: 404 print("root of ring outside molecule. ignore.") 405 else: 406 bond_list = np.concatenate([bond_list, [[root, idx]]]) 407 self.ring_root_indexes.append(root) 408 elif fi > 0: 409 link = get_branch_root(branches[i], branches) 410 if not self.is_atom(link): 411 link = self.get_last_atom_idx(link) 412 branch = branches[i] 413 p_subchain_i = np.atleast_1d(np.squeeze(np.where(branches == branch)))[ 414 1: 415 ] 416 if len(p_subchain_i) < fi and self.branch_point_to_last: 417 link = np.max(self.atom_indexes) 418 p_subchain_i = np.concatenate([[link], p_subchain_i]) 419 subchain_bond_list = bond_list_from_simple_path(p_subchain_i) 420 bond_list = np.concatenate([bond_list, subchain_bond_list]) 421 print("end of branch outside molecule. point to last atom.") 422 elif len(p_subchain_i) < fi: 423 print("end of branch outside molecule. ignore.") 424 else: 425 p_subchain_i = np.concatenate([[link], p_subchain_i]) 426 subchain_bond_list = bond_list_from_simple_path(p_subchain_i) 427 bond_list = np.concatenate([bond_list, subchain_bond_list]) 428 else: 429 print("fi = 0... this should not happen :P") 430 #print("fin branches", branches) 431 self.idx_neighbour_list = bond_list 432 self.idx_bond_list = bond_list 433 self.ring_root_indexes = np.array(self.ring_root_indexes) 434 435 self.neighbour_list = np.vectorize(self.get_atom_no)(self.idx_neighbour_list) 436 self.bond_list = self.neighbour_list 437 return self.neighbour_list, self.idx_neighbour_list
generates a neighbour list from an initialized molecule
Parameters: None
Returns:
- neighbour_list:
- list of neighboured atoms containing their atom numbers.
- idx_neighbour_list:
- list of neighboured atoms containing their atom indexes.
439 def get_bond_matrix(self): 440 """ 441 generates bond matrix from an initialized molecule 442 443 Parameters: None 444 445 Returns: 446 - bond_matrix: 447 - bond matrix of you molecule 448 """ 449 self.bond_matrix = get_bond_matrix(self.neighbour_list, self.atom_number) 450 return self.bond_matrix.copy()
generates bond matrix from an initialized molecule
Parameters: None
Returns:
- bond_matrix:
- bond matrix of you molecule
452 def get_distance_matrix(self): 453 """ 454 generates distance matrix from an initialized molecule. 455 Furthermore different angle and torsion lists are generated. 456 457 Parameters: None 458 459 Returns: 460 - distance_matrix: 461 - distance matrix of you molecule 462 """ 463 self.distance_matrix, self.bond_lists = get_distance_matrix( 464 self.neighbour_list, self.atom_number 465 ) 466 self.bond_list = self.bond_lists[0] 467 self.bond_names = np.array( 468 [sort_force_fields(self.atom_names[a]) for a in self.bond_list] 469 ) 470 self.bond_keys = np.array([make_graph(t) for t in self.bond_names]) 471 if len(self.bond_lists) > 1: 472 self.angle_list = np.array(self.bond_lists[1]) 473 self.angle_names = np.array( 474 [sort_force_fields(self.atom_names[a]) for a in self.angle_list] 475 ) 476 self.angle_keys = np.array([make_graph(t) for t in self.angle_names]) 477 else: 478 self.angle_list = np.array([]) 479 self.angle_names = np.array([]) 480 self.angle_keys = np.array([]) 481 if len(self.bond_lists) > 2: 482 self.torsion_list = np.array(self.bond_lists[2]) 483 self.torsion_names = np.array( 484 [sort_force_fields(self.atom_names[a]) for a in self.torsion_list] 485 ) 486 self.torsion_keys = np.array([make_graph(t) for t in self.torsion_names]) 487 else: 488 self.torsion_list = np.array([]) 489 self.torsion_names = np.array([]) 490 self.torsion_keys = np.array([]) 491 return self.distance_matrix.copy()
generates distance matrix from an initialized molecule. Furthermore different angle and torsion lists are generated.
Parameters: None
Returns:
- distance_matrix:
- distance matrix of you molecule
493 def is_atom(self,idx): 494 """ 495 checks if index is an atom. 496 - Number: atom number 497 - Index: index in list representation of molecule. 498 Number and index are differing due to functionals in the list. 499 500 Parameters: 501 - idx: 502 - idx of the atom 503 504 Returns: 505 - boolean 506 """ 507 if idx in self.atom_indexes: 508 return True 509 else: 510 return False
checks if index is an atom.
- Number: atom number
- Index: index in list representation of molecule. Number and index are differing due to functionals in the list.
Parameters:
- idx:
- idx of the atom
Returns:
- boolean
512 def get_atom_index(self, no): 513 """ 514 returns index of atom. 515 - Number: atom number 516 - Index: index in list representation of molecule. 517 Number and index are differing due to functionals in the list. 518 519 Parameters: 520 - no: 521 - number of the atom 522 523 Returns: 524 - index of atom number "no" 525 """ 526 return int(self.atom_indexes[int(no)])
returns index of atom.
- Number: atom number
- Index: index in list representation of molecule. Number and index are differing due to functionals in the list.
Parameters:
- no:
- number of the atom
Returns:
- index of atom number "no"
528 def get_atom_no(self, idx): 529 """ 530 returns index of atom. 531 - Number: atom number 532 - Index: index in list representation of molecule. 533 Number and index are differing due to functionals in the list. 534 535 Parameters: 536 - idx: 537 - idx of the atom 538 539 Returns: 540 - number of atom with index 541 """ 542 idx = int(idx) 543 if self.f[idx] >= 0: 544 no = int(np.squeeze(np.where(self.atom_indexes == idx))) 545 assert no == self.n[idx], "atom numbers dont work,idx" 546 return no 547 else: 548 assert self.n[idx] == -1, "atom numbers dont work, n=-1" 549 print("Error in get_atom_no: not an atom",idx) 550 return False
returns index of atom.
- Number: atom number
- Index: index in list representation of molecule. Number and index are differing due to functionals in the list.
Parameters:
- idx:
- idx of the atom
Returns:
- number of atom with index
552 def get_next_atom_idx(self, idx): 553 """ 554 returns index of the next atom after index idx. 555 Index: index in list representation of molecule. 556 Number and index are differing due to functionals in the list. 557 558 Parameters: 559 idx: 560 - index in list representation of molecule 561 562 Returns: 563 - index of the next atom after index idx 564 """ 565 return int(np.min(self.atom_indexes[(self.atom_indexes - idx) > 0]))
returns index of the next atom after index idx. Index: index in list representation of molecule. Number and index are differing due to functionals in the list.
Parameters: idx: - index in list representation of molecule
Returns:
- index of the next atom after index idx
567 def get_last_atom_idx(self, idx): 568 """ 569 returns index of the atom before index idx. 570 Index: index in list representation of molecule. 571 Number and index are differing due to functionals in the list. 572 573 Parameters: 574 idx: 575 - index in list representation of molecule 576 577 Returns: 578 - index of the atom before index idx 579 """ 580 return int(np.max(self.atom_indexes[(self.atom_indexes - idx) < 0]))
returns index of the atom before index idx. Index: index in list representation of molecule. Number and index are differing due to functionals in the list.
Parameters: idx: - index in list representation of molecule
Returns:
- index of the atom before index idx
582 def get_next_atom_no(self, idx): 583 """ 584 returns number of the atom after index idx. 585 Index: index in list representation of molecule. 586 Number: atom number 587 Number and index are differing due to functionals in the list. 588 589 Parameters: 590 - idx: 591 - index in list representation of molecule 592 593 Returns: 594 - number of the atom after index idx 595 """ 596 return int(self.n[self.get_next_atom_idx(idx)])
returns number of the atom after index idx. Index: index in list representation of molecule. Number: atom number Number and index are differing due to functionals in the list.
Parameters:
- idx:
- index in list representation of molecule
Returns:
- number of the atom after index idx
598 def get_last_atom_no(self, idx): 599 """ 600 returns number of the atom before index idx. 601 Index: index in list representation of molecule. 602 Number: atom number 603 Number and index are differing due to functionals in the list. 604 605 Parameters: 606 self: 607 - initialized molecule. 608 idx: 609 - index in list representation of molecule 610 611 Returns: 612 number of the atom before index idx 613 """ 614 return int(self.n[self.get_last_atom_idx(idx)])
returns number of the atom before index idx. Index: index in list representation of molecule. Number: atom number Number and index are differing due to functionals in the list.
Parameters: self: - initialized molecule. idx: - index in list representation of molecule
Returns: number of the atom before index idx
616 def map_molecule_via_atom_names(self, data): 617 """ 618 maps information from a dictionary onto the molecule. 619 Use this for molecule information i.e. coordinates, group contributions,... 620 621 Parameters: 622 - data: 623 - dictionary with: keys == atom names !!! 624 625 Returns: 626 - mapped data 627 """ 628 return [*map(data.get, self.atom_names)]
maps information from a dictionary onto the molecule. Use this for molecule information i.e. coordinates, group contributions,...
Parameters:
- data:
- dictionary with: keys == atom names !!!
Returns:
- mapped data
630 def map_unique_via_atom_names(self, data): 631 """ 632 Maps information from a dictionary onto unique elements of the molecule. 633 Preserves the order of the original list. 634 Use this for force-field information i.e. potentials,... 635 636 Parameters: 637 - data: 638 - dictionary with: keys == atom names !!! 639 640 Returns: 641 - mapped unique data 642 """ 643 print( 644 "map_unique_via_atom_names is outtdated... dont use\n shouldnt cause errors btw" 645 ) 646 return [*map(data.get, self.unique_atom_names)]
Maps information from a dictionary onto unique elements of the molecule. Preserves the order of the original list. Use this for force-field information i.e. potentials,...
Parameters:
- data:
- dictionary with: keys == atom names !!!
Returns:
- mapped unique data
648 def map_molecule(self, goal, data): 649 """ 650 maps information from a dictionary onto a goal. 651 Use this for molecule information i.e. coordinates, group contributions,... 652 653 654 Parameters: 655 - goal: 656 - list of keys 657 - data: 658 - dictionary with keys in goal 659 660 Returns: 661 - mapped data 662 """ 663 return [*map(data.get, goal)]
maps information from a dictionary onto a goal. Use this for molecule information i.e. coordinates, group contributions,...
Parameters:
- goal:
- list of keys
- data:
- dictionary with keys in goal
Returns:
- mapped data
665 def map_unique(self, goal, data): 666 """ 667 Maps information from a dictionary onto unique elements a goal. 668 Preserves the order of the original list. 669 Use this for force-field information i.e. potentials,... 670 671 Parameters: 672 - goal: 673 - list of keys 674 - data: 675 - dictionary with keys in goal 676 677 Returns: 678 - mapped unique data 679 """ 680 print("map_unique is outtdated... dont use\n shouldnt cause errors btw") 681 return [*map(data.get, unique_sort(goal))]
Maps information from a dictionary onto unique elements a goal. Preserves the order of the original list. Use this for force-field information i.e. potentials,...
Parameters:
- goal:
- list of keys
- data:
- dictionary with keys in goal
Returns:
- mapped unique data
683 def map_nested(self, goal, data): 684 """ 685 Maps information from a dictionary onto nested elements of a goal. 686 Preserves the order of the original list. 687 Use this for advanced force-field information i.e. mixing rules,... 688 689 same as: 690 [ self.map_unique( m, data) for m in goal ] 691 692 Parameters: 693 - goal: 694 -with list list of keys 695 - data: 696 -dictionary with keys in goal 697 698 Returns: 699 - mapped unique data 700 """ 701 return [[*map(data.get, m)] for m in goal]
Maps information from a dictionary onto nested elements of a goal. Preserves the order of the original list. Use this for advanced force-field information i.e. mixing rules,...
same as: [ self.map_unique( m, data) for m in goal ]
Parameters:
- goal: -with list list of keys
- data: -dictionary with keys in goal
Returns:
- mapped unique data
703 def get_distance(self, x, y): 704 """ 705 Returns distance between atoms no x and y 706 0: same atom i.e. x==y 707 1: bond 708 2: angle 709 3: torsion 710 711 Parameters: 712 - x,y: 713 - atom numbers 714 715 Returns: 716 - distance 717 """ 718 return self.distance_matrix[int(x)][int(y)]
Returns distance between atoms no x and y 0: same atom i.e. x==y 1: bond 2: angle 3: torsion
Parameters:
- x,y:
- atom numbers
Returns:
- distance
720 def get_sorted_key(self, x): 721 """ 722 Sorts a list alphabetically. 723 Very important for dictionary keys in the molecule class!!! 724 725 Parameters: 726 - x: 727 - string list to sort 728 729 Returns: 730 - key (string) and sorted list 731 """ 732 x_sort = sort_force_fields(x) 733 return make_graph(x_sort), x_sort
Sorts a list alphabetically. Very important for dictionary keys in the molecule class!!!
Parameters:
- x:
- string list to sort
Returns:
- key (string) and sorted list
735 def visualize(self, saveto=""): 736 """ 737 plots and visualizes molecule 738 739 Parameters: 740 - saveto: 741 - optional, location to save pic of graph to, 742 - add file format at the end ;) 743 744 Returns: 745 nothing 746 """ 747 if self.atom_numbers.size > 1: 748 plot_graph(self.atom_names, self.bond_list, saveto=saveto) 749 else: 750 print("one bead only") 751 return
plots and visualizes molecule
Parameters:
- saveto:
- optional, location to save pic of graph to,
- add file format at the end ;)
Returns: nothing