moleculegraph.general_utils

  1import numpy as np
  2import glob
  3import csv
  4from rapidfuzz.distance.Levenshtein import normalized_distance
  5import json
  6from .molecule_utils import sort_force_fields, make_graph
  7
  8"""
  9loose collection of functions in connection with the molecule class.
 10Mainly heading towards generate stuff for mapping for our inhouse projects.
 11Might be useless for external users.
 12
 13subjects:
 14- read
 15- write
 16- angles, distances...
 17- forces
 18"""
 19
 20def unite_atoms( elements, atoms, masses, bond_list, UA=True ):
 21    """
 22    generates united atom coordinates from a list of atoms.
 23
 24    TO DO:
 25    - the atoms and masses objects are not obvious...
 26
 27    Parameters:
 28    - elements:
 29        - list of atoms elements.
 30    - atoms:
 31        - list of atoms coordinates.
 32    - masses:
 33        - masses of the atoms.
 34    - bond_matrix:
 35        - bond matrix of the corresponding molecule
 36        - ...distance matrix works too
 37        
 38    Returns:
 39    - new_matrix:
 40        - list of united atoms with coordinates.
 41    - new_masses:
 42        - list of corresponding united atom masses.
 43    - new_bonds:
 44        - list of corresponding united atom bonds.
 45        - you can generate a new bond or distance matrix with this list.
 46    """
 47    keep = []
 48    keep_bonds = []
 49    for i, p in enumerate(bond_list):
 50        p0 = int(np.min(p))
 51        p1 = int(np.max(p))
 52        name0 = elements[p0][0]
 53        name1 = elements[p1][0]
 54        if (name0 == "H" and name1 == "C" and UA) or (
 55            name1 == "H" and name0 == "C" and UA
 56        ):
 57            if name0 == "H" and name1 == "C":
 58                p0 = int(np.max(p))
 59                p1 = int(np.min(p))
 60                name0 = elements[p0][0]
 61                name1 = elements[p1][0]
 62            coos_a = np.array(atoms[p0]).astype(float)
 63            coos_b = np.array(atoms[p1]).astype(float)
 64            mass_a = masses[p0]
 65            mass_b = masses[p1]
 66            dummy = (coos_a * mass_a + coos_b * mass_b) / (mass_a + mass_b)
 67            atoms[p0][0] = dummy[0]
 68            atoms[p0][1] = dummy[1]
 69            atoms[p0][2] = dummy[2]
 70            masses[p0] = mass_a + mass_b
 71            masses[p1] = -1
 72            keep.append(p0)
 73        else:
 74            keep.append(p0)
 75            keep.append(p1)
 76            keep_bonds.append(i)
 77
 78    keep = np.array(np.sort(np.unique(keep))).astype(int)
 79
 80    new_atoms = atoms[keep]
 81    new_bonds = bond_list[keep_bonds]
 82    new_elements = elements[keep]
 83
 84    p1 = np.where(masses != -1)
 85    new_masses = masses[p1]
 86    return new_elements, new_atoms, new_masses, new_bonds
 87
 88
 89def assign_coos_via_distance_mat(coos_list, distance_matrix, reference):
 90    """
 91    NOT MATURE !!! (but sometimes useful)
 92    
 93    Assigns coos to suit a reference based on a distance matrix relying to the coos.
 94    Reference and distance matrix/ coos belong to the same molecule type but are sorted in
 95    different ways. Sorted rows (or cols) of a distance matrix belong to the same atom when
 96    they are equal.
 97
 98    TO DO:
 99    - add check based on levensthein to omit errors through branches!!!
100    - gernalizable for more than only coos???
101    - bad names :(
102
103    Parameters:
104    - coos_list:
105        - list of coordinates (or anything else???!!!).
106    - distance_matrix:
107        - distance matrix which belongs to the coos_list.
108    - reference:
109        - distance matrix which belongs to the reference you want to apply the coos to.
110
111    Returns:
112    - new_coos_list:
113        - list of coordinates fitting the reference.
114    - idx:
115        - indexes to translate sth. to reference.
116    """
117    distance_matrix_sort = np.sort(distance_matrix, axis=1)
118    reference_sort = np.sort(reference, axis=1)
119    idx = []
120    for ref in reference_sort:
121        for i, row in enumerate(distance_matrix_sort):
122            if np.array_equal(ref, row) and i not in idx:
123                idx.append(i)
124                break
125    idx = np.array(idx).astype(int)
126
127    print(
128        """\n\nWARNING:
129    Assign_coos_via_distance_mat is not mature yet.
130    In branched molecules errors are conceivable because atom elements are not checked.\n
131    In structurally symmetric molecules with non-symmetric atome types errors might occur \n
132    Double-check your results!!! \n \n"""
133    )
134
135    return coos_list[idx], idx
136
137def pair_from_row(row):
138    """
139    Gets pair potential from Joachims MC Code pair_potentials file.
140    Charges are added later ;)
141
142    Parameters:
143    - row:
144        - splitted line i.e. list of line in pair_potentials file.
145    
146    Returns:
147    - pair potential dictionary
148    """
149    pair = {}
150    pair["name"] = str(row[0])
151    pair["mass"] = float(row[1])
152    pair["epsilon"] = float(row[2])
153    pair["sigma"] = float(row[3])
154    pair["m"] = float(row[4])
155    pair["cut"] = float(row[5])
156    pair["charge"] = 0.0
157    return pair
158
159
160def pair_of_h():
161    """
162    Builds pair potential dummy for hydrogen atoms.
163    Charges are added later ;)
164
165    Parameters:
166        none
167    
168    Returns:
169    - hydrogen pair potential dictionary
170    """
171    pair = {}
172    pair["name"] = "H"
173    pair["mass"] = 1.0
174    pair["epsilon"] = 0.0
175    pair["sigma"] = 1.0
176    pair["m"] = 12.0
177    pair["cut"] = 0.0
178    pair["charge"] = 0.0
179    return pair
180
181
182def get_charge(row, pair_dict):
183    """
184    Assigns charge to pair potential dict.
185    Uses the normalized_levenshtein algorithm :)
186
187    Parameters:
188    - row:
189        - splitted charge line i.e. list of line in pair_potentials file.
190    - pair_dict:
191        - dictionary containing pair potentials
192    
193    Returns:
194    - key:
195        - dict key i.e. atom name the charge belongs to.
196    - charge:
197        - float value of the charge
198    """
199    keys = list(pair_dict.keys())
200    dummy = np.zeros(len(keys))
201    for i, key in enumerate(keys):
202        dummy[i] = normalized_distance(row[0][1:], key)
203    pointer = np.squeeze(np.where(dummy == np.amin(dummy)))
204    charge = float(row[2])
205    if pointer.shape:
206        print("ERROR: several atoms found for one charge:")
207        print("charge:", charge)
208        print("atoms:",keys[pointer])
209        return None, charge
210    else:
211        return keys[pointer], charge
212
213
214def read_pair_potentials(path):
215    """
216    Reads pair potentials from Joachims MC Code pair_potentials file.
217
218    Parameters:
219    - path:
220        - path to pair_potentials file.
221            
222    Returns:
223    - pair_dict:
224        - dictionary containing pair potentials
225    """
226    keylist = ["!", "#", "models:", "types", "References"]
227    pair_dict = {}
228    flag = -1
229    with open(path) as csvfile:
230        spamreader = csv.reader(csvfile, delimiter=" ")
231        for row in spamreader:
232            row = [x for x in row if x]
233            if row:
234                if "VdW-site" in row:
235                    flag = 1
236                elif "coulomb-site" in row:
237                    flag = 2
238                elif row[0] in keylist:
239                    break
240                elif flag == 1 and row:
241                    pair_dict[row[0]] = pair_from_row(row)
242                elif flag == 2 and row:
243                    # print(row)
244                    if row[0].split("_")[0] == "cH":
245                        pair_dict[row[0]] = pair_of_h()
246                        pair_dict[row[0]]["charge"] = float(row[2])
247                    else:
248                        p, ch = get_charge(row, pair_dict)
249                        # print(p,ch)
250                        pair_dict[p]["charge"] = ch
251    return pair_dict
252
253
254def read_bond_potentials(path):
255    """
256    Reads bond potentials from Joachims MC Code bond_potentials file.
257
258    Parameters:
259    - path:
260        - path to bond_potentials file.
261        
262    Returns:
263    - bond_dict:
264        - dictionary containing bond potentials
265    """
266    keylist = ["!", "#", "models:", "types"]
267    bond_dict = {}
268    flag = 0
269    with open(path) as csvfile:
270        spamreader = csv.reader(csvfile, delimiter=" ")
271        for row in spamreader:
272            row = [x for x in row if x]
273            if row:
274                if row[0] == "model":
275                    flag = 1
276                elif flag == 1 and row[0] in keylist:
277                    break
278                elif flag == 1:
279                    dummy = sort_force_fields(row[1:3])
280                    name = make_graph(dummy)
281                    bond_dict[name] = {}
282                    bond_dict[name]["list"] = row[1:3]
283                    bond_dict[name]["type"] = int(row[0])
284                    # bond_dict[name]["len"] = float(row[3])
285                    # bond_dict[name]["spring"] = float(row[4])
286                    bond_dict[name]["p"] = [float(r) for r in row[3:]]
287
288    return bond_dict
289
290
291def read_angle_potentials(path):
292    """
293    Reads angle potentials from Joachims MC Code angle_potentials file.
294
295    Parameters:
296    - path:
297        - path to angle_potentials file.
298        
299    Returns:
300    - angle_dict:
301        - dictionary containing angle potentials
302    """
303    keylist = ["!", "#", "models:", "types", "Note", "Note:"]
304    angle_dict = {}
305    flag = 0
306    with open(path) as csvfile:
307        spamreader = csv.reader(csvfile, delimiter=" ")
308        for row in spamreader:
309            row = [x for x in row if x]
310            if row:
311                if row[0] == "model":
312                    flag = 1
313                elif flag == 1 and row[0] in keylist:
314                    break
315                elif flag == 1:
316                    dummy = sort_force_fields(row[1:4])
317                    name = make_graph(dummy)
318                    angle_dict[name] = {}
319                    angle_dict[name]["list"] = row[1:4]
320                    angle_dict[name]["type"] = int(row[0])
321                    # angle_dict[name]["angle"] = float(row[4])
322                    # angle_dict[name]["p"] = float(row[5])
323                    angle_dict[name]["p"] = [float(r) for r in row[4:]]
324
325    return angle_dict
326
327
328def read_torsion_potentials(path):
329    """
330    Reads torsion potentials from Joachims MC Code torsion_potentials file.
331
332    Parameters:
333    - path:
334        - path to torsion_potentials file.
335        
336    Returns:
337    - torsion_dict:
338        - dictionary containing torsion potentials
339    """
340    keylist = ["!", "#", "models:", "types", "Note:"]
341    torsion_dict = {}
342    flag = 0
343    with open(path) as csvfile:
344        spamreader = csv.reader(csvfile, delimiter=" ")
345        for row in spamreader:
346            row = [x for x in row if x]
347            if row:
348                if row[0] == "model":
349                    flag = 1
350                elif flag == 1 and row[0] in keylist:
351                    break
352                elif flag == 1:
353                    dummy = sort_force_fields(row[1:5])
354                    name = make_graph(dummy)
355                    torsion_dict[name] = {}
356                    torsion_dict[name]["list"] = row[1:5]
357                    torsion_dict[name]["type"] = int(row[0])
358                    torsion_dict[name]["p"] = [float(r) for r in row[5:]]
359
360    return torsion_dict
361
362
363def read_xyz(path, energy=False):
364    """
365    Reads xyz file. Supports tubomole xyz with energy in header
366
367    Parameters:
368    - path:
369        - path to xyz file.
370    - energy:
371        - returns energy if true.
372        - use this with turbomol QM-results.
373    
374    Returns:
375    - xyz:
376        - list of dicts, keys: "atom": atom name and "xyz": coordinates.
377    - energy:
378        - (optional) if energy = True (bad style ?)
379    """
380    data = []
381    with open(path) as csvfile:
382        spamreader = csv.reader(csvfile, delimiter=" ")
383        for row in spamreader:
384            data.append([r for r in row if r])
385    n = int(data[0][0])
386    xyz = {}
387    for i, d in enumerate(data[2 : n + 2]):
388        xyz[i] = {}
389        xyz[i]["atom"] = d[0]
390        xyz[i]["xyz"] = np.array([float(x) for x in d[1:4]])
391
392    if energy:
393        energy = float(data[1][2])
394        return xyz, energy
395
396    return xyz
397
398
399def assign_CHx(xyz):
400    """
401    Builds united atom groups fromm all atom xyz.
402    Works with turbomol results and read_xyz def.
403
404    Parameters:
405    - xyz:
406        - list of atom dicts, keys: "atom": atom name and "xyz": coordinates.
407        
408    Returns:
409    - xyz_CHx:
410        - list of united atom dicts, keys: "atom": atom name and "xyz": coordinates.
411    """
412    xyz_CHx = {}
413    xkeys = sorted(xyz.keys())
414    ii = 0
415    x = np.min(xkeys)
416    flag = 0
417    while x <= np.max(xkeys):
418        if flag == 0 and xyz[x]["atom"] == "C":
419            coos = np.array(xyz[x]["xyz"])
420            flag = 1
421            x += 1
422        elif flag > 0:
423            if xyz[x]["atom"] == "H":
424                coos = np.column_stack((coos, xyz[x]["xyz"]))
425                x += 1
426                flag += 1
427            else:
428                no = flag - 1
429                xyz_CHx[ii] = {}
430                xyz_CHx[ii]["atom"] = "CH" + str(no)
431                xyz_CHx[ii]["xyz"] = np.sum(
432                    coos * np.array([15] + [1] * no), axis=1
433                ) / (15 + 1 * no)
434                coos = []
435                flag = 0
436                ii += 1
437        else:
438            xyz_CHx[ii] = xyz[x]
439            x += 1
440            ii += 1
441    print(
442        """\n\nWARNING:
443    Very basic tool. Only works with well-sorted coordinates. Your better off doing it by hand. At least check your results!!! \n \n"""
444    )
445    return xyz_CHx
446
447
448def kill_CHx(xyz):
449    """
450    Kills C-bonded hydrogens fromm all atom xyz.
451    Works with turbomol results and read_xyz def.
452
453    Parameters:
454    - xyz:
455        - list of atom dicts, keys: "atom": atom name and "xyz": coordinates.
456    
457    Returns:
458    - xyz_CHx:
459        - list of atom dicts, keys: "atom": atom name and "xyz": coordinates.
460    """
461    xyz_CHx = {}
462    xkeys = sorted(xyz.keys())
463    ii = 0
464    x = np.min(xkeys)
465    flag = 0
466    while x <= np.max(xkeys):
467        if flag == 0 and xyz[x]["atom"] == "C":
468            coos = np.array(xyz[x]["xyz"])
469            flag = 1
470            x += 1
471        elif flag > 0:
472            if xyz[x]["atom"] == "H":
473                x += 1
474                flag += 1
475            else:
476                no = flag - 1
477                xyz_CHx[ii] = {}
478                xyz_CHx[ii]["atom"] = "C"
479                xyz_CHx[ii]["xyz"] = coos
480                coos = []
481                flag = 0
482                ii += 1
483        else:
484            xyz_CHx[ii] = xyz[x]
485            x += 1
486            ii += 1
487    print(
488        """\n\nWARNING:
489    Very basic tool. Only works with well-sorted coordinates. Your better off doing it by hand. At least check your results!!! \n \n"""
490    )
491    return xyz_CHx
492
493
494def to_xyz(xyz, path):
495    """
496    Writes xyz file.
497    Works with turbomol results and read_xyz def.
498
499    Parameters:
500    - xyz:
501        - list of atom dicts, keys: "atom": atom name and "xyz": coordinates.
502    - path:
503        - path to xyz file.
504        
505    Returns:
506        nothing
507    """
508    f = open(path, "w")
509    f.write(str(len(xyz)) + "\n")
510    f.write("\n")
511    for x in xyz:
512        line = "    ".join([xyz[x]["atom"][0]] + [str(y) for y in xyz[x]["xyz"]])
513        f.write(line + "\n")
514    f.close()
515    return
516
517
518def distance(x1, x2):
519    """
520    Returns spatial distance.
521
522    Parameters:
523    - x1, x2:
524        - vectors.
525        
526    Returns:
527    - spatial distance between x1 and x2.
528    """
529    return np.linalg.norm(x1 - x2)
530
531
532def unit_vector(vector):
533    """
534    Returns the unit vector of the vector.
535
536    Parameters:
537    - vector:
538        - vector.
539        
540    Returns:
541    - unit vector
542    """
543    return vector / np.linalg.norm(vector)
544
545
546def angle_between(x1, x2, x3):
547    """
548    Returns the angle in radians between vectors x1, x2, x3.
549
550    Parameters:
551    - x1, x2, x3:
552        - vectors.
553            
554    Returns:
555    - rad angle
556    """
557    v1 = unit_vector(x1 - x2)
558    v2 = unit_vector(x3 - x2)
559
560    v1_u = unit_vector(v1)
561    v2_u = unit_vector(v2)
562    return np.arccos(np.clip(np.dot(v1_u, v2_u), -1.0, 1.0))
563
564
565def dihedral(x0, x1, x2, x3):
566    """
567    Returns the dihedral angle in radians between vectors x0, x1, x2, x3.
568    Praxeolitic formula -> 1 sqrt, 1 cross product
569
570    Parameters:
571    - x0, x1, x2, x3:
572        - vectors.
573    
574    Returns:
575    - deg angle
576    """
577    b0 = -1.0 * (x1 - x0)
578    b1 = x2 - x1
579    b2 = x3 - x2
580
581    # normalize b1 so that it does not influence magnitude of vector
582    # rejections that come next
583    # print(b1)
584    # print(np.linalg.norm(b1))
585    if np.linalg.norm(b1) > 0:
586        b1 /= np.linalg.norm(b1)
587
588    # vector rejections
589    # v = projection of b0 onto plane perpendicular to b1
590    #   = b0 minus component that aligns with b1
591    # w = projection of b2 onto plane perpendicular to b1
592    #   = b2 minus component that aligns with b1
593    v = b0 - np.dot(b0, b1) * b1
594    w = b2 - np.dot(b2, b1) * b1
595
596    # angle between v and w in a plane is the torsion angle
597    # v and w may not be normalized but that's fine since tan is y/x
598    x = np.dot(v, w)
599    y = np.dot(np.cross(b1, v), w)
600    return np.arctan2(y, x)
def unite_atoms(elements, atoms, masses, bond_list, UA=True):
21def unite_atoms( elements, atoms, masses, bond_list, UA=True ):
22    """
23    generates united atom coordinates from a list of atoms.
24
25    TO DO:
26    - the atoms and masses objects are not obvious...
27
28    Parameters:
29    - elements:
30        - list of atoms elements.
31    - atoms:
32        - list of atoms coordinates.
33    - masses:
34        - masses of the atoms.
35    - bond_matrix:
36        - bond matrix of the corresponding molecule
37        - ...distance matrix works too
38        
39    Returns:
40    - new_matrix:
41        - list of united atoms with coordinates.
42    - new_masses:
43        - list of corresponding united atom masses.
44    - new_bonds:
45        - list of corresponding united atom bonds.
46        - you can generate a new bond or distance matrix with this list.
47    """
48    keep = []
49    keep_bonds = []
50    for i, p in enumerate(bond_list):
51        p0 = int(np.min(p))
52        p1 = int(np.max(p))
53        name0 = elements[p0][0]
54        name1 = elements[p1][0]
55        if (name0 == "H" and name1 == "C" and UA) or (
56            name1 == "H" and name0 == "C" and UA
57        ):
58            if name0 == "H" and name1 == "C":
59                p0 = int(np.max(p))
60                p1 = int(np.min(p))
61                name0 = elements[p0][0]
62                name1 = elements[p1][0]
63            coos_a = np.array(atoms[p0]).astype(float)
64            coos_b = np.array(atoms[p1]).astype(float)
65            mass_a = masses[p0]
66            mass_b = masses[p1]
67            dummy = (coos_a * mass_a + coos_b * mass_b) / (mass_a + mass_b)
68            atoms[p0][0] = dummy[0]
69            atoms[p0][1] = dummy[1]
70            atoms[p0][2] = dummy[2]
71            masses[p0] = mass_a + mass_b
72            masses[p1] = -1
73            keep.append(p0)
74        else:
75            keep.append(p0)
76            keep.append(p1)
77            keep_bonds.append(i)
78
79    keep = np.array(np.sort(np.unique(keep))).astype(int)
80
81    new_atoms = atoms[keep]
82    new_bonds = bond_list[keep_bonds]
83    new_elements = elements[keep]
84
85    p1 = np.where(masses != -1)
86    new_masses = masses[p1]
87    return new_elements, new_atoms, new_masses, new_bonds

generates united atom coordinates from a list of atoms.

TO DO:

  • the atoms and masses objects are not obvious...

Parameters:

  • elements:
    • list of atoms elements.
  • atoms:
    • list of atoms coordinates.
  • masses:
    • masses of the atoms.
  • bond_matrix:
    • bond matrix of the corresponding molecule
    • ...distance matrix works too

Returns:

  • new_matrix:
    • list of united atoms with coordinates.
  • new_masses:
    • list of corresponding united atom masses.
  • new_bonds:
    • list of corresponding united atom bonds.
    • you can generate a new bond or distance matrix with this list.
def assign_coos_via_distance_mat(coos_list, distance_matrix, reference):
 90def assign_coos_via_distance_mat(coos_list, distance_matrix, reference):
 91    """
 92    NOT MATURE !!! (but sometimes useful)
 93    
 94    Assigns coos to suit a reference based on a distance matrix relying to the coos.
 95    Reference and distance matrix/ coos belong to the same molecule type but are sorted in
 96    different ways. Sorted rows (or cols) of a distance matrix belong to the same atom when
 97    they are equal.
 98
 99    TO DO:
100    - add check based on levensthein to omit errors through branches!!!
101    - gernalizable for more than only coos???
102    - bad names :(
103
104    Parameters:
105    - coos_list:
106        - list of coordinates (or anything else???!!!).
107    - distance_matrix:
108        - distance matrix which belongs to the coos_list.
109    - reference:
110        - distance matrix which belongs to the reference you want to apply the coos to.
111
112    Returns:
113    - new_coos_list:
114        - list of coordinates fitting the reference.
115    - idx:
116        - indexes to translate sth. to reference.
117    """
118    distance_matrix_sort = np.sort(distance_matrix, axis=1)
119    reference_sort = np.sort(reference, axis=1)
120    idx = []
121    for ref in reference_sort:
122        for i, row in enumerate(distance_matrix_sort):
123            if np.array_equal(ref, row) and i not in idx:
124                idx.append(i)
125                break
126    idx = np.array(idx).astype(int)
127
128    print(
129        """\n\nWARNING:
130    Assign_coos_via_distance_mat is not mature yet.
131    In branched molecules errors are conceivable because atom elements are not checked.\n
132    In structurally symmetric molecules with non-symmetric atome types errors might occur \n
133    Double-check your results!!! \n \n"""
134    )
135
136    return coos_list[idx], idx

NOT MATURE !!! (but sometimes useful)

Assigns coos to suit a reference based on a distance matrix relying to the coos. Reference and distance matrix/ coos belong to the same molecule type but are sorted in different ways. Sorted rows (or cols) of a distance matrix belong to the same atom when they are equal.

TO DO:

  • add check based on levensthein to omit errors through branches!!!
  • gernalizable for more than only coos???
  • bad names :(

Parameters:

  • coos_list:
    • list of coordinates (or anything else???!!!).
  • distance_matrix:
    • distance matrix which belongs to the coos_list.
  • reference:
    • distance matrix which belongs to the reference you want to apply the coos to.

Returns:

  • new_coos_list:
    • list of coordinates fitting the reference.
  • idx:
    • indexes to translate sth. to reference.
def pair_from_row(row):
138def pair_from_row(row):
139    """
140    Gets pair potential from Joachims MC Code pair_potentials file.
141    Charges are added later ;)
142
143    Parameters:
144    - row:
145        - splitted line i.e. list of line in pair_potentials file.
146    
147    Returns:
148    - pair potential dictionary
149    """
150    pair = {}
151    pair["name"] = str(row[0])
152    pair["mass"] = float(row[1])
153    pair["epsilon"] = float(row[2])
154    pair["sigma"] = float(row[3])
155    pair["m"] = float(row[4])
156    pair["cut"] = float(row[5])
157    pair["charge"] = 0.0
158    return pair

Gets pair potential from Joachims MC Code pair_potentials file. Charges are added later ;)

Parameters:

  • row:
    • splitted line i.e. list of line in pair_potentials file.

Returns:

  • pair potential dictionary
def pair_of_h():
161def pair_of_h():
162    """
163    Builds pair potential dummy for hydrogen atoms.
164    Charges are added later ;)
165
166    Parameters:
167        none
168    
169    Returns:
170    - hydrogen pair potential dictionary
171    """
172    pair = {}
173    pair["name"] = "H"
174    pair["mass"] = 1.0
175    pair["epsilon"] = 0.0
176    pair["sigma"] = 1.0
177    pair["m"] = 12.0
178    pair["cut"] = 0.0
179    pair["charge"] = 0.0
180    return pair

Builds pair potential dummy for hydrogen atoms. Charges are added later ;)

Parameters: none

Returns:

  • hydrogen pair potential dictionary
def get_charge(row, pair_dict):
183def get_charge(row, pair_dict):
184    """
185    Assigns charge to pair potential dict.
186    Uses the normalized_levenshtein algorithm :)
187
188    Parameters:
189    - row:
190        - splitted charge line i.e. list of line in pair_potentials file.
191    - pair_dict:
192        - dictionary containing pair potentials
193    
194    Returns:
195    - key:
196        - dict key i.e. atom name the charge belongs to.
197    - charge:
198        - float value of the charge
199    """
200    keys = list(pair_dict.keys())
201    dummy = np.zeros(len(keys))
202    for i, key in enumerate(keys):
203        dummy[i] = normalized_distance(row[0][1:], key)
204    pointer = np.squeeze(np.where(dummy == np.amin(dummy)))
205    charge = float(row[2])
206    if pointer.shape:
207        print("ERROR: several atoms found for one charge:")
208        print("charge:", charge)
209        print("atoms:",keys[pointer])
210        return None, charge
211    else:
212        return keys[pointer], charge

Assigns charge to pair potential dict. Uses the normalized_levenshtein algorithm :)

Parameters:

  • row:
    • splitted charge line i.e. list of line in pair_potentials file.
  • pair_dict:
    • dictionary containing pair potentials

Returns:

  • key:
    • dict key i.e. atom name the charge belongs to.
  • charge:
    • float value of the charge
def read_pair_potentials(path):
215def read_pair_potentials(path):
216    """
217    Reads pair potentials from Joachims MC Code pair_potentials file.
218
219    Parameters:
220    - path:
221        - path to pair_potentials file.
222            
223    Returns:
224    - pair_dict:
225        - dictionary containing pair potentials
226    """
227    keylist = ["!", "#", "models:", "types", "References"]
228    pair_dict = {}
229    flag = -1
230    with open(path) as csvfile:
231        spamreader = csv.reader(csvfile, delimiter=" ")
232        for row in spamreader:
233            row = [x for x in row if x]
234            if row:
235                if "VdW-site" in row:
236                    flag = 1
237                elif "coulomb-site" in row:
238                    flag = 2
239                elif row[0] in keylist:
240                    break
241                elif flag == 1 and row:
242                    pair_dict[row[0]] = pair_from_row(row)
243                elif flag == 2 and row:
244                    # print(row)
245                    if row[0].split("_")[0] == "cH":
246                        pair_dict[row[0]] = pair_of_h()
247                        pair_dict[row[0]]["charge"] = float(row[2])
248                    else:
249                        p, ch = get_charge(row, pair_dict)
250                        # print(p,ch)
251                        pair_dict[p]["charge"] = ch
252    return pair_dict

Reads pair potentials from Joachims MC Code pair_potentials file.

Parameters:

  • path:
    • path to pair_potentials file.

Returns:

  • pair_dict:
    • dictionary containing pair potentials
def read_bond_potentials(path):
255def read_bond_potentials(path):
256    """
257    Reads bond potentials from Joachims MC Code bond_potentials file.
258
259    Parameters:
260    - path:
261        - path to bond_potentials file.
262        
263    Returns:
264    - bond_dict:
265        - dictionary containing bond potentials
266    """
267    keylist = ["!", "#", "models:", "types"]
268    bond_dict = {}
269    flag = 0
270    with open(path) as csvfile:
271        spamreader = csv.reader(csvfile, delimiter=" ")
272        for row in spamreader:
273            row = [x for x in row if x]
274            if row:
275                if row[0] == "model":
276                    flag = 1
277                elif flag == 1 and row[0] in keylist:
278                    break
279                elif flag == 1:
280                    dummy = sort_force_fields(row[1:3])
281                    name = make_graph(dummy)
282                    bond_dict[name] = {}
283                    bond_dict[name]["list"] = row[1:3]
284                    bond_dict[name]["type"] = int(row[0])
285                    # bond_dict[name]["len"] = float(row[3])
286                    # bond_dict[name]["spring"] = float(row[4])
287                    bond_dict[name]["p"] = [float(r) for r in row[3:]]
288
289    return bond_dict

Reads bond potentials from Joachims MC Code bond_potentials file.

Parameters:

  • path:
    • path to bond_potentials file.

Returns:

  • bond_dict:
    • dictionary containing bond potentials
def read_angle_potentials(path):
292def read_angle_potentials(path):
293    """
294    Reads angle potentials from Joachims MC Code angle_potentials file.
295
296    Parameters:
297    - path:
298        - path to angle_potentials file.
299        
300    Returns:
301    - angle_dict:
302        - dictionary containing angle potentials
303    """
304    keylist = ["!", "#", "models:", "types", "Note", "Note:"]
305    angle_dict = {}
306    flag = 0
307    with open(path) as csvfile:
308        spamreader = csv.reader(csvfile, delimiter=" ")
309        for row in spamreader:
310            row = [x for x in row if x]
311            if row:
312                if row[0] == "model":
313                    flag = 1
314                elif flag == 1 and row[0] in keylist:
315                    break
316                elif flag == 1:
317                    dummy = sort_force_fields(row[1:4])
318                    name = make_graph(dummy)
319                    angle_dict[name] = {}
320                    angle_dict[name]["list"] = row[1:4]
321                    angle_dict[name]["type"] = int(row[0])
322                    # angle_dict[name]["angle"] = float(row[4])
323                    # angle_dict[name]["p"] = float(row[5])
324                    angle_dict[name]["p"] = [float(r) for r in row[4:]]
325
326    return angle_dict

Reads angle potentials from Joachims MC Code angle_potentials file.

Parameters:

  • path:
    • path to angle_potentials file.

Returns:

  • angle_dict:
    • dictionary containing angle potentials
def read_torsion_potentials(path):
329def read_torsion_potentials(path):
330    """
331    Reads torsion potentials from Joachims MC Code torsion_potentials file.
332
333    Parameters:
334    - path:
335        - path to torsion_potentials file.
336        
337    Returns:
338    - torsion_dict:
339        - dictionary containing torsion potentials
340    """
341    keylist = ["!", "#", "models:", "types", "Note:"]
342    torsion_dict = {}
343    flag = 0
344    with open(path) as csvfile:
345        spamreader = csv.reader(csvfile, delimiter=" ")
346        for row in spamreader:
347            row = [x for x in row if x]
348            if row:
349                if row[0] == "model":
350                    flag = 1
351                elif flag == 1 and row[0] in keylist:
352                    break
353                elif flag == 1:
354                    dummy = sort_force_fields(row[1:5])
355                    name = make_graph(dummy)
356                    torsion_dict[name] = {}
357                    torsion_dict[name]["list"] = row[1:5]
358                    torsion_dict[name]["type"] = int(row[0])
359                    torsion_dict[name]["p"] = [float(r) for r in row[5:]]
360
361    return torsion_dict

Reads torsion potentials from Joachims MC Code torsion_potentials file.

Parameters:

  • path:
    • path to torsion_potentials file.

Returns:

  • torsion_dict:
    • dictionary containing torsion potentials
def read_xyz(path, energy=False):
364def read_xyz(path, energy=False):
365    """
366    Reads xyz file. Supports tubomole xyz with energy in header
367
368    Parameters:
369    - path:
370        - path to xyz file.
371    - energy:
372        - returns energy if true.
373        - use this with turbomol QM-results.
374    
375    Returns:
376    - xyz:
377        - list of dicts, keys: "atom": atom name and "xyz": coordinates.
378    - energy:
379        - (optional) if energy = True (bad style ?)
380    """
381    data = []
382    with open(path) as csvfile:
383        spamreader = csv.reader(csvfile, delimiter=" ")
384        for row in spamreader:
385            data.append([r for r in row if r])
386    n = int(data[0][0])
387    xyz = {}
388    for i, d in enumerate(data[2 : n + 2]):
389        xyz[i] = {}
390        xyz[i]["atom"] = d[0]
391        xyz[i]["xyz"] = np.array([float(x) for x in d[1:4]])
392
393    if energy:
394        energy = float(data[1][2])
395        return xyz, energy
396
397    return xyz

Reads xyz file. Supports tubomole xyz with energy in header

Parameters:

  • path:
    • path to xyz file.
  • energy:
    • returns energy if true.
    • use this with turbomol QM-results.

Returns:

  • xyz:
    • list of dicts, keys: "atom": atom name and "xyz": coordinates.
  • energy:
    • (optional) if energy = True (bad style ?)
def assign_CHx(xyz):
400def assign_CHx(xyz):
401    """
402    Builds united atom groups fromm all atom xyz.
403    Works with turbomol results and read_xyz def.
404
405    Parameters:
406    - xyz:
407        - list of atom dicts, keys: "atom": atom name and "xyz": coordinates.
408        
409    Returns:
410    - xyz_CHx:
411        - list of united atom dicts, keys: "atom": atom name and "xyz": coordinates.
412    """
413    xyz_CHx = {}
414    xkeys = sorted(xyz.keys())
415    ii = 0
416    x = np.min(xkeys)
417    flag = 0
418    while x <= np.max(xkeys):
419        if flag == 0 and xyz[x]["atom"] == "C":
420            coos = np.array(xyz[x]["xyz"])
421            flag = 1
422            x += 1
423        elif flag > 0:
424            if xyz[x]["atom"] == "H":
425                coos = np.column_stack((coos, xyz[x]["xyz"]))
426                x += 1
427                flag += 1
428            else:
429                no = flag - 1
430                xyz_CHx[ii] = {}
431                xyz_CHx[ii]["atom"] = "CH" + str(no)
432                xyz_CHx[ii]["xyz"] = np.sum(
433                    coos * np.array([15] + [1] * no), axis=1
434                ) / (15 + 1 * no)
435                coos = []
436                flag = 0
437                ii += 1
438        else:
439            xyz_CHx[ii] = xyz[x]
440            x += 1
441            ii += 1
442    print(
443        """\n\nWARNING:
444    Very basic tool. Only works with well-sorted coordinates. Your better off doing it by hand. At least check your results!!! \n \n"""
445    )
446    return xyz_CHx

Builds united atom groups fromm all atom xyz. Works with turbomol results and read_xyz def.

Parameters:

  • xyz:
    • list of atom dicts, keys: "atom": atom name and "xyz": coordinates.

Returns:

  • xyz_CHx:
    • list of united atom dicts, keys: "atom": atom name and "xyz": coordinates.
def kill_CHx(xyz):
449def kill_CHx(xyz):
450    """
451    Kills C-bonded hydrogens fromm all atom xyz.
452    Works with turbomol results and read_xyz def.
453
454    Parameters:
455    - xyz:
456        - list of atom dicts, keys: "atom": atom name and "xyz": coordinates.
457    
458    Returns:
459    - xyz_CHx:
460        - list of atom dicts, keys: "atom": atom name and "xyz": coordinates.
461    """
462    xyz_CHx = {}
463    xkeys = sorted(xyz.keys())
464    ii = 0
465    x = np.min(xkeys)
466    flag = 0
467    while x <= np.max(xkeys):
468        if flag == 0 and xyz[x]["atom"] == "C":
469            coos = np.array(xyz[x]["xyz"])
470            flag = 1
471            x += 1
472        elif flag > 0:
473            if xyz[x]["atom"] == "H":
474                x += 1
475                flag += 1
476            else:
477                no = flag - 1
478                xyz_CHx[ii] = {}
479                xyz_CHx[ii]["atom"] = "C"
480                xyz_CHx[ii]["xyz"] = coos
481                coos = []
482                flag = 0
483                ii += 1
484        else:
485            xyz_CHx[ii] = xyz[x]
486            x += 1
487            ii += 1
488    print(
489        """\n\nWARNING:
490    Very basic tool. Only works with well-sorted coordinates. Your better off doing it by hand. At least check your results!!! \n \n"""
491    )
492    return xyz_CHx

Kills C-bonded hydrogens fromm all atom xyz. Works with turbomol results and read_xyz def.

Parameters:

  • xyz:
    • list of atom dicts, keys: "atom": atom name and "xyz": coordinates.

Returns:

  • xyz_CHx:
    • list of atom dicts, keys: "atom": atom name and "xyz": coordinates.
def to_xyz(xyz, path):
495def to_xyz(xyz, path):
496    """
497    Writes xyz file.
498    Works with turbomol results and read_xyz def.
499
500    Parameters:
501    - xyz:
502        - list of atom dicts, keys: "atom": atom name and "xyz": coordinates.
503    - path:
504        - path to xyz file.
505        
506    Returns:
507        nothing
508    """
509    f = open(path, "w")
510    f.write(str(len(xyz)) + "\n")
511    f.write("\n")
512    for x in xyz:
513        line = "    ".join([xyz[x]["atom"][0]] + [str(y) for y in xyz[x]["xyz"]])
514        f.write(line + "\n")
515    f.close()
516    return

Writes xyz file. Works with turbomol results and read_xyz def.

Parameters:

  • xyz:
    • list of atom dicts, keys: "atom": atom name and "xyz": coordinates.
  • path:
    • path to xyz file.

Returns: nothing

def distance(x1, x2):
519def distance(x1, x2):
520    """
521    Returns spatial distance.
522
523    Parameters:
524    - x1, x2:
525        - vectors.
526        
527    Returns:
528    - spatial distance between x1 and x2.
529    """
530    return np.linalg.norm(x1 - x2)

Returns spatial distance.

Parameters:

  • x1, x2:
    • vectors.

Returns:

  • spatial distance between x1 and x2.
def unit_vector(vector):
533def unit_vector(vector):
534    """
535    Returns the unit vector of the vector.
536
537    Parameters:
538    - vector:
539        - vector.
540        
541    Returns:
542    - unit vector
543    """
544    return vector / np.linalg.norm(vector)

Returns the unit vector of the vector.

Parameters:

  • vector:
    • vector.

Returns:

  • unit vector
def angle_between(x1, x2, x3):
547def angle_between(x1, x2, x3):
548    """
549    Returns the angle in radians between vectors x1, x2, x3.
550
551    Parameters:
552    - x1, x2, x3:
553        - vectors.
554            
555    Returns:
556    - rad angle
557    """
558    v1 = unit_vector(x1 - x2)
559    v2 = unit_vector(x3 - x2)
560
561    v1_u = unit_vector(v1)
562    v2_u = unit_vector(v2)
563    return np.arccos(np.clip(np.dot(v1_u, v2_u), -1.0, 1.0))

Returns the angle in radians between vectors x1, x2, x3.

Parameters:

  • x1, x2, x3:
    • vectors.

Returns:

  • rad angle
def dihedral(x0, x1, x2, x3):
566def dihedral(x0, x1, x2, x3):
567    """
568    Returns the dihedral angle in radians between vectors x0, x1, x2, x3.
569    Praxeolitic formula -> 1 sqrt, 1 cross product
570
571    Parameters:
572    - x0, x1, x2, x3:
573        - vectors.
574    
575    Returns:
576    - deg angle
577    """
578    b0 = -1.0 * (x1 - x0)
579    b1 = x2 - x1
580    b2 = x3 - x2
581
582    # normalize b1 so that it does not influence magnitude of vector
583    # rejections that come next
584    # print(b1)
585    # print(np.linalg.norm(b1))
586    if np.linalg.norm(b1) > 0:
587        b1 /= np.linalg.norm(b1)
588
589    # vector rejections
590    # v = projection of b0 onto plane perpendicular to b1
591    #   = b0 minus component that aligns with b1
592    # w = projection of b2 onto plane perpendicular to b1
593    #   = b2 minus component that aligns with b1
594    v = b0 - np.dot(b0, b1) * b1
595    w = b2 - np.dot(b2, b1) * b1
596
597    # angle between v and w in a plane is the torsion angle
598    # v and w may not be normalized but that's fine since tan is y/x
599    x = np.dot(v, w)
600    y = np.dot(np.cross(b1, v), w)
601    return np.arctan2(y, x)

Returns the dihedral angle in radians between vectors x0, x1, x2, x3. Praxeolitic formula -> 1 sqrt, 1 cross product

Parameters:

  • x0, x1, x2, x3:
    • vectors.

Returns:

  • deg angle