• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

CCPBioSim / CodeEntropy / 13999062252

21 Mar 2025 07:03PM UTC coverage: 29.725%. Remained the same
13999062252

push

github

web-flow
Merge pull request #72 from CCPBioSim/63-conformational-entropy

Issue 63- Conformational Entropy Calculation. This merge closes #63 and resolves differences between the two versions of the code for aliphatic residues.

0 of 3 new or added lines in 2 files covered. (0.0%)

2 existing lines in 1 file now uncovered.

162 of 545 relevant lines covered (29.72%)

0.89 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

44.2
/CodeEntropy/main_mcc.py
1
import argparse
3✔
2
import math
3✔
3
import os
3✔
4

5
import MDAnalysis as mda
3✔
6

7
# import numpy as np
8
import pandas as pd
3✔
9
import yaml
3✔
10

11
from CodeEntropy import EntropyFunctions as EF
3✔
12
from CodeEntropy import LevelFunctions as LF
3✔
13
from CodeEntropy import MDAUniverseHelper as MDAHelper
3✔
14

15
# from datetime import datetime
16

17
arg_map = {
3✔
18
    "top_traj_file": {
19
        "type": str,
20
        "nargs": "+",
21
        "help": "Path to Structure/topology file followed by Trajectory file(s)",
22
        "default": [],
23
    },
24
    "selection_string": {
25
        "type": str,
26
        "help": "Selection string for CodeEntropy",
27
        "default": "all",
28
    },
29
    "start": {
30
        "type": int,
31
        "help": "Start analysing the trajectory from this frame index",
32
        "default": 0,
33
    },
34
    "end": {
35
        "type": int,
36
        "help": "Stop analysing the trajectory at this frame index",
37
        "default": -1,
38
    },
39
    "step": {
40
        "type": int,
41
        "help": "Interval between two consecutive frames to be read index",
42
        "default": 1,
43
    },
44
    "bin_width": {
45
        "type": int,
46
        "help": "Bin width in degrees for making the histogram",
47
        "default": 30,
48
    },
49
    "temperature": {
50
        "type": float,
51
        "help": "Temperature for entropy calculation (K)",
52
        "default": 298.0,
53
    },
54
    "verbose": {
55
        "type": bool,
56
        "help": "True/False flag for noisy or quiet output",
57
        "default": False,
58
    },
59
    "thread": {"type": int, "help": "How many multiprocess to use", "default": 1},
60
    "outfile": {
61
        "type": str,
62
        "help": "Name of the file where the output will be written",
63
        "default": "outfile.out",
64
    },
65
    "resfile": {
66
        "type": str,
67
        "help": "Name of the file where the residue entropy output will be written",
68
        "default": "res_outfile.out",
69
    },
70
    "mout": {
71
        "type": str,
72
        "help": "Name of the file where certain matrices will be written",
73
        "default": None,
74
    },
75
    "force_partitioning": {"type": float, "help": "Force partitioning", "default": 0.5},
76
    "waterEntropy": {"type": bool, "help": "Calculate water entropy", "default": False},
77
}
78

79

80
def load_config(file_path):
3✔
81
    """Load YAML configuration file."""
82
    if not os.path.exists(file_path):
3✔
83
        raise FileNotFoundError(f"Configuration file '{file_path}' not found.")
×
84

85
    with open(file_path, "r") as file:
3✔
86
        config = yaml.safe_load(file)
3✔
87

88
        # If YAML content is empty, return an empty dictionary
89
        if config is None:
3✔
90
            config = {}
3✔
91

92
    return config
3✔
93

94

95
def setup_argparse():
3✔
96
    """Setup argument parsing dynamically based on arg_map."""
97
    parser = argparse.ArgumentParser(
3✔
98
        description="CodeEntropy: Entropy calculation with MCC method."
99
    )
100

101
    for arg, properties in arg_map.items():
3✔
102
        kwargs = {key: properties[key] for key in properties if key != "help"}
3✔
103
        parser.add_argument(f"--{arg}", **kwargs, help=properties.get("help"))
3✔
104

105
    return parser
3✔
106

107

108
def merge_configs(args, run_config):
3✔
109
    """Merge CLI arguments with YAML configuration."""
110
    if run_config is None:
3✔
111
        run_config = {}
×
112

113
    if not isinstance(run_config, dict):
3✔
114
        raise TypeError("run_config must be a dictionary or None.")
3✔
115

116
    # Step 1: Merge YAML configuration into args
117
    for key, value in run_config.items():
3✔
118
        if getattr(args, key, None) is None:
3✔
119
            setattr(args, key, value)
3✔
120

121
    # Step 2: Set default values for any missing arguments from `arg_map`
122
    for key, params in arg_map.items():
3✔
123
        if getattr(args, key, None) is None:
3✔
124
            setattr(args, key, params.get("default"))
3✔
125

126
    # Step 3: Override with CLI values if provided
127
    for key in arg_map.keys():
3✔
128
        cli_value = getattr(args, key, None)
3✔
129
        if cli_value is not None:
3✔
130
            run_config[key] = cli_value
3✔
131

132
    return args
3✔
133

134

135
def main():
3✔
136
    """
137
    Main function for calculating the entropy of a system using the multiscale cell
138
    correlation method.
139
    """
140
    try:
3✔
141
        config = load_config("config.yaml")
3✔
142

143
        if config is None:
3✔
144
            raise ValueError(
3✔
145
                "No configuration file found, and no CLI arguments were provided."
146
            )
147

148
        parser = setup_argparse()
3✔
149
        args, unknown = parser.parse_known_args()
3✔
150

151
        # Process each run in the YAML configuration
152
        for run_name, run_config in config.items():
3✔
153
            if isinstance(run_config, dict):
3✔
154
                # Merging CLI arguments with YAML configuration
155
                args = merge_configs(args, run_config)
3✔
156

157
                # Ensure necessary arguments are provided
158
                if not getattr(args, "top_traj_file"):
3✔
159
                    raise ValueError(
×
160
                        "The 'top_traj_file' argument is required but not provided."
161
                    )
162
                if not getattr(args, "selection_string"):
3✔
163
                    raise ValueError(
×
164
                        "The 'selection_string' argument is required but not provided."
165
                    )
166

167
                # REPLACE INPUTS
168
                print(f"Printing all input for {run_name}")
3✔
169
                for arg in vars(args):
3✔
170
                    print(f" {arg}: {getattr(args, arg) or ''}")
3✔
171
            else:
172
                print(f"Run configuration for {run_name} is not a dictionary.")
×
173
    except ValueError as e:
3✔
174
        print(e)
3✔
175
        raise
3✔
176

177
    # startTime = datetime.now()
178

179
    # Get topology and trajectory file names and make universe
180
    tprfile = args.top_traj_file[0]
3✔
181
    trrfile = args.top_traj_file[1:]
3✔
182
    u = mda.Universe(tprfile, trrfile)
3✔
183

184
    # Define bin_width for histogram from inputs
185
    bin_width = args.bin_width
3✔
186

187
    # Define trajectory slicing from inputs
188
    start = args.start
3✔
189
    if start is None:
3✔
190
        start = 0
×
191
    end = args.end
3✔
192
    if end is None:
3✔
193
        end = -1
×
194
    step = args.step
3✔
195
    if step is None:
3✔
196
        step = 1
×
197
    # Count number of frames, easy if not slicing
198
    if start == 0 and end == -1 and step == 1:
3✔
199
        number_frames = len(u.trajectory)
3✔
200
    elif end == -1:
×
201
        end = len(u.trajectory)
×
202
        number_frames = math.floor((end - start) / step) + 1
×
203
    else:
204
        number_frames = math.floor((end - start) / step) + 1
×
205
    print(number_frames)
3✔
206

207
    # Create pandas data frame for results
208
    results_df = pd.DataFrame(columns=["Molecule ID", "Level", "Type", "Result"])
3✔
209
    residue_results_df = pd.DataFrame(
3✔
210
        columns=["Molecule ID", "Residue", "Type", "Result"]
211
    )
212

213
    # printing headings for output files
214
    with open(args.outfile, "a") as out:
3✔
215
        print("Molecule\tLevel\tType\tResult (J/mol/K)\n", file=out)
3✔
216

217
    with open(args.resfile, "a") as res:
3✔
218
        print("Molecule\tResidue\tType\tResult (J/mol/K)\n", file=res)
3✔
219

220
    # Reduce number of atoms in MDA universe to selection_string arg
221
    # (default all atoms included)
222
    if args.selection_string == "all":
3✔
223
        reduced_atom = u
3✔
224
    else:
225
        reduced_atom = MDAHelper.new_U_select_atom(u, args.selection_string)
×
226
        reduced_atom_name = f"{len(reduced_atom.trajectory)}_frame_dump_atom_selection"
×
227
        MDAHelper.write_universe(reduced_atom, reduced_atom_name)
×
228

229
    # Scan system for molecules and select levels (united atom, residue, polymer)
230
    # for each
231
    number_molecules, levels = LF.select_levels(reduced_atom, args.verbose)
3✔
232

233
    # Loop over molecules
234
    for molecule in range(number_molecules):
3✔
235
        # molecule data container of MDAnalysis Universe type for internal degrees
236
        # of freedom getting indices of first and last atoms in the molecule
237
        # assuming atoms are numbered consecutively and all atoms in a given
238
        # molecule are together
239
        index1 = reduced_atom.atoms.fragments[molecule].indices[0]
×
240
        index2 = reduced_atom.atoms.fragments[molecule].indices[-1]
×
241
        selection_string = f"index {index1}:{index2}"
×
242
        molecule_container = MDAHelper.new_U_select_atom(reduced_atom, selection_string)
×
243

244
        # Calculate entropy for each relevent level
245
        for level in levels[molecule]:
×
246
            if level == levels[molecule][-1]:
×
247
                highest_level = True
×
248
            else:
249
                highest_level = False
×
250

251
            if level == "united_atom":
×
252
                # loop over residues, report results per residue + total united atom
253
                # level. This is done per residue to reduce the size of the matrices -
254
                # amino acid resiudes have tens of united atoms but a whole protein
255
                # could have thousands. Doing the calculation per residue allows for
256
                # comparisons of contributions from different residues
257
                num_residues = len(molecule_container.residues)
×
258
                S_trans = 0
×
259
                S_rot = 0
×
260
                S_conf = 0
×
261

UNCOV
262
                for residue in range(num_residues):
×
263
                    # molecule data container of MDAnalysis Universe type for internal
264
                    # degrees of freedom getting indices of first and last atoms in the
265
                    # molecule assuming atoms are numbered consecutively and all atoms
266
                    # in a given molecule are together
267
                    index1 = molecule_container.residues[residue].atoms.indices[0]
×
268
                    index2 = molecule_container.residues[residue].atoms.indices[-1]
×
269
                    selection_string = f"index {index1}:{index2}"
×
270
                    residue_container = MDAHelper.new_U_select_atom(
×
271
                        molecule_container, selection_string
272
                    )
NEW
273
                    residue_heavy_atoms_container = MDAHelper.new_U_select_atom(
×
274
                        residue_container, "not name H*"
275
                    )  # only heavy atom dihedrals are relevant
276

277
                    # Vibrational entropy at every level
278
                    # Get the force and torque matrices for the beads at the relevant
279
                    # level
280

UNCOV
281
                    force_matrix, torque_matrix = LF.get_matrices(
×
282
                        residue_container,
283
                        level,
284
                        args.verbose,
285
                        start,
286
                        end,
287
                        step,
288
                        number_frames,
289
                        highest_level,
290
                    )
291

292
                    # Calculate the entropy from the diagonalisation of the matrices
293
                    S_trans_residue = EF.vibrational_entropy(
×
294
                        force_matrix, "force", args.temperature, highest_level
295
                    )
296
                    S_trans += S_trans_residue
×
297
                    print(f"S_trans_{level}_{residue} = {S_trans_residue}")
×
298
                    new_row = pd.DataFrame(
×
299
                        {
300
                            "Molecule ID": [molecule],
301
                            "Residue": [residue],
302
                            "Type": ["Transvibrational (J/mol/K)"],
303
                            "Result": [S_trans_residue],
304
                        }
305
                    )
306
                    residue_results_df = pd.concat(
×
307
                        [residue_results_df, new_row], ignore_index=True
308
                    )
309
                    with open(args.resfile, "a") as res:
×
310
                        print(
×
311
                            molecule,
312
                            "\t",
313
                            residue,
314
                            "\tTransvibration\t",
315
                            S_trans_residue,
316
                            file=res,
317
                        )
318

319
                    S_rot_residue = EF.vibrational_entropy(
×
320
                        torque_matrix, "torque", args.temperature, highest_level
321
                    )
322
                    S_rot += S_rot_residue
×
323
                    print(f"S_rot_{level}_{residue} = {S_rot_residue}")
×
324
                    new_row = pd.DataFrame(
×
325
                        {
326
                            "Molecule ID": [molecule],
327
                            "Residue": [residue],
328
                            "Type": ["Rovibrational (J/mol/K)"],
329
                            "Result": [S_rot_residue],
330
                        }
331
                    )
332
                    residue_results_df = pd.concat(
×
333
                        [residue_results_df, new_row], ignore_index=True
334
                    )
335
                    with open(args.resfile, "a") as res:
×
336
                        #  print(new_row, file=res)
337
                        print(
×
338
                            molecule,
339
                            "\t",
340
                            residue,
341
                            "\tRovibrational \t",
342
                            S_rot_residue,
343
                            file=res,
344
                        )
345

346
                    # Conformational entropy based on atom dihedral angle distributions
347
                    # Gives entropy of conformations within each residue
348

349
                    # Get dihedral angle distribution
NEW
350
                    dihedrals = LF.get_dihedrals(residue_heavy_atoms_container, level)
×
351

352
                    # Calculate conformational entropy
353
                    S_conf_residue = EF.conformational_entropy(
×
354
                        residue_heavy_atoms_container,
355
                        dihedrals,
356
                        bin_width,
357
                        start,
358
                        end,
359
                        step,
360
                        number_frames,
361
                    )
362
                    S_conf += S_conf_residue
×
363
                    print(f"S_conf_{level}_{residue} = {S_conf_residue}")
×
364
                    new_row = pd.DataFrame(
×
365
                        {
366
                            "Molecule ID": [molecule],
367
                            "Residue": [residue],
368
                            "Type": ["Conformational (J/mol/K)"],
369
                            "Result": [S_conf_residue],
370
                        }
371
                    )
372
                    residue_results_df = pd.concat(
×
373
                        [residue_results_df, new_row], ignore_index=True
374
                    )
375
                    with open(args.resfile, "a") as res:
×
376
                        print(
×
377
                            molecule,
378
                            "\t",
379
                            residue,
380
                            "\tConformational\t",
381
                            S_conf_residue,
382
                            file=res,
383
                        )
384

385
                # Print united atom level results summed over all residues
386
                print(f"S_trans_{level} = {S_trans}")
×
387
                new_row = pd.DataFrame(
×
388
                    {
389
                        "Molecule ID": [molecule],
390
                        "Level": [level],
391
                        "Type": ["Transvibrational (J/mol/K)"],
392
                        "Result": [S_trans],
393
                    }
394
                )
395
                with open(args.outfile, "a") as out:
×
396
                    print(
×
397
                        molecule, "\t", level, "\tTransvibration\t", S_trans, file=out
398
                    )
399

400
                results_df = pd.concat([results_df, new_row], ignore_index=True)
×
401

402
                print(f"S_rot_{level} = {S_rot}")
×
403
                new_row = pd.DataFrame(
×
404
                    {
405
                        "Molecule ID": [molecule],
406
                        "Level": [level],
407
                        "Type": ["Rovibrational (J/mol/K)"],
408
                        "Result": [S_rot],
409
                    }
410
                )
411
                results_df = pd.concat([results_df, new_row], ignore_index=True)
×
412
                with open(args.outfile, "a") as out:
×
413
                    print(molecule, "\t", level, "\tRovibrational \t", S_rot, file=out)
×
414

415
                print(f"S_conf_{level} = {S_conf}")
×
416
                new_row = pd.DataFrame(
×
417
                    {
418
                        "Molecule ID": [molecule],
419
                        "Level": [level],
420
                        "Type": ["Conformational (J/mol/K)"],
421
                        "Result": [S_conf],
422
                    }
423
                )
424
                results_df = pd.concat([results_df, new_row], ignore_index=True)
×
425
                with open(args.outfile, "a") as out:
×
426
                    print(molecule, "\t", level, "\tConformational\t", S_conf, file=out)
×
427

428
            if level in ("polymer", "residue"):
×
429
                # Vibrational entropy at every level
430
                # Get the force and torque matrices for the beads at the relevant level
431
                force_matrix, torque_matrix = LF.get_matrices(
×
432
                    molecule_container,
433
                    level,
434
                    args.verbose,
435
                    start,
436
                    end,
437
                    step,
438
                    number_frames,
439
                    highest_level,
440
                )
441

442
                # Calculate the entropy from the diagonalisation of the matrices
443
                S_trans = EF.vibrational_entropy(
×
444
                    force_matrix, "force", args.temperature, highest_level
445
                )
446
                print(f"S_trans_{level} = {S_trans}")
×
447
                new_row = pd.DataFrame(
×
448
                    {
449
                        "Molecule ID": [molecule],
450
                        "Level": [level],
451
                        "Type": ["Transvibrational (J/mol/K)"],
452
                        "Result": [S_trans],
453
                    }
454
                )
455
                results_df = pd.concat([results_df, new_row], ignore_index=True)
×
456
                with open(args.outfile, "a") as out:
×
457
                    print(
×
458
                        molecule, "\t", level, "\tTransvibrational\t", S_trans, file=out
459
                    )
460

461
                S_rot = EF.vibrational_entropy(
×
462
                    torque_matrix, "torque", args.temperature, highest_level
463
                )
464
                print(f"S_rot_{level} = {S_rot}")
×
465
                new_row = pd.DataFrame(
×
466
                    {
467
                        "Molecule ID": [molecule],
468
                        "Level": [level],
469
                        "Type": ["Rovibrational (J/mol/K)"],
470
                        "Result": [S_rot],
471
                    }
472
                )
473
                results_df = pd.concat([results_df, new_row], ignore_index=True)
×
474
                with open(args.outfile, "a") as out:
×
475
                    print(molecule, "\t", level, "\tRovibrational \t", S_rot, file=out)
×
476

477
                # Note: conformational entropy is not calculated at the polymer level,
478
                # because there is at most one polymer bead per molecule so no dihedral
479
                # angles.
480

481
            if level == "residue":
×
482
                # Conformational entropy based on distributions of dihedral angles
483
                # of residues. Gives conformational entropy of secondary structure
484

485
                # Get dihedral angle distribution
486
                dihedrals = LF.get_dihedrals(molecule_container, level)
×
487
                # Calculate conformational entropy
488
                S_conf = EF.conformational_entropy(
×
489
                    molecule_container,
490
                    dihedrals,
491
                    bin_width,
492
                    start,
493
                    end,
494
                    step,
495
                    number_frames,
496
                )
497
                print(f"S_conf_{level} = {S_conf}")
×
498
                new_row = pd.DataFrame(
×
499
                    {
500
                        "Molecule ID": [molecule],
501
                        "Level": [level],
502
                        "Type": ["Conformational (J/mol/K)"],
503
                        "Result": [S_conf],
504
                    }
505
                )
506
                results_df = pd.concat([results_df, new_row], ignore_index=True)
×
507
                with open(args.outfile, "a") as out:
×
508
                    print(molecule, "\t", level, "\tConformational\t", S_conf, file=out)
×
509

510
            # Orientational entropy based on network of neighbouring molecules,
511
            #  only calculated at the highest level (whole molecule)
512
            #    if highest_level:
513
            #        neigbours = LF.get_neighbours(reduced_atom, molecule)
514
            #        S_orient = EF.orientational_entropy(neighbours)
515
            #        print(f"S_orient_{level} = {S_orient}")
516
            #        new_row = pd.DataFrame({
517
            #            'Molecule ID': [molecule],
518
            #            'Level': [level],
519
            #            'Type':['Orientational (J/mol/K)'],
520
            #            'Result': [S_orient],})
521
            #        results_df = pd.concat([results_df, new_row], ignore_index=True)
522
            #        with open(args.outfile, "a") as out:
523
            #    print(molecule,
524
            #          "\t",
525
            #          level,
526
            #          "\tOrientational\t",
527
            #          S_orient,
528
            #          file=out)
529

530
        # Report total entropy for the molecule
531
        S_molecule = results_df[results_df["Molecule ID"] == molecule]["Result"].sum()
×
532
        print(f"S_molecule = {S_molecule}")
×
533
        new_row = pd.DataFrame(
×
534
            {
535
                "Molecule ID": [molecule],
536
                "Level": ["Molecule Total"],
537
                "Type": ["Molecule Total Entropy "],
538
                "Result": [S_molecule],
539
            }
540
        )
541
        results_df = pd.concat([results_df, new_row], ignore_index=True)
×
542
        with open(args.outfile, "a") as out:
×
543
            print(molecule, "\t Molecule\tTotal Entropy\t", S_molecule, file=out)
×
544

545

546
if __name__ == "__main__":
3✔
547

548
    main()
×
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc