• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

materialsproject / pymatgen / 4075885785

pending completion
4075885785

push

github

Shyue Ping Ong
Merge branch 'master' of github.com:materialsproject/pymatgen

96 of 96 new or added lines in 27 files covered. (100.0%)

81013 of 102710 relevant lines covered (78.88%)

0.79 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

47.74
/pymatgen/io/qchem/outputs.py
1
# Copyright (c) Pymatgen Development Team.
2
# Distributed under the terms of the MIT License.
3

4
"""
1✔
5
Parsers for Qchem output files.
6
"""
7

8
from __future__ import annotations
1✔
9

10
import copy
1✔
11
import logging
1✔
12
import math
1✔
13
import os
1✔
14
import re
1✔
15
import warnings
1✔
16
from typing import Any
1✔
17

18
import networkx as nx
1✔
19
import numpy as np
1✔
20
import pandas as pd
1✔
21
from monty.io import zopen
1✔
22
from monty.json import MSONable, jsanitize
1✔
23

24
from pymatgen.analysis.graphs import MoleculeGraph
1✔
25
from pymatgen.analysis.local_env import OpenBabelNN
1✔
26
from pymatgen.core import Molecule
1✔
27
from pymatgen.io.qchem.utils import (
1✔
28
    process_parsed_coords,
29
    process_parsed_fock_matrix,
30
    read_matrix_pattern,
31
    read_pattern,
32
    read_table_pattern,
33
)
34

35
try:
1✔
36
    from openbabel import openbabel
1✔
37
except ImportError:
1✔
38
    openbabel = None
1✔
39

40

41
__author__ = "Samuel Blau, Brandon Wood, Shyam Dwaraknath, Evan Spotte-Smith, Ryan Kingsbury"
1✔
42
__copyright__ = "Copyright 2018-2022, The Materials Project"
1✔
43
__version__ = "0.1"
1✔
44
__maintainer__ = "Samuel Blau"
1✔
45
__email__ = "samblau1@gmail.com"
1✔
46
__credits__ = "Gabe Gomes"
1✔
47

48
logger = logging.getLogger(__name__)
1✔
49

50

51
class QCOutput(MSONable):
1✔
52
    """
53
    Class to parse QChem output files.
54
    """
55

56
    def __init__(self, filename: str):
        """
        Args:
            filename (str): Filename to parse.
        """
        self.filename = filename
        self.data: dict[str, Any] = {}
        self.data["errors"] = []
        self.data["warnings"] = {}
        self.text = ""
        # ISO-8859-1 tolerates stray non-UTF-8 bytes that Q-Chem sometimes emits.
        with zopen(filename, mode="rt", encoding="ISO-8859-1") as f:
            self.text = f.read()

        # Check if output file contains multiple output files. If so, print an error message and exit
        self.data["multiple_outputs"] = read_pattern(
            self.text, {"key": r"Job\s+\d+\s+of\s+(\d+)\s+"}, terminate_on_match=True
        ).get("key")
        if self.data.get("multiple_outputs") is not None:
            if self.data.get("multiple_outputs") != [["1"]]:
                # Fixed typo: the suggested method was misspelled "mulitple_outputs_from_file".
                raise ValueError(
                    "ERROR: multiple calculation outputs found in file "
                    + filename
                    + ". Please instead call QCOutput.multiple_outputs_from_file(QCOutput,'"
                    + filename
                    + "')"
                )

        # Parse the Q-Chem major version
        if read_pattern(
            self.text, {"key": r"A Quantum Leap Into The Future Of Chemistry\s+Q-Chem 4"}, terminate_on_match=True
        ).get("key") == [[]]:
            self.data["version"] = "4"
        elif read_pattern(
            self.text, {"key": r"A Quantum Leap Into The Future Of Chemistry\s+Q-Chem 5"}, terminate_on_match=True
        ).get("key") == [[]]:
            self.data["version"] = "5"
        elif read_pattern(
            self.text, {"key": r"A Quantum Leap Into The Future Of Chemistry\s+Q-Chem 6"}, terminate_on_match=True
        ).get("key") == [[]]:
            self.data["version"] = "6"
        else:
            self.data["version"] = "unknown"

        # Parse the molecular details: charge, multiplicity,
        # species, and initial geometry.
        self._read_charge_and_multiplicity()
        if read_pattern(self.text, {"key": r"Nuclear Repulsion Energy"}, terminate_on_match=True).get("key") == [[]]:
            self._read_species_and_inital_geometry()

        # Check if calculation finished
        self.data["completion"] = read_pattern(
            self.text,
            {"key": r"Thank you very much for using Q-Chem.\s+Have a nice day."},
            terminate_on_match=True,
        ).get("key")

        # If the calculation finished, parse the job time.
        if self.data.get("completion", []):
            temp_timings = read_pattern(
                self.text,
                {"key": r"Total job time\:\s*([\d\-\.]+)s\(wall\)\,\s*([\d\-\.]+)s\(cpu\)"},
            ).get("key")
            if temp_timings is not None:
                self.data["walltime"] = float(temp_timings[0][0])
                self.data["cputime"] = float(temp_timings[0][1])
            else:
                self.data["walltime"] = None
                self.data["cputime"] = None

        # Check if calculation is unrestricted
        self.data["unrestricted"] = read_pattern(
            self.text,
            {"key": r"A(?:n)*\sunrestricted[\s\w\-]+SCF\scalculation\swill\sbe"},
            terminate_on_match=True,
        ).get("key")
        if not self.data["unrestricted"]:
            self.data["unrestricted"] = read_pattern(
                self.text,
                {"key": r"unrestricted = true"},
                terminate_on_match=True,
            ).get("key")

        # A non-singlet calculation must be unrestricted even if not stated explicitly.
        if self.data["unrestricted"] is None and self.data["multiplicity"] != 1:
            self.data["unrestricted"] = [[]]

        # Get the value of scf_final_print in the output file
        scf_final_print = read_pattern(
            self.text,
            {"key": r"scf_final_print\s*=\s*(\d+)"},
            terminate_on_match=True,
        ).get("key")
        if scf_final_print is not None:
            self.data["scf_final_print"] = int(scf_final_print[0][0])
        else:
            self.data["scf_final_print"] = 0

        # Check if calculation uses GEN_SCFMAN, multiple potential output formats
        self.data["using_GEN_SCFMAN"] = read_pattern(
            self.text,
            {"key": r"\s+GEN_SCFMAN: A general SCF calculation manager"},
            terminate_on_match=True,
        ).get("key")
        if not self.data["using_GEN_SCFMAN"]:
            self.data["using_GEN_SCFMAN"] = read_pattern(
                self.text,
                {"key": r"\s+General SCF calculation program by"},
                terminate_on_match=True,
            ).get("key")

        # Check if the SCF failed to converge
        if read_pattern(self.text, {"key": r"SCF failed to converge"}, terminate_on_match=True).get("key") == [[]]:
            self.data["errors"] += ["SCF_failed_to_converge"]

        # Parse the SCF
        self._read_SCF()

        # Parse the Mulliken/ESP/RESP charges and dipoles
        self._read_charges_and_dipoles()

        # Check for various warnings
        self._detect_general_warnings()

        # Parse mem_total, if present
        self.data["mem_total"] = None
        if read_pattern(self.text, {"key": r"mem_total\s*="}, terminate_on_match=True).get("key") == [[]]:
            temp_mem_total = read_pattern(self.text, {"key": r"mem_total\s*=\s*(\d+)"}, terminate_on_match=True).get(
                "key"
            )
            self.data["mem_total"] = int(temp_mem_total[0][0])

        # Parse gap info, if present:
        if read_pattern(self.text, {"key": r"Generalized Kohn-Sham gap"}, terminate_on_match=True).get("key") == [[]]:
            gap_info = {}
            # If this is open-shell gap info:
            if read_pattern(self.text, {"key": r"Alpha HOMO Eigenvalue"}, terminate_on_match=True).get("key") == [[]]:
                temp_alpha_HOMO = read_pattern(
                    self.text, {"key": r"Alpha HOMO Eigenvalue\s*=\s*([\d\-\.]+)"}, terminate_on_match=True
                ).get("key")
                gap_info["alpha_HOMO"] = float(temp_alpha_HOMO[0][0])
                temp_beta_HOMO = read_pattern(
                    self.text, {"key": r"Beta  HOMO Eigenvalue\s*=\s*([\d\-\.]+)"}, terminate_on_match=True
                ).get("key")
                gap_info["beta_HOMO"] = float(temp_beta_HOMO[0][0])
                temp_alpha_LUMO = read_pattern(
                    self.text, {"key": r"Alpha LUMO Eigenvalue\s*=\s*([\d\-\.]+)"}, terminate_on_match=True
                ).get("key")
                gap_info["alpha_LUMO"] = float(temp_alpha_LUMO[0][0])
                temp_beta_LUMO = read_pattern(
                    self.text, {"key": r"Beta  LUMO Eigenvalue\s*=\s*([\d\-\.]+)"}, terminate_on_match=True
                ).get("key")
                gap_info["beta_LUMO"] = float(temp_beta_LUMO[0][0])
                temp_alpha_gap = read_pattern(
                    self.text, {"key": r"HOMO-Alpha LUMO gap\s*=\s*([\d\-\.]+)"}, terminate_on_match=True
                ).get("key")
                gap_info["alpha_gap"] = float(temp_alpha_gap[0][0])
                temp_beta_gap = read_pattern(
                    self.text, {"key": r"HOMO-Beta LUMO gap\s*=\s*([\d\-\.]+)"}, terminate_on_match=True
                ).get("key")
                gap_info["beta_gap"] = float(temp_beta_gap[0][0])

            temp_HOMO = read_pattern(
                self.text, {"key": r"    HOMO Eigenvalue\s*=\s*([\d\-\.]+)"}, terminate_on_match=True
            ).get("key")
            gap_info["HOMO"] = float(temp_HOMO[0][0])
            temp_LUMO = read_pattern(
                self.text, {"key": r"    LUMO Eigenvalue\s*=\s*([\d\-\.]+)"}, terminate_on_match=True
            ).get("key")
            gap_info["LUMO"] = float(temp_LUMO[0][0])
            temp_KSgap = read_pattern(self.text, {"key": r"KS gap\s*=\s*([\d\-\.]+)"}, terminate_on_match=True).get(
                "key"
            )
            gap_info["KSgap"] = float(temp_KSgap[0][0])
            self.data["gap_info"] = gap_info
        else:
            self.data["gap_info"] = None

        # Check to see if PCM or SMD are present
        self.data["solvent_method"] = None
        self.data["solvent_data"] = None

        if read_pattern(self.text, {"key": r"solvent_method\s*=?\s*pcm"}, terminate_on_match=True).get("key") == [[]]:
            self.data["solvent_method"] = "PCM"
        if read_pattern(self.text, {"key": r"solvent_method\s*=?\s*smd"}, terminate_on_match=True).get("key") == [[]]:
            self.data["solvent_method"] = "SMD"
        if read_pattern(self.text, {"key": r"solvent_method\s*=?\s*isosvp"}, terminate_on_match=True).get("key") == [
            []
        ]:
            self.data["solvent_method"] = "ISOSVP"

        # if solvent_method is not None, populate solvent_data with None values for all possible keys
        # ISOSVP and CMIRS data are nested under respective keys
        # TODO - nest PCM and SMD data under respective keys in a future PR
        pcm_keys = [
            "PCM_dielectric",
            "g_electrostatic",
            "g_cavitation",
            "g_dispersion",
            "g_repulsion",
            "total_contribution_pcm",
            "solute_internal_energy",
        ]
        smd_keys = ["SMD_solvent", "smd0", "smd3", "smd4", "smd6", "smd9"]
        isosvp_keys = [
            "isosvp_dielectric",
            "final_soln_phase_e",
            "solute_internal_e",
            "total_solvation_free_e",
            "change_solute_internal_e",
            "reaction_field_free_e",
        ]
        cmirs_keys = [
            "CMIRS_enabled",
            "dispersion_e",
            "exchange_e",
            "min_neg_field_e",
            "max_pos_field_e",
        ]

        if self.data["solvent_method"] is not None:
            self.data["solvent_data"] = {}
            for key in pcm_keys + smd_keys:
                self.data["solvent_data"][key] = None
            self.data["solvent_data"]["isosvp"] = {}
            for key in isosvp_keys:
                self.data["solvent_data"]["isosvp"][key] = None
            self.data["solvent_data"]["cmirs"] = {}
            for key in cmirs_keys:
                self.data["solvent_data"]["cmirs"][key] = None

        # Parse information specific to a solvent model
        if self.data["solvent_method"] == "PCM":
            temp_dielectric = read_pattern(
                self.text, {"key": r"dielectric\s*([\d\-\.]+)"}, terminate_on_match=True
            ).get("key")
            self.data["solvent_data"]["PCM_dielectric"] = float(temp_dielectric[0][0])
            self._read_pcm_information()
        elif self.data["solvent_method"] == "SMD":
            if read_pattern(self.text, {"key": r"Unrecognized solvent"}, terminate_on_match=True).get("key") == [[]]:
                # An unrecognized solvent is fatal for an incomplete job but only a warning otherwise.
                if not self.data.get("completion", []):
                    self.data["errors"] += ["unrecognized_solvent"]
                else:
                    self.data["warnings"]["unrecognized_solvent"] = True
            temp_solvent = read_pattern(self.text, {"key": r"\s[Ss]olvent:? ([a-zA-Z]+)"}).get("key")
            for val in temp_solvent:
                if val[0] != temp_solvent[0][0]:
                    if val[0] != "for":
                        self.data["warnings"]["SMD_two_solvents"] = str(temp_solvent[0][0]) + " and " + str(val[0])
                    else:
                        if (
                            "unrecognized_solvent" not in self.data["errors"]
                            and "unrecognized_solvent" not in self.data["warnings"]
                        ):
                            self.data["warnings"]["questionable_SMD_parsing"] = True
            self.data["solvent_data"]["SMD_solvent"] = temp_solvent[0][0]
            self._read_smd_information()
        elif self.data["solvent_method"] == "ISOSVP":
            self.data["solvent_data"]["cmirs"]["CMIRS_enabled"] = False
            self._read_isosvp_information()
            if read_pattern(
                self.text,
                {"cmirs": r"DEFESR calculation with single-center isodensity surface"},
                terminate_on_match=True,
            ).get("cmirs") == [[]]:
                # this is a CMIRS calc
                # note that all other outputs follow the same format as ISOSVP
                self._read_cmirs_information()
                # TODO - would it make more sense to set solvent_method = 'cmirs'?
                # but in the QChem input file, solvent_method is still 'isosvp'
                self.data["solvent_data"]["cmirs"]["CMIRS_enabled"] = True

                # TODO - parsing to identify the CMIRS solvent?
                # this would require parsing the $pcm_nonels section of input and comparing
                # parameters to the contents of CMIRS_SETTINGS in pymatgen.io.qchem.sets

        # Parse the final energy
        temp_final_energy = read_pattern(self.text, {"key": r"Final\senergy\sis\s+([\d\-\.]+)"}).get("key")
        if temp_final_energy is None:
            self.data["final_energy"] = None
        else:
            self.data["final_energy"] = float(temp_final_energy[0][0])

        if self.data["final_energy"] is None:
            temp_dict = read_pattern(
                self.text,
                {"final_energy": r"\s*Total\s+energy in the final basis set\s+=\s*([\d\-\.]+)"},
            )

            if temp_dict.get("final_energy") is not None:
                self.data["final_energy"] = float(temp_dict.get("final_energy")[-1][0])

        # Check if calculation is using dft_d and parse relevant info if so
        self.data["using_dft_d3"] = read_pattern(self.text, {"key": r"dft_d\s*= d3"}, terminate_on_match=True).get(
            "key"
        )
        if self.data.get("using_dft_d3", []):
            temp_d3 = read_pattern(
                self.text,
                {"key": r"\-D3 energy without 3body term =\s*([\d\.\-]+) hartrees"},
            ).get("key")
            if temp_d3 is None:
                self.data["dft_d3"] = None
            elif len(temp_d3) == 1:
                self.data["dft_d3"] = float(temp_d3[0][0])
            else:
                # Bug fix: np.zeros(len(temp_d3)) was previously computed before the
                # None check, raising TypeError when no -D3 energy line was matched.
                real_d3 = np.zeros(len(temp_d3))
                for ii, entry in enumerate(temp_d3):
                    real_d3[ii] = float(entry[0])
                self.data["dft_d3"] = real_d3

        # Parse the S2 values in the case of an unrestricted calculation
        if self.data.get("unrestricted", []):
            # Ideal <S^2> for the given multiplicity: S(S+1) with S = (mult - 1)/2.
            correct_s2 = 0.5 * (self.data["multiplicity"] - 1) * (0.5 * (self.data["multiplicity"] - 1) + 1)
            temp_S2 = read_pattern(self.text, {"key": r"<S\^2>\s=\s+([\d\-\.]+)"}).get("key")
            if temp_S2 is None:
                self.data["S2"] = None
            elif len(temp_S2) == 1:
                self.data["S2"] = float(temp_S2[0][0])
                if abs(correct_s2 - self.data["S2"]) > 0.01:
                    self.data["warnings"]["spin_contamination"] = abs(correct_s2 - self.data["S2"])
            else:
                real_S2 = np.zeros(len(temp_S2))
                have_spin_contamination = False
                for ii, entry in enumerate(temp_S2):
                    real_S2[ii] = float(entry[0])
                    if abs(correct_s2 - real_S2[ii]) > 0.01:
                        have_spin_contamination = True
                self.data["S2"] = real_S2
                if have_spin_contamination:
                    spin_contamination = np.zeros(len(self.data["S2"]))
                    for ii, entry in enumerate(self.data["S2"]):
                        spin_contamination[ii] = abs(correct_s2 - entry)
                    self.data["warnings"]["spin_contamination"] = spin_contamination

        # Parse additional data from coupled-cluster calculations
        self.data["coupled_cluster"] = read_pattern(
            self.text, {"key": r"CCMAN2: suite of methods based on coupled cluster"}
        ).get("key")
        if self.data.get("coupled_cluster", []):
            temp_dict = read_pattern(
                self.text,
                {
                    "SCF": r"\s+SCF energy\s+=\s+([\d\-\.]+)",
                    "MP2": r"\s+MP2 energy\s+=\s+([\d\-\.]+)",
                    "CCSD_correlation": r"\s+CCSD correlation energy\s+=\s+([\d\-\.]+)",
                    "CCSD": r"\s+CCSD total energy\s+=\s+([\d\-\.]+)",
                    "CCSD(T)_correlation": r"\s+CCSD\(T\) correlation energy\s+=\s+([\d\-\.]+)",
                    "CCSD(T)": r"\s+CCSD\(T\) total energy\s+=\s+([\d\-\.]+)",
                },
            )

            if temp_dict.get("SCF") is None:
                self.data["hf_scf_energy"] = None
            else:
                self.data["hf_scf_energy"] = float(temp_dict["SCF"][0][0])

            if temp_dict.get("MP2") is None:
                self.data["mp2_energy"] = None
            else:
                self.data["mp2_energy"] = float(temp_dict["MP2"][0][0])

            if temp_dict.get("CCSD_correlation") is None:
                self.data["ccsd_correlation_energy"] = None
            else:
                self.data["ccsd_correlation_energy"] = float(temp_dict["CCSD_correlation"][0][0])

            if temp_dict.get("CCSD") is None:
                self.data["ccsd_total_energy"] = None
            else:
                self.data["ccsd_total_energy"] = float(temp_dict["CCSD"][0][0])

            if temp_dict.get("CCSD(T)_correlation") is None:
                self.data["ccsd(t)_correlation_energy"] = None
            else:
                self.data["ccsd(t)_correlation_energy"] = float(temp_dict["CCSD(T)_correlation"][0][0])

            if temp_dict.get("CCSD(T)") is None:
                self.data["ccsd(t)_total_energy"] = None
            else:
                self.data["ccsd(t)_total_energy"] = float(temp_dict["CCSD(T)"][0][0])

        # Check if the calculation is a geometry optimization. If so, parse the relevant output
        self.data["optimization"] = read_pattern(self.text, {"key": r"(?i)\s*job(?:_)*type\s*(?:=)*\s*opt"}).get("key")
        if self.data.get("optimization", []):
            # Determine if the calculation is using the new geometry optimizer
            self.data["new_optimizer"] = read_pattern(self.text, {"key": r"(?i)\s*geom_opt2\s*(?:=)*\s*3"}).get("key")
            if self.data["version"] == "6":
                # In Q-Chem 6, the new optimizer is the default unless geom_opt_driver says otherwise.
                temp_driver = read_pattern(self.text, {"key": r"(?i)\s*geom_opt_driver\s*(?:=)*\s*optimize"}).get("key")
                if temp_driver is None:
                    self.data["new_optimizer"] = [[]]
            # Check if we have an unexpected transition state
            tmp_transition_state = read_pattern(self.text, {"key": r"TRANSITION STATE CONVERGED"}).get("key")
            if tmp_transition_state is not None:
                self.data["warnings"]["unexpected_transition_state"] = True
            self._read_optimization_data()

        # Check if the calculation is a transition state optimization. If so, parse the relevant output
        # Note: for now, TS calculations are treated the same as optimization calculations
        self.data["transition_state"] = read_pattern(self.text, {"key": r"(?i)\s*job(?:_)*type\s*(?:=)*\s*ts"}).get(
            "key"
        )
        if self.data.get("transition_state", []):
            self._read_optimization_data()

        # Check if the calculation contains a constraint in an $opt section.
        self.data["opt_constraint"] = read_pattern(self.text, {"key": r"\$opt\s+CONSTRAINT"}).get("key")
        if self.data.get("opt_constraint"):
            temp_constraint = read_pattern(
                self.text,
                {
                    "key": r"Constraints and their Current Values\s+Value\s+Constraint\s+(\w+)\:\s+([\d\-\.]+)\s+"
                    r"([\d\-\.]+)\s+([\d\-\.]+)\s+([\d\-\.]+)\s+([\d\-\.]+)\s+([\d\-\.]+)"
                },
            ).get("key")
            if temp_constraint is not None:
                self.data["opt_constraint"] = temp_constraint[0]
                if self.data.get("opt_constraint") is not None:
                    if float(self.data["opt_constraint"][5]) != float(self.data["opt_constraint"][6]):
                        if abs(float(self.data["opt_constraint"][5])) != abs(float(self.data["opt_constraint"][6])):
                            raise ValueError("ERROR: Opt section value and constraint should be the same!")
                        if abs(float(self.data["opt_constraint"][5])) not in [
                            0.0,
                            180.0,
                        ]:
                            raise ValueError(
                                "ERROR: Opt section value and constraint can only differ by a sign at 0.0 and 180.0!"
                            )

        # Check if the calculation is a frequency analysis. If so, parse the relevant output
        self.data["frequency_job"] = read_pattern(
            self.text,
            {"key": r"(?i)\s*job(?:_)*type\s*(?:=)*\s*freq"},
            terminate_on_match=True,
        ).get("key")
        if self.data.get("frequency_job", []):
            self._read_frequency_data()

        # Check if the calculation is a single point. If so, parse the relevant output
        self.data["single_point_job"] = read_pattern(
            self.text,
            {"key": r"(?i)\s*job(?:_)*type\s*(?:=)*\s*sp"},
            terminate_on_match=True,
        ).get("key")

        # Check if the calculation is a force calculation. If so, parse the relevant output
        self.data["force_job"] = read_pattern(
            self.text,
            {"key": r"(?i)\s*job(?:_)*type\s*(?:=)*\s*force"},
            terminate_on_match=True,
        ).get("key")
        if self.data.get("force_job", []):
            self._read_force_data()

        # Read in the eigenvalues from the output file
        if self.data["scf_final_print"] >= 1:
            self._read_eigenvalues()

        # Read the Fock matrix from the output file
        if self.data["scf_final_print"] >= 3:
            self._read_fock_matrix()
            self._read_coefficient_matrix()

        # Check if the calculation is a PES scan. If so, parse the relevant output
        self.data["scan_job"] = read_pattern(
            self.text, {"key": r"(?i)\s*job(?:_)*type\s*(?:=)*\s*pes_scan"}, terminate_on_match=True
        ).get("key")
        if self.data.get("scan_job", []):
            self._read_scan_data()

        # Check if an NBO calculation was performed. If so, parse the relevant output
        self.data["nbo_data"] = read_pattern(
            self.text, {"key": r"N A T U R A L   A T O M I C   O R B I T A L"}, terminate_on_match=True
        ).get("key")
        if self.data.get("nbo_data", []):
            self._read_nbo_data()

        # If the calculation did not finish and no errors have been identified yet, check for other errors
        if not self.data.get("completion", []) and self.data.get("errors") == []:
            self._check_completion_errors()
535
    @staticmethod
    def multiple_outputs_from_file(cls, filename, keep_sub_files=True):
        """
        Parses a QChem output file with multiple calculations.

        1.) Separates the output into sub-files
            e.g. qcout -> qcout.0, qcout.1, qcout.2 ... qcout.N
            a.) Find delimiter for multiple calculations
            b.) Make separate output sub-files
        2.) Creates separate QCCalcs for each one from the sub-files
        """
        # Split on the "Job N of M" banners that delimit back-to-back calculations.
        with zopen(filename, "rt") as file:
            sections = re.split(r"\s*(?:Running\s+)*Job\s+\d+\s+of\s+\d+\s+", file.read())
        # A file that starts with the delimiter yields a leading empty chunk; drop it.
        if not sections[0]:
            sections = sections[1:]

        parsed_outputs = []
        for index, section in enumerate(sections):
            sub_filename = filename + "." + str(index)
            with open(sub_filename, "w") as sub_file:
                sub_file.write(section)
            parsed_outputs.append(cls(sub_filename))
            if not keep_sub_files:
                os.remove(sub_filename)
        return parsed_outputs
558

559
    def _read_eigenvalues(self):
        """Parse the orbital energies from the output file. An array of the
        dimensions of the number of orbitals used in the calculation is stored."""
        # Matrix entries are floats; the integer row/column indices around them
        # are excluded by this element pattern.
        element_pattern = r"\-*\d+\.\d+"
        spin_unrestricted = bool(self.data.get("unrestricted", []))

        # For a spin-paired run there is only the single (alpha) eigenvalue list,
        # so the following "Final Alpha MO Coefficients" section terminates it.
        # For an unrestricted run the beta eigenvalue header terminates the alpha set.
        if spin_unrestricted:
            alpha_footer = r"Final Beta MO Eigenvalues"
        else:
            alpha_footer = r"Final Alpha MO Coefficients+\s*"

        # The alpha eigenvalues are present in both restricted and unrestricted runs.
        self.data["alpha_eigenvalues"] = read_matrix_pattern(
            r"Final Alpha MO Eigenvalues", alpha_footer, element_pattern, self.text, postprocess=float
        )

        # Beta eigenvalues exist only for spin-unrestricted calculations.
        if spin_unrestricted:
            self.data["beta_eigenvalues"] = read_matrix_pattern(
                r"Final Beta MO Eigenvalues",
                r"Final Alpha MO Coefficients+\s*",
                element_pattern,
                self.text,
                postprocess=float,
            )

597
    def _read_fock_matrix(self):
        """Parses the Fock matrix. The matrix is read in whole
        from the output file and then transformed into the right dimensions."""
        # Matrix entries are floats; the integer indices around them are skipped.
        element_pattern = r"\-*\d+\.\d+"
        spin_unrestricted = bool(self.data.get("unrestricted", []))

        # Restricted runs print only the alpha matrix, terminated by "SCF time:";
        # unrestricted runs terminate the alpha matrix at the beta header.
        alpha_footer = "Final Beta Fock Matrix" if spin_unrestricted else "SCF time:"
        alpha_fock = read_matrix_pattern(
            r"Final Alpha Fock Matrix", alpha_footer, element_pattern, self.text, postprocess=float
        )

        # The values are parsed as one flat list of numbers; reshape them into
        # the (square) Fock matrix before storing.
        self.data["alpha_fock_matrix"] = process_parsed_fock_matrix(alpha_fock)

        # The beta Fock matrix exists only for spin-unrestricted calculations.
        if spin_unrestricted:
            beta_fock = read_matrix_pattern(
                r"Final Beta Fock Matrix", "SCF time:", element_pattern, self.text, postprocess=float
            )
            self.data["beta_fock_matrix"] = process_parsed_fock_matrix(beta_fock)

636
    def _read_coefficient_matrix(self):
        """Parses the MO coefficient matrix from the output file. Done in much
        the same way as the Fock matrix.

        Results are stored in ``self.data["alpha_coeff_matrix"]`` and, for
        spin-unrestricted calculations, ``self.data["beta_coeff_matrix"]``.
        """
        # The header is the same for both spin-restricted and spin-unrestricted calculations.
        header_pattern = r"Final Alpha MO Coefficients"
        # The elements of the matrix are always floats; they are surrounded by
        # the indices of the matrix, which are integers.
        elements_pattern = r"\-*\d+\.\d+"
        if not self.data.get("unrestricted", []):
            spin_unrestricted = False
            # Restricted output: the alpha block runs until the density matrix header.
            footer_pattern = "Final Alpha Density Matrix"
        else:
            spin_unrestricted = True
            # Unrestricted output: the alpha block is terminated by the beta header.
            footer_pattern = "Final Beta MO Coefficients"
        # Common for both spin restricted and unrestricted calculations is the alpha coefficient matrix.
        alpha_coeff_matrix = read_matrix_pattern(
            header_pattern, footer_pattern, elements_pattern, self.text, postprocess=float
        )
        if spin_unrestricted:
            header_pattern = r"Final Beta MO Coefficients"
            footer_pattern = "Final Alpha Density Matrix"
            beta_coeff_matrix = read_matrix_pattern(
                header_pattern, footer_pattern, elements_pattern, self.text, postprocess=float
            )

        # Convert the matrices to the right dimension. Right now they are simply
        # one massive list of numbers, but we need to split them into a matrix.
        # The coefficient matrix is square, so the same reshaping helper used
        # for the Fock matrix applies here.
        alpha_coeff_matrix = process_parsed_fock_matrix(alpha_coeff_matrix)
        self.data["alpha_coeff_matrix"] = alpha_coeff_matrix

        if spin_unrestricted:
            # Perform the same transformation for the beta coefficient matrix.
            beta_coeff_matrix = process_parsed_fock_matrix(beta_coeff_matrix)
            self.data["beta_coeff_matrix"] = beta_coeff_matrix
    def _read_charge_and_multiplicity(self):
        """
        Parses charge and multiplicity.

        Each value is read from the echoed ``$molecule`` section when present.
        Failing that, the charge falls back to the "Sum of atomic charges"
        line (``None`` if absent), and the multiplicity falls back to the
        "Sum of spin charges" line plus one (defaulting to 1 if absent).
        """
        # --- charge ---
        matched = read_pattern(self.text, {"key": r"\$molecule\s+([\-\d]+)\s+\d"}, terminate_on_match=True).get("key")
        if matched is not None:
            self.data["charge"] = int(matched[0][0])
        else:
            matched = read_pattern(
                self.text,
                {"key": r"Sum of atomic charges \=\s+([\d\-\.\+]+)"},
                terminate_on_match=True,
            ).get("key")
            self.data["charge"] = None if matched is None else int(float(matched[0][0]))

        # --- multiplicity ---
        matched = read_pattern(
            self.text, {"key": r"\$molecule\s+[\-\d]+\s+(\d)"}, terminate_on_match=True
        ).get("key")
        if matched is not None:
            self.data["multiplicity"] = int(matched[0][0])
        else:
            matched = read_pattern(
                self.text,
                {"key": r"Sum of spin\s+charges \=\s+([\d\-\.\+]+)"},
                terminate_on_match=True,
            ).get("key")
            self.data["multiplicity"] = 1 if matched is None else int(float(matched[0][0])) + 1
    def _read_species_and_inital_geometry(self):
        """
        Parses species and initial geometry.

        Populates ``self.data`` keys "species", "initial_geometry",
        "initial_molecule" and "point_group"; all are set to ``None`` if the
        first "Standard Nuclear Orientation" table cannot be found.
        """
        header_pattern = r"Standard Nuclear Orientation \(Angstroms\)\s+I\s+Atom\s+X\s+Y\s+Z\s+-+"
        table_pattern = r"\s*\d+\s+([a-zA-Z]+)\s*([\d\-\.]+)\s*([\d\-\.]+)\s*([\d\-\.]+)\s*"
        footer_pattern = r"\s*-+"
        temp_geom = read_table_pattern(self.text, header_pattern, table_pattern, footer_pattern)
        if temp_geom is None or len(temp_geom) == 0:
            self.data["species"] = None
            self.data["initial_geometry"] = None
            self.data["initial_molecule"] = None
            self.data["point_group"] = None
        else:
            temp_point_group = read_pattern(
                self.text,
                {"key": r"Molecular Point Group\s+([A-Za-z\d\*]+)"},
                terminate_on_match=True,
            ).get("key")
            if temp_point_group is not None:
                self.data["point_group"] = temp_point_group[0][0]
            else:
                self.data["point_group"] = None
            # Only the first orientation table corresponds to the input geometry.
            temp_geom = temp_geom[0]
            species = []
            geometry = np.zeros(shape=(len(temp_geom), 3), dtype=float)
            for ii, entry in enumerate(temp_geom):
                species += [entry[0]]
                for jj in range(3):
                    # Q-Chem prints "*" for coordinates too large for its field
                    # width; substitute a huge sentinel value instead of failing.
                    if "*" in entry[jj + 1]:
                        geometry[ii, jj] = 10000000000.0
                    else:
                        geometry[ii, jj] = float(entry[jj + 1])
            self.data["species"] = species
            self.data["initial_geometry"] = geometry
            # A Molecule can only be constructed once charge and multiplicity
            # have been parsed (see _read_charge_and_multiplicity).
            if self.data["charge"] is not None and self.data["multiplicity"] is not None:
                self.data["initial_molecule"] = Molecule(
                    species=species,
                    coords=geometry,
                    charge=self.data.get("charge"),
                    spin_multiplicity=self.data.get("multiplicity"),
                )
            else:
                self.data["initial_molecule"] = None
    def _read_SCF(self):
        """
        Parses both old and new SCFs.

        Chooses the GEN_SCFMAN table layout or the legacy layout depending on
        ``self.data["using_GEN_SCFMAN"]``, then stores per-cycle
        (energy, error) arrays in ``self.data["SCF"]``. Also records any
        THRESH warnings and the final SCF/total energies.
        """
        if self.data.get("using_GEN_SCFMAN", []):
            # New (GEN_SCFMAN) SCF printout.
            footer_pattern = r"(^\s*\-+\n\s+SCF time|^\s*gen_scfman_exception: SCF failed to converge)"
            header_pattern = (
                r"^\s*\-+\s+Cycle\s+Energy\s+(?:(?:DIIS)*\s+[Ee]rror)*(?:RMS Gradient)*\s+\-+"
                r"(?:\s*\-+\s+OpenMP\s+Integral\s+computing\s+Module\s+"
                r"(?:Release:\s+version\s+[\d\-\.]+\,\s+\w+\s+[\d\-\.]+\, "
                r"Q-Chem Inc\. Pittsburgh\s+)*\-+)*\n"
            )
            # The table pattern has to tolerate a large number of optional
            # interleaved messages (line searches, GDM/DIIS switches, warnings).
            table_pattern = (
                r"(?:\s*Nonlocal correlation = [\d\-\.]+e[\d\-]+)*"
                r"(?:\s*Inaccurate integrated density:\n\s+Number of electrons\s+=\s+[\d\-\.]+\n\s+"
                r"Numerical integral\s+=\s+[\d\-\.]+\n\s+Relative error\s+=\s+[\d\-\.]+\s+\%\n)*\s*\d+\s+"
                r"([\d\-\.]+)\s+([\d\-\.]+)e([\d\-\.\+]+)(?:\s+Convergence criterion met)*"
                r"(?:\s+Preconditoned Steepest Descent)*(?:\s+Roothaan Step)*(?:\s+"
                r"(?:Normal\s+)*BFGS [Ss]tep)*(?:\s+LineSearch Step)*(?:\s+Line search: overstep)*"
                r"(?:\s+Dog-leg BFGS step)*(?:\s+Line search: understep)*"
                r"(?:\s+Descent step)*(?:\s+Done DIIS. Switching to GDM)*"
                r"(?:\s+Done GDM. Switching to DIIS)*"
                r"(?:(?:\s+Done GDM. Switching to GDM with quadratic line-search\s)*\s*GDM subspace size\: \d+)*"
                r"(?:\s*\-+\s+Cycle\s+Energy\s+(?:(?:DIIS)*\s+[Ee]rror)*"
                r"(?:RMS Gradient)*\s+\-+"
                r"(?:\s*\-+\s+OpenMP\s+Integral\s+computing\s+Module\s+"
                r"(?:Release:\s+version\s+[\d\-\.]+\,\s+\w+\s+[\d\-\.]+\, "
                r"Q-Chem Inc\. Pittsburgh\s+)*\-+)*\n)*"
                r"(?:\s*Line search, dEdstep = [\d\-\.]+e[\d\-\.\+]+\s+[\d\-\.]+e[\d\-\.\+]+\s+[\d\-\.]+\s*)*"
                r"(?:\s*[\d\-\.]+e[\d\-\.\+]+\s+[\d\-\.]+\s+[\d\-\.]+e[\d\-\.\+]+\s+[\d\-\.]+e[\d\-\.\+]+\s"
                r"+[\d\-\.]+\s+[\d\-\.]+e[\d\-\.\+]+\s+Optimal value differs by [\d\-\.]+e[\d\-\.\+]+ from prediction)*"
                r"(?:\s*Resetting GDM\.)*(?:\s+[\d\-\.]+e[\d\-\.\+]+\s+[\d\-\.]+\s+[\d\-\.]+e[\d\-\.\+]+)*"
                r"(?:\s+[\d\-\.]+e[\d\-\.\+]+\s+[\d\-\.]+\s+[\d\-\.]+e[\d\-\.\+]+\s+[\d\-\.]+e[\d\-\.\+]+\s+"
                r"[\d\-\.]+\s+[\d\-\.]+e[\d\-\.\+]+\s+Optimal value differs by [\d\-\.]+e[\d\-\.\+]+ from prediction)*"
                r"(?:\s*gdm_qls\: Orbitals will not converge further\.)*"
                r"(?:(\n\s*[a-z\dA-Z_\s/]+\.C|\n\s*GDM)::WARNING energy changes are now smaller than effective "
                r"accuracy\.\s*(\n\s*[a-z\dA-Z_\s/]+\.C|\n\s*GDM)::\s+calculation will continue, but THRESH "
                r"should be increased\s*"
                r"(\n\s*[a-z\dA-Z_\s/]+\.C|\n\s*GDM)::\s+or SCF_CONVERGENCE decreased"
                r"\.\s*(\n\s*[a-z\dA-Z_\s/]+\.C|\n\s*GDM)::\s+effective_thresh = [\d\-\.]+e[\d\-]+)*"
            )
        else:
            # Legacy SCF printout.
            if "SCF_failed_to_converge" in self.data.get("errors"):
                footer_pattern = r"^\s*\d+\s*[\d\-\.]+\s+[\d\-\.]+E[\d\-\.]+\s+Convergence\s+failure\n"
            else:
                footer_pattern = r"^\s*\-+\n"
            header_pattern = r"^\s*\-+\s+Cycle\s+Energy\s+DIIS Error\s+\-+\n"
            table_pattern = (
                r"(?:\s*Inaccurate integrated density:\n\s+Number of electrons\s+=\s+[\d\-\.]+\n\s+"
                r"Numerical integral\s+=\s+[\d\-\.]+\n\s+Relative error\s+=\s+[\d\-\.]+\s+\%\n)*\s*\d+\s*"
                r"([\d\-\.]+)\s+([\d\-\.]+)E([\d\-\.\+]+)(?:\s*\n\s*cpu\s+[\d\-\.]+\swall\s+[\d\-\.]+)*"
                r"(?:\nin dftxc\.C, eleTot sum is:[\d\-\.]+, tauTot is\:[\d\-\.]+)*"
                r"(?:\s+Convergence criterion met)*(?:\s+Done RCA\. Switching to DIIS)*"
                r"(?:\n\s*Warning: not using a symmetric Q)*"
                r"(?:\nRecomputing EXC\s*[\d\-\.]+\s*[\d\-\.]+\s*[\d\-\.]+"
                r"(?:\s*\nRecomputing EXC\s*[\d\-\.]+\s*[\d\-\.]+\s*[\d\-\.]+)*)*"
            )

        temp_scf = read_table_pattern(self.text, header_pattern, table_pattern, footer_pattern)
        real_scf = []
        # Convert each parsed SCF (one per calculation step) into an array of
        # (energy, error) rows; the error mantissa/exponent are recombined.
        for one_scf in temp_scf:
            temp = np.zeros(shape=(len(one_scf), 2))
            for ii, entry in enumerate(one_scf):
                temp[ii, 0] = float(entry[0])
                temp[ii, 1] = float(entry[1]) * 10 ** float(entry[2])
            real_scf += [temp]

        self.data["SCF"] = real_scf

        # Record THRESH warnings (energy changes smaller than effective accuracy).
        temp_thresh_warning = read_pattern(
            self.text,
            {
                "key": r"\n[a-zA-Z_\s/]+\.C::WARNING energy changes are now smaller than effective accuracy"
                r"\.\n[a-zA-Z_\s/]+\.C::\s+calculation will continue, but THRESH should be increased\n"
                r"[a-zA-Z_\s/]+\.C::\s+or SCF_CONVERGENCE decreased\. \n"
                r"[a-zA-Z_\s/]+\.C::\s+effective_thresh = ([\d\-\.]+e[\d\-]+)"
            },
        ).get("key")
        if temp_thresh_warning is not None:
            if len(temp_thresh_warning) == 1:
                self.data["warnings"]["thresh"] = float(temp_thresh_warning[0][0])
            else:
                thresh_warning = np.zeros(len(temp_thresh_warning))
                for ii, entry in enumerate(temp_thresh_warning):
                    thresh_warning[ii] = float(entry[0])
                self.data["warnings"]["thresh"] = thresh_warning

        # Final SCF energy: a scalar for single-point jobs, an array when the
        # output contains multiple SCF solutions (e.g. optimization steps).
        temp_SCF_energy = read_pattern(self.text, {"key": r"SCF   energy in the final basis set =\s*([\d\-\.]+)"}).get(
            "key"
        )
        if temp_SCF_energy is not None:
            if len(temp_SCF_energy) == 1:
                self.data["SCF_energy_in_the_final_basis_set"] = float(temp_SCF_energy[0][0])
            else:
                SCF_energy = np.zeros(len(temp_SCF_energy))
                for ii, val in enumerate(temp_SCF_energy):
                    SCF_energy[ii] = float(val[0])
                self.data["SCF_energy_in_the_final_basis_set"] = SCF_energy

        # Final total energy, handled the same way as the SCF energy above.
        temp_Total_energy = read_pattern(
            self.text, {"key": r"Total energy in the final basis set =\s*([\d\-\.]+)"}
        ).get("key")
        if temp_Total_energy is not None:
            if len(temp_Total_energy) == 1:
                self.data["Total_energy_in_the_final_basis_set"] = float(temp_Total_energy[0][0])
            else:
                Total_energy = np.zeros(len(temp_Total_energy))
                for ii, val in enumerate(temp_Total_energy):
                    Total_energy[ii] = float(val[0])
                self.data["Total_energy_in_the_final_basis_set"] = Total_energy
    def _read_charges_and_dipoles(self):
        """
        Parses Mulliken/ESP/RESP charges.
        Parses associated dipoles.
        Also parses spins given an unrestricted SCF.

        Results go into ``self.data["dipoles"]``, ``self.data["Mulliken"]``
        and, when present, ``self.data["ESP"]`` or ``self.data["RESP"]``.
        """

        self.data["dipoles"] = {}
        # Total dipole magnitude and its Cartesian components are matched by
        # two complementary patterns over the same printed line.
        temp_dipole_total = read_pattern(
            self.text, {"key": r"X\s*[\d\-\.]+\s*Y\s*[\d\-\.]+\s*Z\s*[\d\-\.]+\s*Tot\s*([\d\-\.]+)"}
        ).get("key")
        temp_dipole = read_pattern(
            self.text, {"key": r"X\s*([\d\-\.]+)\s*Y\s*([\d\-\.]+)\s*Z\s*([\d\-\.]+)\s*Tot\s*[\d\-\.]+"}
        ).get("key")
        if temp_dipole is not None:
            if len(temp_dipole_total) == 1:
                # Single dipole printout: store scalar total and 3-vector.
                self.data["dipoles"]["total"] = float(temp_dipole_total[0][0])
                dipole = np.zeros(3)
                for ii, val in enumerate(temp_dipole[0]):
                    dipole[ii] = float(val)
                self.data["dipoles"]["dipole"] = dipole
            else:
                # Multiple printouts (e.g. one per optimization step): store arrays.
                total = np.zeros(len(temp_dipole_total))
                for ii, val in enumerate(temp_dipole_total):
                    total[ii] = float(val[0])
                self.data["dipoles"]["total"] = total
                dipole = np.zeros(shape=(len(temp_dipole_total), 3))
                for ii, _entry in enumerate(temp_dipole):
                    for jj, _val in enumerate(temp_dipole[ii]):
                        dipole[ii][jj] = temp_dipole[ii][jj]
                self.data["dipoles"]["dipole"] = dipole

        # Mulliken table layout differs: unrestricted runs have an extra spin column.
        if self.data.get("unrestricted", []):
            header_pattern = (
                r"\-+\s+Ground-State Mulliken Net Atomic Charges\s+Atom\s+Charge \(a\.u\.\)\s+"
                r"Spin\s\(a\.u\.\)\s+\-+"
            )
            table_pattern = r"\s+\d+\s\w+\s+([\d\-\.]+)\s+([\d\-\.]+)"
            footer_pattern = r"\s\s\-+\s+Sum of atomic charges"
        else:
            header_pattern = r"\-+\s+Ground-State Mulliken Net Atomic Charges\s+Atom\s+Charge \(a\.u\.\)\s+\-+"
            table_pattern = r"\s+\d+\s\w+\s+([\d\-\.]+)"
            footer_pattern = r"\s\s\-+\s+Sum of atomic charges"

        temp_mulliken = read_table_pattern(self.text, header_pattern, table_pattern, footer_pattern)
        real_mulliken = []
        for one_mulliken in temp_mulliken:
            if self.data.get("unrestricted", []):
                # Columns: (charge, spin) per atom.
                temp = np.zeros(shape=(len(one_mulliken), 2))
                for ii, entry in enumerate(one_mulliken):
                    temp[ii, 0] = float(entry[0])
                    temp[ii, 1] = float(entry[1])
            else:
                # Single column: charge per atom.
                temp = np.zeros(len(one_mulliken))
                for ii, entry in enumerate(one_mulliken):
                    temp[ii] = float(entry[0])
            real_mulliken += [temp]

        self.data["Mulliken"] = real_mulliken

        # Check for ESP/RESP charges
        esp_or_resp = read_pattern(self.text, {"key": r"Merz-Kollman (R?ESP) Net Atomic Charges"}).get("key")
        if esp_or_resp is not None:
            header_pattern = r"Merz-Kollman (R?ESP) Net Atomic Charges\s+Atom\s+Charge \(a\.u\.\)\s+\-+"
            table_pattern = r"\s+\d+\s\w+\s+([\d\-\.]+)"
            footer_pattern = r"\s\s\-+\s+Sum of atomic charges"

            temp_esp_or_resp = read_table_pattern(self.text, header_pattern, table_pattern, footer_pattern)
            real_esp_or_resp = []
            for one_entry in temp_esp_or_resp:
                temp = np.zeros(len(one_entry))
                for ii, entry in enumerate(one_entry):
                    temp[ii] = float(entry[0])
                real_esp_or_resp += [temp]
            # Keyed by the captured method name, i.e. "ESP" or "RESP".
            self.data[esp_or_resp[0][0]] = real_esp_or_resp
            # The RESP section also prints a dipole moment derived from the
            # fitted charges; parse magnitude and components separately.
            temp_RESP_dipole_total = read_pattern(
                self.text,
                {"key": r"Related Dipole Moment =\s*([\d\-\.]+)\s*\(X\s*[\d\-\.]+\s*Y\s*[\d\-\.]+\s*Z\s*[\d\-\.]+\)"},
            ).get("key")
            temp_RESP_dipole = read_pattern(
                self.text,
                {  # pylint: disable=line-too-long
                    "key": r"Related Dipole Moment =\s*[\d\-\.]+\s*\(X\s*([\d\-\.]+)\s*Y\s*([\d\-\.]+)\s*Z\s*([\d\-\.]+)\)"
                },
            ).get("key")
            if temp_RESP_dipole is not None:
                if len(temp_RESP_dipole_total) == 1:
                    self.data["dipoles"]["RESP_total"] = float(temp_RESP_dipole_total[0][0])
                    RESP_dipole = np.zeros(3)
                    for ii, val in enumerate(temp_RESP_dipole[0]):
                        RESP_dipole[ii] = float(val)
                    self.data["dipoles"]["RESP_dipole"] = RESP_dipole
                else:
                    RESP_total = np.zeros(len(temp_RESP_dipole_total))
                    for ii, val in enumerate(temp_RESP_dipole_total):
                        RESP_total[ii] = float(val[0])
                    self.data["dipoles"]["RESP_total"] = RESP_total
                    RESP_dipole = np.zeros(shape=(len(temp_RESP_dipole_total), 3))
                    for ii, _entry in enumerate(temp_RESP_dipole):
                        for jj, _val in enumerate(temp_RESP_dipole[ii]):
                            RESP_dipole[ii][jj] = temp_RESP_dipole[ii][jj]
                    self.data["dipoles"]["RESP_dipole"] = RESP_dipole
    def _detect_general_warnings(self):
        """
        Scans the output text for general warning/error messages and records
        each one found as an entry in ``self.data["warnings"]``.

        The inaccurate-integrated-density warning carries numeric data and is
        handled separately; all remaining checks are simple presence tests,
        expressed as a single (warning key -> regex) table instead of a dozen
        near-identical if-blocks.
        """
        # Check for inaccurate integrated density: capture (electrons,
        # numerical integral, relative error) for every occurrence.
        temp_inac_integ = read_pattern(
            self.text,
            {
                "key": r"Inaccurate integrated density:\n\s+Number of electrons\s+=\s+([\d\-\.]+)\n\s+"
                r"Numerical integral\s+=\s+([\d\-\.]+)\n\s+Relative error\s+=\s+([\d\-\.]+)\s+\%\n"
            },
        ).get("key")
        if temp_inac_integ is not None:
            inaccurate_integrated_density = np.zeros(shape=(len(temp_inac_integ), 3))
            for ii, entry in enumerate(temp_inac_integ):
                for jj, val in enumerate(entry):
                    inaccurate_integrated_density[ii][jj] = float(val)
            self.data["warnings"]["inaccurate_integrated_density"] = inaccurate_integrated_density

        # Boolean warnings: each maps a warnings-dict key to the pattern whose
        # presence anywhere in the output triggers it. A match with no capture
        # groups is reported by read_pattern as [[]].
        simple_warnings = {
            # MKL runtime error
            "mkl": r"Intel MKL ERROR",
            # Job hindered by a lack of analytical derivatives
            "missing_analytical_derivates": r"Starting finite difference calculation for IDERIV",
            # MO files of inconsistent size
            "inconsistent_size": r"Inconsistent size for SCF MO coefficient file",
            # AO linear dependence
            "linear_dependence": r"Linear dependence detected in AO basis",
            # Hessian without the desired local structure
            "hessian_local_structure": r"\*\*WARNING\*\* Hessian does not have the Desired Local Structure",
            # GetCART cycle iterations exceeded
            "GetCART_cycles": r"\*\*\*ERROR\*\*\* Exceeded allowed number of iterative cycles in GetCART",
            # Problems with internal coordinates
            "internal_coordinates": r"\*\*WARNING\*\* Problems with Internal Coordinates",
            # Issue with an RFO step
            "bad_lambda_take_NR_step": r"UNABLE TO DETERMINE Lambda IN RFO  \*\*\s+\*\* Taking simple Newton-Raphson step",
            # Switch into Cartesian coordinates
            "switch_to_cartesian": r"SWITCHING TO CARTESIAN OPTIMIZATION",
            # Problem with eigenvalue magnitude
            "eigenvalue_magnitude": r"\*\*WARNING\*\* Magnitude of eigenvalue",
            # Problem with hereditary positive definiteness
            "positive_definiteness_endangered": r"\*\*WARNING\*\* Hereditary positive definiteness endangered",
            # Problems with a colinear bend
            "colinear_bend": r"\*\*\*ERROR\*\*\* Angle[\s\d]+is near\-linear\s+"
            r"But No atom available to define colinear bend",
            # Problems diagonalizing B*B(t)
            "diagonalizing_BBt": r"\*\*\*ERROR\*\*\* Unable to Diagonalize B\*B\(t\) in <MakeNIC>",
        }
        for name, pattern in simple_warnings.items():
            if read_pattern(self.text, {"key": pattern}, terminate_on_match=True).get("key") == [[]]:
                self.data["warnings"][name] = True
    def _read_geometries(self):
        """
        Parses all geometries from an optimization trajectory.

        Selects one of three table layouts depending on whether the legacy or
        new optimizer was used (and, for the new optimizer, whether Cartesian
        coordinates were requested). Populates "geometries", "last_geometry",
        "molecule_from_last_geometry" and, if the optimization converged, the
        optimized-geometry entries (or "optimized_zmat" as a fallback).
        """
        geoms = []
        if self.data.get("new_optimizer") is None:
            # Legacy optimizer: geometries are echoed at each optimization cycle.
            header_pattern = r"\s+Optimization\sCycle:\s+\d+\s+Coordinates \(Angstroms\)\s+ATOM\s+X\s+Y\s+Z"
            table_pattern = r"\s+\d+\s+\w+\s+([\d\-\.]+)\s+([\d\-\.]+)\s+([\d\-\.]+)"
            footer_pattern = r"\s+Point Group\:\s+[\d\w\*]+\s+Number of degrees of freedom\:\s+\d+"
        elif read_pattern(
            self.text,
            {"key": r"Geometry Optimization Coordinates :\s+Cartesian"},
            terminate_on_match=True,
        ).get("key") == [[]]:
            # New optimizer with Cartesian coordinates.
            header_pattern = (
                r"RMS\s+of Stepsize\s+[\d\-\.]+\s+-+\s+Standard Nuclear Orientation "
                r"\(Angstroms\)\s+I\s+Atom\s+X\s+Y\s+Z\s+-+"
            )
            table_pattern = r"\s*\d+\s+[a-zA-Z]+\s*([\d\-\.]+)\s*([\d\-\.]+)\s*([\d\-\.]+)\s*"
            footer_pattern = r"\s*-+"
        else:  # pylint: disable=line-too-long
            # New optimizer with internal coordinates: geometries appear after
            # the iterative coordinate back-transformation.
            header_pattern = (
                r"Finished Iterative Coordinate Back-Transformation\s+-+\s+Standard Nuclear Orientation "
                r"\(Angstroms\)\s+I\s+Atom\s+X\s+Y\s+Z\s+-+"
            )
            table_pattern = r"\s*\d+\s+[a-zA-Z]+\s*([\d\-\.]+)\s*([\d\-\.]+)\s*([\d\-\.]+)\s*"
            footer_pattern = r"\s*-+"
        parsed_geometries = read_table_pattern(self.text, header_pattern, table_pattern, footer_pattern)
        for parsed_geometry in parsed_geometries:
            if not parsed_geometry:
                geoms.append(None)
            else:
                geoms.append(process_parsed_coords(parsed_geometry))
        if len(geoms) >= 1:
            self.data["geometries"] = geoms
            self.data["last_geometry"] = geoms[-1]
            if self.data.get("charge") is not None:
                self.data["molecule_from_last_geometry"] = Molecule(
                    species=self.data.get("species"),
                    coords=self.data.get("last_geometry"),
                    charge=self.data.get("charge"),
                    spin_multiplicity=self.data.get("multiplicity"),
                )

            # Parses optimized XYZ coordinates. If not present, parses optimized Z-matrix.
            if self.data.get("new_optimizer") is None:  # pylint: disable-next=line-too-long
                header_pattern = r"\*+\s+(OPTIMIZATION|TRANSITION STATE)\s+CONVERGED\s+\*+\s+\*+\s+Coordinates \(Angstroms\)\s+ATOM\s+X\s+Y\s+Z"
                table_pattern = r"\s+\d+\s+\w+\s+([\d\-\.]+)\s+([\d\-\.]+)\s+([\d\-\.]+)"
                footer_pattern = r"\s+Z-matrix Print:"
            else:  # pylint: disable-next=line-too-long
                header_pattern = r"(OPTIMIZATION|TRANSITION STATE)\sCONVERGED\s+\*+\s+\*+\s+-+\s+Standard Nuclear Orientation \(Angstroms\)\s+I\s+Atom\s+X\s+Y\s+Z\s+-+"
                table_pattern = r"\s*\d+\s+[a-zA-Z]+\s*([\d\-\.]+)\s*([\d\-\.]+)\s*([\d\-\.]+)\s*"
                footer_pattern = r"\s*-+"
            parsed_optimized_geometries = read_table_pattern(self.text, header_pattern, table_pattern, footer_pattern)

            if not parsed_optimized_geometries:
                # No Cartesian block after convergence: fall back to the Z-matrix echo.
                self.data["optimized_geometry"] = None
                header_pattern = (
                    r"^\s+\*+\s+(OPTIMIZATION|TRANSITION STATE) CONVERGED\s+\*+\s+\*+\s+Z-matrix\s+"
                    r"Print:\s+\$molecule\s+[\d\-]+\s+[\d\-]+\n"
                )
                table_pattern = (
                    r"\s*(\w+)(?:\s+(\d+)\s+([\d\-\.]+)(?:\s+(\d+)\s+([\d\-\.]+)"
                    r"(?:\s+(\d+)\s+([\d\-\.]+))*)*)*(?:\s+0)*"
                )
                footer_pattern = r"^\$end\n"

                self.data["optimized_zmat"] = read_table_pattern(
                    self.text, header_pattern, table_pattern, footer_pattern
                )
            else:
                self.data["optimized_geometry"] = process_parsed_coords(parsed_optimized_geometries[0])
                self.data["optimized_geometries"] = [process_parsed_coords(i) for i in parsed_optimized_geometries]
                if self.data.get("charge") is not None:
                    self.data["molecule_from_optimized_geometry"] = Molecule(
                        species=self.data.get("species"),
                        coords=self.data.get("optimized_geometry"),
                        charge=self.data.get("charge"),
                        spin_multiplicity=self.data.get("multiplicity"),
                    )
                    self.data["molecules_from_optimized_geometries"] = []
                    for geom in self.data["optimized_geometries"]:
                        mol = Molecule(
                            species=self.data.get("species"),
                            coords=geom,
                            charge=self.data.get("charge"),
                            spin_multiplicity=self.data.get("multiplicity"),
                        )
                        self.data["molecules_from_optimized_geometries"].append(mol)
    def _get_grad_format_length(self, header):
        """
        Determines the maximum number of gradient entries printed on a line,
        which changes for different versions of Q-Chem.

        Starting from the bare header, column indices are appended to the
        pattern one at a time until the extended pattern no longer matches
        the output text; the line width is then derived from how many
        indices matched.
        """
        pattern = header
        index = 1
        # read_pattern returns [[]] while the growing pattern still matches
        # (no capture groups); keep extending it with the next column index.
        while read_pattern(self.text, {"key": pattern}, terminate_on_match=True).get("key") == [[]]:
            pattern += r"\s+" + str(index)
            index += 1
        return index - 2
    def _sort_gradient_table(self, parsed_gradients, grad_format_length):
        """
        Reshape raw parsed gradient tables into an array of shape
        (num_tables, num_atoms, 3).

        Each table prints atoms in blocks of up to grad_format_length columns,
        with three consecutive rows holding the x, y, and z components.
        Unmatched regex captures (the string "None") are left as zero.
        """
        sorted_gradients = np.zeros(shape=(len(parsed_gradients), len(self.data["initial_molecule"]), 3))
        for ii, grad in enumerate(parsed_gradients):
            for jj in range(int(len(grad) / 3)):
                for kk in range(grad_format_length):
                    if grad[jj * 3][kk] != "None":
                        # Atom index within the full molecule for this block/column.
                        atom_index = jj * grad_format_length + kk
                        sorted_gradients[ii][atom_index][0] = grad[jj * 3][kk]
                        sorted_gradients[ii][atom_index][1] = grad[jj * 3 + 1][kk]
                        sorted_gradients[ii][atom_index][2] = grad[jj * 3 + 2][kk]
        return sorted_gradients

    def _read_gradients(self):
        """
        Parses all gradients obtained during an optimization trajectory,
        including (when printed) PCM-corrected gradients and CDS gradients
        from solvated calculations.
        """
        grad_header_pattern = r"Gradient of (?:SCF)?(?:MP2)? Energy(?: \(in au\.\))?"
        footer_pattern = r"(?:Max gradient component|Gradient time)"

        # The number of atom columns printed per line differs between Q-Chem versions.
        grad_format_length = self._get_grad_format_length(grad_header_pattern)
        grad_table_pattern = (
            r"(?:\s+\d+(?:\s+\d+)?(?:\s+\d+)?(?:\s+\d+)?(?:\s+\d+)?(?:\s+\d+)?)?\n\s\s\s\s[1-3]\s*" r"(\-?[\d\.]{9,12})"
        )
        if grad_format_length > 1:
            # Add one optional capture group per additional atom column.
            for _ in range(1, grad_format_length):
                grad_table_pattern = grad_table_pattern + r"(?:\s*(\-?[\d\.]{9,12}))?"

        parsed_gradients = read_table_pattern(self.text, grad_header_pattern, grad_table_pattern, footer_pattern)
        if len(parsed_gradients) >= 1:
            self.data["gradients"] = self._sort_gradient_table(parsed_gradients, grad_format_length)

            if self.data["solvent_method"] is not None:
                # PCM gradients are printed one atom per row, so no re-sorting is needed.
                header_pattern = r"total gradient after adding PCM contribution --\s+-+\s+Atom\s+X\s+Y\s+Z\s+-+"
                table_pattern = r"\s+\d+\s+([\d\-\.]+)\s+([\d\-\.]+)\s+([\d\-\.]+)\s"
                footer_pattern = r"-+"

                parsed_gradients = read_table_pattern(self.text, header_pattern, table_pattern, footer_pattern)

                pcm_gradients = np.zeros(shape=(len(parsed_gradients), len(self.data["initial_molecule"]), 3))
                for ii, grad in enumerate(parsed_gradients):
                    for jj, entry in enumerate(grad):
                        for kk, val in enumerate(entry):
                            pcm_gradients[ii][jj][kk] = float(val)

                self.data["pcm_gradients"] = pcm_gradients
            else:
                self.data["pcm_gradients"] = None

            if read_pattern(self.text, {"key": r"Gradient of CDS energy"}, terminate_on_match=True).get("key") == [[]]:
                # CDS gradients use the same block layout as the main gradient table.
                header_pattern = r"Gradient of CDS energy"

                parsed_gradients = read_table_pattern(
                    self.text, header_pattern, grad_table_pattern, grad_header_pattern
                )

                self.data["CDS_gradients"] = self._sort_gradient_table(parsed_gradients, grad_format_length)
            else:
                self.data["CDS_gradients"] = None
    def _read_optimization_data(self):
        """
        Parses optimization output: the energy trajectory, geometries, and
        gradients, plus step-size norms for the new optimizer, and records
        optimization-specific errors when convergence was not reached.
        """
        # The energy-per-step line format differs between the legacy optimizer
        # (and Q-Chem 6) and the new optimizer.
        if self.data.get("new_optimizer") is None or self.data["version"] == "6":
            temp_energy_trajectory = read_pattern(self.text, {"key": r"\sEnergy\sis\s+([\d\-\.]+)"}).get("key")
        else:
            temp_energy_trajectory = read_pattern(self.text, {"key": r"\sStep\s*\d+\s*:\s*Energy\s*([\d\-\.]+)"}).get(
                "key"
            )
        if self.data.get("new_optimizer") == [[]]:
            # Formatting of the new optimizer means we need to prepend the first energy
            if temp_energy_trajectory is not None:
                temp_energy_trajectory.insert(0, [str(self.data["Total_energy_in_the_final_basis_set"][0])])
        self._read_geometries()
        self._read_gradients()
        if temp_energy_trajectory is None:
            # No energies at all: either the job died immediately or a back
            # transform failure occurred before the first step.
            self.data["energy_trajectory"] = []
            if read_pattern(self.text, {"key": r"Error in back_transform"}, terminate_on_match=True).get("key") == [[]]:
                self.data["errors"] += ["back_transform_error"]
        else:
            real_energy_trajectory = np.zeros(len(temp_energy_trajectory))
            for ii, entry in enumerate(temp_energy_trajectory):
                real_energy_trajectory[ii] = float(entry[0])
            self.data["energy_trajectory"] = real_energy_trajectory
            if self.data.get("new_optimizer") == [[]]:
                # The new optimizer also reports the norm of each step taken.
                temp_norms = read_pattern(self.text, {"key": r"Norm of Stepsize\s*([\d\-\.]+)"}).get("key")
                if temp_norms is not None:
                    norms = np.zeros(len(temp_norms))
                    for ii, val in enumerate(temp_norms):
                        norms[ii] = float(val[0])
                    self.data["norm_of_stepsize"] = norms
            if openbabel is not None:
                # Structure-change detection requires openbabel for bonding analysis.
                self.data["structure_change"] = check_for_structure_changes(
                    self.data["initial_molecule"],
                    self.data["molecule_from_last_geometry"],
                )
            # Then, if no optimized geometry or z-matrix is found, and no errors have been previously
            # identified, check to see if the optimization failed to converge or if Lambda wasn't able
            # to be determined or if a back transform error was encountered.
            if (
                len(self.data.get("errors")) == 0
                and self.data.get("optimized_geometry") is None
                and len(self.data.get("optimized_zmat")) == 0
            ):
                if read_pattern(
                    self.text,
                    {"key": r"MAXIMUM OPTIMIZATION CYCLES REACHED"},
                    terminate_on_match=True,
                ).get("key") == [[]]:
                    self.data["errors"] += ["out_of_opt_cycles"]
                elif read_pattern(
                    self.text,
                    {"key": r"Maximum number of iterations reached during minimization algorithm"},
                    terminate_on_match=True,
                ).get("key") == [[]]:
                    self.data["errors"] += ["out_of_opt_cycles"]
                elif read_pattern(
                    self.text,
                    {"key": r"UNABLE TO DETERMINE Lamda IN FormD"},
                    terminate_on_match=True,
                ).get("key") == [[]]:
                    self.data["errors"] += ["unable_to_determine_lamda"]
                elif read_pattern(self.text, {"key": r"Error in back_transform"}, terminate_on_match=True).get(
                    "key"
                ) == [[]]:
                    self.data["errors"] += ["back_transform_error"]
                elif read_pattern(self.text, {"key": r"pinv\(\)\: svd failed"}, terminate_on_match=True).get("key") == [
                    []
                ]:
                    self.data["errors"] += ["svd_failed"]
    def _read_frequency_data(self):
        """
        Parses cpscf_nseg, frequencies, enthalpy, entropy, and mode vectors.
        """
        # Determine how many segments the CPSCF calculation was split into
        # (0 when CPSCF was not run at all, 1 when run unsegmented).
        if read_pattern(self.text, {"key": r"Calculating MO derivatives via CPSCF"}, terminate_on_match=True).get(
            "key"
        ) == [[]]:
            temp_cpscf_nseg = read_pattern(
                self.text,
                {"key": r"CPSCF will be done in([\d\s]+)segments to save memory"},
                terminate_on_match=True,
            ).get("key")
            if temp_cpscf_nseg is None:
                self.data["cpscf_nseg"] = 1
            else:
                self.data["cpscf_nseg"] = int(temp_cpscf_nseg[0][0])
        else:
            self.data["cpscf_nseg"] = 0

        # Raman-specific tables are only printed when doraman was requested.
        raman = False
        if read_pattern(self.text, {"key": r"doraman\s*(?:=)*\s*true"}, terminate_on_match=True).get("key") == [[]]:
            raman = True

        # Frequencies/intensities are printed up to three modes per line, so
        # each pattern has up to three (optional) capture groups per match.
        temp_dict = read_pattern(
            self.text,
            {
                "frequencies": r"\s*Frequency:\s+(\-?[\d\.\*]+)(?:\s+(\-?[\d\.\*]+)(?:\s+(\-?[\d\.\*]+))*)*",
                "trans_dip": r"TransDip\s+(\-?[\d\.]{5,7}|\*{5,7})\s*(\-?[\d\.]{5,7}|\*{5,7})"
                r"\s*(\-?[\d\.]{5,7}|\*{5,7})\s*"
                r"(?:(\-?[\d\.]{5,7}|\*{5,7})\s*(\-?[\d\.]{5,7}|\*{5,7})\s*(\-?[\d\.]{5,7}|\*{5,7})\s*"
                r"(?:(\-?[\d\.]{5,7}|\*{5,7})\s*(\-?[\d\.]{5,7}|\*{5,7})\s*(\-?[\d\.]{5,7}|\*{5,7}))*)*",
                "IR_intens": r"\s*IR Intens:\s*(\-?[\d\.\*]+)(?:\s+(\-?[\d\.\*]+)(?:\s+(\-?[\d\.\*]+))*)*",
                "IR_active": r"\s*IR Active:\s+([YESNO]+)(?:\s+([YESNO]+)(?:\s+([YESNO]+))*)*",
                "raman_intens": r"\s*Raman Intens:\s*(\-?[\d\.\*]+)(?:\s+(\-?[\d\.\*]+)(?:\s+(\-?[\d\.\*]+))*)*",
                "depolar": r"\s*Depolar:\s*(\-?[\d\.\*]+)(?:\s+(\-?[\d\.\*]+)(?:\s+(\-?[\d\.\*]+))*)*",
                "raman_active": r"\s*Raman Active:\s+([YESNO]+)(?:\s+([YESNO]+)(?:\s+([YESNO]+))*)*",
                "ZPE": r"\s*Zero point vibrational energy:\s+([\d\-\.]+)\s+kcal/mol",
                "trans_enthalpy": r"\s*Translational Enthalpy:\s+([\d\-\.]+)\s+kcal/mol",
                "rot_enthalpy": r"\s*Rotational Enthalpy:\s+([\d\-\.]+)\s+kcal/mol",
                "vib_enthalpy": r"\s*Vibrational Enthalpy:\s+([\d\-\.]+)\s+kcal/mol",
                "gas_constant": r"\s*gas constant \(RT\):\s+([\d\-\.]+)\s+kcal/mol",
                "trans_entropy": r"\s*Translational Entropy:\s+([\d\-\.]+)\s+cal/mol\.K",
                "rot_entropy": r"\s*Rotational Entropy:\s+([\d\-\.]+)\s+cal/mol\.K",
                "vib_entropy": r"\s*Vibrational Entropy:\s+([\d\-\.]+)\s+cal/mol\.K",
                "total_enthalpy": r"\s*Total Enthalpy:\s+([\d\-\.]+)\s+kcal/mol",
                "total_entropy": r"\s*Total Entropy:\s+([\d\-\.]+)\s+cal/mol\.K",
            },
        )

        # Scalar thermochemistry quantities: stored as float or None.
        keys = [
            "ZPE",
            "trans_enthalpy",
            "rot_enthalpy",
            "vib_enthalpy",
            "gas_constant",
            "trans_entropy",
            "rot_entropy",
            "vib_entropy",
            "total_enthalpy",
            "total_entropy",
        ]

        for key in keys:
            if temp_dict.get(key) is None:
                self.data[key] = None
            else:
                self.data[key] = float(temp_dict.get(key)[0][0])

        if temp_dict.get("frequencies") is None:
            # No frequency table at all: null out every mode-resolved quantity.
            self.data["frequencies"] = None
            self.data["IR_intens"] = None
            self.data["IR_active"] = None
            self.data["raman_intens"] = None
            self.data["raman_active"] = None
            self.data["depolar"] = None
            self.data["trans_dip"] = None
        else:
            # Flatten the per-line capture groups into single lists; unmatched
            # optional groups appear as the string "None".
            temp_freqs = [value for entry in temp_dict.get("frequencies") for value in entry]
            temp_IR_intens = [value for entry in temp_dict.get("IR_intens") for value in entry]
            IR_active = [value for entry in temp_dict.get("IR_active") for value in entry]
            temp_trans_dip = [value for entry in temp_dict.get("trans_dip") for value in entry]
            self.data["IR_active"] = IR_active

            if raman:
                raman_active = [value for entry in temp_dict.get("raman_active") for value in entry]
                temp_raman_intens = [value for entry in temp_dict.get("raman_intens") for value in entry]
                temp_depolar = [value for entry in temp_dict.get("depolar") for value in entry]
                self.data["raman_active"] = raman_active
                # Overflowed fields (printed as "***...") become inf.
                # NOTE(review): arrays are sized excluding "None" entries but
                # indexed by position in the full list — assumes all "None"
                # captures trail the real values; confirm against read_pattern.
                raman_intens = np.zeros(len(temp_raman_intens) - temp_raman_intens.count("None"))
                for ii, entry in enumerate(temp_raman_intens):
                    if entry != "None":
                        if "*" in entry:
                            raman_intens[ii] = float("inf")
                        else:
                            raman_intens[ii] = float(entry)
                self.data["raman_intens"] = raman_intens
                depolar = np.zeros(len(temp_depolar) - temp_depolar.count("None"))
                for ii, entry in enumerate(temp_depolar):
                    if entry != "None":
                        if "*" in entry:
                            depolar[ii] = float("inf")
                        else:
                            depolar[ii] = float(entry)
                self.data["depolar"] = depolar
            else:
                self.data["raman_intens"] = None
                self.data["raman_active"] = None
                self.data["depolar"] = None

            # Transition dipoles come in (x, y, z) triples, one per mode.
            trans_dip = np.zeros(shape=(int((len(temp_trans_dip) - temp_trans_dip.count("None")) / 3), 3))
            for ii, entry in enumerate(temp_trans_dip):
                if entry != "None":
                    if "*" in entry:
                        trans_dip[int(ii / 3)][ii % 3] = float("inf")
                    else:
                        trans_dip[int(ii / 3)][ii % 3] = float(entry)
            self.data["trans_dip"] = trans_dip

            # Frequencies: "***" entries are undefined. They are only sensible
            # at the extremes of the sorted list, so they are mapped to -inf at
            # the start and +inf at the end; anywhere else is a hard error.
            freqs = np.zeros(len(temp_freqs) - temp_freqs.count("None"))
            for ii, entry in enumerate(temp_freqs):
                if entry != "None":
                    if "*" in entry:
                        if ii == 0:
                            freqs[ii] = -float("inf")
                        elif ii == len(freqs) - 1:
                            freqs[ii] = float("inf")
                        elif freqs[ii - 1] == -float("inf"):
                            freqs[ii] = -float("inf")
                        elif "*" in temp_freqs[ii + 1]:
                            freqs[ii] = float("inf")
                        else:
                            raise RuntimeError(
                                "ERROR: Encountered an undefined frequency not at the beginning or end of the "
                                "frequency list, which makes no sense! Exiting..."
                            )
                        # Undefined frequencies are an error on failed jobs but
                        # only a warning on completed ones.
                        if not self.data.get("completion", []):
                            if "undefined_frequency" not in self.data["errors"]:
                                self.data["errors"] += ["undefined_frequency"]
                        else:
                            if "undefined_frequency" not in self.data["warnings"]:
                                self.data["warnings"]["undefined_frequency"] = True
                    else:
                        freqs[ii] = float(entry)
            self.data["frequencies"] = freqs

            IR_intens = np.zeros(len(temp_IR_intens) - temp_IR_intens.count("None"))
            for ii, entry in enumerate(temp_IR_intens):
                if entry != "None":
                    if "*" in entry:
                        IR_intens[ii] = float("inf")
                    else:
                        IR_intens[ii] = float(entry)
            self.data["IR_intens"] = IR_intens

            # Mode displacement vectors: the table header depends on whether
            # Raman data was printed (the vectors follow the last table row).
            if not raman:
                header_pattern = r"\s*Raman Active:\s+[YESNO]+\s+(?:[YESNO]+\s+)*X\s+Y\s+Z\s+(?:X\s+Y\s+Z\s+)*"
            else:
                header_pattern = r"\s*Depolar:\s*\-?[\d\.\*]+\s+(?:\-?[\d\.\*]+\s+)*X\s+Y\s+Z\s+(?:X\s+Y\s+Z\s+)*"
            table_pattern = (
                r"\s*[a-zA-Z][a-zA-Z\s]\s*([\d\-\.]+)\s*([\d\-\.]+)\s*([\d\-\.]+)\s*(?:([\d\-\.]+)\s*"
                r"([\d\-\.]+)\s*([\d\-\.]+)\s*(?:([\d\-\.]+)\s*([\d\-\.]+)\s*([\d\-\.]+))*)*"
            )
            footer_pattern = (
                r"TransDip\s+\-?[\d\.\*]+\s*\-?[\d\.\*]+\s*\-?[\d\.\*]+\s*(?:\-?[\d\.\*]+\s*\-?"
                r"[\d\.\*]+\s*\-?[\d\.\*]+\s*)*"
            )
            temp_freq_mode_vecs = read_table_pattern(self.text, header_pattern, table_pattern, footer_pattern)
            # Up to three modes are printed side-by-side per table block;
            # unpack into one (n_modes, n_atoms, 3) array.
            freq_mode_vecs = np.zeros(shape=(len(freqs), len(temp_freq_mode_vecs[0]), 3))

            for ii, triple_FMV in enumerate(temp_freq_mode_vecs):
                for jj, line in enumerate(triple_FMV):
                    for kk, entry in enumerate(line):
                        if entry != "None":
                            freq_mode_vecs[int(ii * 3 + math.floor(kk / 3)), jj, kk % 3] = float(entry)

            self.data["frequency_mode_vectors"] = freq_mode_vecs
            # Sanity check: all mode-resolved arrays should have one entry per frequency.
            freq_length = len(self.data["frequencies"])
            if (
                len(self.data["frequency_mode_vectors"]) != freq_length
                or len(self.data["IR_intens"]) != freq_length
                or len(self.data["IR_active"]) != freq_length
            ):
                self.data["warnings"]["frequency_length_inconsistency"] = True
    def _read_force_data(self):
        """
        Parses the output of a force (single-point gradient) calculation by
        delegating to the shared gradient parser.
        """
        self._read_gradients()
    def _read_scan_data(self):
        """
        Parses potential-energy-surface (PES) scan output.

        Populates the energy trajectory, geometries, and gradients; records
        out-of-cycles / Lamda errors; and fills "scan_energies",
        "scan_variables" (the echoed $scan input section), and
        "scan_constraint_sets" (the constraints applied at each step).
        """
        temp_energy_trajectory = read_pattern(self.text, {"key": r"\sEnergy\sis\s+([\d\-\.]+)"}).get("key")
        if temp_energy_trajectory is None:
            self.data["energy_trajectory"] = []
        else:
            real_energy_trajectory = np.zeros(len(temp_energy_trajectory))
            for ii, entry in enumerate(temp_energy_trajectory):
                real_energy_trajectory[ii] = float(entry[0])
            self.data["energy_trajectory"] = real_energy_trajectory

        self._read_geometries()
        if openbabel is not None:
            # Structure-change detection requires openbabel for bonding analysis.
            self.data["structure_change"] = check_for_structure_changes(
                self.data["initial_molecule"],
                self.data["molecule_from_last_geometry"],
            )
        self._read_gradients()

        # Record optimization-style failures only if no error was found earlier.
        if len(self.data.get("errors")) == 0:
            if read_pattern(self.text, {"key": r"MAXIMUM OPTIMIZATION CYCLES REACHED"}, terminate_on_match=True).get(
                "key"
            ) == [[]]:
                self.data["errors"] += ["out_of_opt_cycles"]
            elif read_pattern(self.text, {"key": r"UNABLE TO DETERMINE Lamda IN FormD"}, terminate_on_match=True).get(
                "key"
            ) == [[]]:
                self.data["errors"] += ["unable_to_determine_lamda"]

        # The scan summary table has two columns for a one-variable scan and
        # three columns for a two-variable scan.
        header_pattern = r"\s*\-+ Summary of potential scan\: \-+\s*"
        row_pattern_single = r"\s*([\-\.0-9]+)\s+([\-\.0-9]+)\s*\n"
        row_pattern_double = r"\s*([\-\.0-9]+)\s+([\-\.0-9]+)\s+([\-\.0-9]+)\s*\n"
        footer_pattern = r"\s*\-+"

        single_data = read_table_pattern(
            self.text,
            header_pattern=header_pattern,
            row_pattern=row_pattern_single,
            footer_pattern=footer_pattern,
        )

        self.data["scan_energies"] = []
        if len(single_data) == 0:
            double_data = read_table_pattern(
                self.text,
                header_pattern=header_pattern,
                row_pattern=row_pattern_double,
                footer_pattern=footer_pattern,
            )
            if len(double_data) == 0:
                # No summary table present (e.g. the scan crashed early).
                self.data["scan_energies"] = None
            else:
                for line in double_data[0]:
                    params = [float(line[0]), float(line[1])]
                    energy = float(line[2])
                    self.data["scan_energies"].append({"params": params, "energy": energy})
        else:
            for line in single_data[0]:
                param = float(line[0])
                energy = float(line[1])
                self.data["scan_energies"].append({"params": param, "energy": energy})

        # Parse the echoed $scan input section: each row is a variable type
        # (stre/bend/tors), the atoms involved, and start/end/increment values.
        scan_inputs_head = r"\s*\$[Ss][Cc][Aa][Nn]"
        scan_inputs_row = r"\s*([Ss][Tt][Rr][Ee]|[Tt][Oo][Rr][Ss]|[Bb][Ee][Nn][Dd]) "
        scan_inputs_row += r"((?:[0-9]+\s+)+)([\-\.0-9]+)\s+([\-\.0-9]+)\s+([\-\.0-9]+)\s*"
        scan_inputs_foot = r"\s*\$[Ee][Nn][Dd]"

        constraints_meta = read_table_pattern(
            self.text,
            header_pattern=scan_inputs_head,
            row_pattern=scan_inputs_row,
            footer_pattern=scan_inputs_foot,
        )

        self.data["scan_variables"] = {"stre": [], "bend": [], "tors": []}
        # Guard against outputs where the $scan echo could not be parsed;
        # indexing an empty parse result would raise an IndexError.
        if len(constraints_meta) > 0:
            for row in constraints_meta[0]:
                var_type = row[0].lower()
                self.data["scan_variables"][var_type].append(
                    {
                        "atoms": [int(i) for i in row[1].split()],
                        "start": float(row[2]),
                        "end": float(row[3]),
                        "increment": float(row[4]),
                    }
                )

        # Parse the per-step constraint reports (current and target values).
        temp_constraint = read_pattern(
            self.text,
            {"key": r"\s*(Distance\(Angs\)|Angle|Dihedral)\:\s*((?:[0-9]+\s+)+)+([\.0-9]+)\s+([\.0-9]+)"},
        ).get("key")
        self.data["scan_constraint_sets"] = {"stre": [], "bend": [], "tors": []}
        if temp_constraint is not None:
            for entry in temp_constraint:
                atoms = [int(i) for i in entry[1].split()]
                current = float(entry[2])
                target = float(entry[3])
                # The atom count must match the constraint type (2 for a
                # stretch, 3 for a bend, 4 for a torsion).
                if entry[0] == "Distance(Angs)":
                    if len(atoms) == 2:
                        self.data["scan_constraint_sets"]["stre"].append(
                            {"atoms": atoms, "current": current, "target": target}
                        )
                elif entry[0] == "Angle":
                    if len(atoms) == 3:
                        self.data["scan_constraint_sets"]["bend"].append(
                            {"atoms": atoms, "current": current, "target": target}
                        )
                elif entry[0] == "Dihedral":
                    if len(atoms) == 4:
                        self.data["scan_constraint_sets"]["tors"].append(
                            {"atoms": atoms, "current": current, "target": target}
                        )
    def _read_pcm_information(self):
        """
        Parses information from PCM solvent calculations.
        """
        temp_dict = read_pattern(
            self.text,
            {
                "g_electrostatic": r"\s*G_electrostatic\s+=\s+([\d\-\.]+)\s+hartree\s+=\s+([\d\-\.]+)\s+kcal/mol\s*",
                "g_cavitation": r"\s*G_cavitation\s+=\s+([\d\-\.]+)\s+hartree\s+=\s+([\d\-\.]+)\s+kcal/mol\s*",
                "g_dispersion": r"\s*G_dispersion\s+=\s+([\d\-\.]+)\s+hartree\s+=\s+([\d\-\.]+)\s+kcal/mol\s*",
                "g_repulsion": r"\s*G_repulsion\s+=\s+([\d\-\.]+)\s+hartree\s+=\s+([\d\-\.]+)\s+kcal/mol\s*",
                "total_contribution_pcm": r"\s*Total\s+=\s+([\d\-\.]+)\s+hartree\s+=\s+([\d\-\.]+)\s+kcal/mol\s*",
                "solute_internal_energy": r"Solute Internal Energy \(H0\)\s*=\s*([\d\-\.]+)",
            },
        )

        # Store each quantity as None (absent), a scalar (single match), or an
        # array of hartree values (one per match, e.g. multi-job outputs).
        for key, matches in temp_dict.items():
            if matches is None:
                value = None
            elif len(matches) == 1:
                value = float(matches[0][0])
            else:
                value = np.array([float(entry[0]) for entry in matches])
            self.data["solvent_data"][key] = value
    def _read_smd_information(self):
        """
        Parses information from SMD solvent calculations.
        """
        temp_dict = read_pattern(
            self.text,
            {
                "smd0": r"E-EN\(g\) gas\-phase elect\-nuc energy\s*([\d\-\.]+) a\.u\.",
                "smd3": r"G\-ENP\(liq\) elect\-nuc\-pol free energy of system\s*([\d\-\.]+) a\.u\.",
                "smd4": r"G\-CDS\(liq\) cavity\-dispersion\-solvent structure\s*free energy\s*([\d\-\.]+) kcal\/mol",
                "smd6": r"G\-S\(liq\) free energy of system\s*([\d\-\.]+) a\.u\.",
                "smd9": r"DeltaG\-S\(liq\) free energy of\s*solvation\s*\(9\) = \(6\) \- \(0\)\s*([\d\-\.]+) kcal\/mol",
            },
        )

        # Each SMD quantity becomes None (absent), a scalar (single match), or
        # an array of values (one per match, e.g. multi-job outputs).
        for key, matches in temp_dict.items():
            if matches is None:
                value = None
            elif len(matches) == 1:
                value = float(matches[0][0])
            else:
                value = np.array([float(entry[0]) for entry in matches])
            self.data["solvent_data"][key] = value
    def _read_isosvp_information(self):
        """
        Parses information from ISOSVP solvent calculations.

        Reads the five energies reported in the "The Final SS(V)PE energies
        and Properties" section — the final solution-phase energy, the solute
        internal energy, the change in solute internal energy, the reaction
        field free energy, and the total solvation free energy — along with
        the DIELST fortran variable giving the dielectric constant used.
        """
        temp_dict = read_pattern(
            self.text,
            {
                "final_soln_phase_e": r"\s*The Final Solution-Phase Energy\s+=\s+([\d\-\.]+)\s*",
                "solute_internal_e": r"\s*The Solute Internal Energy\s+=\s+([\d\-\.]+)\s*",
                "total_solvation_free_e": r"\s*The Total Solvation Free Energy\s+=\s+([\d\-\.]+)\s*",
                "change_solute_internal_e": r"\s*The Change in Solute Internal Energy\s+=\s+(\s+[\d\-\.]+)\s+\(\s+([\d\-\.]+)\s+KCAL/MOL\)\s*",  # pylint: disable=line-too-long
                "reaction_field_free_e": r"\s*The Reaction Field Free Energy\s+=\s+(\s+[\d\-\.]+)\s+\(\s+([\d\-\.]+)\s+KCAL/MOL\)\s*",  # pylint: disable=line-too-long
                "isosvp_dielectric": r"\s*DIELST=\s+(\s+[\d\-\.]+)\s*",
            },
        )

        # Each quantity becomes None (absent), a scalar (single match), or an
        # array of atomic-unit values (one per match, e.g. multi-job outputs).
        for key, matches in temp_dict.items():
            if matches is None:
                value = None
            elif len(matches) == 1:
                value = float(matches[0][0])
            else:
                value = np.array([float(entry[0]) for entry in matches])
            self.data["solvent_data"]["isosvp"][key] = value
    def _read_cmirs_information(self):
        """
        Parses information from CMIRS solvent calculations.

        In addition to the five ISOSVP energies (parsed separately in
        _read_isosvp_information), CMIRS output reports four more quantities,
        which are read here: the dispersion energy, the exchange energy, the
        minimum negative field energy, and the maximum positive field energy.
        """
        temp_dict = read_pattern(
            self.text,
            {
                "dispersion_e": r"\s*The Dispersion Energy\s+=\s+(\s+[\d\-\.]+)\s+\(\s+([\d\-\.]+)\s+KCAL/MOL\)\s*",
                "exchange_e": r"\s*The Exchange Energy\s+=\s+(\s+[\d\-\.]+)\s+\(\s+([\d\-\.]+)\s+KCAL/MOL\)\s*",
                "min_neg_field_e": r"\s*Min. Negative Field Energy\s+=\s+(\s+[\d\-\.]+)\s+\(\s+([\d\-\.]+)\s+KCAL/MOL\)\s*",  # pylint: disable=line-too-long
                "max_pos_field_e": r"\s*Max. Positive Field Energy\s+=\s+(\s+[\d\-\.]+)\s+\(\s+([\d\-\.]+)\s+KCAL/MOL\)\s*",  # pylint: disable=line-too-long
            },
        )

        # Each quantity becomes None (absent), a scalar (single match), or an
        # array of atomic-unit values (one per match, e.g. multi-job outputs).
        for key, matches in temp_dict.items():
            if matches is None:
                value = None
            elif len(matches) == 1:
                value = float(matches[0][0])
            else:
                value = np.array([float(entry[0]) for entry in matches])
            self.data["solvent_data"]["cmirs"][key] = value
    def _read_nbo_data(self):
        """
        Parse NBO output into self.data["nbo_data"].

        Each section produced by nbo_parser is stored as a list of
        plain dicts (one per DataFrame) so the result is JSON-serializable.
        """
        parsed = nbo_parser(self.filename)
        self.data["nbo_data"] = {
            section: [frame.to_dict() for frame in frames] for section, frames in parsed.items()
        }
1770

1771
    def _check_completion_errors(self):
        """
        Parses potential errors that can cause jobs to crash.

        Scans self.text for known fatal messages in priority order and appends
        the corresponding error code to self.data["errors"]. At most one
        message-derived error is recorded per call (mirroring the original
        if/elif chain); "unknown_error" is appended when nothing matched and
        no errors were recorded earlier.
        """

        def pattern_found(patt):
            # read_pattern returns {"key": [[]]} on a (group-less) match,
            # so equality with [[]] means "pattern present in the output".
            return read_pattern(self.text, {"key": patt}, terminate_on_match=True).get("key") == [[]]

        # (pattern, error code) pairs checked before the "Welcome to Q-Chem" test.
        pre_welcome_checks = [
            (r"Coordinates do not transform within specified threshold", "failed_to_transform_coords"),
            (r"The Q\-Chem input file has failed to pass inspection", "input_file_error"),
            (r"Error opening input stream", "failed_to_read_input"),
            (r"FileMan error: End of file reached prematurely", "premature_end_FileMan_error"),
            (r"need to increase the array of NLebdevPts", "NLebdevPts"),
            (r"method not available", "method_not_available"),
            (r"Could not find \$molecule section in ParseQInput", "read_molecule_error"),
        ]
        # (pattern, error code) pairs checked after the "Welcome to Q-Chem" test.
        post_welcome_checks = [
            (r"\*\*\*ERROR\*\*\* Hessian Appears to have all zero or negative eigenvalues", "hessian_eigenvalue_error"),
            (r"FlexNet Licensing error", "licensing_error"),
            (r"Unable to validate license", "licensing_error"),
            (r"Could not open driver file in ReadDriverFromDisk", "driver_error"),
            (r"Basis not supported for the above atom", "basis_not_supported"),
            (r"Unable to find relaxed density", "failed_cpscf"),
            (r"Out of Iterations- IterZ", "failed_cpscf"),
            (r"RUN_NBO6 \(rem variable\) is not correct", "bad_old_nbo6_rem"),
            (r"NBO_EXTERNAL \(rem variable\) is not correct", "bad_new_nbo_external_rem"),
            (r"gen_scfman_exception:  GDM:: Zero or negative preconditioner scaling factor", "gdm_neg_precon_error"),
            (r"too many atoms in ESPChgFit", "esp_chg_fit_error"),
            (r"Please use larger MEM_STATIC", "mem_static_too_small"),
            (r"Please increase MEM_STATIC", "mem_static_too_small"),
            (r"Please increase MEM_TOTAL", "mem_total_too_small"),
        ]

        matched = False
        for patt, error in pre_welcome_checks:
            if pattern_found(patt):
                self.data["errors"] += [error]
                matched = True
                break

        # If Q-Chem never printed its banner, the job never actually started.
        if not matched and not pattern_found(r"Welcome to Q-Chem"):
            self.data["errors"] += ["never_called_qchem"]
            matched = True

        if not matched:
            for patt, error in post_welcome_checks:
                if pattern_found(patt):
                    self.data["errors"] += [error]
                    matched = True
                    break

        # Truncated outputs: a job killed mid-step often ends on one of these
        # exact phrases, which usually indicates an out-of-memory kill.
        if not matched:
            if self.text[-34:-2] == "Computing fast CPCM-SWIG hessian":
                self.data["errors"] += ["probably_out_of_memory"]
                matched = True
            elif self.text[-16:-1] == "Roots Converged":
                self.data["errors"] += ["probably_out_of_memory"]
                matched = True

        if not matched:
            # Many repeated failed line searches indicate an SCF that never converged.
            tmp_failed_line_searches = read_pattern(
                self.text,
                {"key": r"\d+\s+failed line searches\.\s+Resetting"},
                terminate_on_match=False,
            ).get("key")
            if tmp_failed_line_searches is not None and len(tmp_failed_line_searches) > 10:
                self.data["errors"] += ["SCF_failed_to_converge"]

        # Note: if "errors" is missing entirely (None), nothing is appended,
        # matching the original behavior.
        if self.data.get("errors") == []:
            self.data["errors"] += ["unknown_error"]
1888

1889
    def as_dict(self):
        """
        Returns:
            MSONAble dict.
        """
        # Sanitize so the result round-trips through JSON cleanly.
        return jsanitize(
            {"data": self.data, "text": self.text, "filename": self.filename},
            strict=True,
        )
1899

1900

1901
def check_for_structure_changes(mol1: Molecule, mol2: Molecule) -> str:
    """
    Compares connectivity of two molecules (using MoleculeGraph w/ OpenBabelNN).
    This function will work with two molecules with different atom orderings,
        but for proper treatment, atoms should be listed in the same order.
    Possible outputs include:
    - no_change: the bonding in the two molecules is identical
    - unconnected_fragments: the MoleculeGraph of mol1 is connected, but the
      MoleculeGraph is mol2 is not connected
    - fewer_bonds: the MoleculeGraph of mol1 has more bonds (edges) than the
      MoleculeGraph of mol2
    - more_bonds: the MoleculeGraph of mol2 has more bonds (edges) than the
      MoleculeGraph of mol1
    - bond_change: this case catches any other non-identical MoleculeGraphs
    Args:
        mol1: Pymatgen Molecule object to be compared.
        mol2: Pymatgen Molecule object to be compared.
    Returns:
        One of ["unconnected_fragments", "fewer_bonds", "more_bonds",
        "bond_change", "no_change"]
    """
    # Coordinating metals get special treatment: they are stripped before
    # graph comparison so that metal-coordination changes don't register
    # as bond changes.
    special_elements = ["Li", "Na", "Mg", "Ca", "Zn"]
    mol_list = [copy.deepcopy(mol1), copy.deepcopy(mol2)]

    if mol1.composition != mol2.composition:
        raise RuntimeError("Molecules have different compositions! Exiting...")

    for ii, site in enumerate(mol1):
        if site.specie.symbol != mol2[ii].specie.symbol:
            warnings.warn(
                "Comparing molecules with different atom ordering! "
                "Turning off special treatment for coordinating metals."
            )
            special_elements = []

    special_sites: list[list] = [[], []]
    for ii, mol in enumerate(mol_list):
        # Record each special site (index, site, distances to all other sites).
        special_indices = []
        for jj, site in enumerate(mol):
            if site.specie.symbol in special_elements:
                distances = [[kk, site.distance(other_site)] for kk, other_site in enumerate(mol)]
                special_sites[ii].append([jj, site, distances])
                special_indices.append(jj)
        # BUGFIX: the original deleted sites while enumerating the molecule,
        # which shifts indices and skips the site immediately following each
        # deletion (consecutive special sites were left behind). Deleting the
        # collected indices in reverse keeps earlier indices valid.
        for jj in reversed(special_indices):
            del mol[jj]

    # Can add logic to check the distances in the future if desired

    initial_mol_graph = MoleculeGraph.with_local_env_strategy(mol_list[0], OpenBabelNN())
    initial_graph = initial_mol_graph.graph
    last_mol_graph = MoleculeGraph.with_local_env_strategy(mol_list[1], OpenBabelNN())
    last_graph = last_mol_graph.graph
    if initial_mol_graph.isomorphic_to(last_mol_graph):
        return "no_change"

    if nx.is_connected(initial_graph.to_undirected()) and not nx.is_connected(last_graph.to_undirected()):
        return "unconnected_fragments"
    if last_graph.number_of_edges() < initial_graph.number_of_edges():
        return "fewer_bonds"
    if last_graph.number_of_edges() > initial_graph.number_of_edges():
        return "more_bonds"
    return "bond_change"
1962

1963

1964
def jump_to_header(lines: list[str], header: str) -> list[str]:
    """
    Truncate the start of `lines` so the first remaining line contains `header`.

    Args:
            lines: List of lines.
            header: Substring to match.

    Returns:
            Truncated lines.

    Raises:
            RuntimeError: if no line contains the header.
    """
    start = next((idx for idx, text in enumerate(lines) if header in text.strip()), None)
    if start is None:
        raise RuntimeError(f"Header {header} could not be found in the lines.")
    return lines[start:]
1987

1988

1989
def get_percentage(line: str, orbital: str) -> str:
    """
    Retrieve the percent character of an orbital.

    Args:
            line: Line containing orbital and percentage.
            orbital: Type of orbital (s, p, d, f).

    Returns:
            Percentage of character (as a string).
    """
    # Cut everything before the orbital label, then before the first "("
    # that follows it; the percentage sits in the six characters after "(".
    tail = line[line.find(orbital):]
    tail = tail[tail.find("("):]
    return tail[1:7].strip()
2014

2015

2016
def z_int(string: str) -> int:
    """
    Convert string to integer.
    If the string cannot be parsed as an integer (e.g. it is empty),
    return -1 instead of raising.

    Args:
            string: Input to be cast to int.

    Returns:
            Int representation, or -1 on failure.
    """
    try:
        value = int(string)
    except ValueError:
        value = -1
    return value
2034

2035

2036
def parse_natural_populations(lines: list[str]) -> list[pd.DataFrame]:
    """
    Parse the natural populations section of NBO output.

    Every "Summary of Natural Population Analysis" block found in `lines`
    yields one DataFrame with columns taken from the table header
    (7 columns, or 8 when a spin-density column is present).

    Args:
            lines: QChem output lines.

    Returns:
            One data frame of formatted output per population block.
    """
    pop_dfs = []

    while True:
        # Natural populations; stop once no further block is found.
        try:
            lines = jump_to_header(lines, "Summary of Natural Population Analysis:")
        except RuntimeError:
            break

        # Jump to column names
        lines = lines[4:]
        columns = lines[0].split()

        # Jump to values
        lines = lines[2:]
        rows = []
        for line in lines:
            # A separator line ("====") terminates the table
            if "=" in line:
                break

            values = line.split()
            if len(values[0]) > 2:
                # Atom symbol and index are fused (e.g. "C100"); split them.
                values.insert(0, values[0][0:-3])
                values[1] = values[1][-3:]
            row = [
                str(values[0]),
                int(values[1]),
                float(values[2]),
                float(values[3]),
                float(values[4]),
                float(values[5]),
                float(values[6]),
            ]
            if len(columns) == 8:
                row.append(float(values[7]))
            rows.append(row)

        # Store values in a dataframe
        pop_dfs.append(pd.DataFrame(data=rows, columns=columns))

    return pop_dfs
2095

2096

2097
def parse_hyperbonds(lines: list[str]) -> list[pd.DataFrame]:
    """
    Parse the 3-center, 4-electron (A:-B-:C) hyperbond section of NBO output.

    Args:
            lines: QChem output lines.

    Returns:
            One data frame of hyperbond data per NBO block found.
    """

    # Fixed-width fields of a hyperbond table row:
    # (column name, character slice, converter).
    row_fields = (
        ("hyperbond index", slice(0, 4), int),
        ("bond atom 1 symbol", slice(5, 8), str),
        ("bond atom 1 index", slice(8, 11), int),
        ("bond atom 2 symbol", slice(13, 15), str),
        ("bond atom 2 index", slice(15, 18), int),
        ("bond atom 3 symbol", slice(20, 22), str),
        ("bond atom 3 index", slice(22, 25), int),
        ("pctA-B", slice(27, 31), float),
        ("pctB-C", slice(32, 36), float),
        ("occ", slice(38, 44), float),
        ("BD(A-B)", slice(46, 53), int),
        ("LP(C)", slice(54, 59), int),
        ("h(A)", slice(61, 65), int),
        ("h(B)", slice(67, 71), int),
        ("h(C)", slice(73, 77), int),
    )

    # Any of these substrings marks the end of the hyperbond table.
    terminators = (
        "NATURAL BOND ORBITALS",
        "SECOND ORDER PERTURBATION THEORY",
        "NHO DIRECTIONALITY AND BOND BENDING",
        "Archival summary:",
    )
    # Header/decoration lines inside the table that carry no data.
    decorations = ("NBOs", "threshold", "-------------", "A:-B-:C")

    hyperbond_dfs = []
    while True:
        # Stop once no further hyperbond block is found.
        try:
            lines = jump_to_header(
                lines,
                "3-Center, 4-Electron A:-B-:C Hyperbonds (A-B :C <=> A: B-C)",
            )
        except RuntimeError:
            break

        # Jump past the header to the values
        lines = lines[2:]

        hyperbond_data = []
        for line in lines:
            if any(marker in line for marker in terminators):
                break
            if line.strip() == "" or any(marker in line for marker in decorations):
                continue

            # Extract every fixed-width field of the row.
            entry: dict[str, str | int | float] = {
                name: convert(line[cols].strip()) for name, cols, convert in row_fields
            }
            hyperbond_data.append(entry)

        # Store values in a dataframe
        hyperbond_dfs.append(pd.DataFrame(data=hyperbond_data))

    return hyperbond_dfs
2177

2178

2179
def parse_hybridization_character(lines: list[str]) -> list[pd.DataFrame]:
    """
    Parse the hybridization character section of NBO output.

    For each NBO "Bond orbital/Coefficients/Hybrids" block found, three
    DataFrames are appended to the result in order: lone pair (LP/LV)
    entries, two-center bond (BD) entries, and three-center bond (3C)
    entries.

    Args:
            lines: QChem output lines.

    Returns:
            Data frames of formatted output.

    Raises:
            RuntimeError
    """

    # Orbital angular-momentum labels whose percent character is extracted
    orbitals = ["s", "p", "d", "f"]

    no_failures = True
    lp_and_bd_and_tc_dfs = []

    while no_failures:
        # NBO Analysis: two header spellings exist (with/without spaces
        # around the slashes), so try both before giving up.
        try:
            lines = jump_to_header(lines, "(Occupancy)   Bond orbital/ Coefficients/ Hybrids")
        except RuntimeError:
            try:
                lines = jump_to_header(lines, "(Occupancy)   Bond orbital / Coefficients / Hybrids")
            except RuntimeError:
                no_failures = False

        if no_failures:
            # Jump to values
            lines = lines[2:]

            # Save the data for different types of orbitals
            lp_data = []
            bd_data = []
            tc_data = []

            # Iterate over the lines; `i` is advanced manually because each
            # entry spans a variable number of physical lines.
            i = -1
            while True:
                i += 1
                line = lines[i]

                # Termination conditions: any section that follows the
                # hybridization block ends the scan.
                if "NHO DIRECTIONALITY AND BOND BENDING" in line:
                    break
                if "Archival summary:" in line:
                    break
                if "3-Center, 4-Electron A:-B-:C Hyperbonds (A-B :C <=> A: B-C)" in line:
                    break
                if "SECOND ORDER PERTURBATION THEORY ANALYSIS OF FOCK MATRIX IN NBO BASIS" in line:
                    break
                if "Thank you very much for using Q-Chem.  Have a nice day." in line:
                    break

                # Lone pair (LP) or lone vacancy (LV) entry: fixed-width
                # fields followed by orbital percentages spread over
                # this line and the next.
                if "LP" in line or "LV" in line:
                    # Percentages default to 0.0 for orbitals not listed
                    LPentry: dict[str, str | int | float] = {orbital: 0.0 for orbital in orbitals}
                    LPentry["bond index"] = line[0:4].strip()
                    LPentry["occupancy"] = line[7:14].strip()
                    LPentry["type"] = line[16:19].strip()
                    LPentry["orbital index"] = line[20:22].strip()
                    LPentry["atom symbol"] = line[23:25].strip()
                    LPentry["atom number"] = line[25:28].strip()

                    # Populate the orbital percentages
                    for orbital in orbitals:
                        if orbital in line:
                            LPentry[orbital] = get_percentage(line, orbital)

                    # Move one line down (percentages may wrap)
                    i += 1
                    line = lines[i]

                    # Populate the orbital percentages
                    for orbital in orbitals:
                        if orbital in line:
                            LPentry[orbital] = get_percentage(line, orbital)

                    # Save the entry
                    lp_data.append(LPentry)

                # Bonding (two-center BD entry)
                if "BD" in line:
                    # Per-atom percentages default to 0.0. Note: the `i` in
                    # this comprehension is scoped to the comprehension and
                    # does not clobber the outer line counter.
                    BDentry: dict[str, str | int | float] = {
                        f"atom {i} {orbital}": 0.0 for orbital in orbitals for i in range(1, 3)
                    }
                    BDentry["bond index"] = line[0:4].strip()
                    BDentry["occupancy"] = line[7:14].strip()
                    BDentry["type"] = line[16:19].strip()
                    BDentry["orbital index"] = line[20:22].strip()
                    BDentry["atom 1 symbol"] = line[23:25].strip()
                    BDentry["atom 1 number"] = line[25:28].strip()
                    BDentry["atom 2 symbol"] = line[29:31].strip()
                    BDentry["atom 2 number"] = line[31:34].strip()

                    # Move one line down
                    i += 1
                    line = lines[i]

                    BDentry["atom 1 polarization"] = line[16:22].strip()
                    BDentry["atom 1 pol coeff"] = line[24:33].strip()

                    # Populate the orbital percentages
                    for orbital in orbitals:
                        if orbital in line:
                            BDentry[f"atom 1 {orbital}"] = get_percentage(line, orbital)

                    # Move one line down
                    i += 1
                    line = lines[i]

                    # Populate the orbital percentages
                    for orbital in orbitals:
                        if orbital in line:
                            BDentry[f"atom 1 {orbital}"] = get_percentage(line, orbital)

                    # Move down until you see an orbital
                    # NOTE(review): this matches any line containing the
                    # letter "s", not only an s-orbital label — appears to
                    # rely on intervening lines never containing "s";
                    # confirm against real NBO output before changing.
                    while "s" not in line:
                        i += 1
                        line = lines[i]

                    BDentry["atom 2 polarization"] = line[16:22].strip()
                    BDentry["atom 2 pol coeff"] = line[24:33].strip()

                    # Populate the orbital percentages
                    for orbital in orbitals:
                        if orbital in line:
                            BDentry[f"atom 2 {orbital}"] = get_percentage(line, orbital)

                    # Move one line down
                    i += 1
                    line = lines[i]

                    # Populate the orbital percentages
                    for orbital in orbitals:
                        if orbital in line:
                            BDentry[f"atom 2 {orbital}"] = get_percentage(line, orbital)

                    # Save the entry
                    bd_data.append(BDentry)

                # 3-center bond (3C entry): same layout as BD but with a
                # third atom block.
                if "3C" in line:
                    TCentry: dict[str, str | int | float] = {
                        f"atom {i} {orbital}": 0.0 for orbital in orbitals for i in range(1, 4)
                    }
                    TCentry["bond index"] = line[0:4].strip()
                    TCentry["occupancy"] = line[7:14].strip()
                    TCentry["type"] = line[16:19].strip()
                    TCentry["orbital index"] = line[20:22].strip()
                    TCentry["atom 1 symbol"] = line[23:25].strip()
                    TCentry["atom 1 number"] = line[25:28].strip()
                    TCentry["atom 2 symbol"] = line[29:31].strip()
                    TCentry["atom 2 number"] = line[31:34].strip()
                    TCentry["atom 3 symbol"] = line[35:37].strip()
                    TCentry["atom 3 number"] = line[37:40].strip()

                    # Move one line down
                    i += 1
                    line = lines[i]

                    TCentry["atom 1 polarization"] = line[16:22].strip()
                    TCentry["atom 1 pol coeff"] = line[24:33].strip()

                    # Populate the orbital percentages
                    for orbital in orbitals:
                        if orbital in line:
                            TCentry[f"atom 1 {orbital}"] = get_percentage(line, orbital)

                    # Move one line down
                    i += 1
                    line = lines[i]

                    # Populate the orbital percentages
                    for orbital in orbitals:
                        if orbital in line:
                            TCentry[f"atom 1 {orbital}"] = get_percentage(line, orbital)

                    # Move down until you see an orbital (see NOTE above on
                    # the "s" substring test)
                    while "s" not in line:
                        i += 1
                        line = lines[i]

                    TCentry["atom 2 polarization"] = line[16:22].strip()
                    TCentry["atom 2 pol coeff"] = line[24:33].strip()

                    # Populate the orbital percentages
                    for orbital in orbitals:
                        if orbital in line:
                            TCentry[f"atom 2 {orbital}"] = get_percentage(line, orbital)

                    # Move one line down
                    i += 1
                    line = lines[i]

                    # Populate the orbital percentages
                    for orbital in orbitals:
                        if orbital in line:
                            TCentry[f"atom 2 {orbital}"] = get_percentage(line, orbital)

                    # Move down until you see an orbital
                    while "s" not in line:
                        i += 1
                        line = lines[i]

                    TCentry["atom 3 polarization"] = line[16:22].strip()
                    TCentry["atom 3 pol coeff"] = line[24:33].strip()

                    # Populate the orbital percentages
                    for orbital in orbitals:
                        if orbital in line:
                            TCentry[f"atom 3 {orbital}"] = get_percentage(line, orbital)

                    # Move one line down
                    i += 1
                    line = lines[i]

                    # Populate the orbital percentages
                    for orbital in orbitals:
                        if orbital in line:
                            TCentry[f"atom 3 {orbital}"] = get_percentage(line, orbital)

                    # Save the entry
                    tc_data.append(TCentry)

            # Store values in a dataframe (always three frames per block,
            # even if some are empty)
            lp_and_bd_and_tc_dfs.append(pd.DataFrame(data=lp_data))
            lp_and_bd_and_tc_dfs.append(pd.DataFrame(data=bd_data))
            lp_and_bd_and_tc_dfs.append(pd.DataFrame(data=tc_data))

    return lp_and_bd_and_tc_dfs
2413

2414

2415
def parse_perturbation_energy(lines: list[str]) -> list[pd.DataFrame]:
    """
    Parse the second-order perturbation theory (E2) analysis section(s) of NBO output.

    Each "SECOND ORDER PERTURBATION THEORY ANALYSIS OF FOCK MATRIX IN NBO BASIS"
    section found in the output is parsed into its own DataFrame (one per spin set /
    NBO block), so the return value is a list of frames.

    The table is fixed-width, so fields are extracted by hard-coded column slices.
    Two layouts are handled, distinguished by where the "." of the leading bond
    index lands: line[4] == "." for the narrow format and line[5] == "." for the
    wider format (which can also contain 3-center (3C) donor/acceptor labels).

    Args:
        lines: QChem output lines.

    Returns:
        List of DataFrames, one per perturbation-energy section, with donor/acceptor
        identification plus "perturbation energy", "energy difference", and
        "fock matrix element" columns.

    Raises:
        ValueError: If a 3C label does not split into three components, or a
            perturbation-energy field is unparseable and not the "*******"
            overflow marker.
    """

    no_failures = True
    e2_dfs = []

    while no_failures:
        # 2nd order perturbation theory analysis. jump_to_header raises
        # RuntimeError when no further section exists; that terminates the loop.
        try:
            lines = jump_to_header(
                lines,
                "SECOND ORDER PERTURBATION THEORY ANALYSIS OF FOCK MATRIX IN NBO BASIS",
            )
        except RuntimeError:
            no_failures = False

        if no_failures:
            # Jump to values: skip past the preamble down to the first line
            # containing "within", which marks the start of the data rows.
            i = -1
            while True:
                i += 1
                line = lines[i]
                if "within" in line:
                    lines = lines[i:]
                    break

            # Extract 2nd order data
            e2_data = []
            for line in lines:
                # Termination condition: the next NBO section begins here.
                if "NATURAL BOND ORBITALS" in line:
                    break

                # Skip conditions: blank lines and non-data annotation rows.
                if line.strip() == "":
                    continue
                if "unit" in line:
                    continue
                if "None" in line:
                    continue

                # Extract the values
                entry: dict[str, str | int | float] = {}
                # 3C (three-center) donor/acceptor flags, detected by a "3" at
                # the fixed column where the NBO type field starts.
                first_3C = False
                second_3C = False
                if line[7] == "3":
                    first_3C = True
                if line[35] == "3":
                    second_3C = True

                if line[4] == ".":
                    # Narrow fixed-width layout (bond index <= 4 characters).
                    entry["donor bond index"] = int(line[0:4].strip())
                    entry["donor type"] = str(line[5:9].strip())
                    entry["donor orbital index"] = int(line[10:12].strip())
                    entry["donor atom 1 symbol"] = str(line[13:15].strip())
                    entry["donor atom 1 number"] = int(line[15:17].strip())
                    entry["donor atom 2 symbol"] = str(line[18:20].strip())
                    # z_int is a sibling helper; presumably it tolerates an empty
                    # field (lone pairs have no second atom) — confirm against its def.
                    entry["donor atom 2 number"] = z_int(line[20:22].strip())
                    entry["acceptor bond index"] = int(line[25:31].strip())
                    entry["acceptor type"] = str(line[32:36].strip())
                    entry["acceptor orbital index"] = int(line[37:39].strip())
                    entry["acceptor atom 1 symbol"] = str(line[40:42].strip())
                    entry["acceptor atom 1 number"] = int(line[42:44].strip())
                    entry["acceptor atom 2 symbol"] = str(line[45:47].strip())
                    entry["acceptor atom 2 number"] = z_int(line[47:49].strip())
                    entry["perturbation energy"] = float(line[50:62].strip())
                    entry["energy difference"] = float(line[62:70].strip())
                    entry["fock matrix element"] = float(line[70:79].strip())
                elif line[5] == ".":
                    # Wide fixed-width layout; may carry 3C labels like "A1-B2-C3".
                    entry["donor bond index"] = int(line[0:5].strip())
                    entry["donor type"] = str(line[6:10].strip())
                    entry["donor orbital index"] = int(line[11:13].strip())

                    if first_3C:
                        tmp = str(line[14:28].strip())
                        split = tmp.split("-")
                        if len(split) != 3:
                            raise ValueError("Should have three components! Exiting...")
                        # This is a bit hacky, but making new more accurate entry
                        # keys for 3C info forces there to be a NAN for those values
                        # for all other entries, which I believe would nontrivially
                        # increase the size of the data being stored, which is already
                        # very large. So, here we have saved info that should be labeled
                        # "donor 3C 1", "donor 3C 2", and "donor 3C 3" in the
                        # "donor atom 1 symbol", "donor atom 1 number", and
                        # "donor atom 2 symbol" fields, and then put a reminder in the
                        # "donor atom 2 number" field.
                        entry["donor atom 1 symbol"] = split[0]
                        entry["donor atom 1 number"] = split[1]
                        entry["donor atom 2 symbol"] = split[2]
                        entry["donor atom 2 number"] = "info_is_from_3C"
                    else:
                        entry["donor atom 1 symbol"] = str(line[14:16].strip())
                        entry["donor atom 1 number"] = int(line[16:19].strip())
                        entry["donor atom 2 symbol"] = str(line[20:22].strip())
                        entry["donor atom 2 number"] = z_int(line[22:25].strip())

                    entry["acceptor bond index"] = int(line[28:33].strip())
                    entry["acceptor type"] = str(line[34:38].strip())
                    entry["acceptor orbital index"] = int(line[39:41].strip())

                    if second_3C:
                        tmp = str(line[42:56].strip())
                        split = tmp.split("-")
                        if len(split) != 3:
                            raise ValueError("Should have three components! Exiting...")
                        # This is a bit hacky, but making new more accurate entry
                        # keys for 3C info forces there to be a NAN for those values
                        # for all other entries, which I believe would nontrivially
                        # increase the size of the data being stored, which is already
                        # very large. So, here we have saved info that should be labeled
                        # "acceptor 3C 1", "acceptor 3C 2", and "acceptor 3C 3" in the
                        # "acceptor atom 1 symbol", "acceptor atom 1 number", and
                        # "acceptor atom 2 symbol" fields, and then put a reminder in the
                        # "acceptor atom 2 number" field.
                        entry["acceptor atom 1 symbol"] = split[0]
                        entry["acceptor atom 1 number"] = split[1]
                        entry["acceptor atom 2 symbol"] = split[2]
                        entry["acceptor atom 2 number"] = "info_is_from_3C"
                    else:
                        entry["acceptor atom 1 symbol"] = str(line[42:44].strip())
                        entry["acceptor atom 1 number"] = int(line[44:47].strip())
                        entry["acceptor atom 2 symbol"] = str(line[48:50].strip())
                        entry["acceptor atom 2 number"] = z_int(line[50:53].strip())
                    try:
                        entry["perturbation energy"] = float(line[56:63].strip())
                    except ValueError:
                        # "*******" is NBO's fixed-width overflow marker; map it to inf.
                        if line[56:63].strip() == "*******":
                            entry["perturbation energy"] = float("inf")
                        else:
                            raise ValueError("Unknown value error in parsing perturbation energy")
                    entry["energy difference"] = float(line[63:71].strip())
                    entry["fock matrix element"] = float(line[71:79].strip())
                e2_data.append(entry)

            # Store values in a dataframe
            e2_dfs.append(pd.DataFrame(data=e2_data))

    return e2_dfs

2566

2567
def nbo_parser(filename: str) -> dict[str, list[pd.DataFrame]]:
    """
    Parse all the important sections of an NBO output file.

    Args:
        filename: Path to QChem NBO output.

    Returns:
        Mapping from section name ("natural_populations",
        "hybridization_character", "hyperbonds", "perturbation_energy")
        to the list of DataFrames produced by the corresponding parser.

    Raises:
        RuntimeError
    """

    # Read the whole NBO output; ISO-8859-1 decodes any byte, so stray
    # non-UTF-8 characters in the output cannot break the read.
    with zopen(filename, mode="rt", encoding="ISO-8859-1") as file:
        contents = file.readlines()

    # Each section parser independently scans the full line list.
    return {
        "natural_populations": parse_natural_populations(contents),
        "hybridization_character": parse_hybridization_character(contents),
        "hyperbonds": parse_hyperbonds(contents),
        "perturbation_energy": parse_perturbation_energy(contents),
    }
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc