• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

pyiron / pyiron_base / 11228705564

08 Oct 2024 04:41AM UTC coverage: 71.833%. Remained the same
11228705564

push

github

web-flow
Merge pull request #1659 from pyiron/pre-commit-ci-update-config

[pre-commit.ci] pre-commit autoupdate

7480 of 10413 relevant lines covered (71.83%)

0.72 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

28.17
/pyiron_base/database/performance.py
1
__copyright__ = (
1✔
2
    "Copyright 2020, Max-Planck-Institut für Eisenforschung GmbH - "
3
    "Computational Materials Design (CM) Department"
4
)
5

6
import pandas
1✔
7
from sqlalchemy import (
1✔
8
    MetaData,
9
    Table,
10
    create_engine,
11
    distinct,
12
    false,
13
    func,
14
    or_,
15
    select,
16
)
17
from sqlalchemy.engine import Connection
1✔
18

19
from pyiron_base.state import state
1✔
20

21
__author__ = "Muhammad Hassani"
1✔
22
__version__ = "1.0"
1✔
23
__maintainer__ = "Muhammad Hassani"
1✔
24
__email__ = "hassani@mpie.de"
1✔
25

26

27
def _checkpoints_interval(conn: Connection) -> dict:
1✔
28
    """
29
    returns the number of checkpoints and their intervals
30
    """
31
    stmt = """
×
32
    SELECT
33
        total_checkpoints,
34
        seconds_since_start / total_checkpoints / 60 AS minutes_between_checkpoints
35
        FROM
36
        (SELECT
37
        EXTRACT(EPOCH FROM (now() - pg_postmaster_start_time())) AS seconds_since_start,
38
        (checkpoints_timed+checkpoints_req) AS total_checkpoints
39
        FROM pg_stat_bgwriter
40
        ) AS sub;
41
    """
42
    check_points = conn.execute(stmt).fetchone()
×
43
    return {"num. checkpoints": check_points[0], "checkpoint interval": check_points[1]}
×
44

45

46
def _duplicate_indices(conn: Connection) -> dict:
1✔
47
    """
48
    returns the duplicates in indices
49
    """
50
    stmt = """
×
51
        SELECT pg_size_pretty(sum(pg_relation_size(idx))::bigint) as size,
52
                   (array_agg(idx))[1] as idx1, (array_agg(idx))[2] as idx2,
53
                   (array_agg(idx))[3] as idx3, (array_agg(idx))[4] as idx4
54
            FROM (
55
                SELECT indexrelid::regclass as idx, 
56
                    (indrelid::text ||E'\n'|| indclass::text ||E'\n'|| indkey::text ||E'\n'||
57
                    coalesce(indexprs::text,'')||E'\n' || coalesce(indpred::text,'')) as key
58
                FROM pg_index) sub
59
            GROUP BY key HAVING count(*)>1
60
            ORDER BY sum(pg_relation_size(idx)) DESC;
61
    """
62
    overlapping_indices = conn.execute(stmt).fetchall()
×
63
    output_dict = {"duplicated indices": []}
×
64
    if len(overlapping_indices) > 0:
×
65
        for pair in overlapping_indices:
×
66
            output_dict["duplicated indices"].append(
×
67
                str(pair[1])
68
                + ", and "
69
                + str(pair[2])
70
                + " with total size: "
71
                + str(pair[0])
72
            )
73
    return output_dict
×
74

75

76
class DatabaseStatistics:
1✔
77
    """
78
    The use case is:
79
    >>> from pyiron_base.database.performance import DatabaseStatistics
80
    >>> db_stat = DatabaseStatistics()
81
    >>> df = db_stat.performance()
82
    >>> df
83
    >>> df['duplicated indices'].values[0]
84
    """
85

86
    def __init__(self):
1✔
87
        connection_string = state.database.sql_connection_string
×
88
        self._job_table = state.database.sql_view_table_name
×
89
        if "postgresql" not in connection_string:
×
90
            raise RuntimeError(
×
91
                """
92
                The detabase statistics is only available for a Postgresql database
93
                """
94
            )
95
        self._engine = create_engine(connection_string)
×
96
        self._performance_dict = {}
×
97
        metadata = MetaData()
×
98
        self._stat_view = Table(
×
99
            "pg_stat_activity", metadata, autoload_with=self._engine
100
        )
101
        self._locks_view = Table("pg_locks", metadata, autoload_with=self._engine)
×
102

103
    def _num_conn(self, conn: Connection) -> dict:
1✔
104
        """
105
        return the number of connections
106
        """
107
        stmt = select(func.count()).select_from(self._stat_view)
×
108
        result = conn.execute(stmt)
×
109
        return {"total num. connection": result.fetchone()[0]}
×
110

111
    def _num_conn_by_state(self, conn: Connection) -> dict:
1✔
112
        """
113
        return the number of connection, categorized by their state:
114
        active, idle, idle in transaction, idle in transaction (aborted)
115
        """
116
        stmt = (
×
117
            select(self._stat_view.c.state, func.count())
118
            .select_from(self._stat_view)
119
            .group_by(self._stat_view.c.state)
120
        )
121
        results = conn.execute(stmt).fetchall()
×
122
        output_dict = {}
×
123
        for result in results:
×
124
            key = "Number of " + str(result[0]) + " connection"
×
125
            val = int(result[1])
×
126
            output_dict[key] = val
×
127
        return output_dict
×
128

129
    def _num_conn_waiting_locks(self, conn: Connection) -> dict:
1✔
130
        """
131
        returns the number of connection waiting for locks
132
        """
133
        stmt = select(func.count(distinct(self._locks_view.c.pid))).where(
×
134
            self._locks_view.c.granted == false()
135
        )
136
        return {"num. of conn. waiting for locks": conn.execute(stmt).fetchone()[0]}
×
137

138
    def _max_trans_age(self, conn: Connection) -> dict:
1✔
139
        """
140
        returns the maximum age of a transaction
141
        """
142
        stmt = (
×
143
            select(func.max(func.now() - self._stat_view.c.xact_start))
144
            .select_from(self._stat_view)
145
            .where(
146
                or_(
147
                    self._stat_view.c.state == "idle in transaction",
148
                    self._stat_view.c.state == "active",
149
                )
150
            )
151
        )
152
        return {"max. transaction age": str(conn.execute(stmt).fetchone()[0])}
×
153

154
    def _index_size(self, conn: Connection) -> dict:
1✔
155
        """
156
        returns the total size of indexes for the pyiron job table
157
        """
158
        stmt = """
×
159
            SELECT
160
                t.schemaname,
161
                t.tablename,
162
                c.reltuples::bigint                            AS num_rows,
163
                pg_size_pretty(pg_relation_size(c.oid))        AS table_size,
164
                psai.indexrelname                              AS index_name,
165
                pg_size_pretty(pg_relation_size(i.indexrelid)) AS index_size,
166
                CASE WHEN i.indisunique THEN 'Y' ELSE 'N' END  AS "unique",
167
                psai.idx_scan                                  AS number_of_scans,
168
                psai.idx_tup_read                              AS tuples_read,
169
                psai.idx_tup_fetch                             AS tuples_fetched
170
            FROM
171
                pg_tables t
172
                LEFT JOIN pg_class c ON t.tablename = c.relname
173
                LEFT JOIN pg_index i ON c.oid = i.indrelid
174
                LEFT JOIN pg_stat_all_indexes psai ON i.indexrelid = psai.indexrelid
175
            WHERE
176
                t.schemaname NOT IN ('pg_catalog', 'information_schema')
177
            ORDER BY 1, 2;        
178
            """
179
        rows = conn.execute(stmt).fetchall()
×
180
        index_usage = 0
×
181
        for row in rows:
×
182
            if row[1] == self._job_table:
×
183
                index_usage += int(str(row[5]).split(" ")[0])
×
184

185
        return {"index size/usage (MB)": index_usage}
×
186

187
    def performance(self) -> pandas.DataFrame:
1✔
188
        """
189
        returns a pandas dataframe with the essential statistics of a pyiron postgres database
190
        """
191
        with self._engine.connect() as conn:
×
192
            self._performance_dict.update(self._num_conn(conn))
×
193
            self._performance_dict.update(self._num_conn_by_state(conn))
×
194
            self._performance_dict.update(self._num_conn_waiting_locks(conn))
×
195
            self._performance_dict.update(self._max_trans_age(conn))
×
196
            self._performance_dict.update(_checkpoints_interval(conn))
×
197
            self._performance_dict.update(self._index_size(conn))
×
198
            self._performance_dict.update(_duplicate_indices(conn))
×
199

200
        return pandas.DataFrame(self._performance_dict, index=["performance"])
×
201

202

203
def get_database_statistics() -> pandas.DataFrame:
1✔
204
    """
205
    This function returns the statistics of pyiron postgres database in the form of a pandas dataframe.
206
    The dataframe includes:
207
    - total number of connection
208
    - number of connection categorized by their state
209
    - maximum age of a transaction
210
    - number of checkpoints and their interval
211
    - size of indices
212
    - pair of duplicate indices and their total size
213
    usage:
214
    >>> from pyiron_base.database.performance import get_database_statistics
215
    >>> get_database_statistics()
216
    """
217

218
    return DatabaseStatistics().performance()
×
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc