• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

OCHA-DAP / hdx-analysis-scripts / 19414865571

17 Nov 2025 12:46AM UTC coverage: 87.583% (-0.5%) from 88.089%
19414865571

push

github

web-flow
Add valid maintainers column (#13)

663 of 757 relevant lines covered (87.58%)

0.88 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

97.27
/src/hdx/analysis_scripts/common/dataset_statistics.py
1
import logging
1✔
2
import re
1✔
3
from collections import UserDict
1✔
4
from datetime import datetime, timedelta
1✔
5

6
from dateutil.parser import ParserError
1✔
7
from dateutil.relativedelta import relativedelta
1✔
8

9
from hdx.analysis_scripts.common import get_previous_quarter
1✔
10
from hdx.api.configuration import Configuration
1✔
11
from hdx.utilities.dateparse import parse_date
1✔
12

13
# Module-level logger, named after this module so that its output can be
# filtered per module by the logging configuration.
logger = logging.getLogger(__name__)
1✔
14

15

16
class DatasetStatistics(UserDict):
    """Statistics derived from a single dataset's metadata.

    The mapping is backed by ``dataset.data``. The constructor runs every
    ``get_*`` method, each of which stores its results as instance attributes.
    Flag-style attributes hold "Y" or "N", or "" when the value cannot be
    determined.
    """

    # Captures the text between the first "(" and the last ")" of a string; used
    # to pull the date out of the "updated_by_script" metadata field.
    bracketed_date = re.compile(r"\((.*)\)")

    def __init__(
        self,
        organisations,
        users,
        today,
        dataset_name_to_explorers,
        dataset_id_to_requests,
        last_modified_aging,
        end_date_aging,
        dataset,
    ):
        """Store the lookups and compute all statistics for one dataset.

        Args:
            organisations: Mapping from organisation id to organisation dict
                (its optional "users" list is read by ``get_maintainer``).
            users: Mapping from user id to user dict (its "sysadmin" key is
                read by ``get_maintainer``).
            today: Datetime treated as "now" for every age calculation.
            dataset_name_to_explorers: Container of dataset names, checked for
                membership by ``get_in_explorer_or_grid``.
            dataset_id_to_requests: Mapping from dataset id to an iterable of
                request dicts.
            last_modified_aging: Mapping from update frequency (int) to a dict
                with "Due", "Overdue" and "Delinquent" age thresholds.
            end_date_aging: Mapping from update frequency (int) to a dict with
                an "OutOfDate" age threshold.
            dataset: Dataset object; must expose ``data`` plus the accessor
                methods called by the ``get_*`` methods of this class.
        """
        super().__init__(dataset.data)
        self.organisations = organisations
        self.users = users
        self.today = today
        self.last_3_months = today - relativedelta(months=3)
        self.previous_quarter = get_previous_quarter(today)
        self.dataset_name_to_explorers = dataset_name_to_explorers
        self.dataset_id_to_requests = dataset_id_to_requests
        self.last_modified_aging = last_modified_aging
        self.end_date_aging = end_date_aging
        self.dataset = dataset
        self.last_modified = None
        self.configuration = Configuration.read()
        # Order matters: later steps read attributes set by earlier ones
        # (e.g. exclude_from_stats, last_modified, update_frequency, enddate).
        self.get_status()
        self.get_cod()
        self.get_date_info()
        self.get_update_frequency_info()
        self.get_in_explorer_or_grid()
        self.get_requests()
        self.crisis_tag = False
        self.get_tags()
        self.get_updated_by_script()
        self.get_last_modified_freshness()
        self.get_end_date_freshness()
        self.get_quickcharts()
        self.get_maintainer()

    def _read_time_period(self):
        """Return the dataset's time period, or None if its dates cannot be parsed."""
        try:
            return self.dataset.get_time_period()
        except ParserError:
            return None

    def get_status(self):
        """Set visibility flags, resource counts and ``exclude_from_stats``.

        Resource details (``data_link``, ``data_type`` and the internal/external
        counts) are only filled in for datasets that are not requestable.
        """
        self.public = "N" if self["private"] else "Y"
        self.internal_resources = 0
        self.external_resources = 0
        self.data_link = ""
        self.data_type = ""
        if self.dataset.is_requestable():
            self.requestable = "Y"
        else:
            self.requestable = "N"
            resources = self.dataset.get_resources()
            if resources:
                # The first resource supplies the representative link and type.
                first_resource = resources[0]
                self.data_link = first_resource["url"]
                self.data_type = first_resource["url_type"]
                for resource in resources:
                    if resource["url_type"] == "api":
                        self.external_resources += 1
                    else:
                        self.internal_resources += 1
        self.archived = "Y" if self["archived"] else "N"
        excluded = (
            self.public == "N" or self.requestable == "Y" or self.archived == "Y"
        )
        self.exclude_from_stats = "Y" if excluded else "N"

    def get_cod(self):
        """Set ``is_cod`` to "Y" when the metadata has a truthy "cod_level"."""
        self.is_cod = "Y" if self.get("cod_level") else "N"

    def get_date_info(self):
        """Set creation, time period and last-modified attributes.

        Sets ``created``, ``startdate``, ``enddate``, ``last_modified``,
        ``updated_last_3_months`` and ``updated_previous_qtr``. When the dataset
        has no "last_modified" value, both "updated" flags are set to "".
        """
        self.created = parse_date(self["metadata_created"], include_microseconds=True)
        time_period = self._read_time_period()
        if time_period:
            self.startdate = time_period["startdate_str"]
            if time_period["ongoing"]:
                self.enddate = "ongoing"
            else:
                self.enddate = time_period["enddate_str"]
        else:
            self.startdate = ""
            self.enddate = ""
            logger.error(f"Dataset {self['name']} has no time period!")
        last_modified = self.get("last_modified")
        if not last_modified:
            logger.error(f"Dataset {self['name']} has no last modified field!")
            self.last_modified = None
            self.updated_last_3_months = ""
            # Always define this flag too, so that readers of the attribute do
            # not hit AttributeError for datasets without a last modified date.
            self.updated_previous_qtr = ""
            return
        self.last_modified = parse_date(last_modified, include_microseconds=True)
        if self.last_3_months < self.last_modified <= self.today:
            self.updated_last_3_months = "Y"
        else:
            self.updated_last_3_months = "N"
        quarter_start, quarter_end = self.previous_quarter[0], self.previous_quarter[1]
        if quarter_start <= self.last_modified <= quarter_end:
            self.updated_previous_qtr = "Y"
        else:
            self.updated_previous_qtr = "N"

    def get_update_frequency_info(self):
        """Set ``update_frequency``, ``live`` and ``ongoing``.

        ``ongoing`` is "" when the dataset has no parseable time period.
        """
        self.update_frequency = self.get("data_update_frequency", "")
        expected_frequency = self.dataset.get_expected_update_frequency()
        self.live = "Y" if expected_frequency == "Live" else "N"
        time_period = self._read_time_period()
        if time_period:
            self.ongoing = "Y" if time_period["ongoing"] else "N"
        else:
            self.ongoing = ""

    def get_in_explorer_or_grid(self):
        """Set ``in_explorer_or_grid`` from ``dataset_name_to_explorers`` membership."""
        if self["name"] in self.dataset_name_to_explorers:
            self.in_explorer_or_grid = "Y"
        else:
            self.in_explorer_or_grid = "N"

    def get_requests(self):
        """Count this dataset's requests by state.

        Requests whose state is neither "new" nor "open" count as archived;
        archived requests are further counted as shared or denied.
        """
        self.new_requests = 0
        self.open_requests = 0
        self.archived_requests = 0
        self.shared_requests = 0
        self.denied_requests = 0
        for request in self.dataset_id_to_requests.get(self["id"], []):
            state = request["state"]
            if state == "new":
                self.new_requests += 1
            elif state == "open":
                self.open_requests += 1
            else:
                self.archived_requests += 1
                if request["data_shared"]:
                    self.shared_requests += 1
                elif request["rejected"]:
                    self.denied_requests += 1

    def get_tags(self):
        """Set ``tags`` (comma separated) and ``crisis_tag``.

        ``crisis_tag`` is "Y" when any tag starts with "crisis-", else "N".
        """
        tags = self.dataset.get_tags()
        self.tags = ", ".join(tags)
        has_crisis_tag = any(tag.startswith("crisis-") for tag in tags)
        self.crisis_tag = "Y" if has_crisis_tag else "N"

    def add_tags_to_set(self, tagset):
        """Add this dataset's tags to ``tagset`` in place.

        Args:
            tagset: Set that is updated with the dataset's tags.
        """
        tagset.update(self.dataset.get_tags())

    def get_updated_by_script(self):
        """Classify the dataset's "updated_by_script" metadata.

        Sets ``updated_by_script`` (the date parsed from the bracketed part of
        the field, or None) and the "Y"/"N" flags ``updated_by_cod_script``,
        ``old_updated_by_cod_script``, ``updated_by_noncod_script``,
        ``old_updated_by_noncod_script`` and ``outdated_lastmodified``. All
        flags stay "N" for datasets excluded from stats, for ignored scripts
        and when no date can be extracted.
        """
        updated_by_script = self.get("updated_by_script")
        self.updated_by_script = None
        self.updated_by_noncod_script = "N"
        self.updated_by_cod_script = "N"
        self.old_updated_by_noncod_script = "N"
        self.old_updated_by_cod_script = "N"
        self.outdated_lastmodified = "N"
        if not updated_by_script or self.exclude_from_stats == "Y":
            return
        is_internal = "HDXINTERNAL" in updated_by_script
        if is_internal and "tagbot" in updated_by_script:
            return
        if any(
            x in updated_by_script
            for x in (
                "HDXPythonLibrary/5.5.6-test (2022-03-15",
                "HDXPythonLibrary/5.4.8-test (2022-01-04",
                "HDXPythonLibrary/5.4.1-test (2021-11-17",
            )
        ):  # Mike maintainer bulk change
            return
        match = self.bracketed_date.search(updated_by_script)
        if match is None:
            return
        try:
            self.updated_by_script = parse_date(
                match.group(1), include_microseconds=True
            )
        except ParserError:
            return
        if is_internal and "CODs" in updated_by_script:
            if "cod_level" in self.data:
                self.updated_by_cod_script = "Y"
            else:
                # no longer updated by COD script
                self.old_updated_by_cod_script = "Y"
            return

        if not self.last_modified:
            return
        if self.updated_by_script > self.last_modified:
            self.updated_by_noncod_script = "Y"
            if self.dataset.get_expected_update_frequency() != "Live":
                # The script ran more than an hour after the recorded last
                # modified date, so flag last modified as outdated.
                difference = self.updated_by_script - self.last_modified
                if difference > timedelta(hours=1):
                    self.outdated_lastmodified = "Y"
            return
        difference = self.last_modified - self.updated_by_script
        if difference < timedelta(hours=1):
            self.updated_by_noncod_script = "Y"
        else:
            self.old_updated_by_noncod_script = "Y"

    def calculate_lm_freshness(
        self, last_modified: datetime, update_frequency: int
    ) -> str:
        """Calculate freshness based on a last modified date and the expected update
        frequency. Returns "Fresh", "Due", "Overdue" or "Delinquent".

        Args:
            last_modified (datetime): Last modified date
            update_frequency (int): Expected update frequency

        Returns:
            str: "Fresh", "Due", "Overdue" or "Delinquent"
        """
        delta = self.today - last_modified
        thresholds = self.last_modified_aging[update_frequency]
        # Check the most severe status first; the first threshold reached wins.
        for status in ("Delinquent", "Overdue", "Due"):
            if delta >= thresholds[status]:
                return status
        return "Fresh"

    def get_last_modified_freshness(self):
        """Set ``last_modified_fresh`` for datasets included in stats.

        Freshness is based on the latest of the last modified date, the review
        date (if any) and the script update date (if any). The value stays ""
        for excluded datasets, when there is no last modified date or when no
        update frequency is set. Update frequencies 0, -1 and -2 are always
        "Fresh".
        """
        self.last_modified_fresh = ""
        if self.exclude_from_stats == "Y":
            return
        if not self.last_modified:
            return
        latest_of_modifieds = self.last_modified
        review_date = self.get("review_date")
        # Treat an empty review date like a missing one rather than parsing it.
        if review_date:
            review_date = parse_date(review_date, include_microseconds=True)
            if review_date > latest_of_modifieds:
                latest_of_modifieds = review_date
        if self.updated_by_script and self.updated_by_script > latest_of_modifieds:
            latest_of_modifieds = self.updated_by_script
        if not self.update_frequency:
            return
        update_frequency = int(self.update_frequency)
        if update_frequency in (0, -1, -2):
            self.last_modified_fresh = "Fresh"
        else:
            self.last_modified_fresh = self.calculate_lm_freshness(
                latest_of_modifieds, update_frequency
            )

    def calculate_ed_uptodate(self, end_date: datetime, update_frequency: int) -> str:
        """Calculate up to date based on time period end date and the expected
        update frequency. Returns "UpToDate" or "OutOfDate".

        Args:
            end_date (datetime): End date of the time period
            update_frequency (int): Expected update frequency

        Returns:
            str: "UpToDate" or "OutOfDate"
        """
        delta = self.today - end_date
        if delta >= self.end_date_aging[update_frequency]["OutOfDate"]:
            return "OutOfDate"
        return "UpToDate"

    def get_end_date_freshness(self):
        """Set ``end_date_uptodate`` for datasets included in stats.

        The value stays "" for excluded datasets, when no update frequency is
        set, for negative update frequencies and when the dataset has no end
        date. Update frequency 0 and ongoing time periods are always "UpToDate".
        """
        self.end_date_uptodate = ""
        if self.exclude_from_stats == "Y":
            return
        if not self.update_frequency:
            return
        update_frequency = int(self.update_frequency)
        if update_frequency < 0:
            return
        if update_frequency == 0 or self.enddate == "ongoing":
            self.end_date_uptodate = "UpToDate"
            return
        if not self.enddate:
            # No time period was available (enddate is ""), so there is no
            # date to parse and up-to-dateness cannot be determined.
            return
        enddate = parse_date(self.enddate)
        self.end_date_uptodate = self.calculate_ed_uptodate(enddate, update_frequency)

    def get_quickcharts(self):
        """Set ``has_quickcharts`` from the dataset's "has_quickcharts" value."""
        self.has_quickcharts = "Y" if self.dataset["has_quickcharts"] else "N"

    def get_maintainer(self):
        """Set ``valid_maintainer``.

        The maintainer is valid ("Y") when they are a known user who is either
        a sysadmin or an "admin"/"editor" member of the dataset's organisation;
        otherwise the value is "N".
        """
        self.valid_maintainer = "N"
        maintainer_id = self["maintainer"]
        maintainer = self.users.get(maintainer_id)
        if not maintainer:
            return
        if maintainer["sysadmin"]:
            self.valid_maintainer = "Y"
            return
        organisation_id = self["organization"]["id"]
        organisation = self.organisations[organisation_id]
        for user in organisation.get("users", []):
            if user["id"] == maintainer_id:
                # Only the first matching membership entry is considered.
                if user["capacity"] in ("admin", "editor"):
                    self.valid_maintainer = "Y"
                return
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc