• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

OCHA-DAP / hdx-analysis-scripts / 16409015611

21 Jul 2025 05:09AM UTC coverage: 88.089% (+0.2%) from 87.921%
16409015611

Pull #12

github

mcarans
Fix styling
Pull Request #12: HDXDSYS-2291 Provide list of all datasets / orgs with quickcharts and indicate which are from pipeline

636 of 722 relevant lines covered (88.09%)

0.88 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

97.05
/src/hdx/analysis_scripts/common/dataset_statistics.py
1
import logging
1✔
2
import re
1✔
3
from collections import UserDict
1✔
4
from datetime import datetime, timedelta
1✔
5

6
from dateutil.parser import ParserError
1✔
7
from dateutil.relativedelta import relativedelta
1✔
8

9
from hdx.analysis_scripts.common import get_previous_quarter
1✔
10
from hdx.api.configuration import Configuration
1✔
11
from hdx.utilities.dateparse import parse_date
1✔
12

13
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
1✔
14

15

16
class DatasetStatistics(UserDict):
    """Statistics derived from a single dataset's metadata.

    The instance is a dictionary initialised from ``dataset.data``. On
    construction, a series of ``get_*`` methods run in a fixed order and store
    their results as attributes (mostly "Y"/"N" flags, with "" used where a
    value could not be determined).

    Args:
        today (datetime): Date against which ages and freshness are measured
        dataset_name_to_explorers: Container of dataset names; membership of
            this dataset's name sets ``in_explorer_or_grid``
        dataset_id_to_requests: Mapping from dataset id to a list of request
            dictionaries
        last_modified_aging: Mapping from update frequency to a mapping with
            "Due", "Overdue" and "Delinquent" thresholds
        end_date_aging: Mapping from update frequency to a mapping with an
            "OutOfDate" threshold
        dataset: Dataset object exposing ``data``, ``is_requestable()``,
            ``get_resources()``, ``get_time_period()``,
            ``get_expected_update_frequency()`` and ``get_tags()``
    """

    bracketed_date = re.compile(r"\((.*)\)")

    # "updated_by_script" values containing any of these markers are ignored
    # (Mike maintainer bulk change).
    _bulk_change_markers = (
        "HDXPythonLibrary/5.5.6-test (2022-03-15",
        "HDXPythonLibrary/5.4.8-test (2022-01-04",
        "HDXPythonLibrary/5.4.1-test (2021-11-17",
    )

    def __init__(
        self,
        today,
        dataset_name_to_explorers,
        dataset_id_to_requests,
        last_modified_aging,
        end_date_aging,
        dataset,
    ):
        super().__init__(dataset.data)
        self.today = today
        self.last_3_months = today - relativedelta(months=3)
        self.previous_quarter = get_previous_quarter(today)
        self.dataset_name_to_explorers = dataset_name_to_explorers
        self.dataset_id_to_requests = dataset_id_to_requests
        self.last_modified_aging = last_modified_aging
        self.end_date_aging = end_date_aging
        self.dataset = dataset
        self.last_modified = None
        self.configuration = Configuration.read()
        # The order matters: later steps read attributes set by earlier ones
        # (exclude_from_stats, last_modified, update_frequency, enddate,
        # updated_by_script).
        self.get_status()
        self.get_cod()
        self.get_date_info()
        self.get_update_frequency_info()
        self.get_in_explorer_or_grid()
        self.get_requests()
        # Placeholder only: get_tags() overwrites this with "Y" or "N".
        self.crisis_tag = False
        self.get_tags()
        self.get_updated_by_script()
        self.get_last_modified_freshness()
        self.get_end_date_freshness()
        self.get_quickcharts()

    def _get_time_period(self):
        """Return the dataset's time period, or None if it cannot be parsed.

        Returns:
            The value of ``self.dataset.get_time_period()``, or None when that
            call raises ``ParserError``.
        """
        try:
            return self.dataset.get_time_period()
        except ParserError:
            return None

    def get_status(self):
        """Set visibility, resource counts and whether to exclude from stats.

        Sets ``public``, ``requestable``, ``archived``, ``data_link``,
        ``data_type``, ``internal_resources``, ``external_resources`` and
        ``exclude_from_stats``. Resources are only examined for datasets that
        are not requestable.
        """
        self.public = "N" if self["private"] else "Y"
        self.internal_resources = 0
        self.external_resources = 0
        self.data_link = ""
        self.data_type = ""
        if self.dataset.is_requestable():
            self.requestable = "Y"
        else:
            self.requestable = "N"
            resources = self.dataset.get_resources()
            if resources:
                # The first resource supplies the link and type reported.
                first_resource = resources[0]
                self.data_link = first_resource["url"]
                self.data_type = first_resource["url_type"]
                for resource in resources:
                    if resource["url_type"] == "api":
                        self.external_resources += 1
                    else:
                        self.internal_resources += 1
        self.archived = "Y" if self["archived"] else "N"
        excluded = (
            self.public == "N" or self.requestable == "Y" or self.archived == "Y"
        )
        self.exclude_from_stats = "Y" if excluded else "N"

    def get_cod(self):
        """Set ``is_cod`` to "Y" if the dataset has a truthy "cod_level"."""
        self.is_cod = "Y" if self.get("cod_level") else "N"

    def get_date_info(self):
        """Set creation date, time period and recent-update flags.

        Sets ``created``, ``startdate``, ``enddate``, ``last_modified``,
        ``updated_last_3_months`` and ``updated_previous_qtr``. When the
        dataset has no time period, the start and end dates are "". When it has
        no "last_modified" value, ``last_modified`` is None and both update
        flags are "".
        """
        self.created = parse_date(self["metadata_created"], include_microseconds=True)
        time_period = self._get_time_period()
        if time_period:
            self.startdate = time_period["startdate_str"]
            if time_period["ongoing"]:
                self.enddate = "ongoing"
            else:
                self.enddate = time_period["enddate_str"]
        else:
            self.startdate = ""
            self.enddate = ""
            logger.error(f"Dataset {self['name']} has no time period!")
        last_modified = self.get("last_modified")
        if not last_modified:
            logger.error(f"Dataset {self['name']} has no last modified field!")
            self.last_modified = None
            self.updated_last_3_months = ""
            # Always define this flag so that reading it later cannot raise
            # AttributeError for datasets without a last modified date.
            self.updated_previous_qtr = ""
            return
        self.last_modified = parse_date(last_modified, include_microseconds=True)
        if self.last_3_months < self.last_modified <= self.today:
            self.updated_last_3_months = "Y"
        else:
            self.updated_last_3_months = "N"
        quarter_start, quarter_end = self.previous_quarter[0], self.previous_quarter[1]
        if quarter_start <= self.last_modified <= quarter_end:
            self.updated_previous_qtr = "Y"
        else:
            self.updated_previous_qtr = "N"

    def get_update_frequency_info(self):
        """Set ``update_frequency``, ``live`` and ``ongoing``.

        ``update_frequency`` is the raw "data_update_frequency" value ("" if
        absent). ``live`` is "Y" when the expected update frequency is "Live".
        ``ongoing`` is "" when the dataset has no time period.
        """
        self.update_frequency = self.get("data_update_frequency", "")
        expected_frequency = self.dataset.get_expected_update_frequency()
        self.live = "Y" if expected_frequency == "Live" else "N"
        time_period = self._get_time_period()
        if time_period:
            self.ongoing = "Y" if time_period["ongoing"] else "N"
        else:
            self.ongoing = ""

    def get_in_explorer_or_grid(self):
        """Set ``in_explorer_or_grid`` from ``dataset_name_to_explorers``."""
        if self["name"] in self.dataset_name_to_explorers:
            self.in_explorer_or_grid = "Y"
        else:
            self.in_explorer_or_grid = "N"

    def get_requests(self):
        """Count this dataset's requests by state.

        Requests whose state is neither "new" nor "open" are counted as
        archived. Archived requests are further counted as shared (truthy
        "data_shared") or, failing that, denied (truthy "rejected").
        """
        self.new_requests = 0
        self.open_requests = 0
        self.archived_requests = 0
        self.shared_requests = 0
        self.denied_requests = 0
        for request in self.dataset_id_to_requests.get(self["id"], []):
            state = request["state"]
            if state == "new":
                self.new_requests += 1
            elif state == "open":
                self.open_requests += 1
            else:
                self.archived_requests += 1
                if request["data_shared"]:
                    self.shared_requests += 1
                elif request["rejected"]:
                    self.denied_requests += 1

    def get_tags(self):
        """Set ``tags`` (comma separated) and the ``crisis_tag`` flag.

        ``crisis_tag`` is "Y" if any tag starts with "crisis-", else "N".
        """
        tags = self.dataset.get_tags()
        self.tags = ", ".join(tags)
        has_crisis_tag = any(tag.startswith("crisis-") for tag in tags)
        self.crisis_tag = "Y" if has_crisis_tag else "N"

    def add_tags_to_set(self, tagset):
        """Add this dataset's tags to the given set.

        Args:
            tagset (set): Set that is updated in place with the tags
        """
        tagset.update(self.dataset.get_tags())

    def get_updated_by_script(self):
        """Work out whether the dataset is (or was) updated by a script.

        Sets ``updated_by_script`` (the date found in brackets in the
        "updated_by_script" value, or None) and the "Y"/"N" flags
        ``updated_by_noncod_script``, ``updated_by_cod_script``,
        ``old_updated_by_noncod_script``, ``old_updated_by_cod_script`` and
        ``outdated_lastmodified``. All flags stay "N" when the value is absent,
        the dataset is excluded from stats, the value comes from an ignored
        source, or no date can be parsed from it.
        """
        updated_by_script = self.get("updated_by_script")
        self.updated_by_script = None
        self.updated_by_noncod_script = "N"
        self.updated_by_cod_script = "N"
        self.old_updated_by_noncod_script = "N"
        self.old_updated_by_cod_script = "N"
        self.outdated_lastmodified = "N"
        if not updated_by_script:
            return
        if self.exclude_from_stats == "Y":
            return
        is_internal = "HDXINTERNAL" in updated_by_script
        if is_internal and "tagbot" in updated_by_script:
            return
        if any(marker in updated_by_script for marker in self._bulk_change_markers):
            return
        match = self.bracketed_date.search(updated_by_script)
        if match is None:
            return
        try:
            self.updated_by_script = parse_date(
                match.group(1), include_microseconds=True
            )
        except ParserError:
            return
        if is_internal and "CODs" in updated_by_script:
            if "cod_level" in self.data:
                self.updated_by_cod_script = "Y"
            else:
                # no longer updated by COD script
                self.old_updated_by_cod_script = "Y"
            return

        if not self.last_modified:
            return
        one_hour = timedelta(hours=1)
        if self.updated_by_script > self.last_modified:
            self.updated_by_noncod_script = "Y"
            if self.dataset.get_expected_update_frequency() != "Live":
                # The script ran more than an hour after last modified.
                if self.updated_by_script - self.last_modified > one_hour:
                    self.outdated_lastmodified = "Y"
            return
        # The script date is not later than last modified: it still counts as
        # script-updated if the script ran within the hour before.
        if self.last_modified - self.updated_by_script < one_hour:
            self.updated_by_noncod_script = "Y"
        else:
            self.old_updated_by_noncod_script = "Y"

    def calculate_lm_freshness(
        self, last_modified: datetime, update_frequency: int
    ) -> str:
        """Calculate freshness based on a last modified date and the expected
        update frequency. Returns "Fresh", "Due", "Overdue" or "Delinquent".

        Args:
            last_modified (datetime): Last modified date
            update_frequency (int): Expected update frequency

        Returns:
            str: "Fresh", "Due", "Overdue" or "Delinquent"
        """
        delta = self.today - last_modified
        thresholds = self.last_modified_aging[update_frequency]
        # Check the most severe status first and return the first one reached.
        for status in ("Delinquent", "Overdue", "Due"):
            if delta >= thresholds[status]:
                return status
        return "Fresh"

    def get_last_modified_freshness(self):
        """Set ``last_modified_fresh`` from the latest known modification.

        The latest of last modified, "review_date" (if present) and the script
        update date (if any) is compared with the update frequency. The result
        stays "" when the dataset is excluded from stats, has no last modified
        date or has no update frequency. Update frequencies 0, -1 and -2 are
        always "Fresh".
        """
        self.last_modified_fresh = ""
        if self.exclude_from_stats == "Y":
            return
        if not self.last_modified:
            return
        latest_of_modifieds = self.last_modified
        review_date = self.get("review_date")
        if review_date is not None:
            review_date = parse_date(review_date, include_microseconds=True)
            if review_date > self.last_modified:
                latest_of_modifieds = review_date
        if self.updated_by_script and self.updated_by_script > latest_of_modifieds:
            latest_of_modifieds = self.updated_by_script
        if not self.update_frequency:
            return
        update_frequency = int(self.update_frequency)
        if update_frequency in (0, -1, -2):
            self.last_modified_fresh = "Fresh"
        else:
            self.last_modified_fresh = self.calculate_lm_freshness(
                latest_of_modifieds, update_frequency
            )

    def calculate_ed_uptodate(self, end_date: datetime, update_frequency: int) -> str:
        """Calculate up to date based on time period end date and the expected
        update frequency. Returns "UpToDate" or "OutOfDate".

        Args:
            end_date (datetime): End date of the time period
            update_frequency (int): Expected update frequency

        Returns:
            str: "UpToDate" or "OutOfDate"
        """
        delta = self.today - end_date
        if delta >= self.end_date_aging[update_frequency]["OutOfDate"]:
            return "OutOfDate"
        return "UpToDate"

    def get_end_date_freshness(self):
        """Set ``end_date_uptodate`` from the time period end date.

        The result stays "" when the dataset is excluded from stats, has no
        update frequency, has a negative update frequency, or has a positive
        update frequency but no end date. An update frequency of 0 or an
        "ongoing" end date gives "UpToDate".
        """
        self.end_date_uptodate = ""
        if self.exclude_from_stats == "Y":
            return
        if not self.update_frequency:
            return
        update_frequency = int(self.update_frequency)
        if update_frequency < 0:
            return
        if update_frequency == 0:
            self.end_date_uptodate = "UpToDate"
            return
        if self.enddate == "ongoing":
            self.end_date_uptodate = "UpToDate"
            return
        if not self.enddate:
            # No time period is available (enddate is ""), so there is nothing
            # to parse; leave the result blank instead of raising.
            return
        enddate = parse_date(self.enddate)
        self.end_date_uptodate = self.calculate_ed_uptodate(enddate, update_frequency)

    def get_quickcharts(self):
        """Set ``has_quickcharts`` to "Y" or "N" from the dataset's
        "has_quickcharts" value."""
        self.has_quickcharts = "Y" if self.dataset["has_quickcharts"] else "N"
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc