• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

NaturalHistoryMuseum / ckanext-query-dois / #225

16 Jun 2025 12:50PM UTC coverage: 42.144% (-2.7%) from 44.794%
#225

push

coveralls-python

web-flow
merge: PR #62 from dev

Weekly release 2025-06-16

1 of 41 new or added lines in 1 file covered. (2.44%)

3 existing lines in 1 file now uncovered.

228 of 541 relevant lines covered (42.14%)

0.42 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

17.74
/ckanext/query_dois/routes/_helpers.py
1
#!/usr/bin/env python
2
# encoding: utf-8
3
#
4
# This file is part of ckanext-query-dois
5
# Created by the Natural History Museum in London, UK
6

7
import copy
1✔
8
import itertools
1✔
9
import json
1✔
10
import operator
1✔
11
from collections import OrderedDict
1✔
12
from functools import partial
1✔
13
from urllib.parse import urlencode
1✔
14

15
from ckan import model
1✔
16
from ckan.plugins import toolkit
1✔
17

18
from ..lib.stats import DOWNLOAD_ACTION, SAVE_ACTION
1✔
19
from ..lib.utils import get_resource_and_package
1✔
20
from ..model import QueryDOI, QueryDOIStat
1✔
21

22
# pairs of (parameter name, QueryDOIStat column); each parameter name is identical to
# the name of the QueryDOIStat attribute it is paired with
column_param_mapping = tuple(
    (param_name, getattr(QueryDOIStat, param_name))
    for param_name in ('doi', 'identifier', 'domain', 'action')
)
28

29

30
def get_query_doi(doi):
    """
    Looks up the QueryDOI row whose doi column equals the given DOI.

    :param doi: the doi (full doi, prefix/suffix)
    :returns: the first matching QueryDOI object, or None if there isn't one
    """
    matching = model.Session.query(QueryDOI).filter(QueryDOI.doi == doi)
    return matching.first()
×
39

40

41
def get_authors(packages):
    """
    Retrieves all the authors from the given packages, de-duplicates them and then
    returns them as a list in first-seen order.

    An author value may hold several names joined with ``;`` or ``,``. The value is
    split on the first of those separators (checked in that order) that it contains;
    a value containing neither is used as a single name. Surrounding whitespace is
    stripped from every name so that values such as ``'A, B'`` and ``'B'``
    de-duplicate correctly, and empty names (from a missing, None or blank author
    value, or from a trailing separator) are dropped.

    Note that this function takes a list of packages as it is multi-package and
    therefore multi-resource ready.

    :param packages: the packages, each a dict which may have an ``author`` entry
    :returns: a list of author(s)
    """
    # use an ordered dict in the absence of a sorted set
    authors = OrderedDict()
    for package in packages:
        # treat a missing or None author in the same way as an empty one
        author = package.get('author') or ''
        # some author values will contain many authors with a separator; ';' takes
        # precedence over ',' and only the first separator found is split on
        separator = next((s for s in (';', ',') if s in author), None)
        names = [author] if separator is None else author.split(separator)
        for name in names:
            # strip so that the whitespace left around separators doesn't defeat the
            # de-duplication, and skip anything that ends up empty
            name = name.strip()
            if name:
                authors[name] = True

    return list(authors)
×
66

67

68
def encode_params(params, version=None, extras=None, for_api=False):
    """
    Builds a URL query string from a query's parameters, either in the CKAN resource
    view format or in the API format.

    Any top-level ``version`` parameter is dropped. The ``filters`` parameter is
    deep-copied before being altered so the caller's dict is left untouched. Dict
    values are serialised as JSON when ``for_api`` is True, otherwise as
    ``key:value`` pairs joined with ``|`` (list values produce one pair per item).

    :param params: a dict of parameters, such as a DatastoreQuery's query dict
    :param version: the version to add into the query string under the
        ``__version__`` filter; if None any existing ``__version__`` filter is
        removed instead (default: None)
    :param extras: an optional dict of extra parameters to add as well as the ones found
        in the params dict (default: None)
    :param for_api: whether the query string is for a CKAN resource view or an API get
        as it changes the format (default: False)
    :returns: a query string of the query parameters (no ? at the start but will include
        & if needed)
    """
    sources = [params.items()]
    if extras is not None:
        sources.append(extras.items())

    # merge the params and the extras, extras win when a name appears in both
    merged = {}
    for name, entry in itertools.chain.from_iterable(sources):
        # all version data in the dicts is ignored, only the version argument counts
        if name == 'version':
            continue
        if name == 'filters':
            entry = copy.deepcopy(entry)
            if version is None:
                entry.pop('__version__', None)
        merged[name] = entry

    # the requested version, if there is one, always lives in the filters
    if version is not None:
        filters = merged.setdefault('filters', {})
        filters['__version__'] = version

    def _serialise(mapping):
        # the API takes nested data as JSON, a resource view needs key:value|key:value
        if for_api:
            return json.dumps(mapping)
        pieces = []
        for sub_name, sub_entry in mapping.items():
            items = sub_entry if isinstance(sub_entry, list) else [sub_entry]
            for item in items:
                pieces.append('{}:{}'.format(sub_name, item))
        return '|'.join(pieces)

    # only nested dicts need special treatment (this is for the filters field basically)
    encoded = {
        name: _serialise(entry) if isinstance(entry, dict) else entry
        for name, entry in merged.items()
    }
    return urlencode(encoded)
×
117

118

119
def generate_rerun_urls(resource, package, query, rounded_version):
    """
    Builds the "rerun" URLs which let a user revisit the data through the website or
    through the API, both at the original version and at the current version. The
    returned dict is shaped like this:

        {
            "page": {
                "original": ...
                "current": ...
            },
            "api": {
                "original": ...
                "current": ...
            }
        }

    The "original" URLs carry the rounded version, the "current" URLs carry no version.

    :param resource: the resource dict
    :param package: the package dict
    :param query: the query dict
    :param rounded_version: the version rounded down to the nearest available on the resource
    :returns: a dict of urls
    """
    page_url = toolkit.url_for(
        'resource.read', id=package['name'], resource_id=resource['id']
    )
    api_url = '/api/action/datastore_search'
    # the API needs to be told which resource to search, the page URL already has it
    api_extras = {'resource_id': resource['id']}

    page_original = encode_params(query, version=rounded_version)
    page_current = encode_params(query)
    api_original = encode_params(
        query, version=rounded_version, extras=api_extras, for_api=True
    )
    api_current = encode_params(query, extras=api_extras, for_api=True)

    return {
        'page': {
            'original': '?'.join((page_url, page_original)),
            'current': '?'.join((page_url, page_current)),
        },
        'api': {
            'original': '?'.join((api_url, api_original)),
            'current': '?'.join((api_url, api_current)),
        },
    }
162

163

164
def get_stats(query_doi):
    """
    Retrieve some simple stats about the query DOI: how many download stats and how
    many save stats are recorded against it, plus the timestamp of its most recent
    download stat (the one with the highest id). Only download stats are considered
    for the timestamp.

    :param query_doi: the QueryDOI object
    :returns: a 3-tuple containing the total downloads, total saves and the last
        download timestamp (None if there are no download stats)
    """

    def _stats_for(action):
        # every lookup here is scoped to this doi and a single action
        return (
            model.Session.query(QueryDOIStat)
            .filter(QueryDOIStat.doi == query_doi.doi)
            .filter(QueryDOIStat.action == action)
        )

    download_total = _stats_for(DOWNLOAD_ACTION).count()
    save_total = _stats_for(SAVE_ACTION).count()
    # the stat with the highest id is taken to be the latest one
    latest = _stats_for(DOWNLOAD_ACTION).order_by(QueryDOIStat.id.desc()).first()
    last_timestamp = None if latest is None else latest.timestamp
    return download_total, save_total, last_timestamp
×
196

197

198
def render_datastore_search_doi_page(query_doi):
    """
    Renders a DOI landing page for a datastore_search based query DOI.

    If the resource can't be found or the user isn't authorised to see it, the page is
    still rendered but with a warning and with placeholder package/author/rerun
    details.

    :param query_doi: the query DOI
    :returns: the rendered page
    """
    # currently we only deal with single resource query DOIs
    resource_id = query_doi.get_resource_ids()[0]
    rounded_version = query_doi.get_rounded_versions()[0]

    try:
        resource, package = get_resource_and_package(resource_id)
    except (toolkit.ObjectNotFound, toolkit.NotAuthorized):
        resource, package, is_inaccessible = None, None, True
    else:
        is_inaccessible = False

    # we ignore the saves count as it will always be 0 for a datastore_search DOI
    downloads, _saves, last_download_timestamp = get_stats(query_doi)
    usage_stats = dict(
        downloads=downloads,
        last_download_timestamp=last_download_timestamp,
    )

    # warnings
    warnings = []
    if is_inaccessible:
        warnings.append(
            toolkit._(
                'All resources associated with this search have been deleted, moved, '
                'or are no longer available.'
            )
        )

    # these are defaults for if the resource is inaccessible
    package_details = {
        'package_doi': None,
        'authors': toolkit._('Unknown'),
        'reruns': {},
    }
    if not is_inaccessible:
        # this is effectively an integration point with the ckanext-doi
        # extension. If there is demand we should open this up so that we can
        # support other dois on packages extensions
        if package.get('doi_status', False):
            package_details['package_doi'] = package['doi']
        package_details['authors'] = get_authors([package])
        package_details['reruns'] = generate_rerun_urls(
            resource, package, query_doi.query, rounded_version
        )

    context = {
        'query_doi': query_doi,
        'doi': query_doi.doi,
        'resource': resource,
        'package': package,
        'version': rounded_version,
        'usage_stats': usage_stats,
        'is_inaccessible': is_inaccessible,
        'warnings': warnings,
        **package_details,
    }
    return toolkit.render('query_dois/single_landing_page.html', context)
×
266

267

268
def get_package_and_resource_info(resource_ids):
    """
    Retrieve basic info about the packages and resources from the list of resource ids.

    Resources which can't be found, or which the user isn't authorised to see, are
    left out of both dicts and reported separately.

    :param resource_ids: a list of resource ids
    :returns: a 3-tuple of: a dict of package info keyed on package id (title, name
        and the ids of its resources from the list), a dict of resource info keyed on
        resource id (name and package id) and a list of the inaccessible resource ids
    """
    show_resource = partial(toolkit.get_action('resource_show'), {})
    show_package = partial(toolkit.get_action('package_show'), {})

    packages, resources, inaccessible_resources = {}, {}, []
    for resource_id in resource_ids:
        try:
            resource = show_resource({'id': resource_id})
        except (toolkit.ObjectNotFound, toolkit.NotAuthorized):
            inaccessible_resources.append(resource_id)
            continue

        package_id = resource['package_id']
        resources[resource_id] = dict(name=resource['name'], package_id=package_id)

        # each package is only looked up the first time one of its resources is seen
        if package_id not in packages:
            package = show_package({'id': package_id})
            packages[package_id] = dict(
                title=package['title'],
                name=package['name'],
                resource_ids=[],
            )
        packages[package_id]['resource_ids'].append(resource_id)

    return packages, resources, inaccessible_resources
×
302

303

304
def create_current_slug(query_doi: QueryDOI, ignore_resources=None) -> str:
    """
    Creates a slug for the given query DOI at the current version, this is done with a
    nav slug which has no version.

    :param query_doi: the QueryDOI
    :param ignore_resources: a list of resource IDs to leave out of the slug; if None
        or empty all of the query DOI's resources are used
    :returns: a slug
    """
    resource_ids = query_doi.get_resource_ids()
    if ignore_resources:
        resource_ids = list(
            filter(lambda rid: rid not in ignore_resources, resource_ids)
        )

    slug_data_dict = dict(
        query=query_doi.query,
        query_version=query_doi.query_version,
        resource_ids=resource_ids,
        nav_slug=True,
    )
    create_slug = toolkit.get_action('vds_slug_create')
    return create_slug({}, slug_data_dict)['slug']
×
325

326

327
def render_multisearch_doi_page(query_doi: QueryDOI):
    """
    Renders a DOI landing page for a datastore_multisearch based query DOI.

    The page shows both the details saved with the DOI and the details of what is
    still accessible now; resources which can no longer be accessed are counted and
    reported through warnings.

    :param query_doi: the query DOI
    :returns: the rendered page
    """
    packages, resources, inaccessible_resources = get_package_and_resource_info(
        query_doi.get_resource_ids()
    )
    inaccessible_count = len(inaccessible_resources)
    nothing_missing = inaccessible_count == 0
    nothing_accessible = len(resources) == 0

    # usage stats
    downloads, saves, last_download_timestamp = get_stats(query_doi)
    usage_stats = dict(
        downloads=downloads,
        saves=saves,
        last_download_timestamp=last_download_timestamp,
    )

    # current details: only the resources we can still see, biggest counts first
    sorted_resource_counts = [
        (resource_id, count)
        for resource_id, count in query_doi.resource_counts.items()
        if resource_id in resources
    ]
    sorted_resource_counts.sort(key=operator.itemgetter(1), reverse=True)
    if nothing_missing:
        current_record_count = query_doi.count
    else:
        # some resources are gone so only the remaining resources' records count
        current_record_count = sum(count for _, count in sorted_resource_counts)
    current_details = {
        'resource_count': len(resources),
        'package_count': len(packages),
        'sorted_resource_counts': sorted_resource_counts,
        'record_count': current_record_count,
    }

    # saved details
    saved_details = {
        'resource_count': (
            len(resources) if nothing_missing else len(query_doi.resource_counts)
        ),
        'record_count': query_doi.count,
        'missing_resources': inaccessible_count,
        'missing_records': (
            0 if nothing_missing else query_doi.count - current_record_count
        ),
    }

    # warnings
    warnings = []
    if nothing_accessible:
        # there's nothing left to build a current slug from
        current_slug = None
        warnings.append(
            toolkit._(
                'All resources associated with this search have been deleted, moved, '
                'or are no longer available.'
            )
        )
    else:
        current_slug = create_current_slug(
            query_doi, ignore_resources=inaccessible_resources
        )
        if not nothing_missing:
            partial_warning = toolkit._(
                'Some resources have been deleted, moved, or are no longer '
                'available. Affected resources: '
            )
            warnings.append(partial_warning + str(inaccessible_count))

    context = {
        'query_doi': query_doi,
        'original_slug': query_doi.doi,
        'current_slug': current_slug,
        'usage_stats': usage_stats,
        'resources': resources,
        'packages': packages,
        'details': current_details,
        'saved_details': saved_details,
        'has_changed': not nothing_missing,
        'is_inaccessible': nothing_accessible,
        'warnings': warnings,
    }
    return toolkit.render('query_dois/multisearch_landing_page.html', context)
×
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc