• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

NaturalHistoryMuseum / ckanext-graph / #142

21 May 2025 10:51PM UTC coverage: 9.17% (-0.4%) from 9.607%
#142

push

coveralls-python

web-flow
merge: #37 from josh/update_new_vds

Make graphs work again

1 of 6 new or added lines in 2 files covered. (16.67%)

21 of 229 relevant lines covered (9.17%)

0.09 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/ckanext/graph/db.py
1
#!/usr/bin/env python
2
# encoding: utf-8
3
#
4
# This file is part of ckanext-graph
5
# Created by the Natural History Museum in London, UK
6

7
from abc import abstractmethod, abstractproperty
×
8

9
from ckan.plugins import toolkit
×
10

11
from ckanext.graph.lib import utils
×
12

13

14
class Query(object):
    """
    A base class for retrieving stats from the datastore.

    Subclass to implement different backend retrieval methods. A subclass is expected
    to provide ``_date_query``, ``_count_query`` and ``run``.

    NOTE: this class does not use ``abc.ABCMeta`` as its metaclass, so the abstract
    markers on those members are advisory only; they are not enforced when a subclass
    is instantiated.
    """

    def __init__(self, date_field=None, date_interval=None, count_field=None):
        """
        Construct a new Query object. Use EITHER date args OR count args. Using both
        will fail.

        :param date_field: the name of the field to use for dates
        :param date_interval: the length of time between date groupings, e.g. day,
            month; falls back to 'day' when not given
        :param count_field: the name of the field to use for categories
        :raises ValueError: if both date_field and count_field are given
        """
        # validate with a real exception rather than an assert: asserts are removed
        # when Python runs with -O, which would let the invalid combination through
        if date_field is not None and count_field is not None:
            raise ValueError(
                'Use either date_field or count_field when building a query, not both'
            )

        # the resource, filters and free-text query all come from the current request
        self.resource_id = toolkit.c.resource['id']
        self.filters = utils.get_request_filters()
        self.q = utils.get_request_query()

        self.date_field = date_field
        self.date_interval = date_interval or 'day'
        self.count_field = count_field
        # the presence of a date field is what selects the date query over the count
        # query
        self._is_date_query = date_field is not None

    @property
    def query(self):
        """
        Returns the appropriate query text to send to the datastore backend.

        :returns: the date query or count query
        """
        if self._is_date_query:
            return self._date_query
        return self._count_query

    @property
    @abstractmethod
    def _date_query(self):
        """
        A query for retrieving results grouped by the date in date_field (in
        chronological order, where the interval is date_interval).

        :returns: a query ready to submit to the backend
        """
        return ''

    @property
    @abstractmethod
    def _count_query(self):
        """
        A query for retrieving results grouped by the categories in count_field.

        :returns: a query ready to submit to the backend
        """
        return ''

    @abstractmethod
    def run(self):
        """
        Submits the query to the backend and processes the results into the format
        [(key, count)].

        :returns: a list of (key,count) tuples
        """
        pass

    @classmethod
    def new(cls, *args, **kwargs):
        """
        Create a query object for the backend named by the ``ckanext.graph.backend``
        config option.

        Recognised values are 'elasticsearch' and 'sql'; if the option is unset or
        holds any other value, the elasticsearch implementation is used.

        :param args: positional arguments passed through to the query constructor
        :param kwargs: keyword arguments passed through to the query constructor
        :returns: an instance of the selected Query subclass
        """
        backend_type = toolkit.config.get('ckanext.graph.backend')
        queries = {'elasticsearch': ElasticSearchQuery, 'sql': SqlQuery}
        query_class = queries.get(backend_type, ElasticSearchQuery)
        return query_class(*args, **kwargs)
87

88

89
class ElasticSearchQuery(Query):
    """
    A Query that builds elasticsearch aggregation bodies and submits them through the
    ``vds_multi_direct`` action.
    """

    def __init__(self, *args, **kwargs):
        """
        Construct the query (see Query for the arguments) and set the names used for
        the aggregations in the request and response.
        """
        super(ElasticSearchQuery, self).__init__(*args, **kwargs)
        # name of the aggregation holding the result buckets
        self._bucket_name = 'query_buckets'
        # name of the outer aggregation used when the buckets are wrapped in a filter
        self._aggregated_name = 'agg_buckets'

    @property
    def _has_request_filters(self):
        """
        Whether the request supplied any filters or a free-text query.

        Both the count query builder and the result parser need this answer to agree
        on whether the buckets are nested inside the filter aggregation, so it lives
        in one place.

        :returns: True if there are filters or a query string, False otherwise
        """
        return bool(self.filters) or self.q is not None

    def _nest(self, *query_stack):
        """
        Helper method for nesting multiple dicts inside each other (nested stacks can
        get quite deep in elastic search queries).

        :param query_stack: the items to nest, in descending order (i.e. the first item
            will be the outermost key)
        :returns: a dict of nested items
        """
        # start from the innermost value and wrap it in each key, working outwards
        nested = query_stack[-1]
        for key in reversed(query_stack[:-1]):
            nested = {key: nested}
        return nested

    def _make_filter_term(self, filter_field, filter_value):
        """
        Build the filter clause for a single field.

        :param filter_field: the name of the field (without the ``data.`` prefix)
        :param filter_value: a single value, or a list of values
        :returns: a ``term`` clause for a single value, or a ``bool``/``should`` clause
            combining one ``term`` per value when given a list of several values
        """
        if not isinstance(filter_value, list):
            return {'term': {f'data.{filter_field}': filter_value}}

        # a one-item list is treated exactly like a bare value
        if len(filter_value) == 1:
            return self._make_filter_term(filter_field, filter_value[0])

        terms = [
            self._make_filter_term(filter_field, sub_value)
            for sub_value in filter_value
        ]
        return self._nest('bool', 'should', terms)

    @property
    def _filter_stack(self):
        """
        Create the subquery for filtering records (mostly from URL parameters, but date
        graphs also require that the date field is not null).

        :returns: a dict of filter items
        """
        filters = []

        if self._is_date_query:
            filters.append(self._nest('exists', 'field', f'data.{self.date_field}'))

        if self.q is not None:
            filters.append(self._nest('query_string', 'query', self.q))

        filters.extend(
            self._make_filter_term(field, value)
            for field, value in self.filters.items()
        )

        return self._nest('filter', 'bool', 'must', filters)

    @property
    def _date_query(self):
        """
        Build the date histogram aggregation over date_field, bucketed by
        date_interval and wrapped in the filter aggregation.

        Fields whose datastore type is 'date' are aggregated on their ``_d`` subfield;
        any other type is parsed from a 'yyyy-MM-dd' string by a script.

        :returns: a dict ready to submit as the search body
        """
        field_type = utils.get_datastore_field_types()[self.date_field]

        if field_type == 'date':
            histogram_options = {'field': f'data.{self.date_field}._d'}
        else:
            # NOTE(review): date_field is interpolated straight into the script
            # source; presumably it only ever comes from trusted view config - confirm
            script = f"""try {{
              def parser = new SimpleDateFormat(\'yyyy-MM-dd\');
              def dt = parser.parse(doc[\'data.{self.date_field}\'].value);
              return dt.getTime();
             }} catch (Exception e) {{
              return false;
             }}"""
            histogram_options = {'script': script}

        histogram_options['calendar_interval'] = self.date_interval

        select_stack = self._nest(
            'aggs', self._bucket_name, 'date_histogram', histogram_options
        )

        # date queries always carry the filter (at minimum the exists check), so the
        # buckets are always nested under the outer aggregation
        select_stack.update(self._filter_stack)

        return self._nest('aggs', self._aggregated_name, select_stack)

    @property
    def _count_query(self):
        """
        Build the terms aggregation over count_field, with records that have no value
        grouped under the translated label 'Empty'.

        The aggregation is only wrapped in the filter aggregation when the request
        has filters or a query string.

        :returns: a dict ready to submit as the search body
        """
        agg_options = {
            'field': f'data.{self.count_field}',
            'missing': toolkit._('Empty'),
        }

        query_stack = self._nest('aggs', self._bucket_name, 'terms', agg_options)

        if self._has_request_filters:
            query_stack.update(self._filter_stack)
            query_stack = self._nest('aggs', self._aggregated_name, query_stack)

        return query_stack

    def run(self):
        """
        Submits the query via the vds_multi_direct action and extracts the buckets
        from the response.

        :returns: a list of (key, count) tuples, where count is 0 for any bucket
            without a doc_count
        """
        # the vds_multi_direct action is admin only to prevent misuse, but we know what
        # we're doing, so skip the auth check
        context = {'ignore_auth': True}
        data_dict = {'resource_ids': [self.resource_id], 'search': self.query}
        results = toolkit.get_action('vds_multi_direct')(context, data_dict)
        aggs = results['aggregations']

        # the buckets sit one level deeper whenever the query builder wrapped them in
        # the filter aggregation
        extra_nesting = self._is_date_query or self._has_request_filters
        container = aggs[self._aggregated_name] if extra_nesting else aggs
        buckets = container[self._bucket_name]['buckets']

        return [(b['key'], b.get('doc_count', 0)) for b in buckets]
204

205

206
class SqlQuery(Query):
    """
    Placeholder for a SQL backed query.

    None of the backend hooks are implemented; each one raises NotImplementedError.
    """

    @property
    def _date_query(self):
        """
        Not implemented for this backend.

        :raises NotImplementedError: always
        """
        raise NotImplementedError

    @property
    def _count_query(self):
        """
        Not implemented for this backend.

        :raises NotImplementedError: always
        """
        raise NotImplementedError

    def run(self):
        """
        Not implemented for this backend.

        :raises NotImplementedError: always
        """
        raise NotImplementedError
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc