Coveralls logo
Coveralls logo
  • Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

open-contracting / extension_registry.py / 52

31 Oct 2018 - 20:52 coverage: 85.612% (-10.09%) from 95.703%
52

Pull #12

travis-ci

9181eb84f9c35729a3bad740fb7f9d93?size=18&default=identiconweb-flow
Move build_profile method
Pull Request #12: Add ProfileBuilder

127 of 176 new or added lines in 5 files covered. (72.16%)

357 of 417 relevant lines covered (85.61%)

0.86 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

89.91
/ocdsextensionregistry/profile_builder.py
1
import csv
1×
2
import json
1×
3
import logging
1×
4
import os
1×
5
import re
1×
6
from collections import OrderedDict
1×
7
from io import BytesIO, StringIO
1×
8
from zipfile import ZipFile
1×
9
from urllib.parse import urljoin
1×
10

11
import json_merge_patch
1×
12
import requests
1×
13

14
from .codelist import Codelist
1×
15
from .extension_registry import ExtensionRegistry
1×
16
from .util import json_loads
1×
17

18
# Module-level logger, registered under the fixed name 'ocdsextensionregistry' (not this module's `__name__`).
logger = logging.getLogger('ocdsextensionregistry')
1×
19

20

21
class ProfileBuilder:
    """
    Combines a version of the standard with a set of extension versions, to produce the patched release schema, the
    release package schema and the patched codelists.
    """

    def __init__(self, standard_tag, extension_versions, registry_base_url=None, schema_base_url=None):
        """
        Accepts an OCDS version and a dictionary of extension identifiers and versions, and initializes a reader of the
        extension registry.

        :param standard_tag: the reference appended to the standard's zip download URL
        :param extension_versions: a dictionary mapping extension identifiers to versions
        :param registry_base_url: the base URL (with a trailing slash) from which 'extension_versions.csv' is read;
            defaults to the master branch of the extension registry
        :param schema_base_url: if provided, the base URL used to rewrite the schemas' `id` and `$ref` values
        """
        self.standard_tag = standard_tag
        self.extension_versions = extension_versions
        # Maps paths relative to the standard's schema/ directory to file contents. Filled on first use.
        self._file_cache = {}
        self.schema_base_url = schema_base_url

        # Allows setting the registry URL to e.g. a pull request, when working on a profile.
        if not registry_base_url:
            registry_base_url = 'https://raw.githubusercontent.com/open-contracting/extension_registry/master/'

        self.registry = ExtensionRegistry(registry_base_url + 'extension_versions.csv')

    def extensions(self):
        """
        Returns the matching extension versions from the registry.

        This is a generator: one registry lookup is made per entry of `extension_versions`, in its iteration order.
        """
        for identifier, version in self.extension_versions.items():
            yield self.registry.get(id=identifier, version=version)

    def release_schema_patch(self):
        """
        Returns the consolidated release schema patch.

        The 'release-schema.json' file of each extension is merged, in order, into a single patch.
        """
        profile_patch = OrderedDict()

        # Replaces `null` with sentinel values, to preserve the null'ing of fields by extensions in the final patch.
        for extension in self.extensions():
            data = re.sub(r':\s*null\b', ': "REPLACE_WITH_NULL"', extension.remote('release-schema.json'))
            json_merge_patch.merge(profile_patch, json_loads(data))

        # Restores the `null` values, now that all patches are merged.
        return json_loads(json.dumps(profile_patch).replace('"REPLACE_WITH_NULL"', 'null'))

    def patched_release_schema(self):
        """
        Returns the patched release schema.

        The standard's release schema is merged with the consolidated patch. If `schema_base_url` was provided, the
        schema's `id` is set relative to it.
        """
        content = self.get_standard_file_contents('release-schema.json')
        patched = json_merge_patch.merge(json_loads(content), self.release_schema_patch())
        if self.schema_base_url:
            patched['id'] = urljoin(self.schema_base_url, 'release-schema.json')

        return patched

    def release_package_schema(self):
        """
        Returns a release package schema. If `schema_base_url` was provided, updates schema URLs.
        """
        data = json_loads(self.get_standard_file_contents('release-package-schema.json'))

        if self.schema_base_url:
            data['id'] = urljoin(self.schema_base_url, 'release-package-schema.json')
            # Points the package's releases at the profile's release schema.
            data['properties']['releases']['items']['$ref'] = urljoin(self.schema_base_url, 'release-schema.json')

        return data

    def standard_codelists(self):
        """
        Returns the standard's codelists as Codelist objects.
        """
        codelists = OrderedDict()

        # Populate the file cache.
        self.get_standard_file_contents('release-schema.json')

        # This method shouldn't need to know about `_file_cache`.
        for path, content in self._file_cache.items():
            # The cache keys come from zip member names, which always use forward slashes, whatever the platform:
            # splitting on `os.sep` would find no codelists on Windows.
            parts = path.split('/')
            name = parts[-1]
            # `name` is empty for directory entries, which are skipped.
            if 'codelists' in parts and name:
                codelists[name] = Codelist(name)
                codelists[name].extend(csv.DictReader(StringIO(content)), 'OCDS Core')

        return list(codelists.values())

    def extension_codelists(self):
        """
        Returns the extensions' codelists as Codelist objects.

        The extensions' codelists may be new, or may add codes to (+name.csv), remove codes from (-name.csv) or replace
        (name.csv) the codelists of the standard or other extensions.

        Codelist additions and removals are merged across extensions. If new codelists or codelist replacements differ
        across extensions, an error is raised.

        :raises AssertionError: if a new codelist or a codelist replacement differs across extensions, or if a codelist
            replacement is inconsistent with the additions or removals of the same codelist
        """
        codelists = OrderedDict()

        # Keep the original content of codelists, to compare across extensions.
        originals = {}

        for extension in self.extensions():
            # We use the "codelists" field in extension.json (which standard-maintenance-scripts validates). An
            # extension is not guaranteed to offer a download URL, which is the only other way to get codelists.
            for name in extension.metadata.get('codelists', []):
                content = extension.remote('codelists/' + name)

                if name not in codelists:
                    codelists[name] = Codelist(name)
                    originals[name] = content
                elif not codelists[name].patch:
                    # Raised explicitly, instead of using `assert`, so that the check survives `python -O`.
                    if originals[name] != content:
                        raise AssertionError('codelist {} differs across extensions'.format(name))
                    # The identical codelist was already read from another extension.
                    continue

                codelists[name].extend(csv.DictReader(StringIO(content)), extension.metadata['name']['en'])

        # If a codelist replacement (name.csv) is consistent with additions (+name.csv) and removals (-name.csv), the
        # latter should be removed. In other words, the expectations are that:
        #
        # * A codelist replacement shouldn't omit added codes.
        # * A codelist replacement shouldn't include removed codes.
        # * If codes are added after a codelist is replaced, this should result in duplicate codes.
        # * If codes are removed after a codelist is replaced, this should result in no change.
        #
        # If these expectations are not met, an error is raised. As such, profile authors only have to handle cases
        # where codelist modifications are inconsistent across extensions.
        #
        # Iterates over a copy of the values, because entries are deleted within the loop.
        for codelist in list(codelists.values()):
            basename = codelist.basename
            if codelist.patch and basename in codelists:
                name = codelist.name
                codes = codelists[basename].codes
                if codelist.addend:
                    for row in codelist:
                        code = row['Code']
                        if code not in codes:
                            raise AssertionError('{} added by {}, but not in {}'.format(code, name, basename))
                    logger.info('{0} has the codes added by {1} - ignoring {1}'.format(basename, name))
                else:
                    for row in codelist:
                        code = row['Code']
                        if code in codes:
                            raise AssertionError('{} removed by {}, but in {}'.format(code, name, basename))
                    logger.info('{0} has no codes removed by {1} - ignoring {1}'.format(basename, name))
                del codelists[name]

        return list(codelists.values())

    def patched_codelists(self):
        """
        Returns patched and new codelists as Codelist objects.

        Starts from the standard's codelists, then applies the extensions' additions, removals and replacements.
        """
        codelists = OrderedDict()

        for codelist in self.standard_codelists():
            codelists[codelist.name] = codelist

        for codelist in self.extension_codelists():
            if codelist.patch:
                basename = codelist.basename
                if codelist.addend:
                    # Add the rows.
                    codelists[basename].rows.extend(codelist.rows)
                    # Note that the rows may not all have the same columns, but DictWriter can handle this.
                else:
                    # Remove the codes. Multiple extensions can remove the same codes.
                    removed = codelist.codes
                    codelists[basename].rows = [row for row in codelists[basename] if row['Code'] not in removed]
            else:
                # Set or replace the rows.
                codelists[codelist.name] = codelist

        return list(codelists.values())

    def get_standard_file_contents(self, basename):
        """
        Returns the contents of the file within the standard.

        Downloads the given version of the standard, and caches the contents of files in the schema/ directory.

        :param basename: the file's path relative to the schema/ directory, e.g. 'release-schema.json'
        :raises KeyError: if there is no such file in the schema/ directory
        """
        if not self._file_cache:
            url = 'https://codeload.github.com/open-contracting/standard/zip/' + self.standard_tag
            response = requests.get(url)
            response.raise_for_status()
            # The context manager closes the archive once its contents are cached.
            with ZipFile(BytesIO(response.content)) as zipfile:
                names = zipfile.namelist()
                # Assumes the archive's first entry is its top-level directory (with a trailing slash).
                prefix = names[0] + 'standard/schema/'
                start = len(prefix)
                for name in names[1:]:
                    # Matches the prefix, not a substring, so that slicing at `start` always yields the path
                    # relative to the schema/ directory.
                    if name.startswith(prefix):
                        self._file_cache[name[start:]] = zipfile.read(name).decode('utf-8')

        return self._file_cache[basename]
1×
Troubleshooting · Open an Issue · Sales · Support · ENTERPRISE · CAREERS · STATUS
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2023 Coveralls, Inc