• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

DemocracyClub / yournextrepresentative / 74df76c8-4768-48d5-bb7f-5ee50aa05217

06 Nov 2023 01:38PM UTC coverage: 67.523% (-0.3%) from 67.801%
74df76c8-4768-48d5-bb7f-5ee50aa05217

Pull #2177

circleci

VirginiaDooley
Create TextractResults model
Pull Request #2177: Spike: AWS Textract

1640 of 2760 branches covered (0.0%)

Branch coverage included in aggregate %.

12 of 62 new or added lines in 3 files covered. (19.35%)

110 existing lines in 10 files now uncovered.

6662 of 9535 relevant lines covered (69.87%)

0.7 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/ynr/apps/sopn_parsing/management/commands/sopn_tooling_compare_raw_people.py
1
import json
×
2
import os
×
3
from collections import Counter
×
4

5
from bulk_adding.models import RawPeople
×
6
from candidates.models import Ballot
×
7
from django.core.management import call_command
×
8
from official_documents.models import OfficialDocument
×
9
from popolo.models import Membership
×
10
from sopn_parsing.helpers.command_helpers import BaseSOPNParsingCommand
×
11
from sopn_parsing.models import ParsedSOPN
×
12

13

14
class Command(BaseSOPNParsingCommand):
×
15
    CORRECT_EXACTLY = "correct_exactly"
×
16
    NUM_CORRECT_MISSING_PARTIES = "num_correct_some_parties_missing"
×
17
    NUM_INCORRECT = "num_incorrect"
×
18
    ZERO_CANDIDATES = "zero_candidates"
×
19

20
    def add_arguments(self, parser):
        """
        Register the parent command's arguments plus a ``--loud`` switch.

        ``--loud`` is a boolean flag that is off unless passed on the command
        line; ``handle`` reads it back out of the parsed options.
        """
        super().add_arguments(parser)
        parser.add_argument(
            "--loud",
            action="store_true",
            default=False,
        )
×
23

24
    def handle(self, *args, **options):
        """
        Re-parse the SOPNs and report how the results differ from the baseline.

        - Check we have a baseline file to compare with, creating one if it is
          missing
        - Reset the relevant pages, ParsedSOPN and RawPeople data and re-run
          the three parsing commands with ``testing=True``
        - Loop through the ballots that have an OfficialDocument, comparing
          their relevant pages and raw people to our baseline
        - Print overall totals and how many ballots fell into each result group
        - Write a new baseline file. NB this always happens: a drop in the
          number of parsed people is reported but does not stop the command
        """
        self.loud = options.pop("loud")

        # Keyed by the class-level result names so the buckets always match
        # the keys that parties_correct appends to
        self.candidates_results = {
            self.CORRECT_EXACTLY: [],
            self.NUM_CORRECT_MISSING_PARTIES: [],
            self.NUM_INCORRECT: [],
            self.ZERO_CANDIDATES: [],
        }

        # Relative path, so it is resolved against the current working
        # directory
        raw_people_file = "ynr/apps/sopn_parsing/tests/data/sopn_baseline.json"
        if not os.path.isfile(raw_people_file):
            call_command("sopn_tooling_write_baseline")
            self.stdout.write("Baseline file didn't exist so one was created")

        options.update({"testing": True})

        # Clear the output of each parsing step before re-running it so the
        # comparison only sees freshly parsed data
        OfficialDocument.objects.update(relevant_pages="")
        call_command("sopn_parsing_extract_page_numbers", *args, **options)
        ParsedSOPN.objects.all().delete()
        call_command("sopn_parsing_extract_tables", *args, **options)
        RawPeople.objects.all().delete()
        call_command("sopn_parsing_parse_tables", *args, **options)

        # Read the baseline with an explicit encoding rather than whatever
        # the locale default happens to be
        with open(raw_people_file, encoding="utf-8") as file:
            old_raw_people = json.load(file)

        self.new_raw_people = {}
        for ballot in Ballot.objects.exclude(officialdocument__isnull=True):
            # ballots that are not in the baseline are compared against {}
            ballot_data = old_raw_people.get(ballot.ballot_paper_id, {})

            self.compare_relevant_pages(ballot=ballot, ballot_data=ballot_data)

            self.compare_raw_people(ballot=ballot, ballot_data=ballot_data)

        # display some overall totals
        old_total = self.count_people_parsed(old_raw_people)
        new_total = self.count_people_parsed(self.new_raw_people)
        self.stdout.write(
            f"Old total 'people' parsed WAS {old_total}\n"
            f"New total 'people' parsed IS {new_total}"
        )

        # Only baseline entries that actually contain people are counted, to
        # compare against the number of RawPeople objects now in the database
        old_raw_people_obj_count = sum(
            1 for data in old_raw_people.values() if data["raw_people"]
        )
        new_raw_people_obj_count = RawPeople.objects.count()
        style = self.style.SUCCESS
        if new_raw_people_obj_count < old_raw_people_obj_count:
            style = self.style.ERROR
        self.stdout.write(
            style(
                f"Old RawPeople count: {old_raw_people_obj_count}\n"
                f"New total RawPeople count: {new_raw_people_obj_count}"
            )
        )

        for result, ballots in self.candidates_results.items():
            total = len(ballots)
            self.stdout.write(f"{total} ballots parsed {result}")

        # Write a new baseline
        call_command("sopn_tooling_write_baseline")
×
96

UNCOV
97
    def compare_relevant_pages(self, ballot, ballot_data):
        """
        Warn on stdout when a ballot's relevant pages differ from the baseline.

        ``ballot_data`` is the baseline entry for the ballot; a missing
        "relevant_pages" key is treated as an empty string.
        """
        previous = ballot_data.get("relevant_pages", "")
        current = ballot.sopn.relevant_pages
        if previous == current:
            # unchanged, nothing to report
            return

        message = f"RELEVANT PAGES CHANGED FROM {previous} to {current} for {ballot.ballot_paper_id}"
        self.stdout.write(self.style.WARNING(message))
107

UNCOV
108
    def compare_raw_people(self, ballot, ballot_data):
        """
        Compare the freshly parsed raw people for a ballot with the baseline.

        Reports when the number of parsed people went down or up, and when
        people from the baseline are no longer present in the parsed data.
        The new data is then stored in ``self.new_raw_people`` and the parsed
        parties are checked with ``parties_correct``.

        ``ballot_data`` is the baseline entry for the ballot; a missing
        "raw_people" key is treated as an empty list.
        """
        try:
            raw_people = ballot.rawpeople.data
        except RawPeople.DoesNotExist:
            # no RawPeople object exists for this ballot
            raw_people = []

        old_raw_people_for_ballot = ballot_data.get("raw_people", [])
        old_count = len(old_raw_people_for_ballot)
        new_count = len(raw_people)
        if new_count < old_count:
            # NOTE(review): the message says "Stopping." but nothing here
            # halts the command - confirm which of the two is intended
            self.stderr.write(
                f"Uh oh, parsed people for {ballot.ballot_paper_id} decreased from {old_count} to {new_count}. Stopping."
            )

        if new_count > old_count:
            self.stdout.write(
                f"{ballot.ballot_paper_id} increased from {old_count} to {new_count} parsed people.\n"
                f"Check the SOPN at https://candidates.democracyclub.org.uk{ballot.get_sopn_url()}."
            )
            for person in raw_people:
                if person not in old_raw_people_for_ballot:
                    # person is not a string (parties_correct indexes it by
                    # key). Convert it explicitly, as the stderr output below
                    # already does, so the write does not depend on the style
                    # function turning it into text
                    self.stdout.write(self.style.SUCCESS(str(person)))

        # when people parsed have changed e.g. different name/different party print it for further checking
        changed_people = [
            person
            for person in old_raw_people_for_ballot
            if person not in raw_people
        ]
        if changed_people:
            self.stdout.write(
                self.style.WARNING(
                    f"Parsed data changed for {ballot.ballot_paper_id}\n"
                    f"New raw people data:\n"
                    f"{raw_people}\n"
                    "Missing people:"
                )
            )
            for person in changed_people:
                self.stderr.write(str(person))

        self.new_raw_people[ballot.ballot_paper_id] = {"raw_people": raw_people}

        self.parties_correct(ballot, raw_people)
×
152

UNCOV
153
    def count_people_parsed(self, raw_people_data):
        """
        Returns the total number of "people" that were parsed.
        NB that just because something was parsed, it doesn't mean that it was
        accurately parsed. Therefore this total is best used to look for large
        changes that should then be checked in detail.

        ``raw_people_data`` maps a ballot paper ID to a dict holding a
        "raw_people" list. Entries where that key is missing or empty count
        as zero people.
        """
        return sum(
            len(data.get("raw_people") or [])
            for data in raw_people_data.values()
        )
163

UNCOV
164
    def parties_correct(self, ballot, raw_people_for_ballot):
        """
        Record how well the parsed parties for a ballot match its candidates.

        The ballot paper ID is appended to exactly one list in
        ``self.candidates_results``:

        - ZERO_CANDIDATES when nothing was parsed for the ballot
        - CORRECT_EXACTLY when the parsed party IDs match the candidates'
          party IDs, counting duplicates
        - NUM_CORRECT_MISSING_PARTIES when the number of people matches but
          the parties do not
        - NUM_INCORRECT otherwise

        Party differences are written to stderr. When ``self.loud`` is set
        the result of the people count check is also written to stdout.
        Always returns None.
        """
        candidates = Membership.objects.filter(ballot=ballot)
        if not candidates:
            self.stdout.write(
                self.style.WARNING(
                    f"We dont have candidates for {ballot.ballot_paper_id}. Try updating with the live site first?"
                )
            )

        if not raw_people_for_ballot:
            self.candidates_results[self.ZERO_CANDIDATES].append(
                ballot.ballot_paper_id
            )
            return None

        num_candidates_correct = candidates.count() == len(
            raw_people_for_ballot
        )

        if self.loud:
            if num_candidates_correct:
                self.stdout.write(
                    self.style.SUCCESS(
                        f"Correct number of people parsed as expected for {ballot.ballot_paper_id}"
                    )
                )
            else:
                self.stdout.write(
                    self.style.ERROR(
                        f"Incorrect number of people parsed for {ballot.ballot_paper_id}"
                    )
                )

        # Compare as multisets rather than as sorted lists. This way the
        # result does not depend on the database ordering rows the same way
        # Python sorts them, and IDs that cannot be ordered against each
        # other (e.g. None and a string) do not raise.
        parsed = Counter(
            person["party_id"] for person in raw_people_for_ballot
        )
        expected = Counter(
            candidates.values_list("party__ec_id", flat=True)
        )

        if parsed == expected:
            self.candidates_results[self.CORRECT_EXACTLY].append(
                ballot.ballot_paper_id
            )
            return None

        # count number of each missing party ID as there could be more than one
        # missing candidate for a party e.g. 1 missing Green, 2 missing independents
        missing = expected - parsed
        if missing:
            total = sum(missing.values())
            self.stderr.write(
                f"{total} MISSING parties for {ballot.ballot_paper_id} (party_id:num_missing)\n{missing}"
            )

        # sometimes we incorrectly parse extra people - often independents
        # due to an empty row. Reported even when parties are also missing,
        # since a wrongly parsed party shows up as one of each.
        extras = parsed - expected
        if extras:
            total = sum(extras.values())
            self.stderr.write(
                f"{total} EXTRA parties for {ballot.ballot_paper_id}\n{extras}"
            )

        if num_candidates_correct:
            result = self.NUM_CORRECT_MISSING_PARTIES
        else:
            result = self.NUM_INCORRECT
        self.candidates_results[result].append(ballot.ballot_paper_id)
        return None
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc