• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

DemocracyClub / UK-Polling-Stations / ffa7ebc6-ed96-4c7e-8309-df57c737cdd9

pending completion
ffa7ebc6-ed96-4c7e-8309-df57c737cdd9

Pull #5663

circleci

GeoWill
fixup! Write ems importer class for the FCS API
Pull Request #5663: WiP FCS api importer

79 of 79 new or added lines in 1 file covered. (100.0%)

3161 of 4399 relevant lines covered (71.86%)

0.72 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

62.3
/polling_stations/apps/data_importers/ems_importers.py
1
"""
2
Specialised base import classes for handling data exported from
3
popular Electoral Management Software packages
4
"""
5
import abc
1✔
6
import json
1✔
7
import logging
1✔
8
import os
1✔
9
import tempfile
1✔
10
import urllib
1✔
11

12
import requests
1✔
13
from django.core.exceptions import ObjectDoesNotExist
1✔
14
from django.contrib.gis.geos import Point
1✔
15
from django.utils.text import slugify
1✔
16
from addressbase.models import Address
1✔
17
from data_importers.addresshelpers import (
1✔
18
    format_residential_address,
19
    format_polling_station_address,
20
)
21
from data_importers.base_importers import (
1✔
22
    BaseCsvStationsCsvAddressesImporter,
23
    BaseGenericApiImporter,
24
    BaseStationsImporter,
25
    BaseAddressesImporter,
26
)
27
from data_finder.helpers import geocode_point_only, PostcodeError
1✔
28
from uk_geo_utils.helpers import Postcode
1✔
29

30
from data_importers.data_types import StationSet, AddressList
1✔
31

32
"""
1✔
33
We see a lot of CSVs exported from Xpress
34
electoral service software: http://www.xssl.uk/
35
with the addresses and stations in a single CSV file
36

37
There are 2 formats we see:
38
* WebLookup export (hopefully we will start seeing fewer of these)
39
* DemocracyClub export (hopefully we will start seeing more of these)
40
This is the parent class for both of them.
41
"""
42

43

44
class BaseXpressCsvImporter(BaseCsvStationsCsvAddressesImporter, metaclass=abc.ABCMeta):
    """
    Shared behaviour for single-file CSV exports from Xpress.

    Concrete subclasses name the CSV columns that hold the station id,
    address lines, postcode and grid reference. This class converts a
    station row into a dict and picks the best available station point:
    grid reference first, then UPRN, then postcode.
    """

    csv_delimiter = ","

    # NOTE(review): a station point is only geocoded from its postcode when
    # self.allow_station_point_from_postcode is truthy. That attribute is not
    # defined in this class (presumably it is inherited - confirm); an import
    # script can set it to False to rely on UPRN or co-ordinates only, even
    # when a valid postcode is present.

    @property
    @abc.abstractmethod
    def station_postcode_field(self):
        """Name of the column holding the station postcode."""

    @property
    @abc.abstractmethod
    def station_address_fields(self):
        """Names of the columns that make up the station address."""

    @property
    @abc.abstractmethod
    def station_id_field(self):
        """Name of the column holding the station id."""

    @property
    @abc.abstractmethod
    def easting_field(self):
        """Name of the column holding the station easting."""

    @property
    @abc.abstractmethod
    def northing_field(self):
        """Name of the column holding the station northing."""

    @property
    def station_uprn_field(self):
        """Name of the column holding the station UPRN; None means no such column."""
        return None

    def get_station_hash(self, record):
        """Return the value of the station id column for this record."""
        station_id = getattr(record, self.station_id_field)
        return "-".join([station_id])

    def get_station_address(self, record):
        """Return the formatted station address built from the address columns."""
        address_lines = []
        for field in self.station_address_fields:
            address_lines.append(getattr(record, field).strip())
        return format_polling_station_address(address_lines)

    def get_station_postcode(self, record):
        """Return the whitespace-stripped contents of the station postcode column."""
        raw_postcode = getattr(record, self.station_postcode_field)
        return raw_postcode.strip()

    def geocode_from_postcode(self, record):
        """
        Return the centroid geocoded from the station postcode.

        Returns None when postcode geocoding is switched off, when the
        postcode is empty, or when geocoding raises PostcodeError.
        """
        if not self.allow_station_point_from_postcode:
            return None

        postcode = self.get_station_postcode(record)
        if not postcode:
            return None

        try:
            return geocode_point_only(postcode).centroid
        except PostcodeError:
            return None

    def geocode_from_uprn(self, record):
        """
        Return the location of the Address row matching the station UPRN.

        Leading zeros are removed from the UPRN before the lookup. When the
        Address postcode differs from the station postcode a warning is
        logged that includes a suggested correction. The exception raised by
        Address.objects.get for an unknown UPRN is not handled here.
        """
        raw_uprn = getattr(record, self.station_uprn_field)
        ab_rec = Address.objects.get(uprn=raw_uprn.lstrip("0"))
        ab_postcode = Postcode(ab_rec.postcode)
        station_postcode = Postcode(self.get_station_postcode(record))

        if ab_postcode != station_postcode:
            ab_address = ab_rec.address
            rec_address = self.get_station_address(record).replace(os.linesep, ", ")
            station_id = getattr(record, self.station_id_field)
            warning_lines = [
                "Geocoding with UPRN. Station record postcode does not match addressbase postcode.",
                f"Station address: '{rec_address}, {station_postcode.with_space}' (id: {station_id})",
                f"Addressbase: '{ab_address}, {ab_postcode.with_space}'",
                "SUGGESTION:",
                f"        # '{rec_address}, {station_postcode.with_space}' (id: {station_id})",
                f"        if record.{self.station_id_field} == '{station_id}': record = record._replace({self.station_postcode_field}='{ab_postcode.with_space}')",
            ]
            self.logger.log_message(logging.WARNING, "\n".join(warning_lines) + "\n")

        return ab_rec.location

    def get_station_point(self, record):
        """
        Work out the station point, logging which source was used.

        Order of preference: grid reference columns (when both exist and
        neither is "0" or empty), then UPRN (when a UPRN column is configured
        and populated), then postcode. A UPRN lookup that raises
        ObjectDoesNotExist falls back to the postcode.
        """
        has_grid_columns = hasattr(record, self.easting_field) and hasattr(
            record, self.northing_field
        )
        if has_grid_columns:
            easting = getattr(record, self.easting_field)
            northing = getattr(record, self.northing_field)
            if easting not in ("0", "") and northing not in ("0", ""):
                # if we've got points, use them
                point = Point(float(easting), float(northing), srid=27700)
                self.logger.log_message(
                    logging.INFO,
                    "using grid reference for station %s",
                    getattr(record, self.station_id_field),
                )
                return point

        if self.station_uprn_field and getattr(record, self.station_uprn_field).strip():
            # if we have a UPRN, try that
            try:
                point = self.geocode_from_uprn(record)
                self.logger.log_message(
                    logging.INFO,
                    "using UPRN for station %s",
                    getattr(record, self.station_id_field),
                )
                return point
            except ObjectDoesNotExist:
                # unknown UPRN: drop through to the postcode lookup below
                pass

        # otherwise (or after a failed UPRN lookup), geocode using postcode
        point = self.geocode_from_postcode(record)
        self.logger.log_message(
            logging.INFO,
            "using postcode for station %s",
            getattr(record, self.station_id_field),
        )
        return point

    def station_record_to_dict(self, record):
        """Convert a station row into the dict of id, postcode, address and location."""
        station_address = self.get_station_address(record)
        station_point = self.get_station_point(record)
        station = {}
        station["internal_council_id"] = getattr(record, self.station_id_field).strip()
        station["postcode"] = self.get_station_postcode(record)
        station["address"] = station_address.strip()
        station["location"] = station_point
        return station
188

189

190
"""
1✔
191
Specialised case of BaseCsvStationsCsvAddressesImporter
192
with some sensible presets for processing WebLookup
193
CSVs exported from Xpress
194
"""
195

196

197
class BaseXpressWebLookupCsvImporter(BaseXpressCsvImporter, metaclass=abc.ABCMeta):
    """Column presets and residential-address parsing for Xpress WebLookup CSVs."""

    station_postcode_field = "pollingplaceaddress7"
    station_address_fields = [
        "pollingplaceaddress1",
        "pollingplaceaddress2",
        "pollingplaceaddress3",
        "pollingplaceaddress4",
        "pollingplaceaddress5",
        "pollingplaceaddress6",
    ]
    station_id_field = "pollingplaceid"
    easting_field = "pollingplaceeasting"
    northing_field = "pollingplacenorthing"
    residential_uprn_field = "uprn"

    def address_record_to_dict(self, record):
        """
        Convert a residential row into an address dict.

        Returns None for rows with an empty postcode. A property number of
        "0" or "" is left out of the address.
        """
        postcode = record.postcode.strip()
        if postcode == "":
            return None

        property_number = record.propertynumber.strip()
        street_name = record.streetname.strip()
        if property_number in ("0", ""):
            address = street_name
        else:
            address = f"{property_number} {street_name}"

        uprn = getattr(record, self.residential_uprn_field).strip()
        station_id = getattr(record, self.station_id_field).strip()

        return {
            "address": address.strip(),
            "postcode": postcode,
            "polling_station_id": station_id,
            "uprn": uprn,
        }
232

233

234
"""
1✔
235
Specialised case of BaseCsvStationsCsvAddressesImporter
236
with some sensible presets for processing DemocracyClub
237
CSVs exported from Xpress
238
"""
239

240

241
class BaseXpressDemocracyClubCsvImporter(BaseXpressCsvImporter, metaclass=abc.ABCMeta):
    """Column presets and residential-address parsing for Xpress DemocracyClub CSVs."""

    station_postcode_field = "polling_place_postcode"
    station_address_fields = [
        "polling_place_name",
        "polling_place_address_1",
        "polling_place_address_2",
        "polling_place_address_3",
        "polling_place_address_4",
    ]
    station_id_field = "polling_place_id"
    station_uprn_field = "polling_place_uprn"
    easting_field = "polling_place_easting"
    northing_field = "polling_place_northing"
    residential_uprn_field = "property_urn"

    def address_record_to_dict(self, record):
        """
        Convert a residential row into an address dict.

        addressline6 is used as the postcode; rows where it is empty are
        skipped by returning None. addressline1-5 form the address.
        """
        postcode = record.addressline6.strip()
        if postcode == "":
            return None

        address_lines = [
            record.addressline1,
            record.addressline2,
            record.addressline3,
            record.addressline4,
            record.addressline5,
        ]
        address = format_residential_address(address_lines)

        uprn = getattr(record, self.residential_uprn_field).strip()
        station_id = getattr(record, self.station_id_field).strip()

        return {
            "address": address.strip(),
            "postcode": postcode,
            "polling_station_id": station_id,
            "uprn": uprn,
        }
278

279

280
"""
1✔
281
Sometimes the postcode doesn't appear in a consistent
282
column and we need to work around that
283
"""
284

285

286
class BaseXpressDCCsvInconsistentPostcodesImporter(
    BaseXpressDemocracyClubCsvImporter, metaclass=abc.ABCMeta
):
    """
    Variant for exports where the station postcode is not always in the
    postcode column.

    The station address is built from every address column (postcode column
    included), the station dict's "postcode" is always left empty, and the
    value used for geocoding is searched for across several columns.
    """

    # concat all the address columns together into address
    # don't bother trying to split into address/postcode
    station_address_fields = [
        "polling_place_name",
        "polling_place_address_1",
        "polling_place_address_2",
        "polling_place_address_3",
        "polling_place_address_4",
        "polling_place_postcode",
    ]
    # columns searched, in this order, for a value to geocode with
    station_postcode_search_fields = [
        "polling_place_postcode",
        "polling_place_address_4",
        "polling_place_address_3",
    ]

    def station_record_to_dict(self, record):
        """Convert a station row into a station dict with an empty postcode."""
        address = self.get_station_address(record)
        location = self.get_station_point(record)
        return {
            "internal_council_id": getattr(record, self.station_id_field).strip(),
            "postcode": "",  # don't rely on get_station_postcode()
            "address": address.strip(),
            "location": location,
        }

    def get_station_postcode(self, record):
        """
        Return the first populated value among the postcode search columns.

        The postcode does not appear in a consistent column, so the columns
        in station_postcode_search_fields are tried in order and the first
        non-blank value (whitespace stripped) is returned for geocoding.
        Returns None when every search column is blank.
        """
        for field in self.station_postcode_search_fields:
            # Strip before testing: a whitespace-only column is not populated
            # and must not stop the search with an empty string.
            value = (getattr(record, field) or "").strip()
            if value:
                return value
        return None
×
323

324

325
"""
1✔
326
We see a lot of CSVs exported from Halarose
327
electoral service software: https://www.halarose.co.uk/
328
with the addresses and stations in a single CSV file
329

330
This is a specialised case of BaseCsvStationsCsvAddressesImporter
331
with some sensible presets for processing CSVs in this format
332
but we can override them if necessary
333
"""
334

335

336
class BaseHalaroseCsvImporter(
    BaseCsvStationsCsvAddressesImporter, metaclass=abc.ABCMeta
):
    """Column presets and row parsing for single-file Halarose CSV exports."""

    csv_delimiter = ","
    station_postcode_field = "pollingstationpostcode"
    station_address_fields = [
        "pollingstationname",
        "pollingstationaddress_1",
        "pollingstationaddress_2",
        "pollingstationaddress_3",
        "pollingstationaddress_4",
        "pollingstationaddress_5",
    ]
    residential_uprn_field = "uprn"

    def get_station_hash(self, record):
        """Return "<station number>-<slugified station name, max 90 chars>"."""
        station_number = record.pollingstationnumber.strip()
        name_slug = slugify(record.pollingstationname.strip())[:90]
        return "-".join([station_number, name_slug])

    def get_station_address(self, record):
        """Return the formatted station address, leaving out blank columns."""
        stripped_values = (
            getattr(record, field).strip() for field in self.station_address_fields
        )
        populated = [value for value in stripped_values if value]
        return format_polling_station_address(populated)

    def get_station_point(self, record):
        """
        Return the centroid geocoded from the station postcode.

        Returns None when postcode geocoding is switched off, the postcode is
        empty, or geocoding raises PostcodeError.
        """
        if not self.allow_station_point_from_postcode:
            return None

        # geocode using postcode
        postcode = getattr(record, self.station_postcode_field).strip()
        if postcode == "":
            return None

        try:
            return geocode_point_only(postcode).centroid
        except PostcodeError:
            return None

    def station_record_to_dict(self, record):
        """Convert a row into a station dict; rows with station number "n/a" give None."""
        if record.pollingstationnumber.strip() == "n/a":
            return None

        station_address = self.get_station_address(record)
        station_point = self.get_station_point(record)
        station = {}
        station["internal_council_id"] = self.get_station_hash(record)
        station["postcode"] = getattr(record, self.station_postcode_field).strip()
        station["address"] = station_address.strip()
        station["location"] = station_point
        return station

    def get_residential_address(self, record):
        """
        Build the formatted residential address for a row.

        Any component equal to "n/a" is treated as empty. When a sub-street
        name is present it gets its own line (with the house number) and the
        street number and name go on the following line; otherwise the house
        number and street name share one line.
        """

        def clean(value):
            stripped = value.strip()
            return "" if stripped == "n/a" else stripped

        first_line = clean(record.housename)
        if clean(record.substreetname):
            second_line = f"{clean(record.housenumber)} {clean(record.substreetname)}".strip()
            third_line = f"{clean(record.streetnumber)} {clean(record.streetname)}".strip()
        else:
            second_line = f"{clean(record.housenumber)} {clean(record.streetname)}".strip()
            third_line = ""

        address_parts = [
            first_line.strip(),
            second_line.strip(),
            third_line.strip(),
            clean(record.locality),
            clean(record.town),
            clean(record.adminarea),
        ]
        return format_residential_address(address_parts).strip()

    def address_record_to_dict(self, record):
        """
        Convert a residential row into an address dict.

        Returns None for the "other electors" style placeholder rows and for
        rows with an empty house postcode. A station number of "n/a" yields
        an empty polling_station_id.
        """
        placeholder_streets = (
            "other electors",
            "other voters",
            "other electors address",
        )
        if record.streetname.lower().strip() in placeholder_streets:
            return None

        if record.housepostcode.strip() == "":
            return None

        address = self.get_residential_address(record)

        has_station = record.pollingstationnumber.strip() != "n/a"
        station_id = self.get_station_hash(record) if has_station else ""

        uprn = getattr(record, self.residential_uprn_field).strip()

        return {
            "address": address,
            "postcode": record.housepostcode.strip(),
            "polling_station_id": station_id,
            "uprn": uprn,
        }
460

461

462
"""
1✔
463
We see a lot of CSVs exported from Democracy Counts
464
electoral service software: http://www.democracycounts.co.uk/
465
with the addresses and stations in a single CSV file
466

467
This is a specialised case of BaseCsvStationsCsvAddressesImporter
468
with some sensible presets for processing CSVs in this format
469
but we can override them if necessary
470
"""
471

472

473
class BaseDemocracyCountsCsvImporter(
    BaseCsvStationsCsvAddressesImporter, metaclass=abc.ABCMeta
):
    """
    Column presets and row parsing for single-file Democracy Counts CSVs.

    The same address and postcode columns are used for both residential
    rows and station rows; subclasses may override the *_field attributes.
    """

    csv_delimiter = ","
    station_name_field = "placename"
    address_fields = ["add1", "add2", "add3", "add4", "add5", "add6"]
    postcode_field = "postcode"
    station_id_field = "stationcode"
    residential_uprn_field = "uprn"

    def address_record_to_dict(self, record):
        """
        Convert a residential row into an address dict.

        Returns None for dummy rows (postcode "A1 1AA" or an address
        containing "Dummy Record") and for rows with an empty postcode.
        """
        postcode = getattr(record, self.postcode_field).strip()
        if postcode == "A1 1AA":
            # this is a dummy record
            return None

        if not postcode:
            return None

        address = format_residential_address(
            [getattr(record, field) for field in self.address_fields]
        )

        if "Dummy Record" in address:
            return None

        uprn = getattr(record, self.residential_uprn_field).strip()

        return {
            "address": address,
            "postcode": postcode,
            "polling_station_id": getattr(record, self.station_id_field).strip(),
            "uprn": uprn,
        }

    def get_station_point(self, record):
        """
        Return the station point from the xordinate/yordinate columns, or
        from the postcode when no usable co-ordinates are present.

        Returns None when there are no usable co-ordinates and postcode
        geocoding is switched off, the postcode is empty, or geocoding
        raises PostcodeError.
        """
        badvalues = ["", "0", "0.00"]
        # Strip first so a whitespace-only ordinate counts as missing rather
        # than reaching float() and raising ValueError.
        x_ordinate = record.xordinate.strip()
        y_ordinate = record.yordinate.strip()
        if x_ordinate not in badvalues and y_ordinate not in badvalues:
            # if we've got points, use them
            return Point(float(x_ordinate), float(y_ordinate), srid=27700)

        if not self.allow_station_point_from_postcode:
            return None

        # otherwise, geocode using postcode. Read the column through
        # postcode_field, like the rest of the class, so that a subclass
        # overriding postcode_field is honoured here too.
        postcode = getattr(record, self.postcode_field).strip()
        if postcode == "":
            return None

        try:
            return geocode_point_only(postcode).centroid
        except PostcodeError:
            return None

    def station_record_to_dict(self, record):
        """Convert a row into a station dict of id, postcode, address and location."""
        address = format_polling_station_address(
            [getattr(record, self.station_name_field)]
            + [getattr(record, field) for field in self.address_fields]
        )

        location = self.get_station_point(record)

        return {
            "internal_council_id": getattr(record, self.station_id_field).strip(),
            "postcode": getattr(record, self.postcode_field).strip(),
            "address": address,
            "location": location,
        }
547

548

549
class BaseFcsDemocracyClubApiImporter(
    BaseStationsImporter, BaseAddressesImporter, metaclass=abc.ABCMeta
):
    """
    Base importer for stations and addresses served as JSON by the FCS API.

    A single endpoint (stations_url) returns a list of stations. Each
    station carries a nested "properties" list, which is flattened into the
    residential address records. The API key is read from the environment
    variable FCS_API_KEY_<council_id>.
    """

    local_files = False
    # File type names handed to get_data(); both payloads are JSON.
    addresses_filetype = "json"
    addresses_name = None
    stations_url = None
    stations_name = None
    # Co-ordinates are read from "latitude"/"longitude" keys, hence EPSG:4326.
    srid = 4326
    stations_filetype = "json"
    station_name_field = "name"
    address_fields = [
        "addressLine1",
        "addressLine2",
        "addressLine3",
        "addressLine4",
        "addressLine5",
    ]
    postcode_field = "addressPostCode"
    station_id_field = "id"
    residential_uprn_field = "addressUprn"

    def get_api_key(self):
        """Return the API key from FCS_API_KEY_<council_id>, or None if unset."""
        return os.environ.get(f"FCS_API_KEY_{self.council_id}")

    def pre_import(self):
        """Optional hook run before the import; subclasses may override it."""
        raise NotImplementedError

    def import_data(self):
        """Fetch, check and save addresses and stations."""
        # Optional step for pre import tasks
        try:
            self.pre_import()
        except NotImplementedError:
            pass

        self.stations = StationSet()
        self.addresses = AddressList(self.logger)
        self.import_residential_addresses()
        self.import_polling_stations()
        self.addresses.check_records()
        self.addresses.update_uprn_to_council_model()
        self.stations.save()

    def _fetch_stations_response(self):
        """Request stations_url with the API key; raise on an HTTP error status."""
        # NOTE(review): verify=False turns off TLS certificate verification
        # while the API key is being sent. Kept as in the original in case it
        # works around a certificate problem on the API host - confirm and
        # remove if possible.
        response = requests.get(
            self.stations_url,
            headers={
                "X-API-KEY": self.get_api_key(),
                "User-Agent": "Scraper/DemocracyClub",
                "Accept": "*/*",
            },
            verify=False,
        )
        # Fail here with a clear HTTP error instead of trying to parse an
        # error page as station data.
        response.raise_for_status()
        return response

    def _get_postcode(self, record):
        """Return the record's stripped postcode; a missing/None value gives ""."""
        return (record.get(self.postcode_field) or "").strip()

    def get_addresses(self):
        """
        Return address records parsed from the stations' nested "properties".

        Every property is tagged with its parent station's id (under
        station_id_field) so the address can be linked to its station.
        """
        response = self._fetch_stations_response()
        addresses = []
        for station in response.json():
            station_id = station[self.station_id_field]
            residences = station.get("properties") or []
            for residence in residences:
                residence[self.station_id_field] = station_id
            addresses.extend(residences)

        with tempfile.NamedTemporaryFile("w") as tmp:
            tmp.write(json.dumps(addresses))
            # get_data() re-opens the file by name, so buffered output must
            # reach the disk first; otherwise it can read a partial or empty
            # file (the likely source of the JsonDecodeError seen previously).
            tmp.flush()
            return self.get_data(self.addresses_filetype, tmp.name)

    def address_record_to_dict(self, record):
        """
        Convert an address record into an address dict.

        Returns None for records without a postcode. A missing UPRN becomes
        an empty string.
        """
        postcode = self._get_postcode(record)
        if not postcode:
            return None

        address = format_residential_address(
            [record.get(field) for field in self.address_fields]
        )

        raw_uprn = record.get(self.residential_uprn_field)
        # str(None) would store the literal text "None" as a UPRN.
        uprn = "" if raw_uprn is None else str(raw_uprn)

        return {
            "address": address,
            "postcode": postcode,
            "polling_station_id": str(record.get(self.station_id_field)),
            "uprn": uprn,
        }

    def get_stations(self):
        """Return station records parsed from the raw API response body."""
        response = self._fetch_stations_response()
        with tempfile.NamedTemporaryFile() as tmp:
            tmp.write(response.content)
            # Flush before get_data() re-opens the file by name.
            tmp.flush()
            return self.get_data(self.stations_filetype, tmp.name)

    def get_station_point(self, record):
        """
        Return the station point from latitude/longitude, else from postcode.

        Returns None when there are no usable co-ordinates and postcode
        geocoding is switched off, the postcode is empty, or geocoding
        raises PostcodeError.
        """
        badvalues = ["", 0, None]
        latitude = record.get("latitude")
        longitude = record.get("longitude")
        if latitude not in badvalues and longitude not in badvalues:
            # if we've got points, use them.
            # Point takes (x, y), i.e. (longitude, latitude).
            return Point(float(longitude), float(latitude), srid=self.srid)

        if not self.allow_station_point_from_postcode:
            return None

        # otherwise, geocode using postcode
        postcode = self._get_postcode(record)
        if postcode == "":
            return None

        try:
            return geocode_point_only(postcode).centroid
        except PostcodeError:
            return None

    def station_record_to_dict(self, record):
        """Convert a station record into a station dict."""
        address = format_polling_station_address(
            [record.get(self.station_name_field)]
            + [record.get(field) for field in self.address_fields]
        )

        location = self.get_station_point(record)

        return {
            # Stringified to match the polling_station_id produced by
            # address_record_to_dict(), so the two ids compare equal.
            "internal_council_id": str(record.get(self.station_id_field)),
            "postcode": self._get_postcode(record),
            "address": address,
            "location": location,
        }
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc