• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

rero / sonar / 17425918180

03 Sep 2025 07:11AM UTC coverage: 95.796% (-0.6%) from 96.378%
17425918180

push

github

PascalRepond
translations: extract messages

Co-Authored-by: Pascal Repond <pascal.repond@rero.ch>

7816 of 8159 relevant lines covered (95.8%)

0.96 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

92.76
/sonar/modules/documents/dojson/rerodoc/model.py
1
# Swiss Open Access Repository
2
# Copyright (C) 2021 RERO
3
#
4
# This program is free software: you can redistribute it and/or modify
5
# it under the terms of the GNU Affero General Public License as published by
6
# the Free Software Foundation, version 3 of the License.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU Affero General Public License for more details.
12
#
13
# You should have received a copy of the GNU Affero General Public License
14
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
15

16
"""RERODOC MARC21 model definition."""
17

18
import hashlib
1✔
19
import re
1✔
20

21
from dojson import utils
1✔
22
from flask import current_app
1✔
23
from invenio_db import db
1✔
24

25
from sonar.modules.collections.api import Record as CollectionRecord
1✔
26
from sonar.modules.documents.dojson.rerodoc.overdo import Overdo
1✔
27
from sonar.modules.organisations.api import OrganisationRecord
1✔
28
from sonar.modules.subdivisions.api import Record as SubdivisionRecord
1✔
29
from sonar.modules.subdivisions.api import RecordSearch
1✔
30
from sonar.modules.utils import remove_trailing_punctuation
1✔
31

32
# Shared Overdo instance; every conversion rule in this module registers
# itself on it through the `@overdo.over(...)` decorator.
overdo = Overdo()

# Document type lookup table.
# Keys have the form "<980$a>|<980$f>" (the part after the pipe is empty when
# there is no subtype); values are `coar:`-prefixed type identifiers stored
# in the `documentType` property of the converted record.
TYPE_MAPPINGS = {
    "PREPRINT|": "coar:c_816b",
    "POSTPRINT|ART_JOURNAL": "coar:c_6501",
    "POSTPRINT|ART_INBOOK": "coar:c_3248",
    "POSTPRINT|ART_INPROC": "coar:c_5794",
    "BOOK|": "coar:c_2f33",
    "DISSERTATION|DISS_MASTER": "coar:c_bdcc",
    "DISSERTATION|DISS_BACHELOR": "coar:c_7a1f",
    "DISSERTATION|DISS_CONT_EDU": "coar:c_46ec",
    "THESIS|": "coar:c_db06",
    "THESIS|TH_PHD": "coar:c_db06",
    "THESIS|TH_HABILIT": "coar:c_46ec",
    "MAP|": "coar:c_12cc",
    "REPORT|": "coar:c_18ws",
    "NEWSPAPER|": "coar:c_2fe3",
    "JOURNAL|": "coar:c_0640",
    "PRINT_MEDIA|": "coar:c_2fe3",
    "AUDIO|": "coar:c_18cc",
    "IMAGE|": "coar:c_ecc8",
    "PARTITION|": "coar:c_18cw",
}
55

56

57
@overdo.over("type", "^980")
@utils.ignore_value
def marc21_to_type_and_organisation(self, key, value):
    """Get document type and organisation from 980 field.

    The organisation code is read from ``980$b`` and the document type from
    ``980$a`` / ``980$f``. Everything is written directly into ``self``
    (``organisation``, ``subdivisions``, ``customField1``, ``customField2``
    and ``documentType``).

    :param key: Key of the matched MARC field (unused).
    :param value: Subfields of the 980 field.
    :returns: Always ``None``; the results are stored on ``self``.
    """
    # organisation
    if value.get("b"):
        organisation = value.get("b").lower()

        # Specific transformation for `unisi`, because the real acronym is
        # `usi`.
        if organisation == "unisi":
            organisation = "usi"

        # Specific transformation for `hep bejune`, in order to fill out
        # custom fields `Filière` (`customField1`) and `Titre obtenu`
        # (`customField2`)
        if organisation == "hepbejune" and value.get("f"):
            document_subtype = value.get("f").lower()
            if document_subtype == "diss_bachelor":
                self["customField1"] = ["Enseignement primaire"]
                self["customField2"] = ["Bachelor of Arts in Pre-Primary and Primary Education"]
            elif document_subtype == "diss_master":
                self["customField1"] = ["Enseignement secondaire"]
                self["customField2"] = ["Master of Arts or of Science in Secondary Education"]

        # Specific transformation for `hepfr`, which should be imported as
        # a faculty AND a subdivision of FOLIA/unifr
        if organisation == "hepfr":
            organisation = "unifr"
            self["organisation"] = [{"$ref": OrganisationRecord.get_ref_link("organisations", organisation)}]
            # `hepfr` is a faculty of FOLIA/unifr
            self["customField1"] = ["HEP|PH FR"]
            # `hepfr` is a subdivision of FOLIA/unifr
            # TODO: avoid possible clashes between subdivision
            # names in different languages
            result = (
                RecordSearch()
                .filter("term", organisation__pid=organisation)
                .filter("term", name__value__raw="HEP Fribourg")
                .source(includes="pid")
                .scan()
            )
            # The search may yield nothing: fall back to `None` instead of
            # letting `next()` raise StopIteration and abort the conversion.
            hit = next(result, None)
            subdivision_pid = hit.pid if hit is not None else None
            # If the subdivision exists, assign it to the record
            if subdivision_pid:
                self["subdivisions"] = [{"$ref": SubdivisionRecord.get_ref_link("subdivisions", subdivision_pid)}]

        # Specific transformation for `bpuge`, `mhnge` and the other codes
        # listed below, because the real acronym is `vge`. The original code
        # becomes the subdivision name (`bpuge` is stored as `bge`).
        subdivision_name = None

        if organisation in ("bpuge", "mhnge", "baage", "bmuge", "imvge", "mhsge"):
            subdivision_name = "bge" if organisation == "bpuge" else organisation
            organisation = "vge"

        # Create the organisation the first time it is encountered.
        if organisation not in overdo.registererd_organisations:
            overdo.create_organisation(organisation)
            overdo.registererd_organisations.append(organisation)

        self["organisation"] = [{"$ref": OrganisationRecord.get_ref_link("organisations", organisation)}]

        if subdivision_name:
            # Store subdivision
            hash_key = hashlib.md5((subdivision_name + organisation).encode()).hexdigest()

            subdivision_pid = SubdivisionRecord.get_pid_by_hash_key(hash_key)

            # No subdivision found
            if not subdivision_pid:
                subdivision = SubdivisionRecord.create(
                    {
                        "name": [{"language": "eng", "value": subdivision_name}],
                        "organisation": {"$ref": OrganisationRecord.get_ref_link("organisations", organisation)},
                        "hashKey": hash_key,
                    }
                )
                subdivision.commit()
                subdivision.reindex()
                db.session.commit()
                subdivision_pid = subdivision["pid"]

            self["subdivisions"] = [{"$ref": SubdivisionRecord.get_ref_link("subdivisions", subdivision_pid)}]

    # get doc type by mapping
    type_key = value.get("a", "") + "|" + value.get("f", "")
    if type_key not in TYPE_MAPPINGS:
        current_app.logger.warning(f'Document type not found in mapping for type "{type_key}"')
        return None

    # Store types to records
    self["documentType"] = TYPE_MAPPINGS[type_key]

    return None
1✔
160

161

162
@overdo.over("language", "^041")
@utils.for_each_value
@utils.ignore_value
def marc21_to_language(self, key, value):
    """Collect the language codes of field 041 into ``self["language"]``.

    :param key: Key of the matched MARC field (unused).
    :param value: Subfields of the 041 field; codes are read from ``$a``.
    :returns: Always ``None``; languages are appended on ``self``.
    """
    codes = value.get("a")
    if not codes:
        return None

    languages = self.get("language", [])
    # `$a` may hold one code or several, hence `force_list`.
    languages.extend({"type": "bf:Language", "value": code} for code in utils.force_list(codes))
    self["language"] = languages

    return None
1✔
178

179

180
@overdo.over("title", "^245..")
@utils.for_each_value
@utils.ignore_value
def marc21_to_title_245(self, key, value):
    """Build a title entry from field 245.

    :param key: Key of the matched MARC field (unused).
    :param value: Subfields: ``$a`` main title, ``$b`` subtitle, ``$9``
        language (``eng`` when absent).
    :returns: The title dictionary, or ``None`` when there is no ``$a``.
    """
    main_title = value.get("a")
    if not main_title:
        return None

    language = value.get("9", "eng")

    # Trailing colons are removed from the main title.
    result = {
        "type": "bf:Title",
        "mainTitle": [{"value": main_title.rstrip(":"), "language": language}],
    }

    subtitle = value.get("b")
    if subtitle:
        result["subtitle"] = [{"value": subtitle, "language": language}]

    return result
1✔
201

202

203
@overdo.over("title", "^246..")
@utils.for_each_value
@utils.ignore_value
def marc21_to_title_246(self, key, value):
    """Add the title of field 246 to the last stored title.

    :param key: Key of the matched MARC field (unused).
    :param value: Subfields: ``$a`` title, ``$9`` language (``eng`` when
        absent).
    :returns: Always ``None``; ``self["title"]`` is updated in place.
    """
    extra_title = value.get("a")
    if not extra_title:
        return None

    # Start from an empty title when none has been stored yet.
    titles = self.get("title", [{"type": "bf:Title", "mainTitle": []}])

    # Add title 246 to the last title in the mainTitle property
    entry = {"value": extra_title, "language": value.get("9", "eng")}
    titles[-1]["mainTitle"].append(entry)

    self["title"] = titles

    return None
1✔
222

223

224
@overdo.over("editionStatement", "^250..")
@utils.ignore_value
def marc21_to_edition_statement(self, key, value):
    """Build the edition statement from field 250.

    :param key: Key of the matched MARC field (unused).
    :param value: Subfields: ``$a`` designation and ``$b`` responsibility.
    :returns: The edition statement, or ``None`` unless both subfields are
        present.
    """
    designation = value.get("a")
    responsibility = value.get("b")

    # Both parts are required.
    if not (designation and responsibility):
        return None

    return {
        "editionDesignation": {"value": designation},
        "responsibility": {"value": responsibility},
    }
235

236

237
@overdo.over("provisionActivity", "^260..")
@utils.for_each_value
@utils.ignore_value
def marc21_to_provision_activity_field_260(self, key, value):
    """Store publication and manufacture data found in field 260.

    :param key: Key of the matched MARC field (unused).
    :param value: Subfields: ``$a`` place, ``$b`` agent, ``$c`` date,
        ``$e`` manufacture place, ``$f`` manufacturer.
    :returns: Always ``None``; ``self["provisionActivity"]`` is updated.
    """
    year_pattern = r"^\d{4}$"
    activities = self.get("provisionActivity", [])

    # A publication is only recorded if there is a date
    date = value.get("c")
    if date:
        statements = []

        # Place
        place = value.get("a")
        if place:
            statements.append({"type": "bf:Place", "label": [{"value": place}]})

        # Agent
        agent = value.get("b")
        if agent:
            statements.append(
                {
                    "type": "bf:Agent",
                    "label": [{"value": remove_trailing_punctuation(agent)}],
                }
            )

        publication = {"type": "bf:Publication", "statement": statements}

        # The date is either "YYYY" or a "YYYY-YYYY" range.
        years = date.split("-")

        # Start date, together with the date statement
        if re.match(year_pattern, years[0]):
            publication["startDate"] = years[0]
            statements.append({"type": "Date", "label": [{"value": date}]})

        # End date
        if len(years) > 1 and re.match(year_pattern, years[1]):
            publication["endDate"] = years[1]

        activities.append(publication)

    # Manufacture
    manufacture_place = value.get("e")
    manufacturer = value.get("f")
    if manufacture_place or manufacturer:
        manufacture_statements = []

        if manufacture_place:
            manufacture_statements.append(
                {
                    "type": "bf:Place",
                    "label": [{"value": remove_trailing_punctuation(manufacture_place)}],
                }
            )

        if manufacturer:
            manufacture_statements.append({"type": "bf:Agent", "label": [{"value": manufacturer}]})

        activities.append({"type": "bf:Manufacture", "statement": manufacture_statements})

    # Re-assign provision activity
    if activities:
        self["provisionActivity"] = activities

    return None
1✔
297

298

299
@overdo.over("provisionActivity", "^269..")
@utils.ignore_value
def marc21_to_provision_activity_field_269(self, key, value):
    """Use the date of field 269 as publication start date.

    :param key: Key of the matched MARC field (unused).
    :param value: Subfields of the 269 field; the date is read from ``$c``.
    :returns: Always ``None``; the date is stored through
        ``add_provision_activity_start_date``.
    """
    # 260$c takes priority over this date
    if overdo.blob_record.get("260__", {}).get("c"):
        return None

    date = value.get("c")

    # No date, skipping
    if not date:
        return None

    # Only "YYYY" or "YYYY-MM-DD" dates are accepted
    if re.search(r"^[0-9]{4}(-[0-9]{2}-[0-9]{2})?$", date):
        add_provision_activity_start_date(self, date)

    return None
1✔
321

322

323
@overdo.over("formats", "^300..")
@utils.ignore_value
def marc21_to_description(self, key, value):
    """Get extent, otherMaterialCharacteristics, formats.

    extent: 300$a (the first one if many)
    otherMaterialCharacteristics: 300$b (the first one if many)
    formats: 300 [$c repetitive]

    :param key: Key of the matched MARC field (unused).
    :param value: Subfields of the 300 field.
    :returns: The list of formats, or ``None`` when there is no ``$c``.
    """
    # Extent is stored once only.
    if value.get("a") and not self.get("extent"):
        extent = overdo.not_repetitive(value, "a")
        self["extent"] = remove_trailing_punctuation(extent)

    # Same for the other material characteristics.
    if value.get("b") and self.get("otherMaterialCharacteristics", []) == []:
        characteristics = overdo.not_repetitive(value, "b")
        self["otherMaterialCharacteristics"] = remove_trailing_punctuation(characteristics)

    dimensions = value.get("c")
    if not dimensions:
        return None

    # Formats that are already stored are kept as they are.
    return self.get("formats") or list(utils.force_list(dimensions))
1✔
348

349

350
@overdo.over("series", "^490..")
@utils.for_each_value
@utils.ignore_value
def marc21_to_series(self, key, value):
    """Get series.

    series.name: [490$a repetitive]
    series.number: [490$v repetitive]

    :param key: Key of the matched MARC field (unused).
    :param value: Subfields of the 490 field.
    :returns: Dictionary with the ``name`` and/or ``number`` found (empty
        when neither subfield is present).
    """
    series = {}

    # Repeated subfields are joined with a comma.
    for field, subfield in (("name", "a"), ("number", "v")):
        content = value.get(subfield)
        if content:
            series[field] = ", ".join(utils.force_list(content))

    return series
1✔
370

371

372
@overdo.over("abstracts", "^520..")
@utils.for_each_value
@utils.ignore_value
def marc21_to_abstract(self, key, value):
    """Append the abstract of field 520 to ``self["abstracts"]``.

    :param key: Key of the matched MARC field (unused).
    :param value: Subfields: ``$a`` text, ``$9`` language (``eng`` when
        absent).
    :returns: Always ``None``; abstracts are stored on ``self``.
    """
    text = value.get("a")
    if not text:
        return None

    # The two-letter code `fr` is stored as `fre`.
    language = value.get("9", "eng")
    language = "fre" if language == "fr" else language

    abstracts = self.get("abstracts", [])
    abstracts.append({"value": text, "language": language})
    self["abstracts"] = abstracts

    return None
1✔
392

393

394
@overdo.over("identifiedBy", "001")
@utils.ignore_value
def marc21_to_identified_by_from_001(self, key, value):
    """Add the content of field 001 as a local `RERO DOC` identifier.

    :param key: Key of the matched MARC field (unused).
    :param value: Content of field 001, stored as is.
    :returns: The list of identifiers, including the new one.
    """
    identifier = {"type": "bf:Local", "source": "RERO DOC", "value": value}

    identifiers = self.get("identifiedBy", [])
    identifiers.append(identifier)

    return identifiers
1✔
403

404

405
@overdo.over("identifiedBy", "^020..")
@utils.ignore_value
def marc21_to_identified_by_from_020(self, key, value):
    """Add the ISBN of field 020 to the identifiers.

    :param key: Key of the matched MARC field (unused).
    :param value: Subfields of the 020 field; the ISBN is read from ``$a``.
    :returns: The list of identifiers, or ``None`` when there is no ``$a``.
    """
    isbn = value.get("a")
    if not isbn:
        return None

    identifiers = self.get("identifiedBy", [])
    identifiers.append({"type": "bf:Isbn", "value": isbn})

    return identifiers
1✔
417

418

419
@overdo.over("identifiedBy", "^0247.")
@utils.ignore_value
def marc21_to_identified_by_from_024(self, key, value):
    """Add the URN of field 024 to the identifiers.

    :param key: Key of the matched MARC field (unused).
    :param value: Subfields: ``$a`` identifier, ``$2`` source.
    :returns: The list of identifiers, or ``None`` when ``$a`` is missing
        or ``$2`` is not ``urn``.
    """
    urn = value.get("a")

    # Only identifiers whose source is `urn` are imported.
    if not (urn and value.get("2") == "urn"):
        return None

    identifiers = self.get("identifiedBy", [])
    identifiers.append({"type": "bf:Urn", "value": urn})

    return identifiers
1✔
431

432

433
@overdo.over("identifiedBy", "^027..")
@utils.ignore_value
def marc21_to_identified_by_from_027(self, key, value):
    """Add the value of field 027 as a `bf:Strn` identifier.

    :param key: Key of the matched MARC field (unused).
    :param value: Subfields of the 027 field; the number is read from ``$a``.
    :returns: The list of identifiers, or ``None`` when there is no ``$a``.
    """
    strn = value.get("a")
    if not strn:
        return None

    identifiers = self.get("identifiedBy", [])
    identifiers.append({"type": "bf:Strn", "value": strn})

    return identifiers
1✔
445

446

447
@overdo.over("identifiedBy", "^035..")
@utils.ignore_value
def marc21_to_identified_by_from_035(self, key, value):
    """Add the value of field 035 as a local `RERO` identifier.

    :param key: Key of the matched MARC field (unused).
    :param value: Subfields of the 035 field; the number is read from ``$a``.
    :returns: The list of identifiers, or ``None`` when there is no ``$a``.
    """
    number = value.get("a")
    if not number:
        return None

    identifiers = self.get("identifiedBy", [])
    identifiers.append({"type": "bf:Local", "source": "RERO", "value": number})

    return identifiers
1✔
459

460

461
@overdo.over("identifiedBy", "^037..")
@utils.ignore_value
def marc21_to_identified_by_from_037(self, key, value):
    """Add the value of field 037 as a local `Swissbib` identifier.

    :param key: Key of the matched MARC field (unused).
    :param value: Subfields of the 037 field; the number is read from ``$a``.
    :returns: The list of identifiers, or ``None`` when there is no ``$a``.
    """
    raw_number = value.get("a")
    if not raw_number:
        return None

    # Drop the `swissbib.ch:` marker and the surrounding whitespace.
    number = raw_number.replace("swissbib.ch:", "").strip()

    identifiers = self.get("identifiedBy", [])
    identifiers.append({"type": "bf:Local", "source": "Swissbib", "value": number})

    return identifiers
1✔
479

480

481
@overdo.over("identifiedBy", "^088..")
@utils.ignore_value
def marc21_to_identified_by_from_088(self, key, value):
    """Add the report number of field 088 to the identifiers.

    :param key: Key of the matched MARC field (unused).
    :param value: Subfields of the 088 field; the number is read from ``$a``.
    :returns: The list of identifiers, or ``None`` when there is no ``$a``.
    """
    report_number = value.get("a")
    if not report_number:
        return None

    identifiers = self.get("identifiedBy", [])
    identifiers.append({"type": "bf:ReportNumber", "value": report_number})

    return identifiers
1✔
493

494

495
@overdo.over("identifiedBy", "^091..")
@utils.ignore_value
def marc21_to_identified_by_from_091(self, key, value):
    """Add the PMID of field 091 to the identifiers.

    :param key: Key of the matched MARC field (unused).
    :param value: Subfields: ``$a`` identifier, ``$b`` identifier kind.
    :returns: The list of identifiers, or ``None`` when ``$a`` is missing
        or ``$b`` is not ``pmid``.
    """
    pmid = value.get("a")

    # Only `pmid` identifiers are imported from this field.
    if not (pmid and value.get("b") == "pmid"):
        return None

    identifiers = self.get("identifiedBy", [])
    identifiers.append({"type": "bf:Local", "value": pmid, "source": "PMID"})

    return identifiers
1✔
507

508

509
@overdo.over("notes", "^500..")
@utils.for_each_value
@utils.ignore_value
def marc21_to_notes(self, key, value):
    """Get the note stored in subfield ``$a`` of field 500.

    :param key: Key of the matched MARC field (unused).
    :param value: Subfields of the 500 field.
    :returns: Whatever ``overdo.not_repetitive`` returns for ``$a``.
    """
    note = overdo.not_repetitive(value, "a")
    return note
1✔
515

516

517
@overdo.over("subjects", "^600..|695..")
@utils.for_each_value
@utils.ignore_value
def marc21_to_subjects(self, key, value):
    """Build a subject entry from field 600 or 695.

    :param key: Key of the matched MARC field (``600__`` or ``695__``).
    :param value: Subfields: ``$a`` labels separated by `` ; ``, ``$9``
        language (695), ``$2`` source (600).
    :returns: The subject dictionary, or ``None`` when ``$a`` is missing,
        when a 695 field has no language or a 600 field has no source.
    """
    raw_labels = value.get("a")
    if not raw_labels:
        return None

    # Empty labels produced by the split are dropped.
    subject = {"label": {"value": list(filter(None, raw_labels.split(" ; ")))}}

    # Field 695 requires a language
    if key == "695__":
        language = value.get("9")
        if not language:
            return None
        subject["label"]["language"] = language

    # Field 600 requires a source
    if key == "600__":
        source = value.get("2")
        if not source:
            return None
        subject["source"] = source

    return subject
1✔
542

543

544
@overdo.over("files", "^856..")
@utils.for_each_value
@utils.ignore_value
def marc21_to_files(self, key, value):
    """Get files.

    :param key: Key of the matched MARC field (unused).
    :param value: Subfields: ``$f`` file key, ``$u`` URL, ``$q`` mime type
        (``text/plain`` when absent), ``$y`` text that may end with
        ``order:<number>``, ``$z`` label.
    :returns: The file dictionary, or ``None`` when the key or the URL is
        missing or when the mime type is ``pdt/download``.
    """
    file_key = value.get("f")
    url = value.get("u")

    if not file_key or not url:
        return None

    # TODO: Check why this type of file exists. Real example with rerodoc ID
    # 29085
    if value.get("q", "text/plain") == "pdt/download":
        # `identifiedBy` may not be stored yet: use `.get()` so that logging
        # the warning cannot raise a KeyError.
        current_app.logger.warning(
            f"File {file_key} for record {self.get('identifiedBy')} has a strange pdt/download mime "
            "type, skipping import of file..."
        )
        return None

    # Retrieve file order
    # If the order is not set, 99 is used to easily spot these files
    order = 99
    order_note = value.get("y")
    if order_note:
        match = re.search(r"order:([0-9]+)$", order_note)
        if match:
            order = int(match.group(1))

    return {
        "key": file_key,
        "url": url.strip(),
        "label": value.get("z", file_key),
        "type": "file",
        "order": order,
    }
582

583

584
@overdo.over("otherEdition", "^775..")
@utils.for_each_value
@utils.ignore_value
def marc21_to_other_edition(self, key, value):
    """Get the other edition described by field 775.

    When the locator contains a DOI, the DOI is stored in
    ``self["identifiedBy"]`` and no other edition is returned.

    :param key: Key of the matched MARC field (unused).
    :param value: Subfields: ``$o`` electronic locator, ``$g`` public note.
    :returns: The other edition dictionary, or ``None`` when a subfield is
        missing or when a DOI was extracted.
    """
    electronic_locator = value.get("o")
    public_note = value.get("g")

    if not (electronic_locator and public_note):
        return None

    # if the value matches a DOI, apply `identifiedBy[type:bf:Doi]`
    found = re.search(r"(?P<doi>10\.\d{4,9}/[-._;()/:a-zA-Z0-9]+)", electronic_locator)
    doi = found.group("doi") if found else None
    if doi:
        identifiers = self.get("identifiedBy", [])
        identifiers.append({"type": "bf:Doi", "value": doi})
        self["identifiedBy"] = identifiers
        return None

    return {
        "document": {"electronicLocator": electronic_locator},
        "publicNote": public_note,
    }
606

607

608
@overdo.over("collections", "^982..")
@utils.for_each_value
@utils.ignore_value
def marc21_to_specific_collection(self, key, value):
    """Link the record to the collection named in field 982.

    The collection is looked up by a hash of its name and the organisation
    PID, and created when the lookup returns nothing.

    :param key: Key of the matched MARC field (unused).
    :param value: Subfields of the 982 field; the name is read from ``$a``.
    :returns: A ``$ref`` link to the collection, or ``None`` when there is
        no name or no organisation stored on the record.
    """
    name = value.get("a")
    if not name:
        return None

    # No organisation found, the collection is not imported.
    if not self.get("organisation"):
        return None

    organisation_ref = self["organisation"][0]["$ref"]
    organisation_pid = OrganisationRecord.get_pid_by_ref_link(organisation_ref)

    hash_key = hashlib.md5((name + organisation_pid).encode()).hexdigest()
    collection_pid = CollectionRecord.get_pid_by_hash_key(hash_key)

    # No collection found: create it
    if not collection_pid:
        collection = CollectionRecord.create(
            {
                "name": [{"language": "eng", "value": name}],
                "organisation": {"$ref": organisation_ref},
                "hashKey": hash_key,
            }
        )
        collection.commit()
        collection.reindex()
        db.session.commit()
        collection_pid = collection["pid"]

    return {"$ref": CollectionRecord.get_ref_link("collections", collection_pid)}
1✔
641

642

643
@overdo.over("classification", "^080..")
@utils.for_each_value
@utils.ignore_value
def marc21_to_classification_field_080(self, key, value):
    """Build a `bf:ClassificationUdc` entry from field 080.

    :param key: Key of the matched MARC field (unused).
    :param value: Subfields of the 080 field; the portion is read from ``$a``.
    :returns: The classification dictionary, or ``None`` without ``$a``.
    """
    portion = value.get("a")
    if portion:
        return {"type": "bf:ClassificationUdc", "classificationPortion": portion}

    return None
1✔
652

653

654
@overdo.over("classification", "^084..")
@utils.for_each_value
@utils.ignore_value
def marc21_to_classification_field_084(self, key, value):
    """Build a `bf:ClassificationDdc` entry from field 084.

    :param key: Key of the matched MARC field (unused).
    :param value: Subfields: ``$a`` portion, ``$2`` source.
    :returns: The classification dictionary, or ``None`` when ``$a`` is
        missing or ``$2`` is not ``ddc``.
    """
    portion = value.get("a")

    # Only `ddc` classifications are imported from this field.
    if portion and value.get("2") == "ddc":
        return {"type": "bf:ClassificationDdc", "classificationPortion": portion}

    return None
1✔
663

664

665
@overdo.over("contentNote", "^505..")
@utils.for_each_value
@utils.ignore_value
def marc21_to_content_note(self, key, value):
    """Get the content note stored in subfield ``$a`` of field 505.

    :param key: Key of the matched MARC field (unused).
    :param value: Subfields of the 505 field.
    :returns: The content of ``$a``, or ``None`` when it is absent.
    """
    content_note = value.get("a")
    return content_note
1✔
671

672

673
@overdo.over("dissertation", "^502..")
@utils.ignore_value
def marc21_to_dissertation_field_502(self, key, value):
    """Extract dissertation data from field 502.

    The note in ``$a`` is stored as the degree, then refined into degree,
    granting institution and date when it has the form
    ``<degree> : <institution>, <year>``. The date in ``$9`` is used as
    publication start date when no other field provides one.

    :param key: Key of the matched MARC field (unused).
    :param value: Subfields: ``$a`` thesis note, ``$9`` date.
    :returns: Always ``None``; results are stored on ``self``.
    """
    record = overdo.blob_record

    note = value.get("a")
    if note:
        dissertation = self.get("dissertation", {})
        dissertation["degree"] = note
        self["dissertation"] = dissertation

        # try to parse the thesis note more precisely
        parsed = re.match(
            r"^(?P<degree>[^:]+) : (?P<grantingInstitution>[^,]+) ?[,:] (?P<date>\d{4})( ; .*)?$",
            note,
        )
        if parsed:
            for field in ("degree", "grantingInstitution", "date"):
                if parsed.group(field):
                    dissertation[field] = parsed.group(field)

            # Specific transformation for `hep bejune`, in order to fill out
            # custom fields `Filière` (`customField1`) and `Titre obtenu`
            # (`customField2`)
            organisation = record.get("980__", {}).get("b")
            if organisation and organisation.lower() == "hepbejune":
                degree = parsed.group("degree").lower()
                # Order matters: the specialised master must be tested
                # before the generic master, which it contains.
                hepbejune_fields = (
                    (
                        "mémoire de master spécialisé",
                        "Enseignement spécialisé",
                        "Master of Arts in special needs education, orientation enseignement spécialisé",
                    ),
                    (
                        "mémoire de master",
                        "Enseignement secondaire",
                        "Master of Arts or of Science in Secondary Education",
                    ),
                    (
                        "mémoire de bachelor",
                        "Enseignement primaire",
                        "Bachelor of Arts in Pre-Primary and Primary Education",
                    ),
                )
                for marker, custom_field1, custom_field2 in hepbejune_fields:
                    if marker in degree:
                        self["customField1"] = [custom_field1]
                        self["customField2"] = [custom_field2]
                        break

    # Try to get start date and store in provision activity
    # 260$c, 269$c and 773$g take priority over this date
    priority_subfields = (("260__", "c"), ("269__", "c"), ("773__", "g"))
    if any(record.get(tag, {}).get(code) for tag, code in priority_subfields):
        return None

    date = value.get("9")

    # No date, skipping
    if not date:
        return None

    # Match either 2019 or 2019-01-01
    if re.search(r"^[0-9]{4}(-[0-9]{2}-[0-9]{2})?$", date):
        add_provision_activity_start_date(self, date)

    return None
1✔
736

737

738
@overdo.over("dissertation", "^508..")
@utils.ignore_value
def marc21_to_dissertation_field_508(self, key, value):
    """Store the note of field 508 as the dissertation jury note.

    :param key: Key of the matched MARC field (unused).
    :param value: Subfields of the 508 field; the note is read from ``$a``.
    :returns: Always ``None``; ``self["dissertation"]`` is updated.
    """
    jury_note = value.get("a")
    if jury_note:
        dissertation = self.get("dissertation", {})
        dissertation["jury_note"] = jury_note
        self["dissertation"] = dissertation

    return None
1✔
751

752

753
@overdo.over("usageAndAccessPolicy", "^540..")
@utils.ignore_value
def marc21_to_usage_and_access_policy(self, key, value):
    """Build the usage and access policy from field 540.

    :param key: Key of the matched MARC field (unused).
    :param value: Subfields of the 540 field; the label is read from ``$a``.
    :returns: The policy dictionary, or ``None`` when there is no ``$a``.
    """
    label = value.get("a")
    if label:
        return {"label": label, "license": "License undefined"}

    return None
1✔
761

762

763
@overdo.over("contribution", "^100..")
@utils.ignore_value
def marc21_to_contribution_field_100(self, key, value):
    """Add the person of field 100 to the contributions, with role `cre`.

    :param key: Key of the matched MARC field (unused).
    :param value: Subfields: ``$a`` name, ``$u`` affiliation, ``$d`` dates.
    :returns: Always ``None``; ``self["contribution"]`` is updated.
    """
    name = value.get("a")
    if not name:
        return None

    agent = {"type": "bf:Person", "preferred_name": name}
    entry = {"agent": agent, "role": ["cre"]}

    # Affiliation
    affiliation = value.get("u")
    if affiliation:
        entry["affiliation"] = affiliation

    # Date of birth - date of death
    born, died = overdo.extract_date(value.get("d"))
    if born:
        agent["date_of_birth"] = born
    if died:
        agent["date_of_death"] = died

    contributions = self.get("contribution", [])
    contributions.append(entry)
    self["contribution"] = contributions

    return None
1✔
794

795

796
@overdo.over("contribution", "^700..")
@utils.for_each_value
@utils.ignore_value
def marc21_to_contribution_field_700(self, key, value):
    """Add the person of field 700 to the contributions.

    :param key: Key of the matched MARC field (unused).
    :param value: Subfields: ``$a`` name, ``$e`` role, ``$u`` affiliation,
        ``$d`` dates.
    :returns: Always ``None``; ``self["contribution"]`` is updated.
    :raises Exception: When ``overdo.get_contributor_role`` finds no role
        for ``$e``.
    """
    name = value.get("a")
    if not name:
        return None

    role = overdo.get_contributor_role(value.get("e"))
    if not role:
        raise Exception(f"No role found for contributor {value}")

    agent = {"type": "bf:Person", "preferred_name": name}
    entry = {"agent": agent, "role": [role]}

    # Affiliation
    affiliation = value.get("u")
    if affiliation:
        entry["affiliation"] = affiliation

    # Date of birth - date of death
    born, died = overdo.extract_date(value.get("d"))
    if born:
        agent["date_of_birth"] = born
    if died:
        agent["date_of_death"] = died

    contributions = self.get("contribution", [])
    contributions.append(entry)
    self["contribution"] = contributions

    return None
1✔
833

834

835
@overdo.over("contribution", "^710..")
@utils.for_each_value
@utils.ignore_value
def marc21_to_contribution_field_710(self, key, value):
    """Add the organisation of field 710 to the contributions (role `ctb`).

    :param key: Key of the matched MARC field (unused).
    :param value: Subfields of the 710 field; the name is read from ``$a``.
    :returns: Always ``None``; ``self["contribution"]`` is updated.
    """
    name = value.get("a")
    if not name:
        return None

    entry = {
        "agent": {"type": "bf:Organization", "preferred_name": name},
        "role": ["ctb"],
    }

    contributions = self.get("contribution", [])
    contributions.append(entry)
    self["contribution"] = contributions

    return None
1✔
853

854

855
@overdo.over("contribution", "^711..")
@utils.for_each_value
@utils.ignore_value
def marc21_to_contribution_field_711(self, key, value):
    """Add the meeting of field 711 to the contributions, with role `cre`.

    :param key: Key of the matched MARC field (unused).
    :param value: Subfields: ``$a`` name, ``$c`` place, ``$d`` date,
        ``$n`` number.
    :returns: Always ``None``; ``self["contribution"]`` is updated.
    """
    name = value.get("a")
    if not name:
        return None

    agent = {"type": "bf:Meeting", "preferred_name": name}

    # Optional place, date and number, in that order
    for field, subfield in (("place", "c"), ("date", "d"), ("number", "n")):
        content = value.get(subfield)
        if content:
            agent[field] = content

    contributions = self.get("contribution", [])
    contributions.append({"agent": agent, "role": ["cre"]})
    self["contribution"] = contributions

    return None
1✔
886

887

888
@overdo.over("customField1", "^918..")
@utils.ignore_value
def marc21_to_faculty_and_department(self, key, value):
    """Extract faculty and department for UNIFR.

    The faculty (``$a``) goes to ``customField1`` and the department
    (``$c``) to ``customField2``, only when the organisation code in
    ``980$b`` of the source record is ``unifr``.

    :param key: Key of the matched MARC field (unused).
    :param value: Subfields of the 918 field.
    :returns: Always ``None``; results are stored on ``self``.
    """
    org = overdo.blob_record.get("980__", {}).get("b")

    # Compare case-insensitively, like the other rules reading 980$b in
    # this module, which lowercase the organisation code first.
    if org and org.lower() == "unifr":
        faculty = value.get("a")
        if faculty:
            self["customField1"] = [faculty]

        department = value.get("c")
        if department:
            self["customField2"] = [department]

    return None
1✔
902

903

904
@overdo.over("partOf", "^773..")
@utils.for_each_value
@utils.ignore_value
def marc21_to_part_of(self, key, value):
    """Extract the host document described by field 773.

    ``$g`` is expected as ``year/volume/issue/pages``. The year is also
    used as publication start date when fields 260 and 269 have no date.

    :param key: Key of the matched MARC field (unused).
    :param value: Subfields: ``$g`` numbering, ``$t`` title, ``$c``
        contributors separated by ``;``, ``$d`` publication statement.
    :returns: The part-of dictionary, or ``None`` when ``$g`` is missing or
        has no year.
    """
    raw_numbering = value.get("g")
    if not raw_numbering:
        return None

    # Split value for getting numbering values
    numbering = raw_numbering.split("/")

    # Numbering year
    year = numbering[0]
    if not year:
        return None

    data = {"numberingYear": year}

    # Missing trailing parts are treated as empty.
    volume, issue, pages = (numbering[1:4] + [""] * 3)[:3]

    # Volume
    if volume:
        data["numberingVolume"] = volume

    # Issue
    if issue:
        data["numberingIssue"] = issue

    # Pages (a lone dash means no pages)
    if pages and pages != "-":
        data["numberingPages"] = pages

    document = {}

    # Title is found
    title = value.get("t")
    if title:
        document["title"] = title

    # Contribution
    contributors = value.get("c")
    if contributors:
        document["contribution"] = contributors.split(";")

    record = overdo.blob_record

    # Publication based on document sub type
    statement = value.get("d")
    is_in_book = record.get("980__", {}).get("f") == "ART_INBOOK"
    if statement or is_in_book:
        publication = {}

        if statement:
            publication["statement"] = statement

        if is_in_book:
            publication["startDate"] = year

        document["publication"] = publication

    # If no field 260$c or 269$c, store start date
    if not (record.get("260__", {}).get("c") or record.get("269__", {}).get("c")):
        add_provision_activity_start_date(self, year)

    if document:
        data["document"] = document

    return data
1✔
964

965

966
def add_provision_activity_start_date(data, date):
    """Set the start date of the publication in the provision activity.

    The first ``bf:Publication`` entry is taken out of the list, updated
    and appended again at the end. A new publication is created when none
    exists.

    :param data: Data dictionary.
    :param date: Date to add.
    """
    activities = data.get("provisionActivity", [])

    # Position of the first stored publication, if any
    position = next(
        (index for index, entry in enumerate(activities) if entry["type"] == "bf:Publication"),
        None,
    )

    if position is None:
        publication = {"type": "bf:Publication", "startDate": None}
    else:
        publication = activities.pop(position)

    publication["startDate"] = date

    # Inject publication into provision activity
    activities.append(publication)

    # Re-assign provisionActivity
    data["provisionActivity"] = activities
1✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc