• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

rero / rero-mef / 16621609190

30 Jul 2025 11:43AM UTC coverage: 84.491% (+0.008%) from 84.483%
16621609190

push

github

rerowep
chore: update dependencies

Co-Authored-by: Peter Weber <peter.weber@rero.ch>

4560 of 5397 relevant lines covered (84.49%)

0.84 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

82.63
/rero_mef/marctojson/do_gnd_places.py
1
# RERO MEF
2
# Copyright (C) 2024 RERO
3
#
4
# This program is free software: you can redistribute it and/or modify
5
# it under the terms of the GNU Affero General Public License as published by
6
# the Free Software Foundation, version 3 of the License.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU Affero General Public License for more details.
12
#
13
# You should have received a copy of the GNU Affero General Public License
14
# along with this program. If not, see <http://www.gnu.org/licenses/>.
15

16
"""MARC-to-JSON transformer for GND place records."""
17
# https://www.dnb.de/EN/Professionell/Metadatendienste/Datenbezug/GND_Aenderungsdienst/gndAenderungsdienst_node.html
18

19
import contextlib
1✔
20
from datetime import datetime, timezone
1✔
21

22
from rero_mef.marctojson.helper import (
1✔
23
    build_string_from_field,
24
    build_string_list_from_fields,
25
    get_source_and_id,
26
)
27

28
# Maps the GND entity code found in field 075 $b (with $2 == "gndgen") to the
# entity type used in the JSON output. Codes that are absent here (e.g. "n")
# resolve to None when looked up with ``RECORD_TYPES.get``.
RECORD_TYPES = {
    "p": "bf:Person",
    "b": "bf:Organisation",
    "f": "bf:Organisation",
    "g": "bf:Place",
    "s": "bf:Topic",
    "u": "bf:Title",
}
36

37

38
class Transformation:
    """Transformation MARC21 to JSON for GND authority place."""

    def __init__(self, marc, logger=None, verbose=False, transform=True):
        """Store the record and optionally run the transformation.

        :param marc: MARC record; must offer ``get_fields``, ``leader`` and
            item access by tag as used by the methods below.
        :param logger: optional logger; only used when ``verbose`` is true.
        :param verbose: enable informational / warning log output.
        :param transform: when true, ``_transform`` is run immediately.
        """
        self.marc = marc
        self.logger = logger
        self.verbose = verbose
        self.json_dict = {}
        if transform:
            self._transform()

    def _log_call(self, name):
        """Log the name of the transformation step when verbose logging is on."""
        if self.logger and self.verbose:
            # The name is passed as a %-argument, so the format string needs
            # a placeholder for it.
            self.logger.info("Call Function: %s", name)

    @staticmethod
    def _g_grouping():
        """Return a fresh tag grouping that wraps $g values in parentheses."""
        return [
            {
                "subtags": "g",
                "start": " (",
                "end": ")",
                "delimiter": "",
                "subdelimiter": ", ",
            }
        ]

    @staticmethod
    def _first_value(value):
        """Return the first item when ``value`` is a list, else ``value``."""
        return value[0] if isinstance(value, list) else value

    def get_type(self):
        """Get type of record.

        GND entities (record types) 075 $b TYPE $2 gndgen
        - b corporate body
        - f conference
        - g geographic entity
        - n person (not individualised)
        - p person (individualised)
        - s subject heading
        - u work

        :returns: the ``RECORD_TYPES`` entry for $b of the first 075 field
            whose $2 is "gndgen", or None when there is no such field or the
            code is not mapped.
        """
        for field_075 in self.marc.get_fields("075") or []:
            if field_075.get("2") == "gndgen":
                # .get() so that a missing $b yields None instead of raising.
                return RECORD_TYPES.get(field_075.get("b"))
        return None

    def _transform(self):
        """Call the transformation functions.

        For a place record every attribute whose name starts with "trans" is
        called. For any other record type the result is replaced by a
        "NO TRANSFORMATION" marker and only ``trans_gnd_pid`` is run.
        """
        record_type = self.get_type()
        if record_type == "bf:Place":
            for name in dir(self):
                if name.startswith("trans"):
                    getattr(self, name)()
        else:
            msg = f"Not a place: {record_type}"
            if self.logger and self.verbose:
                self.logger.warning(f"NO TRANSFORMATION: {msg}")
            self.json_dict = {"NO TRANSFORMATION": msg}
            self.trans_gnd_pid()

    @property
    def json(self):
        """Json data; None when nothing has been collected."""
        return self.json_dict or None

    def trans_gnd_deleted(self):
        """Transformation deleted leader 5.

        Sets "deleted" to the current UTC timestamp when leader position 5
        is one of:
        c: Redirect notification / Deletion notification
        x: Redirect
        d: Deletion

        https://www.dnb.de/EN/Professionell/Metadatendienste/Datenbezug/
        GND_Aenderungsdienst/gndAenderungsdienst_node.html
        """
        self._log_call("trans_gnd_deleted")
        if self.marc.leader[5] in ("c", "d", "x"):
            self.json_dict["deleted"] = datetime.now(timezone.utc).isoformat()

    def trans_gnd_pid(self):
        """Transformation pid from field 001.

        When a 001 field exists, "pid" is set to its data and "type" is set
        to "bf:Place".
        """
        self._log_call("trans_gnd_pid")
        if field_001 := self.marc.get_fields("001"):
            self.json_dict["pid"] = field_001[0].data
            self.json_dict["type"] = "bf:Place"

    def trans_gnd_identifier(self):
        """Transformation identifier from field 024, 035.

        024: $0 becomes a "uri" identifier whose source is $2 upper-cased
        (both subfields are required).
        035: $a starting with "(DE-101)" or "(DE-588)" becomes a "bf:Nbn"
        identifier with source "GND".
        """
        self._log_call("trans_gnd_identifier")
        for field_024 in self.marc.get_fields("024"):
            # A subfield value may come back as a list; use its first item.
            subfield_0 = self._first_value(field_024.get("0"))
            subfield_2 = self._first_value(field_024.get("2"))
            if subfield_0 and subfield_2:
                self.json_dict.setdefault("identifiedBy", []).append(
                    {
                        "source": subfield_2.upper(),
                        "type": "uri",
                        "value": subfield_0,
                    }
                )
        for field_035 in self.marc.get_fields("035"):
            if field_035.get("a"):
                subfield_a = field_035["a"]
                if subfield_a.startswith(("(DE-101)", "(DE-588)")):
                    self.json_dict.setdefault("identifiedBy", []).append(
                        {
                            "source": "GND",
                            "type": "bf:Nbn",
                            "value": subfield_a,
                        }
                    )

    def trans_gnd_authorized_access_point(self):
        """Transformation authorized_access_point 151.

        Any exception raised while reading or formatting the field results
        in the placeholder value "TAG: 151 NOT FOUND".
        """
        self._log_call("trans_gnd_authorized_access_point")
        tag = "151"
        subfields = {"a": ", ", "g": " , ", "x": " - ", "z": " - "}
        try:
            if authorized_ap := build_string_from_field(
                field=self.marc[tag],
                subfields=subfields,
                tag_grouping=self._g_grouping(),
            ):
                self.json_dict["authorized_access_point"] = authorized_ap
        except Exception:
            self.json_dict["authorized_access_point"] = f"TAG: {tag} NOT FOUND"

    def trans_gnd_variant_access_point(self):
        """Transformation variant_access_point 451."""
        self._log_call("trans_gnd_variant_access_point")
        tag = "451"
        subfields = {"a": ", ", "g": " , "}
        if variant_access_point := build_string_list_from_fields(
            record=self.marc,
            tag=tag,
            subfields=subfields,
            tag_grouping=self._g_grouping(),
        ):
            self.json_dict["variant_access_point"] = variant_access_point

    def trans_gnd_relation(self):
        """Transformation relation pids 682 $0 551.

        682 with $i == "Umlenkung": every $0 starting with "(DE-101)" sets
        "relation_pid" (a later match overwrites an earlier one).
        551: the formatted access point is stored under "broader",
        "narrower" or "related" depending on $4; each access point is
        recorded once per relation type.

        https://www.dnb.de/EN/Professionell/Metadatendienste/Datenbezug/
        GND_Aenderungsdienst/gndAenderungsdienst_node.html
        """
        self._log_call("trans_gnd_relation")
        for field_682 in self.marc.get_fields("682"):
            if field_682.get("i") == "Umlenkung":
                for subfield_0 in field_682.get_subfields("0"):
                    if subfield_0.startswith("(DE-101)"):
                        self.json_dict["relation_pid"] = {
                            "value": subfield_0.replace("(DE-101)", ""),
                            "type": "redirect_to",
                        }

        broader_codes = ("geoa", "geow", "nach", "obpa", "orta")
        relations = {}
        # Access points already recorded, per relation type. Kept outside the
        # loop so duplicates across several 551 fields are actually detected.
        seen = {}
        for field_551 in self.marc.get_fields("551"):
            # Best effort: a field that cannot be processed is skipped.
            with contextlib.suppress(Exception):
                relation_type = "related"
                if subfield_4 := field_551.get("4"):
                    if subfield_4 in broader_codes:
                        relation_type = "broader"
                    # Compare the whole code, not only its first character.
                    elif subfield_4 == "vorg":
                        relation_type = "narrower"

                authorized_ap = build_string_from_field(
                    field=field_551,
                    subfields={"a": ", ", "g": " , "},
                    tag_grouping=self._g_grouping(),
                )
                if authorized_ap:
                    known = seen.setdefault(relation_type, set())
                    if authorized_ap not in known:
                        known.add(authorized_ap)
                        relations.setdefault(relation_type, []).append(
                            {"authorized_access_point": authorized_ap}
                        )
        for relation, value in relations.items():
            if value:
                self.json_dict[relation] = value

    def trans_gnd_classification(self):
        """Transformation classification from field 686."""
        self._log_call("trans_gnd_classification")
        # TODO: find classification

    def trans_gnd_match(self):
        """Transformation closeMatch and exactMatch field 751.

        $i "Aequivalenz" yields "closeMatch", "exakte Aequivalenz" yields
        "exactMatch"; any other 751 field is ignored. $0 values are
        collected into "identifiedBy".
        """
        self._log_call("trans_gnd_match")
        match_types = {
            "Aequivalenz": "closeMatch",
            "exakte Aequivalenz": "exactMatch",
        }
        for field_751 in self.marc.get_fields("751"):
            # Best effort: a field that cannot be processed (e.g. a failing
            # $i lookup) is skipped.
            with contextlib.suppress(Exception):
                match_type = match_types.get(field_751["i"])
                if not match_type:
                    continue
                authorized_ap = build_string_from_field(
                    field=field_751,
                    subfields={"a": ", ", "g": " , ", "x": " - ", "z": " - "},
                    tag_grouping=self._g_grouping(),
                )
                if not authorized_ap:
                    continue
                match_data = {
                    "authorized_access_point": authorized_ap,
                    "source": "GND",
                }
                identified_by = []
                other_source = None
                for subfield_0 in field_751.get_subfields("0"):
                    if subfield_0.startswith("http"):
                        uri = {"type": "uri", "value": subfield_0}
                        # Attribute the URI to a non-GND source seen earlier.
                        if other_source:
                            uri["source"] = other_source
                        identified_by.insert(0, uri)
                    else:
                        source, id_ = get_source_and_id(subfield_0)
                        if source:
                            # NOTE(review): insert(-1, ...) places the entry
                            # before the current last element rather than
                            # appending - confirm this ordering is intended.
                            insert_pos = -1
                            if source != "GND":
                                other_source = source
                                match_data["source"] = other_source
                                insert_pos = 0
                            identified_by.insert(
                                insert_pos,
                                {
                                    "source": source,
                                    "type": "bf:Nbn",
                                    "value": id_,
                                },
                            )
                if identified_by:
                    match_data["identifiedBy"] = identified_by
                self.json_dict.setdefault(match_type, []).append(match_data)

    def trans_gnd_note(self):
        """Transformation notes from field.

        677 $a, $b: general
        678 $a, $b: general
        680 $a, $b: general
        670 $a - $u: dataSource (only when both $a and $u are present)
        """
        self._log_call("trans_gnd_note")
        notes = {
            "dataSource": [],
            "dataNotFound": [],
            "general": [],
            "seeReference": [],
        }
        for field in self.marc.get_fields("677", "678", "680"):
            for code in ("a", "b"):
                if field.get(code):
                    notes["general"].append(field[code].strip())
        for field in self.marc.get_fields("670"):
            if field.get("a") and field.get("u"):
                fields_u = field.get("u")
                if isinstance(fields_u, str):
                    fields_u = [fields_u]
                info = f"{field['a'].strip()} - {', '.join(fields_u)}"
                notes["dataSource"].append(info)
        # Only note types that collected at least one label are emitted.
        for note, value in notes.items():
            if value:
                self.json_dict.setdefault("note", []).append(
                    {"noteType": note, "label": value}
                )
1✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc