• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

rero / rero-mef / 16621609190

30 Jul 2025 11:43AM UTC coverage: 84.491% (+0.008%) from 84.483%
16621609190

push

github

rerowep
chore: update dependencies

Co-Authored-by: Peter Weber <peter.weber@rero.ch>

4560 of 5397 relevant lines covered (84.49%)

0.84 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

92.9
/rero_mef/marctojson/do_gnd_concepts.py
1
# RERO MEF
2
# Copyright (C) 2024 RERO
3
#
4
# This program is free software: you can redistribute it and/or modify
5
# it under the terms of the GNU Affero General Public License as published by
6
# the Free Software Foundation, version 3 of the License.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU Affero General Public License for more details.
12
#
13
# You should have received a copy of the GNU Affero General Public License
14
# along with this program. If not, see <http://www.gnu.org/licenses/>.
15

16
"""MARC-to-JSON transformer for GND concept records."""
17
# https://www.dnb.de/EN/Professionell/Metadatendienste/Datenbezug/GND_Aenderungsdienst/gndAenderungsdienst_node.html
18

19
import contextlib
1✔
20
from datetime import datetime, timezone
1✔
21

22
from rero_mef.marctojson.helper import (
1✔
23
    build_string_from_field,
24
    build_string_list_from_fields,
25
    get_source_and_id,
26
)
27

28
# Single-letter record-type codes mapped to the ``bf:`` type label used in the
# JSON output (presumably GND entity-type codes -- confirm against callers).
RECORD_TYPES = dict(
    p="bf:Person",
    b="bf:Organisation",
    f="bf:Organisation",
    g="bf:Place",
    s="bf:Topic",
    u="bf:Title",
)
36

37

38
class Transformation:
    """Transformation MARC21 to JSON for GND authority concept.

    Every attribute whose name starts with ``trans`` is discovered through
    ``dir()`` and called by :meth:`_transform`; each of these methods copies
    one part of the MARC record into ``self.json_dict``. Helpers and class
    attributes must therefore NOT use a name starting with ``trans``.
    """

    # 550 $4 codes that classify the related heading as "broader".
    _BROADER_CODES = ("nach", "obal", "obge", "obin")
    # 550 $4 codes that classify the related heading as "narrower".
    _NARROWER_CODES = ("vorg",)
    # 750 $i wording mapped to the JSON key the match is stored under.
    _MATCH_TYPES = {
        "Aequivalenz": "closeMatch",
        "exakte Aequivalenz": "exactMatch",
    }

    def __init__(self, marc, logger=None, verbose=False, transform=True):
        """Store the record and optionally run every transformation.

        :param marc: MARC record; the methods use ``leader``,
            ``get_fields()`` and ``marc[tag]`` on it.
        :param logger: optional logger; only used when ``verbose`` is true.
        :param verbose: log the name of each transformation when called.
        :param transform: run all ``trans*`` methods immediately.
        """
        self.marc = marc
        self.logger = logger
        self.verbose = verbose
        self.json_dict = {}
        if transform:
            self._transform()

    def _transform(self):
        """Call the transformation functions."""
        # ``dir()`` returns names sorted alphabetically, which fixes the
        # order in which the transformations run.
        for name in dir(self):
            if name.startswith("trans"):
                getattr(self, name)()

    def _log_call(self, name):
        """Log the name of the running transformation in verbose mode."""
        if self.logger and self.verbose:
            # The message needs a placeholder: stdlib logging formats it as
            # ``msg % args`` and reports an error when there is none.
            self.logger.info("Call Function: %s", name)

    @staticmethod
    def _first(value):
        """Return the first item of a list (None if empty), else the value."""
        if isinstance(value, list):
            return value[0] if value else None
        return value

    @staticmethod
    def _qualifier_grouping():
        """Return a fresh ``tag_grouping`` spec for subfield $g.

        A new list is built on every call so the string-building helpers
        never share one mutable object between calls.
        """
        return [
            {
                "subtags": "g",
                "start": " (",
                "end": ")",
                "delimiter": "",
                "subdelimiter": ", ",
            }
        ]

    @property
    def json(self):
        """Json data; None when no transformation produced any value."""
        return self.json_dict or None

    def trans_gnd_deleted(self):
        """Transformation deleted from leader position 5.

        Status codes listed by the original notes:

        c: Redirect notification / Deletion notification
        x: Redirect
        d: Deletion

        When the leader carries one of these codes, ``deleted`` is set to
        the current UTC time in ISO format.

        https://www.dnb.de/EN/Professionell/Metadatendienste/Datenbezug/
        GND_Aenderungsdienst/gndAenderungsdienst_node.html
        """
        self._log_call("trans_gnd_deleted")
        # NOTE(review): in plain MARC21 leader/05 "c" means "corrected";
        # confirm that the GND feed only uses it for notifications.
        if self.marc.leader[5] in ("c", "d", "x"):
            self.json_dict["deleted"] = datetime.now(timezone.utc).isoformat()

    def trans_gnd_pid(self):
        """Transformation pid from field 001.

        Also sets ``type`` to ``bf:Topic`` whenever a 001 field exists.
        """
        self._log_call("trans_gnd_pid")
        if field_001 := self.marc.get_fields("001"):
            self.json_dict["pid"] = field_001[0].data
            self.json_dict["type"] = "bf:Topic"

    def trans_gnd_identifier(self):
        """Transformation identifier from field 024, 035.

        024: $0 becomes a ``uri`` identifier whose source is $2 upper-cased;
        both subfields are required.
        035: $a starting with ``(DE-101)`` or ``(DE-588)`` becomes a
        ``bf:Nbn`` identifier with source ``GND``.
        """
        self._log_call("trans_gnd_identifier")
        for field_024 in self.marc.get_fields("024"):
            # ``get`` may hand back a list; only its first value is used.
            value = self._first(field_024.get("0"))
            source = self._first(field_024.get("2"))
            if value and source:
                self.json_dict.setdefault("identifiedBy", []).append(
                    {
                        "source": source.upper(),
                        "type": "uri",
                        "value": value,
                    }
                )
        for field_035 in self.marc.get_fields("035"):
            subfield_a = field_035.get("a")
            if subfield_a and subfield_a.startswith(("(DE-101)", "(DE-588)")):
                self.json_dict.setdefault("identifiedBy", []).append(
                    {
                        "source": "GND",
                        "type": "bf:Nbn",
                        "value": subfield_a,
                    }
                )

    def trans_gnd_authorized_access_point(self):
        """Transformation authorized_access_point 150.

        If the field lookup or the string building raises, the value is set
        to the placeholder ``TAG: 150 NOT FOUND``.
        """
        self._log_call("trans_gnd_authorized_access_point")
        tag = "150"
        subfields = {"a": ", ", "g": " , ", "x": " - "}
        try:
            if authorized_ap := build_string_from_field(
                field=self.marc[tag],
                subfields=subfields,
                tag_grouping=self._qualifier_grouping(),
            ):
                self.json_dict["authorized_access_point"] = authorized_ap
        except Exception:
            # Deliberately broad: both the field lookup and the project
            # helper may fail when the tag is missing.
            self.json_dict["authorized_access_point"] = f"TAG: {tag} NOT FOUND"

    def trans_gnd_variant_access_point(self):
        """Transformation variant_access_point 450."""
        self._log_call("trans_gnd_variant_access_point")
        tag = "450"
        subfields = {"a": ", ", "g": " , "}
        if variant_access_point := build_string_list_from_fields(
            record=self.marc,
            tag=tag,
            subfields=subfields,
            tag_grouping=self._qualifier_grouping(),
        ):
            self.json_dict["variant_access_point"] = variant_access_point

    def trans_gnd_relation_pid(self):
        """Transformation relation pids 682 $0.

        Only 682 fields whose $i equals ``Umlenkung`` are used. Each $0
        starting with ``(DE-101)`` is stored, prefix removed, as a
        ``redirect_to`` relation; a later match overwrites an earlier one.
        """
        self._log_call("trans_gnd_relation_pid")
        for field_682 in self.marc.get_fields("682"):
            if field_682.get("i") != "Umlenkung":
                continue
            for subfield_0 in field_682.get_subfields("0"):
                if subfield_0.startswith("(DE-101)"):
                    self.json_dict["relation_pid"] = {
                        "value": subfield_0.replace("(DE-101)", ""),
                        "type": "redirect_to",
                    }

    def trans_gnd_relation(self):
        """Transformation relation 550.

        Each 550 field yields one access point stored under ``broader``,
        ``narrower`` or ``related`` depending on its $4 code. The same
        access point is added only once per relation type.
        """
        self._log_call("trans_gnd_relation")
        relations = {}
        # Shared across all 550 fields so repeated headings are dropped.
        seen = set()
        for field_550 in self.marc.get_fields("550"):
            # Best effort: a field that cannot be processed is skipped.
            with contextlib.suppress(Exception):
                relation_type = "related"
                subfield_4 = self._first(field_550.get("4"))
                if subfield_4 in self._BROADER_CODES:
                    relation_type = "broader"
                elif subfield_4 in self._NARROWER_CODES:
                    relation_type = "narrower"

                authorized_ap = build_string_from_field(
                    field=field_550,
                    subfields={"a": ", ", "g": " , "},
                    tag_grouping=self._qualifier_grouping(),
                )
                key = (relation_type, authorized_ap)
                if authorized_ap and key not in seen:
                    seen.add(key)
                    relations.setdefault(relation_type, []).append(
                        {"authorized_access_point": authorized_ap}
                    )
        # Lists are only created together with their first entry, so every
        # stored relation type is non-empty.
        self.json_dict.update(relations)

    def trans_gnd_match(self):
        """Transformation closeMatch and exactMatch from field 750.

        $i selects the target key (``Aequivalenz`` -> ``closeMatch``,
        ``exakte Aequivalenz`` -> ``exactMatch``); other fields are ignored.
        Every $0 is turned into an ``identifiedBy`` entry: values starting
        with ``http`` become ``uri`` entries, all others are split with
        ``get_source_and_id`` into ``bf:Nbn`` entries. A non-GND source
        replaces the default ``GND`` source of the match.
        """
        self._log_call("trans_gnd_match")
        for field_750 in self.marc.get_fields("750"):
            # Best effort: a missing $i or a helper failure skips the field.
            with contextlib.suppress(Exception):
                match_type = self._MATCH_TYPES.get(field_750["i"])
                if not match_type:
                    continue
                subfields = {
                    "a": ", ",
                    "g": " , ",
                    "x": " - ",
                    "y": " - ",
                    "z": " - ",
                }
                authorized_ap = build_string_from_field(
                    field=field_750,
                    subfields=subfields,
                    tag_grouping=self._qualifier_grouping(),
                )
                if not authorized_ap:
                    continue
                match_data = {
                    "authorized_access_point": authorized_ap,
                    "source": "GND",
                }
                identified_by = []
                other_source = None
                for subfield_0 in field_750.get_subfields("0"):
                    if subfield_0.startswith("http"):
                        uri = {"type": "uri", "value": subfield_0}
                        # Only a non-GND source seen earlier is attached.
                        if other_source:
                            uri["source"] = other_source
                        identified_by.insert(0, uri)
                        continue
                    source, id_ = get_source_and_id(subfield_0)
                    if not source:
                        continue
                    # list.insert(-1, ...) places the entry BEFORE the
                    # current last element (it appends only to an empty
                    # list). NOTE(review): kept as in the original --
                    # confirm this ordering is intended for GND entries.
                    insert_pos = -1
                    if source != "GND":
                        other_source = source
                        match_data["source"] = other_source
                        insert_pos = 0
                    identified_by.insert(
                        insert_pos,
                        {
                            "source": source,
                            "type": "bf:Nbn",
                            "value": id_,
                        },
                    )
                if identified_by:
                    match_data["identifiedBy"] = identified_by
                self.json_dict.setdefault(match_type, []).append(match_data)

    def trans_gnd_note(self):
        """Transformation notes from field.

        670 $a - $u: dataSource
        677 $a, $b: general
        678 $a, $b: general
        680 $a, $b: general
        """
        self._log_call("trans_gnd_note")
        # Insertion order here fixes the order of the emitted note entries.
        notes = {
            "dataSource": [],
            "dataNotFound": [],
            "general": [],
            "seeReference": [],
        }
        for field in self.marc.get_fields("677", "678", "680"):
            for code in ("a", "b"):
                if text := field.get(code):
                    notes["general"].append(text.strip())
        for field in self.marc.get_fields("670"):
            label = field.get("a")
            urls = field.get("u")
            if label and urls:
                # ``get`` may return a single string or a list of strings.
                if isinstance(urls, str):
                    urls = [urls]
                notes["dataSource"].append(f"{label.strip()} - {', '.join(urls)}")
        for note_type, labels in notes.items():
            if labels:
                self.json_dict.setdefault("note", []).append(
                    {"noteType": note_type, "label": labels}
                )
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc