• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

rero / rero-mef / 16621609190

30 Jul 2025 11:43AM UTC coverage: 84.491% (+0.008%) from 84.483%
16621609190

push

github

rerowep
chore: update dependencies

Co-Authored-by: Peter Weber <peter.weber@rero.ch>

4560 of 5397 relevant lines covered (84.49%)

0.84 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

87.01
/rero_mef/agents/viaf/api.py
1
# RERO MEF
2
# Copyright (C) 2020 RERO
3
#
4
# This program is free software: you can redistribute it and/or modify
5
# it under the terms of the GNU Affero General Public License as published by
6
# the Free Software Foundation, version 3 of the License.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU Affero General Public License for more details.
12
#
13
# You should have received a copy of the GNU Affero General Public License
14
# along with this program. If not, see <http://www.gnu.org/licenses/>.
15

16
"""API for manipulating VIAF record."""
17

18
from copy import deepcopy
1✔
19

20
import click
1✔
21
import requests
1✔
22
from elasticsearch_dsl.query import Q
1✔
23
from flask import current_app
1✔
24
from invenio_search.api import RecordsSearch
1✔
25

26
from rero_mef.filter import exists_filter
1✔
27
from rero_mef.utils import (
1✔
28
    add_md5,
29
    get_entity_class,
30
    progressbar,
31
    requests_retry_session,
32
)
33

34
from .. import AgentGndRecord, AgentIdrefRecord, AgentMefRecord, AgentReroRecord
1✔
35
from ..api import Action, EntityIndexer, EntityRecord
1✔
36
from .fetchers import viaf_id_fetcher
1✔
37
from .minters import viaf_id_minter
1✔
38
from .models import ViafMetadata
1✔
39
from .providers import ViafProvider
1✔
40

41

42
class AgentViafSearch(RecordsSearch):
    """Search class for VIAF records, bound to the ``viaf`` index."""

    class Meta:
        """Search only on index."""

        # Name of the index this search class queries.
        index = "viaf"
        doc_types = None
        # Wildcard: no restriction on the fields used.
        fields = ("*",)
        facets = {}

        # No filter is applied by default.
        default_filter = None
54

55

56
class AgentViafRecord(EntityRecord):
    """VIAF agent class.

    Holds the VIAF cluster data for an agent: its own ``pid`` plus, per
    configured source, a ``<name>_pid`` entry (see ``sources``).
    """

    minter = viaf_id_minter
    fetcher = viaf_id_fetcher
    provider = ViafProvider
    name = "viaf"
    model_cls = ViafMetadata
    search = AgentViafSearch
    # https://viaf.org/
    # Maps a VIAF source code to:
    #   name:         local source name, also used to build the "<name>_pid" key
    #   info:         human readable description of the source
    #   record_class: (optional) record class for that source; only sources
    #                 declaring one end up in ``sources_used`` (see __init__)
    sources = {
        "SUDOC": {
            "name": "idref",
            "info": "Sudoc [ABES], France",
            "record_class": AgentIdrefRecord,
        },
        "DNB": {
            "name": "gnd",
            "info": "German National Library",
            "record_class": AgentGndRecord,
        },
        "RERO": {
            "name": "rero",
            "info": "RERO - Library Network of Western Switzerland",
            "record_class": AgentReroRecord,
        },
        "SZ": {"name": "sz", "info": "Swiss National Library"},
        "BNE": {"name": "bne", "info": "National Library of Spain"},
        "BNF": {"name": "bnf", "info": "National Library of France"},
        "ICCU": {
            "name": "iccu",
            "info": "Central Institute for the Union Catalogue of the "
            "Italian libraries",
        },
        "ISNI": {"name": "isni", "info": "ISNI"},
        "WKP": {"name": "wiki", "info": "Wikidata"},
        # Source codes listed for reference only; they are not mapped:
        # 'LC': 'loc',  # Library of Congress
        # 'SELIBR': 'selibr',  # National Library of Sweden
        # 'NLA': 'nla',  # National Library of Australia
        # 'PTBNP': 'ptbnp',  # National Library of Portugal
        # 'BLBNB': 'BLBNB',  # National Library of Brazil
        # 'NKC': 'nkc',  # National Library of the Czech Republic
        # 'J9U': 'j9u',  # National Library of Israel
        # 'EGAXA': 'egaxa',  # Library of Alexandria, Egypt
        # 'BAV': 'bav',  # Vatican Library
        # 'CAOONL': 'caoonl',  # Library and Archives Canada/PFAN
        # 'JPG': 'jpg',  # Union List of Artist Names [Getty Research Institute]
        # 'NUKAT': 'nukat',  # NUKAT Center of Warsaw University Library
        # 'NSZL': 'NSZL',  # National Széchényi Library, Hungary
        # 'VLACC': 'vlacc',  # Flemish Public Libraries National Library of Russia
        # 'NTA': 'nta',  # National Library of Netherlands
        # 'BIBSYS': 'bibsys',  # BIBSYS
        # 'GRATEVE': 'grateve',  # National Library of Greece
        # 'ARBABN': 'arbabn',  # National Library of Argentina
        # 'W2Z': 'w2z',  # National Library of Norway
        # 'DBC': 'dbc',  # DBC (Danish Bibliographic Center)
        # 'NDL': 'ndl',  # National Diet Library, Japan
        # 'NII': 'nii',  # NII (Japan)
        # 'NLB': 'nlb',  # National Library Board, Singapore
        # 'LNB': 'lnb',  # National Library of Latvia
        # 'PLWABN': 'plwabn',  # National Library of Poland
        # 'BNC': 'BNC',  # National Library of Catalonia
        # 'LNL': 'lnl',  # Lebanese National Library
        # 'PERSEUS': 'perseus',  # Perseus Digital Library
        # 'SRP': 'srp',  # Syriac Reference Portal
        # 'N6I': 'n6i',  # National Library of Ireland
        # 'NSK': 'nsk',  # National and University Library in Zagreb
        # 'CYT': 'cyt',  # National Central Library, Taiwan
        # 'B2Q': 'b2q',  # National Library and Archives of Québec
        # 'KRLNK': 'krlnk',  # National Library of Korea
        # 'BNL': 'BNL',  # National Library of Luxembourg
        # 'BNCHL': 'bnchl',  # National Library of Chile
        # 'MRBNR': 'mrbnr',  # National Library of Morocco
        # 'XA': 'xa',  # xA Extended Authorities
        # 'XR': 'xr',  # xR Extended Relationships
        # 'FAST': 'fast',  # FAST Subjects
        # 'ERRR': 'errr',  # National Library of Estonia
        # 'UIY': 'uiy',  # National and University Library of Iceland (NULI)
        # 'NYNYRILM': 'nynyrilm',  # Repertoire International de Litterature Musicale, Inc. (RILM)
        # 'DE663': 'de663',  # International Inventory of Musical Sources (RISM)
        # 'SIMACOB': 'simacob',  # NUK/COBISS.SI, Slovenia
        # 'LIH': 'lih',  # National Library of Lithuania
        # 'SKMASNL': 'skmasnl',  # Slovak National Library
        # 'UAE': 'uae',  # United Arab Emirates University
    }
141

142
    def __init__(self, data, model=None, **kwargs):
        """Build the record and collect the sources backed by a record class.

        :param data: Dict with record metadata (a falsy value is replaced by
            an empty dict).
        :param model: :class:`~invenio_records.models.RecordMetadata` instance.
        """
        super().__init__(data or {}, model=model, **kwargs)
        # Source name -> record class, for every configured source that
        # declares a truthy ``record_class``.
        self.sources_used = {
            source["name"]: source["record_class"]
            for source in self.sources.values()
            if source.get("record_class")
        }
153

154
    @classmethod
    def filters(cls):
        """Build one existence filter per configured source.

        :returns: Dict keyed by source name; each value is the result of
            ``exists_filter`` called with ``"<name>_pid"``.
        """
        source_names = [source["name"] for source in cls.sources.values()]
        return {name: exists_filter(f"{name}_pid") for name in source_names}
161

162
    @classmethod
    def aggregations(cls):
        """Build one ``exists`` filter aggregation per configured source.

        :returns: Dict keyed by source name; each value is a filter
            aggregation on the existence of the ``"<name>_pid"`` field.
        """
        result = {}
        for source in cls.sources.values():
            name = source["name"]
            result[name] = {"filter": {"exists": {"field": f"{name}_pid"}}}
        return result
169

170
    def create_mef_and_agents(
        self,
        dbcommit=False,
        reindex=False,
        online=None,
        verbose=False,
        online_verbose=False,
    ):
        """Create MEF and agents records.

        Works in three passes:

        1. Agents referenced by the MEF records of this VIAF but no longer
           listed in this VIAF record are removed from those MEF records.
        2. For every agent pid listed in this VIAF record the agent is
           loaded (or fetched online) and its MEF record created/updated.
        3. Agents removed in pass 1 get their own MEF record (re)created.

        :param dbcommit: Commit changes to DB.
        :param reindex: Reindex record.
        :param online: Agent pid types to fetch online instead of loading
            them locally.
        :param verbose: Verbose (currently not used by this method).
        :param online_verbose: Echo the message returned by each online
            request.
        :returns: Actions dictionary keyed by agent pid.
        """

        def update_online(agent_class, pid, online):
            """Update agents online.

            :param agent_class: Agent class to use.
            :param pid: Agent pid to use.
            :param online: Try to get following agent types online.
            :return: Agent record and performed action.
            """
            action = Action.NOT_ONLINE
            agent_record = None
            if agent_class.provider.pid_type in online:
                data, msg = agent_class.get_online_record(id_=pid)
                if online_verbose:
                    click.echo(f"\n{msg}")
                if data and not data.get("NO TRANSFORMATION"):
                    agent_record, action = agent_class.create_or_update(
                        data=data, dbcommit=dbcommit, reindex=reindex
                    )
            else:
                agent_record = agent_class.get_record_by_pid(pid)
            return agent_record, action

        def set_actions(actions, pid, source_name, action, mef_actions=None):
            """Set actions.

            An entry already present for ``pid`` keeps its source and action;
            only its ``MEF`` part is overwritten when ``mef_actions`` is given.

            :param actions: Actions dictionary to change
            :param pid: Pid to add.
            :param source_name: Source name to add
            :param action: Action to add.
            :param mef_actions: MEF actions to add (optional).
            :return: actions
            """
            actions.setdefault(pid, {"source": source_name, "action": action})
            if mef_actions:
                actions[pid]["MEF"] = mef_actions
            return actions

        actions = {}
        online = online or []
        viaf_agents_data = self.get_entities_pids()
        viaf_agents_pids = [data["pid"] for data in viaf_agents_data]
        # Delete old agent entries from MEF records
        old_agents = {}
        for mef_record in AgentMefRecord.get_mef(self.pid, self.name):
            changed = False
            for agent in mef_record.get_entities_records():
                if agent.pid not in viaf_agents_pids:
                    mef_record.pop(agent.name)
                    old_agents[agent.pid] = agent
                    actions = set_actions(
                        actions=actions,
                        pid=agent.pid,
                        source_name=agent.name,
                        action=Action.DISCARD,
                        mef_actions={mef_record.pid: Action.DELETE},
                    )
                    changed = True
            if changed:
                mef_record.update(data=mef_record, dbcommit=dbcommit, reindex=reindex)
        # Recreate MEF records
        for data in viaf_agents_data:
            agent_record, action = update_online(
                agent_class=data["record_class"], pid=data["pid"], online=online
            )
            if agent_record:
                _, mef_actions = agent_record.create_or_update_mef(
                    dbcommit=dbcommit, reindex=reindex, viaf_record=self
                )
                actions = set_actions(
                    actions=actions,
                    pid=agent_record.pid,
                    source_name=agent_record.name,
                    action=action,
                    mef_actions=mef_actions,
                )
            else:
                # get_mef is called with an entity *name* everywhere else
                # (see the lookup with ``self.name`` above); passing the
                # record class itself could never match a MEF record.
                mef_records = AgentMefRecord.get_mef(
                    data["pid"], data["record_class"].name
                )
                mef_actions = {}
                for mef_record in mef_records:
                    mef_record.update(
                        data=mef_record, dbcommit=dbcommit, reindex=reindex
                    )
                    mef_actions[mef_record.pid] = Action.DISCARD
                actions = set_actions(
                    actions=actions,
                    pid=data["pid"],
                    source_name=data["source"],
                    action=Action.NOT_FOUND,
                    mef_actions=mef_actions,
                )
        # Create Mef records for old agents
        if reindex:
            # Flush before create_or_update_mef runs for the removed agents.
            AgentMefRecord.flush_indexes()
        for entity_pid, agent in old_agents.items():
            mef_record, mef_actions = agent.create_or_update_mef(
                dbcommit=dbcommit, reindex=reindex
            )
            actions.setdefault(entity_pid, {})
            actions[entity_pid].setdefault("MEF", {})
            for pid, action in mef_actions.items():
                actions[entity_pid]["MEF"][pid] = action
        return actions
290

291
    def reindex(self, forceindex=False):
        """Reindex the record, then flush the indexes.

        :param forceindex: Forwarded unchanged to the parent ``reindex``.
        :returns: Whatever the parent ``reindex`` returned.
        """
        outcome = super().reindex(forceindex=forceindex)
        self.flush_indexes()
        return outcome
296

297
    @classmethod
    def get_online_record(cls, viaf_source_code, pid, rec_format=None):
        """Get VIAF record.

        Gets the VIAF record from
        ``{RERO_MEF_VIAF_BASE_URL}/viaf/{pid}`` when ``viaf_source_code`` is
        "VIAF" (case-insensitive), otherwise from
        ``{RERO_MEF_VIAF_BASE_URL}/viaf/sourceID/{source_code}|{pid}``.

        :param viaf_source_code: agent source code
        :param pid: pid for agent source code
        :param rec_format: raw = get the not transformed VIAF record
                       link = get the VIAF link record
        :returns: tuple ``(record, message)``; ``record`` is the VIAF record
            as json, or ``{}`` when the request failed or the returned
            record does not carry the requested pid.
        """
        viaf_format = "/viaf.json"
        if rec_format == "link":
            viaf_format = "/justlinks.json"
            rec_format = "raw"
        viaf_url = current_app.config.get("RERO_MEF_VIAF_BASE_URL")
        url = f"{viaf_url}/viaf"
        is_viaf_lookup = viaf_source_code.upper() == "VIAF"
        if is_viaf_lookup:
            url = f"{url}/{pid}{viaf_format}"
        else:
            url = f"{url}/sourceID/{viaf_source_code}|{pid}{viaf_format}"
        response = requests_retry_session().get(url)
        if response.status_code != requests.codes.ok:
            # Return early: ``msg`` only exists for successful responses, so
            # falling through to the pid comparison could hit an unbound name.
            return {}, f"VIAF get: {pid:<15} {url} | NO RECORD"

        msg = f"VIAF get: {pid:<15} {url} | OK"
        data_json = response.json()
        if rec_format == "raw":
            return data_json, msg

        result = {"pid": data_json.get("viafID")}
        if sources := data_json.get("sources", {}).get("source"):
            if isinstance(sources, dict):
                # A single source is delivered as a dict instead of a list.
                sources = [sources]
            for source in sources:
                # get pid: "#text" is expected to be "<source code>|<pid>"
                parts = source.get("#text", "|").split("|")
                if len(parts) < 2:
                    # No separator: there is no pid to extract.
                    continue
                if bib_source := cls.sources.get(parts[0], {}).get("name"):
                    result[f"{bib_source}_pid"] = parts[1]
                    # get URL
                    nsid = source.get("@nsid")
                    if nsid and nsid.startswith("http"):
                        result[bib_source] = nsid
            # get Wikipedia URLs
            # "wiki" may already hold a single URL string taken from "@nsid";
            # wrap it so it is neither appended to as a str nor sorted into
            # single characters.
            wiki_urls = result.get("wiki", [])
            if isinstance(wiki_urls, str):
                wiki_urls = [wiki_urls]
            if result.get("wiki_pid"):
                x_links = data_json.get("xLinks", {}).get("xLink", [])
                if not isinstance(x_links, list):
                    x_links = [x_links]
                for x_link in x_links:
                    if isinstance(x_link, dict):
                        text = x_link.get("#text")
                        if text and "wikipedia" in text:
                            wiki_urls.append(text.replace('"', "%22"))
            if wiki_urls:
                result["wiki"] = sorted(wiki_urls)

        # make sure we got a VIAF with the same pid for source
        if is_viaf_lookup:
            if result.get("pid") == pid:
                return result, msg
        elif (
            result.get(f"{cls.sources.get(viaf_source_code, {}).get('name')}_pid")
            == pid
        ):
            return result, msg
        return {}, f"VIAF get: {pid:<15} {url} | NO RECORD"
365

366
    def update_online(self, dbcommit=False, reindex=False):
        """Refresh this record from the online VIAF service.

        :param dbcommit: Commit changes to DB.
        :param reindex: Reindex record.
        :returns: ``(self, Action.UPTODATE)`` when the md5 is unchanged,
            ``(replaced record, Action.UPDATE)`` when the data differs, or
            ``(None, Action.DISCARD)`` when no online record was returned.
        """
        fetched, _ = self.get_online_record(viaf_source_code="VIAF", pid=self.pid)
        if not fetched:
            return None, Action.DISCARD
        fetched["$schema"] = self["$schema"]
        fetched = add_md5(fetched)
        if fetched["md5"] == self.get("md5"):
            return self, Action.UPTODATE
        replaced = self.replace(data=fetched, dbcommit=dbcommit, reindex=reindex)
        return replaced, Action.UPDATE
384

385
    @classmethod
    def get_viaf(cls, agent):
        """Get the VIAF record(s) corresponding to an agent.

        For a MEF record the lookup uses its ``viaf_pid``, for a VIAF record
        its own ``pid``. Any other agent is searched in the VIAF index by
        ``agent.viaf_pid_name``, most recently updated first; an error is
        logged when more than one VIAF record matches.

        :param agent: Agent to get the corresponding VIAF record for.
        :returns: List of the records returned by ``get_record_by_pid``.
        """
        if isinstance(agent, AgentMefRecord):
            return [cls.get_record_by_pid(agent.get("viaf_pid"))]
        if isinstance(agent, AgentViafRecord):
            return [cls.get_record_by_pid(agent.get("pid"))]
        agent_pid = agent.get("pid")
        term_field = agent.viaf_pid_name
        search = AgentViafSearch().filter({"term": {term_field: agent_pid}})
        search = search.params(preserve_order=True)
        search = search.sort({"_updated": {"order": "desc"}})
        found = []
        for hit in search.source("pid").scan():
            found.append(cls.get_record_by_pid(hit.pid))
        if len(found) > 1:
            current_app.logger.error(
                f"MULTIPLE VIAF FOUND FOR: {agent.name} {agent.pid} | "
                f"viaf: {', '.join([viaf.pid for viaf in found])}"
            )
        return found
412

413
    @classmethod
    def create_or_update(
        cls,
        data,
        id_=None,
        delete_pid=True,
        dbcommit=False,
        reindex=False,
        test_md5=False,
    ):
        """Create or update a VIAF record, then its MEF and agent records.

        All arguments are forwarded to the parent ``create_or_update``. When
        it yields a record, ``create_mef_and_agents`` is run on that record
        with the same ``dbcommit`` / ``reindex`` flags.

        :returns: The ``(record, action)`` pair from the parent call.
        """
        parent_kwargs = {
            "data": data,
            "id_": id_,
            "delete_pid": delete_pid,
            "dbcommit": dbcommit,
            "reindex": reindex,
            "test_md5": test_md5,
        }
        record, action = super().create_or_update(**parent_kwargs)
        if not record:
            return record, action
        record.create_mef_and_agents(dbcommit=dbcommit, reindex=reindex)
        return record, action
435

436
    def delete(self, force=True, dbcommit=False, delindex=False):
        """Delete record and persistent identifier, then clean the MEF records.

        Every MEF record linked to this VIAF keeps its first agent, loses the
        other agents that belong to this VIAF record and loses its
        ``viaf_pid``. The removed agents then get their own MEF record.

        :param force: Forwarded to the parent ``delete`` (it used to be
            ignored and always sent as ``True``).
        :param dbcommit: Commit changes to DB (VIAF record deletion only; the
            MEF clean up is always committed and reindexed).
        :param delindex: Forwarded to the parent ``delete``.
        :returns: Tuple of the parent ``delete`` result, ``Action.DELETE``
            and the MEF actions dictionary.
        """
        # Collect linked records first: once the VIAF record is deleted they
        # can no longer be resolved through it.
        agents_records = self.get_entities_records()
        mef_records = AgentMefRecord.get_mef(entity_pid=self.pid, entity_name=self.name)
        # delete VIAF record
        result = super().delete(force=force, dbcommit=dbcommit, delindex=delindex)

        # Clean MEF records
        mef_actions = {}
        old_agent_records = {}
        for mef_record in mef_records:
            mef_actions[mef_record.pid] = {}
            mef_agents_records = mef_record.get_entities_records()
            if mef_agents_records:
                # The first agent stays in this MEF record.
                mef_actions[mef_record.pid][mef_agents_records[0].name] = {
                    mef_agents_records[0].pid: Action.UPDATE
                }
            for mef_agent_record in mef_agents_records[1:]:
                if mef_agent_record in agents_records:
                    mef_record.pop(mef_agent_record.name)
                    mef_actions[mef_record.pid][mef_agent_record.name] = {
                        mef_agent_record.pid: Action.DELETE
                    }
                    old_agent_records[mef_agent_record.pid] = mef_agent_record
            viaf_pid = mef_record.pop("viaf_pid", None)
            mef_actions[mef_record.pid]["viaf"] = {viaf_pid: Action.DELETE}
            mef_record.update(data=mef_record, dbcommit=True, reindex=True)
            AgentMefRecord.flush_indexes()
        # recreate MEF records for agents
        for agent_record in old_agent_records.values():
            mef, _ = agent_record.create_or_update_mef(dbcommit=True, reindex=True)
            mef_actions[mef.pid] = {
                agent_record.name: {agent_record.pid: Action.CREATE}
            }
        AgentMefRecord.flush_indexes()
        return result, Action.DELETE, mef_actions
477

478
    def get_entities_pids(self):
        """List the agent pids stored in this VIAF record.

        :returns: One dict (``source``, ``record_class``, ``pid``) per entry
            of ``sources_used`` whose ``<source>_pid`` value is truthy.
        """
        candidates = (
            (source, record_class, self.get(f"{source}_pid"))
            for source, record_class in self.sources_used.items()
        )
        return [
            {"source": source, "record_class": record_class, "pid": source_pid}
            for source, record_class, source_pid in candidates
            if source_pid
        ]
487

488
    def get_entities_records(self, verbose=False):
        """Load the agent records referenced by this VIAF record.

        :param verbose: Log a warning for every referenced agent that
            cannot be loaded.
        :returns: List of the agent records that were found.
        """
        found = []
        for entry in self.get_entities_pids():
            entity_cls = entry["record_class"]
            record = entity_cls.get_record_by_pid(entry["pid"])
            if record:
                found.append(record)
                continue
            if verbose:
                current_app.logger.warning(
                    f"Record not found VIAF: {self.pid} "
                    f"{entry['record_class'].name}: {entry['pid']}"
                )
        return found
501

502
    @classmethod
    def get_missing_entity_pids(cls, agent, verbose=False):
        """Compare the pids of an agent type with the pids known to VIAF.

        :param agent: Agent to search for missing pids.
        :param verbose: Verbose.
        :returns: Agent pids without VIAF, VIAF pids without agent. Both
            lists are empty when no record class exists for ``agent``.
        """
        record_class = get_entity_class(agent)
        if not record_class:
            click.secho(f"ERROR Record class not found for: {agent}", fg="red")
            return [], []

        if verbose:
            click.echo(f"Get pids from {agent} ...")
        db_progress = progressbar(
            items=record_class.get_all_pids(),
            length=record_class.count(),
            verbose=verbose,
        )
        # Starts as every agent pid; pids also found in VIAF are removed.
        unmatched_db_pids = set(db_progress)

        entity_pid_name = f"{record_class.name}_pid"
        if verbose:
            click.echo(f"Get pids from VIAF with {entity_pid_name} ...")
        query = AgentViafSearch().filter(
            "bool", should=[Q("exists", field=entity_pid_name)]
        )
        viaf_progress = progressbar(
            items=query.source(["pid", entity_pid_name]).scan(),
            length=query.count(),
            verbose=verbose,
        )
        viaf_without_entity = []
        for hit in viaf_progress:
            viaf_pid = hit.pid
            entity_pid = hit.to_dict().get(entity_pid_name)
            try:
                unmatched_db_pids.remove(entity_pid)
            except KeyError:
                # VIAF points to an agent pid that is not (or no longer) left
                # in the set of agent pids.
                viaf_without_entity.append(viaf_pid)
        return list(unmatched_db_pids), viaf_without_entity
542

543
    @classmethod
    def get_pids_with_multiple_viaf(cls, verbose=False):
        """Get agent pids that are referenced by more than one VIAF record.

        :param verbose: Verbose.
        :returns: Dict keyed by ``<source>_pid``; each value maps an agent
            pid to the list of VIAF pids referencing it (only pids with
            more than one VIAF pid are kept).
        """
        pid_fields = [
            f"{source}_pid" for source in AgentViafRecord(data={}).sources_used
        ]
        viaf_by_pid = {field: {} for field in pid_fields}
        hits = progressbar(
            items=AgentViafSearch()
            .params(preserve_order=True)
            .sort({"pid": {"order": "asc"}})
            .scan(),
            length=AgentViafSearch().count(),
            verbose=verbose,
        )
        for hit in hits:
            viaf_pid = hit.pid
            hit_data = hit.to_dict()
            for field, pids in viaf_by_pid.items():
                if pid := hit_data.get(field):
                    pids.setdefault(pid, []).append(viaf_pid)
        return {
            field: {
                pid: viaf_pids for pid, viaf_pids in pids.items() if len(viaf_pids) > 1
            }
            for field, pids in viaf_by_pid.items()
        }
574

575

576
class AgentViafIndexer(EntityIndexer):
    """Indexer for VIAF agent records."""

    record_cls = AgentViafRecord

    def bulk_index(self, record_id_iterator):
        """Bulk index records into the index of ``AgentViafSearch``.

        :param record_id_iterator: Iterator yielding record UUIDs.
        """
        viaf_index = AgentViafSearch.Meta.index
        super().bulk_index(record_id_iterator, index=viaf_index, doc_type="viaf")
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc