• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

rero / sonar / 17425918180

03 Sep 2025 07:11AM UTC coverage: 95.796% (-0.6%) from 96.378%
17425918180

push

github

PascalRepond
translations: extract messages

Co-Authored-by: Pascal Repond <pascal.repond@rero.ch>

7816 of 8159 relevant lines covered (95.8%)

0.96 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

94.59
/sonar/modules/documents/tasks.py
1
# Swiss Open Access Repository
2
# Copyright (C) 2021 RERO
3
#
4
# This program is free software: you can redistribute it and/or modify
5
# it under the terms of the GNU Affero General Public License as published by
6
# the Free Software Foundation, version 3 of the License.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU Affero General Public License for more details.
12
#
13
# You should have received a copy of the GNU Affero General Public License
14
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
15

16
"""Tasks for document in celery."""
17

18
from celery import shared_task
1✔
19
from flask import current_app
1✔
20
from invenio_db import db
1✔
21

22

23
@shared_task(ignore_result=True)
def import_records(records_to_import):
    """Import a batch of harvested records, then bulk-index them.

    Runs as a celery task; ``ignore_result=True`` means the task state and
    result are not stored, which makes execution faster.

    :param list records_to_import: List of record metadata dicts, each one
        optionally carrying a "files" list of file descriptors.
    :returns: List of IDs (as strings) of the imported records.
    """
    # Imported locally — presumably to avoid a circular import at module
    # load time (TODO confirm against the api module).
    from sonar.modules.documents.api import DocumentIndexer, DocumentRecord

    indexer = DocumentIndexer(record_cls=DocumentRecord)
    ids = []

    for data in records_to_import:
        try:
            # File descriptors are processed separately from the metadata.
            files_data = data.pop("files", [])

            record = DocumentRecord.get_record_by_identifier(data.get("identifiedBy", []))

            # Every imported record is flagged as harvested.
            data["harvested"] = True

            if record:
                # Same identifier already present: refresh the existing record.
                current_app.logger.warning(f"Record already imported with PID {record['pid']}: {data}")
                record.update(data)
            else:
                record = DocumentRecord.create(data, dbcommit=False, with_bucket=True)

            for file_data in files_data:
                # Pop url/key so the remaining dict can be forwarded as
                # keyword arguments to the file-attachment helpers.
                url, key = file_data.pop("url"), file_data.pop("key")

                try:
                    if url.startswith("http"):
                        record.add_file_from_url(url, key, **file_data)
                    else:
                        with open(url, "rb") as pdf_file:
                            record.add_file(pdf_file.read(), key, **file_data)
                except Exception as exception:
                    # A broken file must not abort the whole record import.
                    current_app.logger.warning(
                        f"Error during import of file {key} of record {record['identifiedBy']}: {exception}"
                    )

            # Stage the record's changes; nothing is persisted to the DB yet.
            record.commit()

            # Push pending changes into the DB session (still not persisted).
            db.session.flush()

            # Remember the ID for the Elasticsearch bulk indexing below.
            ids.append(str(record.id))

            current_app.logger.info(f'Record with reference "{record["identifiedBy"]}" imported successfully')

        except Exception as exception:
            # Best-effort batch import: log and continue with the next record.
            current_app.logger.error(f"Error during importation of record {data}: {exception}")

    # Persist the whole batch at once, then index the imported records.
    db.session.commit()
    indexer.bulk_index(ids)
    indexer.process_bulk_queue()

    return ids
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc