• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

sul-dlss / libsys-airflow / 10427186039

16 Aug 2024 10:45PM UTC coverage: 83.867% (-0.5%) from 84.325%
10427186039

Pull #1140

github

web-flow
Merge 0afbbe571 into 331037d9f
Pull Request #1140: Error report and email for OCLC errors with new MARC records

54 of 106 new or added lines in 4 files covered. (50.94%)

8 existing lines in 2 files now uncovered.

4273 of 5095 relevant lines covered (83.87%)

0.84 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

93.75
/libsys_airflow/plugins/data_exports/oclc_reports.py
1
import logging
1✔
2
import urllib
1✔
3

4
from datetime import datetime
1✔
5
from pathlib import Path
1✔
6
from typing import Union
1✔
7

8
from airflow.configuration import conf
1✔
9
from airflow.decorators import task
1✔
10
from airflow.models import Variable
1✔
11
from jinja2 import DictLoader, Environment
1✔
12

13
logger = logging.getLogger(__name__)
1✔
14

15
holdings_set_template = """
1✔
16
<h1>OCLC Holdings {% if match %}Matched {% endif %}Set Errors on {{ date }} for {{ library }}</h1>
17
<p>
18
  <a href="{{ dag_run_url }}">DAG Run</a>
19
</p>
20
<h2>FOLIO Instances that failed trying to set Holdings {% if match %}after successful Match{% endif %}</h2>
21
<table class="table table-striped">
22
  <thead>
23
    <tr>
24
      <th>Instance</th>
25
      <th>OCLC Response</th>
26
    </tr>
27
  </thead>
28
  <tbody>
29
{% for row in failures %}
30
  <tr>
31
    <td>
32
      <a href="{{ folio_url }}/inventory/view/{{ row.uuid }}">{{ row.uuid }}</a>
33
    </td>
34
    <td>
35
    {% if row.context %}
36
    {% include 'oclc-payload-template.html' %}
37
    {% else %}
38
     No response from OCLC set API call
39
     {% endif %}
40
    </td>
41
  </tr>
42
{% endfor %}
43
  </tbody>
44
</table>
45
"""
46

47
# Jinja template for the "unset holdings" error report.
# Context variables read by the template: date, library, dag_run_url,
# folio_url and failures (an iterable of rows exposing ``uuid`` and ``context``).
# Rows with a truthy ``context`` are rendered through the included
# 'oclc-payload-template.html'; other rows get a fixed "No response" message
# (which names the *unset* call, since this report covers unset failures).
holdings_unset_template = """
<h1>OCLC Holdings Unset Errors on {{ date }} for {{ library }}</h1>
<p>
  <a href="{{ dag_run_url }}">DAG Run</a>
</p>
<h2>FOLIO Instances that failed trying to unset Holdings</h2>
<table class="table table-striped">
  <thead>
    <tr>
      <th>Instance</th>
      <th>OCLC Response</th>
    </tr>
  </thead>
  <tbody>
  {% for row in failures %}
  <tr>
    <td>
      <a href="{{ folio_url }}/inventory/view/{{ row.uuid }}">{{ row.uuid }}</a>
    </td>
    <td>
    {% if row.context %}
    {% include 'oclc-payload-template.html' %}
    {% else %}
     No response from OCLC unset API call
     {% endif %}
    </td>
  </tr>
{% endfor %}
  </tbody>
</table>
"""
78

79
multiple_oclc_numbers_template = """
1✔
80
 <h1>Multiple OCLC Numbers on {{ date }} for {{ library }}</h1>
81

82
 <p>
83
  <a href="{{ dag_run_url }}">DAG Run</a>
84
 </p>
85

86
 <h2>FOLIO Instances with Multiple OCLC Numbers</h2>
87
 <ol>
88
{% for instance in failures.values() %}
89
 <li>
90
   <a href="{{ instance.folio_url }}">{{ instance.uuid }}</a>:
91
   <ul>
92
   {% for num in instance.oclc_numbers %}
93
    <li>{{ num }}</li>
94
   {% endfor %}
95
   </ul>
96
 </li>
97
{% endfor %}
98
  </ol>
99
"""
100

101
new_oclc_invalid_records = """
1✔
102
 {% macro field_table(errors) -%}
103
  <table class="table table-bordered">
104
        <thead>
105
          <tr>
106
            <th>Tag</th>
107
            <th>Error Level</th>
108
            <th>Detail</th>
109
           </tr>
110
         </thead>
111
         <tbody>
112
         {% for error in errors %}
113
           <tr>
114
             <td>{{ error.tag }}</td>
115
             <td>{{ error.errorLevel }}</td>
116
             <td>{{ error.message }}</td>
117
           </tr>
118
         {% endfor %}
119
         </tbody>
120
    </table>
121
 {% endmacro %}
122
 <h1>Invalid MARC Records New to OCLC on {{ date }} for {{ library }}</h1>
123
 <p>
124
  <a href="{{ dag_run_url }}">DAG Run</a>
125
 </p>
126
 <table class="table table-striped">
127
  <thead>
128
    <tr>
129
      <th>Instance</th>
130
      <th>Reason</th>
131
      <th>OCLC Response</th>
132
    </tr>
133
  </thead>
134
  <tbody>
135
  {% for row in failures %}
136
  <tr>
137
    <td>
138
      <a href="{{ folio_url }}/inventory/view/{{ row.uuid }}">{{ row.uuid }}</a>
139
    </td>
140
    <td>
141
      {{ row.reason }} Error Count {{ row.context.errorCount }}
142
    </td>
143
    <td>
144
      <h4>Errors</h4>
145
      <ol>
146
      {% for error in row.context.errors %}
147
        <li>{{ error }}</li>
148
      {% endfor %}
149
      </ol>
150
      <h4>Fixed Field Errors</h4>
151
       {{ field_table(row.context.fixedFieldErrors) }}
152
       <h4>Variable Field Errors</h4>
153
       {{ field_table(row.context.variableFieldErrors) }}
154
    </td>
155
   </tr>
156
  {% endfor %}
157
  </tbody>
158
</table>
159
"""
160

161

162
oclc_payload_template = """<ul>
1✔
163
        <li><strong>Control Number:</strong> {{ row.context.controlNumber }}</li>
164
        <li><strong>Requested Control Number:</strong> {{ row.context.requestedControlNumber }}</li>
165
        <li><strong>Institution:</strong>
166
           <ul>
167
             <li><em>Code:</em> {{ row.context.institutionCode }}</li>
168
             <li><em>Symbol:</em> {{ row.context.institutionSymbol }}</li>
169
           </ul>
170
        </li>
171
        <li><strong>First Time Use:</strong> {{ row.context.firstTimeUse }}</li>
172
        <li><strong>Success:</strong> {{ row.context.success }}</li>
173
        <li><strong>Message:</strong> {{ row.context.message }}</li>
174
        <li><strong>Action:</strong> {{ row.context.action }}</li>
175
     </ul>
176
"""
177

178
# Shared Jinja environment holding every report template under the name the
# report generators look it up by.
# Autoescaping is enabled because the templates emit HTML and interpolate text
# that originates outside this module (OCLC response payloads, record
# identifiers); without it a "<" or "&" in a message would corrupt the report.
jinja_env = Environment(
    loader=DictLoader(
        {
            "holdings-set.html": holdings_set_template,
            "holdings-unset.html": holdings_unset_template,
            "multiple-oclc-numbers.html": multiple_oclc_numbers_template,
            "new-oclc-marc-errors.html": new_oclc_invalid_records,
            "oclc-payload-template.html": oclc_payload_template,
        }
    ),
    autoescape=True,
)
189

190

191
def _dag_run_url(dag_run) -> str:
    """Return the Airflow grid-view URL for a run of the send_oclc_records DAG.

    Args:
        dag_run: object whose ``run_id`` attribute identifies the run.

    Returns:
        The configured webserver ``base_url`` (with a trailing slash added when
        missing) followed by the DAG grid path and a URL-encoded
        ``dag_run_id`` query parameter.
    """
    # A bare ``import urllib`` does not guarantee that the ``parse`` submodule
    # has been loaded, so import the function that is needed explicitly.
    from urllib.parse import urlencode

    airflow_url = conf.get('webserver', 'base_url')
    if not airflow_url.endswith("/"):
        airflow_url = f"{airflow_url}/"
    params = urlencode({"dag_run_id": dag_run.run_id})
    return f"{airflow_url}dags/send_oclc_records/grid?{params}"
1✔
197

198

199
def _filter_failures(failures: dict, errors: dict):
1✔
200
    for library, instances in failures.items():
1✔
201
        if library not in errors:
1✔
202
            errors[library] = {}
1✔
203
        if len(instances) < 1:
1✔
204
            continue
1✔
205
        for instance in instances:
1✔
206
            if instance['reason'].startswith("Match failed"):
1✔
207
                continue
1✔
208
            if instance['reason'] in errors[library]:
1✔
209
                errors[library][instance['reason']].append(
1✔
210
                    {"uuid": instance['uuid'], "context": instance['context']}
211
                )
212
            else:
213
                errors[library][instance['reason']] = [
1✔
214
                    {"uuid": instance['uuid'], "context": instance['context']}
215
                ]
216

217

218
def _generate_holdings_set_report(**kwargs) -> dict:
    """Render and save the per-library "set holdings" error reports.

    Keyword Args:
        date: datetime used in the report and its file name; defaults to
            ``datetime.utcnow()``.
        match: when truthy, report failures that happened after a successful
            match (different report key and output directory).
        airflow: Airflow home directory; defaults to '/opt/airflow'.
        Remaining keywords (e.g. ``failures``, ``dag_run``, ``folio_url``) are
        forwarded to ``_reports_by_library``.

    Returns:
        The mapping returned by ``_save_reports`` (library -> report path).
    """
    # Key the lookup on the string "date"; testing the datetime value itself
    # for membership in kwargs can never match a keyword name.
    date: datetime = kwargs.setdefault('date', datetime.utcnow())

    if kwargs.get("match", False):
        report_dir = "set_holdings_match"
        kwargs["report_key"] = "Failed to update holdings after match"
    else:
        report_dir = "set_holdings"
        kwargs["report_key"] = "Failed to update holdings"

    kwargs['report_template'] = "holdings-set.html"

    reports = _reports_by_library(**kwargs)

    return _save_reports(
        airflow=kwargs.get('airflow', '/opt/airflow'),
        name=report_dir,
        reports=reports,
        date=date,
    )
242

243

244
def _generate_holdings_unset_report(**kwargs) -> dict:
    """Render and save the per-library "unset holdings" error reports.

    Keyword Args:
        date: datetime used in the report and its file name; defaults to
            ``datetime.utcnow()``.
        airflow: Airflow home directory; defaults to '/opt/airflow'.
        Remaining keywords (e.g. ``failures``, ``dag_run``, ``folio_url``) are
        forwarded to ``_reports_by_library``.

    Returns:
        The mapping returned by ``_save_reports`` (library -> report path).
    """
    # Key the lookup on the string "date"; testing the datetime value itself
    # for membership in kwargs can never match a keyword name.
    date: datetime = kwargs.setdefault('date', datetime.utcnow())

    kwargs['report_template'] = "holdings-unset.html"
    kwargs["report_key"] = "Failed holdings_unset"

    reports = _reports_by_library(**kwargs)

    return _save_reports(
        airflow=kwargs.get('airflow', '/opt/airflow'),
        name="unset_holdings",
        reports=reports,
        date=date,
    )
261

262

263
def _generate_multiple_oclc_numbers_report(**kwargs) -> dict:
    """Render and save the per-library report of instances with several OCLC numbers.

    Keyword Args:
        all_multiple_codes: rows indexed as (instance uuid, library code,
            OCLC numbers).
        date: datetime used in the report and its file name; defaults to
            ``datetime.utcnow()``.
        folio_url: base URL used to build each instance's inventory link.
        airflow: Airflow home directory; defaults to '/opt/airflow'.
        Remaining keywords (e.g. ``dag_run``) are forwarded to
        ``_reports_by_library``.

    Returns:
        The mapping returned by ``_save_reports`` (library -> report path).
    """
    multiple_codes: list = kwargs['all_multiple_codes']
    # _reports_by_library requires a "date" keyword, so store the default too.
    date: datetime = kwargs.setdefault('date', datetime.utcnow())
    folio_base_url = kwargs.get('folio_url') or ""

    library_instances: dict = {}

    for row in multiple_codes:
        instance_uuid = row[0]
        library_code = row[1]
        oclc_codes = row[2]

        # The template renders instance.uuid and instance.folio_url for every
        # entry, so both are stored next to the OCLC numbers.
        library_instances.setdefault(library_code, {})[instance_uuid] = {
            "uuid": instance_uuid,
            "folio_url": f"{folio_base_url}/inventory/view/{instance_uuid}",
            "oclc_numbers": oclc_codes,
        }

    kwargs['failures'] = library_instances
    kwargs['report_template'] = "multiple-oclc-numbers.html"

    reports = _reports_by_library(**kwargs)

    return _save_reports(
        airflow=kwargs.get('airflow', '/opt/airflow'),
        name="multiple_oclc_numbers",
        reports=reports,
        date=date,
    )
294

295

296
def _generate_new_oclc_invalid_records_report(**kwargs) -> dict:
    """Render and save the per-library reports for new MARC records OCLC rejected.

    Keyword Args:
        date: datetime used in the report and its file name; defaults to
            ``datetime.utcnow()``.
        airflow: Airflow home directory; defaults to '/opt/airflow'.
        Remaining keywords (e.g. ``failures``, ``dag_run``, ``folio_url``) are
        forwarded to ``_reports_by_library``.

    Returns:
        The mapping returned by ``_save_reports`` (library -> report path).
    """
    # Key the lookup on the string "date"; testing the datetime value itself
    # for membership in kwargs can never match a keyword name.
    date: datetime = kwargs.setdefault('date', datetime.utcnow())

    kwargs['report_template'] = "new-oclc-marc-errors.html"
    kwargs["report_key"] = "Failed to add new MARC record"

    reports = _reports_by_library(**kwargs)

    return _save_reports(
        airflow=kwargs.get('airflow', '/opt/airflow'),
        name="new_marc_errors",
        reports=reports,
        date=date,
    )
313

314

315
def _reports_by_library(**kwargs) -> dict:
    """Render one HTML report per library with the requested Jinja template.

    Keyword Args:
        failures: maps a library to a dict of that library's entries.
        report_template: name of a template registered in ``jinja_env``.
        report_key: optional. When given, only the entries stored under this
            key of a library's dict are reported, and a library with nothing
            under that key gets no report. When absent, the library's whole
            dict is handed to the template.
        date: datetime, rendered as "%d %B %Y".
        dag_run: passed to ``_dag_run_url`` for the link in the report.
        Every other keyword is forwarded to the template unchanged.

    Returns:
        Mapping of library -> rendered HTML; libraries with nothing to report
        are omitted.
    """
    failures: dict = kwargs["failures"]
    report_template_name: str = kwargs['report_template']
    report_key: Union[str, None] = kwargs.get("report_key")
    date: datetime = kwargs['date']

    reports: dict = {}

    report_template = jinja_env.get_template(report_template_name)

    for library, rows in failures.items():
        if not rows:
            continue
        if report_key:
            selected = rows.get(report_key) or []
            # Without this guard the whole reason-keyed dict would be handed
            # to a template that expects rows, yielding a report of blanks.
            if not selected:
                continue
        else:
            selected = rows
        context = {
            **kwargs,
            "library": library,
            "failures": selected,
            "date": date.strftime("%d %B %Y"),
            "dag_run_url": _dag_run_url(kwargs["dag_run"]),
        }
        reports[library] = report_template.render(**context)

    return reports
1✔
343

344

345
def _save_reports(**kwargs) -> dict:
    """Write each library's rendered report to disk and return the file paths.

    Keyword Args:
        name: report type; used as the sub-directory name and in the log line.
        reports: mapping of library -> rendered report text.
        airflow: Airflow home directory under which reports are stored.
        date: datetime whose ISO format becomes the file name.

    Returns:
        Mapping of library -> path (as ``str``) of the file that was written.
    """
    report_name: str = kwargs['name']
    html_by_library: dict = kwargs['reports']
    airflow_home: str = kwargs['airflow']
    stamp: datetime = kwargs['date']

    reports_root = Path(airflow_home, "data-export-files/oclc/reports")
    saved: dict = {}

    for library, html in html_by_library.items():
        target_dir = reports_root.joinpath(library, report_name)
        target_dir.mkdir(parents=True, exist_ok=True)
        target = target_dir / f"{stamp.isoformat()}.html"
        target.write_text(html)
        logger.info(f"Created {report_name} report for {library} at {target}")
        saved[library] = str(target)

    return saved
1✔
364

365

366
@task
def filter_failures_task(**kwargs) -> dict:
    """Merge the delete, match, new and update failures into one grouped dict.

    Keyword Args:
        delete, match, new, update: each a dict mapping a library to its
            failed instances.

    Returns:
        The dict filled by ``_filter_failures``, processed in the order
        update, delete, match, new. A per-library count is logged before each
        group is processed, and the final dict is logged as well.
    """

    def _summarize(by_library: dict) -> str:
        # One "<library> - <count>, " fragment per library.
        return "".join(
            f"{lib} - {len(errors)}, " for lib, errors in by_library.items()
        )

    deleted_failures: dict = kwargs["delete"]
    match_failures: dict = kwargs["match"]
    new_failures: dict = kwargs["new"]
    update_failures: dict = kwargs["update"]

    filtered_errors: dict = {}

    groups = (
        ("Update failures: ", update_failures),
        ("Deleted failures ", deleted_failures),
        ("Match failures: ", match_failures),
        ("New failures ", new_failures),
    )
    for label, group in groups:
        logger.info(f"{label}{_summarize(group)}")
        _filter_failures(group, filtered_errors)

    logger.info(filtered_errors)
    return filtered_errors
1✔
392

393

394
@task
def holdings_set_errors_task(**kwargs):
    """Generate the set-holdings error reports, supplying the FOLIO base URL.

    The ``FOLIO_URL`` Airflow Variable is passed as ``folio_url`` together
    with the received keywords to ``_generate_holdings_set_report``, whose
    result is returned.
    """
    report_args = {**kwargs, "folio_url": Variable.get("FOLIO_URL")}
    return _generate_holdings_set_report(**report_args)
1✔
399

400

401
@task
def holdings_unset_errors_task(**kwargs):
    """Generate the unset-holdings error reports, supplying the FOLIO base URL.

    The ``FOLIO_URL`` Airflow Variable is passed as ``folio_url`` together
    with the received keywords to ``_generate_holdings_unset_report``, whose
    result is returned.
    """
    report_args = {**kwargs, "folio_url": Variable.get("FOLIO_URL")}
    return _generate_holdings_unset_report(**report_args)
1✔
406

407

408
@task
def multiple_oclc_numbers_task(**kwargs):
    """Generate the multiple-OCLC-numbers reports from upstream XCom results.

    Pulls the XCom values of the 'divide_new_records_by_library' and
    'divide_delete_records_by_library' tasks via ``kwargs['ti']``, concatenates
    them as ``all_multiple_codes``, adds the ``FOLIO_URL`` Airflow Variable as
    ``folio_url`` and returns the result of
    ``_generate_multiple_oclc_numbers_report``.
    """
    task_instance = kwargs['ti']

    new_multiple_records = task_instance.xcom_pull(
        task_ids='divide_new_records_by_library'
    )
    deletes_multiple_records = task_instance.xcom_pull(
        task_ids='divide_delete_records_by_library'
    )
    # xcom_pull yields None when a task pushed nothing; treat that as "no
    # rows" so the concatenation below cannot raise TypeError.
    if new_multiple_records is None:
        new_multiple_records = []
    if deletes_multiple_records is None:
        deletes_multiple_records = []
    kwargs['all_multiple_codes'] = new_multiple_records + deletes_multiple_records

    kwargs['folio_url'] = Variable.get("FOLIO_URL")

    return _generate_multiple_oclc_numbers_report(**kwargs)
1✔
423

424

425
@task
def new_oclc_marc_errors_task(**kwargs):
    """Generate the new-MARC-record error reports, supplying the FOLIO base URL.

    The ``FOLIO_URL`` Airflow Variable is passed as ``folio_url`` together
    with the received keywords to
    ``_generate_new_oclc_invalid_records_report``, whose result is returned.
    """
    report_args = {**kwargs, "folio_url": Variable.get("FOLIO_URL")}
    return _generate_new_oclc_invalid_records_report(**report_args)
×
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc