• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

adsabs / ADSCitationCapture / 11921709590

19 Nov 2024 09:02PM UTC coverage: 70.135%. First build
11921709590

Pull #70

github

web-flow
Merge 47217066b into 497a59c1b
Pull Request #70: Concept doi metadata updates

7 of 14 new or added lines in 3 files covered. (50.0%)

2501 of 3566 relevant lines covered (70.13%)

0.7 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

52.11
/ADSCitationCapture/db.py
1
import os
1✔
2
from typing import OrderedDict
1✔
3
from psycopg2 import IntegrityError
1✔
4
from dateutil.tz import tzutc
1✔
5
from ADSCitationCapture.models import Citation, CitationTarget, Event, Reader
1✔
6
from ADSCitationCapture import doi
1✔
7
from adsmsg import CitationChange
1✔
8
import datetime
1✔
9
from adsputils import setup_logging
1✔
10
from sqlalchemy_continuum import version_class
1✔
11

12
# ============================= INITIALIZATION ==================================== #
13
# - Use app logger:
14
#import logging
15
#logger = logging.getLogger('ads-citation-capture')
16
# - Or individual logger for this file:
17
from adsputils import setup_logging, load_config
1✔
18
# Project root: the parent directory of the package containing this module.
proj_home = os.path.realpath(os.path.join(os.path.dirname(__file__), '../'))
config = load_config(proj_home=proj_home)
logger = setup_logging(
    __name__,
    proj_home=proj_home,
    level=config.get('LOGGING_LEVEL', 'INFO'),
    attach_stdout=config.get('LOG_STDOUT', False),
)

# Dictionary that defines the output files for ADSDataPipeline.
# Each entry is a path prefix; the environment name is appended below.
file_names = OrderedDict([
    ('bibcode', proj_home + '/logs/output/bibcodes_CC.list.can.'),
    ('citations', proj_home + '/logs/output/citations_CC.list.'),
    ('references', proj_home + '/logs/output/references_CC.list.'),
    ('authors', proj_home + '/logs/output/facet_authors_CC.list.'),
])

env_name = config.get('ENVIRONMENT', 'back-dev')
for key in list(file_names):
    file_names[key] += str(env_name)
1✔
34

35
# =============================== FUNCTIONS ======================================= #
36
def store_event(app, data):
    """
    Stores a new event in the DB

    :param app: application object providing `session_scope()`.
    :param data: payload assigned to `Event.data`.
    :return: True if the commit succeeded, False otherwise (the failure is
        logged with its traceback and not re-raised).
    """
    stored = False
    with app.session_scope() as session:
        event = Event()
        event.data = data
        session.add(event)
        try:
            session.commit()
        except Exception:
            # `Exception` instead of a bare `except:` so KeyboardInterrupt /
            # SystemExit are not swallowed.
            logger.exception("Problem storing event '%s'", str(event))
            # Reset the failed transaction so the session is usable again
            # when the surrounding session scope exits.
            session.rollback()
        else:
            stored = True
    return stored
1✔
52

53
def store_citation_target(app, citation_change, content_type, raw_metadata, parsed_metadata, status, associated=None):
    """
    Stores a new citation target in the DB

    :param app: application object providing `session_scope()`.
    :param citation_change: message with `content`, `citing` and `timestamp`
        attributes; `content` identifies the target.
    :param content_type: value stored in `CitationTarget.content_type`.
    :param raw_metadata: value stored in `raw_cited_metadata`.
    :param parsed_metadata: dict stored in `parsed_cited_metadata`; its
        'bibcode' entry (if any) is also copied to the `bibcode` column.
    :param status: value stored in `CitationTarget.status`.
    :param associated: value stored in `associated_works`.
    :return: True if the row was committed, False on IntegrityError.
    """
    stored = False
    with app.session_scope() as session:
        citation_target = CitationTarget()
        citation_target.content = citation_change.content
        citation_target.content_type = content_type
        citation_target.raw_cited_metadata = raw_metadata
        citation_target.parsed_cited_metadata = parsed_metadata
        citation_target.curated_metadata = {}
        citation_target.status = status
        citation_target.bibcode = parsed_metadata.get("bibcode", None)
        citation_target.associated_works = associated
        session.add(citation_target)
        try:
            session.commit()
        except IntegrityError as e:
            # NOTE(review): `IntegrityError` is imported from psycopg2 in this
            # file; if the session wraps driver errors in its own exception
            # class this handler may never match -- confirm.
            # IntegrityError: (psycopg2.IntegrityError) duplicate key value violates unique constraint "citing_content_unique_constraint"
            logger.error("Ignoring new citation target (citing '%s', content '%s' and timestamp '%s') because it already exists in the database (another new citation may have been processed before this one): '%s'", citation_change.citing, citation_change.content, citation_change.timestamp.ToJsonString(), str(e))
            # Reset the failed transaction so the session scope can exit cleanly.
            session.rollback()
        else:
            logger.info("Stored new citation target (citing '%s', content '%s' and timestamp '%s')", citation_change.citing, citation_change.content, citation_change.timestamp.ToJsonString())
            stored = True
    return stored
1✔
78

79
def _update_citation_target_metadata_session(session, content, raw_metadata, parsed_metadata, curated_metadata=None, status=None, bibcode=None, associated=None):
    """
    Actual calls to database session for update_citation_target_metadata

    :param session: open database session.
    :param content: value matched against `CitationTarget.content`.
    :param raw_metadata: new raw metadata; bytes are decoded as UTF-8 when
        possible.
    :param parsed_metadata: new parsed metadata dict.
    :param curated_metadata: new curated metadata dict (None means `{}`).
    :param status: new status; None leaves the stored status untouched.
    :param bibcode: new value for the `bibcode` column.
    :param associated: new value for `associated_works`.
    :return: True if the row was modified and committed, False if nothing
        changed or no row matches `content`.
    """
    # A fresh dict per call: the value is stored on the ORM object, so a
    # shared mutable default could leak between calls.
    if curated_metadata is None:
        curated_metadata = {}

    citation_target = session.query(CitationTarget).filter(CitationTarget.content == content).first()
    if citation_target is None:
        logger.warning("Cannot update metadata for citation target '%s' because it is not in the database", content)
        return False

    if isinstance(raw_metadata, bytes):
        try:
            raw_metadata = raw_metadata.decode('utf-8')
        except UnicodeDecodeError:
            # bytes.decode raises UnicodeDecodeError (not UnicodeEncodeError);
            # keep the raw bytes when they are not valid UTF-8.
            pass

    changed = (
        citation_target.raw_cited_metadata != raw_metadata
        or citation_target.parsed_cited_metadata != parsed_metadata
        or (status is not None and citation_target.status != status)
        or citation_target.curated_metadata != curated_metadata
        or citation_target.bibcode != bibcode
        or citation_target.associated_works != associated
    )
    if not changed:
        return False

    citation_target.raw_cited_metadata = raw_metadata
    citation_target.parsed_cited_metadata = parsed_metadata
    citation_target.curated_metadata = curated_metadata
    citation_target.bibcode = bibcode
    if citation_target.associated_works != associated:
        logger.debug("associated works set for {} set from {} to {}".format(citation_target.content, citation_target.associated_works, associated))
        citation_target.associated_works = associated
    if status is not None:
        citation_target.status = status
    session.add(citation_target)
    session.commit()
    logger.info("Updated metadata for citation target '%s' (alternative bibcodes '%s')", content, ", ".join(curated_metadata.get('alternate_bibcode', [])))
    return True
×
106

107
def update_citation_target_metadata(app, content, raw_metadata, parsed_metadata, curated_metadata=None, status=None, bibcode=None, associated=None):
    """
    Update metadata for a citation target

    Opens a session scope and delegates to
    `_update_citation_target_metadata_session`. When `bibcode` is falsy it is
    taken from `parsed_metadata['bibcode']` (None if absent).

    :return: whatever the session-level helper returns (truthy when the row
        was updated).
    """
    # Avoid a shared mutable default; always hand a real dict to the helper.
    if curated_metadata is None:
        curated_metadata = {}
    if not bibcode:
        bibcode = parsed_metadata.get('bibcode', None)
    metadata_updated = False
    with app.session_scope() as session:
        metadata_updated = _update_citation_target_metadata_session(session, content, raw_metadata, parsed_metadata, curated_metadata, status=status, bibcode=bibcode, associated=associated)
    return metadata_updated
×
116

117
def write_citation_target_data(app, only_status=None):
    """
    Writes Canonical bibcodes to file for DataPipeline

    Files produced (paths come from the module-level `file_names`):
        Reference Network File
        Citation Network File
        Canonical Bibcodes File
        Facet Authors File

    Each file is first written with a ".tmp" suffix and then copied over the
    final path; the temporary file is removed after a successful copy.

    :param app: application object providing `session_scope()`.
    :param only_status: if truthy, only citation targets with this status
        are exported; otherwise all citation targets are exported.
    :return: None.
    """
    # Local import so this block does not depend on the file's import header.
    import shutil

    with app.session_scope() as session:
        query = session.query(CitationTarget)
        if only_status:
            records_db = query.filter_by(status=only_status).all()
            disable_filter = only_status in ['DISCARDED', 'EMITTABLE']
        else:
            records_db = query.all()
            disable_filter = True
        # Rows without a bibcode cannot be written ("\n".join would raise
        # TypeError on None), so they are skipped.
        bibcodes = [r.bibcode for r in records_db if r.bibcode is not None]
        records = _extract_key_citation_target_data(records_db, disable_filter=disable_filter)
        #writes canonical bibcodes to file.
        with open(file_names['bibcode'] + ".tmp", 'w') as f:
            f.write("\n".join(bibcodes))
        logger.info("Writing Citation/Reference Network Files.")
        _write_key_citation_reference_data(app, bibcodes)
        logger.info("Writing author data for {} records".format(len(records)))
        _write_key_citation_target_authors(app, records)
        for final_path in file_names.values():
            tmp_path = final_path + ".tmp"
            # Copy with shutil instead of building a shell command from the
            # path, which breaks on spaces or shell metacharacters.
            try:
                shutil.copyfile(tmp_path, final_path)
            except OSError as e:
                logger.warning("Copying file: {} Failed: {}".format(final_path, e))
                continue
            logger.info("Copied {}.tmp to {}".format(final_path, final_path))
            try:
                os.remove(tmp_path)
            except OSError as e:
                logger.warning("Removing file: {}.tmp Failed: {}".format(final_path, e))
            else:
                logger.debug('Removed {}.tmp file from /app/logs/output/'.format(final_path))
×
149

150
def _write_key_citation_target_authors(app, records):
    """
    Writes facet author data to file.

    For every record whose parsed metadata is non-empty, one line is written
    to `file_names['authors'] + ".tmp"`: the record's bibcode followed by its
    'normalized_authors' entries, tab-separated.

    :param app: application object passed to `get_citation_target_metadata`.
    :param records: iterable of dicts with 'content' and 'bibcode' keys.
    :raises Exception: if anything fails while writing; the original error is
        attached as the cause.
    """
    tmp_path = file_names['authors'] + ".tmp"
    try:
        with open(tmp_path, 'w') as f:
            for rec in records:
                parsed_metadata = get_citation_target_metadata(app, rec['content']).get('parsed', {})
                if parsed_metadata:
                    f.write(str(rec['bibcode']) + "\t" + "\t".join(parsed_metadata.get('normalized_authors', '')) + "\n")

        logger.info("Wrote file {} to disk.".format('authors'))
    except Exception as e:
        logger.exception("Failed to write file {}.".format(tmp_path))
        # Chain the original error so the root cause is not lost.
        raise Exception("Failed to write file {}.".format(tmp_path)) from e
×
165

166
def _write_key_citation_reference_data(app, bibcodes):
    """
    Write the two network files:
    Citation Network File: X cites software record
    Reference Network File: software record is cited by X

    Both are needed to integrate software records into classic record metrics.

    For each bibcode and each of its citations (from
    `get_citations_by_bibcode`), the citations temp file gets a
    "<bibcode>\\t<citing>" line and the references temp file gets a
    "<citing>\\t<bibcode>" line.

    :param app: application object passed to `get_citations_by_bibcode`.
    :param bibcodes: iterable of bibcodes to export.
    :raises Exception: if anything fails while writing; the original error is
        attached as the cause.
    """
    citations_tmp = file_names['citations'] + ".tmp"
    references_tmp = file_names['references'] + ".tmp"
    try:
        with open(citations_tmp, 'w') as f, open(references_tmp, 'w') as g:
            for bib in bibcodes:
                for cite in get_citations_by_bibcode(app, bib):
                    g.write(str(cite) + "\t" + str(bib) + "\n")
                    f.write(str(bib) + "\t" + str(cite) + "\n")
        logger.info("Wrote files {} and {} to disk.".format(file_names['citations'], file_names['references']))
    except Exception as e:
        logger.exception("Failed to write files {} and {}.".format(citations_tmp, references_tmp))
        # Chain the original error so the root cause is not lost.
        raise Exception("Failed to write files {} and {}.".format(citations_tmp, references_tmp)) from e
×
185

186
def _update_citation_target_curator_message_session(session, content, msg):
    """
    Actual calls to database session for update_citation_target_curator_message

    Replaces the `curated_metadata` of the citation target whose `content`
    matches and commits.

    :return: True if a matching row was found and updated, False otherwise.
    """
    citation_target = session.query(CitationTarget).filter(CitationTarget.content == content).first()
    if not citation_target:
        # Explicit False instead of an implicit None when nothing matches.
        return False
    citation_target.curated_metadata = msg
    session.add(citation_target)
    session.commit()
    return True
×
197

198
def update_citation_target_curator_message(app, content, msg):
    """
    Store a curator message as the curated metadata of a citation target.

    Thin wrapper that opens a session scope and hands the work to
    `_update_citation_target_curator_message_session`; its result is returned.
    """
    with app.session_scope() as session:
        outcome = _update_citation_target_curator_message_session(session, content, msg)
    return outcome
×
206

207
def store_citation(app, citation_change, content_type, raw_metadata, parsed_metadata, status):
    """
    Stores a new citation in the DB

    :param app: application object providing `session_scope()`.
    :param citation_change: message providing `citing`, `cited`, `content`,
        `resolved` and `timestamp`.
    :param content_type: accepted for interface compatibility; not used here.
    :param raw_metadata: accepted for interface compatibility; not used here.
    :param parsed_metadata: accepted for interface compatibility; not used here.
    :param status: value stored in `Citation.status`.
    :return: True if the row was committed, False on IntegrityError.
    """
    stored = False
    with app.session_scope() as session:
        citation = Citation()
        citation.citing = citation_change.citing
        citation.cited = citation_change.cited
        citation.content = citation_change.content
        citation.resolved = citation_change.resolved
        # The message timestamp is treated as UTC.
        citation.timestamp = citation_change.timestamp.ToDatetime().replace(tzinfo=tzutc())
        citation.status = status
        session.add(citation)
        try:
            session.commit()
        except IntegrityError as e:
            # NOTE(review): `IntegrityError` is imported from psycopg2 in this
            # file; if the session wraps driver errors in its own exception
            # class this handler may never match -- confirm.
            # IntegrityError: (psycopg2.IntegrityError) duplicate key value violates unique constraint "citing_content_unique_constraint"
            logger.error("Ignoring new citation (citing '%s', content '%s' and timestamp '%s') because it already exists in the database when it is not supposed to (race condition?): '%s'", citation_change.citing, citation_change.content, citation_change.timestamp.ToJsonString(), str(e))
            # Reset the failed transaction so the session scope can exit cleanly.
            session.rollback()
        else:
            logger.info("Stored new citation (citing '%s', content '%s' and timestamp '%s')", citation_change.citing, citation_change.content, citation_change.timestamp.ToJsonString())
            stored = True
    return stored
1✔
230

231
def store_reader_data(app, reader_change, status):
    """
    Stores a new reader entry in the DB

    :param app: application object providing `session_scope()`.
    :param reader_change: mapping with 'bibcode', 'reader' and 'timestamp'
        keys.
    :param status: value stored in `Reader.status`.
    :return: True if the row was committed, False on IntegrityError.
    """
    stored = False
    with app.session_scope() as session:
        reads = Reader()
        reads.bibcode = reader_change['bibcode']
        reads.reader = reader_change['reader']
        reads.timestamp = reader_change['timestamp']
        reads.status = status
        session.add(reads)
        try:
            session.commit()
        except IntegrityError as e:
            # NOTE(review): `IntegrityError` is imported from psycopg2 in this
            # file; if the session wraps driver errors in its own exception
            # class this handler may never match -- confirm.
            # The key is 'reader' (as used above); 'readers' raised KeyError
            # inside this handler.
            logger.error("Ignoring new reader information (bibcode '%s', reader '%s') because it already exists in the database when it is not supposed to (race condition?): '%s'", reader_change['bibcode'], reader_change['reader'], str(e))
            # Reset the failed transaction so the session scope can exit cleanly.
            session.rollback()
        else:
            logger.info("Stored new reader (bibcode: '%s', reader '%s' timestamp '%s')", reader_change['bibcode'], reader_change['reader'], reader_change['timestamp'])
            stored = True
    return stored
×
252

253
def get_citation_target_count(app):
    """
    Count the citation targets currently stored in the database.
    """
    with app.session_scope() as session:
        total = session.query(CitationTarget).count()
    return total
1✔
261

262
def get_citation_count(app):
    """
    Count the citations currently stored in the database.
    """
    with app.session_scope() as session:
        total = session.query(Citation).count()
    return total
1✔
270

271
def _extract_key_citation_target_data(records_db, disable_filter=False):
1✔
272
    """
273
    Convert list of CitationTarget to a list of dictionaries with key data
274
    """
275
    records = [
1✔
276
        {
277
            'bibcode': record_db.bibcode,
278
            'alternate_bibcode': record_db.parsed_cited_metadata.get('alternate_bibcode', []),
279
            'version': record_db.parsed_cited_metadata.get('version', None),
280
            'content': record_db.content,
281
            'content_type': record_db.content_type,
282
            'curated_metadata': record_db.curated_metadata if record_db.curated_metadata is not None else {},
283
            'associated_works': record_db.associated_works,
284
        }
285
        for record_db in records_db
286
        if disable_filter or record_db.parsed_cited_metadata.get('bibcode', None) is not None
287
    ]
288
    return records
1✔
289

290
def get_citation_targets_by_bibcode(app, bibcodes, only_status='REGISTERED'):
    """
    Look up citation targets by bibcode and return them as key-data dicts.

    One query is issued per bibcode and only its first match is kept. When
    `only_status` is truthy the query is restricted to that status.
    """
    with app.session_scope() as session:
        matches = []
        for bibcode in bibcodes:
            query = session.query(CitationTarget).filter(CitationTarget.bibcode == bibcode)
            if only_status:
                query = query.filter_by(status=only_status)
            hit = query.first()
            if hit:
                matches.append(hit)

        # The "must have a parsed bibcode" filter is skipped for DISCARDED
        # targets and when no status restriction is requested.
        disable_filter = (only_status == 'DISCARDED') if only_status else True
        records = _extract_key_citation_target_data(matches, disable_filter=disable_filter)
    return records
1✔
310

311
def get_citation_targets_by_alt_bibcode(app, alt_bibcodes, only_status='REGISTERED'):
    """
    Look up citation targets by alternate bibcode and return key-data dicts.

    For each alternate bibcode, the first citation target whose parsed
    metadata 'alternate_bibcode' entry contains it is kept. When
    `only_status` is truthy the query is restricted to that status.
    """
    with app.session_scope() as session:
        matches = []
        for alt_bibcode in alt_bibcodes:
            query = session.query(CitationTarget).filter(
                CitationTarget.parsed_cited_metadata['alternate_bibcode'].contains([alt_bibcode])
            )
            if only_status:
                query = query.filter_by(status=only_status)
            hit = query.first()
            if hit:
                matches.append(hit)

        # The "must have a parsed bibcode" filter is skipped for DISCARDED
        # targets and when no status restriction is requested.
        disable_filter = (only_status == 'DISCARDED') if only_status else True
        records = _extract_key_citation_target_data(matches, disable_filter=disable_filter)
    return records
1✔
331

332
def get_citation_targets_by_doi(app, dois, only_status='REGISTERED'):
    """
    Look up citation targets whose content is one of `dois`.

    Returns key-data dicts. When `only_status` is truthy the query is
    restricted to that status, and (except for 'DISCARDED') records without
    a bibcode in their parsed metadata are left out.
    """
    with app.session_scope() as session:
        query = session.query(CitationTarget).filter(CitationTarget.content.in_(dois))
        if only_status:
            query = query.filter_by(status=only_status)
            disable_filter = only_status == 'DISCARDED'
        else:
            disable_filter = True
        records = _extract_key_citation_target_data(query.all(), disable_filter=disable_filter)
    return records
1✔
346

347
def _get_citation_targets_session(session, only_status='REGISTERED'):
    """
    Session-level implementation behind get_citation_targets.

    Fetches every citation target (optionally restricted to `only_status`)
    and converts the rows to key-data dicts.
    """
    query = session.query(CitationTarget)
    if only_status:
        query = query.filter_by(status=only_status)
        disable_filter = only_status in ['DISCARDED', 'EMITTABLE']
    else:
        disable_filter = True
    return _extract_key_citation_target_data(query.all(), disable_filter=disable_filter)
×
359
    
360
def get_associated_works_by_doi(app, all_versions_doi, only_status='REGISTERED'):
    """
    Build the associated-works mapping for a set of version DOIs.

    :param app: application object passed to `get_citation_targets_by_doi`.
    :param all_versions_doi: mapping with a 'versions' list of DOIs and a
        'concept_doi' string (lower-cased before lookup).
    :param only_status: status filter forwarded to the lookups.
    :return: dict mapping "Version <version>" to the bibcode of each version
        found in the database, plus a 'Software Source' entry holding the
        bibcode of the concept DOI record when one exists; None when nothing
        was found or the lookup failed.
    """
    dois = all_versions_doi['versions']
    concept_doi = all_versions_doi['concept_doi'].lower()
    try:
        versions = {
            "Version " + str(record.get('version', '')): record.get('bibcode', '')
            for record in get_citation_targets_by_doi(app, dois, only_status)
        }
        root_ver = get_citation_targets_by_doi(app, [concept_doi], only_status)
    except Exception:
        # Report real lookup failures with their traceback instead of
        # disguising them as "nothing found".
        logger.exception("Failed to look up associated works for %s", concept_doi)
        return None

    if root_ver:
        versions['Software Source'] = root_ver[0]['bibcode']
    if versions:
        return versions

    # `dois` may be empty; fall back to the concept DOI so this log call
    # cannot raise IndexError.
    logger.info('No associated works for %s in database', dois[0] if dois else concept_doi)
    return None
×
377
        
378
def get_citation_targets(app, only_status='REGISTERED'):
    """
    Fetch citation targets as a list of key-data dicts.

    - With a truthy `only_status`, only targets in that status are returned
    - Records without a bibcode in the database will not be returned
      (unless the filter is disabled by the session-level helper)
    """
    with app.session_scope() as session:
        targets = _get_citation_targets_session(session, only_status)
    return targets
×
386

387
def _get_citation_target_metadata_session(session, doi, citation_in_db, metadata, curate=True, concept=False):
    """
    Actual calls to database session for get_citation_target_metadata

    :param session: open database session.
    :param doi: value matched against `CitationTarget.content`.
    :param citation_in_db: unused; kept for interface compatibility.
    :param metadata: dict that is filled in place and returned.
    :param curate: when True, 'parsed' is the parsed metadata with curated
        entries applied and the bibcode column substituted; when False it is
        the stored parsed metadata.
    :param concept: when True, 'parsed'['pubdate'] is taken from the first
        entry of `citation_target.versions`.
    :return: `metadata`, with 'raw', 'curated', 'status', 'parsed' and
        'associated' set when the citation target exists; unchanged otherwise.
    """
    citation_target = session.query(CitationTarget).filter_by(content=doi).first()
    if citation_target is None:
        return metadata

    parsed = citation_target.parsed_cited_metadata
    metadata['raw'] = citation_target.raw_cited_metadata
    metadata['curated'] = citation_target.curated_metadata if citation_target.curated_metadata is not None else {}
    metadata['status'] = citation_target.status
    if curate:
        #modified metadata updates every field that isn't the doi or the canonical bibcode
        metadata['parsed'] = generate_modified_metadata(parsed, metadata['curated']) if parsed is not None else {}
        #This line replaces the parsed bibcode with the bibcode column
        if citation_target.bibcode:
            metadata['parsed'].update({'bibcode': citation_target.bibcode})
    else:
        metadata['parsed'] = parsed if parsed is not None else {}
    if concept:
        # Work on a copy: with curate=False 'parsed' is the dict owned by the
        # ORM object, and writing 'pubdate' into it would modify the stored
        # metadata in place.
        metadata['parsed'] = dict(metadata['parsed'])
        # NOTE(review): presumably `versions` is the version history of the
        # row and [0] its earliest entry -- confirm against the model.
        try:
            first_version = citation_target.versions[0]
        except IndexError:
            first_version = None
            logger.warning("No versions found for citation target '%s'; keeping its own pubdate", doi)
        if first_version is not None:
            first_parsed = first_version.parsed_cited_metadata
            metadata['parsed']['pubdate'] = first_parsed.get('pubdate') if first_parsed is not None else None
    metadata['associated'] = citation_target.associated_works
    return metadata
1✔
408

409
def get_citation_target_metadata(app, doi, curate=True, concept=False):
    """
    Return the stored metadata of a citation target.

    When the citation target exists, the result carries its raw and parsed
    metadata together with its status; otherwise an empty dictionary is
    returned. The work is delegated to
    `_get_citation_target_metadata_session` inside a session scope.
    """
    with app.session_scope() as session:
        result = _get_citation_target_metadata_session(session, doi, False, {}, curate, concept)
    return result
1✔
421

422
def get_citation_target_entry_date(app, doi):
    """
    Return the `created` value of the citation target whose content is `doi`,
    or None when no such citation target exists.
    """
    with app.session_scope() as session:
        target = session.query(CitationTarget).filter_by(content=doi).first()
        entry_date = target.created if target is not None else None
    return entry_date
1✔
435

436
def get_citations_by_bibcode(app, bibcode):
    """
    Resolve a bibcode to its citation target content and return the citing
    bibcodes for that content.

    Only REGISTERED citation targets are considered; an empty list is
    returned when the bibcode is None or has no registered target.
    """
    if bibcode is None:
        return []
    with app.session_scope() as session:
        target = (
            session.query(CitationTarget)
            .filter(CitationTarget.bibcode == bibcode)
            .filter_by(status="REGISTERED")
            .first()
        )
        if not target:
            return []
        # get_citations only reads `.content`, so a minimal message suffices.
        return get_citations(app, CitationChange(content=target.content))
1✔
450

451
def get_citations(app, citation_change):
    """
    List the citing bibcodes of every REGISTERED citation whose content
    equals `citation_change.content`.
    """
    with app.session_scope() as session:
        rows = session.query(Citation).filter_by(content=citation_change.content, status="REGISTERED").all()
        citing = [row.citing for row in rows]
    return citing
1✔
459

460
def get_citation_target_readers(app, bibcode, alt_bibcodes):
    """
    Collect the reader values of all REGISTERED Reader rows for `bibcode`,
    followed by those for each alternate bibcode, in that order.
    """
    with app.session_scope() as session:
        hashes = [row.reader for row in session.query(Reader).filter_by(bibcode=bibcode, status="REGISTERED").all()]
        for alt_bibcode in alt_bibcodes:
            alt_rows = session.query(Reader).filter_by(bibcode=alt_bibcode, status="REGISTERED").all()
            hashes = hashes + [row.reader for row in alt_rows]

    return hashes
1✔
471

472
def generate_modified_metadata(parsed_metadata, curated_entry):
    """
    modify parsed_metadata with any curated metadata. return results.

    :param parsed_metadata: dict of parsed metadata; not modified (a shallow
        copy is returned).
    :param curated_entry: dict of curated values. Keys 'bibcode', 'doi' and
        'error' are never applied. Keys that do not exist in the parsed
        metadata are removed from `curated_entry` IN PLACE.
    :return: copy of `parsed_metadata` with curated values applied and an
        'alternate_bibcode' entry guaranteed (empty list if missing/empty).
    """
    modified_metadata = parsed_metadata.copy()
    bad_keys = []
    if not modified_metadata.get('alternate_bibcode', None):
        modified_metadata.update({'alternate_bibcode': []})
    for key in curated_entry.keys():
        if key in ['bibcode', 'doi', 'error']:
            continue
        if key in modified_metadata.keys():
            try:
                modified_metadata[key] = curated_entry[key]
            except Exception as e:
                logger.error("Failed setting {} for {} with Exception: {}.".format(key, parsed_metadata.get('bibcode'), e))
        else:
            # Logger.warn is a deprecated alias of Logger.warning.
            logger.warning("{} is not a valid entry for parsed_cited_metadata. Flagging key for removal.".format(key))
            bad_keys.append(key)
    #remove bad keys from curated entries (done after the loop so the dict
    #is not resized while being iterated).
    for key in bad_keys:
        curated_entry.pop(key)
    return modified_metadata
1✔
493

494
def citation_already_exists(app, citation_change):
    """
    Tell whether a citation with the same citing/content pair is in the DB.
    """
    with app.session_scope() as session:
        match = session.query(Citation).filter_by(citing=citation_change.citing, content=citation_change.content).first()
        found = match is not None
    return found
1✔
503

504
def update_citation(app, citation_change):
    """
    Update cited information

    The citation identified by (`citing`, `content`) is selected FOR UPDATE
    and its `cited`, `resolved` and `timestamp` are overwritten only when the
    incoming timestamp is strictly newer than the stored one.

    :return: True if the citation was updated, False if the change was
        ignored (stale timestamp) or no matching citation exists.
    """
    updated = False
    with app.session_scope() as session:
        citation = session.query(Citation).with_for_update().filter_by(citing=citation_change.citing, content=citation_change.content).first()
        if citation is None:
            # Previously this fell through to an AttributeError on None.
            logger.warning("Ignoring citation update (citing '%s', content '%s') because the citation is not in the database", citation_change.citing, citation_change.content)
            return updated
        change_timestamp = citation_change.timestamp.ToDatetime().replace(tzinfo=tzutc()) # Consider it as UTC to be able to compare it
        if citation.timestamp < change_timestamp:
            # `citing` and `content` identify the row and must not change.
            citation.cited = citation_change.cited
            citation.resolved = citation_change.resolved
            citation.timestamp = change_timestamp
            session.add(citation)
            session.commit()
            updated = True
            logger.info("Updated citation (citing '%s', content '%s' and timestamp '%s')", citation_change.citing, citation_change.content, citation_change.timestamp.ToJsonString())
        else:
            logger.info("Ignoring citation update (citing '%s', content '%s' and timestamp '%s') because received timestamp is equal/older than timestamp in database", citation_change.citing, citation_change.content, citation_change.timestamp.ToJsonString())
    return updated
1✔
525

526
def mark_citation_as_deleted(app, citation_change):
    """
    Update status to DELETED for a given citation

    The citation identified by (`citing`, `content`) is selected FOR UPDATE
    and marked DELETED only when the incoming timestamp is strictly newer
    than the stored one.

    :return: tuple `(marked_as_deleted, previous_status)`; `(False, None)`
        when no matching citation exists.
    """
    marked_as_deleted = False
    previous_status = None
    with app.session_scope() as session:
        citation = session.query(Citation).with_for_update().filter_by(citing=citation_change.citing, content=citation_change.content).first()
        if citation is None:
            # Previously this fell through to an AttributeError on None.
            logger.warning("Ignoring citation deletion (citing '%s', content '%s') because the citation is not in the database", citation_change.citing, citation_change.content)
            return marked_as_deleted, previous_status
        previous_status = citation.status
        change_timestamp = citation_change.timestamp.ToDatetime().replace(tzinfo=tzutc()) # Consider it as UTC to be able to compare it
        if citation.timestamp < change_timestamp:
            citation.status = "DELETED"
            citation.timestamp = change_timestamp
            session.add(citation)
            session.commit()
            marked_as_deleted = True
            logger.info("Marked citation as deleted (citing '%s', content '%s' and timestamp '%s')", citation_change.citing, citation_change.content, citation_change.timestamp.ToJsonString())
        else:
            logger.info("Ignoring citation deletion (citing '%s', content '%s' and timestamp '%s') because received timestamp is equal/older than timestamp in database", citation_change.citing, citation_change.content, citation_change.timestamp.ToJsonString())
    return marked_as_deleted, previous_status
1✔
546

547
def mark_reader_as_deleted(app, reader_change):
    """
    Update status to DELETED for a given reader

    :param app: application object providing `session_scope()`.
    :param reader_change: mapping with 'bibcode', 'reader' and 'timestamp'
        keys.
    :return: tuple `(marked_as_deleted, previous_status)`; `(False, None)`
        when no matching reader exists.
    """
    marked_as_deleted = False
    previous_status = None
    with app.session_scope() as session:
        reader = session.query(Reader).with_for_update().filter_by(bibcode=reader_change['bibcode'], reader=reader_change['reader']).first()
        if reader is None:
            # Previously this fell through to an AttributeError on None.
            logger.warning("Ignoring reader deletion (citing '%s', content '%s') because the reader is not in the database", reader_change['bibcode'], reader_change['reader'])
            return marked_as_deleted, previous_status
        previous_status = reader.status
        change_timestamp = reader_change['timestamp']
        # NOTE(review): this is a string comparison between str() of the
        # stored timestamp and the incoming value; it only orders correctly
        # if both share the same textual format -- confirm with the producer.
        if str(reader.timestamp) < change_timestamp:
            reader.status = "DELETED"
            reader.timestamp = change_timestamp
            session.add(reader)
            session.commit()
            marked_as_deleted = True
            logger.info("Marked reader as deleted (citing '%s', content '%s')", reader_change['bibcode'], reader_change['reader'])
        else:
            logger.info("Ignoring reader deletion (citing '%s', content '%s' and timestamp '%s') because received timestamp is equal/older than timestamp in database", reader_change['bibcode'], reader_change['reader'], reader_change['timestamp'])
    return marked_as_deleted, previous_status
×
567

568
def mark_all_discarded_citations_as_registered(app, content):
    """
    Flip every DISCARDED citation of the given content back to REGISTERED.

    The matching rows are selected FOR UPDATE and committed together.
    Nothing is returned.
    """
    with app.session_scope() as session:
        discarded = session.query(Citation).with_for_update().filter_by(status='DISCARDED', content=content).all()
        for entry in discarded:
            entry.status = 'REGISTERED'
            session.add(entry)
        session.commit()
×
580

581
def populate_bibcode_column(main_session):
    """
    Pulls all citation targets from DB and populates the bibcode column using parsed metadata

    For records with curated metadata the bibcode is rebuilt from the
    curated-modified metadata (keeping the first four characters of the
    parsed bibcode); otherwise the parsed bibcode is used as-is.

    :param main_session: open database session used for all reads and writes.
    :return: None.
    """
    logger.debug("Collecting Citation Targets")
    records = _get_citation_targets_session(main_session, only_status=None)
    for record in records:
        content = record.get('content', None)
        # The key produced by _extract_key_citation_target_data is
        # 'associated_works'; the former 'associate_works' lookup always fell
        # back to {} and so overwrote the stored associated works.
        associated = record.get('associated_works')
        logger.debug("Collecting metadata for {}".format(record.get('content')))
        citation_in_db = False
        metadata = _get_citation_target_metadata_session(main_session, content, citation_in_db, {}, curate=False)
        if not metadata:
            continue
        logger.debug("Populating Bibcode field for {}".format(record.get('content')))
        raw_metadata = metadata.get('raw', {})
        parsed_metadata = metadata.get('parsed', {})
        curated_metadata = metadata.get('curated', {})
        modified_metadata = generate_modified_metadata(parsed_metadata, curated_metadata)
        status = metadata.get('status', None)
        #Allows for the column to be repopulated even if curated_metadata exists.
        if curated_metadata:
            zenodo_bibstem = "zndo"
            bibcode = doi.build_bibcode(modified_metadata, doi.zenodo_doi_re, zenodo_bibstem)
            # Keep the first four characters of the parsed bibcode.
            bibcode = parsed_metadata['bibcode'][:4] + bibcode[4:]
        else:
            bibcode = parsed_metadata.get('bibcode', None)

        _update_citation_target_metadata_session(main_session, content, raw_metadata, parsed_metadata, curated_metadata, status, bibcode, associated)
×
611

STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc