
adsabs / ADSCitationCapture | build 11922386106 (push, github, web-flow)
19 Nov 2024 09:48PM UTC coverage: 70.135% (+1.6%) from 68.508%
Commit: Fix branch name for default branch. (#71)
2501 of 3566 relevant lines covered (70.13%)
0.7 hits per line
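The overall figure is simply the ratio of covered to relevant lines:

    >>> round(2501 / 3566 * 100, 3)
    70.135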

Source File

/ADSCitationCapture/webhook.py (71.07% of lines covered)
import requests
import json
from adsputils import setup_logging
import adsmsg
import datetime
import os
import errno
import re

# ============================= INITIALIZATION ==================================== #
# - Use app logger:
#import logging
#logger = logging.getLogger('ads-citation-capture')
# - Or individual logger for this file:
from adsputils import setup_logging, load_config
proj_home = os.path.realpath(os.path.join(os.path.dirname(__file__), '../'))
config = load_config(proj_home=proj_home)
logger = setup_logging(__name__, proj_home=proj_home,
                        level=config.get('LOGGING_LEVEL', 'INFO'),
                        attach_stdout=config.get('LOG_STDOUT', False))

# =============================== FUNCTIONS ======================================= #
def _build_data(event_type, original_relationship_name, source_bibcode, target_id, target_id_schema, target_id_url, source_type="software", source_license=""):
    now = datetime.datetime.now()
    data = {
        "RelationshipType": {
            "SubTypeSchema": "DataCite",
            "SubType": original_relationship_name,
            "Name": "References"
        },
        "Source": {
            "Identifier": {
                "IDScheme": "ads",
                "IDURL": "https://ui.adsabs.harvard.edu/abs/{}".format(source_bibcode),
                "ID": source_bibcode
            },
            "Type": {
                "Name": "unknown"
            }
        },
        "LicenseURL": source_license,
        "Target": {
            "Identifier": {
                "IDScheme": target_id_schema,
                "IDURL": target_id_url,
                "ID": target_id
            },
            "Type": {
                "Name": source_type
            }
        },
        "LinkPublicationDate": now.strftime("%Y-%m-%d"),
        "LinkProvider": [
            {
                "Name": "SAO/NASA Astrophysics Data System"
            }
        ]
    }
    return data
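# Illustrative note (not part of webhook.py): with hypothetical arguments such as
#   _build_data("relation_created", "Cites", "2019A&A...000A...1X",
#               "1812.013", "ascl", "https://ascl.net/1812.013", source_type="article")
# the returned dict points Source at the ADS abstract page for the bibcode,
# carries the ASCL identifier in Target, and stamps LinkPublicationDate with
# today's date. Note the event_type argument is accepted but not embedded in the payload.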

def _target_elements(citation_change):
    target_id = citation_change.content
    if citation_change.content_type == adsmsg.CitationChangeContentType.doi:
        target_id_schema = "doi"
        target_id_url = "https://doi.org/{}".format(target_id)
    elif citation_change.content_type == adsmsg.CitationChangeContentType.pid:
        target_id_schema = "ascl"
        target_id_url = "https://ascl.net/{}".format(target_id)
    elif citation_change.content_type == adsmsg.CitationChangeContentType.url:
        target_id_schema = "url"
        target_id_url = citation_change.content
    else:
        raise Exception("Unknown citation change data type")
    return target_id, target_id_schema, target_id_url

def _source_cites_target(citation_change, parsed_metadata, deleted=False):
    if deleted:
        event_type = "relation_deleted"
    else:
        event_type = "relation_created"
    original_relationship_name = "Cites"
    target_id, target_id_schema, target_id_url = _target_elements(citation_change)
    source_bibcode = citation_change.citing
    source_type = parsed_metadata.get('doctype', "").lower()
    source_license = parsed_metadata.get('license_url', "")
    data = _build_data(event_type, original_relationship_name, source_bibcode, target_id, target_id_schema, target_id_url, source_type, source_license)
    return data

#def _source_is_identical_to_target(citation_change, deleted=False):
    #if deleted:
        #event_type = "relation_deleted"
    #else:
        #event_type = "relation_created"
    #original_relationship_name = "IsIdenticalTo"
    #source_bibcode = citation_change.cited
    #target_id, target_id_schema, target_id_url = _target_elements(citation_change)
    #data = _build_data(event_type, original_relationship_name, source_bibcode, target_id, target_id_schema, target_id_url)
    #return data

def citation_change_to_event_data(citation_change, parsed_metadata):
    if citation_change.status == adsmsg.Status.new:
        return _source_cites_target(citation_change, parsed_metadata, deleted=False)
    elif citation_change.status == adsmsg.Status.updated and citation_change.cited != '...................' and citation_change.resolved:
        ### Only accept the cited bibcode if its score is 1 (resolved == True); otherwise the bibcode is just an unresolved attempt
        ##return _source_is_identical_to_target(citation_change)
        # The citation change shows that the process building the raw input file matched the DOI with a bibcode,
        # but this matching is likely wrong since that process does not have access to software records created by this pipeline,
        # so no event should be emitted
        logger.warning("Ignoring citation change due to bad bibcode - DOI matching (IsIdenticalTo event will not be emitted): {}".format(citation_change))
        return {}
    elif citation_change.status == adsmsg.Status.deleted:
        ##return _source_cites_target(citation_change, deleted=True)
        ### https://github.com/asclepias/asclepias-broker/issues/24
        logger.error("The broker does not support deletions yet: citing='{}', cited='{}', content='{}'".format(citation_change.citing, citation_change.cited, citation_change.content))
        return {}
    else:
        logger.warning("Citation change does not match any defined events: {}".format(citation_change))
        return {}

def identical_bibcodes_event_data(source_bibcode, target_bibcode, deleted=False):
    if deleted:
        event_type = "relation_deleted"
    else:
        event_type = "relation_created"
    original_relationship_name = "IsIdenticalTo"
    source_bibcode = source_bibcode
    target_id_schema = "ads"
    target_id_url = "https://ui.adsabs.harvard.edu/abs/{}".format(target_bibcode)
    target_id = target_bibcode
    data = _build_data(event_type, original_relationship_name, source_bibcode, target_id, target_id_schema, target_id_url)
    return data

def identical_bibcode_and_doi_event_data(source_bibcode, target_doi, deleted=False):
    if deleted:
        event_type = "relation_deleted"
    else:
        event_type = "relation_created"
    original_relationship_name = "IsIdenticalTo"
    source_bibcode = source_bibcode
    target_id_schema = "doi"
    target_id_url = "https://doi.org/{}".format(target_doi)
    target_id = target_doi
    data = _build_data(event_type, original_relationship_name, source_bibcode, target_id, target_id_schema, target_id_url)
    return data


def emit_event(ads_webhook_url, ads_webhook_auth_token, event_data, timeout=30):
    emitted = False
    if event_data:
        data = [event_data]
        headers = {}
        headers["Content-Type"] = "application/json"
        headers["Authorization"] = "Bearer {}".format(ads_webhook_auth_token)
        r = requests.post(ads_webhook_url, data=json.dumps(data), headers=headers, timeout=timeout)
        if not r.ok:
            logger.error("Emit event failed with status code '{}': {}".format(r.status_code, r.content))
            raise Exception("HTTP Post to '{}' failed: {}".format(ads_webhook_url, json.dumps(data)))
        else:
            relationship = event_data.get("RelationshipType", {}).get("SubType", None)
            source_id = event_data.get("Source", {}).get("Identifier", {}).get("ID", None)
            target_id = event_data.get("Target", {}).get("Identifier", {}).get("ID", None)
            logger.info("Emitted event (relationship '%s', source '%s' and target '%s')", relationship, source_id, target_id)
            emitted = True
    return emitted
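# Editorial note (not part of webhook.py): emit_event() POSTs the event wrapped
# in a single-element JSON array, with headers
#   Content-Type: application/json
#   Authorization: Bearer <ads_webhook_auth_token>
# It returns True only when requests reports the response as ok (status < 400);
# otherwise it logs the failure and raises.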

def _mkdir_p(path):
    """
    Creates a directory. Same behaviour as 'mkdir -p'.
    """
    try:
        os.makedirs(path)
    except OSError as exc: # Python >2.5
        if exc.errno == errno.EEXIST:
            pass
        else:
            raise

def dump_event(event_data, prefix="emitted"):
    """
    Save the event in JSON format in the log directory
    """
    dump_created = False
    if event_data:
        try:
            logs_dirname = os.path.dirname(logger.handlers[0].baseFilename)
        except:
            logger.exception("Logger's target directory not found")
        else:
            base_dirname = os.path.join(logs_dirname, prefix)
            now = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
            relationship = event_data.get("RelationshipType", {}).get("SubType", None)
            source_id = event_data.get("Source", {}).get("Identifier", {}).get("ID", "")
            target_id = event_data.get("Target", {}).get("Identifier", {}).get("ID", "")
            source_id = re.sub(r'[^-\w\s]+', '-', source_id)
            target_id = re.sub(r'[^-\w\s]+', '-', target_id)
            logger.info("Dumped event (relationship '%s', source '%s' and target '%s')", relationship, source_id, target_id)
            filename = "{}_{}_{}_{}.json".format(now, source_id, relationship, target_id)
            try:
                if not os.path.exists(base_dirname):
                    _mkdir_p(base_dirname)
                json.dump(event_data, open(os.path.join(base_dirname, filename), "w"), indent=2)
            except:
                logger.exception("Impossible to dump event")
            else:
                dump_created = True
    return dump_created
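
A minimal usage sketch (not part of webhook.py). It assumes a caller holding a citation_change message and its parsed metadata; the config keys ADS_WEBHOOK_URL and ADS_WEBHOOK_AUTH_TOKEN are hypothetical names used only for illustration:

def _example_process(citation_change, parsed_metadata):
    # Translate the citation change into a broker event (may be empty).
    event_data = citation_change_to_event_data(citation_change, parsed_metadata)
    if not event_data:
        return False
    # Keep a JSON copy of the event next to the logs, then push it to the broker.
    dump_event(event_data, prefix="emitted")
    return emit_event(config.get('ADS_WEBHOOK_URL', ''),
                      config.get('ADS_WEBHOOK_AUTH_TOKEN', ''),
                      event_data,
                      timeout=30)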