• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

adsabs / ADSCitationCapture / 11922386106

19 Nov 2024 09:48PM UTC coverage: 70.135% (+1.6%) from 68.508%
11922386106

push

github

web-flow
Fix branch name for default branch. (#71)

2501 of 3566 relevant lines covered (70.13%)

0.7 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

12.21
/ADSCitationCapture/api.py
1

2
import os
1✔
3
import requests
1✔
4
import ADSCitationCapture.url as url
1✔
5
import urllib.request, urllib.parse, urllib.error
1✔
6
import math
1✔
7
from adsputils import setup_logging
1✔
8

9
# ============================= INITIALIZATION ==================================== #
10
# - Use app logger:
11
#import logging
12
#logger = logging.getLogger('ads-citation-capture')
13
# - Or individual logger for this file:
14
from adsputils import setup_logging, load_config
1✔
15
# Project root: the parent of the directory that contains this module.
proj_home = os.path.realpath(os.path.join(os.path.dirname(__file__), '../'))
# Configuration is loaded relative to the project root.
config = load_config(proj_home=proj_home)
# Individual logger for this file; level and stdout attachment come from the
# configuration, falling back to 'INFO' and False respectively.
logger = setup_logging(
    __name__,
    proj_home=proj_home,
    level=config.get('LOGGING_LEVEL', 'INFO'),
    attach_stdout=config.get('LOG_STDOUT', False),
)
20

21

22
# =============================== FUNCTIONS ======================================= #
23
def _request_citations_page(app, bibcode, start, rows, timeout=30):
    """
    Request a single page of citations to a bibcode from the Search API.

    A GET request is sent to "search/query" under app.conf['ADS_API_URL'] with
    the query `citations(bibcode:<bibcode>)`, asking only for the `bibcode`
    field and sorting by `date desc, bibcode desc`.

    Parameters:
        app: object whose `conf` mapping provides 'ADS_API_TOKEN' and
            'ADS_API_URL'.
        bibcode: bibcode whose citations are requested.
        start: offset of the first result to return.
        rows: maximum number of results to return in this page.
        timeout: timeout in seconds handed to `requests.get`, so that a stalled
            connection cannot block the caller forever.

    Returns:
        The decoded JSON body of the response.

    Raises:
        Exception: if the response status is not OK or its body is not valid
            JSON. Errors raised by `requests.get` itself are logged and
            re-raised unchanged.
    """
    params = urllib.parse.urlencode({
        'fl': 'bibcode',
        'q': 'citations(bibcode:{0})'.format(bibcode),
        'start': start,
        'rows': rows,
        'sort': 'date desc, bibcode desc',
    })
    headers = {"Authorization": "Bearer {}".format(app.conf['ADS_API_TOKEN'])}
    # Named `query_url` so the module-level `url` import is not shadowed.
    query_url = app.conf['ADS_API_URL'] + "search/query?" + params
    try:
        r = requests.get(query_url, headers=headers, timeout=timeout)
    except Exception:
        # `except Exception` (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate without being logged as request failures.
        logger.error("Search API request failed for citations (start: %i): %s", start, bibcode)
        raise
    if not r.ok:
        msg = "Search API request with error code '{}' for bibcode (start: {}): {}".format(r.status_code, start, bibcode)
        logger.error(msg)
        raise Exception(msg)
    try:
        return r.json()
    except ValueError:
        msg = "No JSON object could be decoded from Search API response when searching citations (start: {}) for: {}".format(start, bibcode)
        logger.error(msg)
        raise Exception(msg)
×
54

55
def request_existing_citations(app, bibcode):
    """
    Retrieve the bibcodes of all citations to `bibcode` known to the Search API.

    Results are fetched in pages of 25 through `_request_citations_page`. Each
    page request is retried up to 3 times before the last error is re-raised.
    The total number of citations is taken from `numFound` in the first
    response.

    Parameters:
        app: passed through to `_request_citations_page`.
        bibcode: bibcode whose citations are requested.

    Returns:
        A list with the `bibcode` value of every document returned.

    Raises:
        Whatever `_request_citations_page` raised on the final failed attempt.
    """
    start = 0
    rows = 25
    max_retries = 3
    citation_docs = []
    n_existing_citations = None
    while True:
        retries = 0
        while True:
            try:
                answer = _request_citations_page(app, bibcode, start, rows)
            except Exception:
                # Only ordinary errors are retried; KeyboardInterrupt and
                # SystemExit are not subclasses of Exception and propagate.
                if retries >= max_retries:
                    logger.exception("Failed Search API request for citations (start: %i): %s", start, bibcode)
                    raise
                logger.info("Retrying Search API request for citations (start: %i): %s", start, bibcode)
                retries += 1
            else:
                break
        citation_docs += answer['response']['docs']
        if n_existing_citations is None:
            n_existing_citations = int(answer['response']['numFound'])
        start += rows
        # `>=` rather than `>`: when numFound is an exact multiple of `rows`,
        # the next offset is already past the last result, so no further
        # (empty) page needs to be requested.
        if start >= n_existing_citations:
            break
    # Transform from list of dict to list of bibcodes:
    return [doc['bibcode'] for doc in citation_docs]
×
83

84
def get_canonical_bibcodes(app, bibcodes, timeout=30):
    """
    Convert input bibcodes into their canonical form if they exist, hence
    the returned list can be smaller than the input bibcode list.

    The input is split into chunks of at most 2000 bibcodes and each chunk is
    resolved with `_get_canonical_bibcodes`. A failing chunk is retried up to
    3 times before the last error is re-raised.

    Parameters:
        app: passed through to `_get_canonical_bibcodes`.
        bibcodes: sliceable sequence of bibcode strings.
        timeout: passed through to `_get_canonical_bibcodes`.

    Returns:
        A list with the bibcodes returned for every chunk, in chunk order.
        An empty input yields an empty list without any request being made.

    Raises:
        Whatever `_get_canonical_bibcodes` raised on the final failed attempt.
    """
    chunk_size = 2000 # Max number of records supported by bigquery
    max_retries = 3
    bibcodes_chunks = [bibcodes[i:i + chunk_size] for i in range(0, len(bibcodes), chunk_size)]
    canonical_bibcodes = []
    total_n_chunks = len(bibcodes_chunks)
    # Execute multiple requests to bigquery if the list of bibcodes is longer than the accepted maximum
    for n_chunk, bibcodes_chunk in enumerate(bibcodes_chunks):
        retries = 0
        while True:
            try:
                canonical_bibcodes += _get_canonical_bibcodes(app, n_chunk, total_n_chunks, bibcodes_chunk, timeout)
            except Exception:
                # Only ordinary errors are retried; KeyboardInterrupt and
                # SystemExit are not subclasses of Exception and propagate.
                if retries >= max_retries:
                    logger.exception("Failed BigQuery API request for bibcodes (chunk: %i/%i): %s", n_chunk+1, total_n_chunks, " ".join(bibcodes_chunk))
                    raise
                logger.info("Retrying BigQuery API request for bibcodes (chunk: %i/%i): %s", n_chunk+1, total_n_chunks, " ".join(bibcodes_chunk))
                retries += 1
            else:
                break
    return canonical_bibcodes
×
109

110
def _get_canonical_bibcodes(app, n_chunk, total_n_chunks, bibcodes_chunk, timeout):
    """
    Resolve one chunk of bibcodes with a single BigQuery API request.

    The chunk is POSTed to "search/bigquery" under app.conf['ADS_API_URL'] as
    a "big-query/csv" body: a `bibcode` header line followed by one bibcode
    per line.

    Parameters:
        app: object whose `conf` mapping provides 'ADS_API_TOKEN' and
            'ADS_API_URL'.
        n_chunk: zero-based index of this chunk (used in log messages only).
        total_n_chunks: total number of chunks (used in log messages only).
        bibcodes_chunk: list of bibcode strings to resolve.
        timeout: timeout in seconds handed to `requests.post`.

    Returns:
        A list with the `bibcode` of every document in the response; empty if
        the response contains no documents.

    Raises:
        Exception: if the response status is not OK or its body is not valid
            JSON. Errors raised by `requests.post` itself are logged and
            re-raised unchanged.
    """
    params = urllib.parse.urlencode({
        'fl': 'bibcode',
        'q': '*:*',
        'wt': 'json',
        'fq': '{!bitset}',
        'rows': len(bibcodes_chunk),
    })
    headers = {
        "Authorization": "Bearer {}".format(app.conf['ADS_API_TOKEN']),
        "Content-Type": "big-query/csv",
    }
    # Named `bigquery_url` so the module-level `url` import is not shadowed.
    bigquery_url = app.conf['ADS_API_URL'] + "search/bigquery?" + params
    data = "bibcode\n" + "\n".join(bibcodes_chunk)
    try:
        r = requests.post(bigquery_url, headers=headers, data=data, timeout=timeout)
    except Exception:
        # `except Exception` (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate without being logged as request failures.
        logger.error("BigQuery API request failed for bibcodes (chunk: %i/%i): %s", n_chunk+1, total_n_chunks, " ".join(bibcodes_chunk))
        raise
    if not r.ok:
        msg = "BigQuery API request with error code '{}' for bibcodes (chunk: {}/{}): {}".format(r.status_code, n_chunk+1, total_n_chunks, " ".join(bibcodes_chunk))
        logger.error(msg)
        raise Exception(msg)
    try:
        r_json = r.json()
    except ValueError as err:
        msg = "No JSON object could be decoded from BigQuery API response when searching canonical bibcodes (chunk: {}/{}) for: {}".format(n_chunk+1, total_n_chunks, " ".join(bibcodes_chunk))
        logger.error(msg)
        # Chain the decode error so the original cause stays in the traceback.
        raise Exception(msg) from err
    return [paper['bibcode'] for paper in r_json.get('response', {}).get('docs', [])]
×
145

146
def get_canonical_bibcode(app, bibcode, timeout=30):
    """
    Convert a single input bibcode into its canonical form if it exists.

    Delegates to `get_canonical_bibcodes` with a one-element list and returns
    the first result, or None when nothing was returned.
    """
    matches = get_canonical_bibcodes(app, [bibcode], timeout=timeout)
    return matches[0] if matches else None
×
155

156
def get_github_metadata(app, citation_url, timeout=30):
    """
    Retrieve License and related metadata from GitHub API.

    For a GitHub URL that is not a gist, the first two path segments of the
    URL are used to build a "repos/<segment 1>/<segment 2>/license" request
    under app.conf['GITHUB_API_URL']. Any failure is logged and the default
    (empty) license information is returned instead of raising.

    Parameters:
        app: object whose `conf` mapping provides 'GITHUB_API_TOKEN' and
            'GITHUB_API_URL'.
        citation_url: URL to inspect.
        timeout: timeout in seconds handed to `requests.get`, so that a stalled
            connection cannot block the caller forever.

    Returns:
        A dict with keys 'license_name' and 'license_url'; both are empty
        strings when the URL is not a GitHub repository, cannot be parsed, or
        the request fails.
    """
    license_name = ""
    license_url = ""
    headers = {
        'User-Agent': "ads-citation-capture",
        'Authorization': "token {}".format(app.conf['GITHUB_API_TOKEN']),
    }

    if url.is_github(citation_url) and not url.is_gist(citation_url):
        github_api = None
        try:
            path = urllib.parse.urlparse(citation_url).path.split("/")
            github_api = app.conf['GITHUB_API_URL']+"repos/{}/{}/license".format(path[1],path[2]) if path[1] else None
        except Exception as e:
            msg = "Failed to parse :{} with Exception: {}".format(citation_url,e)
            logger.error(msg)

        if github_api:
            # Bound before the request so the error handler can tell "no
            # response at all" apart from "response could not be used".
            git_return = None
            try:
                git_return = requests.get(github_api, headers=headers, timeout=timeout)
                json_return = git_return.json()
                license_name = json_return["license"]["key"]
                license_url = json_return["license"]["url"] if json_return["license"]["url"] is not None else ""
            except Exception:
                # When requests.get itself raised there is no response object,
                # so no status code is available to report.
                status_code = git_return.status_code if git_return is not None else None
                msg = "Request to {} failed with status code: {}".format(github_api,status_code)
                logger.error(msg)

    else:
        msg = "URL:{} is not a github repository returning default license info.".format(citation_url)
        logger.error(msg)

    return {'license_name': license_name, 'license_url': license_url}
×
191

192

STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc