• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

IdentityPython / pyFF / 760

14 Apr 2021 - 8:28 coverage: 73.418% (-0.04%) from 73.455%
760

Pull #214

travis-ci

web-flow
Merge 3757d7dfb into 648c5bc7e
Pull Request #214: minor fixes

19 of 23 new or added lines in 4 files covered. (82.61%)

2 existing lines in 1 file now uncovered.

2900 of 3950 relevant lines covered (73.42%)

1.47 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

71.38
/src/pyff/api.py
1
import importlib
2×
2
import threading
2×
3
from datetime import datetime, timedelta
2×
4
from json import dumps
2×
5

6
import pkg_resources
2×
7
import pyramid.httpexceptions as exc
2×
8
import pytz
2×
9
import requests
2×
10
from accept_types import AcceptableType
2×
11
from cachetools import TTLCache
2×
12
from lxml import etree
2×
13
from pyramid.config import Configurator
2×
14
from pyramid.events import NewRequest
2×
15
from pyramid.response import Response
2×
16
from six import b
2×
17
from six.moves.urllib_parse import quote_plus
2×
18

19
from .constants import config
2×
20
from .exceptions import ResourceException
2×
21
from .logs import get_log
2×
22
from .pipes import plumbing
2×
23
from .repo import MDRepository
2×
24
from .samlmd import entity_display_name
2×
25
from .utils import b2u, dumptree, duration2timedelta, hash_id, json_serializer
2×
26

27
log = get_log(__name__)
2×
28

29

30
class NoCache(object):
    """Cache stand-in that stores nothing and finds nothing."""

    def __init__(self):
        # There is no state to set up.
        return None

    def __getitem__(self, item):
        # Every lookup yields None rather than raising KeyError.
        return None

    def __setitem__(self, instance, value):
        # The value is not kept anywhere; it is only handed back.
        return value
!
39

40

41
def robots_handler(request):
    """
    Implements robots.txt

    The served policy disallows the whole site for every user agent.

    :param request: the HTTP request (not inspected)
    :return: a Response whose body is the robots.txt policy
    """
    robots_txt = """
User-agent: *
Disallow: /
"""
    return Response(robots_txt)
54

55

56
def status_handler(request):
    """
    Implements the /api/status endpoint

    The status reports the installed pyFF version, the validation errors of
    resources that have any, the icon store and metadata store sizes, the
    scheduled jobs and the names of the live threads.

    :param request: the HTTP request
    :return: JSON status
    """
    registry = request.registry
    repo = registry.md

    # Only resources with a non-empty 'Validation Errors' entry are listed.
    invalid = {}
    for res in repo.rm:
        if 'Validation Errors' not in res.info:
            continue
        if not res.info['Validation Errors']:
            continue
        invalid[res.url] = res.info['Validation Errors']

    status = {
        'version': pkg_resources.require("pyFF")[0].version,
        'invalids': invalid,
        'icon_store': {'size': repo.icon_store.size()},
        'jobs': [{'id': job.id, 'next_run_time': job.next_run_time} for job in registry.scheduler.get_jobs()],
        'threads': [thread.name for thread in threading.enumerate()],
        'store': {'size': repo.store.size()},
    }

    response = Response(dumps(status, default=json_serializer))
    response.headers['Content-Type'] = 'application/json'
    return response
2×
78

79

80
class MediaAccept(object):
    """Wraps an Accept header value so media types can be tested against it."""

    def __init__(self, accept):
        # AcceptableType does the actual matching.
        self._type = AcceptableType(accept)

    def __contains__(self, item):
        # Supports ``media_type in accepter``.
        return self._type.matches(item)

    def get(self, item):
        # Same match as ``in``, but returns the matcher's result unconverted.
        return self._type.matches(item)

    def has_key(self, key):
        # Answers True for every key.
        return True

    def __str__(self):
        return str(self._type)
!
95

96

97
xml_types = ('text/xml', 'application/xml', 'application/samlmetadata+xml')
2×
98

99

100
def _is_xml_type(accepter):
2×
101
    return any([x in accepter for x in xml_types])
2×
102

103

104
def _is_xml(data):
    """
    Check whether data is a parsed lxml object.

    :param data: the object to inspect
    :return: True for an lxml element or element tree, False for anything else
    """
    xml_classes = (etree._Element, etree._ElementTree)
    return isinstance(data, xml_classes)
2×
106

107

108
def _fmt(data, accepter):
    """
    Serialize data in a representation the accepter allows.

    :param data: the result to serialize; None or anything of length zero counts as empty
    :param accepter: object describing the acceptable media types (``in`` tests and ``get``)
    :return: a (body, content type) pair
    :raises: the HTTP 406 exception when none of the supported representations applies
    """
    # Nothing to serialize: empty plain-text body.
    if data is None or not len(data):
        return "", 'text/plain'

    # XML data is only dumped when the client accepts an XML media type.
    if _is_xml(data) and _is_xml_type(accepter):
        return dumptree(data), 'application/samlmetadata+xml'

    wants_json = isinstance(data, (dict, list)) and accepter.get('application/json')
    if wants_json:
        return dumps(data, default=json_serializer), 'application/json'

    raise exc.exception_response(406)
!
117

118

119
def call(entry: str) -> None:
    """
    POST to the /api/call/<entry> endpoint below config.base_url.

    A response status of 300 or above is logged as an error; no exception is
    raised for it.

    :param entry: name of the entry point, appended to the /api/call/ URL
    :return: None
    """
    url = f'{config.base_url}/api/call/{entry}'
    log.debug(f'Calling API endpoint at {url}')
    resp = requests.post(url)

    # Anything below 300 needs no further attention.
    if resp.status_code < 300:
        return None

    log.error(f'POST request to API endpoint at {url} failed: {resp.status_code} {resp.reason}')
    return None
!
126

127

128
def request_handler(request):
    """
    The main GET request handler for pyFF. Implements caching and forwards the request to process_handler

    Responses are cached under the request path including its query string.

    :param request: the HTTP request object
    :return: the data to send to the client
    """
    # process_handler's result depends on the q/query parameter, so the key
    # has to include the query string; keying on the bare path would answer
    # every query on a path with whichever result was cached first.
    # NOTE(review): the Accept header also steers content negotiation in
    # process_handler but is not part of the key - confirm that is intended.
    key = request.path_qs
    cache = request.registry.cache

    try:
        r = cache[key]
    except KeyError:
        r = None

    # A miss is either a KeyError or a stored/returned None.
    if r is None:
        r = process_handler(request)
        cache[key] = r
    return r
2×
145

146

147
def process_handler(request):
    """
    The main request handler for pyFF. Implements API call hooks and content negotiation.

    :param request: the HTTP request object
    :return: the data to send to the client
    :raises: HTTP 400 when the request has no matchdict, 404 for an unknown alias
        (or a GET that no plumbing answered), 406 when the result cannot be
        rendered in an acceptable media type, 409 on a ResourceException and
        500 on any other processing failure
    """
    _ctypes = {'xml': 'application/samlmetadata+xml;application/xml;text/xml', 'json': 'application/json'}

    def _d(x, do_split=True):
        # Split a trailing known extension (a key of _ctypes) off x.
        # Returns (path, extension), (x, None) when there is no known
        # extension and (None, None) for empty input.
        if x is not None:
            x = x.strip()

        if x is None or len(x) == 0:
            return None, None

        if '.' in x:
            (pth, dot, extn) = x.rpartition('.')
            assert dot == '.'
            if extn in _ctypes:
                return pth, extn

        return x, None

    log.debug(request)

    if request.matchdict is None:
        raise exc.exception_response(400)

    if request.body:
        try:
            # A JSON body may supply or override matchdict entries.
            request.matchdict.update(request.json_body)
        except ValueError:
            # A ValueError here (e.g. a body that is not JSON) is ignored on
            # purpose: the request is then handled from its URL alone.
            pass

    entry = request.matchdict.get('entry', 'request')
    path = list(request.matchdict.get('path', []))
    match = request.params.get('q', request.params.get('query', None))

    # Enable matching on scope.
    match = match.split('@').pop() if match and not match.endswith('@') else match
    log.debug("match={}".format(match))

    if 0 == len(path):
        path = ['entities']

    # The first path segment selects the alias, the rest is the lookup path.
    alias = path.pop(0)
    path = '/'.join(path)

    # Ugly workaround bc WSGI drops double-slashes.
    path = path.replace(':/', '://')

    msg = "handling entry={}, alias={}, path={}"
    log.debug(msg.format(entry, alias, path))

    pfx = None
    if 'entities' not in alias:
        pfx = request.registry.aliases.get(alias, None)
        if pfx is None:
            raise exc.exception_response(404)

    # content_negotiation_policy is one of three values:
    # 1. extension - current default, inspect the path and if it ends in
    #    an extension, e.g. .xml or .json, always strip off the extension to
    #    get the entityID and if no accept header or a wildcard header, then
    #    use the extension to determine the return Content-Type.
    #
    # 2. adaptive - only if no accept header or if a wildcard, then inspect
    #    the path and if it ends in an extension strip off the extension to
    #    get the entityID and use the extension to determine the return
    #    Content-Type.
    #
    # 3. header - future default, do not inspect the path for an extension and
    #    use only the Accept header to determine the return Content-Type.
    policy = config.content_negotiation_policy

    # TODO - sometimes the client sends > 1 accept header value with ','.
    accept = str(request.accept).split(',')[0]
    valid_accept = accept and not ('application/*' in accept or 'text/*' in accept or '*/*' in accept)

    path_no_extension, extension = _d(path, True)
    accept_from_extension = _ctypes.get(extension, accept)

    if policy == 'extension':
        path = path_no_extension
        if not valid_accept:
            accept = accept_from_extension
    elif policy == 'adaptive':
        if not valid_accept:
            path = path_no_extension
            accept = accept_from_extension

    if pfx and path:
        q = "{%s}%s" % (pfx, path)
        path = "/%s/%s" % (alias, path)
    else:
        q = path

    try:
        accepter = MediaAccept(accept)
        for p in request.registry.plumbings:
            state = {
                entry: True,
                'headers': {'Content-Type': None},
                'accept': accepter,
                'url': request.current_route_url(),
                'select': q,
                'match': match.lower() if match else match,
                'path': path,
                'stats': {},
            }

            r = p.process(request.registry.md, state=state, raise_exceptions=True, scheduler=request.registry.scheduler)
            log.debug(r)
            if r is None:
                r = []

            response = Response()
            response.headers.update(state.get('headers', {}))
            ctype = state.get('headers').get('Content-Type', None)
            if not ctype:
                # The pipeline set no content type: negotiate one from the result.
                r, t = _fmt(r, accepter)
                ctype = t

            response.text = b2u(r)
            response.size = len(r)
            response.content_type = ctype
            cache_ttl = int(state.get('cache', 0))
            response.expires = datetime.now() + timedelta(seconds=cache_ttl)
            # Only the first plumbing is ever run: its response is returned at once.
            return response
    except exc.HTTPException:
        # HTTP errors raised on purpose while processing (such as the 406
        # from _fmt) must reach the client as they are; without this clause
        # the catch-all below would turn them into a 500.
        raise
    except ResourceException as ex:
        import traceback

        log.debug(traceback.format_exc())
        log.warn(ex)
        raise exc.exception_response(409)
    except BaseException as ex:
        import traceback

        log.debug(traceback.format_exc())
        log.error(ex)
        raise exc.exception_response(500)

    # Reached only when registry.plumbings is empty, since the loop above
    # returns during its first iteration.
    if request.method == 'GET':
        raise exc.exception_response(404)
!
292

293

294
def webfinger_handler(request):
    """An implementation of the webfinger protocol
    (http://tools.ietf.org/html/draft-ietf-appsawg-webfinger-12)
    in order to provide information about up and downstream metadata available at
    this pyFF instance.

    Example:

    .. code-block:: bash

    # curl http://my.org/.well-known/webfinger?resource=http://my.org

    This should result in a JSON structure that looks something like this:

    .. code-block:: json

    {
     "expires": "2013-04-13T17:40:42.188549",
     "links": [
     {
      "href": "http://reep.refeds.org:8080/role/sp.xml",
      "rel": "urn:oasis:names:tc:SAML:2.0:metadata"
      },
     {
      "href": "http://reep.refeds.org:8080/role/sp.json",
      "rel": "disco-json"
      }
     ],
     "subject": "http://reep.refeds.org:8080"
    }

    Depending on which version of pyFF you're running and the configuration you
    may also see downstream metadata listed using the 'role' attribute to the link
    elements.

    The optional ``rel`` request parameter restricts the links to one relation
    type; a relation type this handler does not know produces an empty
    ``links`` list.

    :param request: the HTTP request
    :return: a JSON (JRD) response
    """

    # NOTE(review): resource is read and defaulted but not used below; the
    # subject is always request.host_url - confirm whether that is intended.
    resource = request.params.get('resource', None)
    rel = request.params.get('rel', None)

    if resource is None:
        resource = request.host_url

    jrd = dict()
    dt = datetime.now() + duration2timedelta("PT1H")
    jrd['expires'] = dt.isoformat()
    jrd['subject'] = request.host_url
    links = list()
    jrd['links'] = links

    # relation type -> [href suffix, media type]
    _dflt_rels = {
        'urn:oasis:names:tc:SAML:2.0:metadata': ['.xml', 'application/xml'],
        'disco-json': ['.json', 'application/json'],
    }

    if rel is None or len(rel) == 0:
        rel = _dflt_rels.keys()
    elif rel in _dflt_rels:
        rel = [rel]
    else:
        # An unknown relation type matches no link. Looking it up in
        # _dflt_rels used to raise KeyError and fail the whole request.
        rel = []

    def _links(url, title=None):
        # Append one link per selected relation type for url.
        if url.startswith('/'):
            url = url.lstrip('/')
        for r in rel:
            suffix = ""
            # URLs ending in '/' get no extension appended.
            if not url.endswith('/'):
                suffix = _dflt_rels[r][0]
            links.append(dict(rel=r, type=_dflt_rels[r][1], href='%s/%s%s' % (request.host_url, url, suffix)))

    _links('/entities/')
    for a in request.registry.md.store.collections():
        if a is not None and '://' not in a:
            _links(a)

    for entity in request.registry.md.store.lookup('entities'):
        entity_display = entity_display_name(entity)
        _links("/entities/%s" % hash_id(entity.get('entityID')), title=entity_display)

    aliases = request.registry.aliases
    for a in aliases.keys():
        for v in request.registry.md.store.attribute(aliases[a]):
            _links('%s/%s' % (a, quote_plus(v)))

    response = Response(dumps(jrd, default=json_serializer))
    response.headers['Content-Type'] = 'application/json'

    return response
2×
380

381

382
def resources_handler(request):
    """
    Implements the /api/resources endpoint

    :param request: the HTTP request
    :return: a JSON representation of the set of resources currently loaded by the server
    """

    def _describe(res):
        # Note: this writes into the object res.info returns, not into a copy.
        details = res.info
        details['Valid'] = res.is_valid()
        details['Parser'] = res.last_parser
        if res.last_seen is not None:
            details['Last Seen'] = res.last_seen
        # Child resources are described recursively.
        if len(res.children) > 0:
            details['Children'] = [_describe(child) for child in res.children]
        return details

    described = [_describe(top) for top in request.registry.md.rm.children]
    payload = dumps(described, default=json_serializer)

    response = Response(payload)
    response.headers['Content-Type'] = 'application/json'
    return response
!
406

407

408
def pipeline_handler(request):
    """
    Implements the /api/pipeline endpoint

    :param request: the HTTP request
    :return: a JSON representation of the active pipeline
    """
    payload = dumps(request.registry.plumbings, default=json_serializer)

    response = Response(payload)
    response.headers['Content-Type'] = 'application/json'
    return response
!
419

420

421
def search_handler(request):
    """
    Implements the /api/search endpoint

    :param request: the HTTP request with the 'q' (or 'query') request parameter;
        'entity_filter' may narrow the search and defaults to '{http://pyff.io/role}idp'
    :return: a JSON search result, streamed as a JSON array
    """
    match = request.params.get('q', request.params.get('query', None))

    # Enable matching on scope.
    match = match.split('@').pop() if match and not match.endswith('@') else match

    entity_filter = request.params.get('entity_filter', '{http://pyff.io/role}idp')
    log.debug("match={}".format(match))
    store = request.registry.md.store

    # Lower-case only an actual query. Calling .lower() on a missing query
    # (None) used to raise AttributeError while the response was being
    # streamed; this is the same guard process_handler applies.
    query = match.lower() if match else match

    def _response():
        # Emit the results one by one as the elements of a JSON array.
        yield b('[')
        in_loop = False
        entities = store.search(query=query, entity_filter=entity_filter)
        for e in entities:
            if in_loop:
                yield b(',')
            yield b(dumps(e))
            in_loop = True
        yield b(']')

    response = Response(content_type='application/json')
    response.app_iter = _response()
    return response
!
451

452

453
def add_cors_headers_response_callback(event):
    """
    Register a response callback on the event's request that adds CORS headers.

    :param event: event object exposing the current request as ``event.request``
    :return: None
    """
    cors = {
        'Access-Control-Allow-Origin': '*',
        'Access-Control-Allow-Methods': 'POST,GET,DELETE,PUT,OPTIONS',
        'Access-Control-Allow-Headers': ('Origin, Content-Type, Accept, ' 'Authorization'),
        'Access-Control-Allow-Credentials': 'true',
        'Access-Control-Max-Age': '1728000',
    }

    def cors_headers(request, response):
        # Merge the CORS headers into whatever the response already carries.
        response.headers.update(cors)

    event.request.add_response_callback(cors_headers)
2×
466

467

468
def launch_memory_usage_server(port=9002):
    """
    Mount a dowser root application in cherrypy and start the cherrypy engine.

    :param port: value used for cherrypy's 'server.socket_port' setting
    """
    # Imported here so that cherrypy and dowser are only needed when this is called.
    import cherrypy
    import dowser

    settings = {'environment': 'embedded', 'server.socket_port': port}

    cherrypy.tree.mount(dowser.Root())
    cherrypy.config.update(settings)

    cherrypy.engine.start()
!
476

477

478
def mkapp(*args, **kwargs):
    """
    Build the pyFF WSGI application.

    :param args: the pipeline(s) to load; when none are given, config.pipeline is used if set
    :param kwargs: may carry ``md``, the MDRepository to serve; a new one is created otherwise
    :return: the WSGI application made by the pyramid Configurator
    """
    md = kwargs.pop('md', None)
    if md is None:
        md = MDRepository()

    if config.devel_memory_profile:
        launch_memory_usage_server()

    with Configurator(debug_logger=log) as ctx:
        registry = ctx.registry
        ctx.add_subscriber(add_cors_headers_response_callback, NewRequest)

        # Replace missing settings with empty containers.
        if config.aliases is None:
            config.aliases = dict()
        if config.modules is None:
            config.modules = []

        registry.config = config
        config.modules.append('pyff.builtins')
        for module_name in config.modules:
            importlib.import_module(module_name)

        # Positional arguments take precedence over the configured pipeline.
        pipeline = args or None
        if pipeline is None and config.pipeline:
            pipeline = [config.pipeline]

        registry.scheduler = md.scheduler
        if pipeline is not None:
            registry.pipeline = pipeline
            registry.plumbings = [plumbing(v) for v in pipeline]
        registry.aliases = config.aliases
        registry.md = md
        registry.cache = TTLCache(config.cache_size, config.cache_ttl) if config.caching_enabled else NoCache()

        # (route name, pattern, view, add_route options) in registration
        # order; the catch-all '/*path' route comes last.
        routes = (
            ('robots', '/robots.txt', robots_handler, {}),
            ('webfinger', '/.well-known/webfinger', webfinger_handler, {'request_method': 'GET'}),
            ('search', '/api/search', search_handler, {'request_method': 'GET'}),
            ('status', '/api/status', status_handler, {'request_method': 'GET'}),
            ('resources', '/api/resources', resources_handler, {'request_method': 'GET'}),
            ('pipeline', '/api/pipeline', pipeline_handler, {'request_method': 'GET'}),
            ('call', '/api/call/{entry}', process_handler, {'request_method': ['POST', 'PUT']}),
            ('request', '/*path', request_handler, {'request_method': 'GET'}),
        )
        for route_name, pattern, view, options in routes:
            ctx.add_route(route_name, pattern, **options)
            ctx.add_view(view, route_name=route_name)

        start = datetime.utcnow() + timedelta(seconds=1)
        log.debug(start)
        if config.update_frequency > 0:
            # Periodically run call('update'), starting one second from now.
            registry.scheduler.add_job(
                call,
                'interval',
                id="call/update",
                args=['update'],
                start_date=start,
                misfire_grace_time=10,
                seconds=config.update_frequency,
                replace_existing=True,
                max_instances=1,
                timezone=pytz.utc,
            )

        return ctx.make_wsgi_app()
2×
Troubleshooting · Open an Issue · Sales · Support · ENTERPRISE · CAREERS · STATUS
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2023 Coveralls, Inc