• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

uc-cdis / fence / 14886030889

07 May 2025 02:32PM UTC coverage: 74.898% (-0.009%) from 74.907%
14886030889

Pull #1238

github

web-flow
Merge branch 'master' into chore/fix_improper_certificate_validation_pps_1936
Pull Request #1238: Replace AutoAddPolicy with RejectPolicy and load known hosts to prevent man in the middle attacks

8095 of 10808 relevant lines covered (74.9%)

0.75 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

87.43
fence/resources/ga4gh/passports.py
1
import flask
1✔
2
import os
1✔
3
import collections
1✔
4
import hashlib
1✔
5
import time
1✔
6
import datetime
1✔
7
import jwt
1✔
8

9
# the whole fence_create module is imported to avoid issue with circular imports
10
import fence.scripting.fence_create
1✔
11

12
from authutils.errors import JWTError
1✔
13
from authutils.token.core import get_iss, get_kid
1✔
14
from cdislogging import get_logger
1✔
15
from flask import current_app
1✔
16

17
from fence.jwt.validate import validate_jwt
1✔
18
from fence.config import config
1✔
19
from fence.models import (
1✔
20
    create_user,
21
    query_for_user,
22
    query_for_user_by_id,
23
    GA4GHVisaV1,
24
    GA4GHPassportCache,
25
    IdentityProvider,
26
    IssSubPairToUser,
27
)
28

29
logger = get_logger(__name__)

# Module-level in-memory passport cache, keyed by the SHA-256 hex digest of the
# encoded passport. Entries are in the following format:
#   passport_hash: ([username_0, username_1, ...], expires_at)
# where expires_at is compared against int(time.time()) when entries are read.
PASSPORT_CACHE = {}
1✔
34

35

36
def sync_gen3_users_authz_from_ga4gh_passports(
    passports, pkey_cache=None, db_session=None, skip_google_updates=False
):
    """
    Validate passports and embedded visas, using each valid visa's identity
    established by <iss, sub> combination to possibly create and definitely
    determine a Fence user who is added to the mapping returned by this
    function. In the process of determining Fence users from visas, visa
    authorization information is also persisted in Fence and synced to
    Arborist.

    A passport that is found in the passport cache, and whose cached users all
    still exist in the database, is not parsed or validated again.

    Args:
        passports (list): a list of raw encoded passport strings, each
                          including header, payload, and signature
        pkey_cache (dict): optional cache of public keys, passed through to
                           passport and visa validation
        db_session (None, sqlalchemy session): optional database session to use
        skip_google_updates (bool): True if google group updates should be skipped. False if otherwise.

    Return:
        dict: mapping of username to user, with one entry per user that
              corresponds to a valid visa identity embedded within the
              passports passed in
    """
    db_session = db_session or current_app.scoped_session()

    # {"username": user, "username2": user2}
    users_from_all_passports = {}
    for passport in passports:
        try:
            cached_usernames = get_gen3_usernames_for_passport_from_cache(
                passport=passport, db_session=db_session
            )
            if cached_usernames:
                # there's a chance a given username exists in the cache but no longer in
                # the database. if not all are in db, ignore the cache and actually parse
                # and validate the passport
                all_users_exist_in_db = True
                usernames_to_update = {}
                for username in cached_usernames:
                    user = query_for_user(session=db_session, username=username)
                    if not user:
                        all_users_exist_in_db = False
                        continue
                    usernames_to_update[user.username] = user

                if all_users_exist_in_db:
                    users_from_all_passports.update(usernames_to_update)
                    # existence in the cache and a user in db means that this passport
                    # was validated previously (expiration was also checked)
                    continue

            # below function also validates passport (or raises exception)
            raw_visas = get_unvalidated_visas_from_valid_passport(
                passport, pkey_cache=pkey_cache
            )
        except Exception as exc:
            # deliberate best-effort: one bad passport must not block the others
            logger.warning(f"Invalid passport provided, ignoring. Error: {exc}")
            continue

        # an empty raw_visas list means that either the current passport is
        # invalid or that it has no visas. in both cases, the current passport
        # is ignored and we move on to the next passport
        if not raw_visas:
            continue

        identity_to_visas = collections.defaultdict(list)
        # authorization is kept for at most one hour, or less if a visa expires
        # sooner. total_seconds() is used because `.seconds` is only the seconds
        # component of the timedelta and would be wrong for durations >= 1 day.
        min_visa_expiration = int(time.time()) + int(
            datetime.timedelta(hours=1).total_seconds()
        )
        for raw_visa in raw_visas:
            try:
                validated_decoded_visa = validate_visa(raw_visa, pkey_cache=pkey_cache)
                identity_to_visas[
                    (
                        validated_decoded_visa.get("iss"),
                        validated_decoded_visa.get("sub"),
                    )
                ].append((raw_visa, validated_decoded_visa))
                min_visa_expiration = min(
                    min_visa_expiration, validated_decoded_visa.get("exp")
                )
            except Exception as exc:
                logger.warning(f"Invalid visa provided, ignoring. Error: {exc}")
                continue

        # subtract the removal job frequency from the expiration; if what is left
        # is not in the future, the passport is skipped as expiring too soon
        expired_authz_removal_job_freq_in_seconds = config[
            "EXPIRED_AUTHZ_REMOVAL_JOB_FREQ_IN_SECONDS"
        ]
        min_visa_expiration -= expired_authz_removal_job_freq_in_seconds
        if min_visa_expiration <= int(time.time()):
            logger.warning(
                "The passport's earliest valid visa expiration time is set to "
                f"occur within {expired_authz_removal_job_freq_in_seconds} "
                "seconds from now, which is too soon an expiration to handle."
            )
            continue

        # {"username": user} for the users determined from THIS passport only
        users_from_current_passport = {}
        for (issuer, subject_id), visas in identity_to_visas.items():
            gen3_user = get_or_create_gen3_user_from_iss_sub(
                issuer, subject_id, db_session=db_session
            )

            ga4gh_visas = [
                GA4GHVisaV1(
                    user=gen3_user,
                    source=validated_decoded_visa["ga4gh_visa_v1"]["source"],
                    type=validated_decoded_visa["ga4gh_visa_v1"]["type"],
                    asserted=int(validated_decoded_visa["ga4gh_visa_v1"]["asserted"]),
                    expires=int(validated_decoded_visa["exp"]),
                    ga4gh_visa=raw_visa,
                )
                for raw_visa, validated_decoded_visa in visas
            ]
            # NOTE: does not validate, assumes validation occurs above.
            #       This adds the visas to the database session but doesn't commit until
            #       the end of this function
            _sync_validated_visa_authorization(
                gen3_user=gen3_user,
                ga4gh_visas=ga4gh_visas,
                expiration=min_visa_expiration,
                db_session=db_session,
                skip_google_updates=skip_google_updates,
            )
            users_from_current_passport[gen3_user.username] = gen3_user

        users_from_all_passports.update(users_from_current_passport)

        # cache only the users that came from this passport. caching the users
        # accumulated from every passport processed so far would make a later
        # cache hit on this passport alone return users it does not refer to.
        put_gen3_usernames_for_passport_into_cache(
            passport=passport,
            user_ids_from_passports=list(users_from_current_passport),
            expires_at=min_visa_expiration,
            db_session=db_session,
        )

    db_session.commit()

    logger.info(
        f"Got Gen3 usernames from passport(s): {list(users_from_all_passports.keys())}"
    )
    return users_from_all_passports
1✔
173

174

175
def get_unvalidated_visas_from_valid_passport(passport, pkey_cache=None):
    """
    Validate an encoded passport and hand back the encoded visas it carries.

    The visas themselves are NOT validated here. A passport that is malformed,
    fails JWT validation, or lacks a "sub" claim is logged and discarded.

    Args:
        passport (string): encoded ga4gh passport
        pkey_cache (dict): optional cache of public keys, looked up by passport
                           issuer and then by key id

    Return:
        list: encoded GA4GH visas from the passport's "ga4gh_passport_v1" claim,
              or an empty list when the passport is discarded or has no such claim
    """
    known_keys = pkey_cache or {}

    try:
        issuer = get_iss(passport)
        key_id = get_kid(passport)
    except Exception as err:
        logger.error(
            f"Could not get issuer or kid from passport: {err}. Discarding passport."
        )
        # ignore malformed/invalid passports
        return []

    # may be None when the key is not cached
    signing_key = known_keys.get(issuer, {}).get(key_id)

    try:
        claims = validate_jwt(
            encoded_token=passport,
            public_key=signing_key,
            attempt_refresh=True,
            require_purpose=False,
            scope={"openid"},
            issuers=config.get("GA4GH_VISA_ISSUER_ALLOWLIST", []),
            options={
                "require_iat": True,
                "require_exp": True,
                "verify_aud": False,
            },
        )

        if "sub" not in claims:
            raise JWTError("Passport is missing the 'sub' claim")
    except Exception as err:
        logger.error(f"Passport failed validation: {err}. Discarding passport.")
        # ignore malformed/invalid passports
        return []

    return claims.get("ga4gh_passport_v1", [])
1✔
229

230

231
def validate_visa(raw_visa, pkey_cache=None):
    """
    Check a raw visa against the following specs and return its decoded payload:
        - GA4GH AAI spec (https://github.com/ga4gh/data-security/blob/master/AAI/AAIConnectProfile.md)
        - GA4GH DURI spec (https://github.com/ga4gh-duri/ga4gh-duri.github.io/blob/master/researcher_ids/ga4gh_passport_v1.md)

    Args:
        raw_visa (str): a raw, encoded visa including header, payload, and signature
        pkey_cache (dict): optional cache of public keys, forwarded to validate_jwt

    Return:
        dict: the decoded payload when every check passes

    Raises:
        Exception: when any of the checks below fails
    """
    # Visa Document Tokens (identified by a "jku" header) are rejected up front
    header = jwt.get_unverified_header(raw_visa)
    if header.get("jku"):
        raise Exception(
            "Visa Document Tokens are not currently supported by passing "
            '"jku" in the header. Only Visa Access Tokens are supported.'
        )

    logger.info("Attempting to validate visa")

    jwt_options = {"require_iat": True, "require_exp": True, "verify_aud": False}
    decoded_visa = validate_jwt(
        raw_visa,
        attempt_refresh=True,
        scope={"openid", "ga4gh_passport_v1"},
        require_purpose=False,
        issuers=config["GA4GH_VISA_ISSUER_ALLOWLIST"],
        options=jwt_options,
        pkey_cache=pkey_cache,
    )
    for logged_claim in ("jti", "txn"):
        logger.info(f'Visa {logged_claim}: "{decoded_visa.get(logged_claim, "")}"')

    # required top-level claims, reported in this order
    missing_claim = next(
        (name for name in ("sub", "ga4gh_visa_v1") if name not in decoded_visa),
        None,
    )
    if missing_claim is not None:
        raise Exception(f'Visa does not contain REQUIRED "{missing_claim}" claim')

    if "aud" in decoded_visa:
        raise Exception('Visa MUST NOT contain "aud" claim')

    required_fields = config["GA4GH_VISA_V1_CLAIM_REQUIRED_FIELDS"]
    visa_object = decoded_visa["ga4gh_visa_v1"]
    for field, allowed_values in required_fields.items():
        if field not in visa_object:
            raise Exception(
                f'"ga4gh_visa_v1" claim does not contain REQUIRED "{field}" field'
            )
        field_value = visa_object[field]
        if field_value not in allowed_values:
            raise Exception(
                f'{field}={field_value} field in "ga4gh_visa_v1" is not equal to one of the allowed_values: {allowed_values}'
            )

    if "asserted" not in visa_object:
        raise Exception(
            '"ga4gh_visa_v1" claim does not contain REQUIRED "asserted" field'
        )
    asserted = visa_object["asserted"]
    # exact type comparison: anything that is not precisely int or float
    # (bool included) is rejected
    if type(asserted) not in (int, float):
        raise Exception(
            '"ga4gh_visa_v1" claim object\'s "asserted" field\'s type is not '
            "JSON numeric"
        )
    if decoded_visa["iat"] < asserted:
        raise Exception(
            "The Passport Visa Assertion Source made the claim after the visa "
            'was minted (i.e. "ga4gh_visa_v1" claim object\'s "asserted" '
            'field is greater than the visa\'s "iat" claim)'
        )

    # a "conditions" field is tolerated only when it is empty
    if "conditions" in visa_object:
        logger.warning(
            'Condition checking is not yet supported, but a visa was received that contained the "conditions" field'
        )
        if visa_object["conditions"]:
            raise Exception('"conditions" field in "ga4gh_visa_v1" is not empty')

    logger.info("Visa was successfully validated")
    return decoded_visa
1✔
308

309

310
def get_or_create_gen3_user_from_iss_sub(issuer, subject_id, db_session=None):
    """
    Get a user from the Fence database corresponding to the visa identity
    indicated by the <issuer, subject_id> combination. If a Fence user has
    not yet been created for the given <issuer, subject_id> combination,
    create and return such a user.

    When no mapping exists yet, the username is the subject id followed by the
    issuer with its URL scheme removed. A new <issuer, subject_id> -> user
    mapping is added to the session and committed before returning.

    Args:
        issuer (str): the issuer of a given visa
        subject_id (str): the subject of a given visa
        db_session (None, sqlalchemy session): optional database session to use

    Return:
        userdatamodel.user.User: the Fence user corresponding to issuer and subject_id
    """
    db_session = db_session or current_app.scoped_session()
    logger.debug(
        f"get_or_create_gen3_user_from_iss_sub: issuer: {issuer} & subject_id: {subject_id}"
    )
    iss_sub_pair_to_user = db_session.query(IssSubPairToUser).get((issuer, subject_id))
    if not iss_sub_pair_to_user:
        # strip the URL scheme only when it is actually present. slicing a fixed
        # len("https://") characters off would cut into the host of an issuer
        # that starts with "http://" or has no scheme at all.
        issuer_without_scheme = issuer
        for scheme in ("https://", "http://"):
            if issuer.startswith(scheme):
                issuer_without_scheme = issuer[len(scheme) :]
                break
        username = subject_id + issuer_without_scheme

        gen3_user = query_for_user(session=db_session, username=username)
        idp_name = IssSubPairToUser.ISSUER_TO_IDP.get(issuer)
        logger.debug(f"issuer_to_idp: {IssSubPairToUser.ISSUER_TO_IDP}")
        if not gen3_user:
            gen3_user = create_user(db_session, logger, username, idp_name=idp_name)
            if not idp_name:
                logger.info(
                    f"The user (id:{gen3_user.id}) was created without a linked identity "
                    f"provider since it could not be determined based on "
                    f"the issuer {issuer}"
                )

        # ensure user has an associated identity provider
        # NOTE(review): when idp_name is None this still looks up / creates an
        # IdentityProvider named None - confirm that is intended
        if not gen3_user.identity_provider:
            idp = (
                db_session.query(IdentityProvider)
                .filter(IdentityProvider.name == idp_name)
                .first()
            )
            if not idp:
                idp = IdentityProvider(name=idp_name)
            gen3_user.identity_provider = idp

        logger.info(
            f'Mapping subject id ("{subject_id}") and issuer '
            f'("{issuer}") combination to Fence user '
            f'"{gen3_user.username}" with IdP = "{idp_name}"'
        )
        iss_sub_pair_to_user = IssSubPairToUser(iss=issuer, sub=subject_id)
        iss_sub_pair_to_user.user = gen3_user

        db_session.add(iss_sub_pair_to_user)
        db_session.commit()

    return iss_sub_pair_to_user.user
1✔
366

367

368
def _sync_validated_visa_authorization(
    gen3_user, ga4gh_visas, expiration, db_session=None, skip_google_updates=False
):
    """
    Run the syncer's sync_single_user_visas for one user and then stage the
    visas in the database session according to the outcome.

    IMPORTANT NOTE: THIS DOES NOT VALIDATE THE VISAS. ENSURE THIS IS DONE
                    BEFORE THIS.

    Args:
        gen3_user (userdatamodel.user.User): the Fence user whose visas'
                                             authz info is being synced
        ga4gh_visas (list): a list of fence.models.GA4GHVisaV1 objects
                            that are parsed
        expiration (int): passed to the syncer as `expires`; time at which
                          synced Arborist policies and inclusion in any GBAG
                          are set to expire
        db_session (None, sqlalchemy session): optional database session to use
        skip_google_updates (bool): True if google group updates should be skipped. False if otherwise.
    Return:
        None
    """
    session = db_session or current_app.scoped_session()

    # the module is referenced through its full path (see the import at the top
    # of the file) because of circular imports
    fence_create = fence.scripting.fence_create
    syncer_inputs = fence_create.get_default_init_syncer_inputs(
        authz_provider="GA4GH"
    )
    syncer = fence_create.init_syncer(**syncer_inputs)

    parsed_visas = syncer.sync_single_user_visas(
        gen3_user,
        ga4gh_visas,
        session,
        expires=expiration,
        skip_google_updates=skip_google_updates,
    )

    # visas returned by the syncer are added to the session; the rest are deleted
    for visa in ga4gh_visas:
        if visa in parsed_visas:
            logger.debug(f"adding visa with id={visa.id} to db session")
            session.add(visa)
        else:
            logger.debug(f"deleting visa with id={visa.id} from db session")
            session.delete(visa)
1✔
412

413

414
def get_gen3_usernames_for_passport_from_cache(passport, db_session=None):
    """
    Attempt to retrieve a cached list of usernames for a previously validated and
    non-expired passport.

    The in-memory cache is checked first, then the database cache. A database
    hit is copied into the in-memory cache. Expired entries found along the way
    are removed (the database removal is committed immediately).

    Args:
        passport (str): ga4gh encoded passport JWT
        db_session (None, sqlalchemy session): optional database session to use

    Returns:
        list[str]: list of usernames for users referred to by the previously validated
                   and non-expired passport, or None when there is no non-expired
                   cache entry for the passport
    """
    db_session = db_session or current_app.scoped_session()
    current_time = int(time.time())

    passport_hash = hashlib.sha256(passport.encode("utf-8")).hexdigest()

    # try to retrieve from local in-memory cache
    if passport_hash in PASSPORT_CACHE:
        cached_usernames, expires = PASSPORT_CACHE[passport_hash]
        if expires > current_time:
            logger.debug(
                f"Got users {cached_usernames} for provided passport from in-memory cache. "
                f"Expires: {expires}, Current Time: {current_time}"
            )
            return cached_usernames

        # expired, so remove it. the stale usernames must not be returned: they
        # stay local to this branch so they cannot leak into the result below
        del PASSPORT_CACHE[passport_hash]

    # try to retrieve from database cache
    cached_passport = (
        db_session.query(GA4GHPassportCache)
        .filter(GA4GHPassportCache.passport_hash == passport_hash)
        .first()
    )
    if not cached_passport:
        return None

    if cached_passport.expires_at > current_time:
        user_ids_from_passports = cached_passport.user_ids

        # update local cache
        PASSPORT_CACHE[passport_hash] = (
            user_ids_from_passports,
            cached_passport.expires_at,
        )

        logger.debug(
            f"Got users {user_ids_from_passports} for provided passport from "
            f"database cache and placed in in-memory cache. "
            f"Expires: {cached_passport.expires_at}, Current Time: {current_time}"
        )
        return user_ids_from_passports

    # expired, so delete it
    db_session.delete(cached_passport)
    db_session.commit()
    return None
1✔
474

475

476
def put_gen3_usernames_for_passport_into_cache(
    passport, user_ids_from_passports, expires_at, db_session=None
):
    """
    Record a validated, non-expired passport in both caches, mapped to the
    user identifiers its content refers to.

    The in-memory entry is written first, then a row is upserted into the
    ga4gh_passport_cache table. No commit is issued here.

    Args:
        passport (str): ga4gh encoded passport JWT
        user_ids_from_passports (list[str]): list of user identifiers referred to by
            the previously validated and non-expired passport
        expires_at (int): expiration time in unix time
        db_session (None, sqlalchemy session): optional database session to use
    """
    session = db_session or current_app.scoped_session()

    # both caches are keyed by the SHA-256 hex digest of the encoded passport
    cache_key = hashlib.sha256(passport.encode("utf-8")).hexdigest()

    # stores back to cache and db
    PASSPORT_CACHE[cache_key] = (user_ids_from_passports, expires_at)

    # NOTE(review): the statement is passed as a plain string; confirm that the
    # SQLAlchemy version in use accepts that
    upsert_params = {
        "passport_hash": cache_key,
        "expires_at": expires_at,
        "user_ids": user_ids_from_passports,
    }
    session.execute(
        """\
        INSERT INTO ga4gh_passport_cache (
            passport_hash,
            expires_at,
            user_ids
        ) VALUES (
            :passport_hash,
            :expires_at,
            :user_ids
        ) ON CONFLICT (passport_hash) DO UPDATE SET
            expires_at = EXCLUDED.expires_at,
            user_ids = EXCLUDED.user_ids;""",
        upsert_params,
    )

    logger.debug(
        f"Cached {user_ids_from_passports} passport in database. Expires: {expires_at}"
    )
522

523

524
# TODO to be called after login
525
def map_gen3_iss_sub_pair_to_user(gen3_issuer, gen3_subject_id, gen3_user):
    """
    Placeholder that is not implemented yet: it ignores its arguments and
    does nothing.

    Args:
        gen3_issuer: unused
        gen3_subject_id: unused
        gen3_user: unused

    Return:
        None
    """
    return None
×
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc