• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

uc-cdis / fence / 20358027677

19 Dec 2025 02:40AM UTC coverage: 75.002% (+0.02%) from 74.987%
20358027677

Pull #1312

github

nss10
Run with HELM master branch
Pull Request #1312: Update Fence to Python 3.13 + Run as gen3 user

8440 of 11253 relevant lines covered (75.0%)

0.75 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

60.41
fence/scripting/fence_create.py
1
from datetime import datetime, timedelta, UTC
1✔
2
import os
1✔
3
import os.path
1✔
4
import requests
1✔
5
import time
1✔
6
from yaml import safe_load
1✔
7
import json
1✔
8
import pprint
1✔
9
import asyncio
1✔
10

11
from alembic.config import main as alembic_main
1✔
12
from gen3cirrus import GoogleCloudManager
1✔
13
from gen3cirrus.google_cloud.errors import GoogleAuthError
1✔
14
from gen3cirrus.config import config as cirrus_config
1✔
15
from cdislogging import get_logger
1✔
16
from sqlalchemy import func
1✔
17
from userdatamodel.models import (
1✔
18
    AccessPrivilege,
19
    Bucket,
20
    CloudProvider,
21
    GoogleProxyGroup,
22
    Group,
23
    IdentityProvider,
24
    Project,
25
    StorageAccess,
26
    User,
27
    ProjectToBucket,
28
)
29
from sqlalchemy import and_
1✔
30

31
from fence.blueprints.link import (
1✔
32
    force_update_user_google_account_expiration,
33
    add_new_user_google_account,
34
)
35
from fence.errors import Unauthorized
1✔
36
from fence.jwt.token import (
1✔
37
    generate_signed_access_token,
38
    generate_signed_refresh_token,
39
    issued_and_expiration_times,
40
)
41
from fence.job.access_token_updater import TokenAndAuthUpdater
1✔
42
from fence.models import (
1✔
43
    Client,
44
    GoogleServiceAccount,
45
    GoogleServiceAccountKey,
46
    UserGoogleAccount,
47
    UserGoogleAccountToProxyGroup,
48
    GoogleBucketAccessGroup,
49
    GoogleProxyGroupToGoogleBucketAccessGroup,
50
    UserRefreshToken,
51
    ServiceAccountToGoogleBucketAccessGroup,
52
    query_for_user,
53
    GA4GHVisaV1,
54
    get_client_expires_at,
55
)
56
from fence.scripting.google_monitor import email_users_without_access, validation_check
1✔
57
from fence.config import config
1✔
58
from fence.sync.sync_users import UserSyncer
1✔
59
from fence.utils import (
1✔
60
    create_client,
61
    get_valid_expiration,
62
    generate_client_credentials,
63
    get_SQLAlchemyDriver,
64
)
65
from sqlalchemy.orm.attributes import flag_modified
1✔
66
from gen3authz.client.arborist.client import ArboristClient
1✔
67

68
logger = get_logger(__name__)
1✔
69

70

71
def list_client_action(db):
    """Print the metadata of every OIDC client found in the database.

    Args:
        db (str): database connection string

    Any failure is logged rather than raised (best-effort CLI behavior).
    """
    try:
        session_driver = get_SQLAlchemyDriver(db)
        with session_driver.session as session:
            for record in session.query(Client).all():
                pprint.pprint(record.__dict__)
    except Exception as err:
        logger.error(str(err))
79

80

81
def modify_client_action(
    DB,
    client=None,
    delete_urls=False,
    urls=None,
    name=None,
    description=None,
    set_auto_approve=False,
    unset_auto_approve=False,
    arborist=None,
    policies=None,
    allowed_scopes=None,
    append=False,
    expires_in=None,
):
    """
    Modify all OIDC clients that share the given name.

    Args:
        DB (str): database connection string
        client (str): name of the client(s) to modify
        delete_urls (bool): clear all redirect URIs
        urls (str or list): redirect URIs to set (or extend, with ``append``)
        name (str): new client name
        description (str): new client description
        set_auto_approve (bool): turn auto-approve on
        unset_auto_approve (bool): turn auto-approve off
        arborist: Arborist client used to update policies, if provided
        policies (list): policies to grant the client in Arborist
        allowed_scopes (list): OAuth scopes to set (or extend, with ``append``)
        append (bool): extend ``urls``/``allowed_scopes`` instead of replacing
        expires_in: number of days until the client expires

    Raises:
        Exception: if no client with the given name exists
    """
    driver = get_SQLAlchemyDriver(DB)
    with driver.session as s:
        client_name = client
        clients = s.query(Client).filter(Client.name == client_name).all()
        if not clients:
            raise Exception("client {} does not exist".format(client_name))
        # NOTE: `client` is rebound by this loop, shadowing the parameter;
        # after the loop it refers to the *last* matching client row
        for client in clients:
            metadata = client.client_metadata
            if urls:
                if append:
                    metadata["redirect_uris"] += urls
                    logger.info("Adding {} to urls".format(urls))
                else:
                    # accept a single URL or a list of URLs
                    if isinstance(urls, list):
                        metadata["redirect_uris"] = urls
                    else:
                        metadata["redirect_uris"] = [urls]
                    logger.info("Changing urls to {}".format(urls))
            if delete_urls:
                metadata["redirect_uris"] = []
                logger.info("Deleting urls")
            if set_auto_approve:
                client.auto_approve = True
                logger.info("Auto approve set to True")
            if unset_auto_approve:
                client.auto_approve = False
                logger.info("Auto approve set to False")
            if name:
                client.name = name
                logger.info("Updating name to {}".format(name))
            if description:
                client.description = description
                logger.info("Updating description to {}".format(description))
            if allowed_scopes:
                if append:
                    # scopes are stored as a single space-separated string
                    new_scopes = client.scope.split() + allowed_scopes
                    metadata["scope"] = " ".join(new_scopes)
                    logger.info("Adding {} to allowed_scopes".format(allowed_scopes))
                else:
                    metadata["scope"] = " ".join(allowed_scopes)
                    logger.info("Updating allowed_scopes to {}".format(allowed_scopes))
            if expires_in:
                client.expires_at = get_client_expires_at(
                    expires_in=expires_in, grant_types=client.grant_types
                )
            # Call setter on Json object to persist changes if any
            client.set_client_metadata(metadata)
        s.commit()
        # NOTE(review): this updates policies for the loop's last client only;
        # multiple rows share a name after rotation — confirm this is intended
        if arborist is not None and policies:
            arborist.update_client(client.client_id, policies)
146

147

148
def create_client_action(
    DB,
    username=None,
    client=None,
    urls=None,
    auto_approve=False,
    expires_in=None,
    **kwargs,
):
    """Create a new OIDC client and print its credentials to stdout.

    Args:
        DB (str): database connection string
        username (str): user the client belongs to
        client (str): name for the new client
        urls: redirect URIs for the client
        auto_approve (bool): whether to skip the user consent step
        expires_in: number of days until the client expires
        **kwargs: forwarded verbatim to ``create_client``
    """
    print("\nSave these credentials! Fence will not save the unhashed client secret.")
    credentials = create_client(
        DB=DB,
        username=username,
        urls=urls,
        name=client,
        auto_approve=auto_approve,
        expires_in=expires_in,
        **kwargs,
    )
    print("client id, client secret:")
    # This should always be the last line of output and should remain in this format--
    # cloud-auto and gen3-qa use the output programmatically.
    print(credentials)
171

172

173
def delete_client_action(DB, client_name):
    """
    Delete every OIDC client named ``client_name``, including each client's
    Google service accounts (both in Google and in the fence database).

    Args:
        DB (str): database connection string
        client_name (str): name of the client(s) to delete

    Errors are logged rather than raised (best-effort CLI behavior).
    """
    try:
        cirrus_config.update(**config["CIRRUS_CFG"])
    except AttributeError:
        # no cirrus config, continue anyway. we don't have client service accounts
        # to delete
        pass

    try:
        driver = get_SQLAlchemyDriver(DB)
        with driver.session as current_session:
            # single query: the previous implementation ran one query for an
            # existence check and a second identical query for the rows
            clients = (
                current_session.query(Client).filter(Client.name == client_name).all()
            )
            if not clients:
                raise Exception("client {} does not exist".format(client_name))

            for client in clients:
                _remove_client_service_accounts(current_session, client)
                current_session.delete(client)
            current_session.commit()

        logger.info("Client '{}' deleted".format(client_name))
    except Exception as e:
        # preserve original behavior: log and swallow so the CLI exits cleanly
        logger.error(str(e))
203

204

205
def delete_expired_clients_action(DB, slack_webhook=None, warning_days=None):
    """
    Delete expired OIDC clients and warn about clients expiring soon.

    Args:
        slack_webhook (str): Slack webhook to post warnings when clients expired or are about to expire
        warning_days (int): how many days before a client expires should we post a warning on
            Slack (default: 7)
    """
    try:
        cirrus_config.update(**config["CIRRUS_CFG"])
    except AttributeError:
        # no cirrus config, continue anyway. we don't have client service accounts
        # to delete
        pass

    def format_uris(uris):
        # pass through empty/placeholder values unchanged
        if not uris or uris == [None]:
            return uris
        return " ".join(uris)

    def _utc_display(ts):
        # FIX: previously `datetime.fromtimestamp(ts)` rendered the host's
        # *local* time while the message labeled it "UTC"; render actual UTC
        # (tzinfo stripped so the string format matches the old messages)
        return datetime.fromtimestamp(ts, UTC).replace(tzinfo=None)

    # timezone-aware now, consistent with the warning-expiry computation below
    # (same epoch value as the previous naive `datetime.now().timestamp()`)
    now = datetime.now(UTC).timestamp()
    driver = get_SQLAlchemyDriver(DB)
    expired_messages = ["Some expired OIDC clients have been deleted!"]
    with driver.session as current_session:
        clients = (
            current_session.query(Client)
            # for backwards compatibility, 0 means no expiration
            .filter(Client.expires_at != 0)
            .filter(Client.expires_at <= now)
            .all()
        )

        for client in clients:
            # FIX: removed the stray trailing ")" the old message carried
            expired_messages.append(
                f"Client '{client.name}' (ID '{client.client_id}') expired at {_utc_display(client.expires_at)} UTC. Redirect URIs: {format_uris(client.redirect_uris)}"
            )
            _remove_client_service_accounts(current_session, client)
            current_session.delete(client)
            # commit per client so a later failure doesn't roll back earlier deletes
            current_session.commit()

        # get the clients that are expiring soon
        warning_days = float(warning_days) if warning_days else 7
        warning_days_in_secs = warning_days * 24 * 60 * 60  # days to seconds
        warning_expiry = (
            datetime.now(UTC) + timedelta(seconds=warning_days_in_secs)
        ).timestamp()
        expiring_clients = (
            current_session.query(Client)
            .filter(Client.expires_at != 0)
            .filter(Client.expires_at <= warning_expiry)
            .all()
        )

    expiring_messages = ["Some OIDC clients are expiring soon!"]
    expiring_messages.extend(
        [
            f"Client '{client.name}' (ID '{client.client_id}') expires at {_utc_display(client.expires_at)} UTC. Redirect URIs: {format_uris(client.redirect_uris)}"
            for client in expiring_clients
        ]
    )

    for post_msgs, nothing_to_do_msg in (
        (expired_messages, "No expired clients to delete"),
        (expiring_messages, "No clients are close to expiring"),
    ):
        # each list starts with a header; >1 entry means there is real content
        if len(post_msgs) > 1:
            for e in post_msgs:
                logger.info(e)
            if slack_webhook:  # post a warning on Slack
                logger.info("Posting to Slack...")
                payload = {
                    "attachments": [
                        {
                            "fallback": post_msgs[0],
                            "title": post_msgs[0],
                            "text": "\n- " + "\n- ".join(post_msgs[1:]),
                            "color": "#FF5F15",
                        }
                    ]
                }
                resp = requests.post(slack_webhook, json=payload)
                resp.raise_for_status()
        else:
            logger.info(nothing_to_do_msg)
288

289

290
def rotate_client_action(DB, client_name, expires_in=None):
    """
    Rotate a client's credentials (client ID and secret). The old credentials are
    NOT deactivated and must be deleted or expired separately. This allows for a
    rotation without downtime.

    Args:
        DB (str): database connection string
        client_name (str): name of the client to rotate credentials for
        expires_in (optional): number of days until this client expires (by default, no expiration)

    Returns:
        This function does not return anything, but it prints the new set of credentials.

    Raises:
        Exception: if no client with the given name exists
    """
    driver = get_SQLAlchemyDriver(DB)
    with driver.session as s:
        client = s.query(Client).filter(Client.name == client_name).first()
        if not client:
            raise Exception("client {} does not exist".format(client_name))

        # create a new row in the DB for the same client, with a new ID, secret and expiration
        client_id, client_secret, hashed_secret = generate_client_credentials(
            client.is_confidential
        )
        client = Client(
            client_id=client_id,
            client_secret=hashed_secret,
            expires_in=expires_in,
            # the rest is identical to the client being rotated
            user=client.user,
            redirect_uris=client.redirect_uris,
            allowed_scopes=client.scope,
            description=client.description,
            name=client.name,
            auto_approve=client.auto_approve,
            grant_types=client.grant_types,
            is_confidential=client.is_confidential,
            token_endpoint_auth_method=client.token_endpoint_auth_method,
        )
        s.add(client)
        s.commit()

    # only the unhashed secret is printed; it is never stored
    res = (client_id, client_secret)
    print(
        f"\nSave these credentials! Fence will not save the unhashed client secret.\nclient id, client secret:\n{res}"
    )
336

337

338
def _remove_client_service_accounts(db_session, client):
    """Delete a client's Google service accounts, in Google and in the DB.

    Each DB row is removed (and committed) only after Google confirms the
    deletion; Google-side failures are logged and the row is kept.

    Args:
        db_session: SQLAlchemy session
        client: Client whose service accounts should be removed
    """
    service_accounts = (
        db_session.query(GoogleServiceAccount)
        .filter(GoogleServiceAccount.client_id == client.client_id)
        .all()
    )
    if not service_accounts:
        return

    with GoogleCloudManager() as g_mgr:
        for service_account in service_accounts:
            logger.info(
                "Deleting client {}'s service account: {}".format(
                    client.name, service_account.email
                )
            )
            response = g_mgr.delete_service_account(service_account.email)
            if response.get("error"):
                logger.error("ERROR - from Google: {}".format(response))
                logger.error(
                    "ERROR - Could not delete client service account: {}".format(
                        service_account.email
                    )
                )
            else:
                db_session.delete(service_account)
                db_session.commit()
364

365

366
def get_default_init_syncer_inputs(authz_provider):
    """Build the default keyword arguments for ``init_syncer``.

    Values come from environment variables when set, otherwise from the
    fence config.

    Args:
        authz_provider (str): authz provider name passed to the Arborist client

    Returns:
        dict: keys "DB", "arborist", "dbGaP", "STORAGE_CREDENTIALS"
    """
    db_url = os.environ.get("FENCE_DB") or config.get("DB")

    arborist_client = ArboristClient(
        arborist_base_url=config["ARBORIST"],
        logger=get_logger("user_syncer.arborist_client"),
        authz_provider=authz_provider,
    )

    # normalize to a list of dbGaP server configurations
    dbgap_configs = os.environ.get("dbGaP") or config.get("dbGaP")
    if not isinstance(dbgap_configs, list):
        dbgap_configs = [dbgap_configs]

    return {
        "DB": db_url,
        "arborist": arborist_client,
        "dbGaP": dbgap_configs,
        "STORAGE_CREDENTIALS": config["STORAGE_CREDENTIALS"],
    }
386

387

388
def init_syncer(
    dbGaP,
    STORAGE_CREDENTIALS,
    DB,
    projects=None,
    is_sync_from_dbgap_server=False,
    sync_from_local_csv_dir=None,
    sync_from_local_yaml_file=None,
    arborist=None,
    folder=None,
):
    """
    sync ACL files from dbGap to auth db and storage backends
    imports from config is done here because dbGap is
    an optional requirement for fence so it might not be specified
    in config
    Args:
        projects: path to project_mapping yaml file which contains mapping
        from dbgap phsids to projects in fence database
    Returns:
        UserSyncer, or None if any provided input path does not exist
    Examples:
        the expected yaml structure should look like:
        .. code-block:: yaml
            phs000178:
              - name: TCGA
                auth_id: phs000178
              - name: TCGA-PCAWG
                auth_id: TCGA-PCAWG
            phs000235:
              - name: CGCI
                auth_id: phs000235
    """
    try:
        cirrus_config.update(**config["CIRRUS_CFG"])
    except AttributeError:
        # no cirrus config, continue anyway. Google APIs will probably fail.
        # this is okay if users don't need access to Google buckets
        pass

    # validate every provided input path up front; a missing path aborts the
    # sync with a log message instead of an exception (returns None)
    if projects is not None and not os.path.exists(projects):
        logger.error("====={} is not found!!!=======".format(projects))
        return
    if sync_from_local_csv_dir and not os.path.exists(sync_from_local_csv_dir):
        logger.error("====={} is not found!!!=======".format(sync_from_local_csv_dir))
        return
    if sync_from_local_yaml_file and not os.path.exists(sync_from_local_yaml_file):
        logger.error("====={} is not found!!!=======".format(sync_from_local_yaml_file))
        return

    project_mapping = None
    if projects:
        try:
            with open(projects, "r") as f:
                project_mapping = safe_load(f)
        # NOTE(review): IOError is swallowed so a momentarily-unreadable file
        # degrades to "no mapping" rather than failing the whole sync —
        # presumably deliberate best-effort; confirm before changing
        except IOError:
            pass

    return UserSyncer(
        dbGaP,
        DB,
        project_mapping=project_mapping,
        storage_credentials=STORAGE_CREDENTIALS,
        is_sync_from_dbgap_server=is_sync_from_dbgap_server,
        sync_from_local_csv_dir=sync_from_local_csv_dir,
        sync_from_local_yaml_file=sync_from_local_yaml_file,
        arborist=arborist,
        folder=folder,
    )
457

458

459
def download_dbgap_files(
    # Note: need to keep all parameter to prevent download failure
    dbGaP,
    STORAGE_CREDENTIALS,
    DB,
    projects=None,
    is_sync_from_dbgap_server=False,
    sync_from_local_csv_dir=None,
    sync_from_local_yaml_file=None,
    arborist=None,
    folder=None,
):
    """Build a UserSyncer and download the dbGaP ACL files (no sync).

    Exits with status 1 if the syncer cannot be constructed.
    """
    syncer = init_syncer(
        dbGaP,
        STORAGE_CREDENTIALS,
        DB,
        projects=projects,
        is_sync_from_dbgap_server=is_sync_from_dbgap_server,
        sync_from_local_csv_dir=sync_from_local_csv_dir,
        sync_from_local_yaml_file=sync_from_local_yaml_file,
        arborist=arborist,
        folder=folder,
    )
    if syncer is None:
        exit(1)
    syncer.download()
485

486

487
def sync_users(
    dbGaP,
    STORAGE_CREDENTIALS,
    DB,
    projects=None,
    is_sync_from_dbgap_server=False,
    sync_from_local_csv_dir=None,
    sync_from_local_yaml_file=None,
    arborist=None,
    folder=None,
):
    """Build a UserSyncer and run a full user sync.

    Exits with status 1 if the syncer cannot be constructed.
    """
    syncer = init_syncer(
        dbGaP,
        STORAGE_CREDENTIALS,
        DB,
        projects=projects,
        is_sync_from_dbgap_server=is_sync_from_dbgap_server,
        sync_from_local_csv_dir=sync_from_local_csv_dir,
        sync_from_local_yaml_file=sync_from_local_yaml_file,
        arborist=arborist,
        folder=folder,
    )
    if syncer is None:
        exit(1)
    syncer.sync()
512

513

514
def create_sample_data(DB, yaml_file_path):
    """Populate the DB from a YAML fixture file.

    Creates cloud providers, projects, groups, and users (with their groups,
    projects, and clients) in that order.

    Args:
        DB (str): database connection string
        yaml_file_path (str): path to the YAML fixture
    """
    with open(yaml_file_path, "r") as yaml_file:
        sample = safe_load(yaml_file)

    driver = get_SQLAlchemyDriver(DB)
    with driver.session as session:
        create_cloud_providers(session, sample)
        create_projects(session, sample)
        create_group(session, sample)
        create_users_with_group(DB, session, sample)
524

525

526
def create_group(s, data):
    """Ensure every group in ``data["groups"]`` exists and grant its projects.

    Args:
        s: SQLAlchemy session
        data (dict): fixture dict; "groups" maps group name to fields
    """
    for group_name, fields in data.get("groups", {}).items():
        group = s.query(Group).filter(Group.name == group_name).first()
        if group is None:
            group = Group(name=group_name)
            s.add(group)
        for project_data in fields.get("projects", []):
            grant_project_to_group_or_user(s, project_data, group)
536

537

538
def create_projects(s, data):
    """Create each project listed under ``data["projects"]``.

    Args:
        s: SQLAlchemy session
        data (dict): fixture dict with an optional "projects" list
    """
    for project_data in data.get("projects", []):
        create_project(s, project_data)
542

543

544
def create_project(s, project_data):
    """Get or create a Project, including its storage accesses and buckets.

    Args:
        s: SQLAlchemy session
        project_data (dict): must contain "auth_id"; may contain "name"
            and "storage_accesses" (each with "name" and optional "buckets")

    Returns:
        Project: the existing or newly created project

    Raises:
        AssertionError: if a referenced cloud provider does not exist
    """
    auth_id = project_data["auth_id"]
    project_name = project_data.get("name", auth_id)
    project = s.query(Project).filter_by(name=project_name).first()
    if not project:
        project = Project(name=project_name, auth_id=auth_id)
        s.add(project)

    if "storage_accesses" in project_data:
        for storage_access in project_data.get("storage_accesses", []):
            provider_name = storage_access["name"]

            existing_sa = (
                s.query(StorageAccess)
                .join(StorageAccess.provider)
                .join(StorageAccess.project)
                .filter(Project.name == project.name)
                .filter(CloudProvider.name == provider_name)
                .first()
            )
            cloud_provider = (
                s.query(CloudProvider).filter_by(name=provider_name).first()
            )
            assert cloud_provider, "CloudProvider {} does not exist".format(
                provider_name
            )
            if not existing_sa:
                s.add(StorageAccess(provider=cloud_provider, project=project))
                logger.info(
                    "created storage access for {} to {}".format(
                        project.name, cloud_provider.name
                    )
                )

            for bucket_name in storage_access.get("buckets", []):
                existing_bucket = (
                    s.query(Bucket)
                    .filter_by(name=bucket_name)
                    .join(Bucket.provider)
                    .filter(CloudProvider.name == provider_name)
                    .first()
                )
                if not existing_bucket:
                    new_bucket = Bucket(name=bucket_name)
                    new_bucket.provider = cloud_provider
                    s.add(new_bucket)
                    logger.info("created bucket {} in db".format(bucket_name))

    return project
590

591

592
def grant_project_to_group_or_user(s, project_data, group=None, user=None):
    """
    Grant access privileges on a project to a group or a user.

    Creates the project if needed, then creates or updates the
    AccessPrivilege row linking it to the group (preferred) or user.

    Args:
        s: SQLAlchemy session
        project_data (dict): project definition ("auth_id", optional "name",
            "privilege" list, "storage_accesses")
        group: Group to grant access to (takes precedence over ``user``)
        user: User to grant access to

    Raises:
        Exception: when neither ``group`` nor ``user`` is provided
    """
    privilege = project_data.get("privilege", [])
    project = create_project(s, project_data)
    if group:
        ap = (
            s.query(AccessPrivilege)
            .join(AccessPrivilege.project)
            .join(AccessPrivilege.group)
            .filter(Project.name == project.name, Group.name == group.name)
            .first()
        )
        name = group.name
    elif user:
        ap = (
            s.query(AccessPrivilege)
            .join(AccessPrivilege.project)
            .join(AccessPrivilege.user)
            .filter(
                Project.name == project.name,
                # usernames are matched case-insensitively
                func.lower(User.username) == func.lower(user.username),
            )
            .first()
        )
        name = user.username
    else:
        raise Exception("need to provide either a user or group")
    if not ap:
        # exactly one of group/user is guaranteed set by the branch above, so
        # the previous duplicate "need to provide..." raise here was unreachable
        if group:
            ap = AccessPrivilege(project=project, group=group, privilege=privilege)
        else:
            ap = AccessPrivilege(project=project, user=user, privilege=privilege)
        s.add(ap)
        logger.info(
            "created access privilege {} of project {} to {}".format(
                privilege, project.name, name
            )
        )
    else:
        ap.privilege = privilege
        logger.info(
            "updated access privilege {} of project {} to {}".format(
                privilege, project.name, name
            )
        )
638

639

640
def create_cloud_providers(s, data):
    """Ensure each provider in ``data["cloud_providers"]`` exists in the DB.

    Args:
        s: SQLAlchemy session
        data (dict): fixture dict; "cloud_providers" maps name to fields
            (optional "backend", default "cleversafe"; optional "service",
            default "storage")
    """
    for provider_name, fields in data.get("cloud_providers", {}).items():
        existing = (
            s.query(CloudProvider).filter(CloudProvider.name == provider_name).first()
        )
        if existing is None:
            s.add(
                CloudProvider(
                    name=provider_name,
                    backend=fields.get("backend", "cleversafe"),
                    service=fields.get("service", "storage"),
                )
            )
653

654

655
def create_users_with_group(DB, s, data):
    """
    Create or update users from a fixture dict, assigning their groups,
    projects, and OIDC clients.

    Args:
        DB (str): database connection string (needed by ``create_client_action``)
        s: SQLAlchemy session
        data (dict): full fixture dict with "users" and "groups"

    Raises:
        Exception: if a user references an identity provider not in the DB
    """
    providers = {}
    data_groups = data.get("groups", {})
    # FIX: the loop previously rebound the name `data`, shadowing the fixture
    # dict parameter; renamed to `user_data` (behavior unchanged)
    for username, user_data in data.get("users", {}).items():
        is_existing_user = True
        user = query_for_user(session=s, username=username)

        admin = user_data.get("admin", False)

        if not user:
            is_existing_user = False
            provider_name = user_data.get("provider", "google")
            # cache provider lookups across users
            provider = providers.get(provider_name)
            if not provider:
                provider = (
                    s.query(IdentityProvider)
                    .filter(IdentityProvider.name == provider_name)
                    .first()
                )
                providers[provider_name] = provider
                if not provider:
                    raise Exception("provider {} not found".format(provider_name))

            user = User(username=username, idp_id=provider.id, is_admin=admin)
        user.is_admin = admin
        for group_name in user_data.get("groups", []):
            assign_group_to_user(s, user, group_name, data_groups[group_name])
        for project in user_data.get("projects", []):
            grant_project_to_group_or_user(s, project, user=user)
        if not is_existing_user:
            s.add(user)
        for client in user_data.get("clients", []):
            create_client_action(DB, username=username, **client)
691

692

693
def assign_group_to_user(s, user, group_name, group_data):
    """
    Ensure the named group exists and that ``user`` is a member of it.

    Args:
        s: SQLAlchemy session
        user: User to add to the group
        group_name (str): name of the group
        group_data: unused; kept for interface compatibility with callers
    """
    group = s.query(Group).filter(Group.name == group_name).first()
    if not group:
        group = Group(name=group_name)
        s.add(group)
    # the membership check covers the newly-created case too, so the redundant
    # append the original did inside the creation branch was removed
    if group not in user.groups:
        user.groups.append(group)
701

702

703
def google_init(db):
    """
    DEPRECATED - kept only so existing callers do not break.

    Proxy groups and service accounts are now created lazily, so there is
    nothing left to initialize here; this is a no-op.
    """
710

711

712
def remove_expired_google_service_account_keys(db):
    """
    Remove expired Google service account keys, both in Google and in the
    fence database.

    Two categories are handled:
    - client service accounts: Google-side cleanup is delegated to
      ``handle_expired_service_account_keys``
    - keys with a custom expiration tracked in ``GoogleServiceAccountKey``:
      deleted in Google first, then removed from the DB on success (or when
      Google reports the key already gone)

    Args:
        db (str): database connection string
    """
    cirrus_config.update(**config["CIRRUS_CFG"])

    db = get_SQLAlchemyDriver(db)
    with db.session as current_session:
        # pair each client-owned service account with its client row
        client_service_accounts = current_session.query(
            GoogleServiceAccount, Client
        ).filter(GoogleServiceAccount.client_id == Client.client_id)

        # expiration comparisons use epoch seconds
        current_time = int(time.time())
        logger.info("Current time: {}\n".format(current_time))

        expired_sa_keys_for_users = current_session.query(
            GoogleServiceAccountKey
        ).filter(GoogleServiceAccountKey.expires <= current_time)

        with GoogleCloudManager() as g_mgr:
            # handle service accounts with default max expiration
            for service_account, client in client_service_accounts:
                g_mgr.handle_expired_service_account_keys(service_account.email)

            # handle service accounts with custom expiration
            for expired_user_key in expired_sa_keys_for_users:
                logger.info("expired_user_key: {}\n".format(expired_user_key))
                sa = (
                    current_session.query(GoogleServiceAccount)
                    .filter(
                        GoogleServiceAccount.id == expired_user_key.service_account_id
                    )
                    .first()
                )

                # delete in Google first; only drop the DB row once Google
                # confirms (or reports the key is already gone)
                response = g_mgr.delete_service_account_key(
                    account=sa.email, key_name=expired_user_key.key_id
                )
                response_error_code = response.get("error", {}).get("code")
                response_error_status = response.get("error", {}).get("status")
                logger.info("response_error_code: {}\n".format(response_error_code))
                logger.info("response_error_status: {}\n".format(response_error_status))

                if not response_error_code:
                    current_session.delete(expired_user_key)
                    logger.info(
                        "INFO: Removed expired service account key {} "
                        "for service account {} (owned by user with id {}).\n".format(
                            expired_user_key.key_id, sa.email, sa.user_id
                        )
                    )
                elif (
                    response_error_code == 404
                    or response_error_status == "FAILED_PRECONDITION"
                ):
                    # key no longer exists in Google; safe to forget locally
                    logger.info(
                        "INFO: Service account key {} for service account {} "
                        "(owned by user with id {}) does not exist in Google. "
                        "Removing from database...\n".format(
                            expired_user_key.key_id, sa.email, sa.user_id
                        )
                    )
                    current_session.delete(expired_user_key)
                else:
                    # unexpected Google error: keep the DB row for a retry later
                    logger.error(
                        "ERROR: Google returned an error when attempting to "
                        "remove service account key {} "
                        "for service account {} (owned by user with id {}). "
                        "Error:\n{}\n".format(
                            expired_user_key.key_id, sa.email, sa.user_id, response
                        )
                    )
781

782

783
def remove_expired_google_accounts_from_proxy_groups(db):
    """
    For every linked Google account whose proxy-group membership has expired,
    remove the account from its proxy group in Google and, on success, delete
    the membership record from the db.

    Args:
        db (str): database connection string
    """
    cirrus_config.update(**config["CIRRUS_CFG"])

    driver = get_SQLAlchemyDriver(db)
    with driver.session as current_session:
        current_time = int(time.time())
        logger.info("Current time: {}".format(current_time))

        expired_accounts = current_session.query(UserGoogleAccountToProxyGroup).filter(
            UserGoogleAccountToProxyGroup.expires <= current_time
        )

        with GoogleCloudManager() as g_mgr:
            for expired_account_access in expired_accounts:
                # look up the Google account tied to this expired membership
                g_account = (
                    current_session.query(UserGoogleAccount)
                    .filter(
                        UserGoogleAccount.id
                        == expired_account_access.user_google_account_id
                    )
                    .first()
                )
                try:
                    response = g_mgr.remove_member_from_group(
                        member_email=g_account.email,
                        group_id=expired_account_access.proxy_group_id,
                    )
                    error_code = response.get("error", {}).get("code")

                    if error_code:
                        # Google refused the removal; keep the db record so a
                        # later run can retry.
                        logger.error(
                            "ERROR: Google returned an error when attempting to "
                            "remove member {} from proxy group {}. Error:\n{}\n".format(
                                g_account.email,
                                expired_account_access.proxy_group_id,
                                response,
                            )
                        )
                    else:
                        current_session.delete(expired_account_access)
                        logger.info(
                            "INFO: Removed {} from proxy group with id {}.\n".format(
                                g_account.email, expired_account_access.proxy_group_id
                            )
                        )
                except Exception as exc:
                    logger.error(
                        "ERROR: Google returned an error when attempting to "
                        "remove member {} from proxy group {}. Error:\n{}\n".format(
                            g_account.email, expired_account_access.proxy_group_id, exc
                        )
                    )
835

836

837
def delete_users(DB, usernames):
    """
    Delete the given users from the database, matching usernames
    case-insensitively.

    Args:
        DB (str): database connection string
        usernames (List[str]): usernames of the users to delete
    """
    driver = get_SQLAlchemyDriver(DB)
    with driver.session as session:
        # NOTE: calling ``.delete()`` on the query itself would bypass the
        # cascade deletion rules configured on ORM relationships, so load the
        # User objects and delete them individually instead.
        targets = [name.lower() for name in usernames]
        matched_users = (
            session.query(User)
            .filter(func.lower(User.username).in_(targets))
            .all()
        )
        for matched_user in matched_users:
            session.delete(matched_user)
        session.commit()
1✔
851

852

853
def cleanup_expired_ga4gh_information(DB):
    """
    Remove expired GA4GH passports/visas from the database.

    IMPORTANT NOTE: This DOES NOT actually remove authorization; it assumes
                    that the same expiration was set and honored in the
                    authorization system.

    Args:
        DB (str): database connection string
    """
    driver = get_SQLAlchemyDriver(DB)
    with driver.session as session:
        now = int(time.time())

        # A NULL ``expires`` means "never expires", so only select rows with a
        # concrete expiration in the past.
        expired_visas = (
            session.query(GA4GHVisaV1)
            .filter(
                and_(
                    GA4GHVisaV1.expires.isnot(None),
                    GA4GHVisaV1.expires < now,
                )
            )
            .all()
        )
        num_deleted_records = 0
        for visa in expired_visas:
            try:
                # commit per record so one failure doesn't roll back the rest
                session.delete(visa)
                session.commit()

                num_deleted_records += 1
            except Exception as e:
                logger.error(
                    "ERROR: Could not remove GA4GHVisaV1 with id={}. Detail {}".format(
                        visa.id, e
                    )
                )

        logger.info(
            f"Removed {num_deleted_records} expired GA4GHVisaV1 records from db."
        )
893

894

895
def delete_expired_google_access(DB):
    """
    Delete all expired Google data access (e.g. remove proxy groups from Google Bucket
    Access Groups if expired).

    Args:
        DB (str): database connection string
    """
    cirrus_config.update(**config["CIRRUS_CFG"])

    driver = get_SQLAlchemyDriver(DB)
    with driver.session as session:
        current_time = int(time.time())

        # Get expires field from db, if None default to NOT expired
        records_to_delete = (
            session.query(GoogleProxyGroupToGoogleBucketAccessGroup)
            .filter(
                and_(
                    GoogleProxyGroupToGoogleBucketAccessGroup.expires.isnot(None),
                    GoogleProxyGroupToGoogleBucketAccessGroup.expires < current_time,
                )
            )
            .all()
        )
        num_deleted_records = 0
        if records_to_delete:
            with GoogleCloudManager() as manager:
                for record in records_to_delete:
                    # BUGFIX: initialize these names before the try block. If
                    # accessing record.proxy_group.email (or .access_group.email)
                    # raised, the except handler referenced unbound locals and
                    # crashed with UnboundLocalError, masking the real error.
                    member_email = None
                    access_group_email = None
                    try:
                        member_email = record.proxy_group.email
                        access_group_email = record.access_group.email
                        manager.remove_member_from_group(
                            member_email, access_group_email
                        )
                        logger.info(
                            "Removed {} from {}, expired {}. Current time: {} ".format(
                                member_email,
                                access_group_email,
                                record.expires,
                                current_time,
                            )
                        )
                        # only remove the db record once Google removal succeeded
                        session.delete(record)
                        session.commit()

                        num_deleted_records += 1
                    except Exception as e:
                        logger.error(
                            "ERROR: Could not remove Google group member {} from access group {}. Detail {}".format(
                                member_email, access_group_email, e
                            )
                        )

        logger.info(
            f"Removed {num_deleted_records} expired Google Access records from db and Google."
        )
949

950

951
def delete_expired_service_accounts(DB):
    """
    Remove expired service accounts from their Google Bucket Access Groups and
    delete the corresponding db records.

    Args:
        DB (str): database connection string
    """
    cirrus_config.update(**config["CIRRUS_CFG"])

    driver = get_SQLAlchemyDriver(DB)
    with driver.session as session:
        now = int(time.time())
        expired_records = (
            session.query(ServiceAccountToGoogleBucketAccessGroup)
            .filter(ServiceAccountToGoogleBucketAccessGroup.expires < now)
            .all()
        )
        if not expired_records:
            return

        with GoogleCloudManager() as manager:
            for expired in expired_records:
                try:
                    # remove from Google first, then drop the db record
                    manager.remove_member_from_group(
                        expired.service_account.email, expired.access_group.email
                    )
                    session.delete(expired)
                    logger.info(
                        "Removed expired service account: {}".format(
                            expired.service_account.email
                        )
                    )
                except Exception as e:
                    logger.error(
                        "ERROR: Could not delete service account {}. Details: {}".format(
                            expired.service_account.email, e
                        )
                    )

            session.commit()
1✔
986

987

988
def verify_bucket_access_group(DB):
    """
    Walk every Google Bucket Access Group and remove members (Google groups or
    user service accounts) that exist in Google but not in Fence.

    Args:
        DB (str): db connection string

    Returns:
        None
    """
    cirrus_config.update(**config["CIRRUS_CFG"])

    driver = get_SQLAlchemyDriver(DB)
    with driver.session as session:
        access_groups = session.query(GoogleBucketAccessGroup).all()
        with GoogleCloudManager() as manager:
            for access_group in access_groups:
                try:
                    members = manager.get_group_members(access_group.email)
                    logger.debug(f"google group members response: {members}")
                except GoogleAuthError as e:
                    # auth failures won't recover mid-run; bail out entirely
                    logger.error("ERROR: Authentication error!!!. Detail {}".format(e))
                    return
                except Exception as e:
                    logger.error(
                        "ERROR: Could not list group members of {}. Detail {}".format(
                            access_group.email, e
                        )
                    )
                    return

                # dispatch on the member type reported by Google
                for member in members:
                    member_type = member.get("type")
                    if member_type == "GROUP":
                        _verify_google_group_member(session, access_group, member)
                    elif member_type == "USER":
                        _verify_google_service_account_member(
                            session, access_group, member
                        )
1028

1029

1030
def _verify_google_group_member(session, access_group, member):
    """
    Remove the member (a Google group) from the access group in Google if it
    is not linked to the access group in Fence.

    Args:
        session (Session): db session
        access_group (GoogleBucketAccessGroup): access group
        member (dict): group member info from Google (expects an "email" key)

    Returns:
        None
    """
    account_emails = [
        granted_group.proxy_group.email
        for granted_group in (
            session.query(GoogleProxyGroupToGoogleBucketAccessGroup)
            .filter_by(access_group_id=access_group.id)
            .all()
        )
    ]

    # direct membership test (replaces any() over a filtered list)
    if member.get("email") not in account_emails:
        try:
            with GoogleCloudManager() as manager:
                manager.remove_member_from_group(
                    member.get("email"), access_group.email
                )
                logger.info(
                    "Removed {} from {}, not found in fence but found "
                    "in Google Group.".format(member.get("email"), access_group.email)
                )
        except Exception as e:
            logger.error(
                # typo fix: "memeber" -> "member"
                "ERROR: Could not remove google group member {} from access group {}. Detail {}".format(
                    member.get("email"), access_group.email, e
                )
            )
1068

1069

1070
def _verify_google_service_account_member(session, access_group, member):
    """
    Remove the member (a user service account) from the access group in Google
    if it is not linked to the access group in Fence.

    Args:
        session (Session): db session
        access_group (GoogleBucketAccessGroup): access group
        member (dict): service account member info from Google (expects an
            "email" key)

    Returns:
        None
    """
    account_emails = [
        account.service_account.email
        for account in (
            session.query(ServiceAccountToGoogleBucketAccessGroup)
            .filter_by(access_group_id=access_group.id)
            .all()
        )
    ]

    # direct membership test (replaces any() over a filtered list)
    if member.get("email") not in account_emails:
        try:
            with GoogleCloudManager() as manager:
                manager.remove_member_from_group(
                    member.get("email"), access_group.email
                )
                logger.info(
                    "Removed {} from {}, not found in fence but found "
                    "in Google Group.".format(member.get("email"), access_group.email)
                )
        except Exception as e:
            logger.error(
                # typo fix: "memeber" -> "member"
                "ERROR: Could not remove service account member {} from access group {}. Detail {}".format(
                    member.get("email"), access_group.email, e
                )
            )
1109

1110

1111
class JWTCreator(object):
    """
    Create signed access and refresh JWTs for a given user with a given
    keypair.

    Required keyword arguments: kid, private_key, username, scopes.
    Optional: expires_in (seconds, default 3600), client_id (required only
    for refresh tokens).
    """

    required_kwargs = ["kid", "private_key", "username", "scopes"]
    all_kwargs = required_kwargs + ["expires_in", "client_id"]

    default_expiration = 3600

    def __init__(self, db, base_url, **kwargs):
        self.db = db
        self.base_url = base_url

        # Real values are assigned via setattr below; declared here only so
        # linters won't flag the attributes as undefined.
        self.kid = None
        self.private_key = None
        self.username = None
        self.scopes = None
        self.client_id = None

        missing = [name for name in self.required_kwargs if name not in kwargs]
        if missing:
            raise ValueError("missing required argument: " + missing[0])

        # Copy recognized kwargs onto this object (unrecognized are ignored).
        for name in self.all_kwargs:
            setattr(self, name, kwargs.get(name))

        # Accept scopes either as a list or a comma-separated string like
        # 'openid,fence,data', normalizing to ['openid', 'fence', 'data'].
        if isinstance(getattr(self, "scopes", ""), str):
            self.scopes = [part.strip() for part in self.scopes.split(",")]

        self.expires_in = kwargs.get("expires_in") or self.default_expiration

    def create_access_token(self):
        """
        Create a new access token.

        Return:
            JWTResult: result containing the encoded token and claims
        """
        driver = get_SQLAlchemyDriver(self.db)
        with driver.session as current_session:
            user = query_for_user(session=current_session, username=self.username)
            if not user:
                raise EnvironmentError(
                    "no user found with given username: " + self.username
                )

            return generate_signed_access_token(
                self.kid,
                self.private_key,
                self.expires_in,
                self.scopes,
                user=user,
                iss=self.base_url,
            )

    def create_refresh_token(self):
        """
        Create a new refresh token and add its entry to the database.

        Return:
            JWTResult: the refresh token result
        """
        driver = get_SQLAlchemyDriver(self.db)
        with driver.session as current_session:
            user = query_for_user(session=current_session, username=self.username)
            if not user:
                raise EnvironmentError(
                    "no user found with given username: " + self.username
                )
            if not self.client_id:
                raise EnvironmentError(
                    "no client id is provided. Required for creating refresh token"
                )

            result = generate_signed_refresh_token(
                self.kid,
                self.private_key,
                user,
                self.expires_in,
                self.scopes,
                iss=self.base_url,
                client_id=self.client_id,
            )

            # persist the token's jti/expiration so it can be revoked later
            current_session.add(
                UserRefreshToken(
                    jti=result.claims["jti"],
                    userid=user.id,
                    expires=result.claims["exp"],
                )
            )

            return result
1✔
1211

1212

1213
def link_bucket_to_project(db, bucket_id, bucket_provider, project_auth_id):
    """
    Associate a bucket to a specific project (with provided auth_id).

    Args:
        db (TYPE): database
        bucket_id (str): bucket db id or unique name
            WARNING: name uniqueness is only required for Google so it's not
                     a requirement of the db table. You will get an error if
                     there are multiple buckets with the given name. In that
                     case, you'll have to use the bucket's id.
        bucket_provider (str): CloudProvider.name for the bucket
        project_auth_id (str): Project.auth_id to link to bucket

    Raises:
        NameError: if the cloud provider doesn't exist, if no bucket matches
            the given id/name, or if multiple buckets share the given name
    """
    driver = get_SQLAlchemyDriver(db)
    with driver.session as current_session:
        cloud_provider = (
            current_session.query(CloudProvider).filter_by(name=bucket_provider).first()
        )
        if not cloud_provider:
            raise NameError(
                'No bucket with provider "{}" exists.'.format(bucket_provider)
            )

        # first try by searching using id
        try:
            bucket_id = int(bucket_id)
            bucket_db_entry = (
                current_session.query(Bucket).filter_by(
                    id=bucket_id, provider_id=cloud_provider.id
                )
            ).first()
        except ValueError:
            # invalid id, must be int
            bucket_db_entry = None

        # nothing found? try searching for single bucket with name bucket_id
        if not bucket_db_entry:
            buckets_by_name = current_session.query(Bucket).filter_by(
                name=bucket_id, provider_id=cloud_provider.id
            )
            # don't get a bucket if the name isn't unique. NOTE: for Google,
            # these have to be globally unique so they'll be unique here.
            buckets_with_name = buckets_by_name.count()
            if buckets_with_name == 1:
                bucket_db_entry = buckets_by_name[0]
            elif buckets_with_name > 1:
                raise NameError(
                    'No bucket with id "{bucket_id}" exists. Tried buckets '
                    'with name "{bucket_id}", but this returned multiple '
                    "buckets. Please specify the id from the db and not just "
                    "the name.".format(bucket_id=bucket_id)
                )
            else:
                # keep bucket_db_entry as None
                pass

        if not bucket_db_entry:
            raise NameError('No bucket with id or name "{}" exists.'.format(bucket_id))

        # auto-create the project if it doesn't exist yet
        project_db_entry = (
            current_session.query(Project).filter_by(auth_id=project_auth_id).first()
        )
        if not project_db_entry:
            logger.warning(
                'WARNING: No project with auth_id "{}" exists. Creating...'.format(
                    project_auth_id
                )
            )
            project_db_entry = Project(name=project_auth_id, auth_id=project_auth_id)
            current_session.add(project_db_entry)
            current_session.commit()

        # Add StorageAccess if it doesn't exist for the project
        storage_access = (
            current_session.query(StorageAccess)
            .filter_by(project_id=project_db_entry.id, provider_id=cloud_provider.id)
            .first()
        )
        if not storage_access:
            storage_access = StorageAccess(
                project_id=project_db_entry.id, provider_id=cloud_provider.id
            )
            current_session.add(storage_access)
            current_session.commit()

        # finally, link the project to the bucket (idempotent)
        project_linkage = (
            current_session.query(ProjectToBucket)
            .filter_by(project_id=project_db_entry.id, bucket_id=bucket_db_entry.id)
            .first()
        )
        if not project_linkage:
            project_linkage = ProjectToBucket(
                project_id=project_db_entry.id,
                bucket_id=bucket_db_entry.id,
                privilege=["owner"],  # TODO What should this be???
            )
            current_session.add(project_linkage)
            current_session.commit()
×
1312

1313

1314
def create_or_update_google_bucket(
    db,
    name,
    storage_class=None,
    public=None,
    requester_pays=False,
    google_project_id=None,
    project_auth_id=None,
    access_logs_bucket=None,
    allowed_privileges=None,
):
    """
    Create a Google bucket and populate database with necessary information.

    If the bucket is not public, this will also create a Google Bucket Access
    Group(s) to control access to the new bucket. In order to give access
    to a new user, simply add them to the Google Bucket Access Group.

    NOTE: At the moment, a different Google Bucket Access Group is created
          for each different privilege in allowed_privileges (which defaults
          to ['read', 'write']). So there will be separate Google Groups for
          each access level.

    Args:
        db (TYPE): database
        name (str): name for the bucket, must be globally unique throughout Google
        storage_class (str): enum, one of the cirrus's GOOGLE_STORAGE_CLASSES
        public (bool or None, optional): whether or not the bucket should be public.
            None means leave IAM on the bucket unchanged.
        requester_pays (bool, optional): Whether or not to enable requester_pays
            on the bucket
        google_project_id (str, optional): Google project this bucket should be
            associated with
        project_auth_id (str, optional): a Project.auth_id to associate this
            bucket with. The project must exist in the db already.
        access_logs_bucket (str, optional): Enables logging. Must provide a
            Google bucket name which will store the access logs
        allowed_privileges (List(str), optional): privileges to allow on
            the bucket. Defaults to ['read', 'write']. Also allows:
            ['admin'] for all permission on the bucket including delete,
            ['read'] for viewing access,
            ['write'] for creation rights but not viewing access
    """
    cirrus_config.update(**config["CIRRUS_CFG"])

    google_project_id = google_project_id or cirrus_config.GOOGLE_PROJECT_ID

    # Buckets may live in a separate "storage" project; prefer the project id
    # found in the storage creds key file, falling back to the main project.
    storage_creds_project_id = _get_storage_project_id() or google_project_id

    privileges = allowed_privileges or ["read", "write"]

    driver = get_SQLAlchemyDriver(db)
    with driver.session as current_session:
        # Storage creds are required to create the bucket (the default creds
        # don't have permission).
        bucket_db_entry = _create_or_update_google_bucket_and_db(
            db_session=current_session,
            name=name,
            storage_class=storage_class,
            requester_pays=requester_pays,
            storage_creds_project_id=storage_creds_project_id,
            public=public,
            project_auth_id=project_auth_id,
            access_logs_bucket=access_logs_bucket,
        )

        # Non-public buckets get one access group per privilege level.
        if public is not None and not public:
            for privilege in privileges:
                _setup_google_bucket_access_group(
                    db_session=current_session,
                    google_bucket_name=name,
                    bucket_db_id=bucket_db_entry.id,
                    google_project_id=google_project_id,
                    storage_creds_project_id=storage_creds_project_id,
                    privileges=[privilege],
                )
1393

1394

1395
def create_google_logging_bucket(name, storage_class=None, google_project_id=None):
    """
    Create (or update) a private Google bucket intended to store access logs.

    Args:
        name (str): globally-unique name for the logging bucket
        storage_class (str, optional): one of cirrus's GOOGLE_STORAGE_CLASSES
        google_project_id (str, optional): project to create the bucket in;
            defaults to the storage creds project, then the configured project
    """
    cirrus_config.update(**config["CIRRUS_CFG"])

    # Determine which project the bucket lives in: explicit argument first,
    # then the storage creds key file, then the configured default.
    storage_creds_project_id = (
        google_project_id
        or _get_storage_project_id()
        or cirrus_config.GOOGLE_PROJECT_ID
    )

    with GoogleCloudManager(
        storage_creds_project_id, creds=cirrus_config.configs["GOOGLE_STORAGE_CREDS"]
    ) as g_mgr:
        g_mgr.create_or_update_bucket(
            name,
            storage_class=storage_class,
            public=False,
            requester_pays=False,
            for_logging=True,
        )

        logger.info(
            "Successfully created Google Bucket {} "
            "to store Access Logs.".format(name)
        )
1422

1423

1424
def _get_storage_project_id():
    """
    Determine the project where buckets are located by reading the project id
    from the Google storage creds key file.

    Returns:
        str or None: project id from the key file, or None if the file
            doesn't exist
    """
    creds_path = cirrus_config.configs["GOOGLE_STORAGE_CREDS"]
    if not os.path.exists(creds_path):
        return None
    with open(creds_path) as creds_file:
        return json.load(creds_file).get("project_id")
×
1435

1436

1437
def _create_or_update_google_bucket_and_db(
    db_session,
    name,
    storage_class,
    public,
    requester_pays,
    storage_creds_project_id,
    project_auth_id,
    access_logs_bucket,
):
    """
    Create or update the Google bucket and add the necessary db entries.

    Args:
        db_session (Session): db session
        name (str): globally-unique bucket name
        storage_class (str): one of cirrus's GOOGLE_STORAGE_CLASSES
        public (bool or None): whether the bucket should be public; None
            leaves bucket IAM unchanged
        requester_pays (bool): whether to enable requester_pays on the bucket
        storage_creds_project_id (str): project whose storage creds are used
            to create the bucket
        project_auth_id (str or None): Project.auth_id to link the bucket to;
            if the project doesn't exist, only a message is logged (no
            linking occurs)
        access_logs_bucket (str or None): bucket name that will store access
            logs (enables logging)

    Returns:
        Bucket: the db entry for the bucket
    """
    # use the storage creds explicitly (default creds don't have permission)
    manager = GoogleCloudManager(
        storage_creds_project_id, creds=cirrus_config.configs["GOOGLE_STORAGE_CREDS"]
    )
    with manager as g_mgr:
        g_mgr.create_or_update_bucket(
            name,
            storage_class=storage_class,
            public=public,
            requester_pays=requester_pays,
            access_logs_bucket=access_logs_bucket,
        )

        # add bucket to db
        google_cloud_provider = _get_or_create_google_provider(db_session)

        bucket_db_entry = (
            db_session.query(Bucket)
            .filter_by(name=name, provider_id=google_cloud_provider.id)
            .first()
        )
        if not bucket_db_entry:
            bucket_db_entry = Bucket(name=name, provider_id=google_cloud_provider.id)
            db_session.add(bucket_db_entry)
            db_session.commit()

        logger.info("Successfully updated Google Bucket {}.".format(name))

        # optionally link this new bucket to an existing project
        if project_auth_id:
            project_db_entry = (
                db_session.query(Project).filter_by(auth_id=project_auth_id).first()
            )
            if not project_db_entry:
                # unlike link_bucket_to_project, this does NOT auto-create
                # the project
                logger.info(
                    "No project with auth_id {} found. No linking "
                    "occured.".format(project_auth_id)
                )
            else:
                project_linkage = (
                    db_session.query(ProjectToBucket)
                    .filter_by(
                        project_id=project_db_entry.id, bucket_id=bucket_db_entry.id
                    )
                    .first()
                )
                if not project_linkage:
                    project_linkage = ProjectToBucket(
                        project_id=project_db_entry.id,
                        bucket_id=bucket_db_entry.id,
                        privilege=["owner"],  # TODO What should this be???
                    )
                    db_session.add(project_linkage)
                    db_session.commit()
                logger.info(
                    "Successfully linked project with auth_id {} "
                    "to the bucket.".format(project_auth_id)
                )

                # Add StorageAccess if it doesn't exist for the project
                storage_access = (
                    db_session.query(StorageAccess)
                    .filter_by(
                        project_id=project_db_entry.id,
                        provider_id=google_cloud_provider.id,
                    )
                    .first()
                )
                if not storage_access:
                    storage_access = StorageAccess(
                        project_id=project_db_entry.id,
                        provider_id=google_cloud_provider.id,
                    )
                    db_session.add(storage_access)
                    db_session.commit()

    return bucket_db_entry
×
1526

1527

1528
def _setup_google_bucket_access_group(
    db_session,
    google_bucket_name,
    bucket_db_id,
    google_project_id,
    storage_creds_project_id,
    privileges,
):
    """
    Create a Google Bucket Access Group for a bucket and grant that group
    the requested access on the bucket itself.

    Args:
        db_session: fence database session
        google_bucket_name (str): name of the Google bucket
        bucket_db_id (int): fence db id of the bucket record
        google_project_id (str): Google project to create the group under
        storage_creds_project_id (str): project whose storage credentials
            are used to modify the bucket's IAM policy
        privileges (List[str]): access levels for the group (e.g. ["read"])

    Returns:
        GoogleBucketAccessGroup: the created (or pre-existing) group record
    """
    bucket_access_group = _create_google_bucket_access_group(
        db_session, google_bucket_name, bucket_db_id, google_project_id, privileges
    )

    # the dedicated storage credentials (not the default creds) are what have
    # permission to update the bucket's IAM policy
    with GoogleCloudManager(
        storage_creds_project_id, creds=cirrus_config.configs["GOOGLE_STORAGE_CREDS"]
    ) as storage_mgr:
        storage_mgr.give_group_access_to_bucket(
            bucket_access_group.email, google_bucket_name, access=privileges
        )

    logger.info(
        "Successfully set up Google Bucket Access Group {} "
        "for Google Bucket {}.".format(bucket_access_group.email, google_bucket_name)
    )

    return bucket_access_group
1554

1555

1556
def _create_google_bucket_access_group(
    db_session, google_bucket_name, bucket_db_id, google_project_id, privileges
):
    """
    Create a Google group intended to hold members with access to a bucket,
    both in Google and in the fence database.

    Note: this does NOT grant the group any access on the bucket itself;
    see _setup_google_bucket_access_group for that.

    Args:
        db_session: fence database session
        google_bucket_name (str): name of the Google bucket
        bucket_db_id (int): fence db id of the bucket record
        google_project_id (str): Google project to create the group under
        privileges (List[str]): access levels recorded for the group

    Returns:
        GoogleBucketAccessGroup: the created (or pre-existing) db record
    """
    prefix = config.get("GOOGLE_GROUP_PREFIX", "")
    # e.g. <prefix>_<bucket>_<read_write>_gbag
    group_name = f"{prefix}_{google_bucket_name}_{'_'.join(privileges)}_gbag"

    # default credentials suffice for creating groups and iam policies
    with GoogleCloudManager(google_project_id) as google_mgr:
        group_info = google_mgr.create_group(name=group_name)
        group_email = group_info["email"]

        # record the bucket group in fence, unless it already exists
        access_group = (
            db_session.query(GoogleBucketAccessGroup)
            .filter_by(bucket_id=bucket_db_id, email=group_email)
            .first()
        )
        if access_group is None:
            access_group = GoogleBucketAccessGroup(
                bucket_id=bucket_db_id, email=group_email, privileges=privileges
            )
            db_session.add(access_group)
            db_session.commit()

    return access_group
1588

1589

1590
def _get_or_create_google_provider(db_session):
    """
    Return the "google" CloudProvider record, creating it first if it does
    not exist yet.

    Args:
        db_session: fence database session

    Returns:
        CloudProvider: the google cloud provider record
    """
    provider = db_session.query(CloudProvider).filter_by(name="google").first()
    if provider:
        return provider

    provider = CloudProvider(
        name="google", description="Google Cloud Platform", service="general"
    )
    db_session.add(provider)
    db_session.commit()
    return provider
1601

1602

1603
def link_external_bucket(db, name):
    """
    Link with bucket owned by an external party. This will create the bucket
    in fence database and create a google group to access the bucket in both
    Google and fence database.
    The external party will need to add the google group read access to bucket
    afterwards.

    Args:
        db (str): database connection string
        name (str): name of the externally-owned bucket to link

    Returns:
        str: email of the google group with (expected) read access

    Raises:
        Exception: if a bucket with this name already exists but does not
            have exactly one associated 'read' Google Bucket Access Group
    """
    cirrus_config.update(**config["CIRRUS_CFG"])
    google_project_id = cirrus_config.GOOGLE_PROJECT_ID

    driver = get_SQLAlchemyDriver(db)
    with driver.session as db_session:
        google_cloud_provider = _get_or_create_google_provider(db_session)

        # if the bucket is already known, reuse its existing 'read' group email
        existing_bucket = db_session.query(Bucket).filter_by(name=name).first()
        if existing_bucket:
            read_groups = (
                db_session.query(GoogleBucketAccessGroup)
                .filter(GoogleBucketAccessGroup.privileges.any("read"))
                .filter_by(bucket_id=existing_bucket.id)
                .all()
            )
            if len(read_groups) > 1:
                raise Exception(
                    f"Existing bucket {name} has more than 1 associated "
                    "Google Bucket Access Group with privilege of 'read'. "
                    "This is not expected and we cannot continue linking."
                )
            if not read_groups:
                raise Exception(
                    f"Existing bucket {name} has no associated "
                    "Google Bucket Access Group with privilege of 'read'. "
                    "This is not expected and we cannot continue linking."
                )

            email = read_groups[0].email
            logger.warning(
                f"bucket already exists with name: {name}, using existing group email: {email}"
            )
            return email

        # new bucket: record it, then create a 'read' access group for it
        bucket_db_entry = Bucket(name=name, provider_id=google_cloud_provider.id)
        db_session.add(bucket_db_entry)
        db_session.commit()

        access_group = _create_google_bucket_access_group(
            db_session,
            name,
            bucket_db_entry.id,
            google_project_id,
            ["read"],
        )

    logger.info("bucket access group email: {}".format(access_group.email))
    return access_group.email
1667

1668

1669
def verify_user_registration(DB):
    """
    Validate user registration.

    Runs the user-registration validation check against the given database
    after loading the deployment's Google settings into cirrus.

    Args:
        DB (str): database connection string
    """
    # cirrus needs the deployment's Google configuration before any
    # validation that touches Google can run
    cirrus_config.update(**config["CIRRUS_CFG"])

    validation_check(DB)
1676

1677

1678
def force_update_google_link(DB, username, google_email, expires_in=None):
    """
    WARNING: This function circumvents Google Auth flow, and should only be
    used for internal testing!
    WARNING: This function assumes that a user already has a proxy group!

    Adds user's google account to proxy group and/or updates expiration for
    that google account's access.
    WARNING: This assumes that provided arguments represent valid information.
             This BLINDLY adds without verification. Do verification
             before this.
    Specifically, this ASSUMES that the proxy group provided belongs to the
    given user and that the user has ALREADY authenticated to prove the
    provided google_email is also their's.

    Args:
        DB: database connection string
        username (str): Username to link with
        google_email (str): Google email to link to
        expires_in (int): requested lifetime in seconds (capped at the
            configured maximum)

    Raises:
        Unauthorized: Couldn't determine user

    Returns:
        int: expiration timestamp of the newly updated google account's access
    """
    cirrus_config.update(**config["CIRRUS_CFG"])

    driver = get_SQLAlchemyDriver(DB)
    with driver.session as session:
        user_account = query_for_user(session=session, username=username)
        if not user_account:
            raise Unauthorized(
                "Could not determine authed user "
                "from session. Unable to link Google account."
            )

        user_id = user_account.id
        proxy_group_id = user_account.google_proxy_group_id

        # reuse an existing linked google account record, or create one
        user_google_account = (
            session.query(UserGoogleAccount)
            .filter(UserGoogleAccount.email == google_email)
            .first()
        )
        if user_google_account is None:
            user_google_account = add_new_user_google_account(
                user_id, google_email, session
            )

        # cap the requested lifetime at the configured maximum
        # (default: 7 days) — also used when no lifetime was requested
        default_expires_in = config.get(
            "GOOGLE_USER_SERVICE_ACCOUNT_ACCESS_EXPIRES_IN", 604800
        )
        expires_in = get_valid_expiration(
            expires_in, max_limit=default_expires_in, default=default_expires_in
        )
        # absolute unix timestamp at which access lapses
        expiration = int(time.time() + expires_in)

        force_update_user_google_account_expiration(
            user_google_account, proxy_group_id, google_email, expiration, session
        )

        session.commit()

        return expiration
1749

1750

1751
def notify_problem_users(db, emails, auth_ids, check_linking, google_project_id):
    """
    Builds a list of users (from provided list of emails) who do not
    have access to any subset of provided auth_ids. Send email to users
    informing them to get access to needed projects.

    Args:
        db (string): database instance
        emails (list(string)): list of emails to check for access
        auth_ids (list(string)): list of project auth_ids to check that
            emails have access
        check_linking (bool): flag for if emails should be checked for
            linked google email
        google_project_id (string): Google project associated with the
            emails — passed through to email_users_without_access
            (presumably used to scope the access check; verify against
            that function)
    """
    email_users_without_access(db, auth_ids, emails, check_linking, google_project_id)
1763

1764

1765
def migrate_database():
    """
    Upgrade the fence database schema to the latest alembic revision ("head").

    --raiseerr makes alembic raise on failure instead of just logging.
    """
    alembic_main(["--raiseerr", "upgrade", "head"])
    logger.info("Done.")
1768

1769

1770
def google_list_authz_groups(db):
    """
    Builds a list of Google authorization information which includes
    the Google Bucket Access Group emails, Bucket(s) they're associated with, and
    underlying Fence Project auth_id that provides access to that Bucket/Group.

    Also prints each row (group email, bucket name, auth_id) to stdout.

    Args:
        db (string): database instance

    Returns:
        list of (group email, bucket name, project auth_id, ProjectToBucket) rows
    """
    driver = get_SQLAlchemyDriver(db)

    with driver.session as db_session:
        authz_rows = (
            db_session.query(
                GoogleBucketAccessGroup.email,
                Bucket.name,
                Project.auth_id,
                ProjectToBucket,
            )
            .join(Project, ProjectToBucket.project_id == Project.id)
            .join(Bucket, ProjectToBucket.bucket_id == Bucket.id)
            .join(
                GoogleBucketAccessGroup, GoogleBucketAccessGroup.bucket_id == Bucket.id
            )
        ).all()

        print("GoogleBucketAccessGroup.email, Bucket.name, Project.auth_id")
        # last element is the ProjectToBucket record, which is not printed
        for group_email, bucket_name, auth_id, _ in authz_rows:
            print(", ".join((group_email, bucket_name, auth_id)))

        return authz_rows
1800

1801

1802
def access_token_polling_job(
1✔
1803
    db, chunk_size=None, concurrency=None, thread_pool_size=None, buffer_size=None
1804
):
1805
    """
1806
    Update visas and refresh tokens for users with valid visas and refresh tokens
1807

1808
    db (string): database instance
1809
    chunk_size (int): size of chunk of users we want to take from each iteration
1810
    concurrency (int): number of concurrent users going through the visa update flow
1811
    thread_pool_size (int): number of Docker container CPU used for jwt verifcation
1812
    buffer_size (int): max size of queue
1813
    """
1814
    # Instantiating a new client here because the existing
1815
    # client uses authz_provider
1816
    arborist = ArboristClient(
×
1817
        arborist_base_url=config["ARBORIST"],
1818
        logger=get_logger("user_syncer.arborist_client"),
1819
    )
1820
    driver = get_SQLAlchemyDriver(db)
×
1821
    job = TokenAndAuthUpdater(
×
1822
        chunk_size=int(chunk_size) if chunk_size else None,
1823
        concurrency=int(concurrency) if concurrency else None,
1824
        thread_pool_size=int(thread_pool_size) if thread_pool_size else None,
1825
        buffer_size=int(buffer_size) if buffer_size else None,
1826
        arborist=arborist,
1827
    )
1828
    with driver.session as db_session:
×
1829
        loop = asyncio.get_event_loop()
×
1830
        loop.run_until_complete(job.update_tokens(db_session))
×
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc