• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

uc-cdis / fence / 12416371639

19 Dec 2024 04:05PM UTC coverage: 75.106% (-0.1%) from 75.24%
12416371639

Pull #1199

github

web-flow
Merge branch 'master' into automatedCopy-feature/oidc-oauth-groups
Pull Request #1199: Automated copy feature/OIDC oauth groups

7965 of 10605 relevant lines covered (75.11%)

0.75 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

60.35
fence/scripting/fence_create.py
1
from datetime import datetime, timedelta
1✔
2
import os
1✔
3
import os.path
1✔
4
import requests
1✔
5
import time
1✔
6
from yaml import safe_load
1✔
7
import json
1✔
8
import pprint
1✔
9
import asyncio
1✔
10

11
from alembic.config import main as alembic_main
1✔
12
from gen3cirrus import GoogleCloudManager
1✔
13
from gen3cirrus.google_cloud.errors import GoogleAuthError
1✔
14
from gen3cirrus.config import config as cirrus_config
1✔
15
from cdislogging import get_logger
1✔
16
from sqlalchemy import func
1✔
17
from userdatamodel.models import (
1✔
18
    AccessPrivilege,
19
    Bucket,
20
    CloudProvider,
21
    GoogleProxyGroup,
22
    Group,
23
    IdentityProvider,
24
    Project,
25
    StorageAccess,
26
    User,
27
    ProjectToBucket,
28
)
29
from sqlalchemy import and_
1✔
30

31
from fence.blueprints.link import (
1✔
32
    force_update_user_google_account_expiration,
33
    add_new_user_google_account,
34
)
35
from fence.errors import Unauthorized
1✔
36
from fence.jwt.token import (
1✔
37
    generate_signed_access_token,
38
    generate_signed_refresh_token,
39
    issued_and_expiration_times,
40
)
41
from fence.job.access_token_updater import AccessTokenUpdater
1✔
42
from fence.models import (
1✔
43
    Client,
44
    GoogleServiceAccount,
45
    GoogleServiceAccountKey,
46
    UserGoogleAccount,
47
    UserGoogleAccountToProxyGroup,
48
    GoogleBucketAccessGroup,
49
    GoogleProxyGroupToGoogleBucketAccessGroup,
50
    UserRefreshToken,
51
    ServiceAccountToGoogleBucketAccessGroup,
52
    query_for_user,
53
    GA4GHVisaV1,
54
    get_client_expires_at,
55
)
56
from fence.scripting.google_monitor import email_users_without_access, validation_check
1✔
57
from fence.config import config
1✔
58
from fence.sync.sync_users import UserSyncer
1✔
59
from fence.utils import (
1✔
60
    create_client,
61
    get_valid_expiration,
62
    generate_client_credentials,
63
    get_SQLAlchemyDriver,
64
)
65
from sqlalchemy.orm.attributes import flag_modified
1✔
66
from gen3authz.client.arborist.client import ArboristClient
1✔
67

68
logger = get_logger(__name__)
1✔
69

70

71
def list_client_action(db):
    """
    Print the attributes of every OIDC client stored in the database.

    Args:
        db (str): database connection string

    Any error is logged rather than raised.
    """
    try:
        driver = get_SQLAlchemyDriver(db)
        with driver.session as session:
            all_clients = session.query(Client).all()
            for client_row in all_clients:
                pprint.pprint(client_row.__dict__)
    except Exception as e:
        logger.error(str(e))
79

80

81
def modify_client_action(
    DB,
    client=None,
    delete_urls=False,
    urls=None,
    name=None,
    description=None,
    set_auto_approve=False,
    unset_auto_approve=False,
    arborist=None,
    policies=None,
    allowed_scopes=None,
    append=False,
    expires_in=None,
):
    """
    Update every OIDC client whose name matches *client*.

    Args:
        DB (str): database connection string
        client (str): name of the client(s) to modify
        delete_urls (bool): clear all redirect URIs
        urls (str or list): new redirect URIs (replace, or add when `append`)
        name (str): new client name
        description (str): new client description
        set_auto_approve (bool): enable auto-approval
        unset_auto_approve (bool): disable auto-approval
        arborist: arborist client; when provided with `policies`, the client's
            policies are updated after the DB commit
        policies (list): arborist policies to grant the client
        allowed_scopes (list): new allowed scopes (replace, or add when `append`)
        append (bool): append to urls/scopes instead of replacing them
        expires_in: number of days until the client expires

    Raises:
        Exception: if no client with the given name exists
    """
    driver = get_SQLAlchemyDriver(DB)
    with driver.session as session:
        client_name = client
        matched = session.query(Client).filter(Client.name == client_name).all()
        if not matched:
            raise Exception("client {} does not exist".format(client_name))
        # NOTE: `client` is rebound to each matching row, so the arborist call
        # after the loop operates on the last row processed (as before).
        for client in matched:
            meta = client.client_metadata
            if urls:
                if append:
                    meta["redirect_uris"] += urls
                    logger.info("Adding {} to urls".format(urls))
                else:
                    meta["redirect_uris"] = urls if isinstance(urls, list) else [urls]
                    logger.info("Changing urls to {}".format(urls))
            if delete_urls:
                meta["redirect_uris"] = []
                logger.info("Deleting urls")
            if set_auto_approve:
                client.auto_approve = True
                logger.info("Auto approve set to True")
            if unset_auto_approve:
                client.auto_approve = False
                logger.info("Auto approve set to False")
            if name:
                client.name = name
                logger.info("Updating name to {}".format(name))
            if description:
                client.description = description
                logger.info("Updating description to {}".format(description))
            if allowed_scopes:
                if append:
                    merged_scopes = client.scope.split() + allowed_scopes
                    meta["scope"] = " ".join(merged_scopes)
                    logger.info("Adding {} to allowed_scopes".format(allowed_scopes))
                else:
                    meta["scope"] = " ".join(allowed_scopes)
                    logger.info("Updating allowed_scopes to {}".format(allowed_scopes))
            if expires_in:
                client.expires_at = get_client_expires_at(
                    expires_in=expires_in, grant_types=client.grant_types
                )
            # call the setter on the JSON column so changes are persisted
            client.set_client_metadata(meta)
        session.commit()
        if arborist is not None and policies:
            arborist.update_client(client.client_id, policies)
146

147

148
def create_client_action(
    DB,
    username=None,
    client=None,
    urls=None,
    auto_approve=False,
    expires_in=None,
    **kwargs,
):
    """
    Create a new OIDC client and print its credentials.

    Args:
        DB (str): database connection string
        username (str): owner of the client
        client (str): name of the new client
        urls: redirect URIs for the client
        auto_approve (bool): whether the client is auto-approved
        expires_in: number of days until the client expires
        **kwargs: forwarded to fence.utils.create_client
    """
    print("\nSave these credentials! Fence will not save the unhashed client secret.")
    credentials = create_client(
        DB=DB,
        username=username,
        urls=urls,
        name=client,
        auto_approve=auto_approve,
        expires_in=expires_in,
        **kwargs,
    )
    print("client id, client secret:")
    # This should always be the last line of output and should remain in this format--
    # cloud-auto and gen3-qa use the output programmatically.
    print(credentials)
171

172

173
def delete_client_action(DB, client_name):
    """
    Delete every OIDC client named *client_name*, along with any Google
    service accounts tied to it.

    Args:
        DB (str): database connection string
        client_name (str): name of the client(s) to delete

    Any error (including a missing client) is logged rather than raised.
    """
    try:
        cirrus_config.update(**config["CIRRUS_CFG"])
    except AttributeError:
        # no cirrus config, continue anyway. we don't have client service accounts
        # to delete
        pass

    try:
        driver = get_SQLAlchemyDriver(DB)
        with driver.session as current_session:
            # fix: the original issued the same name-filtered query twice
            # (.first() for the existence check, then .all()); one query suffices
            clients = (
                current_session.query(Client).filter(Client.name == client_name).all()
            )
            if not clients:
                raise Exception("client {} does not exist".format(client_name))

            for client in clients:
                _remove_client_service_accounts(current_session, client)
                current_session.delete(client)
            current_session.commit()

        logger.info("Client '{}' deleted".format(client_name))
    except Exception as e:
        logger.error(str(e))
203

204

205
def delete_expired_clients_action(DB, slack_webhook=None, warning_days=None):
    """
    Delete all expired OIDC clients and post warnings about clients that have
    expired or are about to expire.

    Args:
        DB (str): database connection string
        slack_webhook (str): Slack webhook to post warnings when clients expired or are about to expire
        warning_days (int): how many days before a client expires should we post a warning on
            Slack (default: 7)
    """
    try:
        cirrus_config.update(**config["CIRRUS_CFG"])
    except AttributeError:
        # no cirrus config, continue anyway. we don't have client service accounts
        # to delete
        pass

    def format_uris(uris):
        # `uris` may be None / empty; returned unchanged in that case
        if not uris or uris == [None]:
            return uris
        return " ".join(uris)

    now = datetime.now().timestamp()
    driver = get_SQLAlchemyDriver(DB)
    expired_messages = ["Some expired OIDC clients have been deleted!"]
    expiring_messages = ["Some OIDC clients are expiring soon!"]
    with driver.session as current_session:
        clients = (
            current_session.query(Client)
            # for backwards compatibility, 0 means no expiration
            .filter(Client.expires_at != 0)
            .filter(Client.expires_at <= now)
            .all()
        )

        for client in clients:
            expired_messages.append(
                f"Client '{client.name}' (ID '{client.client_id}') expired at {datetime.fromtimestamp(client.expires_at)} UTC. Redirect URIs: {format_uris(client.redirect_uris)})"
            )
            _remove_client_service_accounts(current_session, client)
            current_session.delete(client)
            current_session.commit()

        # get the clients that are expiring soon
        warning_days = float(warning_days) if warning_days else 7
        warning_days_in_secs = warning_days * 24 * 60 * 60  # days to seconds
        # BUG FIX: the original used `datetime.utcnow().timestamp()`; since
        # utcnow() is naive, .timestamp() interprets it as *local* time and
        # the resulting epoch is skewed by the host's UTC offset. Using
        # datetime.now() matches the `now` computation above and yields a
        # correct epoch value.
        warning_expiry = (
            datetime.now() + timedelta(seconds=warning_days_in_secs)
        ).timestamp()
        expiring_clients = (
            current_session.query(Client)
            .filter(Client.expires_at != 0)
            .filter(Client.expires_at <= warning_expiry)
            .all()
        )

        # build messages while the session is still open, so attribute access
        # does not touch detached SQLAlchemy instances
        expiring_messages.extend(
            [
                f"Client '{client.name}' (ID '{client.client_id}') expires at {datetime.fromtimestamp(client.expires_at)} UTC. Redirect URIs: {format_uris(client.redirect_uris)}"
                for client in expiring_clients
            ]
        )

    for post_msgs, nothing_to_do_msg in (
        (expired_messages, "No expired clients to delete"),
        (expiring_messages, "No clients are close to expiring"),
    ):
        # each list starts with a title entry; >1 means there is real content
        if len(post_msgs) > 1:
            for e in post_msgs:
                logger.info(e)
            if slack_webhook:  # post a warning on Slack
                logger.info("Posting to Slack...")
                payload = {
                    "attachments": [
                        {
                            "fallback": post_msgs[0],
                            "title": post_msgs[0],
                            "text": "\n- " + "\n- ".join(post_msgs[1:]),
                            "color": "#FF5F15",
                        }
                    ]
                }
                resp = requests.post(slack_webhook, json=payload)
                resp.raise_for_status()
        else:
            logger.info(nothing_to_do_msg)
288

289

290
def rotate_client_action(DB, client_name, expires_in=None):
    """
    Rotate a client's credentials (client ID and secret). The old credentials are
    NOT deactivated and must be deleted or expired separately. This allows for a
    rotation without downtime.

    Args:
        DB (str): database connection string
        client_name (str): name of the client to rotate credentials for
        expires_in (optional): number of days until this client expires (by default, no expiration)

    Returns:
        This function does not return anything, but it prints the new set of credentials.
    """
    driver = get_SQLAlchemyDriver(DB)
    with driver.session as s:
        client = s.query(Client).filter(Client.name == client_name).first()
        if not client:
            raise Exception("client {} does not exist".format(client_name))

        # create a new row in the DB for the same client, with a new ID, secret and expiration
        client_id, client_secret, hashed_secret = generate_client_credentials(
            client.is_confidential
        )
        client = Client(
            client_id=client_id,
            # only the hash is stored; the plaintext secret is printed once below
            client_secret=hashed_secret,
            expires_in=expires_in,
            # the rest is identical to the client being rotated
            user=client.user,
            redirect_uris=client.redirect_uris,
            allowed_scopes=client.scope,
            description=client.description,
            name=client.name,
            auto_approve=client.auto_approve,
            grant_types=client.grant_types,
            is_confidential=client.is_confidential,
            token_endpoint_auth_method=client.token_endpoint_auth_method,
        )
        s.add(client)
        s.commit()

    res = (client_id, client_secret)
    print(
        f"\nSave these credentials! Fence will not save the unhashed client secret.\nclient id, client secret:\n{res}"
    )
336

337

338
def _remove_client_service_accounts(db_session, client):
    """
    Delete all Google service accounts belonging to *client*: first in Google
    via GoogleCloudManager, then (on success) from the fence database.

    Args:
        db_session: SQLAlchemy session
        client (Client): client whose service accounts should be removed
    """
    accounts = (
        db_session.query(GoogleServiceAccount)
        .filter(GoogleServiceAccount.client_id == client.client_id)
        .all()
    )
    if not accounts:
        return

    with GoogleCloudManager() as manager:
        for account in accounts:
            logger.info(
                "Deleting client {}'s service account: {}".format(
                    client.name, account.email
                )
            )
            response = manager.delete_service_account(account.email)
            if response.get("error"):
                # Google refused the deletion; keep the DB row so the mismatch
                # stays visible
                logger.error("ERROR - from Google: {}".format(response))
                logger.error(
                    "ERROR - Could not delete client service account: {}".format(
                        account.email
                    )
                )
            else:
                db_session.delete(account)
                db_session.commit()
364

365

366
def get_default_init_syncer_inputs(authz_provider):
    """
    Assemble the default keyword arguments for init_syncer() from environment
    variables and the fence config.

    Args:
        authz_provider (str): authz provider name passed to the arborist client

    Returns:
        dict: keys "DB", "arborist", "dbGaP", "STORAGE_CREDENTIALS"
    """
    DB = os.environ.get("FENCE_DB") or config.get("DB")
    if DB is None:
        # fall back to the legacy settings module when available
        try:
            from fence.settings import DB
        except ImportError:
            pass

    arborist = ArboristClient(
        arborist_base_url=config["ARBORIST"],
        logger=get_logger("user_syncer.arborist_client"),
        authz_provider=authz_provider,
    )

    dbGaP = os.environ.get("dbGaP") or config.get("dbGaP")
    if not isinstance(dbGaP, list):
        dbGaP = [dbGaP]

    return {
        "DB": DB,
        "arborist": arborist,
        "dbGaP": dbGaP,
        "STORAGE_CREDENTIALS": config["STORAGE_CREDENTIALS"],
    }
391

392

393
def init_syncer(
    dbGaP,
    STORAGE_CREDENTIALS,
    DB,
    projects=None,
    is_sync_from_dbgap_server=False,
    sync_from_local_csv_dir=None,
    sync_from_local_yaml_file=None,
    arborist=None,
    folder=None,
):
    """
    Build a UserSyncer for syncing ACL files from dbGaP to the auth db and
    storage backends.

    The cirrus (Google) config is applied here because dbGaP is an optional
    requirement for fence and might not be specified in config.

    Args:
        dbGaP: dbGaP server configuration
        STORAGE_CREDENTIALS: storage backend credentials
        DB (str): database connection string
        projects (str): path to a project_mapping yaml file which maps dbGaP
            phsids to projects in the fence database. Expected structure:

            .. code-block:: yaml
                phs000178:
                  - name: TCGA
                    auth_id: phs000178
                  - name: TCGA-PCAWG
                    auth_id: TCGA-PCAWG
                phs000235:
                  - name: CGCI
                    auth_id: phs000235
        is_sync_from_dbgap_server (bool): pull user files from the dbGaP server
        sync_from_local_csv_dir (str): directory containing local csv user files
        sync_from_local_yaml_file (str): path to a local yaml user file
        arborist: arborist client
        folder (str): folder for downloaded files

    Returns:
        UserSyncer, or None when a provided path does not exist
    """
    try:
        cirrus_config.update(**config["CIRRUS_CFG"])
    except AttributeError:
        # no cirrus config, continue anyway. Google APIs will probably fail.
        # this is okay if users don't need access to Google buckets
        pass

    if projects is not None and not os.path.exists(projects):
        logger.error("====={} is not found!!!=======".format(projects))
        return
    if sync_from_local_csv_dir and not os.path.exists(sync_from_local_csv_dir):
        logger.error("====={} is not found!!!=======".format(sync_from_local_csv_dir))
        return
    if sync_from_local_yaml_file and not os.path.exists(sync_from_local_yaml_file):
        logger.error("====={} is not found!!!=======".format(sync_from_local_yaml_file))
        return

    mapping = None
    if projects:
        try:
            with open(projects, "r") as mapping_file:
                mapping = safe_load(mapping_file)
        except IOError:
            pass

    return UserSyncer(
        dbGaP,
        DB,
        project_mapping=mapping,
        storage_credentials=STORAGE_CREDENTIALS,
        is_sync_from_dbgap_server=is_sync_from_dbgap_server,
        sync_from_local_csv_dir=sync_from_local_csv_dir,
        sync_from_local_yaml_file=sync_from_local_yaml_file,
        arborist=arborist,
        folder=folder,
    )
462

463

464
def download_dbgap_files(
    # Note: need to keep all parameter to prevent download failure
    dbGaP,
    STORAGE_CREDENTIALS,
    DB,
    projects=None,
    is_sync_from_dbgap_server=False,
    sync_from_local_csv_dir=None,
    sync_from_local_yaml_file=None,
    arborist=None,
    folder=None,
):
    """
    Build a UserSyncer and download the dbGaP ACL files without syncing.

    Exits the process with status 1 when the syncer cannot be constructed.
    """
    syncer = init_syncer(
        dbGaP,
        STORAGE_CREDENTIALS,
        DB,
        projects=projects,
        is_sync_from_dbgap_server=is_sync_from_dbgap_server,
        sync_from_local_csv_dir=sync_from_local_csv_dir,
        sync_from_local_yaml_file=sync_from_local_yaml_file,
        arborist=arborist,
        folder=folder,
    )
    if not syncer:
        exit(1)
    syncer.download()
490

491

492
def sync_users(
    dbGaP,
    STORAGE_CREDENTIALS,
    DB,
    projects=None,
    is_sync_from_dbgap_server=False,
    sync_from_local_csv_dir=None,
    sync_from_local_yaml_file=None,
    arborist=None,
    folder=None,
):
    """
    Build a UserSyncer and run a full user sync.

    Exits the process with status 1 when the syncer cannot be constructed.
    """
    syncer = init_syncer(
        dbGaP,
        STORAGE_CREDENTIALS,
        DB,
        projects=projects,
        is_sync_from_dbgap_server=is_sync_from_dbgap_server,
        sync_from_local_csv_dir=sync_from_local_csv_dir,
        sync_from_local_yaml_file=sync_from_local_yaml_file,
        arborist=arborist,
        folder=folder,
    )
    if not syncer:
        exit(1)
    syncer.sync()
517

518

519
def create_sample_data(DB, yaml_file_path):
    """
    Populate the fence database from a sample-data yaml file: cloud providers,
    projects, groups, then users.

    Args:
        DB (str): database connection string
        yaml_file_path (str): path to the sample-data yaml file
    """
    with open(yaml_file_path, "r") as yaml_file:
        data = safe_load(yaml_file)

    driver = get_SQLAlchemyDriver(DB)
    with driver.session as session:
        create_cloud_providers(session, data)
        create_projects(session, data)
        create_group(session, data)
        create_users_with_group(DB, session, data)
529

530

531
def create_group(s, data):
    """
    Create each group listed under the "groups" key of *data* (if it does not
    already exist) and grant it the listed projects.

    Args:
        s: SQLAlchemy session
        data (dict): parsed sample-data yaml
    """
    for group_name, fields in data.get("groups", {}).items():
        group = s.query(Group).filter(Group.name == group_name).first()
        if group is None:
            group = Group(name=group_name)
            s.add(group)
        for project_data in fields.get("projects", []):
            grant_project_to_group_or_user(s, project_data, group)
541

542

543
def create_projects(s, data):
    """Create every project listed under the "projects" key of *data*."""
    for project_data in data.get("projects", []):
        create_project(s, project_data)
547

548

549
def create_project(s, project_data):
    """
    Get or create the Project described by *project_data*, along with its
    storage accesses and buckets.

    Args:
        s: SQLAlchemy session
        project_data (dict): must contain "auth_id"; may contain "name" and
            "storage_accesses" (each with "name" and optional "buckets")

    Returns:
        Project: the existing or newly created project

    Raises:
        AssertionError: if a referenced CloudProvider does not exist
    """
    auth_id = project_data["auth_id"]
    name = project_data.get("name", auth_id)
    project = s.query(Project).filter_by(name=name).first()
    if project is None:
        project = Project(name=name, auth_id=auth_id)
        s.add(project)

    for storage_access in project_data.get("storage_accesses", []):
        provider_name = storage_access["name"]
        existing_access = (
            s.query(StorageAccess)
            .join(StorageAccess.provider, StorageAccess.project)
            .filter(Project.name == project.name)
            .filter(CloudProvider.name == provider_name)
            .first()
        )
        cloud_provider = s.query(CloudProvider).filter_by(name=provider_name).first()
        assert cloud_provider, "CloudProvider {} does not exist".format(provider_name)
        if not existing_access:
            s.add(StorageAccess(provider=cloud_provider, project=project))
            logger.info(
                "created storage access for {} to {}".format(
                    project.name, cloud_provider.name
                )
            )
        for bucket_name in storage_access.get("buckets", []):
            bucket = (
                s.query(Bucket)
                .filter_by(name=bucket_name)
                .join(Bucket.provider)
                .filter(CloudProvider.name == provider_name)
                .first()
            )
            if not bucket:
                bucket = Bucket(name=bucket_name)
                bucket.provider = cloud_provider
                s.add(bucket)
                logger.info("created bucket {} in db".format(bucket_name))

    return project
593

594

595
def grant_project_to_group_or_user(s, project_data, group=None, user=None):
    """
    Grant (or update) an AccessPrivilege on the project described by
    *project_data* for either a group or a user.

    Args:
        s: SQLAlchemy session
        project_data (dict): project description, with optional "privilege" list
        group (Group): grantee group (mutually exclusive with user)
        user (User): grantee user

    Raises:
        Exception: if neither a group nor a user is provided
    """
    privilege = project_data.get("privilege", [])
    project = create_project(s, project_data)

    if group:
        name = group.name
        ap = (
            s.query(AccessPrivilege)
            .join(AccessPrivilege.project)
            .join(AccessPrivilege.group)
            .filter(Project.name == project.name, Group.name == group.name)
            .first()
        )
    elif user:
        name = user.username
        # username comparison is case-insensitive
        ap = (
            s.query(AccessPrivilege)
            .join(AccessPrivilege.project)
            .join(AccessPrivilege.user)
            .filter(
                Project.name == project.name,
                func.lower(User.username) == func.lower(user.username),
            )
            .first()
        )
    else:
        raise Exception("need to provide either a user or group")

    if ap:
        ap.privilege = privilege
        logger.info(
            "updated access privilege {} of project {} to {}".format(
                privilege, project.name, name
            )
        )
    else:
        if group:
            ap = AccessPrivilege(project=project, group=group, privilege=privilege)
        elif user:
            ap = AccessPrivilege(project=project, user=user, privilege=privilege)
        else:
            raise Exception("need to provide either a user or group")
        s.add(ap)
        logger.info(
            "created access privilege {} of project {} to {}".format(
                privilege, project.name, name
            )
        )
641

642

643
def create_cloud_providers(s, data):
    """
    Create a CloudProvider row for every entry under "cloud_providers" in
    *data* that does not already exist.

    Args:
        s: SQLAlchemy session
        data (dict): parsed sample-data yaml
    """
    for provider_name, fields in data.get("cloud_providers", {}).items():
        existing = (
            s.query(CloudProvider).filter(CloudProvider.name == provider_name).first()
        )
        if existing:
            continue
        s.add(
            CloudProvider(
                name=provider_name,
                backend=fields.get("backend", "cleversafe"),
                service=fields.get("service", "storage"),
            )
        )
656

657

658
def create_users_with_group(DB, s, data):
    """
    Create or update the users defined in the sample-data yaml, attaching them
    to groups and projects and creating their OIDC clients.

    Args:
        DB (str): database connection string (forwarded to create_client_action)
        s: SQLAlchemy session
        data (dict): parsed sample-data yaml; reads the "users" and "groups" sections

    Raises:
        Exception: if a user's identity provider does not exist
    """
    providers = {}  # cache of IdentityProvider rows keyed by provider name
    data_groups = data.get("groups", {})
    users = data.get("users", {})
    # BUG FIX: the loop variable was previously named `data`, shadowing the
    # `data` parameter; renamed to `user_data` to avoid the shadowing hazard.
    for username, user_data in users.items():
        is_existing_user = True
        user = query_for_user(session=s, username=username)

        admin = user_data.get("admin", False)

        if not user:
            is_existing_user = False
            provider_name = user_data.get("provider", "google")
            provider = providers.get(provider_name)
            if not provider:
                provider = (
                    s.query(IdentityProvider)
                    .filter(IdentityProvider.name == provider_name)
                    .first()
                )
                providers[provider_name] = provider
                if not provider:
                    raise Exception("provider {} not found".format(provider_name))

            user = User(username=username, idp_id=provider.id, is_admin=admin)
        user.is_admin = admin
        group_names = user_data.get("groups", [])
        for group_name in group_names:
            assign_group_to_user(s, user, group_name, data_groups[group_name])
        projects = user_data.get("projects", [])
        for project in projects:
            grant_project_to_group_or_user(s, project, user=user)
        if not is_existing_user:
            s.add(user)
        for client in user_data.get("clients", []):
            create_client_action(DB, username=username, **client)
694

695

696
def assign_group_to_user(s, user, group_name, group_data):
    """
    Ensure *user* is a member of the group named *group_name*, creating the
    group if it does not exist.

    Args:
        s: SQLAlchemy session
        user (User): user to add to the group
        group_name (str): name of the group
        group_data (dict): unused here; kept for interface compatibility with callers
    """
    group = s.query(Group).filter(Group.name == group_name).first()
    if not group:
        group = Group(name=group_name)
        s.add(group)
    # a single membership check covers both new and existing groups; the
    # original appended immediately for a new group and then re-checked,
    # which was redundant (net effect is identical: one append, no duplicates)
    if group not in user.groups:
        user.groups.append(group)
704

705

706
def google_init(db):
    """
    DEPRECATED - Initial user proxy group / service account creation.
    No longer necessary as proxy groups and service accounts are lazily
    created.

    Args:
        db: unused; parameter kept so existing callers/scripts do not break.
    """
    # intentionally a no-op
    pass
713

714

715
def remove_expired_google_service_account_keys(db):
    """
    Remove expired Google service account keys, both through the Google API
    and from the fence database.

    Two categories are handled:
      1. keys on client service accounts, delegated to
         GoogleCloudManager.handle_expired_service_account_keys (Google-side
         default max expiration)
      2. GoogleServiceAccountKey rows with a custom `expires` timestamp that
         is in the past; these are deleted in Google and then removed from
         the database

    Args:
        db (str): database connection string

    NOTE(review): deleted rows are not explicitly committed here — this
    appears to rely on the driver's session context manager committing on
    exit; confirm against userdatamodel's session handling.
    """
    cirrus_config.update(**config["CIRRUS_CFG"])

    # `db` is rebound from the connection string to the driver object
    db = get_SQLAlchemyDriver(db)
    with db.session as current_session:
        # (service account, owning client) pairs joined on client_id
        client_service_accounts = current_session.query(
            GoogleServiceAccount, Client
        ).filter(GoogleServiceAccount.client_id == Client.client_id)

        current_time = int(time.time())
        logger.info("Current time: {}\n".format(current_time))

        # user keys whose custom expiration has passed
        expired_sa_keys_for_users = current_session.query(
            GoogleServiceAccountKey
        ).filter(GoogleServiceAccountKey.expires <= current_time)

        with GoogleCloudManager() as g_mgr:
            # handle service accounts with default max expiration
            for service_account, client in client_service_accounts:
                g_mgr.handle_expired_service_account_keys(service_account.email)

            # handle service accounts with custom expiration
            for expired_user_key in expired_sa_keys_for_users:
                logger.info("expired_user_key: {}\n".format(expired_user_key))
                sa = (
                    current_session.query(GoogleServiceAccount)
                    .filter(
                        GoogleServiceAccount.id == expired_user_key.service_account_id
                    )
                    .first()
                )

                response = g_mgr.delete_service_account_key(
                    account=sa.email, key_name=expired_user_key.key_id
                )
                response_error_code = response.get("error", {}).get("code")
                response_error_status = response.get("error", {}).get("status")
                logger.info("response_error_code: {}\n".format(response_error_code))
                logger.info("response_error_status: {}\n".format(response_error_status))

                if not response_error_code:
                    # Google deletion succeeded; remove our record
                    current_session.delete(expired_user_key)
                    logger.info(
                        "INFO: Removed expired service account key {} "
                        "for service account {} (owned by user with id {}).\n".format(
                            expired_user_key.key_id, sa.email, sa.user_id
                        )
                    )
                elif (
                    response_error_code == 404
                    or response_error_status == "FAILED_PRECONDITION"
                ):
                    # key already gone on Google's side; safe to drop our record
                    logger.info(
                        "INFO: Service account key {} for service account {} "
                        "(owned by user with id {}) does not exist in Google. "
                        "Removing from database...\n".format(
                            expired_user_key.key_id, sa.email, sa.user_id
                        )
                    )
                    current_session.delete(expired_user_key)
                else:
                    # unexpected Google error: keep the DB row so the key is retried
                    logger.error(
                        "ERROR: Google returned an error when attempting to "
                        "remove service account key {} "
                        "for service account {} (owned by user with id {}). "
                        "Error:\n{}\n".format(
                            expired_user_key.key_id, sa.email, sa.user_id, response
                        )
                    )
784

785

786
def remove_expired_google_accounts_from_proxy_groups(db):
    """
    For every UserGoogleAccountToProxyGroup linkage whose ``expires`` time has
    passed, remove the Google account from the proxy group in Google and, on
    success, delete the linkage row from the database.

    Args:
        db (str): db connection string
    """
    cirrus_config.update(**config["CIRRUS_CFG"])

    # don't shadow the `db` connection-string parameter with the driver
    driver = get_SQLAlchemyDriver(db)
    with driver.session as current_session:
        current_time = int(time.time())
        logger.info("Current time: {}".format(current_time))

        expired_accounts = current_session.query(UserGoogleAccountToProxyGroup).filter(
            UserGoogleAccountToProxyGroup.expires <= current_time
        )

        with GoogleCloudManager() as g_mgr:
            for expired_account_access in expired_accounts:
                g_account = (
                    current_session.query(UserGoogleAccount)
                    .filter(
                        UserGoogleAccount.id
                        == expired_account_access.user_google_account_id
                    )
                    .first()
                )
                if not g_account:
                    # Orphaned linkage: the referenced UserGoogleAccount no
                    # longer exists, so there is no email to remove from the
                    # Google group. Previously this crashed with an
                    # AttributeError on ``g_account.email`` (both in the call
                    # below and again inside the error handler).
                    logger.error(
                        "ERROR: UserGoogleAccount with id {} (referenced by an "
                        "expired linkage to proxy group {}) does not exist. "
                        "Skipping.\n".format(
                            expired_account_access.user_google_account_id,
                            expired_account_access.proxy_group_id,
                        )
                    )
                    continue
                try:
                    response = g_mgr.remove_member_from_group(
                        member_email=g_account.email,
                        group_id=expired_account_access.proxy_group_id,
                    )
                    # Google indicates failure inside the response body rather
                    # than by raising, so inspect the error code explicitly
                    response_error_code = response.get("error", {}).get("code")

                    if not response_error_code:
                        current_session.delete(expired_account_access)
                        logger.info(
                            "INFO: Removed {} from proxy group with id {}.\n".format(
                                g_account.email, expired_account_access.proxy_group_id
                            )
                        )
                    else:
                        logger.error(
                            "ERROR: Google returned an error when attempting to "
                            "remove member {} from proxy group {}. Error:\n{}\n".format(
                                g_account.email,
                                expired_account_access.proxy_group_id,
                                response,
                            )
                        )
                except Exception as exc:
                    logger.error(
                        "ERROR: Google returned an error when attempting to "
                        "remove member {} from proxy group {}. Error:\n{}\n".format(
                            g_account.email, expired_account_access.proxy_group_id, exc
                        )
                    )
838

839

840
def delete_users(DB, usernames):
    """
    Delete the given users from the database, matching usernames
    case-insensitively.

    Args:
        DB (str): db connection string
        usernames (List[str]): usernames of the users to delete
    """
    driver = get_SQLAlchemyDriver(DB)
    with driver.session as session:
        # Delete each matched object individually rather than calling
        # ``.delete()`` on the query itself, since the latter would bypass
        # cascade deletion rules configured on the relationships.
        targets = [name.lower() for name in usernames]
        matched_users = (
            session.query(User)
            .filter(func.lower(User.username).in_(targets))
            .all()
        )
        for matched in matched_users:
            session.delete(matched)
        session.commit()
1✔
854

855

856
def cleanup_expired_ga4gh_information(DB):
    """
    Remove any expired passports/visas from the database if they're expired.

    IMPORTANT NOTE: This DOES NOT actually remove authorization, it assumes that the
                    same expiration was set and honored in the authorization system.

    Args:
        DB (str): db connection string
    """
    driver = get_SQLAlchemyDriver(DB)
    with driver.session as session:
        now = int(time.time())

        # A NULL ``expires`` is treated as "never expires", so only rows with
        # a non-null, past expiration are selected
        expired_visas = (
            session.query(GA4GHVisaV1)
            .filter(
                and_(
                    GA4GHVisaV1.expires.isnot(None),
                    GA4GHVisaV1.expires < now,
                )
            )
            .all()
        )

        deleted_count = 0
        # delete and commit one record at a time so a single failure doesn't
        # roll back the rest of the cleanup
        for visa in expired_visas:
            try:
                session.delete(visa)
                session.commit()
                deleted_count += 1
            except Exception as e:
                logger.error(
                    "ERROR: Could not remove GA4GHVisaV1 with id={}. Detail {}".format(
                        visa.id, e
                    )
                )

        logger.info(
            f"Removed {deleted_count} expired GA4GHVisaV1 records from db."
        )
896

897

898
def delete_expired_google_access(DB):
    """
    Delete all expired Google data access (e.g. remove proxy groups from Google Bucket
    Access Groups if expired).

    Args:
        DB (str): db connection string
    """
    cirrus_config.update(**config["CIRRUS_CFG"])

    driver = get_SQLAlchemyDriver(DB)
    with driver.session as session:
        current_time = int(time.time())

        # Get expires field from db, if None default to NOT expired
        records_to_delete = (
            session.query(GoogleProxyGroupToGoogleBucketAccessGroup)
            .filter(
                and_(
                    GoogleProxyGroupToGoogleBucketAccessGroup.expires.isnot(None),
                    GoogleProxyGroupToGoogleBucketAccessGroup.expires < current_time,
                )
            )
            .all()
        )
        num_deleted_records = 0
        if records_to_delete:
            with GoogleCloudManager() as manager:
                for record in records_to_delete:
                    # Pre-initialize so the error handler below can't hit a
                    # NameError when reading these attributes is what raised
                    # (previously an unbound ``member_email`` would crash the
                    # handler on the first failing record).
                    member_email = None
                    access_group_email = None
                    try:
                        member_email = record.proxy_group.email
                        access_group_email = record.access_group.email
                        manager.remove_member_from_group(
                            member_email, access_group_email
                        )
                        logger.info(
                            "Removed {} from {}, expired {}. Current time: {} ".format(
                                member_email,
                                access_group_email,
                                record.expires,
                                current_time,
                            )
                        )
                        # commit per record so one failure doesn't roll back
                        # previously successful removals
                        session.delete(record)
                        session.commit()

                        num_deleted_records += 1
                    except Exception as e:
                        logger.error(
                            "ERROR: Could not remove Google group member {} from access group {}. Detail {}".format(
                                member_email, access_group_email, e
                            )
                        )

        logger.info(
            f"Removed {num_deleted_records} expired Google Access records from db and Google."
        )
952

953

954
def delete_expired_service_accounts(DB):
    """
    Delete all expired service accounts: remove each one from its Google
    Bucket Access Group and drop the linkage row from the database.

    Args:
        DB (str): db connection string
    """
    cirrus_config.update(**config["CIRRUS_CFG"])

    driver = get_SQLAlchemyDriver(DB)
    with driver.session as session:
        now = int(time.time())
        expired_records = (
            session.query(ServiceAccountToGoogleBucketAccessGroup)
            .filter(ServiceAccountToGoogleBucketAccessGroup.expires < now)
            .all()
        )
        if not expired_records:
            return

        with GoogleCloudManager() as manager:
            for expired in expired_records:
                try:
                    manager.remove_member_from_group(
                        expired.service_account.email, expired.access_group.email
                    )
                    session.delete(expired)
                    logger.info(
                        "Removed expired service account: {}".format(
                            expired.service_account.email
                        )
                    )
                except Exception as e:
                    logger.error(
                        "ERROR: Could not delete service account {}. Details: {}".format(
                            expired.service_account.email, e
                        )
                    )

            # single commit after the loop; failed removals were skipped above
            session.commit()
1✔
989

990

991
def verify_bucket_access_group(DB):
    """
    Go through all the google group members, remove them from Google group and Google
    user service account if they are not in Fence

    Args:
        DB(str): db connection string

    Returns:
        None

    """
    cirrus_config.update(**config["CIRRUS_CFG"])

    driver = get_SQLAlchemyDriver(DB)
    with driver.session as session:
        bucket_groups = session.query(GoogleBucketAccessGroup).all()
        with GoogleCloudManager() as manager:
            for bucket_group in bucket_groups:
                try:
                    members = manager.get_group_members(bucket_group.email)
                    logger.debug(f"google group members response: {members}")
                except GoogleAuthError as e:
                    # auth failures won't recover mid-run; abort entirely
                    logger.error("ERROR: Authentication error!!!. Detail {}".format(e))
                    return
                except Exception as e:
                    logger.error(
                        "ERROR: Could not list group members of {}. Detail {}".format(
                            bucket_group.email, e
                        )
                    )
                    return

                # dispatch on the member type Google reports
                for member in members:
                    member_kind = member.get("type")
                    if member_kind == "GROUP":
                        _verify_google_group_member(session, bucket_group, member)
                    elif member_kind == "USER":
                        _verify_google_service_account_member(
                            session, bucket_group, member
                        )
1031

1032

1033
def _verify_google_group_member(session, access_group, member):
    """
    Delete if the member which is a google group is not in Fence.

    Args:
        session(Session): db session
        access_group(GoogleBucketAccessGroup): access group
        member(dict): group member info

    Returns:
        None

    """
    # proxy-group emails that Fence says should have access to this group
    account_emails = [
        granted_group.proxy_group.email
        for granted_group in (
            session.query(GoogleProxyGroupToGoogleBucketAccessGroup)
            .filter_by(access_group_id=access_group.id)
            .all()
        )
    ]

    # direct membership test instead of the former any()-over-comprehension
    if member.get("email") not in account_emails:
        try:
            with GoogleCloudManager() as manager:
                manager.remove_member_from_group(
                    member.get("email"), access_group.email
                )
                logger.info(
                    "Removed {} from {}, not found in fence but found "
                    "in Google Group.".format(member.get("email"), access_group.email)
                )
        except Exception as e:
            # message typo fixed: "memeber" -> "member"
            logger.error(
                "ERROR: Could not remove google group member {} from access group {}. Detail {}".format(
                    member.get("email"), access_group.email, e
                )
            )
1071

1072

1073
def _verify_google_service_account_member(session, access_group, member):
    """
    Delete if the member which is a service account is not in Fence.

    Args:
        session(session): db session
        access_group(GoogleBucketAccessGroup): access group
        member(dict): service account member info

    Returns:
        None

    """
    # service-account emails that Fence says should have access to this group
    account_emails = [
        account.service_account.email
        for account in (
            session.query(ServiceAccountToGoogleBucketAccessGroup)
            .filter_by(access_group_id=access_group.id)
            .all()
        )
    ]

    # direct membership test instead of the former any()-over-comprehension
    if member.get("email") not in account_emails:
        try:
            with GoogleCloudManager() as manager:
                manager.remove_member_from_group(
                    member.get("email"), access_group.email
                )
                logger.info(
                    "Removed {} from {}, not found in fence but found "
                    "in Google Group.".format(member.get("email"), access_group.email)
                )
        except Exception as e:
            # message typo fixed: "memeber" -> "member"
            logger.error(
                "ERROR: Could not remove service account member {} from access group {}. Detail {}".format(
                    member.get("email"), access_group.email, e
                )
            )
1112

1113

1114
class JWTCreator(object):
    """
    Create signed access or refresh tokens for a user directly (outside the
    normal OAuth2 flows), e.g. from the fence-create command line.

    Required kwargs: ``kid``, ``private_key``, ``username``, ``scopes``.
    Optional kwargs: ``expires_in`` (seconds, defaults to
    ``default_expiration``) and ``client_id`` (required only when creating a
    refresh token).
    """

    required_kwargs = ["kid", "private_key", "username", "scopes"]
    all_kwargs = required_kwargs + ["expires_in", "client_id"]

    default_expiration = 3600

    def __init__(self, db, base_url, **kwargs):
        """
        Args:
            db (str): db connection string
            base_url (str): issuer (``iss``) to embed in generated tokens
            **kwargs: see class docstring for accepted keys

        Raises:
            ValueError: if any required kwarg is missing
        """
        self.db = db
        self.base_url = base_url

        # These get assigned values just below here, with setattr. Defined here
        # so linters won't complain they're undefined.
        self.kid = None
        self.private_key = None
        self.username = None
        self.scopes = None
        self.client_id = None

        for required_kwarg in self.required_kwargs:
            if required_kwarg not in kwargs:
                raise ValueError("missing required argument: " + required_kwarg)

        # Set attributes on this object from the kwargs.
        for kwarg_name in self.all_kwargs:
            setattr(self, kwarg_name, kwargs.get(kwarg_name))

        # Accept scopes either as a list or as a comma-separated string like
        # 'openid,fence,data' (normalized to ['openid', 'fence', 'data']).
        if isinstance(getattr(self, "scopes", ""), str):
            self.scopes = [scope.strip() for scope in self.scopes.split(",")]

        self.expires_in = kwargs.get("expires_in") or self.default_expiration

    def _get_user(self, session):
        """
        Look up the user for ``self.username``.

        Shared by both token creators; previously this lookup-and-raise was
        duplicated in each of them.

        Raises:
            EnvironmentError: if no user with that username exists
        """
        user = query_for_user(session=session, username=self.username)
        if not user:
            raise EnvironmentError(
                "no user found with given username: " + self.username
            )
        return user

    def create_access_token(self):
        """
        Create a new access token.

        Return:
            JWTResult: result containing the encoded token and claims
        """
        driver = get_SQLAlchemyDriver(self.db)
        with driver.session as current_session:
            user = self._get_user(current_session)
            return generate_signed_access_token(
                self.kid,
                self.private_key,
                self.expires_in,
                self.scopes,
                user=user,
                iss=self.base_url,
            )

    def create_refresh_token(self):
        """
        Create a new refresh token and add its entry to the database.

        Return:
            JWTResult: the refresh token result

        Raises:
            EnvironmentError: if the user does not exist or no client_id
                was provided
        """
        driver = get_SQLAlchemyDriver(self.db)
        with driver.session as current_session:
            user = self._get_user(current_session)
            if not self.client_id:
                raise EnvironmentError(
                    "no client id is provided. Required for creating refresh token"
                )
            jwt_result = generate_signed_refresh_token(
                self.kid,
                self.private_key,
                user,
                self.expires_in,
                self.scopes,
                iss=self.base_url,
                client_id=self.client_id,
            )

            # record the token so it can later be revoked / validated by jti
            current_session.add(
                UserRefreshToken(
                    jti=jwt_result.claims["jti"],
                    userid=user.id,
                    expires=jwt_result.claims["exp"],
                )
            )

            return jwt_result
1✔
1214

1215

1216
def link_bucket_to_project(db, bucket_id, bucket_provider, project_auth_id):
    """
    Associate a bucket to a specific project (with provided auth_id).

    Args:
        db (TYPE): database
        bucket_id (str): bucket db id or unique name
            WARNING: name uniqueness is only required for Google so it's not
                     a requirement of the db table. You will get an error if
                     there are multiple buckets with the given name. In that
                     case, you'll have to use the bucket's id.
        bucket_provider (str): CloudProvider.name for the bucket
        project_auth_id (str): Project.auth_id to link to bucket

    Raises:
        NameError: if the provider does not exist, if no bucket matches
            ``bucket_id`` by id or name, or if multiple buckets share the
            given name
    """
    driver = get_SQLAlchemyDriver(db)
    with driver.session as current_session:
        cloud_provider = (
            current_session.query(CloudProvider).filter_by(name=bucket_provider).first()
        )
        if not cloud_provider:
            # NOTE(review): message wording says "bucket" but this failure is
            # actually a missing *provider* row
            raise NameError(
                'No bucket with provider "{}" exists.'.format(bucket_provider)
            )

        # first try by searching using id
        try:
            bucket_id = int(bucket_id)
            bucket_db_entry = (
                current_session.query(Bucket).filter_by(
                    id=bucket_id, provider_id=cloud_provider.id
                )
            ).first()
        except ValueError:
            # invalid id, must be int
            bucket_db_entry = None

        # nothing found? try searching for single bucket with name bucket_id
        if not bucket_db_entry:
            buckets_by_name = current_session.query(Bucket).filter_by(
                name=bucket_id, provider_id=cloud_provider.id
            )
            # don't get a bucket if the name isn't unique. NOTE: for Google,
            # these have to be globally unique so they'll be unique here.
            buckets_with_name = buckets_by_name.count()
            if buckets_with_name == 1:
                bucket_db_entry = buckets_by_name[0]
            elif buckets_with_name > 1:
                raise NameError(
                    'No bucket with id "{bucket_id}" exists. Tried buckets '
                    'with name "{bucket_id}", but this returned multiple '
                    "buckets. Please specify the id from the db and not just "
                    "the name.".format(bucket_id=bucket_id)
                )
            else:
                # keep bucket_db_entry as None
                pass

        if not bucket_db_entry:
            raise NameError('No bucket with id or name "{}" exists.'.format(bucket_id))

        # look up the target project; create it on the fly if it doesn't exist
        project_db_entry = (
            current_session.query(Project).filter_by(auth_id=project_auth_id).first()
        )
        if not project_db_entry:
            logger.warning(
                'WARNING: No project with auth_id "{}" exists. Creating...'.format(
                    project_auth_id
                )
            )
            project_db_entry = Project(name=project_auth_id, auth_id=project_auth_id)
            current_session.add(project_db_entry)
            # commit now so project_db_entry.id is populated for the queries below
            current_session.commit()

        # Add StorageAccess if it doesn't exist for the project
        storage_access = (
            current_session.query(StorageAccess)
            .filter_by(project_id=project_db_entry.id, provider_id=cloud_provider.id)
            .first()
        )
        if not storage_access:
            storage_access = StorageAccess(
                project_id=project_db_entry.id, provider_id=cloud_provider.id
            )
            current_session.add(storage_access)
            current_session.commit()

        # finally, create the project<->bucket linkage if it's not already there
        project_linkage = (
            current_session.query(ProjectToBucket)
            .filter_by(project_id=project_db_entry.id, bucket_id=bucket_db_entry.id)
            .first()
        )
        if not project_linkage:
            project_linkage = ProjectToBucket(
                project_id=project_db_entry.id,
                bucket_id=bucket_db_entry.id,
                privilege=["owner"],  # TODO What should this be???
            )
            current_session.add(project_linkage)
            current_session.commit()
×
1315

1316

1317
def create_or_update_google_bucket(
    db,
    name,
    storage_class=None,
    public=None,
    requester_pays=False,
    google_project_id=None,
    project_auth_id=None,
    access_logs_bucket=None,
    allowed_privileges=None,
):
    """
    Create (or update) a Google bucket and record it in the database.

    For a non-public bucket this also sets up Google Bucket Access Group(s)
    that control access to it: to grant a new user access, simply add them to
    the relevant Google Bucket Access Group.

    NOTE: At the moment, a different Google Bucket Access Group is created
          for each privilege in allowed_privileges (which defaults to
          ['read', 'write']), i.e. a separate Google Group per access level.

    Args:
        db (TYPE): database
        name (str): bucket name; must be globally unique throughout Google
        storage_class (str): enum, one of the cirrus's GOOGLE_STORAGE_CLASSES
        public (bool or None, optional): whether the bucket should be public;
            None means leave IAM on the bucket unchanged
        requester_pays (bool, optional): whether to enable requester_pays
            on the bucket
        google_project_id (str, optional): Google project to associate this
            bucket with
        project_auth_id (str, optional): a Project.auth_id to associate this
            bucket with; the project must already exist in the db
        access_logs_bucket (str, optional): enables logging; name of a Google
            bucket that will store the access logs
        allowed_privileges (List(str), optional): privileges to allow on the
            bucket, defaults to ['read', 'write']; 'admin' grants everything
            including delete, 'read' viewing only, 'write' creation only
    """
    cirrus_config.update(**config["CIRRUS_CFG"])

    target_project_id = google_project_id or cirrus_config.GOOGLE_PROJECT_ID

    # Buckets may live in a separate "storage" project: prefer the project id
    # found in the storage creds key file, otherwise use the target project.
    storage_creds_project_id = _get_storage_project_id() or target_project_id

    privileges = allowed_privileges or ["read", "write"]

    driver = get_SQLAlchemyDriver(db)
    with driver.session as current_session:
        # bucket creation must use the storage creds, since the default creds
        # don't have permission
        bucket_db_entry = _create_or_update_google_bucket_and_db(
            db_session=current_session,
            name=name,
            storage_class=storage_class,
            requester_pays=requester_pays,
            storage_creds_project_id=storage_creds_project_id,
            public=public,
            project_auth_id=project_auth_id,
            access_logs_bucket=access_logs_bucket,
        )

        # an explicitly non-public bucket gets one access group per privilege
        if public is not None and not public:
            for privilege in privileges:
                _setup_google_bucket_access_group(
                    db_session=current_session,
                    google_bucket_name=name,
                    bucket_db_id=bucket_db_entry.id,
                    google_project_id=target_project_id,
                    storage_creds_project_id=storage_creds_project_id,
                    privileges=[privilege],
                )
1396

1397

1398
def create_google_logging_bucket(name, storage_class=None, google_project_id=None):
    """
    Create a Google bucket configured to store access logs.

    Args:
        name (str): globally-unique name for the logging bucket
        storage_class (str, optional): one of cirrus's GOOGLE_STORAGE_CLASSES
        google_project_id (str, optional): project to create the bucket in;
            falls back to the storage creds key file's project, then the
            configured default project
    """
    cirrus_config.update(**config["CIRRUS_CFG"])

    # determine project where buckets are located if not provided, default
    # to configured project if checking creds doesn't work
    bucket_project_id = (
        google_project_id
        or _get_storage_project_id()
        or cirrus_config.GOOGLE_PROJECT_ID
    )

    storage_creds = cirrus_config.configs["GOOGLE_STORAGE_CREDS"]
    with GoogleCloudManager(bucket_project_id, creds=storage_creds) as g_mgr:
        g_mgr.create_or_update_bucket(
            name,
            storage_class=storage_class,
            public=False,
            requester_pays=False,
            for_logging=True,
        )

        logger.info(
            "Successfully created Google Bucket {} "
            "to store Access Logs.".format(name)
        )
1425

1426

1427
def _get_storage_project_id():
    """
    Determine the project where buckets are located by reading the project id
    out of the Google storage creds key file.

    Returns:
        str or None: the ``project_id`` from the creds file, or None when the
            file does not exist (or has no ``project_id`` field)
    """
    creds_path = cirrus_config.configs["GOOGLE_STORAGE_CREDS"]
    if not os.path.exists(creds_path):
        return None
    with open(creds_path) as creds_file:
        return json.load(creds_file).get("project_id")
×
1438

1439

1440
def _create_or_update_google_bucket_and_db(
    db_session,
    name,
    storage_class,
    public,
    requester_pays,
    storage_creds_project_id,
    project_auth_id,
    access_logs_bucket,
):
    """
    Create or update the Google bucket and add the necessary db entries.

    Args:
        db_session: db session to use for all queries/commits
        name (str): globally-unique Google bucket name
        storage_class (str): one of cirrus's GOOGLE_STORAGE_CLASSES
        public (bool or None): whether the bucket is public; None leaves
            the bucket IAM unchanged
        requester_pays (bool): enable requester-pays on the bucket
        storage_creds_project_id (str): project whose storage creds are used
            to create/update the bucket
        project_auth_id (str or None): if given, link the bucket to the
            existing Project with this auth_id (no-op if it doesn't exist)
        access_logs_bucket (str or None): bucket to store access logs in

    Returns:
        Bucket: the db entry for the (created or pre-existing) bucket
    """
    # bucket creation requires the storage creds, not the default creds
    manager = GoogleCloudManager(
        storage_creds_project_id, creds=cirrus_config.configs["GOOGLE_STORAGE_CREDS"]
    )
    with manager as g_mgr:
        g_mgr.create_or_update_bucket(
            name,
            storage_class=storage_class,
            public=public,
            requester_pays=requester_pays,
            access_logs_bucket=access_logs_bucket,
        )

        # add bucket to db
        google_cloud_provider = _get_or_create_google_provider(db_session)

        bucket_db_entry = (
            db_session.query(Bucket)
            .filter_by(name=name, provider_id=google_cloud_provider.id)
            .first()
        )
        if not bucket_db_entry:
            bucket_db_entry = Bucket(name=name, provider_id=google_cloud_provider.id)
            db_session.add(bucket_db_entry)
            # commit now so bucket_db_entry.id is available for linking below
            db_session.commit()

        logger.info("Successfully updated Google Bucket {}.".format(name))

        # optionally link this new bucket to an existing project
        if project_auth_id:
            project_db_entry = (
                db_session.query(Project).filter_by(auth_id=project_auth_id).first()
            )
            if not project_db_entry:
                # unlike link_bucket_to_project, this path does NOT create a
                # missing project; it just logs and skips linking
                logger.info(
                    "No project with auth_id {} found. No linking "
                    "occured.".format(project_auth_id)
                )
            else:
                project_linkage = (
                    db_session.query(ProjectToBucket)
                    .filter_by(
                        project_id=project_db_entry.id, bucket_id=bucket_db_entry.id
                    )
                    .first()
                )
                if not project_linkage:
                    project_linkage = ProjectToBucket(
                        project_id=project_db_entry.id,
                        bucket_id=bucket_db_entry.id,
                        privilege=["owner"],  # TODO What should this be???
                    )
                    db_session.add(project_linkage)
                    db_session.commit()
                logger.info(
                    "Successfully linked project with auth_id {} "
                    "to the bucket.".format(project_auth_id)
                )

                # Add StorageAccess if it doesn't exist for the project
                storage_access = (
                    db_session.query(StorageAccess)
                    .filter_by(
                        project_id=project_db_entry.id,
                        provider_id=google_cloud_provider.id,
                    )
                    .first()
                )
                if not storage_access:
                    storage_access = StorageAccess(
                        project_id=project_db_entry.id,
                        provider_id=google_cloud_provider.id,
                    )
                    db_session.add(storage_access)
                    db_session.commit()

    return bucket_db_entry
×
1529

1530

1531
def _setup_google_bucket_access_group(
    db_session,
    google_bucket_name,
    bucket_db_id,
    google_project_id,
    storage_creds_project_id,
    privileges,
):
    """
    Create a Google Bucket Access Group for a bucket and grant that group the
    requested access on the bucket in Google.

    Args:
        db_session: fence database session
        google_bucket_name (str): name of the Google bucket
        bucket_db_id (int): id of the bucket's row in the fence database
        google_project_id (str): Google project in which to create the group
        storage_creds_project_id (str): project whose storage credentials are
            used to update the bucket's IAM policy
        privileges (List[str]): privileges the group should have on the bucket

    Returns:
        GoogleBucketAccessGroup: the access group linked to the bucket
    """
    group = _create_google_bucket_access_group(
        db_session, google_bucket_name, bucket_db_id, google_project_id, privileges
    )

    # the bucket IAM change requires the dedicated storage credentials,
    # not the default ones used for group creation
    with GoogleCloudManager(
        storage_creds_project_id, creds=cirrus_config.configs["GOOGLE_STORAGE_CREDS"]
    ) as manager:
        manager.give_group_access_to_bucket(
            group.email, google_bucket_name, access=privileges
        )

    logger.info(
        "Successfully set up Google Bucket Access Group {} "
        "for Google Bucket {}.".format(group.email, google_bucket_name)
    )

    return group
1557

1558

1559
def _create_google_bucket_access_group(
    db_session, google_bucket_name, bucket_db_id, google_project_id, privileges
):
    """
    Create a Google group for bucket access and record it in the fence db.

    The group name is derived from the configured prefix, the bucket name,
    and the privileges (e.g. "<prefix>_<bucket>_read_gbag"). If a matching
    GoogleBucketAccessGroup row already exists for the bucket/email pair it
    is reused instead of inserting a duplicate.

    Args:
        db_session: fence database session
        google_bucket_name (str): name of the Google bucket
        bucket_db_id (int): id of the bucket's row in the fence database
        google_project_id (str): Google project in which to create the group
        privileges (List[str]): privileges the group grants on the bucket

    Returns:
        GoogleBucketAccessGroup: new or pre-existing access group row
    """
    prefix = config.get("GOOGLE_GROUP_PREFIX", "")
    group_name = "{}_{}_{}_gbag".format(
        prefix, google_bucket_name, "_".join(privileges)
    )

    # default credentials are sufficient for creating the group itself
    with GoogleCloudManager(google_project_id) as manager:
        created = manager.create_group(name=group_name)
        group_email = created["email"]

        # persist the group in fence's db unless it's already recorded
        access_group = (
            db_session.query(GoogleBucketAccessGroup)
            .filter_by(bucket_id=bucket_db_id, email=group_email)
            .first()
        )
        if access_group is None:
            access_group = GoogleBucketAccessGroup(
                bucket_id=bucket_db_id, email=group_email, privileges=privileges
            )
            db_session.add(access_group)
            db_session.commit()

    return access_group
1591

1592

1593
def _get_or_create_google_provider(db_session):
    """
    Return the "google" CloudProvider row, creating and committing it if it
    does not exist yet.

    Args:
        db_session: fence database session

    Returns:
        CloudProvider: the row for the Google cloud provider
    """
    provider = db_session.query(CloudProvider).filter_by(name="google").first()
    if provider is None:
        provider = CloudProvider(
            name="google", description="Google Cloud Platform", service="general"
        )
        db_session.add(provider)
        db_session.commit()
    return provider
1604

1605

1606
def link_external_bucket(db, name):
    """
    Link with bucket owned by an external party. This will create the bucket
    in fence database and create a google group to access the bucket in both
    Google and fence database.
    The external party will need to add the google group read access to bucket
    afterwards.

    Args:
        db (str): database connection string
        name (str): name of the externally-owned bucket to link

    Returns:
        str: email of the Google Bucket Access Group with read access

    Raises:
        Exception: an already-recorded bucket has zero or multiple "read"
            access groups, so linking cannot proceed unambiguously
    """
    cirrus_config.update(**config["CIRRUS_CFG"])
    google_project_id = cirrus_config.GOOGLE_PROJECT_ID

    driver = get_SQLAlchemyDriver(db)
    with driver.session as session:
        google_cloud_provider = _get_or_create_google_provider(session)

        # a bucket of this name may already be recorded; if so, reuse its
        # existing "read" access group instead of creating a new one
        existing_bucket = session.query(Bucket).filter_by(name=name).first()
        if existing_bucket:
            read_groups = (
                session.query(GoogleBucketAccessGroup)
                .filter(GoogleBucketAccessGroup.privileges.any("read"))
                .filter_by(bucket_id=existing_bucket.id)
                .all()
            )
            if len(read_groups) > 1:
                raise Exception(
                    f"Existing bucket {name} has more than 1 associated "
                    "Google Bucket Access Group with privilege of 'read'. "
                    "This is not expected and we cannot continue linking."
                )
            elif len(read_groups) == 0:
                raise Exception(
                    f"Existing bucket {name} has no associated "
                    "Google Bucket Access Group with privilege of 'read'. "
                    "This is not expected and we cannot continue linking."
                )

            email = read_groups[0].email
            logger.warning(
                f"bucket already exists with name: {name}, using existing group email: {email}"
            )
            return email

        new_bucket = Bucket(name=name, provider_id=google_cloud_provider.id)
        session.add(new_bucket)
        session.commit()

        access_group = _create_google_bucket_access_group(
            session,
            name,
            new_bucket.id,
            google_project_id,
            ["read"],
        )

    logger.info("bucket access group email: {}".format(access_group.email))
    return access_group.email
1670

1671

1672
def verify_user_registration(DB):
    """
    Validate user registration.

    Loads the CIRRUS configuration, then runs the registration validation
    check against the given database.

    Args:
        DB (str): database connection string
    """
    # cirrus needs its config populated before any Google interaction
    cirrus_config.update(**config["CIRRUS_CFG"])

    validation_check(DB)
1679

1680

1681
def force_update_google_link(DB, username, google_email, expires_in=None):
    """
    WARNING: This function circumvents Google Auth flow, and should only be
    used for internal testing!
    WARNING: This function assumes that a user already has a proxy group!

    Adds user's google account to proxy group and/or updates expiration for
    that google account's access.
    WARNING: This assumes that provided arguments represent valid information.
             This BLINDLY adds without verification. Do verification
             before this.
    Specifically, this ASSUMES that the proxy group provided belongs to the
    given user and that the user has ALREADY authenticated to prove the
    provided google_email is also their's.

    Args:
        DB: database connection string
        username (str): Username to link with
        google_email (str): Google email to link to
        expires_in (int): requested lifetime in seconds; capped at the
            configured maximum

    Raises:
        NotFound: Linked Google account not found
        Unauthorized: Couldn't determine user

    Returns:
        Expiration time of the newly updated google account's access
    """
    cirrus_config.update(**config["CIRRUS_CFG"])

    driver = get_SQLAlchemyDriver(DB)
    with driver.session as session:
        user_account = query_for_user(session=session, username=username)
        if not user_account:
            raise Unauthorized(
                "Could not determine authed user "
                "from session. Unable to link Google account."
            )

        user_id = user_account.id
        proxy_group_id = user_account.google_proxy_group_id

        google_account = (
            session.query(UserGoogleAccount)
            .filter(UserGoogleAccount.email == google_email)
            .first()
        )
        if google_account is None:
            google_account = add_new_user_google_account(
                user_id, google_email, session
            )

        # time until the SA will lose bucket access:
        # by default the configured value, falling back to 7 days
        max_expires_in = config.get(
            "GOOGLE_USER_SERVICE_ACCOUNT_ACCESS_EXPIRES_IN", 604800
        )
        # the caller-provided expires_in may only shorten, never exceed,
        # the configured maximum
        expires_in = get_valid_expiration(
            expires_in, max_limit=max_expires_in, default=max_expires_in
        )
        # absolute unix timestamp at which access expires
        expiration = int(time.time() + expires_in)

        force_update_user_google_account_expiration(
            google_account, proxy_group_id, google_email, expiration, session
        )

        session.commit()

        return expiration
1752

1753

1754
def notify_problem_users(db, emails, auth_ids, check_linking, google_project_id):
    """
    Notify users (from the provided email list) who lack access to some
    subset of the given project auth_ids, emailing them instructions to
    obtain the access they need.

    Args:
        db (string): database instance
        emails (list(string)): list of emails to check for access
        auth_ids (list(string)): list of project auth_ids to check that emails
            have access
        check_linking (bool): flag for if emails should be checked for linked
            google email
        google_project_id (string): Google project the users belong to
    """
    # the check-and-email logic lives in the shared helper
    email_users_without_access(db, auth_ids, emails, check_linking, google_project_id)
1766

1767

1768
def migrate_database():
    """Run all pending alembic migrations up to the latest revision."""
    # --raiseerr makes alembic surface migration failures instead of
    # swallowing them
    alembic_main(["--raiseerr", "upgrade", "head"])
    logger.info("Done.")
1771

1772

1773
def google_list_authz_groups(db):
    """
    Build and print a list of Google authorization information: the Google
    Bucket Access Group emails, the Bucket(s) they are associated with, and
    the underlying Fence Project auth_id providing access to that
    Bucket/Group.

    Args:
        db (string): database instance

    Returns:
        list: rows of (group email, bucket name, project auth_id,
            ProjectToBucket)
    """
    driver = get_SQLAlchemyDriver(db)

    with driver.session as db_session:
        rows = (
            db_session.query(
                GoogleBucketAccessGroup.email,
                Bucket.name,
                Project.auth_id,
                ProjectToBucket,
            )
            .join(Project, ProjectToBucket.project_id == Project.id)
            .join(Bucket, ProjectToBucket.bucket_id == Bucket.id)
            .join(
                GoogleBucketAccessGroup, GoogleBucketAccessGroup.bucket_id == Bucket.id
            )
        ).all()

        print("GoogleBucketAccessGroup.email, Bucket.name, Project.auth_id")
        # last tuple element is the ProjectToBucket row, which isn't printable
        for row in rows:
            print(", ".join(row[:-1]))

        return rows
1803

1804

1805
def access_token_polling_job(
    db, chunk_size=None, concurrency=None, thread_pool_size=None, buffer_size=None
):
    """
    Update visas and refresh tokens for users with valid visas and refresh tokens

    Args:
        db (string): database instance
        chunk_size (int): size of chunk of users we want to take from each iteration
        concurrency (int): number of concurrent users going through the visa update flow
        thread_pool_size (int): number of Docker container CPU used for jwt verifcation
        buffer_size (int): max size of queue
    """
    # Instantiating a new client here because the existing
    # client uses authz_provider
    arborist = ArboristClient(
        arborist_base_url=config["ARBORIST"],
        logger=get_logger("user_syncer.arborist_client"),
    )
    driver = get_SQLAlchemyDriver(db)
    job = AccessTokenUpdater(
        chunk_size=int(chunk_size) if chunk_size else None,
        concurrency=int(concurrency) if concurrency else None,
        thread_pool_size=int(thread_pool_size) if thread_pool_size else None,
        buffer_size=int(buffer_size) if buffer_size else None,
        arborist=arborist,
    )
    with driver.session as db_session:
        # asyncio.run() replaces the deprecated get_event_loop() +
        # run_until_complete() pattern: it creates a fresh event loop, runs
        # the coroutine to completion, and closes the loop afterwards
        # (get_event_loop() from sync code is deprecated since Python 3.10
        # and left the loop unclosed here).
        asyncio.run(job.update_tokens(db_session))
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc