• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

uc-cdis / fence / 13597038828

28 Feb 2025 09:45PM UTC coverage: 75.427% (+0.2%) from 75.268%
13597038828

Pull #1209

github

AlbertSnows
Update integration_tests.yaml
Pull Request #1209: move backoff settings as well as other functions out of utils

7855 of 10414 relevant lines covered (75.43%)

0.75 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

40.6
fence/scripting/google_monitor.py
1
"""
2
Google Monitoring and Validation Logic
3

4
This file contains scripts to monitor user-registered service accounts and
5
their respective Google projects. The functions in this file will also
6
handle invalid service accounts and projects.
7
"""
8
import traceback
1✔
9

10
from gen3cirrus.google_cloud.iam import GooglePolicyMember
1✔
11
from gen3cirrus import GoogleCloudManager
1✔
12
from gen3cirrus.google_cloud.errors import GoogleAPIError
1✔
13

14
from cdislogging import get_logger
1✔
15

16
import fence.config
1✔
17
from fence.resources.google.validity import (
1✔
18
    GoogleProjectValidity,
19
    GoogleServiceAccountValidity,
20
)
21

22
from fence.resources.google.utils import (
1✔
23
    get_all_registered_service_accounts,
24
    get_linked_google_account_email,
25
    is_google_managed_service_account,
26
)
27

28
from fence.resources.google.access_utils import (
1✔
29
    get_google_project_number,
30
    get_project_from_auth_id,
31
    get_user_by_email,
32
    get_user_by_linked_email,
33
    force_remove_service_account_from_access,
34
    force_remove_service_account_from_db,
35
    user_has_access_to_project,
36
)
37

38
from fence import utils
1✔
39
from fence.config import config
1✔
40
from fence.models import User
1✔
41
from fence.errors import Unauthorized
1✔
42

43
logger = get_logger(__name__)
1✔
44

45

46
def validation_check(db):
    """
    Google validation check for all user-registered service accounts
    and projects.

    This will remove any invalid registered service accounts. It will also
    remove all registered service accounts for a given project if the project
    itself is invalid. When something was removed and
    REMOVE_SERVICE_ACCOUNT_EMAIL_NOTIFICATION is enabled in the config, the
    users holding the owner role on the Google project are emailed the
    removal reasons.

    NOTE: This entire function should be time-efficient and finish in less
          than 90 seconds.
          TODO: Test this function with various amounts of service accounts
                and delays from the google API

    Args:
        db: database handle, passed through to the lookup and removal helpers
    """
    registered_service_accounts = get_all_registered_service_accounts(db=db)
    project_service_account_mapping = _get_project_service_account_mapping(
        registered_service_accounts
    )

    for google_project_id, sa_emails in project_service_account_mapping.items():
        email_required = False
        invalid_registered_service_account_reasons = {}
        invalid_project_reasons = {}
        sa_emails_removed = []
        for sa_email in sa_emails:
            logger.debug("Validating Google Service Account: {}".format(sa_email))
            # Do some basic service account checks, this won't validate
            # the data access, that's done when the project's validated
            try:
                validity_info = _is_valid_service_account(sa_email, google_project_id)
            except Unauthorized:
                # _is_valid_service_account raises if the monitor does not
                # have access, which will be caught and handled during the
                # project check below. The logic in the endpoints is reversed
                # (project is checked first, not SAs), which is why the
                # handling here looks a little odd.
                logger.info(
                    "Monitor does not have access to validate "
                    "service account {}. This should be handled "
                    "in project validation.".format(sa_email)
                )
                continue

            if not validity_info:
                logger.info(
                    "INVALID SERVICE ACCOUNT {} DETECTED. REMOVING. Validity Information: {}".format(
                        sa_email, str(getattr(validity_info, "_info", None))
                    )
                )
                force_remove_service_account_from_access(
                    sa_email, google_project_id, db=db
                )
                # validity_info is None when the validity check itself blew
                # up; there is then no per-check detail to look at, so only
                # subscript it when we actually got a validity object back.
                if (
                    validity_info is not None
                    and validity_info["policy_accessible"] is False
                ):
                    logger.info(
                        "SERVICE ACCOUNT POLICY NOT ACCESSIBLE OR DOES NOT "
                        "EXIST. SERVICE ACCOUNT WILL BE REMOVED FROM FENCE DB"
                    )
                    force_remove_service_account_from_db(sa_email, db=db)

                # remove from list so we don't try to remove again
                # if project is invalid too
                sa_emails_removed.append(sa_email)

                invalid_registered_service_account_reasons[
                    sa_email
                ] = _get_service_account_removal_reasons(validity_info)
                email_required = True

        # done after the loop so sa_emails is not mutated while iterating it
        for sa_email in sa_emails_removed:
            sa_emails.remove(sa_email)

        logger.debug("Validating Google Project: {}".format(google_project_id))
        google_project_validity = _is_valid_google_project(google_project_id, db=db)

        if not google_project_validity:
            # for now, if we detect in invalid project, remove ALL service
            # accounts from access for that project.
            #
            # TODO: If the issue is ONLY a specific service account,
            # it may be possible to isolate it and only remove that
            # from access.
            logger.info(
                "INVALID GOOGLE PROJECT {} DETECTED. REMOVING ALL SERVICE ACCOUNTS. "
                "Validity Information: {}".format(
                    google_project_id,
                    str(getattr(google_project_validity, "_info", None)),
                )
            )
            for sa_email in sa_emails:
                force_remove_service_account_from_access(
                    sa_email, google_project_id, db=db
                )

            # projects can be invalid for project-related reasons or because
            # of NON-registered service accounts
            invalid_project_reasons["general"] = _get_general_project_removal_reasons(
                google_project_validity
            )
            invalid_project_reasons[
                "non_registered_service_accounts"
            ] = _get_invalid_sa_project_removal_reasons(google_project_validity)
            invalid_project_reasons["access"] = _get_access_removal_reasons(
                google_project_validity
            )
            email_required = True

        email_required &= config["REMOVE_SERVICE_ACCOUNT_EMAIL_NOTIFICATION"]["enable"]
        if email_required:
            logger.debug(
                "Sending email with service account removal reasons: {} and project "
                "removal reasons: {}.".format(
                    invalid_registered_service_account_reasons, invalid_project_reasons
                )
            )

            try:
                user_email_list = (
                    _get_user_email_list_from_google_project_with_owner_role(
                        google_project_id
                    )
                )
            except GoogleAPIError:
                logger.warning(
                    "DID NOT EMAIL USERS. Unable to get user(s) email(s) about service account "
                    "removal in Google project {}. If fence's monitoring SA is not present "
                    "then we cannot make the Google API call to know who to email.".format(
                        google_project_id
                    )
                )
                # Only this project's notification is skipped; the remaining
                # projects must still be validated.
                continue

            _send_emails_informing_service_account_removal(
                user_email_list,
                invalid_registered_service_account_reasons,
                invalid_project_reasons,
                google_project_id,
            )
184

185

186
def _is_valid_service_account(sa_email, google_project_id):
    """
    Run the validity checks for a registered service account.

    This function only evaluates validity; it does not remove anything.

    Args:
        sa_email(str): service account email
        google_project_id(str): google project id

    Returns:
        The GoogleServiceAccountValidity object after its checks ran, or
        None if any unexpected exception occurred while checking (in which
        case the account is assumed to be invalid).

    Raises:
        Unauthorized: no project number could be obtained for the project,
            which is treated as the monitor lacking access to it
    """
    with GoogleCloudManager(google_project_id) as gcm:
        google_project_number = get_google_project_number(google_project_id, gcm)

    if not google_project_number:
        # if our monitor doesn't have access at this point, just don't return any
        # information. When the project check runs, it will catch the monitor missing
        # error and add it to the removal reasons
        raise Unauthorized(
            "Google Monitoring SA doesn't have access to Google Project: {}".format(
                google_project_id
            )
        )

    try:
        sa_validity = GoogleServiceAccountValidity(
            sa_email, google_project_id, google_project_number=google_project_number
        )
        # The external access check is skipped for Google-managed service
        # accounts; every other check is the same for both kinds.
        sa_validity.check_validity(
            early_return=True,
            check_type=True,
            check_policy_accessible=True,
            check_external_access=not is_google_managed_service_account(sa_email),
        )
    except Exception as exc:
        # any issues, assume invalid
        # TODO not sure if this is the right way to handle this...
        logger.warning(
            "Service Account {} determined invalid due to unhandled exception: {}. "
            "Assuming service account is invalid.".format(sa_email, str(exc))
        )
        traceback.print_exc()
        return None

    return sa_validity
239

240

241
def _is_valid_google_project(google_project_id, db=None):
    """
    Run the validity checks for the given google project id.

    Args:
        google_project_id(str): google project id
        db: database handle forwarded to the validity check (optional)

    Returns:
        The GoogleProjectValidity object after its checks ran, or None if
        any exception occurred while checking (the project is then assumed
        to be invalid by the caller).
    """
    try:
        validity = GoogleProjectValidity(google_project_id)
        validity.check_validity(early_return=True, db=db)
    except Exception as exc:
        # any issues, assume invalid
        # TODO not sure if this is the right way to handle this...
        logger.warning(
            "Project {} determined invalid due to unhandled exception: {}. "
            "Assuming project is invalid.".format(google_project_id, str(exc))
        )
        traceback.print_exc()
        return None

    return validity
260

261

262
def _get_service_account_removal_reasons(service_account_validity):
1✔
263
    """
264
    Get service account removal reason
265

266
    Args:
267
        service_account_validity(GoogleServiceAccountValidity): service account validity
268

269
    Returns:
270
        List[str]: the reason(s) the service account was removed
271
    """
272
    removal_reasons = []
×
273

274
    if service_account_validity is None:
×
275
        return removal_reasons
×
276

277
    if service_account_validity["valid_type"] is False:
×
278
        removal_reasons.append(
×
279
            "It must be a Compute Engine service account or an user-managed service account."
280
        )
281
    if service_account_validity["no_external_access"] is False:
×
282
        removal_reasons.append(
×
283
            "It has either roles attached to it or service account keys generated. We do not allow this because we need to restrict external access."
284
        )
285
    if service_account_validity["owned_by_project"] is False:
×
286
        removal_reasons.append("It is not owned by the project.")
×
287
    if service_account_validity["policy_accessible"] is False:
×
288
        removal_reasons.append(
×
289
            "Either it doesn't exist in Google or "
290
            "we could not access its policy, "
291
            "which is need for further checks."
292
        )
293

294
    return removal_reasons
×
295

296

297
def _get_general_project_removal_reasons(google_project_validity):
1✔
298
    """
299
    Get service account removal reason
300

301
    Args:
302
        google_project_validity(GoogleProjectValidity): google project validity
303

304
    Returns:
305
        List[str]: the reason(s) project was removed
306
    """
307
    removal_reasons = []
×
308

309
    if google_project_validity is None:
×
310
        return removal_reasons
×
311

312
    if google_project_validity["user_has_access"] is False:
×
313
        removal_reasons.append("User isn't a member on the Google Project.")
×
314

315
    if google_project_validity["monitor_has_access"] is False:
×
316
        removal_reasons.append(
×
317
            "Cannot access the project, ensure monitoring service accounts have necessary permissions."
318
        )
319

320
    if google_project_validity["valid_parent_org"] is False:
×
321
        removal_reasons.append("Google Project has a parent orgnization.")
×
322

323
    if google_project_validity["valid_member_types"] is False:
×
324
        removal_reasons.append(
×
325
            "There are members in the Google Project other than Google Users or Google Service Accounts."
326
        )
327

328
    if google_project_validity["members_exist_in_fence"] is False:
×
329
        removal_reasons.append(
×
330
            "Some Google Users on the Google Project do not exist in authentication database."
331
        )
332

333
    return removal_reasons
×
334

335

336
def _get_invalid_sa_project_removal_reasons(google_project_validity):
1✔
337
    """
338
    Get invalid non-registered service account removal reasons
339

340
    Args:
341
        google_project_validity(GoogleProjectValidity): google project validity
342

343
    Returns:
344
        dict: service_account_email: ["list of of why removed", "more reasons"]
345
    """
346
    removal_reasons = {}
×
347

348
    if google_project_validity is None:
×
349
        return removal_reasons
×
350

351
    for sa_email, sa_validity in google_project_validity.get("service_accounts", {}):
×
352
        if not sa_validity:
×
353
            removal_reasons[sa_email] = _get_service_account_removal_reasons(
×
354
                sa_validity
355
            )
356

357
    return removal_reasons
×
358

359

360
def _get_access_removal_reasons(google_project_validity):
1✔
361
    removal_reasons = {}
×
362

363
    if google_project_validity is None:
×
364
        return removal_reasons
×
365

366
    for project, access_validity in google_project_validity.get("access", {}):
×
367
        removal_reasons[project] = []
×
368
        if access_validity["exists"] is False:
×
369
            removal_reasons[project].append(
×
370
                "Data access project {} no longer exists.".format(project)
371
            )
372

373
        if access_validity["all_users_have_access"] is False:
×
374
            removal_reasons[project].append(
×
375
                "Not all users on the Google Project have access to data project {}.".format(
376
                    project
377
                )
378
            )
379

380
    return removal_reasons
×
381

382

383
def _get_google_project_ids_from_service_accounts(registered_service_accounts):
1✔
384
    """
385
    Return a set of just the google project ids that have registered
386
    service accounts.
387
    """
388
    google_projects = set([sa.google_project_id for sa in registered_service_accounts])
×
389
    return google_projects
×
390

391

392
def _get_project_service_account_mapping(registered_service_accounts):
1✔
393
    """
394
    Return a dict with google projects as keys and a list of service accounts
395
    as values.
396

397
    Example:
398
    {
399
        'project_a': [
400
            'service_acount_a@email.com',
401
            'service_acount_b@email.com'
402
        ],
403
        'project_b': [
404
            'service_acount_c@email.com',
405
            'service_acount_d@email.com'
406
        ]
407
    }
408
    """
409
    output = {}
1✔
410
    for sa in registered_service_accounts:
1✔
411
        if sa.google_project_id in output:
1✔
412
            output[sa.google_project_id].append(sa.email)
1✔
413
        else:
414
            output[sa.google_project_id] = [sa.email]
1✔
415

416
    return output
1✔
417

418

419
def _get_user_email_list_from_google_project_with_owner_role(project_id):
    """
    Get the emails of the user members holding the owner role on a
    google project.

    Args:
        project_id(str): project id

    Returns:
        list(str): de-duplicated emails of members whose type is USER and
        that have a role named "owner" (compared case-insensitively)
    """

    with GoogleCloudManager(project_id, use_default=False) as prj:
        # Service accounts, groups, etc. are not notified - only user members.
        user_members = [
            member
            for member in prj.get_project_membership(project_id)
            if member.member_type == GooglePolicyMember.USER
        ]

        owner_emails = set()
        for user in user_members:
            for role in user.roles:
                if role.name.upper() == "OWNER":
                    owner_emails.add(user.email_id)

        return list(owner_emails)
447

448

449
def _send_emails_informing_service_account_removal(
1✔
450
    to_emails, invalid_service_account_reasons, invalid_project_reasons, project_id
451
):
452
    """
453
    Send emails to list of emails
454

455
    Args:
456
        to_emails(list(str)): list of email addaresses
457
        invalid_service_account_reasons(dict): removal reasons of service accounts
458
        project_id(str): google project id
459

460
    Returns:
461
        httpResponse or None: None if input list is empty
462

463
    Exceptions:
464
        ValueError
465

466
    """
467

468
    if not to_emails:
×
469
        return None
×
470

471
    from_email = config["REMOVE_SERVICE_ACCOUNT_EMAIL_NOTIFICATION"]["from"]
×
472
    subject = config["REMOVE_SERVICE_ACCOUNT_EMAIL_NOTIFICATION"]["subject"]
×
473

474
    domain = config["REMOVE_SERVICE_ACCOUNT_EMAIL_NOTIFICATION"]["domain"]
×
475
    if config["REMOVE_SERVICE_ACCOUNT_EMAIL_NOTIFICATION"]["admin"]:
×
476
        to_emails.extend(config["REMOVE_SERVICE_ACCOUNT_EMAIL_NOTIFICATION"]["admin"])
×
477

478
    text = config["REMOVE_SERVICE_ACCOUNT_EMAIL_NOTIFICATION"]["content"]
×
479
    content = text.format(project_id)
×
480

481
    for email, removal_reasons in invalid_service_account_reasons.items():
×
482
        if removal_reasons:
×
483
            content += (
×
484
                "\n\t - Service account {} was removed from Google Project {}.".format(
485
                    email, project_id
486
                )
487
            )
488
            for reason in removal_reasons:
×
489
                content += "\n\t\t - {}".format(reason)
×
490

491
    general_project_errors = invalid_project_reasons.get("general", {})
×
492
    non_reg_sa_errors = invalid_project_reasons.get(
×
493
        "non_registered_service_accounts", {}
494
    )
495
    access_errors = invalid_project_reasons.get("access")
×
496
    if general_project_errors or non_reg_sa_errors or access_errors:
×
497
        content += (
×
498
            "\n\t - Google Project {} determined invalid. All service "
499
            "accounts with data access will be removed from access.".format(project_id)
500
        )
501
        for removal_reason in general_project_errors:
×
502
            if removal_reason:
×
503
                content += "\n\t\t - {}".format(removal_reason)
×
504

505
        if access_errors:
×
506
            for project, removal_reasons in access_errors.items():
×
507
                for reason in removal_reasons:
×
508
                    content += "\n\t\t - {}".format(reason)
×
509

510
        if non_reg_sa_errors:
×
511
            for sa_email, removal_reasons in non_reg_sa_errors.items():
×
512
                content += "\n\t\t - Google Project Service Account {} determined invalid.".format(
×
513
                    sa_email
514
                )
515
                for reason in removal_reasons:
×
516
                    content += "\n\t\t\t - {}".format(reason)
×
517

518
    return fence.config.send_email(from_email, to_emails, subject, content, domain)
×
519

520

521
def _get_users_without_access(db, auth_ids, user_emails, check_linking):
1✔
522
    """
523
    Build list of users without access to projects identified by auth_ids
524

525
    Args:
526
        db (str): database instance
527
        auth_ids (list(str)): list of project auth_ids to check access against
528
        user_emails (list(str)): list of emails to check access for
529
        check_linking (bool): flag to check for linked google email
530

531
    Returns:
532
        dict{str : (list(str))} : dictionary where keys are user emails,
533
        and values are list of project_ids they do not have access to
534

535
    """
536

537
    no_access = {}
1✔
538

539
    for user_email in user_emails:
1✔
540
        user = get_user_by_email(user_email, db) or get_user_by_linked_email(
1✔
541
            user_email, db
542
        )
543

544
        logger.info("Checking access for {}.".format(user.email))
1✔
545

546
        if not user:
1✔
547
            logger.info(
×
548
                "Email ({}) does not exist in fence database.".format(user_email)
549
            )
550
            continue
×
551

552
        if check_linking:
1✔
553
            link_email = get_linked_google_account_email(user.id, db)
×
554
            if not link_email:
×
555
                logger.info(
×
556
                    "User ({}) does not have a linked google account.".format(
557
                        user_email
558
                    )
559
                )
560
                continue
×
561

562
        no_access_auth_ids = []
1✔
563
        for auth_id in auth_ids:
1✔
564
            project = get_project_from_auth_id(auth_id, db)
1✔
565
            if project:
1✔
566
                if not user_has_access_to_project(user, project.id, db):
1✔
567
                    logger.info(
1✔
568
                        "User ({}) does NOT have access to project (auth_id: {})".format(
569
                            user_email, auth_id
570
                        )
571
                    )
572
                    # add to list to send email
573
                    no_access_auth_ids.append(auth_id)
1✔
574
                else:
575
                    logger.info(
1✔
576
                        "User ({}) has access to project (auth_id: {})".format(
577
                            user_email, auth_id
578
                        )
579
                    )
580
            else:
581
                logger.warning("Project (auth_id: {}) does not exist.".format(auth_id))
×
582

583
        if no_access_auth_ids:
1✔
584
            no_access[user_email] = no_access_auth_ids
1✔
585

586
    return no_access
1✔
587

588

589
def email_user_without_access(user_email, projects, google_project_id):
    """
    Send email to user, indicating no access to given projects

    The configured admin addresses for PROBLEM_USER_EMAIL_NOTIFICATION
    (if any) are added as recipients.

    Args:
        user_email (str): address to send email to
        projects (list(str)):  list of projects user does not have access to that they should
        google_project_id (str): id of google project user belongs to
    Returns:
        HTTP response

    """
    notification = config["PROBLEM_USER_EMAIL_NOTIFICATION"]

    sender = notification["from"]
    subject = notification["subject"]
    domain = notification["domain"]

    recipients = [user_email]
    admins = notification["admin"]
    if admins:
        recipients.extend(admins)

    # The content template takes the google project id and the
    # comma-separated project list as positional fields.
    content = notification["content"].format(google_project_id, ",".join(projects))

    return fence.config.send_email(sender, recipients, subject, content, domain)
614

615

616
def email_users_without_access(
    db, auth_ids, user_emails, check_linking, google_project_id
):
    """
    Build list of users without access and send emails.

    No emails are sent when every given user lacks access (that is reported
    as a warning instead). Otherwise each user lacking access is emailed,
    provided they are a user member of the google project.

    Args:
        db: database instance, passed through to the access lookup
        auth_ids (list(str)): list of project auth_ids to check access against
        user_emails (list(str)): list of emails to check access for
        check_linking (bool): flag to check for linked google email
        google_project_id (str): id of the google project whose membership
            decides who gets emailed
    Returns:
        None
    """
    users_without_access = _get_users_without_access(
        db, auth_ids, user_emails, check_linking
    )

    # Checked first so an empty user_emails list (0 == 0) is not reported
    # as "no user has proper access".
    if not users_without_access:
        logger.info("All users have proper access to provided projects.")
        return

    if len(users_without_access) == len(user_emails):
        logger.warning(
            "No user has proper access to provided projects. Contact project administrator. No emails will be sent"
        )
        return

    logger.info(
        "Some user(s) do not have proper access to provided projects. Email(s) will be sent to user(s)."
    )

    with GoogleCloudManager(google_project_id) as gcm:
        members = gcm.get_project_membership(google_project_id)
        # a set, since it is only used for membership tests below
        project_user_emails = {
            member.email_id
            for member in members
            if member.member_type == GooglePolicyMember.USER
        }

    for user, projects in users_without_access.items():
        logger.info(
            "{} does not have access to the following datasets: {}.".format(
                user, ",".join(projects)
            )
        )
        if user in project_user_emails:
            logger.info(
                "{} is a member of google project: {}. User will be emailed.".format(
                    user, google_project_id
                )
            )
            email_user_without_access(user, projects, google_project_id)
        else:
            logger.info(
                "{} is NOT a member of google project: {}. User will NOT be emailed.".format(
                    user, google_project_id
                )
            )
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc