• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

uc-cdis / fence / 17422041606

03 Sep 2025 03:19AM UTC coverage: 74.9% (+0.003%) from 74.897%
17422041606

push

github

k-burt-uch
Expand grant_bulk_user_policies exception catch

8400 of 11215 relevant lines covered (74.9%)

0.75 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

82.04
fence/sync/sync_users.py
1
import backoff
1✔
2
import glob
1✔
3

4
import httpx
1✔
5
import jwt
1✔
6
import os
1✔
7
import re
1✔
8
import subprocess as sp
1✔
9
import yaml
1✔
10
import copy
1✔
11
import datetime
1✔
12
import uuid
1✔
13
import collections
1✔
14
import hashlib
1✔
15

16
from contextlib import contextmanager
1✔
17
from collections import defaultdict
1✔
18
from csv import DictReader
1✔
19
from io import StringIO
1✔
20
from stat import S_ISDIR
1✔
21

22
import paramiko
1✔
23
from cdislogging import get_logger
1✔
24
from email_validator import validate_email, EmailNotValidError
1✔
25
from gen3authz.client.arborist.errors import ArboristError
1✔
26
from gen3users.validation import validate_user_yaml
1✔
27
from paramiko.proxy import ProxyCommand
1✔
28
from sqlalchemy.exc import IntegrityError
1✔
29
from sqlalchemy import func
1✔
30

31
from fence.config import config
1✔
32
from fence.models import (
1✔
33
    AccessPrivilege,
34
    AuthorizationProvider,
35
    Project,
36
    Tag,
37
    User,
38
    query_for_user,
39
    Client,
40
    IdentityProvider,
41
    get_project_to_authz_mapping,
42
)
43
from fence.resources.google.utils import get_or_create_proxy_group_id
1✔
44
from fence.resources.storage import StorageManager
1✔
45
from fence.resources.google.access_utils import update_google_groups_for_users
1✔
46
from fence.resources.google.access_utils import GoogleUpdateException
1✔
47
from fence.sync import utils
1✔
48
from fence.sync.passport_sync.ras_sync import RASVisa
1✔
49
from fence.utils import get_SQLAlchemyDriver, DEFAULT_BACKOFF_SETTINGS
1✔
50

51

52
def _format_policy_id(path, privilege):
1✔
53
    resource = ".".join(name for name in path.split("/") if name)
1✔
54
    return "{}-{}".format(resource, privilege)
1✔
55

56

57
def download_dir(sftp, remote_dir, local_dir):
    """
    Recursively download the contents of remote_dir into local_dir.

    Args:
        sftp: SFTP client exposing `listdir_attr(path)` and `get(remote, local)`
        remote_dir(str): remote directory to copy from
        local_dir(str): local directory to copy into; created if it is missing
    Returns: None
    """
    dir_items = sftp.listdir_attr(remote_dir)

    # Directories discovered on the server do not exist locally yet; without
    # this, writing a downloaded file into a nested directory fails.
    os.makedirs(local_dir, exist_ok=True)

    for item in dir_items:
        remote_path = remote_dir + "/" + item.filename
        local_path = os.path.join(local_dir, item.filename)
        if S_ISDIR(item.st_mode):
            download_dir(sftp, remote_path, local_path)
        else:
            sftp.get(remote_path, local_path)
×
74

75

76
def arborist_role_for_permission(permission):
    """
    Build the arborist role definition for a single privilege.

    For the programs/projects in the existing fence access control model, a
    policy is generated for each combination of program/project and privilege
    so that arborist can be used for checking permissions. Each role therefore
    contains exactly one permission, whose ID and method are both the given
    privilege and whose service is the "*" wildcard.
    """
    single_permission = {
        "id": permission,
        "action": {"service": "*", "method": permission},
    }
    return {"id": permission, "permissions": [single_permission]}
89

90

91
@contextmanager
1✔
92
def _read_file(filepath, encrypted=True, key=None, logger=None):
1✔
93
    """
94
    Context manager for reading and optionally decrypting file it only
95
    decrypts files encrypted by unix 'crypt' tool which is used by dbGaP.
96

97
    Args:
98
        filepath (str): path to the file
99
        encrypted (bool): whether the file is encrypted
100

101
    Returns:
102
        Generator[file-like class]: file like object for the file
103
    """
104
    if encrypted:
1✔
105
        p = sp.Popen(
×
106
            [
107
                "ccdecrypt",
108
                "-u",
109
                "-K",
110
                key,
111
                filepath,
112
            ],
113
            stdout=sp.PIPE,
114
            stderr=open(os.devnull, "w"),
115
            universal_newlines=True,
116
        )
117
        try:
×
118
            yield StringIO(p.communicate()[0])
×
119
        except UnicodeDecodeError:
×
120
            logger.error("Could not decode file. Check the decryption key.")
×
121
    else:
122
        f = open(filepath, "r")
1✔
123
        yield f
1✔
124
        f.close()
1✔
125

126

127
class UserYAML(object):
    """
    Representation of the information in a YAML file describing user, project, and ABAC
    information for access control.
    """

    def __init__(
        self,
        projects=None,
        user_info=None,
        policies=None,
        clients=None,
        authz=None,
        project_to_resource=None,
        logger=None,
        user_abac=None,
    ):
        """
        Store the parsed sections; every mapping argument that is missing or
        empty is replaced by a fresh empty dict.
        """
        self.projects = projects or {}
        self.user_info = user_info or {}
        self.user_abac = user_abac or {}
        self.policies = policies or {}
        self.clients = clients or {}
        self.authz = authz or {}
        self.project_to_resource = project_to_resource or {}
        self.logger = logger

    @classmethod
    def from_file(cls, filepath, encrypted=True, key=None, logger=None):
        """
        Parse a user.yaml file into a UserYAML instance.

        Add access by "auth_id" to "self.projects" to update the Fence DB.
        Add access by "resource" to "self.user_abac" to update Arborist.

        Args:
            filepath (str): path to the YAML file; when falsy nothing is read
                and an empty instance is returned
            encrypted (bool): whether the file is encrypted
            key (str): decryption key, used when `encrypted` is true
            logger: optional logger

        Returns:
            UserYAML: the parsed representation

        Raises:
            EnvironmentError: if a username occurs more than once
        """
        data = {}
        if filepath:
            with _read_file(filepath, encrypted=encrypted, key=key, logger=logger) as f:
                file_contents = f.read()
                validate_user_yaml(file_contents)  # run user.yaml validation tests
                # an empty YAML document loads as None; treat it as "no data"
                data = yaml.safe_load(file_contents) or {}
        else:
            if logger:
                logger.info("Did not sync a user.yaml, no file path provided.")

        projects = dict()
        user_info = dict()
        policies = dict()

        # resources should be the resource tree to construct in arborist
        user_abac = dict()

        # Fall back on rbac block if no authz. Remove when rbac in useryaml fully deprecated.
        if not data.get("authz") and data.get("rbac"):
            if logger:
                logger.info(
                    "No authz block found but rbac block present. Using rbac block"
                )
            data["authz"] = data["rbac"]

        # get user project mapping to arborist resources if it exists
        project_to_resource = data.get("authz", dict()).get(
            "user_project_to_resource", dict()
        )

        # read projects and privileges for each user
        # (`users:` with no entries loads as None, hence the `or {}`)
        users = data.get("users") or {}
        for username, details in users.items():
            # users should occur only once each; skip if already processed
            if username in projects:
                msg = "invalid yaml file: user `{}` occurs multiple times".format(
                    username
                )
                if logger:
                    logger.error(msg)
                raise EnvironmentError(msg)

            privileges = {}
            resource_permissions = dict()
            for project in details.get("projects", {}):
                try:
                    privileges[project["auth_id"]] = set(project["privilege"])
                except KeyError as e:
                    if logger:
                        logger.error("project {} missing field: {}".format(project, e))
                    continue

                # project may not have `resource` field.
                # prefer resource field;
                # if no resource or mapping, assume auth_id is resource.
                resource = project.get("resource", project["auth_id"])

                if project["auth_id"] not in project_to_resource:
                    project_to_resource[project["auth_id"]] = resource
                resource_permissions[resource] = set(project["privilege"])

            user_info[username] = {
                "email": details.get("email", ""),
                "display_name": details.get("display_name", ""),
                "phone_number": details.get("phone_number", ""),
                "tags": details.get("tags", {}),
                "admin": details.get("admin", False),
            }
            if not details.get("email"):
                # no explicit email: use the username itself when it is a
                # syntactically valid email address
                try:
                    valid = validate_email(
                        username, allow_smtputf8=False, check_deliverability=False
                    )
                    user_info[username]["email"] = valid.email
                except EmailNotValidError:
                    pass
            projects[username] = privileges
            user_abac[username] = resource_permissions

            # list of policies we want to grant to this user, which get sent to arborist
            # to check if they're allowed to do certain things
            policies[username] = details.get("policies", [])

        if logger:
            logger.info(
                "Got user project to arborist resource mapping:\n{}".format(
                    str(project_to_resource)
                )
            )

        authz = data.get("authz", dict())
        if not authz:
            # older version: resources in root, no `authz` section or `rbac` section
            if logger:
                logger.warning(
                    "access control YAML file is using old format (missing `authz`/`rbac`"
                    " section in the root); assuming that if it exists `resources` will"
                    " be on the root level, and continuing"
                )
            # we're going to throw it into the `authz` dictionary anyways, so the rest of
            # the code can pretend it's in the normal place that we expect
            resources = data.get("resources", [])
            # keep authz empty dict if resources is not specified
            if resources:
                authz["resources"] = resources

        clients = data.get("clients", {})

        return cls(
            projects=projects,
            user_info=user_info,
            user_abac=user_abac,
            policies=policies,
            clients=clients,
            authz=authz,
            project_to_resource=project_to_resource,
            logger=logger,
        )

    def persist_project_to_resource(self, db_session):
        """
        Store the mappings from Project.auth_id to authorization resource (Project.authz)

        The mapping comes from an external source, this function persists what was parsed
        into memory into the database for future use.

        Existing projects get their `authz` updated; missing ones are created
        with `name` equal to the auth_id. Everything is committed once at the end.
        """
        for auth_id, authz_resource in self.project_to_resource.items():
            project = (
                db_session.query(Project).filter(Project.auth_id == auth_id).first()
            )
            if project:
                project.authz = authz_resource
            else:
                project = Project(name=auth_id, auth_id=auth_id, authz=authz_resource)
                db_session.add(project)
        db_session.commit()
1✔
295

296

297
class UserSyncer(object):
1✔
298
    def __init__(
        self,
        dbGaP,
        DB,
        project_mapping,
        storage_credentials=None,
        db_session=None,
        is_sync_from_dbgap_server=False,
        sync_from_local_csv_dir=None,
        sync_from_local_yaml_file=None,
        arborist=None,
        folder=None,
    ):
        """
        Syncs ACL files from dbGap to auth database and storage backends
        Args:
            dbGaP: a list of dict containing creds to access dbgap sftp
            DB: database connection string
            project_mapping: a dict containing how dbgap ids map to projects
            storage_credentials: a dict containing creds for storage backends
            db_session: stored as `self.session`
            is_sync_from_dbgap_server: stored as-is on the instance
            sync_from_local_csv_dir: stored as-is on the instance
            sync_from_local_yaml_file: stored as-is on the instance
            arborist:
                ArboristClient instance if the syncer should also create
                resources in arborist
            folder: a local folder where dbgap telemetry files will sync to
        """
        # sources of access information
        self.dbGaP = dbGaP
        self.is_sync_from_dbgap_server = is_sync_from_dbgap_server
        self.sync_from_local_csv_dir = sync_from_local_csv_dir
        self.sync_from_local_yaml_file = sync_from_local_yaml_file
        self.folder = folder

        # database access
        self.session = db_session
        self.driver = get_SQLAlchemyDriver(DB)

        # bookkeeping filled in while syncing
        self.project_mapping = project_mapping or {}
        self._projects = dict()
        self._created_roles = set()
        self._created_policies = set()
        self._dbgap_study_to_resources = dict()
        self.id_patterns = []

        log_level = "debug" if config["DEBUG"] is True else "info"
        self.logger = get_logger("user_syncer", log_level=log_level)
        self.arborist_client = arborist

        # auth_source used for logging. username : [source1, source2]
        self.auth_source = defaultdict(set)
        self.visa_types = config.get("USERSYNC", {}).get("visa_types", {})

        # merge the parent -> child study mappings of every dbGaP config;
        # later configs win on duplicate parents
        self.parent_to_child_studies_mapping = {}
        for dbgap_config in dbGaP:
            child_mapping = dbgap_config.get("parent_to_child_studies_mapping", {})
            self.parent_to_child_studies_mapping.update(child_mapping)

        # NOTE: `storage_manager` is only defined when credentials are given
        if storage_credentials:
            self.storage_manager = StorageManager(
                storage_credentials, logger=self.logger
            )
1✔
355

356
    @staticmethod
1✔
357
    def _match_pattern(filepath, id_patterns, encrypted=True):
1✔
358
        """
359
        Check if the filename matches dbgap access control file pattern
360

361
        Args:
362
            filepath (str): path to file
363
            encrypted (bool): whether the file is encrypted
364

365
        Returns:
366
            bool: whether the pattern matches
367
        """
368
        id_patterns.append(r"authentication_file_phs(\d{6}).(csv|txt)")
1✔
369
        for pattern in id_patterns:
1✔
370
            if encrypted:
1✔
371
                pattern += r".enc"
×
372
            pattern += r"$"
1✔
373
            # when converting the YAML from fence-config,
374
            # python reads it as Python string literal. So "\" turns into "\\"
375
            # which messes with the regex match
376
            pattern.replace("\\\\", "\\")
1✔
377
            if re.match(pattern, os.path.basename(filepath)):
1✔
378
                return True
1✔
379
        return False
1✔
380

381
    def _get_from_sftp_with_proxy(self, server, path):
        """
        Download all data from sftp sever to a local dir

        When `server["proxy"]` is set, the connection is tunnelled through an
        `ssh ... nc` proxy command; the proxy is closed again even if
        connecting or downloading fails.

        Args:
            server (dict) : dictionary containing info to access sftp server
            path (str): path to local directory

        Returns:
            None
        """
        proxy = None
        if server.get("proxy", "") != "":
            command = "ssh -oHostKeyAlgorithms=+ssh-rsa -i ~/.ssh/id_rsa {user}@{proxy} nc {host} {port}".format(
                user=server.get("proxy_user", ""),
                proxy=server.get("proxy", ""),
                host=server.get("host", ""),
                port=server.get("port", 22),
            )
            self.logger.info("SSH proxy command: {}".format(command))

            proxy = ProxyCommand(command)

        try:
            with paramiko.SSHClient() as client:
                client.set_log_channel(self.logger.name)

                # NOTE(review): AutoAddPolicy accepts unknown host keys without
                # verification -- confirm this is acceptable for the deployment.
                client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
                parameters = {
                    "hostname": str(server.get("host", "")),
                    "username": str(server.get("username", "")),
                    "password": str(server.get("password", "")),
                    "port": int(server.get("port", 22)),
                }
                if proxy:
                    parameters["sock"] = proxy

                self.logger.info(
                    "SSH connection hostname:post {}:{}".format(
                        parameters.get("hostname", "unknown"),
                        parameters.get("port", "unknown"),
                    )
                )
                self._connect_with_ssh(ssh_client=client, parameters=parameters)
                with client.open_sftp() as sftp:
                    download_dir(sftp, "./", path)
        finally:
            # release the proxy on failure too, not only on success
            if proxy:
                proxy.close()
×
429

430
    @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
    def _connect_with_ssh(self, ssh_client, parameters):
        """
        Open the SSH connection, retrying on any exception.

        Retries use exponential backoff, configured by DEFAULT_BACKOFF_SETTINGS.

        Args:
            ssh_client: client object whose `connect` method is called
            parameters (dict): keyword arguments forwarded to `connect`
        """
        ssh_client.connect(**parameters)
1✔
433

434
    def _get_from_ftp_with_proxy(self, server, path):
        """
        Download data from ftp sever to a local dir

        Runs `lftp` through the shell to mirror the server's current
        directory into `path` via the configured HTTP proxy. The command's
        exit status is not checked.

        Args:
            server (dict): dictionary containing information for accessing server
            path(str): path to local files

        Returns:
            None
        """
        import shlex

        credentials = "{},{}".format(
            server.get("username", ""), server.get("password", "")
        )
        lftp_commands = "set ftp:proxy http://{}; mirror . {}; exit".format(
            server.get("proxy", ""), path
        )
        # Quote every interpolated value so that spaces or shell
        # metacharacters in the configuration cannot alter the command.
        # NOTE(review): the password is still visible in the process list.
        execstr = "lftp -u {} {} -e {}".format(
            shlex.quote(credentials),
            shlex.quote(str(server.get("host", ""))),
            shlex.quote(lftp_commands),
        )
        os.system(execstr)
×
455

456
    def _get_parse_consent_code(self, dbgap_config={}):
1✔
457
        return dbgap_config.get(
1✔
458
            "parse_consent_code", True
459
        )  # Should this really be true?
460

461
    def _parse_csv(self, file_dict, sess, dbgap_config={}, encrypted=True):
1✔
462
        """
463
        parse csv files to python dict
464

465
        Args:
466
            file_dict: a dictionary with key(file path) and value(privileges)
467
            sess: sqlalchemy session
468
            dbgap_config: a dictionary containing information about the dbGaP sftp server
469
                (comes from fence config)
470
            encrypted: boolean indicating whether those files are encrypted
471

472

473
        Return:
474
            Tuple[[dict, dict]]:
475
                (user_project, user_info) where user_project is a mapping from
476
                usernames to project permissions and user_info is a mapping
477
                from usernames to user details, such as email
478

479
        Example:
480

481
            (
482
                {
483
                    username: {
484
                        'project1': {'read-storage','write-storage'},
485
                        'project2': {'read-storage'},
486
                    }
487
                },
488
                {
489
                    username: {
490
                        'email': 'email@mail.com',
491
                        'display_name': 'display name',
492
                        'phone_number': '123-456-789',
493
                        'tags': {'dbgap_role': 'PI'}
494
                    }
495
                },
496
            )
497

498
        """
499
        user_projects = dict()
1✔
500
        user_info = defaultdict(dict)
1✔
501

502
        # parse dbGaP sftp server information
503
        dbgap_key = dbgap_config.get("decrypt_key", None)
1✔
504

505
        self.id_patterns += (
1✔
506
            [
507
                item.replace("\\\\", "\\")
508
                for item in dbgap_config.get("allowed_whitelist_patterns", [])
509
            ]
510
            if dbgap_config.get("allow_non_dbGaP_whitelist", False)
511
            else []
512
        )
513

514
        enable_common_exchange_area_access = dbgap_config.get(
1✔
515
            "enable_common_exchange_area_access", False
516
        )
517
        study_common_exchange_areas = dbgap_config.get(
1✔
518
            "study_common_exchange_areas", {}
519
        )
520
        parse_consent_code = self._get_parse_consent_code(dbgap_config)
1✔
521

522
        if parse_consent_code and enable_common_exchange_area_access:
1✔
523
            self.logger.info(
1✔
524
                f"using study to common exchange area mapping: {study_common_exchange_areas}"
525
            )
526

527
        project_id_patterns = [r"phs(\d{6})"]
1✔
528
        if "additional_allowed_project_id_patterns" in dbgap_config:
1✔
529
            patterns = dbgap_config.get("additional_allowed_project_id_patterns")
1✔
530
            patterns = [
1✔
531
                pattern.replace("\\\\", "\\") for pattern in patterns
532
            ]  # when converting the YAML from fence-config, python reads it as Python string literal. So "\" turns into "\\" which messes with the regex match
533
            project_id_patterns += patterns
1✔
534

535
        self.logger.info(f"Using these file paths: {file_dict.items()}")
1✔
536
        for filepath, privileges in file_dict.items():
1✔
537
            self.logger.info("Reading file {}".format(filepath))
1✔
538
            if os.stat(filepath).st_size == 0:
1✔
539
                self.logger.warning("Empty file {}".format(filepath))
×
540
                continue
×
541
            if not self._match_pattern(
1✔
542
                filepath, id_patterns=self.id_patterns, encrypted=encrypted
543
            ):
544
                self.logger.warning(
1✔
545
                    "Filename {} does not match dbgap access control filename pattern;"
546
                    " this could mean that the filename has an invalid format, or has"
547
                    " an unexpected .enc extension, or lacks the .enc extension where"
548
                    " expected. This file is NOT being processed by usersync!".format(
549
                        filepath
550
                    )
551
                )
552
                continue
1✔
553

554
            with _read_file(
1✔
555
                filepath, encrypted=encrypted, key=dbgap_key, logger=self.logger
556
            ) as f:
557
                csv = DictReader(f, quotechar='"', skipinitialspace=True)
1✔
558

559
                for row in csv:
1✔
560
                    username = row.get("login") or ""
1✔
561
                    if username == "":
1✔
562
                        continue
×
563

564
                    if dbgap_config.get("allow_non_dbGaP_whitelist", False):
1✔
565
                        phsid = (
1✔
566
                            row.get("phsid") or (row.get("project_id") or "")
567
                        ).split(".")
568
                    else:
569
                        phsid = (row.get("phsid") or "").split(".")
1✔
570

571
                    dbgap_project = phsid[0]
1✔
572
                    # There are issues where dbgap has a wrong entry in their whitelist. Since we do a bulk arborist request, there are wrong entries in it that invalidates the whole request causing other correct entries not to be added
573
                    skip = False
1✔
574
                    for pattern in project_id_patterns:
1✔
575
                        self.logger.debug(
1✔
576
                            "Checking pattern:{} with project_id:{}".format(
577
                                pattern, dbgap_project
578
                            )
579
                        )
580
                        if re.match(pattern, dbgap_project):
1✔
581
                            skip = False
1✔
582
                            break
1✔
583
                        else:
584
                            skip = True
1✔
585
                    if skip:
1✔
586
                        self.logger.warning(
1✔
587
                            "Skip processing from file {}, user {} with project {}".format(
588
                                filepath,
589
                                username,
590
                                dbgap_project,
591
                            )
592
                        )
593
                        continue
1✔
594
                    if len(phsid) > 1 and parse_consent_code:
1✔
595
                        consent_code = phsid[-1]
1✔
596

597
                        # c999 indicates full access to all consents and access
598
                        # to a study-specific exchange area
599
                        # access to at least one study-specific exchange area implies access
600
                        # to the parent study's common exchange area
601
                        #
602
                        # NOTE: Handling giving access to all consents is done at
603
                        #       a later time, when we have full information about possible
604
                        #       consents
605
                        self.logger.debug(
1✔
606
                            f"got consent code {consent_code} from dbGaP project "
607
                            f"{dbgap_project}"
608
                        )
609
                        if (
1✔
610
                            consent_code == "c999"
611
                            and enable_common_exchange_area_access
612
                            and dbgap_project in study_common_exchange_areas
613
                        ):
614
                            self.logger.info(
1✔
615
                                "found study with consent c999 and Fence "
616
                                "is configured to parse exchange area data. Giving user "
617
                                f"{username} {privileges} privileges in project: "
618
                                f"{study_common_exchange_areas[dbgap_project]}."
619
                            )
620
                            self._add_dbgap_project_for_user(
1✔
621
                                study_common_exchange_areas[dbgap_project],
622
                                privileges,
623
                                username,
624
                                sess,
625
                                user_projects,
626
                                dbgap_config,
627
                            )
628

629
                        dbgap_project += "." + consent_code
1✔
630

631
                    self._add_children_for_dbgap_project(
1✔
632
                        dbgap_project,
633
                        privileges,
634
                        username,
635
                        sess,
636
                        user_projects,
637
                        dbgap_config,
638
                    )
639

640
                    display_name = row.get("user name") or ""
1✔
641
                    tags = {"dbgap_role": row.get("role") or ""}
1✔
642

643
                    # some dbgap telemetry files have information about a researchers PI
644
                    if "downloader for" in row:
1✔
645
                        tags["pi"] = row["downloader for"]
1✔
646

647
                    # prefer name over previous "downloader for" if it exists
648
                    if "downloader for names" in row:
1✔
649
                        tags["pi"] = row["downloader for names"]
×
650

651
                    user_info[username] = {
1✔
652
                        "email": row.get("email")
653
                        or user_info[username].get("email")
654
                        or "",
655
                        "display_name": display_name,
656
                        "phone_number": row.get("phone")
657
                        or user_info[username].get("phone_number")
658
                        or "",
659
                        "tags": tags,
660
                    }
661

662
                    self._process_dbgap_project(
1✔
663
                        dbgap_project,
664
                        privileges,
665
                        username,
666
                        sess,
667
                        user_projects,
668
                        dbgap_config,
669
                    )
670

671
        return user_projects, user_info
1✔
672

673
    def _get_children(self, dbgap_project):
1✔
674
        return self.parent_to_child_studies_mapping.get(dbgap_project.split(".")[0])
1✔
675

676
    def _add_children_for_dbgap_project(
1✔
677
        self, dbgap_project, privileges, username, sess, user_projects, dbgap_config
678
    ):
679
        """
680
        Adds the configured child studies for the given dbgap_project, adding it to the provided user_projects. If
681
        parse_consent_code is true, then the consents granted in the provided dbgap_project will also be granted to the
682
        child studies.
683
        """
684
        parent_phsid = dbgap_project
1✔
685
        parse_consent_code = self._get_parse_consent_code(dbgap_config)
1✔
686
        child_suffix = ""
1✔
687
        if parse_consent_code and re.match(
1✔
688
            config["DBGAP_ACCESSION_WITH_CONSENT_REGEX"], dbgap_project
689
        ):
690
            parent_phsid_parts = dbgap_project.split(".")
1✔
691
            parent_phsid = parent_phsid_parts[0]
1✔
692
            child_suffix = "." + parent_phsid_parts[1]
1✔
693

694
        if parent_phsid not in self.parent_to_child_studies_mapping:
1✔
695
            return
1✔
696

697
        self.logger.info(
1✔
698
            f"found parent study {parent_phsid} and Fence "
699
            "is configured to provide additional access to child studies. Giving user "
700
            f"{username} {privileges} privileges in projects: "
701
            f"{{k + child_suffix: v + child_suffix for k, v in self.parent_to_child_studies_mapping.items()}}."
702
        )
703
        child_studies = self.parent_to_child_studies_mapping.get(parent_phsid, [])
1✔
704
        for child_study in child_studies:
1✔
705
            self._add_dbgap_project_for_user(
1✔
706
                child_study + child_suffix,
707
                privileges,
708
                username,
709
                sess,
710
                user_projects,
711
                dbgap_config,
712
            )
713

714
    def _add_dbgap_project_for_user(
1✔
715
        self, dbgap_project, privileges, username, sess, user_projects, dbgap_config
716
    ):
717
        """
718
        Helper function for csv parsing that adds a given dbgap project to Fence/Arborist
719
        and then updates the dictionary containing all user's project access
720
        """
721
        if dbgap_project not in self._projects:
1✔
722
            self.logger.debug(
1✔
723
                "creating Project in fence for dbGaP study: {}".format(dbgap_project)
724
            )
725

726
            project = self._get_or_create(sess, Project, auth_id=dbgap_project)
1✔
727

728
            # need to add dbgap project to arborist
729
            if self.arborist_client:
1✔
730
                self._determine_arborist_resource(dbgap_project, dbgap_config)
1✔
731

732
            if project.name is None:
1✔
733
                project.name = dbgap_project
1✔
734
            self._projects[dbgap_project] = project
1✔
735
        phsid_privileges = {dbgap_project: set(privileges)}
1✔
736
        if username in user_projects:
1✔
737
            user_projects[username].update(phsid_privileges)
1✔
738
        else:
739
            user_projects[username] = phsid_privileges
1✔
740

741
    @staticmethod
1✔
742
    def sync_two_user_info_dict(user_info1, user_info2):
1✔
743
        """
744
        Merge user_info1 into user_info2. Values in user_info2 are overriden
745
        by values in user_info1. user_info2 ends up containing the merged dict.
746

747
        Args:
748
            user_info1 (dict): nested dict
749
            user_info2 (dict): nested dict
750

751
            Example:
752
            {username: {'email': 'abc@email.com'}}
753

754
        Returns:
755
            None
756
        """
757
        user_info2.update(user_info1)
1✔
758

759
    def sync_two_phsids_dict(
        self,
        phsids1,
        phsids2,
        source1=None,
        source2=None,
        phsids2_overrides_phsids1=True,
    ):
        """
        Merge phsids1 into phsids2. phsids2 ends up containing the merged dict;
        phsids1 is never modified.

        For each user in phsids1:

            - if phsids2 has no (or empty) access for the user, the user's
              projects from phsids1 are copied into phsids2;
            - otherwise, if `phsids2_overrides_phsids1` is True, the
              privileges of both dicts are unioned per project;
            - otherwise the user's access in phsids2 is left untouched.

        `source1` and `source2`: when given, they are recorded in
        `self.auth_source[user]` for whichever side contributed access.

        Args:
            phsids1, phsids2: nested dicts mapping phsids to sets of permissions

            source1, source2: source of authz information (eg. dbgap, user_yaml, visas)

            Example:
            {
                username: {
                    phsid1: {'read-storage','write-storage'},
                    phsid2: {'read-storage'},
                }
            }

        Return:
            None

        Explanation:
            Consider merging projects of the same user:

                {user1: {phsid1: privilege1}}

                {user1: {phsid2: privilege2}}

            case 1: phsid1 != phsid2. Output:

                {user1: {phsid1: privilege1, phsid2: privilege2}}

            case 2: phsid1 == phsid2 and privilege1 != privilege2. Output:

                {user1: {phsid1: union(privilege1, privilege2)}}

            For the other cases, just simple addition
        """

        for user, projects1 in phsids1.items():
            if not phsids2.get(user):
                if source1:
                    self.auth_source[user].add(source1)
                # Copy instead of aliasing: later merges update the sets
                # stored in phsids2 in place, which would otherwise silently
                # change phsids1 as well.
                phsids2[user] = copy.deepcopy(projects1)
            elif phsids2_overrides_phsids1:
                if source1:
                    self.auth_source[user].add(source1)
                if source2:
                    self.auth_source[user].add(source2)
                merged_projects = phsids2[user]
                for phsid1, privilege1 in projects1.items():
                    merged_projects.setdefault(phsid1, set()).update(privilege1)
            elif source2:
                self.auth_source[user].add(source2)
×
823

824
    def sync_to_db_and_storage_backend(
        self,
        user_project,
        user_info,
        sess,
        do_not_revoke_from_db_and_storage=False,
        expires=None,
    ):
        """
        Bring the database and the storage backends in line with the given
        user access.

        Args:
            user_project (dict): a dictionary of

                {
                    username: {
                        'project1': {'read-storage','write-storage'},
                        'project2': {'read-storage'}
                    }
                }

            user_info (dict): a dictionary of {username: user_info{}}
            sess: a sqlalchemy session
            do_not_revoke_from_db_and_storage (bool): when True, access that
                is in the db but not in `user_project` is kept, and the admin
                flag validation is skipped
            expires: forwarded to the storage grants

        Return:
            None
        """
        revoke_stale_access = not do_not_revoke_from_db_and_storage
        google_bulk_mapping = {} if config["GOOGLE_BULK_UPDATES"] else None

        self._init_projects(user_project, sess)

        # index 0 is dbGaP, index 1 is fence
        auth_provider_list = [
            self._get_or_create(sess, AuthorizationProvider, name=provider_name)
            for provider_name in ("dbGaP", "fence")
        ]

        cur_db_user_project_list = {
            (ua.user.username.lower(), ua.project.auth_id)
            for ua in sess.query(AccessPrivilege).all()
        }

        # usernames are compared case-insensitively between the db and the
        # incoming access, so everything is keyed by the lower-cased username
        user_project_lowercase = {
            username.lower(): projects for username, projects in user_project.items()
        }
        syncing_user_project_list = {
            (username.lower(), project)
            for username, projects in user_project.items()
            for project in projects
        }
        user_info_lowercase = {
            username.lower(): info for username, info in user_info.items()
        }

        to_delete = cur_db_user_project_list - syncing_user_project_list
        to_add = syncing_user_project_list - cur_db_user_project_list
        to_update = cur_db_user_project_list & syncing_user_project_list

        # the user records themselves keep the original username casing, so
        # the non-lowered user_info dict is used here
        self._upsert_userinfo(sess, user_info)

        if revoke_stale_access:
            self._revoke_from_storage(
                to_delete, sess, google_bulk_mapping=google_bulk_mapping
            )
            self._revoke_from_db(sess, to_delete)

        # new access: storage first, then the db
        self._grant_from_storage(
            to_add,
            user_project_lowercase,
            sess,
            google_bulk_mapping=google_bulk_mapping,
            expires=expires,
        )
        self._grant_from_db(
            sess,
            to_add,
            user_info_lowercase,
            user_project_lowercase,
            auth_provider_list,
        )

        # existing access is granted again in storage and refreshed in the db
        self._grant_from_storage(
            to_update,
            user_project_lowercase,
            sess,
            google_bulk_mapping=google_bulk_mapping,
            expires=expires,
        )
        self._update_from_db(sess, to_update, user_project_lowercase)

        if revoke_stale_access:
            self._validate_and_update_user_admin(sess, user_info_lowercase)

        sess.commit()

        if config["GOOGLE_BULK_UPDATES"]:
            self.logger.info("Doing bulk Google update...")
            update_google_groups_for_users(google_bulk_mapping)
            self.logger.info("Bulk Google update done!")

        sess.commit()
1✔
933

934
    def sync_to_storage_backend(
        self, user_project, user_info, sess, expires, skip_google_updates=False
    ):
        """
        Grant a single user's access in the storage backend, with an expiration.

        Args:
            user_project (dict): a dictionary of

                {
                    username: {
                        'project1': {'read-storage','write-storage'},
                        'project2': {'read-storage'}
                    }
                }

            user_info (dict): a dictionary of attributes for a user.
            sess: a sqlalchemy session
            expires (int): time at which synced Arborist policies and
                   inclusion in any GBAG are set to expire
            skip_google_updates (bool): True if google group updates should be skipped. False if otherwise.
        Return:
            None

        Raises:
            Exception: if `expires` is falsy
        """
        if not expires:
            raise Exception(
                f"sync to storage backend requires an expiration. you provided: {expires}"
            )

        bulk_updates_enabled = config["GOOGLE_BULK_UPDATES"]
        google_group_user_mapping = None
        if bulk_updates_enabled:
            google_group_user_mapping = {}
            get_or_create_proxy_group_id(
                expires=expires,
                user_id=user_info["user_id"],
                username=user_info["username"],
                session=sess,
                storage_manager=self.storage_manager,
            )

        # TODO: eventually it'd be nice to remove this step but it's required
        #       so that grant_from_storage can determine what storage backends
        #       are needed for a project.
        self._init_projects(user_project, sess)

        # access is keyed by the lower-cased username
        user_project_lowercase = {
            username.lower(): projects for username, projects in user_project.items()
        }
        to_add = {
            (username.lower(), project)
            for username, projects in user_project.items()
            for project in projects
        }

        # the user is upserted under the lower-cased username
        self._upsert_userinfo(sess, {user_info["username"].lower(): user_info})

        if not skip_google_updates:
            self._grant_from_storage(
                to_add,
                user_project_lowercase,
                sess,
                google_bulk_mapping=google_group_user_mapping,
                expires=expires,
            )

            if config["GOOGLE_BULK_UPDATES"]:
                self.logger.info("Updating user's google groups ...")
                update_google_groups_for_users(google_group_user_mapping)
                self.logger.info("Google groups update done!!")

        sess.commit()
1✔
1008

1009
    def _revoke_from_db(self, sess, to_delete):
        """
        Delete the AccessPrivilege rows for the given (user, project) pairs.

        Args:
            sess: sqlalchemy session
            to_delete: a set of (username, project.auth_id) to be revoked from db;
                the username is matched against the lower-cased db username
        Return:
            None
        """
        for username, project_auth_id in to_delete:
            matching_access = (
                sess.query(AccessPrivilege)
                .filter(AccessPrivilege.project.has(auth_id=project_auth_id))
                .join(AccessPrivilege.user)
                .filter(func.lower(User.username) == username)
            )
            message = "revoke {} access to {} in db".format(username, project_auth_id)
            for access in matching_access.all():
                self.logger.info(message)
                sess.delete(access)
1✔
1032

1033
    def _validate_and_update_user_admin(self, sess, user_info):
        """
        Drop the admin flag from every db admin that is missing from `user_info`.

        Args:
            sess: sqlalchemy session
            user_info: a dict keyed by lower-cased username
            {
                username: {
                    'email': email,
                    'display_name': display_name,
                    'phone_number': phonenum,
                    'tags': {'k1':'v1', 'k2': 'v2'}
                    'admin': is_admin
                }
            }
        Returns:
            None
        """
        current_admins = sess.query(User).filter_by(is_admin=True).all()
        for admin_user in current_admins:
            lowered_name = admin_user.username.lower()
            if lowered_name in user_info:
                continue

            admin_user.is_admin = False
            sess.add(admin_user)
            self.logger.info("remove admin access from {} in db".format(lowered_name))
1061

1062
    def _update_from_db(self, sess, to_update, user_project):
        """
        Overwrite the stored privileges of existing AccessPrivilege rows.

        Args:
            sess: sqlalchemy session
            to_update:
                a set of (username, project.auth_id) to be updated from db
            user_project:
                a dictionary of {username: {project: privileges}} holding the
                new privileges

        Return:
            None
        """
        for username, project_auth_id in to_update:
            existing_rows = (
                sess.query(AccessPrivilege)
                .filter(AccessPrivilege.project.has(auth_id=project_auth_id))
                .join(AccessPrivilege.user)
                .filter(func.lower(User.username) == username)
            )
            for access in existing_rows.all():
                access.privilege = user_project[username][project_auth_id]
                message = "update {} with {} access to {} in db".format(
                    username, access.privilege, project_auth_id
                )
                self.logger.info(message)
1090

1091
    def _grant_from_db(self, sess, to_add, user_info, user_project, auth_provider_list):
        """
        Create AccessPrivilege rows for newly granted (user, project) pairs.

        Args:
            sess: sqlalchemy session
            to_add: a set of (username, project.auth_id) to be granted
            user_info:
                a dictionary of {username: {'tags': {...}, ...}}; a
                'dbgap_role' tag selects the first auth provider
            user_project:
                a dictionary of {username: {project: {'read','write'}}
            auth_provider_list:
                two auth providers; index 0 is used for users with a
                'dbgap_role' tag, index 1 for everyone else
        Return:
            None
        """
        for username, project_auth_id in to_add:
            db_user = query_for_user(session=sess, username=username)

            has_dbgap_role = "dbgap_role" in user_info[username]["tags"]
            provider = auth_provider_list[0 if has_dbgap_role else 1]

            user_access = AccessPrivilege(
                user=db_user,
                project=self._projects[project_auth_id],
                privilege=list(user_project[username][project_auth_id]),
                auth_provider=provider,
            )
            message = "grant user {} to {} with access {}".format(
                username, user_access.project, user_access.privilege
            )
            self.logger.info(message)
            sess.add(user_access)
1✔
1120

1121
    def _upsert_userinfo(self, sess, user_info):
        """
        Create or update the users in `user_info` in the database.

        For every user: the db user is created if missing, the user is
        registered in arborist (when a client is configured), and email,
        display name, phone number and admin flag are overwritten. The
        identity provider is only set when the user has none yet. Tags are
        synced only when the incoming info carries a non-empty 'tags' dict.

        Args:
            sess: sqlalchemy session
            user_info:
                a dict of {username: {display_name, phone_number, tags, admin}

        Return:
            None
        """

        for username, info in user_info.items():
            u = query_for_user(session=sess, username=username)

            if u is None:
                self.logger.info("create user {}".format(username))
                u = User(username=username)
                sess.add(u)

            if self.arborist_client:
                self.arborist_client.create_user({"name": username})

            u.email = info.get("email", "")
            u.display_name = info.get("display_name", "")
            u.phone_number = info.get("phone_number", "")
            u.is_admin = info.get("admin", False)

            idp_name = info.get("idp_name", "")
            if idp_name and not u.identity_provider:
                idp = (
                    sess.query(IdentityProvider)
                    .filter(IdentityProvider.name == idp_name)
                    .first()
                )
                if not idp:
                    idp = IdentityProvider(name=idp_name)
                u.identity_provider = idp

            # do not update if there is no tag
            new_tags = info.get("tags")
            if not new_tags:
                continue

            # remove user db tags if they are not shown in new tags.
            # Iterate over a snapshot: removing from u.tags while iterating
            # over it makes the loop skip the element after each removal, so
            # some stale tags would survive.
            for tag in list(u.tags):
                if tag.key not in new_tags:
                    u.tags.remove(tag)

            # sync
            for k, v in new_tags.items():
                matching_tags = [tag for tag in u.tags if tag.key == k]
                for tag in matching_tags:
                    tag.value = v
                # create new tag if not found
                if not matching_tags:
                    u.tags.append(Tag(key=k, value=v))
1✔
1181

1182
    def _revoke_from_storage(self, to_delete, sess, google_bulk_mapping=None):
        """
        Revoke users' access in every storage backend attached to the given
        projects.

        Args:
            to_delete: a set of (username, project.auth_id) to be revoked
            sess: sqlalchemy session
            google_bulk_mapping: forwarded to the storage manager

        Return:
            None
        """
        for username, project_auth_id in to_delete:
            project = (
                sess.query(Project).filter(Project.auth_id == project_auth_id).first()
            )
            for sa in project.storage_access:
                if hasattr(self, "storage_manager"):
                    self.logger.info(
                        "revoke {} access to {} in {}".format(
                            username, project_auth_id, sa.provider.name
                        )
                    )
                    self.storage_manager.revoke_access(
                        provider=sa.provider.name,
                        username=username,
                        project=project,
                        session=sess,
                        google_bulk_mapping=google_bulk_mapping,
                    )
                else:
                    # without a storage manager the revocation can only be reported
                    self.logger.error(
                        (
                            "CANNOT revoke {} access to {} in {} because there is NO "
                            "configured storage accesses at all. See configuration. "
                            "Continuing anyway..."
                        ).format(username, project_auth_id, sa.provider.name)
                    )
1220

1221
    def _grant_from_storage(
1✔
1222
        self, to_add, user_project, sess, google_bulk_mapping=None, expires=None
1223
    ):
1224
        """
1225
        If a project have storage backend, grant user's access to buckets in
1226
        the storage backend.
1227

1228
        Args:
1229
            to_add: a set of (username, project.auth_id)  to be granted
1230
            user_project: a dictionary like:
1231

1232
                    {username: {phsid: {'read-storage','write-storage'}}}
1233

1234
        Return:
1235
            dict of the users' storage usernames to their user_projects and the respective storage access.
1236
        """
1237
        storage_user_to_sa_and_user_project = defaultdict()
1✔
1238
        for username, project_auth_id in to_add:
1✔
1239
            project = self._projects[project_auth_id]
1✔
1240
            for sa in project.storage_access:
1✔
1241
                access = list(user_project[username][project_auth_id])
1✔
1242
                if not hasattr(self, "storage_manager"):
1✔
1243
                    self.logger.error(
×
1244
                        (
1245
                            "CANNOT grant {} access {} to {} in {} because there is NO "
1246
                            "configured storage accesses at all. See configuration. "
1247
                            "Continuing anyway..."
1248
                        ).format(username, access, project_auth_id, sa.provider.name)
1249
                    )
1250
                    continue
×
1251

1252
                self.logger.info(
1✔
1253
                    "grant {} access {} to {} in {}".format(
1254
                        username, access, project_auth_id, sa.provider.name
1255
                    )
1256
                )
1257
                storage_username = self.storage_manager.grant_access(
1✔
1258
                    provider=sa.provider.name,
1259
                    username=username,
1260
                    project=project,
1261
                    access=access,
1262
                    session=sess,
1263
                    google_bulk_mapping=google_bulk_mapping,
1264
                    expires=expires,
1265
                )
1266

1267
                storage_user_to_sa_and_user_project[storage_username] = (sa, project)
1✔
1268
        return storage_user_to_sa_and_user_project
1✔
1269

1270
    def _init_projects(self, user_project, sess):
        """
        Fill the `self._projects` cache (auth_id -> Project).

        Projects listed in `self.project_mapping` are fetched or created
        first and always stored in the cache. Then every auth_id found in
        `user_project` is looked up in the db, created when missing, and
        cached unless the auth_id is already present.

        Raises:
            Exception: when creating a missing project hits an IntegrityError
        """

        if self.project_mapping:
            for mapped_projects in list(self.project_mapping.values()):
                for project_info in mapped_projects:
                    self.logger.debug(
                        "creating Project with info from project_mapping: {}".format(
                            project_info
                        )
                    )
                    self._projects[project_info["auth_id"]] = self._get_or_create(
                        sess, Project, **project_info
                    )

        for projects in user_project.values():
            for auth_id in list(projects):
                project = sess.query(Project).filter(Project.auth_id == auth_id).first()
                if not project:
                    data = {"name": auth_id, "auth_id": auth_id}
                    try:
                        project = self._get_or_create(sess, Project, **data)
                    except IntegrityError as e:
                        sess.rollback()
                        self.logger.error(
                            f"Project {auth_id} already exists. Detail {str(e)}"
                        )
                        raise Exception(
                            "Project {} already exists. Detail {}. Please contact your system administrator.".format(
                                auth_id, str(e)
                            )
                        )
                # an entry cached earlier (e.g. from project_mapping) is kept
                self._projects.setdefault(auth_id, project)
1✔
1302

1303
    @staticmethod
1✔
1304
    def _get_or_create(sess, model, **kwargs):
1✔
1305
        instance = sess.query(model).filter_by(**kwargs).first()
1✔
1306
        if not instance:
1✔
1307
            instance = model(**kwargs)
1✔
1308
            sess.add(instance)
1✔
1309
        return instance
1✔
1310

1311
    def _process_dbgap_files(self, dbgap_config, sess):
1✔
1312
        """
1313
        Args:
1314
            dbgap_config : a dictionary containing information about a single
1315
                           dbgap sftp server (from fence config)
1316
            sess: database session
1317

1318
        Return:
1319
            user_projects (dict)
1320
            user_info (dict)
1321
        """
1322
        dbgap_file_list = []
1✔
1323
        hostname = dbgap_config["info"]["host"]
1✔
1324
        username = dbgap_config["info"]["username"]
1✔
1325
        encrypted = dbgap_config["info"].get("encrypted", True)
1✔
1326
        folderdir = os.path.join(str(self.folder), str(hostname), str(username))
1✔
1327

1328
        try:
1✔
1329
            if os.path.exists(folderdir):
1✔
1330
                dbgap_file_list = glob.glob(
×
1331
                    os.path.join(folderdir, "*")
1332
                )  # get lists of file from folder
1333
            else:
1334
                self.logger.info("Downloading files from: {}".format(hostname))
1✔
1335
                dbgap_file_list = self._download(dbgap_config)
1✔
1336
        except Exception as e:
1✔
1337
            self.logger.error(e)
1✔
1338
            exit(1)
1✔
1339
        self.logger.info("dbgap files: {}".format(dbgap_file_list))
×
1340
        user_projects, user_info = self._get_user_permissions_from_csv_list(
×
1341
            dbgap_file_list,
1342
            encrypted=encrypted,
1343
            session=sess,
1344
            dbgap_config=dbgap_config,
1345
        )
1346

1347
        user_projects = self.parse_projects(user_projects)
×
1348
        return user_projects, user_info
×
1349

1350
    def _get_user_permissions_from_csv_list(
1✔
1351
        self, file_list, encrypted, session, dbgap_config={}
1352
    ):
1353
        """
1354
        Args:
1355
            file_list: list of files (represented as strings)
1356
            encrypted: boolean indicating whether those files are encrypted
1357
            session: sqlalchemy session
1358
            dbgap_config: a dictionary containing information about the dbGaP sftp server
1359
                    (comes from fence config)
1360

1361
        Return:
1362
            user_projects (dict)
1363
            user_info (dict)
1364
        """
1365
        permissions = [{"read-storage", "read"} for _ in file_list]
1✔
1366
        user_projects, user_info = self._parse_csv(
1✔
1367
            dict(list(zip(file_list, permissions))),
1368
            sess=session,
1369
            dbgap_config=dbgap_config,
1370
            encrypted=encrypted,
1371
        )
1372
        return user_projects, user_info
1✔
1373

1374
    def _merge_multiple_local_csv_files(
1✔
1375
        self, dbgap_file_list, encrypted, dbgap_configs, session
1376
    ):
1377
        """
1378
        Args:
1379
            dbgap_file_list (list): a list of whitelist file locations stored locally
1380
            encrypted (bool): whether the file is encrypted (comes from fence config)
1381
            dbgap_configs (list): list of dictionaries containing information about the dbgap server (comes from fence config)
1382
            session (sqlalchemy.Session): database session
1383

1384
        Return:
1385
            merged_user_projects (dict)
1386
            merged_user_info (dict)
1387
        """
1388
        merged_user_projects = {}
1✔
1389
        merged_user_info = {}
1✔
1390

1391
        for dbgap_config in dbgap_configs:
1✔
1392
            user_projects, user_info = self._get_user_permissions_from_csv_list(
1✔
1393
                dbgap_file_list,
1394
                encrypted,
1395
                session=session,
1396
                dbgap_config=dbgap_config,
1397
            )
1398
            self.sync_two_user_info_dict(user_info, merged_user_info)
1✔
1399
            self.sync_two_phsids_dict(user_projects, merged_user_projects)
1✔
1400
        return merged_user_projects, merged_user_info
1✔
1401

1402
    def _merge_multiple_dbgap_sftp(self, dbgap_servers, sess):
1✔
1403
        """
1404
        Args:
1405
            dbgap_servers : a list of dictionaries each containging config on
1406
                           dbgap sftp server (comes from fence config)
1407
            sess: database session
1408

1409
        Return:
1410
            merged_user_projects (dict)
1411
            merged_user_info (dict)
1412
        """
1413
        merged_user_projects = {}
1✔
1414
        merged_user_info = {}
1✔
1415
        for dbgap in dbgap_servers:
1✔
1416
            user_projects, user_info = self._process_dbgap_files(dbgap, sess)
1✔
1417
            # merge into merged_user_info
1418
            # user_info overrides original info in merged_user_info
1419
            self.sync_two_user_info_dict(user_info, merged_user_info)
1✔
1420

1421
            # merge all access info dicts into "merged_user_projects".
1422
            # the access info is combined - if the user_projects access is
1423
            # ["read"] and the merged_user_projects is ["read-storage"], the
1424
            # resulting access is ["read", "read-storage"].
1425
            self.sync_two_phsids_dict(user_projects, merged_user_projects)
1✔
1426
        return merged_user_projects, merged_user_info
1✔
1427

1428
    def parse_projects(self, user_projects):
        """
        Return a copy of `user_projects` keyed by lower-cased username.

        When two usernames differ only by case, the later entry wins.
        """
        lowered = {}
        for username, projects in user_projects.items():
            lowered[username.lower()] = projects
        return lowered
1✔
1433

1434
    def _process_dbgap_project(
1✔
1435
        self, dbgap_project, privileges, username, sess, user_projects, dbgap_config
1436
    ):
1437
        if dbgap_project not in self.project_mapping:
1✔
1438
            self._add_dbgap_project_for_user(
1✔
1439
                dbgap_project,
1440
                privileges,
1441
                username,
1442
                sess,
1443
                user_projects,
1444
                dbgap_config,
1445
            )
1446

1447
        for element_dict in self.project_mapping.get(dbgap_project, []):
1✔
1448
            try:
1✔
1449
                phsid_privileges = {element_dict["auth_id"]: set(privileges)}
1✔
1450

1451
                # need to add dbgap project to arborist
1452
                if self.arborist_client:
1✔
1453
                    self._determine_arborist_resource(
1✔
1454
                        element_dict["auth_id"], dbgap_config
1455
                    )
1456

1457
                if username not in user_projects:
1✔
1458
                    user_projects[username] = {}
1✔
1459
                user_projects[username].update(phsid_privileges)
1✔
1460

1461
            except ValueError as e:
×
1462
                self.logger.info(e)
×
1463

1464
    def _process_user_projects(
1✔
1465
        self,
1466
        user_projects,
1467
        enable_common_exchange_area_access,
1468
        study_common_exchange_areas,
1469
        dbgap_config,
1470
        sess,
1471
    ):
1472
        user_projects_to_modify = copy.deepcopy(user_projects)
1✔
1473
        for username in user_projects.keys():
1✔
1474
            for project in user_projects[username].keys():
1✔
1475
                phsid = project.split(".")
1✔
1476
                dbgap_project = phsid[0]
1✔
1477
                privileges = user_projects[username][project]
1✔
1478
                if len(phsid) > 1 and self._get_parse_consent_code(dbgap_config):
1✔
1479
                    consent_code = phsid[-1]
1✔
1480

1481
                    # c999 indicates full access to all consents and access
1482
                    # to a study-specific exchange area
1483
                    # access to at least one study-specific exchange area implies access
1484
                    # to the parent study's common exchange area
1485
                    #
1486
                    # NOTE: Handling giving access to all consents is done at
1487
                    #       a later time, when we have full information about possible
1488
                    #       consents
1489
                    self.logger.debug(
1✔
1490
                        f"got consent code {consent_code} from dbGaP project "
1491
                        f"{dbgap_project}"
1492
                    )
1493
                    if (
1✔
1494
                        consent_code == "c999"
1495
                        and enable_common_exchange_area_access
1496
                        and dbgap_project in study_common_exchange_areas
1497
                    ):
1498
                        self.logger.info(
1✔
1499
                            "found study with consent c999 and Fence "
1500
                            "is configured to parse exchange area data. Giving user "
1501
                            f"{username} {privileges} privileges in project: "
1502
                            f"{study_common_exchange_areas[dbgap_project]}."
1503
                        )
1504
                        self._add_dbgap_project_for_user(
1✔
1505
                            study_common_exchange_areas[dbgap_project],
1506
                            privileges,
1507
                            username,
1508
                            sess,
1509
                            user_projects_to_modify,
1510
                            dbgap_config,
1511
                        )
1512

1513
                    dbgap_project += "." + consent_code
1✔
1514

1515
                self._process_dbgap_project(
1✔
1516
                    dbgap_project,
1517
                    privileges,
1518
                    username,
1519
                    sess,
1520
                    user_projects_to_modify,
1521
                    dbgap_config,
1522
                )
1523
        for user in user_projects_to_modify.keys():
1✔
1524
            user_projects[user] = user_projects_to_modify[user]
1✔
1525

1526
    def sync(self):
        """
        Run the sync, reusing ``self.session`` when one is set and otherwise
        opening a session from ``self.driver`` for the duration of the sync.
        """
        if not self.session:
            with self.driver.session as fresh_session:
                self._sync(fresh_session)
            return

        self._sync(self.session)
×
1532

1533
    def download(self):
        """
        Download the files of every dbGaP server listed in ``self.dbGaP``,
        one server configuration at a time.
        """
        for server_config in self.dbGaP:
            self._download(server_config)
×
1536

1537
    def _download(self, dbgap_config):
1✔
1538
        """
1539
        Download files from dbgap server.
1540
        """
1541
        server = dbgap_config["info"]
1✔
1542
        protocol = dbgap_config["protocol"]
1✔
1543
        hostname = server["host"]
1✔
1544
        username = server["username"]
1✔
1545
        folderdir = os.path.join(str(self.folder), str(hostname), str(username))
1✔
1546

1547
        if not os.path.exists(folderdir):
1✔
1548
            os.makedirs(folderdir)
1✔
1549

1550
        self.logger.info("Download from server")
1✔
1551
        try:
1✔
1552
            if protocol == "sftp":
1✔
1553
                self._get_from_sftp_with_proxy(server, folderdir)
1✔
1554
            else:
1555
                self._get_from_ftp_with_proxy(server, folderdir)
×
1556
            dbgap_files = glob.glob(os.path.join(folderdir, "*"))
×
1557
            return dbgap_files
×
1558
        except Exception as e:
1✔
1559
            self.logger.error(e)
1✔
1560
            raise
1✔
1561

1562
    def _sync(self, sess):
        """
        Collect files from dbgap server(s), sync csv and yaml files to storage
        backend and fence DB

        Access information is gathered from the dbGaP servers (when enabled),
        the local CSV directory and the user.yaml file, merged into one
        ``user_projects`` / ``user_info`` pair, written to the Fence DB and
        storage backend, and finally pushed to Arborist.

        Args:
            sess: database session passed through to every helper that needs it

        Raises:
            EnvironmentError: the user.yaml file could not be loaded, or it has
                an ``authz`` section but no arborist client is configured
            AssertionError: re-raised from loading the user.yaml file
            GoogleUpdateException: captured during the DB/storage sync and
                re-raised only after all other syncing has finished
            SystemExit: with code 1 when either Arborist update reports failure
        """

        # get all dbgap files
        user_projects = {}
        user_info = {}
        if self.is_sync_from_dbgap_server:
            self.logger.debug(
                "Pulling telemetry files from {} dbgap sftp servers".format(
                    len(self.dbGaP)
                )
            )
            user_projects, user_info = self._merge_multiple_dbgap_sftp(self.dbGaP, sess)

        local_csv_file_list = []
        if self.sync_from_local_csv_dir:
            local_csv_file_list = glob.glob(
                os.path.join(self.sync_from_local_csv_dir, "*")
            )
            # Sort the list so the order of files is consistent across platforms
            local_csv_file_list.sort()

        user_projects_csv, user_info_csv = self._merge_multiple_local_csv_files(
            local_csv_file_list,
            encrypted=False,
            session=sess,
            dbgap_configs=self.dbGaP,
        )

        try:
            user_yaml = UserYAML.from_file(
                self.sync_from_local_yaml_file, encrypted=False, logger=self.logger
            )
        except (EnvironmentError, AssertionError) as e:
            self.logger.error(str(e))
            self.logger.error("aborting early")
            raise

        # parse all projects
        user_projects_csv = self.parse_projects(user_projects_csv)
        user_projects = self.parse_projects(user_projects)
        user_yaml.projects = self.parse_projects(user_yaml.projects)

        # merge all user info dicts into "user_info".
        # the user info (such as email) in the user.yaml files
        # overrides the user info from the CSV files.
        self.sync_two_user_info_dict(user_info_csv, user_info)
        self.sync_two_user_info_dict(user_yaml.user_info, user_info)

        # merge all access info dicts into "user_projects".
        # the access info is combined - if the user.yaml access is
        # ["read"] and the CSV file access is ["read-storage"], the
        # resulting access is ["read", "read-storage"].
        self.sync_two_phsids_dict(
            user_projects_csv, user_projects, source1="local_csv", source2="dbgap"
        )
        self.sync_two_phsids_dict(
            user_yaml.projects, user_projects, source1="user_yaml", source2="dbgap"
        )

        # Expand c999 access as soon as any configured dbgap server parses
        # consent codes. The expansion takes no per-server input, so running it
        # once gives the same result as running it once per server.
        if any(
            self._get_parse_consent_code(dbgap_config) for dbgap_config in self.dbGaP
        ):
            self._grant_all_consents_to_c999_users(
                user_projects, user_yaml.project_to_resource
            )

        google_update_ex = None

        try:
            # update the Fence DB
            if user_projects:
                self.logger.info("Sync to db and storage backend")
                self.sync_to_db_and_storage_backend(user_projects, user_info, sess)
                self.logger.info("Finish syncing to db and storage backend")
            else:
                self.logger.info("No users for syncing")
        except GoogleUpdateException as ex:
            # save this to reraise later after all non-Google syncing has finished
            # this way, any issues with Google only affect Google data access and don't
            # cascade problems into non-Google AWS or Azure access
            google_update_ex = ex

        # update the Arborist DB (resources, roles, policies, groups)
        if user_yaml.authz:
            if not self.arborist_client:
                raise EnvironmentError(
                    "yaml file contains authz section but sync is not configured with"
                    " arborist client--did you run sync with --arborist <arborist client> arg?"
                )
            self.logger.info("Synchronizing arborist...")
            success = self._update_arborist(sess, user_yaml)
            if success:
                self.logger.info("Finished synchronizing arborist")
            else:
                self.logger.error("Could not synchronize successfully")
                # raise directly instead of calling the interactive `exit`
                # helper, which only exists when the `site` module is loaded
                raise SystemExit(1)
        else:
            self.logger.info("No `authz` section; skipping arborist sync")

        # update the Arborist DB (user access)
        if self.arborist_client:
            self.logger.info("Synchronizing arborist with authorization info...")
            success = self._update_authz_in_arborist(sess, user_projects, user_yaml)
            if success:
                self.logger.info(
                    "Finished synchronizing authorization info to arborist"
                )
            else:
                self.logger.error(
                    "Could not synchronize authorization info successfully to arborist"
                )
                raise SystemExit(1)
        else:
            self.logger.error("No arborist client set; skipping arborist sync")

        # Logging authz source
        for u, s in self.auth_source.items():
            self.logger.info("Access for user {} from {}".format(u, s))

        self.logger.info(
            f"Persisting authz mapping to database: {user_yaml.project_to_resource}"
        )
        user_yaml.persist_project_to_resource(db_session=sess)
        # everything else is done; now surface the Google failure, if any
        if google_update_ex is not None:
            raise google_update_ex
1✔
1693

1694
    def _grant_all_consents_to_c999_users(
        self, user_projects, user_yaml_project_to_resources
    ):
        """
        Give every user holding a ``c999`` consent for a study access to all
        known accessions-with-consent of that study.

        The known accessions come from ``self._projects`` and from the user.yaml
        project mapping; each consent found for a study is also recorded for the
        studies returned by ``self._get_children``.

        Args:
            user_projects (dict): username -> {project: privileges}; the inner
                dicts are updated in place
            user_yaml_project_to_resources (dict): mapping whose keys are the
                projects known from the user.yaml file
        """
        access_number_matcher = re.compile(config["DBGAP_ACCESSION_WITH_CONSENT_REGEX"])
        # combine dbgap/user.yaml projects into one big set (in case not all consents
        # are in either)
        all_projects = set(self._projects.keys()) | set(
            user_yaml_project_to_resources.keys()
        )

        self.logger.debug(f"all projects: {all_projects}")

        # construct a mapping from phsid (without consent) to all accessions with consent
        consent_mapping = {}
        for project in all_projects:
            phs_match = access_number_matcher.match(project)
            if not phs_match:
                continue
            accession_number = phs_match.groupdict()
            phsid = accession_number["phsid"]
            consent = accession_number["consent"]

            # TODO: This is not handling the .v1.p1 at all
            consent_mapping.setdefault(phsid, set()).add(".".join([phsid, consent]))
            children = self._get_children(phsid)
            if children:
                for child_phs in children:
                    # Assign parent consent to child study
                    consent_mapping.setdefault(child_phs, set()).add(
                        ".".join([child_phs, consent])
                    )

        self.logger.debug(f"consent mapping: {consent_mapping}")

        # go through existing access and find any c999's and make sure to give access to
        # all accessions with consent for that phsid
        for username, user_project_info in user_projects.items():
            # Only the keys are snapshotted: the inner dict gains entries while
            # it is walked, and that is all a full deep copy was protecting
            # against.
            for project in list(user_project_info):
                phs_match = access_number_matcher.match(project)
                if not phs_match:
                    continue
                accession_number = phs_match.groupdict()
                if accession_number["consent"] != "c999":
                    continue

                # give access to all consents
                all_phsids_with_consent = consent_mapping.get(
                    accession_number["phsid"], []
                )
                self.logger.info(
                    f"user {username} has c999 consent group for: {project}. "
                    f"Granting access to all consents: {all_phsids_with_consent}"
                )
                # NOTE: grants "read" and "read-storage" on every accession.
                # NOTE(review): this replaces whatever privileges the user
                #       already had on that accession - confirm that dropping
                #       extra privileges here is intended.
                for phsid_with_consent in all_phsids_with_consent:
                    user_project_info[phsid_with_consent] = {"read-storage", "read"}
1748

1749
    def _update_arborist(self, session, user_yaml):
        """
        Create roles, resources, policies, groups in arborist from the information in
        ``user_yaml``.

        The projects are sent to arborist as resources with paths like
        ``/projects/{project}``. Roles are created with just the original names
        for the privileges like ``"read-storage", "read"`` etc.

        ``ArboristError`` raised while updating a resource, role, policy or group
        is logged and the sync moves on to the next item. Listing groups,
        deleting groups and granting the built-in group policies are not
        guarded, so an error there propagates to the caller.

        ``user_yaml.authz`` is only read; the policy entries keep their ``id``.

        Args:
            session (sqlalchemy.Session)
            user_yaml (UserYAML)

        Return:
            bool: success (``False`` only when arborist is not healthy)
        """
        healthy = self._is_arborist_healthy()
        if not healthy:
            return False

        # Set up the resource tree in arborist by combining provided resources with any
        # dbgap resources that were created before this.
        #
        # Why add dbgap resources if they've already been created?
        #   B/C Arborist's PUT update will override existing subresources. So if a dbgap
        #   resources was created under `/programs/phs000178` anything provided in
        #   user.yaml under `/programs` would completely wipe it out.
        resources = user_yaml.authz.get("resources", [])

        dbgap_resource_paths = []
        for path_list in self._dbgap_study_to_resources.values():
            dbgap_resource_paths.extend(path_list)

        self.logger.debug("user_yaml resources: {}".format(resources))
        self.logger.debug("dbgap resource paths: {}".format(dbgap_resource_paths))

        combined_resources = utils.combine_provided_and_dbgap_resources(
            resources, dbgap_resource_paths
        )

        for resource in combined_resources:
            try:
                self.logger.debug(
                    "attempting to update arborist resource: {}".format(resource)
                )
                self.arborist_client.update_resource("/", resource, merge=True)
            except ArboristError as e:
                self.logger.error(e)
                # keep going; maybe just some conflicts from things existing already

        # update roles
        roles = user_yaml.authz.get("roles", [])
        for role in roles:
            try:
                response = self.arborist_client.update_role(role["id"], role)
                if response:
                    self._created_roles.add(role["id"])
            except ArboristError as e:
                self.logger.info(
                    "couldn't update role '{}', creating instead".format(str(e))
                )
                try:
                    response = self.arborist_client.create_role(role)
                    if response:
                        self._created_roles.add(role["id"])
                except ArboristError as e:
                    self.logger.error(e)
                    # keep going; maybe just some conflicts from things existing already

        # update policies
        policies = user_yaml.authz.get("policies", [])
        for policy in policies:
            policy_id = policy["id"]
            # Send a copy without the id instead of popping it from the caller's
            # dict: popping left user_yaml.authz without ids, so a second pass
            # over the same user_yaml failed with a KeyError.
            policy_body = {
                field: value for field, value in policy.items() if field != "id"
            }
            try:
                self.logger.debug(
                    "Trying to upsert policy with id {}".format(policy_id)
                )
                response = self.arborist_client.update_policy(
                    policy_id, policy_body, create_if_not_exist=True
                )
            except ArboristError as e:
                self.logger.error(e)
                # keep going; maybe just some conflicts from things existing already
            else:
                if response:
                    self.logger.debug("Upserted policy with id {}".format(policy_id))
                    self._created_policies.add(policy_id)

        # update groups
        groups = user_yaml.authz.get("groups", [])

        # delete from arborist the groups that have been deleted
        # from the user.yaml
        arborist_groups = set(
            g["name"] for g in self.arborist_client.list_groups().get("groups", [])
        )
        useryaml_groups = set(g["name"] for g in groups)
        for deleted_group in arborist_groups.difference(useryaml_groups):
            # do not try to delete built in groups
            if deleted_group not in ["anonymous", "logged-in"]:
                self.arborist_client.delete_group(deleted_group)

        # create/update the groups defined in the user.yaml
        for group in groups:
            missing = {"name", "users", "policies"}.difference(set(group.keys()))
            if missing:
                name = group.get("name", "{MISSING NAME}")
                self.logger.error(
                    "group {} missing required field(s): {}".format(name, list(missing))
                )
                continue
            try:
                self.arborist_client.put_group(
                    group["name"],
                    # Arborist doesn't handle group descriptions yet
                    # description=group.get("description", ""),
                    users=group["users"],
                    policies=group["policies"],
                )
            except ArboristError as e:
                self.logger.info("couldn't put group: {}".format(str(e)))

        # Update policies for built-in (`anonymous` and `logged-in`) groups

        # First recreate these groups in order to clear out old, possibly deleted policies
        for builtin_group in ["anonymous", "logged-in"]:
            try:
                self.arborist_client.put_group(builtin_group)
            except ArboristError as e:
                self.logger.info("couldn't put group: {}".format(str(e)))

        # Now add back policies that are in the user.yaml
        for policy in user_yaml.authz.get("anonymous_policies", []):
            self.arborist_client.grant_group_policy("anonymous", policy)

        for policy in user_yaml.authz.get("all_users_policies", []):
            self.arborist_client.grant_group_policy("logged-in", policy)

        return True
1✔
1888

1889
    def _revoke_all_policies_preserve_mfa(self, username, idp=None):
        """
        Revoke all of the user's policies in arborist and, if MFA is enabled for
        the user's idp and the user held ``mfa_policy``, grant ``mfa_policy``
        back afterwards.

        MFA counts as enabled when the idp's entry in
        ``config["OPENID_CONNECT"]`` contains ``multifactor_auth_claim_info``.

        Args:
            username (str): user whose policies are revoked
            idp (str): name of the user's identity provider, if known
        """

        is_mfa_enabled = "multifactor_auth_claim_info" in config["OPENID_CONNECT"].get(
            idp, {}
        )

        if not is_mfa_enabled:
            # TODO This should be a diff, not a revocation of all policies.
            self.arborist_client.revoke_all_policies_for_user(username)
            return

        had_mfa_policy = False
        try:
            user_data_from_arborist = self.arborist_client.get_user(username)
            # Accept both shapes for a policy entry: a plain policy id, or a
            # dict carrying the id under "policy" (the shape the policy diffing
            # code in this class reads). A plain `in` test on a list of dicts
            # never finds "mfa_policy".
            had_mfa_policy = any(
                (entry.get("policy") if isinstance(entry, dict) else entry)
                == "mfa_policy"
                for entry in user_data_from_arborist["policies"]
            )
        except Exception as e:
            self.logger.error(
                f"Could not retrieve user's policies, revoking all policies anyway. {e}"
            )
        finally:
            # TODO This should be a diff, not a revocation of all policies.
            self.arborist_client.revoke_all_policies_for_user(username)

        if had_mfa_policy:
            self.arborist_client.grant_user_policy(username, "mfa_policy")
1✔
1918

1919
    def _grant_arborist_policies(
1✔
1920
        self, username, incoming_policies, user_yaml, expires=None
1921
    ):
1922
        """
1923
        Find the difference between the existing policies for a user and the incoming policies,
1924
        and decide whether to add, remove, or keep policies.
1925

1926
        Args:
1927
            username (str): the username of the user
1928
            incoming_policies (set): set of policies to be applied to the user
1929
            user_yaml (UserYAML): UserYAML object containing authz information
1930
            expires (int): time at which authz info in Arborist should expire
1931

1932
        Return:
1933
            bool: True if policies were successfully updated, False otherwise
1934
        """
1935
        user_existing_policies = set()
1✔
1936
        to_keep = set()
1✔
1937
        to_add = set()
1✔
1938
        to_remove = set()
1✔
1939
        is_revoke_all = False
1✔
1940

1941
        try:
1✔
1942
            user_existing_policies = set(
1✔
1943
                policy["policy"]
1944
                for policy in self.arborist_client.get_user(username)["policies"]
1945
            )
1946
            self.logger.info(
1✔
1947
                f"Fetched user {username} existing policies: {user_existing_policies}"
1948
            )
1949
        except ArboristError as e:
1✔
1950
            self.logger.error(
1✔
1951
                f"Could not get user {username} policies from Arborist: {e} Revoking all policies..."
1952
            )
1953
            # if getting existing policies fails, revoke all policies and re-apply
1954
            is_revoke_all = True
1✔
1955

1956
        if is_revoke_all is False and len(incoming_policies) > 0:
1✔
1957
            to_keep = incoming_policies & user_existing_policies
1✔
1958
            to_add = incoming_policies - user_existing_policies
1✔
1959
            to_remove = user_existing_policies - incoming_policies
1✔
1960

1961
            if user_yaml:
1✔
1962
                anonymous_policies = set()
1✔
1963
                for policy in to_remove:
1✔
1964
                    if policy in user_yaml.authz.get(
×
1965
                        "anonymous_policies", []
1966
                    ) or policy in user_yaml.authz.get("all_users_policies", []):
1967
                        self.logger.warning(
×
1968
                            f"Policy {policy} is an anonymous policy, not revoking it for user {username}."
1969
                        )
1970
                        anonymous_policies.add(policy)
×
1971
                to_remove -= anonymous_policies
1✔
1972
        else:
1973
            # if incoming_policies is empty, we revoke all policies
1974
            is_revoke_all = True
1✔
1975

1976
        if not is_revoke_all:
1✔
1977
            try:
1✔
1978
                if to_remove:
1✔
1979
                    for policy in to_remove:
1✔
1980
                        self.logger.info(
1✔
1981
                            f"Revoking policy {policy} for user {username}."
1982
                        )
1983
                        self.arborist_client.revoke_user_policy(username, policy)
1✔
1984
            except ArboristError as e:
×
1985
                self.logger.error(
×
1986
                    f"Could not revoke user {username} policy {policy}. Revoking all instead: {e}"
1987
                )
1988
                is_revoke_all = True
×
1989

1990
        if is_revoke_all:
1✔
1991
            try:
1✔
1992
                self.logger.info(f"Revoking all policies for user {username}.")
1✔
1993
                self.arborist_client.revoke_all_policies_for_user(username)
1✔
1994
            except ArboristError as e:
×
1995
                self.logger.error(
×
1996
                    f"Could not revoke all policies for user {username}. Error: {e}"
1997
                )
1998
                return False
×
1999
            to_add = incoming_policies  # if we revoke all, we need to add all incoming policies
1✔
2000

2001
        if (
1✔
2002
            "mfa_policy" not in incoming_policies
2003
            and "mfa_policy" in user_existing_policies
2004
        ):
2005
            to_add.add("mfa_policy")
×
2006

2007
        if to_add:
1✔
2008
            try:
1✔
2009
                self.logger.info(f"Bulk granting user {username} policies {to_add}.")
1✔
2010
                response_json = self.arborist_client.grant_bulk_user_policy(
1✔
2011
                    username, list(to_add), expires
2012
                )
2013
            except ArboristError as e:
×
2014
                self.logger.error(
×
2015
                    f"Could not grant user {username} policies {to_add}. Error: {e}"
2016
                )
2017
                return False
×
2018

2019
        return True
1✔
2020

2021
    def _update_authz_in_arborist(
1✔
2022
        self,
2023
        session,
2024
        user_projects,
2025
        user_yaml=None,
2026
        single_user_sync=False,
2027
        expires=None,
2028
    ):
2029
        """
2030
        Assign users policies in arborist from the information in
2031
        ``user_projects`` and optionally a ``user_yaml``.
2032

2033
        The projects are sent to arborist as resources with paths like
2034
        ``/projects/{project}``. Roles are created with just the original names
2035
        for the privileges like ``"read-storage", "read"`` etc.
2036

2037
        Args:
2038
            user_projects (dict)
2039
            user_yaml (UserYAML) optional, if there are policies for users in a user.yaml
2040
            single_user_sync (bool) whether authz update is for a single user
2041
            expires (int) time at which authz info in Arborist should expire
2042

2043
        Return:
2044
            bool: success
2045
        """
2046
        healthy = self._is_arborist_healthy()
1✔
2047
        if not healthy:
1✔
2048
            return False
×
2049

2050
        self.logger.debug("user_projects: {}".format(user_projects))
1✔
2051

2052
        if user_yaml:
1✔
2053
            self.logger.debug(
1✔
2054
                "useryaml abac before lowering usernames: {}".format(
2055
                    user_yaml.user_abac
2056
                )
2057
            )
2058
            user_yaml.user_abac = {
1✔
2059
                key.lower(): value for key, value in user_yaml.user_abac.items()
2060
            }
2061
            # update the project info with `projects` specified in user.yaml
2062
            self.sync_two_phsids_dict(user_yaml.user_abac, user_projects)
1✔
2063

2064
        # get list of users from arborist to make sure users that are completely removed
2065
        # from authorization sources get policies revoked
2066

2067
        arborist_user_projects = {}
1✔
2068
        if not single_user_sync:
1✔
2069

2070
            try:
1✔
2071
                arborist_users = self.arborist_client.get_users().json["users"]
1✔
2072

2073
                # construct user information, NOTE the lowering of the username. when adding/
2074
                # removing access, the case in the Fence db is used. For combining access, it is
2075
                # case-insensitive, so we lower
2076
                arborist_user_projects = {
1✔
2077
                    user["name"].lower(): {} for user in arborist_users
2078
                }
2079
            except (ArboristError, KeyError, AttributeError) as error:
×
2080
                # TODO usersync should probably exit with non-zero exit code at the end,
2081
                #      but sync should continue from this point so there are no partial
2082
                #      updates
2083
                self.logger.warning(
×
2084
                    "Could not get list of users in Arborist, continuing anyway. "
2085
                    "WARNING: this sync will NOT remove access for users no longer in "
2086
                    f"authorization sources. Error: {error}"
2087
                )
2088

2089
            # update the project info with users from arborist
2090
            self.sync_two_phsids_dict(arborist_user_projects, user_projects)
1✔
2091

2092
        # prefer in-memory if available from user_yaml, if not, get from database
2093
        if user_yaml and user_yaml.project_to_resource:
1✔
2094
            project_to_authz_mapping = user_yaml.project_to_resource
1✔
2095
            self.logger.debug(
1✔
2096
                f"using in-memory project to authz resource mapping from "
2097
                f"user.yaml (instead of database): {project_to_authz_mapping}"
2098
            )
2099
        else:
2100
            project_to_authz_mapping = get_project_to_authz_mapping(session)
1✔
2101
            self.logger.debug(
1✔
2102
                f"using persisted project to authz resource mapping from database "
2103
                f"(instead of user.yaml - as it may not be available): {project_to_authz_mapping}"
2104
            )
2105

2106
        self.logger.debug(
1✔
2107
            f"_dbgap_study_to_resources: {self._dbgap_study_to_resources}"
2108
        )
2109
        all_resources = [
1✔
2110
            r
2111
            for resources in self._dbgap_study_to_resources.values()
2112
            for r in resources
2113
        ]
2114
        all_resources.extend(r for r in project_to_authz_mapping.values())
1✔
2115
        self._create_arborist_resources(all_resources)
1✔
2116

2117
        for username, user_project_info in user_projects.items():
1✔
2118
            self.logger.info("processing user `{}`".format(username))
1✔
2119
            user = query_for_user(session=session, username=username)
1✔
2120
            idp = None
1✔
2121
            if user:
1✔
2122
                username = user.username
1✔
2123
                idp = user.identity_provider.name if user.identity_provider else None
1✔
2124

2125
            self.arborist_client.create_user_if_not_exist(username)
1✔
2126

2127
            # as of 2/11/2022, for single_user_sync, as RAS visa parsing has
2128
            # previously mapped each project to the same set of privileges
2129
            # (i.e.{'read', 'read-storage'}), unique_policies will just be a
2130
            # single policy with ('read', 'read-storage') being the single
2131
            # key
2132
            unique_policies = self._determine_unique_policies(
1✔
2133
                user_project_info, project_to_authz_mapping
2134
            )
2135
            for roles in unique_policies.keys():
1✔
2136
                for role in roles:
1✔
2137
                    self._create_arborist_role(role)
1✔
2138

2139
            incoming_policies = set()  # set of policies for current user.
1✔
2140

2141
            if single_user_sync:
1✔
2142
                for ordered_roles, ordered_resources in unique_policies.items():
1✔
2143
                    policy_hash = self._hash_policy_contents(
1✔
2144
                        ordered_roles, ordered_resources
2145
                    )
2146
                    self._create_arborist_policy(
1✔
2147
                        policy_hash,
2148
                        ordered_roles,
2149
                        ordered_resources,
2150
                        skip_if_exists=True,
2151
                    )
2152
                    # return here as it is not expected single_user_sync
2153
                    # will need any of the remaining user_yaml operations
2154
                    # left in _update_authz_in_arborist
2155
                    return self._grant_arborist_policy(
1✔
2156
                        username, policy_hash, expires=expires
2157
                    )
2158
            else:
2159
                policy_ids_to_grant = set()
1✔
2160
                for roles, resources in unique_policies.items():
1✔
2161
                    for role in roles:
1✔
2162
                        for resource in resources:
1✔
2163
                            # grant a policy to this user which is a single
2164
                            # role on a single resource
2165

2166
                            # format project '/x/y/z' -> 'x.y.z'
2167
                            # so the policy id will be something like 'x.y.z-create'
2168
                            policy_id = _format_policy_id(resource, role)
1✔
2169
                            incoming_policies.add(policy_id)
1✔
2170
                            if policy_id not in self._created_policies:
1✔
2171
                                try:
1✔
2172
                                    self.arborist_client.update_policy(
1✔
2173
                                        policy_id,
2174
                                        {
2175
                                            "description": "policy created by fence sync",
2176
                                            "role_ids": [role],
2177
                                            "resource_paths": [resource],
2178
                                        },
2179
                                        create_if_not_exist=True,
2180
                                    )
2181
                                except ArboristError as e:
×
2182
                                    self.logger.info(
×
2183
                                        "not creating policy in arborist; {}".format(
2184
                                            str(e)
2185
                                        )
2186
                                    )
2187
                                self._created_policies.add(policy_id)
1✔
2188
                            policy_ids_to_grant.add(policy_id)
1✔
2189
                self._grant_bulk_user_policies(
1✔
2190
                    username, policy_ids_to_grant, expires=expires
2191
                )
2192

2193
            if user_yaml:
1✔
2194
                user_yaml_policies = set(user_yaml.policies.get(username, []))
1✔
2195
                incoming_policies = (
1✔
2196
                    incoming_policies | user_yaml_policies
2197
                )  # add policies from whitelist and useryaml
2198

2199
            self._grant_arborist_policies(
1✔
2200
                username, incoming_policies, user_yaml, expires=expires
2201
            )
2202

2203
        if user_yaml:
1✔
2204
            for client_name, client_details in user_yaml.clients.items():
1✔
2205
                client_policies = client_details.get("policies", [])
×
2206
                clients = session.query(Client).filter_by(name=client_name).all()
×
2207
                # update existing clients, do not create new ones
2208
                if not clients:
×
2209
                    self.logger.warning(
×
2210
                        "client to update (`{}`) does not exist in fence: skipping".format(
2211
                            client_name
2212
                        )
2213
                    )
2214
                    continue
×
2215
                self.logger.debug(
×
2216
                    "updating client `{}` (found {} client IDs)".format(
2217
                        client_name, len(clients)
2218
                    )
2219
                )
2220
                # there may be more than 1 client with this name if credentials are being rotated,
2221
                # so we grant access to each client ID
2222
                for client in clients:
×
2223
                    try:
×
2224
                        self.arborist_client.update_client(
×
2225
                            client.client_id, client_policies
2226
                        )
2227
                    except ArboristError as e:
×
2228
                        self.logger.info(
×
2229
                            "not granting policies {} to client `{}` (`{}`); {}".format(
2230
                                client_policies, client_name, client.client_id, str(e)
2231
                            )
2232
                        )
2233

2234
        return True
1✔
2235

2236
    def _determine_unique_policies(self, user_project_info, project_to_authz_mapping):
1✔
2237
        """
2238
        Determine and return a dictionary of unique policies.
2239

2240
        Args (examples):
2241
            user_project_info (dict):
2242
            {
2243
                'phs000002.c1': { 'read-storage', 'read' },
2244
                'phs000001.c1': { 'read', 'read-storage' },
2245
                'phs000004.c1': { 'write', 'read' },
2246
                'phs000003.c1': { 'read', 'write' },
2247
                'phs000006.c1': { 'write-storage', 'write', 'read-storage', 'read' }
2248
                'phs000005.c1': { 'read', 'read-storage', 'write', 'write-storage' },
2249
            }
2250
            project_to_authz_mapping (dict):
2251
            {
2252
                'phs000001.c1': '/programs/DEV/projects/phs000001.c1'
2253
            }
2254

2255
        Return (for examples):
2256
            dict:
2257
            {
2258
                ('read', 'read-storage'): ('phs000001.c1', 'phs000002.c1'),
2259
                ('read', 'write'): ('phs000003.c1', 'phs000004.c1'),
2260
                ('read', 'read-storage', 'write', 'write-storage'): ('phs000005.c1', 'phs000006.c1'),
2261
            }
2262
        """
2263
        roles_to_resources = collections.defaultdict(list)
1✔
2264
        for study, roles in user_project_info.items():
1✔
2265
            ordered_roles = tuple(sorted(roles))
1✔
2266
            study_authz_paths = self._dbgap_study_to_resources.get(study, [study])
1✔
2267
            if study in project_to_authz_mapping:
1✔
2268
                study_authz_paths = [project_to_authz_mapping[study]]
1✔
2269
            roles_to_resources[ordered_roles].extend(study_authz_paths)
1✔
2270

2271
        policies = {}
1✔
2272
        for ordered_roles, unordered_resources in roles_to_resources.items():
1✔
2273
            policies[ordered_roles] = tuple(sorted(unordered_resources))
1✔
2274
        return policies
1✔
2275

2276
    def _create_arborist_role(self, role):
1✔
2277
        """
2278
        Wrapper around gen3authz's create_role with additional logging
2279

2280
        Args:
2281
            role (str): what the Arborist identity should be of the created role
2282

2283
        Return:
2284
            bool: True if the role was created successfully or it already
2285
                  exists. False otherwise
2286
        """
2287
        if role in self._created_roles:
1✔
2288
            return True
1✔
2289
        try:
1✔
2290
            response_json = self.arborist_client.create_role(
1✔
2291
                arborist_role_for_permission(role)
2292
            )
2293
        except ArboristError as e:
×
2294
            self.logger.error(
×
2295
                "could not create `{}` role in Arborist: {}".format(role, e)
2296
            )
2297
            return False
×
2298
        self._created_roles.add(role)
1✔
2299

2300
        if response_json is None:
1✔
2301
            self.logger.info("role `{}` already exists in Arborist".format(role))
×
2302
        else:
2303
            self.logger.info("created role `{}` in Arborist".format(role))
1✔
2304
        return True
1✔
2305

2306
    def _create_arborist_resources(self, resources):
        """
        Create resources in Arborist.

        The paths are first converted by
        utils.combine_provided_and_dbgap_resources({}, resources); one
        `update_resource("/", ..., merge=True)` call is then issued for each
        entry of that result.

        Args:
            resources (list): full Arborist resource paths to create, e.g.
            [
                "/programs/DEV/projects/phs000001.c1",
                "/programs/DEV/projects/phs000002.c1",
                "/programs/DEV/projects/phs000003.c1"
            ]

        Return:
            bool: True if every request went through, False as soon as one
                  raises ArboristError (remaining requests are not sent)

        Note on request count (as of 2/11/2022): for the example above the
        combined result is one nested tree,
        [
            { 'name': 'programs', 'subresources': [
                { 'name': 'DEV', 'subresources': [
                    { 'name': 'projects', 'subresources': [
                        { 'name': 'phs000001.c1', 'subresources': []},
                        { 'name': 'phs000002.c1', 'subresources': []},
                        { 'name': 'phs000003.c1', 'subresources': []}
                    ]}
                ]}
            ]}
        ]
        so a single request is sent. For
        resources = ["/phs000001.c1", "/phs000002.c1", "/phs000003.c1"]
        the combined result is three top-level entries,
        [
            {'name': 'phs000001.c1', 'subresources': []},
            {'name': 'phs000002.c1', 'subresources': []},
            {'name': 'phs000003.c1', 'subresources': []}
        ]
        so three requests are sent. For sync_single_user_visas, studies
        should therefore be nested under `/programs` as in the first example.

        TODO for the sake of simplicity, it would be nice if only one network
        request was made no matter the input.
        """
        request_bodies = utils.combine_provided_and_dbgap_resources({}, resources)
        for body in request_bodies:
            try:
                self.arborist_client.update_resource("/", body, merge=True)
            except ArboristError as err:
                self.logger.error(
                    "could not create Arborist resources using request body `{}`. error: {}".format(
                        body, err
                    )
                )
                return False

        self.logger.debug(
            "created {} resource(s) in Arborist: `{}`".format(len(resources), resources)
        )
        return True
1✔
2371

2372
    def _create_arborist_policy(
1✔
2373
        self, policy_id, roles, resources, skip_if_exists=False
2374
    ):
2375
        """
2376
        Wrapper around gen3authz's create_policy with additional logging
2377

2378
        Args:
2379
            policy_id (str): what the Arborist identity should be of the created policy
2380
            roles (iterable): what roles the create policy should have
2381
            resources (iterable): what resources the created policy should have
2382
            skip_if_exists (bool): if True, this function will not treat an already
2383
                                   existent policy as an error
2384

2385
        Return:
2386
            bool: True if policy creation was successful. False otherwise
2387
        """
2388
        try:
1✔
2389
            response_json = self.arborist_client.create_policy(
1✔
2390
                {
2391
                    "id": policy_id,
2392
                    "role_ids": roles,
2393
                    "resource_paths": resources,
2394
                },
2395
                skip_if_exists=skip_if_exists,
2396
            )
2397
        except ArboristError as e:
×
2398
            self.logger.error(
×
2399
                "could not create policy `{}` in Arborist: {}".format(policy_id, e)
2400
            )
2401
            return False
×
2402

2403
        if response_json is None:
1✔
2404
            self.logger.info("policy `{}` already exists in Arborist".format(policy_id))
×
2405
        else:
2406
            self.logger.info("created policy `{}` in Arborist".format(policy_id))
1✔
2407
        return True
1✔
2408

2409
    def _hash_policy_contents(self, ordered_roles, ordered_resources):
1✔
2410
        """
2411
        Generate a sha256 hexdigest representing ordered_roles and ordered_resources.
2412

2413
        Args:
2414
            ordered_roles (iterable): policy roles in sorted order
2415
            ordered_resources (iterable): policy resources in sorted order
2416

2417
        Return:
2418
            str: SHA256 hex digest
2419
        """
2420

2421
        def escape(s):
1✔
2422
            return s.replace(",", "\,")
1✔
2423

2424
        canonical_roles = ",".join(escape(r) for r in ordered_roles)
1✔
2425
        canonical_resources = ",".join(escape(r) for r in ordered_resources)
1✔
2426
        canonical_policy = f"{canonical_roles},,f{canonical_resources}"
1✔
2427
        policy_hash = hashlib.sha256(canonical_policy.encode("utf-8")).hexdigest()
1✔
2428

2429
        return policy_hash
1✔
2430

2431
    def _grant_arborist_policy(self, username, policy_id, expires=None):
1✔
2432
        """
2433
        Wrapper around gen3authz's grant_user_policy with additional logging
2434

2435
        Args:
2436
            username (str): username of user in Arborist who policy should be
2437
                            granted to
2438
            policy_id (str): Arborist policy id
2439
            expires (int): POSIX timestamp for when policy should expire
2440

2441
        Return:
2442
            bool: True if granting of policy was successful, False otherwise
2443
        """
2444
        try:
1✔
2445
            response_json = self.arborist_client.grant_user_policy(
1✔
2446
                username,
2447
                policy_id,
2448
                expires_at=expires,
2449
            )
2450
        except ArboristError as e:
×
2451
            self.logger.error(
×
2452
                "could not grant policy `{}` to user `{}`: {}".format(
2453
                    policy_id, username, e
2454
                )
2455
            )
2456
            return False
×
2457

2458
        self.logger.debug(
1✔
2459
            "granted policy `{}` to user `{}`".format(policy_id, username)
2460
        )
2461
        return True
1✔
2462

2463
    def _grant_bulk_user_policies(self, username, policy_ids, expires=None):
1✔
2464
        """
2465
        Wrapper around gen3authz's grant_user_policies with additional logging
2466

2467
        Args:
2468
            username (str): username of user in Arborist who policy should be
2469
                            granted to
2470
            policy_ids (set[str]): Arborist policy ids
2471

2472
        Return:
2473
            bool: True if granting of policies was successful, False otherwise
2474
        """
2475
        try:
1✔
2476
            response_json = self.arborist_client.grant_bulk_user_policy(
1✔
2477
                username, policy_ids, expires
2478
            )
2479
        except (
×
2480
            Exception
2481
        ) as e:  # TODO Update gen3authz to throw ArboristError when httpx timeout exceptions raise.
2482
            self.logger.error(
×
2483
                "could not grant bulk policies  to user `{}`: {}".format(username, e)
2484
            )
2485
            return False
×
2486

2487
        return True
1✔
2488

2489
    def _determine_arborist_resource(self, dbgap_study, dbgap_config):
1✔
2490
        """
2491
        Determine the arborist resource path and add it to
2492
        _self._dbgap_study_to_resources
2493

2494
        Args:
2495
            dbgap_study (str): study phs identifier
2496
            dbgap_config (dict): dictionary of config for dbgap server
2497

2498
        """
2499
        default_namespaces = dbgap_config.get("study_to_resource_namespaces", {}).get(
1✔
2500
            "_default", ["/"]
2501
        )
2502
        namespaces = dbgap_config.get("study_to_resource_namespaces", {}).get(
1✔
2503
            dbgap_study, default_namespaces
2504
        )
2505

2506
        self.logger.debug(f"dbgap study namespaces: {namespaces}")
1✔
2507

2508
        arborist_resource_namespaces = [
1✔
2509
            namespace.rstrip("/") + "/programs/" for namespace in namespaces
2510
        ]
2511

2512
        for resource_namespace in arborist_resource_namespaces:
1✔
2513
            full_resource_path = resource_namespace + dbgap_study
1✔
2514
            if dbgap_study not in self._dbgap_study_to_resources:
1✔
2515
                self._dbgap_study_to_resources[dbgap_study] = []
1✔
2516
            self._dbgap_study_to_resources[dbgap_study].append(full_resource_path)
1✔
2517
        return arborist_resource_namespaces
1✔
2518

2519
    def _is_arborist_healthy(self):
1✔
2520
        if not self.arborist_client:
1✔
2521
            self.logger.warning("no arborist client set; skipping arborist dbgap sync")
×
2522
            return False
×
2523
        if not self.arborist_client.healthy():
1✔
2524
            # TODO (rudyardrichter, 2019-01-07): add backoff/retry here
2525
            self.logger.error(
×
2526
                "arborist service is unavailable; skipping main arborist dbgap sync"
2527
            )
2528
            return False
×
2529
        return True
1✔
2530

2531
    def _pick_sync_type(self, visa):
1✔
2532
        """
2533
        Pick type of visa to parse according to the visa provider
2534
        """
2535
        sync_client = None
1✔
2536
        if visa.type in self.visa_types["ras"]:
1✔
2537
            sync_client = self.ras_sync_client
1✔
2538
        else:
2539
            raise Exception(
×
2540
                "Visa type {} not recognized. Configure in fence-config".format(
2541
                    visa.type
2542
                )
2543
            )
2544
        if not sync_client:
1✔
2545
            raise Exception("Sync client for {} not configured".format(visa.type))
×
2546

2547
        return sync_client
1✔
2548

2549
    def sync_single_user_visas(
        self, user, ga4gh_visas, sess=None, expires=None, skip_google_updates=False
    ):
        """
        Sync a single user's visas during login or DRS/data access

        IMPORTANT NOTE: THIS DOES NOT VALIDATE THE VISA. ENSURE THIS IS DONE
                        BEFORE THIS.

        Steps, in order: parse each visa into project access, merge the
        results for this user, run the project processing helpers, sync to
        the storage backend, then sync to Arborist when a client is set.

        Args:
            user (userdatamodel.user.User): Fence user whose visas'
                                            authz info is being synced
            ga4gh_visas (list): a list of fence.models.GA4GHVisaV1 objects
                                that are ALREADY VALIDATED
            sess (sqlalchemy.orm.session.Session): database session
            expires (int): time at which synced Arborist policies and
                           inclusion in any GBAG are set to expire
            skip_google_updates (bool): True if google group updates should be skipped. False if otherwise.

        Return:
            list of successfully parsed visas

        Raises:
            EnvironmentError, AssertionError: re-raised from
                UserYAML.from_file after being logged
            Exception: from self._pick_sync_type for a visa whose type is not
                recognized or has no sync client (that call is outside the
                per-visa try block, so it is not swallowed)
        """
        # A new RASVisa client is created and stored on the instance on every call;
        # _pick_sync_type reads it back from self.ras_sync_client.
        self.ras_sync_client = RASVisa(logger=self.logger)
        # Only the first dbGaP config entry is consulted here.
        dbgap_config = self.dbGaP[0]
        parse_consent_code = self._get_parse_consent_code(dbgap_config)
        enable_common_exchange_area_access = dbgap_config.get(
            "enable_common_exchange_area_access", False
        )
        study_common_exchange_areas = dbgap_config.get(
            "study_common_exchange_areas", {}
        )

        try:
            user_yaml = UserYAML.from_file(
                self.sync_from_local_yaml_file, encrypted=False, logger=self.logger
            )
        except (EnvironmentError, AssertionError) as e:
            self.logger.error(str(e))
            self.logger.error("aborting early")
            raise

        user_projects = dict()
        projects = {}
        info = {}
        parsed_visas = []

        for visa in ga4gh_visas:
            project = {}
            # Despite the name, `visa_type` is the sync client object returned by
            # _pick_sync_type, not a type string.
            visa_type = self._pick_sync_type(visa)
            encoded_visa = visa.ga4gh_visa

            try:
                project, info = visa_type._parse_single_visa(
                    user,
                    encoded_visa,
                    visa.expires,
                    parse_consent_code,
                )
            except Exception:
                # Best effort: any failure while parsing a visa skips that visa only.
                # NOTE(review): the full encoded visa is written to the log here;
                # confirm that is acceptable for this deployment.
                self.logger.warning(
                    f"ignoring unsuccessfully parsed or expired visa: {encoded_visa}"
                )
                continue

            # Later visas overwrite earlier ones for the same project key.
            projects = {**projects, **project}
            parsed_visas.append(visa)

        # `info` is whatever the last successfully parsed visa returned (or {} if
        # none parsed); the user's id and username are then set on it.
        info["user_id"] = user.id
        info["username"] = user.username
        user_projects[user.username] = projects

        user_projects = self.parse_projects(user_projects)

        if parse_consent_code and enable_common_exchange_area_access:
            self.logger.info(
                f"using study to common exchange area mapping: {study_common_exchange_areas}"
            )

        self._process_user_projects(
            user_projects,
            enable_common_exchange_area_access,
            study_common_exchange_areas,
            dbgap_config,
            sess,
        )

        if parse_consent_code:
            self._grant_all_consents_to_c999_users(
                user_projects, user_yaml.project_to_resource
            )

        # NOTE(review): an entry for this user is always added above, so whether
        # the else branch can run depends on what parse_projects returns — confirm.
        if user_projects:
            self.sync_to_storage_backend(
                user_projects,
                info,
                sess,
                expires=expires,
                skip_google_updates=skip_google_updates,
            )
        else:
            self.logger.info("No users for syncing")

        # update arborist db (user access)
        if self.arborist_client:
            self.logger.info("Synchronizing arborist with authorization info...")
            success = self._update_authz_in_arborist(
                sess,
                user_projects,
                user_yaml=user_yaml,
                single_user_sync=True,
                expires=expires,
            )
            # An Arborist failure is only logged; the parsed visas are still returned.
            if success:
                self.logger.info(
                    "Finished synchronizing authorization info to arborist"
                )
            else:
                self.logger.error(
                    "Could not synchronize authorization info successfully to arborist"
                )
        else:
            self.logger.error("No arborist client set; skipping arborist sync")

        return parsed_visas
1✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc