• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

uc-cdis / fence / 17446050098

03 Sep 2025 09:03PM UTC coverage: 74.889% (-0.008%) from 74.897%
17446050098

Pull #1238

github

web-flow
Merge branch 'master' into chore/fix_improper_certificate_validation_pps_1936
Pull Request #1238: Replace AutoAddPolicy with RejectPolicy and load known hosts to prevent man in the middle attacks

8404 of 11222 relevant lines covered (74.89%)

0.75 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

81.86
fence/sync/sync_users.py
1
import paramiko.ssh_exception
1✔
2
import backoff
1✔
3
import glob
1✔
4
import jwt
1✔
5
import os
1✔
6
import re
1✔
7
import subprocess as sp
1✔
8
import yaml
1✔
9
import copy
1✔
10
import datetime
1✔
11
import uuid
1✔
12
import collections
1✔
13
import hashlib
1✔
14

15
from contextlib import contextmanager
1✔
16
from collections import defaultdict
1✔
17
from csv import DictReader
1✔
18
from io import StringIO
1✔
19
from stat import S_ISDIR
1✔
20

21
import paramiko
1✔
22
from cdislogging import get_logger
1✔
23
from email_validator import validate_email, EmailNotValidError
1✔
24
from gen3authz.client.arborist.errors import ArboristError
1✔
25
from gen3users.validation import validate_user_yaml
1✔
26
from paramiko.proxy import ProxyCommand
1✔
27
from sqlalchemy.exc import IntegrityError
1✔
28
from sqlalchemy import func
1✔
29

30
from fence.config import config
1✔
31
from fence.models import (
1✔
32
    AccessPrivilege,
33
    AuthorizationProvider,
34
    Project,
35
    Tag,
36
    User,
37
    query_for_user,
38
    Client,
39
    IdentityProvider,
40
    get_project_to_authz_mapping,
41
)
42
from fence.resources.google.utils import get_or_create_proxy_group_id
1✔
43
from fence.resources.storage import StorageManager
1✔
44
from fence.resources.google.access_utils import update_google_groups_for_users
1✔
45
from fence.resources.google.access_utils import GoogleUpdateException
1✔
46
from fence.sync import utils
1✔
47
from fence.sync.passport_sync.ras_sync import RASVisa
1✔
48
from fence.utils import get_SQLAlchemyDriver, DEFAULT_BACKOFF_SETTINGS
1✔
49

50

51
def _format_policy_id(path, privilege):
    """
    Build a policy ID from a resource path and a privilege.

    The non-empty "/"-separated segments of `path` are joined with "." and the
    privilege is appended after a "-".

    Args:
        path (str): "/"-separated resource path
        privilege (str): privilege name

    Returns:
        str: "<dotted path>-<privilege>"
    """
    segments = [segment for segment in path.split("/") if segment]
    dotted_path = ".".join(segments)
    return f"{dotted_path}-{privilege}"
54

55

56
def download_dir(sftp, remote_dir, local_dir):
    """
    Recursively download file from remote_dir to local_dir

    Args:
        sftp: SFTP client object; only its `listdir_attr(path)` and
            `get(remote_path, local_path)` methods are used here
        remote_dir(str): remote directory to copy from
        local_dir(str): local directory to copy into; created if it is missing

    Returns: None
    """
    # The files are written to paths below `local_dir`, so that directory (and
    # the one mirrored for every remote sub-directory on recursion) must exist
    # before anything is downloaded into it.
    if local_dir:
        os.makedirs(local_dir, exist_ok=True)

    for item in sftp.listdir_attr(remote_dir):
        remote_path = remote_dir + "/" + item.filename
        local_path = os.path.join(local_dir, item.filename)
        if S_ISDIR(item.st_mode):
            download_dir(sftp, remote_path, local_path)
        else:
            sftp.get(remote_path, local_path)
73

74

75
def arborist_role_for_permission(permission):
    """
    Build the arborist role that wraps exactly one permission.

    For the programs/projects in the existing fence access control model, a policy
    is generated for each combination of program/project and privilege so that
    arborist can be used for checking permissions. Each role involved therefore
    holds a single permission, for one privilege from the project access model.

    Args:
        permission (str): privilege name; used as the role ID, the permission ID
            and the action method

    Returns:
        dict: role with keys "id" and "permissions"
    """
    single_permission = {
        "id": permission,
        "action": {"service": "*", "method": permission},
    }
    return {"id": permission, "permissions": [single_permission]}
88

89

90
@contextmanager
def _read_file(filepath, encrypted=True, key=None, logger=None):
    """
    Context manager for reading and optionally decrypting file it only
    decrypts files encrypted by unix 'crypt' tool which is used by dbGaP.

    Args:
        filepath (str): path to the file
        encrypted (bool): whether the file is encrypted
        key (str): key handed to `ccdecrypt`; only used when `encrypted` is true
        logger: optional logger used to report a decoding failure

    Returns:
        Generator[file-like class]: file like object for the file
    """
    if encrypted:
        p = sp.Popen(
            [
                "ccdecrypt",
                "-u",
                "-K",
                key,
                filepath,
            ],
            stdout=sp.PIPE,
            # discard stderr without opening a handle on os.devnull that is never closed
            stderr=sp.DEVNULL,
            universal_newlines=True,
        )
        try:
            yield StringIO(p.communicate()[0])
        except UnicodeDecodeError:
            # the logger is optional, don't fail on the error report itself
            if logger:
                logger.error("Could not decode file. Check the decryption key.")
    else:
        # `with` closes the file even when the caller's block raises
        with open(filepath, "r") as f:
            yield f
124

125

126
class UserYAML(object):
    """
    Representation of the information in a YAML file describing user, project, and ABAC
    information for access control.
    """

    def __init__(
        self,
        projects=None,
        user_info=None,
        policies=None,
        clients=None,
        authz=None,
        project_to_resource=None,
        logger=None,
        user_abac=None,
    ):
        """
        Store the parsed sections; every section left as None (or otherwise
        falsy) is replaced by an empty dict.
        """
        self.projects = projects or {}
        self.user_info = user_info or {}
        self.user_abac = user_abac or {}
        self.policies = policies or {}
        self.clients = clients or {}
        self.authz = authz or {}
        self.project_to_resource = project_to_resource or {}
        self.logger = logger

    @classmethod
    def from_file(cls, filepath, encrypted=True, key=None, logger=None):
        """
        Build a UserYAML from the YAML file at `filepath`.

        Access by "auth_id" is collected into `projects` (used to update the Fence DB).
        Access by "resource" is collected into `user_abac` (used to update Arborist).

        Args:
            filepath (str): path to the user.yaml; when falsy nothing is read and
                an empty UserYAML is returned
            encrypted (bool): passed through to `_read_file`
            key (str): passed through to `_read_file`
            logger: optional logger

        Returns:
            UserYAML: the parsed representation

        Raises:
            EnvironmentError: if a username is seen more than once
        """
        data = {}
        if filepath:
            with _read_file(filepath, encrypted=encrypted, key=key, logger=logger) as f:
                file_contents = f.read()
                validate_user_yaml(file_contents)  # run user.yaml validation tests
                # an empty YAML document loads as None; treat it as an empty mapping
                data = yaml.safe_load(file_contents) or {}
        elif logger:
            logger.info("Did not sync a user.yaml, no file path provided.")

        projects = dict()
        user_info = dict()
        policies = dict()

        # resources should be the resource tree to construct in arborist
        user_abac = dict()

        # Fall back on rbac block if no authz. Remove when rbac in useryaml fully deprecated.
        if not data.get("authz") and data.get("rbac"):
            if logger:
                logger.info(
                    "No authz block found but rbac block present. Using rbac block"
                )
            data["authz"] = data["rbac"]

        # get user project mapping to arborist resources if it exists.
        # A mapping found in the authz block is used as-is (not copied), so the
        # entries added below are also visible through the returned `authz`.
        project_to_resource = (data.get("authz") or {}).get("user_project_to_resource")
        if project_to_resource is None:
            project_to_resource = dict()

        # read projects and privileges for each user
        users = data.get("users") or {}
        for username, details in users.items():
            # a user listed without any details loads as None
            details = details or {}

            # users should occur only once each.
            # NOTE(review): `users` is a dict, so a username cannot repeat while
            # iterating it; this guard is kept only as a safety net.
            if username in projects:
                msg = "invalid yaml file: user `{}` occurs multiple times".format(
                    username
                )
                if logger:
                    logger.error(msg)
                raise EnvironmentError(msg)

            privileges = {}
            resource_permissions = dict()
            for project in details.get("projects") or []:
                try:
                    privileges[project["auth_id"]] = set(project["privilege"])
                except KeyError as e:
                    if logger:
                        logger.error("project {} missing field: {}".format(project, e))
                    continue

                # project may not have `resource` field.
                # prefer resource field;
                # if no resource or mapping, assume auth_id is resource.
                resource = project.get("resource", project["auth_id"])

                if project["auth_id"] not in project_to_resource:
                    project_to_resource[project["auth_id"]] = resource
                resource_permissions[resource] = set(project["privilege"])

            user_info[username] = {
                "email": details.get("email", ""),
                "display_name": details.get("display_name", ""),
                "phone_number": details.get("phone_number", ""),
                "tags": details.get("tags", {}),
                "admin": details.get("admin", False),
            }
            if not details.get("email"):
                # no explicit email: use the username when it is a valid email address
                try:
                    valid = validate_email(
                        username, allow_smtputf8=False, check_deliverability=False
                    )
                    user_info[username]["email"] = valid.email
                except EmailNotValidError:
                    pass
            projects[username] = privileges
            user_abac[username] = resource_permissions

            # list of policies we want to grant to this user, which get sent to arborist
            # to check if they're allowed to do certain things
            policies[username] = details.get("policies", [])

        if logger:
            logger.info(
                "Got user project to arborist resource mapping:\n{}".format(
                    str(project_to_resource)
                )
            )

        authz = data.get("authz") or dict()
        if not authz:
            # older version: resources in root, no `authz` section or `rbac` section
            if logger:
                logger.warning(
                    "access control YAML file is using old format (missing `authz`/`rbac`"
                    " section in the root); assuming that if it exists `resources` will"
                    " be on the root level, and continuing"
                )
            # we're going to throw it into the `authz` dictionary anyways, so the rest of
            # the code can pretend it's in the normal place that we expect
            resources = data.get("resources", [])
            # keep authz empty dict if resources is not specified
            if resources:
                authz["resources"] = resources

        clients = data.get("clients", {})

        return cls(
            projects=projects,
            user_info=user_info,
            user_abac=user_abac,
            policies=policies,
            clients=clients,
            authz=authz,
            project_to_resource=project_to_resource,
            logger=logger,
        )

    def persist_project_to_resource(self, db_session):
        """
        Store the mappings from Project.auth_id to authorization resource (Project.authz)

        The mapping comes from an external source, this function persists what was parsed
        into memory into the database for future use.

        Args:
            db_session: database session used to query, add and commit Project rows
        """
        for auth_id, authz_resource in self.project_to_resource.items():
            project = (
                db_session.query(Project).filter(Project.auth_id == auth_id).first()
            )
            if project:
                project.authz = authz_resource
            else:
                # no Project with this auth_id yet: create one named after the auth_id
                project = Project(name=auth_id, auth_id=auth_id, authz=authz_resource)
                db_session.add(project)
        db_session.commit()
294

295

296
class UserSyncer(object):
1✔
297
    def __init__(
        self,
        dbGaP,
        DB,
        project_mapping,
        storage_credentials=None,
        db_session=None,
        is_sync_from_dbgap_server=False,
        sync_from_local_csv_dir=None,
        sync_from_local_yaml_file=None,
        arborist=None,
        folder=None,
    ):
        """
        Syncs ACL files from dbGap to auth database and storage backends

        Args:
            dbGaP: a list of dict containing creds to access dbgap sftp
            DB: database connection string
            project_mapping: a dict containing how dbgap ids map to projects
            storage_credentials: a dict containing creds for storage backends;
                a storage manager is only created when this is provided
            db_session: stored as `self.session`
            is_sync_from_dbgap_server: stored as-is on the instance
            sync_from_local_csv_dir: stored as-is on the instance; presumably a
                local dir to sync from instead of dbGaP (TODO confirm)
            sync_from_local_yaml_file: stored as-is on the instance
            arborist:
                ArboristClient instance if the syncer should also create
                resources in arborist
            folder: a local folder where dbgap telemetry files will sync to
        """
        # plain configuration passed in by the caller
        self.dbGaP = dbGaP
        self.is_sync_from_dbgap_server = is_sync_from_dbgap_server
        self.sync_from_local_csv_dir = sync_from_local_csv_dir
        self.sync_from_local_yaml_file = sync_from_local_yaml_file
        self.folder = folder
        self.arborist_client = arborist
        self.project_mapping = project_mapping or {}

        # database access
        self.session = db_session
        self.driver = get_SQLAlchemyDriver(DB)

        # bookkeeping filled in while syncing
        self._projects = dict()
        self._created_roles = set()
        self._created_policies = set()
        self._dbgap_study_to_resources = dict()
        self.id_patterns = []

        log_level = "info"
        if config["DEBUG"] is True:
            log_level = "debug"
        self.logger = get_logger("user_syncer", log_level=log_level)

        # auth_source used for logging. username : [source1, source2]
        self.auth_source = defaultdict(set)
        self.visa_types = config.get("USERSYNC", {}).get("visa_types", {})

        # merge the parent -> child studies mapping of every dbGaP config
        merged_mapping = {}
        for dbgap_config in dbGaP:
            merged_mapping.update(
                dbgap_config.get("parent_to_child_studies_mapping", {})
            )
        self.parent_to_child_studies_mapping = merged_mapping

        # NOTE: `storage_manager` is only defined when storage credentials are given
        if storage_credentials:
            self.storage_manager = StorageManager(
                storage_credentials, logger=self.logger
            )
354

355
    @staticmethod
1✔
356
    def _match_pattern(filepath, id_patterns, encrypted=True):
1✔
357
        """
358
        Check if the filename matches dbgap access control file pattern
359

360
        Args:
361
            filepath (str): path to file
362
            encrypted (bool): whether the file is encrypted
363

364
        Returns:
365
            bool: whether the pattern matches
366
        """
367
        id_patterns.append(r"authentication_file_phs(\d{6}).(csv|txt)")
1✔
368
        for pattern in id_patterns:
1✔
369
            if encrypted:
1✔
370
                pattern += r".enc"
×
371
            pattern += r"$"
1✔
372
            # when converting the YAML from fence-config,
373
            # python reads it as Python string literal. So "\" turns into "\\"
374
            # which messes with the regex match
375
            pattern.replace("\\\\", "\\")
1✔
376
            if re.match(pattern, os.path.basename(filepath)):
1✔
377
                return True
1✔
378
        return False
1✔
379

380
    def _get_from_sftp_with_proxy(self, server, path):
        """
        Download all data from sftp sever to a local dir

        Host keys are loaded from ~/.ssh/known_hosts when that file exists and
        hosts with an unknown key are rejected. An SSHException raised while
        connecting or downloading is logged and not re-raised; any other
        exception propagates to the caller.

        Args:
            server (dict) : dictionary containing info to access sftp server
            path (str): path to local directory

        Returns:
            None
        """
        proxy = None
        if server.get("proxy", "") != "":
            command = "ssh -oHostKeyAlgorithms=+ssh-rsa -i ~/.ssh/id_rsa {user}@{proxy} nc {host} {port}".format(
                user=server.get("proxy_user", ""),
                proxy=server.get("proxy", ""),
                host=server.get("host", ""),
                port=server.get("port", 22),
            )
            self.logger.info("SSH proxy command: {}".format(command))

            proxy = ProxyCommand(command)

        try:
            with paramiko.SSHClient() as client:
                client.set_log_channel(self.logger.name)

                # Load known host keys
                known_hosts_path = os.path.expanduser("~/.ssh/known_hosts")
                if os.path.exists(known_hosts_path):
                    client.load_host_keys(known_hosts_path)
                else:
                    self.logger.warning(
                        "No known_hosts file found — rejecting unknown hosts - make sure the SFTP host key is present in known_hosts before attempting connection."
                    )

                client.set_missing_host_key_policy(paramiko.RejectPolicy())
                parameters = {
                    "hostname": str(server.get("host", "")),
                    "username": str(server.get("username", "")),
                    "password": str(server.get("password", "")),
                    "port": int(server.get("port", 22)),
                }
                if proxy:
                    parameters["sock"] = proxy

                self.logger.info(
                    "SSH connection hostname:post {}:{}".format(
                        parameters.get("hostname", "unknown"),
                        parameters.get("port", "unknown"),
                    )
                )
                try:
                    self._connect_with_ssh(ssh_client=client, parameters=parameters)

                    with client.open_sftp() as sftp:
                        download_dir(sftp, "./", path)
                except paramiko.ssh_exception.SSHException as e:
                    self.logger.error(f"SSH connection failed, error: {e}")
        finally:
            # close the proxy command even when an exception other than
            # SSHException propagates out of the block above
            if proxy:
                proxy.close()
441

442
    @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
    def _connect_with_ssh(self, ssh_client, parameters):
        """
        Connect `ssh_client`, passing `parameters` as keyword arguments.

        The call is wrapped by `backoff.on_exception` with exponential waits
        for any Exception, configured through DEFAULT_BACKOFF_SETTINGS.

        Args:
            ssh_client: client object exposing `connect(**kwargs)`
            parameters (dict): keyword arguments for `connect`

        Returns:
            None
        """
        ssh_client.connect(**parameters)
445

446
    def _get_from_ftp_with_proxy(self, server, path):
        """
        Download data from ftp sever to a local dir

        Runs `lftp` to mirror the remote current directory into `path` through
        the configured http proxy. The exit status of `lftp` is not checked.

        Args:
            server (dict): dictionary containing information for accessing server
            path(str): path to local files

        Returns:
            None
        """
        credentials = "{},{}".format(
            server.get("username", ""), server.get("password", "")
        )
        lftp_commands = "set ftp:proxy http://{}; mirror . {}; exit".format(
            server.get("proxy", ""), path
        )
        # Pass an argument list and no shell, so that credentials or host
        # names containing shell metacharacters cannot alter the command line.
        arguments = [
            "lftp",
            "-u",
            credentials,
            str(server.get("host", "")),
            "-e",
            lftp_commands,
        ]
        try:
            sp.call(arguments)
        except OSError as e:
            # e.g. lftp is not installed; keep the previous non-raising behavior
            self.logger.error("Could not run lftp: {}".format(e))
467

468
    def _get_parse_consent_code(self, dbgap_config={}):
1✔
469
        return dbgap_config.get(
1✔
470
            "parse_consent_code", True
471
        )  # Should this really be true?
472

473
    def _parse_csv(self, file_dict, sess, dbgap_config={}, encrypted=True):
1✔
474
        """
475
        parse csv files to python dict
476

477
        Args:
478
            file_dict: a dictionary with key(file path) and value(privileges)
479
            sess: sqlalchemy session
480
            dbgap_config: a dictionary containing information about the dbGaP sftp server
481
                (comes from fence config)
482
            encrypted: boolean indicating whether those files are encrypted
483

484

485
        Return:
486
            Tuple[[dict, dict]]:
487
                (user_project, user_info) where user_project is a mapping from
488
                usernames to project permissions and user_info is a mapping
489
                from usernames to user details, such as email
490

491
        Example:
492

493
            (
494
                {
495
                    username: {
496
                        'project1': {'read-storage','write-storage'},
497
                        'project2': {'read-storage'},
498
                    }
499
                },
500
                {
501
                    username: {
502
                        'email': 'email@mail.com',
503
                        'display_name': 'display name',
504
                        'phone_number': '123-456-789',
505
                        'tags': {'dbgap_role': 'PI'}
506
                    }
507
                },
508
            )
509

510
        """
511
        user_projects = dict()
1✔
512
        user_info = defaultdict(dict)
1✔
513

514
        # parse dbGaP sftp server information
515
        dbgap_key = dbgap_config.get("decrypt_key", None)
1✔
516

517
        self.id_patterns += (
1✔
518
            [
519
                item.replace("\\\\", "\\")
520
                for item in dbgap_config.get("allowed_whitelist_patterns", [])
521
            ]
522
            if dbgap_config.get("allow_non_dbGaP_whitelist", False)
523
            else []
524
        )
525

526
        enable_common_exchange_area_access = dbgap_config.get(
1✔
527
            "enable_common_exchange_area_access", False
528
        )
529
        study_common_exchange_areas = dbgap_config.get(
1✔
530
            "study_common_exchange_areas", {}
531
        )
532
        parse_consent_code = self._get_parse_consent_code(dbgap_config)
1✔
533

534
        if parse_consent_code and enable_common_exchange_area_access:
1✔
535
            self.logger.info(
1✔
536
                f"using study to common exchange area mapping: {study_common_exchange_areas}"
537
            )
538

539
        project_id_patterns = [r"phs(\d{6})"]
1✔
540
        if "additional_allowed_project_id_patterns" in dbgap_config:
1✔
541
            patterns = dbgap_config.get("additional_allowed_project_id_patterns")
1✔
542
            patterns = [
1✔
543
                pattern.replace("\\\\", "\\") for pattern in patterns
544
            ]  # when converting the YAML from fence-config, python reads it as Python string literal. So "\" turns into "\\" which messes with the regex match
545
            project_id_patterns += patterns
1✔
546

547
        self.logger.info(f"Using these file paths: {file_dict.items()}")
1✔
548
        for filepath, privileges in file_dict.items():
1✔
549
            self.logger.info("Reading file {}".format(filepath))
1✔
550
            if os.stat(filepath).st_size == 0:
1✔
551
                self.logger.warning("Empty file {}".format(filepath))
×
552
                continue
×
553
            if not self._match_pattern(
1✔
554
                filepath, id_patterns=self.id_patterns, encrypted=encrypted
555
            ):
556
                self.logger.warning(
1✔
557
                    "Filename {} does not match dbgap access control filename pattern;"
558
                    " this could mean that the filename has an invalid format, or has"
559
                    " an unexpected .enc extension, or lacks the .enc extension where"
560
                    " expected. This file is NOT being processed by usersync!".format(
561
                        filepath
562
                    )
563
                )
564
                continue
1✔
565

566
            with _read_file(
1✔
567
                filepath, encrypted=encrypted, key=dbgap_key, logger=self.logger
568
            ) as f:
569
                csv = DictReader(f, quotechar='"', skipinitialspace=True)
1✔
570

571
                for row in csv:
1✔
572
                    username = row.get("login") or ""
1✔
573
                    if username == "":
1✔
574
                        continue
×
575

576
                    if dbgap_config.get("allow_non_dbGaP_whitelist", False):
1✔
577
                        phsid = (
1✔
578
                            row.get("phsid") or (row.get("project_id") or "")
579
                        ).split(".")
580
                    else:
581
                        phsid = (row.get("phsid") or "").split(".")
1✔
582

583
                    dbgap_project = phsid[0]
1✔
584
                    # There are issues where dbgap has a wrong entry in their whitelist. Since we do a bulk arborist request, there are wrong entries in it that invalidates the whole request causing other correct entries not to be added
585
                    skip = False
1✔
586
                    for pattern in project_id_patterns:
1✔
587
                        self.logger.debug(
1✔
588
                            "Checking pattern:{} with project_id:{}".format(
589
                                pattern, dbgap_project
590
                            )
591
                        )
592
                        if re.match(pattern, dbgap_project):
1✔
593
                            skip = False
1✔
594
                            break
1✔
595
                        else:
596
                            skip = True
1✔
597
                    if skip:
1✔
598
                        self.logger.warning(
1✔
599
                            "Skip processing from file {}, user {} with project {}".format(
600
                                filepath,
601
                                username,
602
                                dbgap_project,
603
                            )
604
                        )
605
                        continue
1✔
606
                    if len(phsid) > 1 and parse_consent_code:
1✔
607
                        consent_code = phsid[-1]
1✔
608

609
                        # c999 indicates full access to all consents and access
610
                        # to a study-specific exchange area
611
                        # access to at least one study-specific exchange area implies access
612
                        # to the parent study's common exchange area
613
                        #
614
                        # NOTE: Handling giving access to all consents is done at
615
                        #       a later time, when we have full information about possible
616
                        #       consents
617
                        self.logger.debug(
1✔
618
                            f"got consent code {consent_code} from dbGaP project "
619
                            f"{dbgap_project}"
620
                        )
621
                        if (
1✔
622
                            consent_code == "c999"
623
                            and enable_common_exchange_area_access
624
                            and dbgap_project in study_common_exchange_areas
625
                        ):
626
                            self.logger.info(
1✔
627
                                "found study with consent c999 and Fence "
628
                                "is configured to parse exchange area data. Giving user "
629
                                f"{username} {privileges} privileges in project: "
630
                                f"{study_common_exchange_areas[dbgap_project]}."
631
                            )
632
                            self._add_dbgap_project_for_user(
1✔
633
                                study_common_exchange_areas[dbgap_project],
634
                                privileges,
635
                                username,
636
                                sess,
637
                                user_projects,
638
                                dbgap_config,
639
                            )
640

641
                        dbgap_project += "." + consent_code
1✔
642

643
                    self._add_children_for_dbgap_project(
1✔
644
                        dbgap_project,
645
                        privileges,
646
                        username,
647
                        sess,
648
                        user_projects,
649
                        dbgap_config,
650
                    )
651

652
                    display_name = row.get("user name") or ""
1✔
653
                    tags = {"dbgap_role": row.get("role") or ""}
1✔
654

655
                    # some dbgap telemetry files have information about a researchers PI
656
                    if "downloader for" in row:
1✔
657
                        tags["pi"] = row["downloader for"]
1✔
658

659
                    # prefer name over previous "downloader for" if it exists
660
                    if "downloader for names" in row:
1✔
661
                        tags["pi"] = row["downloader for names"]
×
662

663
                    user_info[username] = {
1✔
664
                        "email": row.get("email")
665
                        or user_info[username].get("email")
666
                        or "",
667
                        "display_name": display_name,
668
                        "phone_number": row.get("phone")
669
                        or user_info[username].get("phone_number")
670
                        or "",
671
                        "tags": tags,
672
                    }
673

674
                    self._process_dbgap_project(
1✔
675
                        dbgap_project,
676
                        privileges,
677
                        username,
678
                        sess,
679
                        user_projects,
680
                        dbgap_config,
681
                    )
682

683
        return user_projects, user_info
1✔
684

685
    def _get_children(self, dbgap_project):
1✔
686
        return self.parent_to_child_studies_mapping.get(dbgap_project.split(".")[0])
1✔
687

688
    def _add_children_for_dbgap_project(
1✔
689
        self, dbgap_project, privileges, username, sess, user_projects, dbgap_config
690
    ):
691
        """
692
        Adds the configured child studies for the given dbgap_project, adding it to the provided user_projects. If
693
        parse_consent_code is true, then the consents granted in the provided dbgap_project will also be granted to the
694
        child studies.
695
        """
696
        parent_phsid = dbgap_project
1✔
697
        parse_consent_code = self._get_parse_consent_code(dbgap_config)
1✔
698
        child_suffix = ""
1✔
699
        if parse_consent_code and re.match(
1✔
700
            config["DBGAP_ACCESSION_WITH_CONSENT_REGEX"], dbgap_project
701
        ):
702
            parent_phsid_parts = dbgap_project.split(".")
1✔
703
            parent_phsid = parent_phsid_parts[0]
1✔
704
            child_suffix = "." + parent_phsid_parts[1]
1✔
705

706
        if parent_phsid not in self.parent_to_child_studies_mapping:
1✔
707
            return
1✔
708

709
        self.logger.info(
1✔
710
            f"found parent study {parent_phsid} and Fence "
711
            "is configured to provide additional access to child studies. Giving user "
712
            f"{username} {privileges} privileges in projects: "
713
            f"{{k + child_suffix: v + child_suffix for k, v in self.parent_to_child_studies_mapping.items()}}."
714
        )
715
        child_studies = self.parent_to_child_studies_mapping.get(parent_phsid, [])
1✔
716
        for child_study in child_studies:
1✔
717
            self._add_dbgap_project_for_user(
1✔
718
                child_study + child_suffix,
719
                privileges,
720
                username,
721
                sess,
722
                user_projects,
723
                dbgap_config,
724
            )
725

726
    def _add_dbgap_project_for_user(
1✔
727
        self, dbgap_project, privileges, username, sess, user_projects, dbgap_config
728
    ):
729
        """
730
        Helper function for csv parsing that adds a given dbgap project to Fence/Arborist
731
        and then updates the dictionary containing all user's project access
732
        """
733
        if dbgap_project not in self._projects:
1✔
734
            self.logger.debug(
1✔
735
                "creating Project in fence for dbGaP study: {}".format(dbgap_project)
736
            )
737

738
            project = self._get_or_create(sess, Project, auth_id=dbgap_project)
1✔
739

740
            # need to add dbgap project to arborist
741
            if self.arborist_client:
1✔
742
                self._determine_arborist_resource(dbgap_project, dbgap_config)
1✔
743

744
            if project.name is None:
1✔
745
                project.name = dbgap_project
1✔
746
            self._projects[dbgap_project] = project
1✔
747
        phsid_privileges = {dbgap_project: set(privileges)}
1✔
748
        if username in user_projects:
1✔
749
            user_projects[username].update(phsid_privileges)
1✔
750
        else:
751
            user_projects[username] = phsid_privileges
1✔
752

753
    @staticmethod
1✔
754
    def sync_two_user_info_dict(user_info1, user_info2):
1✔
755
        """
756
        Merge user_info1 into user_info2. Values in user_info2 are overriden
757
        by values in user_info1. user_info2 ends up containing the merged dict.
758

759
        Args:
760
            user_info1 (dict): nested dict
761
            user_info2 (dict): nested dict
762

763
            Example:
764
            {username: {'email': 'abc@email.com'}}
765

766
        Returns:
767
            None
768
        """
769
        user_info2.update(user_info1)
1✔
770

771
    def sync_two_phsids_dict(
        self,
        phsids1,
        phsids2,
        source1=None,
        source2=None,
        phsids2_overrides_phsids1=True,
    ):
        """
        Merge phsids1 into phsids2, in place. phsids2 ends up containing the
        merged dict; phsids1 is never modified.

        For each user in phsids1:

        - user missing from (or empty in) phsids2: the user's projects are
          copied into phsids2 and `source1` is recorded for the user.
        - user present in both and `phsids2_overrides_phsids1` is true: the
          privileges are unioned per project and both sources are recorded.
        - user present in both and `phsids2_overrides_phsids1` is false:
          phsids2 is left as is for that user and only `source2` is recorded.

        Args:
            phsids1, phsids2: nested dicts mapping phsids to sets of permissions

            source1, source2: source of authz information (eg. dbgap, user_yaml,
                visas); recorded in `self.auth_source[user]` when provided

            Example:
            {
                username: {
                    phsid1: {'read-storage','write-storage'},
                    phsid2: {'read-storage'},
                }
            }

        Return:
            None

        Explanation:
            Consider merging projects of the same user:

                {user1: {phsid1: privilege1}}

                {user1: {phsid2: privilege2}}

            case 1: phsid1 != phsid2. Output:

                {user1: {phsid1: privilege1, phsid2: privilege2}}

            case 2: phsid1 == phsid2 and privilege1 != privilege2. Output:

                {user1: {phsid1: union(privilege1, privilege2)}}

            For the other cases, just simple addition
        """
        for user, projects1 in phsids1.items():
            if not phsids2.get(user):
                if source1:
                    self.auth_source[user].add(source1)
                # Copy instead of aliasing: later merges update phsids2's sets
                # in place and must not leak back into the caller's phsids1.
                phsids2[user] = {
                    phsid: set(privileges) for phsid, privileges in projects1.items()
                }
            elif phsids2_overrides_phsids1:
                if source1:
                    self.auth_source[user].add(source1)
                if source2:
                    self.auth_source[user].add(source2)
                merged_projects = phsids2[user]
                for phsid1, privilege1 in projects1.items():
                    merged_projects.setdefault(phsid1, set()).update(privilege1)
            elif source2:
                self.auth_source[user].add(source2)
×
835

836
    def sync_to_db_and_storage_backend(
        self,
        user_project,
        user_info,
        sess,
        do_not_revoke_from_db_and_storage=False,
        expires=None,
    ):
        """
        Sync user access control to the database and the storage backend.

        The (username, project) pairs currently stored as AccessPrivilege rows
        are compared, with lowercased usernames, against the pairs in
        `user_project`. Pairs only in the db are revoked, pairs only in
        `user_project` are granted, and pairs in both are re-granted/updated.

        Args:
            user_project (dict): a dictionary of

                {
                    username: {
                        'project1': {'read-storage','write-storage'},
                        'project2': {'read-storage'}
                    }
                }

            user_info (dict): a dictionary of {username: user_info{}}
            sess: a sqlalchemy session
            do_not_revoke_from_db_and_storage (bool): when True, nothing is
                revoked from storage or db and admin flags are not validated
            expires: passed through to the storage grants

        Return:
            None
        """
        google_bulk_mapping = {} if config["GOOGLE_BULK_UPDATES"] else None

        self._init_projects(user_project, sess)

        dbgap_provider = self._get_or_create(sess, AuthorizationProvider, name="dbGaP")
        fence_provider = self._get_or_create(sess, AuthorizationProvider, name="fence")
        auth_provider_list = [dbgap_provider, fence_provider]

        existing_pairs = {
            (privilege.user.username.lower(), privilege.project.auth_id)
            for privilege in sess.query(AccessPrivilege).all()
        }

        # the db stores usernames case-sensitively, but db -> whitelist must be
        # compared case-insensitively, so everything is keyed by lowercase name
        user_project_lowercase = {
            username.lower(): projects for username, projects in user_project.items()
        }
        requested_pairs = {
            (username.lower(), project)
            for username, projects in user_project.items()
            for project in projects
        }
        user_info_lowercase = {
            username.lower(): info for username, info in user_info.items()
        }

        to_delete = existing_pairs - requested_pairs
        to_add = requested_pairs - existing_pairs
        to_update = existing_pairs & requested_pairs

        # user records keep the original username casing, so the original
        # (non-lowered) user_info dict is used here
        self._upsert_userinfo(sess, user_info)

        revoke_allowed = not do_not_revoke_from_db_and_storage
        if revoke_allowed:
            self._revoke_from_storage(
                to_delete, sess, google_bulk_mapping=google_bulk_mapping
            )
            self._revoke_from_db(sess, to_delete)

        self._grant_from_storage(
            to_add,
            user_project_lowercase,
            sess,
            google_bulk_mapping=google_bulk_mapping,
            expires=expires,
        )
        self._grant_from_db(
            sess,
            to_add,
            user_info_lowercase,
            user_project_lowercase,
            auth_provider_list,
        )

        # re-grant access that already existed
        self._grant_from_storage(
            to_update,
            user_project_lowercase,
            sess,
            google_bulk_mapping=google_bulk_mapping,
            expires=expires,
        )
        self._update_from_db(sess, to_update, user_project_lowercase)

        if revoke_allowed:
            self._validate_and_update_user_admin(sess, user_info_lowercase)

        sess.commit()

        if config["GOOGLE_BULK_UPDATES"]:
            self.logger.info("Doing bulk Google update...")
            update_google_groups_for_users(google_bulk_mapping)
            self.logger.info("Bulk Google update done!")

        sess.commit()
1✔
945

946
    def sync_to_storage_backend(
        self, user_project, user_info, sess, expires, skip_google_updates=False
    ):
        """
        Sync user access control to the storage backend with a given expiration.

        Args:
            user_project (dict): a dictionary of

                {
                    username: {
                        'project1': {'read-storage','write-storage'},
                        'project2': {'read-storage'}
                    }
                }

            user_info (dict): a dictionary of attributes for a user; "username"
                is always read, "user_id" is read when bulk Google updates are on
            sess: a sqlalchemy session
            expires (int): time at which synced Arborist policies and
                   inclusion in any GBAG are set to expire
            skip_google_updates (bool): True if google group updates should be
                skipped. False if otherwise.

        Return:
            None

        Raises:
            Exception: if `expires` is falsy
        """
        if not expires:
            raise Exception(
                f"sync to storage backend requires an expiration. you provided: {expires}"
            )

        google_group_user_mapping = None
        if config["GOOGLE_BULK_UPDATES"]:
            google_group_user_mapping = {}
            get_or_create_proxy_group_id(
                expires=expires,
                user_id=user_info["user_id"],
                username=user_info["username"],
                session=sess,
                storage_manager=self.storage_manager,
            )

        # TODO: eventually it'd be nice to remove this step but it's required
        #       so that grant_from_storage can determine what storage backends
        #       are needed for a project.
        self._init_projects(user_project, sess)

        # usernames are compared case-insensitively, so key by lowercase name
        user_project_lowercase = {
            username.lower(): projects for username, projects in user_project.items()
        }
        to_add = {
            (username.lower(), project)
            for username, projects in user_project.items()
            for project in projects
        }

        # the single user's info is keyed by its lowercased username
        self._upsert_userinfo(sess, {user_info["username"].lower(): user_info})

        if not skip_google_updates:
            self._grant_from_storage(
                to_add,
                user_project_lowercase,
                sess,
                google_bulk_mapping=google_group_user_mapping,
                expires=expires,
            )

            if config["GOOGLE_BULK_UPDATES"]:
                self.logger.info("Updating user's google groups ...")
                update_google_groups_for_users(google_group_user_mapping)
                self.logger.info("Google groups update done!!")

        sess.commit()
1✔
1020

1021
    def _revoke_from_db(self, sess, to_delete):
1✔
1022
        """
1023
        Revoke user access to projects in the auth database
1024

1025
        Args:
1026
            sess: sqlalchemy session
1027
            to_delete: a set of (username, project.auth_id) to be revoked from db
1028
        Return:
1029
            None
1030
        """
1031
        for username, project_auth_id in to_delete:
1✔
1032
            q = (
1✔
1033
                sess.query(AccessPrivilege)
1034
                .filter(AccessPrivilege.project.has(auth_id=project_auth_id))
1035
                .join(AccessPrivilege.user)
1036
                .filter(func.lower(User.username) == username)
1037
                .all()
1038
            )
1039
            for access in q:
1✔
1040
                self.logger.info(
1✔
1041
                    "revoke {} access to {} in db".format(username, project_auth_id)
1042
                )
1043
                sess.delete(access)
1✔
1044

1045
    def _validate_and_update_user_admin(self, sess, user_info):
        """
        Remove the admin flag from every db admin that is absent from user_info.

        The lookup uses the lowercased db username, so `user_info` is expected
        to be keyed by lowercased usernames.

        Args:
            sess: sqlalchemy session
            user_info: a dict of
            {
                username: {
                    'email': email,
                    'display_name': display_name,
                    'phone_number': phonenum,
                    'tags': {'k1':'v1', 'k2': 'v2'}
                    'admin': is_admin
                }
            }
        Returns:
            None
        """
        current_admins = sess.query(User).filter_by(is_admin=True).all()
        for admin_user in current_admins:
            if admin_user.username.lower() in user_info:
                continue
            admin_user.is_admin = False
            sess.add(admin_user)
            self.logger.info(
                "remove admin access from {} in db".format(
                    admin_user.username.lower()
                )
            )
1073

1074
    def _update_from_db(self, sess, to_update, user_project):
1✔
1075
        """
1076
        Update user access to projects in the auth database
1077

1078
        Args:
1079
            sess: sqlalchemy session
1080
            to_update:
1081
                a set of (username, project.auth_id) to be updated from db
1082

1083
        Return:
1084
            None
1085
        """
1086

1087
        for username, project_auth_id in to_update:
1✔
1088
            q = (
1✔
1089
                sess.query(AccessPrivilege)
1090
                .filter(AccessPrivilege.project.has(auth_id=project_auth_id))
1091
                .join(AccessPrivilege.user)
1092
                .filter(func.lower(User.username) == username)
1093
                .all()
1094
            )
1095
            for access in q:
1✔
1096
                access.privilege = user_project[username][project_auth_id]
1✔
1097
                self.logger.info(
1✔
1098
                    "update {} with {} access to {} in db".format(
1099
                        username, access.privilege, project_auth_id
1100
                    )
1101
                )
1102

1103
    def _grant_from_db(self, sess, to_add, user_info, user_project, auth_provider_list):
1✔
1104
        """
1105
        Grant user access to projects in the auth database
1106
        Args:
1107
            sess: sqlalchemy session
1108
            to_add: a set of (username, project.auth_id) to be granted
1109
            user_project:
1110
                a dictionary of {username: {project: {'read','write'}}
1111
        Return:
1112
            None
1113
        """
1114
        for username, project_auth_id in to_add:
1✔
1115
            u = query_for_user(session=sess, username=username)
1✔
1116

1117
            auth_provider = auth_provider_list[0]
1✔
1118
            if "dbgap_role" not in user_info[username]["tags"]:
1✔
1119
                auth_provider = auth_provider_list[1]
1✔
1120
            user_access = AccessPrivilege(
1✔
1121
                user=u,
1122
                project=self._projects[project_auth_id],
1123
                privilege=list(user_project[username][project_auth_id]),
1124
                auth_provider=auth_provider,
1125
            )
1126
            self.logger.info(
1✔
1127
                "grant user {} to {} with access {}".format(
1128
                    username, user_access.project, user_access.privilege
1129
                )
1130
            )
1131
            sess.add(user_access)
1✔
1132

1133
    def _upsert_userinfo(self, sess, user_info):
1✔
1134
        """
1135
        update user info to database.
1136

1137
        Args:
1138
            sess: sqlalchemy session
1139
            user_info:
1140
                a dict of {username: {display_name, phone_number, tags, admin}
1141

1142
        Return:
1143
            None
1144
        """
1145

1146
        for username in user_info:
1✔
1147
            u = query_for_user(session=sess, username=username)
1✔
1148

1149
            if u is None:
1✔
1150
                self.logger.info("create user {}".format(username))
1✔
1151
                u = User(username=username)
1✔
1152
                sess.add(u)
1✔
1153

1154
            if self.arborist_client:
1✔
1155
                self.arborist_client.create_user({"name": username})
1✔
1156

1157
            u.email = user_info[username].get("email", "")
1✔
1158
            u.display_name = user_info[username].get("display_name", "")
1✔
1159
            u.phone_number = user_info[username].get("phone_number", "")
1✔
1160
            u.is_admin = user_info[username].get("admin", False)
1✔
1161

1162
            idp_name = user_info[username].get("idp_name", "")
1✔
1163
            if idp_name and not u.identity_provider:
1✔
1164
                idp = (
×
1165
                    sess.query(IdentityProvider)
1166
                    .filter(IdentityProvider.name == idp_name)
1167
                    .first()
1168
                )
1169
                if not idp:
×
1170
                    idp = IdentityProvider(name=idp_name)
×
1171
                u.identity_provider = idp
×
1172

1173
            # do not update if there is no tag
1174
            if not user_info[username].get("tags"):
1✔
1175
                continue
1✔
1176

1177
            # remove user db tags if they are not shown in new tags
1178
            for tag in u.tags:
1✔
1179
                if tag.key not in user_info[username]["tags"]:
1✔
1180
                    u.tags.remove(tag)
1✔
1181

1182
            # sync
1183
            for k, v in user_info[username]["tags"].items():
1✔
1184
                found = False
1✔
1185
                for tag in u.tags:
1✔
1186
                    if tag.key == k:
1✔
1187
                        found = True
1✔
1188
                        tag.value = v
1✔
1189
                # create new tag if not found
1190
                if not found:
1✔
1191
                    tag = Tag(key=k, value=v)
1✔
1192
                    u.tags.append(tag)
1✔
1193

1194
    def _revoke_from_storage(self, to_delete, sess, google_bulk_mapping=None):
1✔
1195
        """
1196
        If a project have storage backend, revoke user's access to buckets in
1197
        the storage backend.
1198

1199
        Args:
1200
            to_delete: a set of (username, project.auth_id) to be revoked
1201

1202
        Return:
1203
            None
1204
        """
1205
        for username, project_auth_id in to_delete:
1✔
1206
            project = (
1✔
1207
                sess.query(Project).filter(Project.auth_id == project_auth_id).first()
1208
            )
1209
            for sa in project.storage_access:
1✔
1210
                if not hasattr(self, "storage_manager"):
1✔
1211
                    self.logger.error(
×
1212
                        (
1213
                            "CANNOT revoke {} access to {} in {} because there is NO "
1214
                            "configured storage accesses at all. See configuration. "
1215
                            "Continuing anyway..."
1216
                        ).format(username, project_auth_id, sa.provider.name)
1217
                    )
1218
                    continue
×
1219

1220
                self.logger.info(
1✔
1221
                    "revoke {} access to {} in {}".format(
1222
                        username, project_auth_id, sa.provider.name
1223
                    )
1224
                )
1225
                self.storage_manager.revoke_access(
1✔
1226
                    provider=sa.provider.name,
1227
                    username=username,
1228
                    project=project,
1229
                    session=sess,
1230
                    google_bulk_mapping=google_bulk_mapping,
1231
                )
1232

1233
    def _grant_from_storage(
1✔
1234
        self, to_add, user_project, sess, google_bulk_mapping=None, expires=None
1235
    ):
1236
        """
1237
        If a project have storage backend, grant user's access to buckets in
1238
        the storage backend.
1239

1240
        Args:
1241
            to_add: a set of (username, project.auth_id)  to be granted
1242
            user_project: a dictionary like:
1243

1244
                    {username: {phsid: {'read-storage','write-storage'}}}
1245

1246
        Return:
1247
            dict of the users' storage usernames to their user_projects and the respective storage access.
1248
        """
1249
        storage_user_to_sa_and_user_project = defaultdict()
1✔
1250
        for username, project_auth_id in to_add:
1✔
1251
            project = self._projects[project_auth_id]
1✔
1252
            for sa in project.storage_access:
1✔
1253
                access = list(user_project[username][project_auth_id])
1✔
1254
                if not hasattr(self, "storage_manager"):
1✔
1255
                    self.logger.error(
×
1256
                        (
1257
                            "CANNOT grant {} access {} to {} in {} because there is NO "
1258
                            "configured storage accesses at all. See configuration. "
1259
                            "Continuing anyway..."
1260
                        ).format(username, access, project_auth_id, sa.provider.name)
1261
                    )
1262
                    continue
×
1263

1264
                self.logger.info(
1✔
1265
                    "grant {} access {} to {} in {}".format(
1266
                        username, access, project_auth_id, sa.provider.name
1267
                    )
1268
                )
1269
                storage_username = self.storage_manager.grant_access(
1✔
1270
                    provider=sa.provider.name,
1271
                    username=username,
1272
                    project=project,
1273
                    access=access,
1274
                    session=sess,
1275
                    google_bulk_mapping=google_bulk_mapping,
1276
                    expires=expires,
1277
                )
1278

1279
                storage_user_to_sa_and_user_project[storage_username] = (sa, project)
1✔
1280
        return storage_user_to_sa_and_user_project
1✔
1281

1282
    def _init_projects(self, user_project, sess):
1✔
1283
        """
1284
        initialize projects
1285
        """
1286

1287
        if self.project_mapping:
1✔
1288
            for projects in list(self.project_mapping.values()):
1✔
1289
                for p in projects:
1✔
1290
                    self.logger.debug(
1✔
1291
                        "creating Project with info from project_mapping: {}".format(p)
1292
                    )
1293
                    project = self._get_or_create(sess, Project, **p)
1✔
1294
                    self._projects[p["auth_id"]] = project
1✔
1295
        for _, projects in user_project.items():
1✔
1296
            for auth_id in list(projects.keys()):
1✔
1297
                project = sess.query(Project).filter(Project.auth_id == auth_id).first()
1✔
1298
                if not project:
1✔
1299
                    data = {"name": auth_id, "auth_id": auth_id}
1✔
1300
                    try:
1✔
1301
                        project = self._get_or_create(sess, Project, **data)
1✔
1302
                    except IntegrityError as e:
×
1303
                        sess.rollback()
×
1304
                        self.logger.error(
×
1305
                            f"Project {auth_id} already exists. Detail {str(e)}"
1306
                        )
1307
                        raise Exception(
×
1308
                            "Project {} already exists. Detail {}. Please contact your system administrator.".format(
1309
                                auth_id, str(e)
1310
                            )
1311
                        )
1312
                if auth_id not in self._projects:
1✔
1313
                    self._projects[auth_id] = project
1✔
1314

1315
    @staticmethod
1✔
1316
    def _get_or_create(sess, model, **kwargs):
1✔
1317
        instance = sess.query(model).filter_by(**kwargs).first()
1✔
1318
        if not instance:
1✔
1319
            instance = model(**kwargs)
1✔
1320
            sess.add(instance)
1✔
1321
        return instance
1✔
1322

1323
    def _process_dbgap_files(self, dbgap_config, sess):
1✔
1324
        """
1325
        Args:
1326
            dbgap_config : a dictionary containing information about a single
1327
                           dbgap sftp server (from fence config)
1328
            sess: database session
1329

1330
        Return:
1331
            user_projects (dict)
1332
            user_info (dict)
1333
        """
1334
        dbgap_file_list = []
1✔
1335
        hostname = dbgap_config["info"]["host"]
1✔
1336
        username = dbgap_config["info"]["username"]
1✔
1337
        encrypted = dbgap_config["info"].get("encrypted", True)
1✔
1338
        folderdir = os.path.join(str(self.folder), str(hostname), str(username))
1✔
1339

1340
        try:
1✔
1341
            if os.path.exists(folderdir):
1✔
1342
                dbgap_file_list = glob.glob(
×
1343
                    os.path.join(folderdir, "*")
1344
                )  # get lists of file from folder
1345
            else:
1346
                self.logger.info("Downloading files from: {}".format(hostname))
1✔
1347
                dbgap_file_list = self._download(dbgap_config)
1✔
1348
        except Exception as e:
1✔
1349
            self.logger.error(e)
1✔
1350
            exit(1)
1✔
1351
        self.logger.info("dbgap files: {}".format(dbgap_file_list))
×
1352
        user_projects, user_info = self._get_user_permissions_from_csv_list(
×
1353
            dbgap_file_list,
1354
            encrypted=encrypted,
1355
            session=sess,
1356
            dbgap_config=dbgap_config,
1357
        )
1358

1359
        user_projects = self.parse_projects(user_projects)
×
1360
        return user_projects, user_info
×
1361

1362
    def _get_user_permissions_from_csv_list(
1✔
1363
        self, file_list, encrypted, session, dbgap_config={}
1364
    ):
1365
        """
1366
        Args:
1367
            file_list: list of files (represented as strings)
1368
            encrypted: boolean indicating whether those files are encrypted
1369
            session: sqlalchemy session
1370
            dbgap_config: a dictionary containing information about the dbGaP sftp server
1371
                    (comes from fence config)
1372

1373
        Return:
1374
            user_projects (dict)
1375
            user_info (dict)
1376
        """
1377
        permissions = [{"read-storage", "read"} for _ in file_list]
1✔
1378
        user_projects, user_info = self._parse_csv(
1✔
1379
            dict(list(zip(file_list, permissions))),
1380
            sess=session,
1381
            dbgap_config=dbgap_config,
1382
            encrypted=encrypted,
1383
        )
1384
        return user_projects, user_info
1✔
1385

1386
    def _merge_multiple_local_csv_files(
1✔
1387
        self, dbgap_file_list, encrypted, dbgap_configs, session
1388
    ):
1389
        """
1390
        Args:
1391
            dbgap_file_list (list): a list of whitelist file locations stored locally
1392
            encrypted (bool): whether the file is encrypted (comes from fence config)
1393
            dbgap_configs (list): list of dictionaries containing information about the dbgap server (comes from fence config)
1394
            session (sqlalchemy.Session): database session
1395

1396
        Return:
1397
            merged_user_projects (dict)
1398
            merged_user_info (dict)
1399
        """
1400
        merged_user_projects = {}
1✔
1401
        merged_user_info = {}
1✔
1402

1403
        for dbgap_config in dbgap_configs:
1✔
1404
            user_projects, user_info = self._get_user_permissions_from_csv_list(
1✔
1405
                dbgap_file_list,
1406
                encrypted,
1407
                session=session,
1408
                dbgap_config=dbgap_config,
1409
            )
1410
            self.sync_two_user_info_dict(user_info, merged_user_info)
1✔
1411
            self.sync_two_phsids_dict(user_projects, merged_user_projects)
1✔
1412
        return merged_user_projects, merged_user_info
1✔
1413

1414
    def _merge_multiple_dbgap_sftp(self, dbgap_servers, sess):
1✔
1415
        """
1416
        Args:
1417
            dbgap_servers : a list of dictionaries each containging config on
1418
                           dbgap sftp server (comes from fence config)
1419
            sess: database session
1420

1421
        Return:
1422
            merged_user_projects (dict)
1423
            merged_user_info (dict)
1424
        """
1425
        merged_user_projects = {}
1✔
1426
        merged_user_info = {}
1✔
1427
        for dbgap in dbgap_servers:
1✔
1428
            user_projects, user_info = self._process_dbgap_files(dbgap, sess)
1✔
1429
            # merge into merged_user_info
1430
            # user_info overrides original info in merged_user_info
1431
            self.sync_two_user_info_dict(user_info, merged_user_info)
1✔
1432

1433
            # merge all access info dicts into "merged_user_projects".
1434
            # the access info is combined - if the user_projects access is
1435
            # ["read"] and the merged_user_projects is ["read-storage"], the
1436
            # resulting access is ["read", "read-storage"].
1437
            self.sync_two_phsids_dict(user_projects, merged_user_projects)
1✔
1438
        return merged_user_projects, merged_user_info
1✔
1439

1440
    def parse_projects(self, user_projects):
        """
        Return a copy of user_projects keyed by lowercased username.

        Values are reused as is. When two usernames differ only by case, the
        one iterated last wins.
        """
        lowered = {}
        for username, projects in user_projects.items():
            lowered[username.lower()] = projects
        return lowered
1✔
1445

1446
    def _process_dbgap_project(
1✔
1447
        self, dbgap_project, privileges, username, sess, user_projects, dbgap_config
1448
    ):
1449
        if dbgap_project not in self.project_mapping:
1✔
1450
            self._add_dbgap_project_for_user(
1✔
1451
                dbgap_project,
1452
                privileges,
1453
                username,
1454
                sess,
1455
                user_projects,
1456
                dbgap_config,
1457
            )
1458

1459
        for element_dict in self.project_mapping.get(dbgap_project, []):
1✔
1460
            try:
1✔
1461
                phsid_privileges = {element_dict["auth_id"]: set(privileges)}
1✔
1462

1463
                # need to add dbgap project to arborist
1464
                if self.arborist_client:
1✔
1465
                    self._determine_arborist_resource(
1✔
1466
                        element_dict["auth_id"], dbgap_config
1467
                    )
1468

1469
                if username not in user_projects:
1✔
1470
                    user_projects[username] = {}
1✔
1471
                user_projects[username].update(phsid_privileges)
1✔
1472

1473
            except ValueError as e:
×
1474
                self.logger.info(e)
×
1475

1476
    def _process_user_projects(
1✔
1477
        self,
1478
        user_projects,
1479
        enable_common_exchange_area_access,
1480
        study_common_exchange_areas,
1481
        dbgap_config,
1482
        sess,
1483
    ):
1484
        user_projects_to_modify = copy.deepcopy(user_projects)
1✔
1485
        for username in user_projects.keys():
1✔
1486
            for project in user_projects[username].keys():
1✔
1487
                phsid = project.split(".")
1✔
1488
                dbgap_project = phsid[0]
1✔
1489
                privileges = user_projects[username][project]
1✔
1490
                if len(phsid) > 1 and self._get_parse_consent_code(dbgap_config):
1✔
1491
                    consent_code = phsid[-1]
1✔
1492

1493
                    # c999 indicates full access to all consents and access
1494
                    # to a study-specific exchange area
1495
                    # access to at least one study-specific exchange area implies access
1496
                    # to the parent study's common exchange area
1497
                    #
1498
                    # NOTE: Handling giving access to all consents is done at
1499
                    #       a later time, when we have full information about possible
1500
                    #       consents
1501
                    self.logger.debug(
1✔
1502
                        f"got consent code {consent_code} from dbGaP project "
1503
                        f"{dbgap_project}"
1504
                    )
1505
                    if (
1✔
1506
                        consent_code == "c999"
1507
                        and enable_common_exchange_area_access
1508
                        and dbgap_project in study_common_exchange_areas
1509
                    ):
1510
                        self.logger.info(
1✔
1511
                            "found study with consent c999 and Fence "
1512
                            "is configured to parse exchange area data. Giving user "
1513
                            f"{username} {privileges} privileges in project: "
1514
                            f"{study_common_exchange_areas[dbgap_project]}."
1515
                        )
1516
                        self._add_dbgap_project_for_user(
1✔
1517
                            study_common_exchange_areas[dbgap_project],
1518
                            privileges,
1519
                            username,
1520
                            sess,
1521
                            user_projects_to_modify,
1522
                            dbgap_config,
1523
                        )
1524

1525
                    dbgap_project += "." + consent_code
1✔
1526

1527
                self._process_dbgap_project(
1✔
1528
                    dbgap_project,
1529
                    privileges,
1530
                    username,
1531
                    sess,
1532
                    user_projects_to_modify,
1533
                    dbgap_config,
1534
                )
1535
        for user in user_projects_to_modify.keys():
1✔
1536
            user_projects[user] = user_projects_to_modify[user]
1✔
1537

1538
    def sync(self):
        """
        Run the full sync.

        Uses ``self.session`` when it is set. Otherwise a session is opened
        from ``self.driver.session`` for the duration of the sync.
        """
        if self.session:
            self._sync(self.session)
            return

        with self.driver.session as s:
            self._sync(s)
×
1544

1545
    def download(self):
        """
        Download files from every dbGaP server configured in ``self.dbGaP``.

        The file lists returned by ``_download`` are discarded. An exception
        from any server propagates and stops the remaining downloads.
        """
        for dbgap_server in self.dbGaP:
            self._download(dbgap_server)
×
1548

1549
    def _download(self, dbgap_config):
1✔
1550
        """
1551
        Download files from dbgap server.
1552
        """
1553
        server = dbgap_config["info"]
1✔
1554
        protocol = dbgap_config["protocol"]
1✔
1555
        hostname = server["host"]
1✔
1556
        username = server["username"]
1✔
1557
        folderdir = os.path.join(str(self.folder), str(hostname), str(username))
1✔
1558

1559
        if not os.path.exists(folderdir):
1✔
1560
            os.makedirs(folderdir)
1✔
1561

1562
        self.logger.info("Download from server")
1✔
1563
        try:
1✔
1564
            if protocol == "sftp":
1✔
1565
                self._get_from_sftp_with_proxy(server, folderdir)
1✔
1566
            else:
1567
                self._get_from_ftp_with_proxy(server, folderdir)
×
1568
            dbgap_files = glob.glob(os.path.join(folderdir, "*"))
×
1569
            return dbgap_files
×
1570
        except Exception as e:
1✔
1571
            self.logger.error(e)
1✔
1572
            raise
1✔
1573

1574
    def _sync(self, sess):
        """
        Collect files from dbgap server(s), sync csv and yaml files to storage
        backend and fence DB

        Args:
            sess: database session, passed through to the merge, DB/storage
                sync, arborist sync and authz-mapping persistence steps

        Raises:
            EnvironmentError: if the user.yaml cannot be loaded, or it has an
                ``authz`` section but no arborist client is configured
            AssertionError: re-raised from loading the user.yaml
            SystemExit: with code 1 if either arborist sync step reports failure
            GoogleUpdateException: re-raised at the very end if it was raised
                while syncing to the DB and storage backend
        """

        # get all dbgap files
        user_projects = {}
        user_info = {}
        if self.is_sync_from_dbgap_server:
            self.logger.debug(
                "Pulling telemetry files from {} dbgap sftp servers".format(
                    len(self.dbGaP)
                )
            )
            user_projects, user_info = self._merge_multiple_dbgap_sftp(self.dbGaP, sess)

        local_csv_file_list = []
        if self.sync_from_local_csv_dir:
            local_csv_file_list = glob.glob(
                os.path.join(self.sync_from_local_csv_dir, "*")
            )
            # Sort the list so the order of files is consistent across platforms
            local_csv_file_list.sort()

        user_projects_csv, user_info_csv = self._merge_multiple_local_csv_files(
            local_csv_file_list,
            encrypted=False,
            session=sess,
            dbgap_configs=self.dbGaP,
        )

        try:
            user_yaml = UserYAML.from_file(
                self.sync_from_local_yaml_file, encrypted=False, logger=self.logger
            )
        except (EnvironmentError, AssertionError) as e:
            self.logger.error(str(e))
            self.logger.error("aborting early")
            raise

        # parse all projects
        user_projects_csv = self.parse_projects(user_projects_csv)
        user_projects = self.parse_projects(user_projects)
        user_yaml.projects = self.parse_projects(user_yaml.projects)

        # merge all user info dicts into "user_info".
        # the user info (such as email) in the user.yaml files
        # overrides the user info from the CSV files.
        self.sync_two_user_info_dict(user_info_csv, user_info)
        self.sync_two_user_info_dict(user_yaml.user_info, user_info)

        # merge all access info dicts into "user_projects".
        # the access info is combined - if the user.yaml access is
        # ["read"] and the CSV file access is ["read-storage"], the
        # resulting access is ["read", "read-storage"].
        self.sync_two_phsids_dict(
            user_projects_csv, user_projects, source1="local_csv", source2="dbgap"
        )
        self.sync_two_phsids_dict(
            user_yaml.projects, user_projects, source1="user_yaml", source2="dbgap"
        )

        # Consent-code parsing is configured per dbGaP server. The c999 grant
        # works on the already merged access and does not take a server config,
        # so it runs once if any configured server parses consent codes.
        if any(
            self._get_parse_consent_code(dbgap_config) for dbgap_config in self.dbGaP
        ):
            self._grant_all_consents_to_c999_users(
                user_projects, user_yaml.project_to_resource
            )

        google_update_ex = None

        try:
            # update the Fence DB
            if user_projects:
                self.logger.info("Sync to db and storage backend")
                self.sync_to_db_and_storage_backend(user_projects, user_info, sess)
                self.logger.info("Finish syncing to db and storage backend")
            else:
                self.logger.info("No users for syncing")
        except GoogleUpdateException as ex:
            # save this to reraise later after all non-Google syncing has finished
            # this way, any issues with Google only affect Google data access and don't
            # cascade problems into non-Google AWS or Azure access
            google_update_ex = ex

        # update the Arborist DB (resources, roles, policies, groups)
        if user_yaml.authz:
            if not self.arborist_client:
                raise EnvironmentError(
                    "yaml file contains authz section but sync is not configured with"
                    " arborist client--did you run sync with --arborist <arborist client> arg?"
                )
            self.logger.info("Synchronizing arborist...")
            success = self._update_arborist(sess, user_yaml)
            if success:
                self.logger.info("Finished synchronizing arborist")
            else:
                self.logger.error("Could not synchronize successfully")
                # the `exit` builtin is injected by the `site` module and is
                # not always available; raising SystemExit always works
                raise SystemExit(1)
        else:
            self.logger.info("No `authz` section; skipping arborist sync")

        # update the Arborist DB (user access)
        if self.arborist_client:
            self.logger.info("Synchronizing arborist with authorization info...")
            success = self._update_authz_in_arborist(sess, user_projects, user_yaml)
            if success:
                self.logger.info(
                    "Finished synchronizing authorization info to arborist"
                )
            else:
                self.logger.error(
                    "Could not synchronize authorization info successfully to arborist"
                )
                raise SystemExit(1)
        else:
            self.logger.error("No arborist client set; skipping arborist sync")

        # Logging authz source
        for u, s in self.auth_source.items():
            self.logger.info("Access for user {} from {}".format(u, s))

        self.logger.info(
            f"Persisting authz mapping to database: {user_yaml.project_to_resource}"
        )
        user_yaml.persist_project_to_resource(db_session=sess)
        if google_update_ex is not None:
            raise google_update_ex
1✔
1705

1706
    def _grant_all_consents_to_c999_users(
        self, user_projects, user_yaml_project_to_resources
    ):
        """
        Give users holding a ``c999`` consent access to every known consent of
        that study.

        Known accessions come from the keys of ``self._projects`` and
        ``user_yaml_project_to_resources``. Each one matching
        ``config["DBGAP_ACCESSION_WITH_CONSENT_REGEX"]`` contributes its consent
        to its own phsid, and to each phsid from ``self._get_children``.

        Args:
            user_projects (dict): username -> {project -> privileges};
                updated in place
            user_yaml_project_to_resources (dict): project -> resource mapping
                from the user.yaml; only its keys are used
        """
        access_number_matcher = re.compile(config["DBGAP_ACCESSION_WITH_CONSENT_REGEX"])
        # combine dbgap/user.yaml projects into one big set (in case not all consents
        # are in either)
        all_projects = set(self._projects.keys()) | set(
            user_yaml_project_to_resources.keys()
        )

        self.logger.debug(f"all projects: {all_projects}")

        # construct a mapping from phsid (without consent) to all accessions with consent
        consent_mapping = {}
        for project in all_projects:
            phs_match = access_number_matcher.match(project)
            if not phs_match:
                continue
            accession_number = phs_match.groupdict()
            phsid = accession_number["phsid"]
            consent = accession_number["consent"]

            # TODO: This is not handling the .v1.p1 at all
            consent_mapping.setdefault(phsid, set()).add(".".join([phsid, consent]))
            for child_phs in self._get_children(phsid) or []:
                # Assign parent consent to child study
                consent_mapping.setdefault(child_phs, set()).add(
                    ".".join([child_phs, consent])
                )

        self.logger.debug(f"consent mapping: {consent_mapping}")

        # go through existing access and find any c999's and make sure to give access to
        # all accessions with consent for that phsid
        for username, user_project_info in user_projects.items():
            # iterate a snapshot of the keys: entries are added to this user's
            # dict below, and newly added entries must not be revisited
            for project in list(user_project_info):
                phs_match = access_number_matcher.match(project)
                if not phs_match:
                    continue
                accession_number = phs_match.groupdict()
                if accession_number["consent"] != "c999":
                    continue

                # give access to all consents
                all_phsids_with_consent = consent_mapping.get(
                    accession_number["phsid"], []
                )
                self.logger.info(
                    f"user {username} has c999 consent group for: {project}. "
                    f"Granting access to all consents: {all_phsids_with_consent}"
                )
                # NOTE: Granting both read and read-storage. This replaces any
                #       privileges the user already had on that accession.
                for phsid_with_consent in all_phsids_with_consent:
                    user_project_info[phsid_with_consent] = {"read-storage", "read"}
1760

1761
    def _update_arborist(self, session, user_yaml):
        """
        Create roles, resources, policies, groups in arborist from the information in
        ``user_yaml``.

        The projects are sent to arborist as resources with paths like
        ``/projects/{project}``. Roles are created with just the original names
        for the privileges like ``"read-storage", "read"`` etc.

        ``ArboristError`` is logged and skipped for resources, roles, policies
        and group puts. It is not caught while listing or deleting groups, or
        while granting policies to the built-in groups.

        Args:
            session (sqlalchemy.Session)
            user_yaml (UserYAML)

        Return:
            bool: False if arborist is not healthy, True otherwise
        """
        healthy = self._is_arborist_healthy()
        if not healthy:
            return False

        # Set up the resource tree in arborist by combining provided resources with any
        # dbgap resources that were created before this.
        #
        # Why add dbgap resources if they've already been created?
        #   B/C Arborist's PUT update will override existing subresources. So if a dbgap
        #   resources was created under `/programs/phs000178` anything provided in
        #   user.yaml under `/programs` would completely wipe it out.
        resources = user_yaml.authz.get("resources", [])

        dbgap_resource_paths = []
        for path_list in self._dbgap_study_to_resources.values():
            dbgap_resource_paths.extend(path_list)

        self.logger.debug("user_yaml resources: {}".format(resources))
        self.logger.debug("dbgap resource paths: {}".format(dbgap_resource_paths))

        combined_resources = utils.combine_provided_and_dbgap_resources(
            resources, dbgap_resource_paths
        )

        for resource in combined_resources:
            try:
                self.logger.debug(
                    "attempting to update arborist resource: {}".format(resource)
                )
                self.arborist_client.update_resource("/", resource, merge=True)
            except ArboristError as e:
                self.logger.error(e)
                # keep going; maybe just some conflicts from things existing already

        # update roles
        roles = user_yaml.authz.get("roles", [])
        for role in roles:
            try:
                response = self.arborist_client.update_role(role["id"], role)
                if response:
                    self._created_roles.add(role["id"])
            except ArboristError as e:
                self.logger.info(
                    "couldn't update role '{}', creating instead".format(str(e))
                )
                try:
                    response = self.arborist_client.create_role(role)
                    if response:
                        self._created_roles.add(role["id"])
                except ArboristError as create_error:
                    self.logger.error(create_error)
                    # keep going; maybe just some conflicts from things existing already

        # update policies
        policies = user_yaml.authz.get("policies", [])
        for policy in policies:
            policy_id = policy["id"]
            # send the policy without its id, but leave the user.yaml data
            # untouched so this method can safely run again on the same object
            policy_body = {key: value for key, value in policy.items() if key != "id"}
            try:
                self.logger.debug(
                    "Trying to upsert policy with id {}".format(policy_id)
                )
                response = self.arborist_client.update_policy(
                    policy_id, policy_body, create_if_not_exist=True
                )
            except ArboristError as e:
                self.logger.error(e)
                # keep going; maybe just some conflicts from things existing already
            else:
                if response:
                    self.logger.debug("Upserted policy with id {}".format(policy_id))
                    self._created_policies.add(policy_id)

        # update groups
        groups = user_yaml.authz.get("groups", [])

        # delete from arborist the groups that have been deleted
        # from the user.yaml
        arborist_groups = {
            g["name"] for g in self.arborist_client.list_groups().get("groups", [])
        }
        useryaml_groups = {g["name"] for g in groups}
        for deleted_group in arborist_groups.difference(useryaml_groups):
            # do not try to delete built in groups
            if deleted_group not in ["anonymous", "logged-in"]:
                self.arborist_client.delete_group(deleted_group)

        # create/update the groups defined in the user.yaml
        for group in groups:
            missing = {"name", "users", "policies"}.difference(set(group.keys()))
            if missing:
                name = group.get("name", "{MISSING NAME}")
                self.logger.error(
                    "group {} missing required field(s): {}".format(name, list(missing))
                )
                continue
            try:
                self.arborist_client.put_group(
                    group["name"],
                    # Arborist doesn't handle group descriptions yet
                    # description=group.get("description", ""),
                    users=group["users"],
                    policies=group["policies"],
                )
            except ArboristError as e:
                self.logger.info("couldn't put group: {}".format(str(e)))

        # Update policies for built-in (`anonymous` and `logged-in`) groups

        # First recreate these groups in order to clear out old, possibly deleted policies
        for builtin_group in ["anonymous", "logged-in"]:
            try:
                self.arborist_client.put_group(builtin_group)
            except ArboristError as e:
                self.logger.info("couldn't put group: {}".format(str(e)))

        # Now add back policies that are in the user.yaml
        for policy in user_yaml.authz.get("anonymous_policies", []):
            self.arborist_client.grant_group_policy("anonymous", policy)

        for policy in user_yaml.authz.get("all_users_policies", []):
            self.arborist_client.grant_group_policy("logged-in", policy)

        return True
1✔
1900

1901
    def _revoke_all_policies_preserve_mfa(self, username, idp=None):
        """
        If MFA is enabled for the user's idp, check if they have the /multifactor_auth resource and restore the
        mfa_policy after revoking all policies.

        MFA counts as enabled when ``config["OPENID_CONNECT"][idp]`` contains
        ``"multifactor_auth_claim_info"``. Policies are revoked in every case,
        even if the current policies cannot be fetched.

        Args:
            username (str): user whose policies are revoked
            idp (str): name of the user's identity provider, if known
        """

        is_mfa_enabled = "multifactor_auth_claim_info" in config["OPENID_CONNECT"].get(
            idp, {}
        )

        if not is_mfa_enabled:
            # TODO This should be a diff, not a revocation of all policies.
            self.arborist_client.revoke_all_policies_for_user(username)
            return

        policies = []
        try:
            user_data_from_arborist = self.arborist_client.get_user(username)
            policies = user_data_from_arborist["policies"]
        except Exception as e:
            self.logger.error(
                f"Could not retrieve user's policies, revoking all policies anyway. {e}"
            )
        finally:
            # TODO This should be a diff, not a revocation of all policies.
            self.arborist_client.revoke_all_policies_for_user(username)

        # Policy entries may be plain names or dicts carrying the name under
        # "policy" (the shape _grant_arborist_policies reads); accept both.
        policy_names = {
            entry.get("policy") if isinstance(entry, dict) else entry
            for entry in policies or []
        }
        if "mfa_policy" in policy_names:
            self.arborist_client.grant_user_policy(username, "mfa_policy")
1✔
1930

1931
    def _grant_arborist_policies(
1✔
1932
        self, username, incoming_policies, user_yaml, expires=None
1933
    ):
1934
        """
1935
        Find the difference between the existing policies for a user and the incoming policies,
1936
        and decide whether to add, remove, or keep policies.
1937

1938
        Args:
1939
            username (str): the username of the user
1940
            incoming_policies (set): set of policies to be applied to the user
1941
            user_yaml (UserYAML): UserYAML object containing authz information
1942
            expires (int): time at which authz info in Arborist should expire
1943

1944
        Return:
1945
            bool: True if policies were successfully updated, False otherwise
1946
        """
1947
        user_existing_policies = set()
1✔
1948
        to_keep = set()
1✔
1949
        to_add = set()
1✔
1950
        to_remove = set()
1✔
1951
        is_revoke_all = False
1✔
1952

1953
        try:
1✔
1954
            user_existing_policies = set(
1✔
1955
                policy["policy"]
1956
                for policy in self.arborist_client.get_user(username)["policies"]
1957
            )
1958
            self.logger.info(
1✔
1959
                f"Fetched user {username} existing policies: {user_existing_policies}"
1960
            )
1961
        except ArboristError as e:
1✔
1962
            self.logger.error(
1✔
1963
                f"Could not get user {username} policies from Arborist: {e} Revoking all policies..."
1964
            )
1965
            # if getting existing policies fails, revoke all policies and re-apply
1966
            is_revoke_all = True
1✔
1967

1968
        if is_revoke_all is False and len(incoming_policies) > 0:
1✔
1969
            to_keep = incoming_policies & user_existing_policies
1✔
1970
            to_add = incoming_policies - user_existing_policies
1✔
1971
            to_remove = user_existing_policies - incoming_policies
1✔
1972

1973
            if user_yaml:
1✔
1974
                anonymous_policies = set()
1✔
1975
                for policy in to_remove:
1✔
1976
                    if policy in user_yaml.authz.get(
×
1977
                        "anonymous_policies", []
1978
                    ) or policy in user_yaml.authz.get("all_users_policies", []):
1979
                        self.logger.warning(
×
1980
                            f"Policy {policy} is an anonymous policy, not revoking it for user {username}."
1981
                        )
1982
                        anonymous_policies.add(policy)
×
1983
                to_remove -= anonymous_policies
1✔
1984
        else:
1985
            # if incoming_policies is empty, we revoke all policies
1986
            is_revoke_all = True
1✔
1987

1988
        if not is_revoke_all:
1✔
1989
            try:
1✔
1990
                if to_remove:
1✔
1991
                    for policy in to_remove:
1✔
1992
                        self.logger.info(
1✔
1993
                            f"Revoking policy {policy} for user {username}."
1994
                        )
1995
                        self.arborist_client.revoke_user_policy(username, policy)
1✔
1996
            except ArboristError as e:
×
1997
                self.logger.error(
×
1998
                    f"Could not revoke user {username} policy {policy}. Revoking all instead: {e}"
1999
                )
2000
                is_revoke_all = True
×
2001

2002
        if is_revoke_all:
1✔
2003
            try:
1✔
2004
                self.logger.info(f"Revoking all policies for user {username}.")
1✔
2005
                self.arborist_client.revoke_all_policies_for_user(username)
1✔
2006
            except ArboristError as e:
×
2007
                self.logger.error(
×
2008
                    f"Could not revoke all policies for user {username}. Error: {e}"
2009
                )
2010
                return False
×
2011
            to_add = incoming_policies  # if we revoke all, we need to add all incoming policies
1✔
2012

2013
        if (
1✔
2014
            "mfa_policy" not in incoming_policies
2015
            and "mfa_policy" in user_existing_policies
2016
        ):
2017
            to_add.add("mfa_policy")
×
2018

2019
        if to_add:
1✔
2020
            try:
1✔
2021
                self.logger.info(f"Bulk granting user {username} policies {to_add}.")
1✔
2022
                response_json = self.arborist_client.grant_bulk_user_policy(
1✔
2023
                    username, list(to_add), expires
2024
                )
2025
            except ArboristError as e:
×
2026
                self.logger.error(
×
2027
                    f"Could not grant user {username} policies {to_add}. Error: {e}"
2028
                )
2029
                return False
×
2030

2031
        return True
1✔
2032

2033
    def _update_authz_in_arborist(
1✔
2034
        self,
2035
        session,
2036
        user_projects,
2037
        user_yaml=None,
2038
        single_user_sync=False,
2039
        expires=None,
2040
    ):
2041
        """
2042
        Assign users policies in arborist from the information in
2043
        ``user_projects`` and optionally a ``user_yaml``.
2044

2045
        The projects are sent to arborist as resources with paths like
2046
        ``/projects/{project}``. Roles are created with just the original names
2047
        for the privileges like ``"read-storage", "read"`` etc.
2048

2049
        Args:
2050
            user_projects (dict)
2051
            user_yaml (UserYAML) optional, if there are policies for users in a user.yaml
2052
            single_user_sync (bool) whether authz update is for a single user
2053
            expires (int) time at which authz info in Arborist should expire
2054

2055
        Return:
2056
            bool: success
2057
        """
2058
        healthy = self._is_arborist_healthy()
1✔
2059
        if not healthy:
1✔
2060
            return False
×
2061

2062
        self.logger.debug("user_projects: {}".format(user_projects))
1✔
2063

2064
        if user_yaml:
1✔
2065
            self.logger.debug(
1✔
2066
                "useryaml abac before lowering usernames: {}".format(
2067
                    user_yaml.user_abac
2068
                )
2069
            )
2070
            user_yaml.user_abac = {
1✔
2071
                key.lower(): value for key, value in user_yaml.user_abac.items()
2072
            }
2073
            # update the project info with `projects` specified in user.yaml
2074
            self.sync_two_phsids_dict(user_yaml.user_abac, user_projects)
1✔
2075

2076
        # get list of users from arborist to make sure users that are completely removed
2077
        # from authorization sources get policies revoked
2078

2079
        arborist_user_projects = {}
1✔
2080
        if not single_user_sync:
1✔
2081

2082
            try:
1✔
2083
                arborist_users = self.arborist_client.get_users().json["users"]
1✔
2084

2085
                # construct user information, NOTE the lowering of the username. when adding/
2086
                # removing access, the case in the Fence db is used. For combining access, it is
2087
                # case-insensitive, so we lower
2088
                arborist_user_projects = {
1✔
2089
                    user["name"].lower(): {} for user in arborist_users
2090
                }
2091
            except (ArboristError, KeyError, AttributeError) as error:
×
2092
                # TODO usersync should probably exit with non-zero exit code at the end,
2093
                #      but sync should continue from this point so there are no partial
2094
                #      updates
2095
                self.logger.warning(
×
2096
                    "Could not get list of users in Arborist, continuing anyway. "
2097
                    "WARNING: this sync will NOT remove access for users no longer in "
2098
                    f"authorization sources. Error: {error}"
2099
                )
2100

2101
            # update the project info with users from arborist
2102
            self.sync_two_phsids_dict(arborist_user_projects, user_projects)
1✔
2103

2104
        # prefer in-memory if available from user_yaml, if not, get from database
2105
        if user_yaml and user_yaml.project_to_resource:
1✔
2106
            project_to_authz_mapping = user_yaml.project_to_resource
1✔
2107
            self.logger.debug(
1✔
2108
                f"using in-memory project to authz resource mapping from "
2109
                f"user.yaml (instead of database): {project_to_authz_mapping}"
2110
            )
2111
        else:
2112
            project_to_authz_mapping = get_project_to_authz_mapping(session)
1✔
2113
            self.logger.debug(
1✔
2114
                f"using persisted project to authz resource mapping from database "
2115
                f"(instead of user.yaml - as it may not be available): {project_to_authz_mapping}"
2116
            )
2117

2118
        self.logger.debug(
1✔
2119
            f"_dbgap_study_to_resources: {self._dbgap_study_to_resources}"
2120
        )
2121
        all_resources = [
1✔
2122
            r
2123
            for resources in self._dbgap_study_to_resources.values()
2124
            for r in resources
2125
        ]
2126
        all_resources.extend(r for r in project_to_authz_mapping.values())
1✔
2127
        self._create_arborist_resources(all_resources)
1✔
2128

2129
        for username, user_project_info in user_projects.items():
1✔
2130
            self.logger.info("processing user `{}`".format(username))
1✔
2131
            user = query_for_user(session=session, username=username)
1✔
2132
            idp = None
1✔
2133
            if user:
1✔
2134
                username = user.username
1✔
2135
                idp = user.identity_provider.name if user.identity_provider else None
1✔
2136

2137
            self.arborist_client.create_user_if_not_exist(username)
1✔
2138

2139
            # as of 2/11/2022, for single_user_sync, as RAS visa parsing has
2140
            # previously mapped each project to the same set of privileges
2141
            # (i.e.{'read', 'read-storage'}), unique_policies will just be a
2142
            # single policy with ('read', 'read-storage') being the single
2143
            # key
2144
            unique_policies = self._determine_unique_policies(
1✔
2145
                user_project_info, project_to_authz_mapping
2146
            )
2147
            for roles in unique_policies.keys():
1✔
2148
                for role in roles:
1✔
2149
                    self._create_arborist_role(role)
1✔
2150

2151
            incoming_policies = set()  # set of policies for current user.
1✔
2152

2153
            if single_user_sync:
1✔
2154
                for ordered_roles, ordered_resources in unique_policies.items():
1✔
2155
                    policy_hash = self._hash_policy_contents(
1✔
2156
                        ordered_roles, ordered_resources
2157
                    )
2158
                    self._create_arborist_policy(
1✔
2159
                        policy_hash,
2160
                        ordered_roles,
2161
                        ordered_resources,
2162
                        skip_if_exists=True,
2163
                    )
2164
                    # return here as it is not expected single_user_sync
2165
                    # will need any of the remaining user_yaml operations
2166
                    # left in _update_authz_in_arborist
2167
                    return self._grant_arborist_policy(
1✔
2168
                        username, policy_hash, expires=expires
2169
                    )
2170
            else:
2171
                policy_ids_to_grant = set()
1✔
2172
                for roles, resources in unique_policies.items():
1✔
2173
                    for role in roles:
1✔
2174
                        for resource in resources:
1✔
2175
                            # grant a policy to this user which is a single
2176
                            # role on a single resource
2177

2178
                            # format project '/x/y/z' -> 'x.y.z'
2179
                            # so the policy id will be something like 'x.y.z-create'
2180
                            policy_id = _format_policy_id(resource, role)
1✔
2181
                            incoming_policies.add(policy_id)
1✔
2182
                            if policy_id not in self._created_policies:
1✔
2183
                                try:
1✔
2184
                                    self.arborist_client.update_policy(
1✔
2185
                                        policy_id,
2186
                                        {
2187
                                            "description": "policy created by fence sync",
2188
                                            "role_ids": [role],
2189
                                            "resource_paths": [resource],
2190
                                        },
2191
                                        create_if_not_exist=True,
2192
                                    )
2193
                                except ArboristError as e:
×
2194
                                    self.logger.info(
×
2195
                                        "not creating policy in arborist; {}".format(
2196
                                            str(e)
2197
                                        )
2198
                                    )
2199
                                self._created_policies.add(policy_id)
1✔
2200
                            policy_ids_to_grant.add(policy_id)
1✔
2201
                self._grant_bulk_user_policies(
1✔
2202
                    username, policy_ids_to_grant, expires=expires
2203
                )
2204

2205
            if user_yaml:
1✔
2206
                user_yaml_policies = set(user_yaml.policies.get(username, []))
1✔
2207
                incoming_policies = (
1✔
2208
                    incoming_policies | user_yaml_policies
2209
                )  # add policies from whitelist and useryaml
2210

2211
            self._grant_arborist_policies(
1✔
2212
                username, incoming_policies, user_yaml, expires=expires
2213
            )
2214

2215
        if user_yaml:
1✔
2216
            for client_name, client_details in user_yaml.clients.items():
1✔
2217
                client_policies = client_details.get("policies", [])
×
2218
                clients = session.query(Client).filter_by(name=client_name).all()
×
2219
                # update existing clients, do not create new ones
2220
                if not clients:
×
2221
                    self.logger.warning(
×
2222
                        "client to update (`{}`) does not exist in fence: skipping".format(
2223
                            client_name
2224
                        )
2225
                    )
2226
                    continue
×
2227
                self.logger.debug(
×
2228
                    "updating client `{}` (found {} client IDs)".format(
2229
                        client_name, len(clients)
2230
                    )
2231
                )
2232
                # there may be more than 1 client with this name if credentials are being rotated,
2233
                # so we grant access to each client ID
2234
                for client in clients:
×
2235
                    try:
×
2236
                        self.arborist_client.update_client(
×
2237
                            client.client_id, client_policies
2238
                        )
2239
                    except ArboristError as e:
×
2240
                        self.logger.info(
×
2241
                            "not granting policies {} to client `{}` (`{}`); {}".format(
2242
                                client_policies, client_name, client.client_id, str(e)
2243
                            )
2244
                        )
2245

2246
        return True
1✔
2247

2248
    def _determine_unique_policies(self, user_project_info, project_to_authz_mapping):
1✔
2249
        """
2250
        Determine and return a dictionary of unique policies.
2251

2252
        Args (examples):
2253
            user_project_info (dict):
2254
            {
2255
                'phs000002.c1': { 'read-storage', 'read' },
2256
                'phs000001.c1': { 'read', 'read-storage' },
2257
                'phs000004.c1': { 'write', 'read' },
2258
                'phs000003.c1': { 'read', 'write' },
2259
                'phs000006.c1': { 'write-storage', 'write', 'read-storage', 'read' }
2260
                'phs000005.c1': { 'read', 'read-storage', 'write', 'write-storage' },
2261
            }
2262
            project_to_authz_mapping (dict):
2263
            {
2264
                'phs000001.c1': '/programs/DEV/projects/phs000001.c1'
2265
            }
2266

2267
        Return (for examples):
2268
            dict:
2269
            {
2270
                ('read', 'read-storage'): ('phs000001.c1', 'phs000002.c1'),
2271
                ('read', 'write'): ('phs000003.c1', 'phs000004.c1'),
2272
                ('read', 'read-storage', 'write', 'write-storage'): ('phs000005.c1', 'phs000006.c1'),
2273
            }
2274
        """
2275
        roles_to_resources = collections.defaultdict(list)
1✔
2276
        for study, roles in user_project_info.items():
1✔
2277
            ordered_roles = tuple(sorted(roles))
1✔
2278
            study_authz_paths = self._dbgap_study_to_resources.get(study, [study])
1✔
2279
            if study in project_to_authz_mapping:
1✔
2280
                study_authz_paths = [project_to_authz_mapping[study]]
1✔
2281
            roles_to_resources[ordered_roles].extend(study_authz_paths)
1✔
2282

2283
        policies = {}
1✔
2284
        for ordered_roles, unordered_resources in roles_to_resources.items():
1✔
2285
            policies[ordered_roles] = tuple(sorted(unordered_resources))
1✔
2286
        return policies
1✔
2287

2288
    def _create_arborist_role(self, role):
1✔
2289
        """
2290
        Wrapper around gen3authz's create_role with additional logging
2291

2292
        Args:
2293
            role (str): what the Arborist identity should be of the created role
2294

2295
        Return:
2296
            bool: True if the role was created successfully or it already
2297
                  exists. False otherwise
2298
        """
2299
        if role in self._created_roles:
1✔
2300
            return True
1✔
2301
        try:
1✔
2302
            response_json = self.arborist_client.create_role(
1✔
2303
                arborist_role_for_permission(role)
2304
            )
2305
        except ArboristError as e:
×
2306
            self.logger.error(
×
2307
                "could not create `{}` role in Arborist: {}".format(role, e)
2308
            )
2309
            return False
×
2310
        self._created_roles.add(role)
1✔
2311

2312
        if response_json is None:
1✔
2313
            self.logger.info("role `{}` already exists in Arborist".format(role))
×
2314
        else:
2315
            self.logger.info("created role `{}` in Arborist".format(role))
1✔
2316
        return True
1✔
2317

2318
    def _create_arborist_resources(self, resources):
        """
        Create the given resource paths in Arborist.

        The paths are first converted to request bodies by
        utils.combine_provided_and_dbgap_resources({}, resources); one
        update_resource("/", body, merge=True) call is made per request body.

        Args:
            resources (list): a list of full Arborist resource paths to create
            [
                "/programs/DEV/projects/phs000001.c1",
                "/programs/DEV/projects/phs000002.c1",
                "/programs/DEV/projects/phs000003.c1"
            ]

        Return:
            bool: True if every request succeeded, False as soon as one
                  request fails (remaining request bodies are not sent)

        Notes carried over from the original documentation (as of 2/11/2022):
        for the example list above the combine helper returns a single
        nested object,
        [
            { 'name': 'programs', 'subresources': [
                { 'name': 'DEV', 'subresources': [
                    { 'name': 'projects', 'subresources': [
                        { 'name': 'phs000001.c1', 'subresources': []},
                        { 'name': 'phs000002.c1', 'subresources': []},
                        { 'name': 'phs000003.c1', 'subresources': []}
                    ]}
                ]}
            ]}
        ]
        so only one network request is sent to Arborist. For
        resources = ["/phs000001.c1", "/phs000002.c1", "/phs000003.c1"] it
        returns three top-level objects,
        [
            {'name': 'phs000001.c1', 'subresources': []},
            {'name': 'phs000002.c1', 'subresources': []},
            {'name': 'phs000003.c1', 'subresources': []}
        ]
        and therefore three network requests are sent.

        As a practical matter, for sync_single_user_visas, studies should be
        nested under the `/programs` resource as in the first example (i.e.
        only one network request gets made).

        TODO for the sake of simplicity, it would be nice if only one network
        request was made no matter the input.
        """
        request_bodies = utils.combine_provided_and_dbgap_resources({}, resources)
        for body in request_bodies:
            try:
                self.arborist_client.update_resource("/", body, merge=True)
            except ArboristError as err:
                self.logger.error(
                    "could not create Arborist resources using request body `{}`. error: {}".format(
                        body, err
                    )
                )
                return False

        resource_count = len(resources)
        self.logger.debug(
            "created {} resource(s) in Arborist: `{}`".format(resource_count, resources)
        )
        return True
1✔
2383

2384
    def _create_arborist_policy(
1✔
2385
        self, policy_id, roles, resources, skip_if_exists=False
2386
    ):
2387
        """
2388
        Wrapper around gen3authz's create_policy with additional logging
2389

2390
        Args:
2391
            policy_id (str): what the Arborist identity should be of the created policy
2392
            roles (iterable): what roles the create policy should have
2393
            resources (iterable): what resources the created policy should have
2394
            skip_if_exists (bool): if True, this function will not treat an already
2395
                                   existent policy as an error
2396

2397
        Return:
2398
            bool: True if policy creation was successful. False otherwise
2399
        """
2400
        try:
1✔
2401
            response_json = self.arborist_client.create_policy(
1✔
2402
                {
2403
                    "id": policy_id,
2404
                    "role_ids": roles,
2405
                    "resource_paths": resources,
2406
                },
2407
                skip_if_exists=skip_if_exists,
2408
            )
2409
        except ArboristError as e:
×
2410
            self.logger.error(
×
2411
                "could not create policy `{}` in Arborist: {}".format(policy_id, e)
2412
            )
2413
            return False
×
2414

2415
        if response_json is None:
1✔
2416
            self.logger.info("policy `{}` already exists in Arborist".format(policy_id))
×
2417
        else:
2418
            self.logger.info("created policy `{}` in Arborist".format(policy_id))
1✔
2419
        return True
1✔
2420

2421
    def _hash_policy_contents(self, ordered_roles, ordered_resources):
1✔
2422
        """
2423
        Generate a sha256 hexdigest representing ordered_roles and ordered_resources.
2424

2425
        Args:
2426
            ordered_roles (iterable): policy roles in sorted order
2427
            ordered_resources (iterable): policy resources in sorted order
2428

2429
        Return:
2430
            str: SHA256 hex digest
2431
        """
2432

2433
        def escape(s):
1✔
2434
            return s.replace(",", "\,")
1✔
2435

2436
        canonical_roles = ",".join(escape(r) for r in ordered_roles)
1✔
2437
        canonical_resources = ",".join(escape(r) for r in ordered_resources)
1✔
2438
        canonical_policy = f"{canonical_roles},,f{canonical_resources}"
1✔
2439
        policy_hash = hashlib.sha256(canonical_policy.encode("utf-8")).hexdigest()
1✔
2440

2441
        return policy_hash
1✔
2442

2443
    def _grant_arborist_policy(self, username, policy_id, expires=None):
1✔
2444
        """
2445
        Wrapper around gen3authz's grant_user_policy with additional logging
2446

2447
        Args:
2448
            username (str): username of user in Arborist who policy should be
2449
                            granted to
2450
            policy_id (str): Arborist policy id
2451
            expires (int): POSIX timestamp for when policy should expire
2452

2453
        Return:
2454
            bool: True if granting of policy was successful, False otherwise
2455
        """
2456
        try:
1✔
2457
            response_json = self.arborist_client.grant_user_policy(
1✔
2458
                username,
2459
                policy_id,
2460
                expires_at=expires,
2461
            )
2462
        except ArboristError as e:
×
2463
            self.logger.error(
×
2464
                "could not grant policy `{}` to user `{}`: {}".format(
2465
                    policy_id, username, e
2466
                )
2467
            )
2468
            return False
×
2469

2470
        self.logger.debug(
1✔
2471
            "granted policy `{}` to user `{}`".format(policy_id, username)
2472
        )
2473
        return True
1✔
2474

2475
    def _grant_bulk_user_policies(self, username, policy_ids, expires=None):
1✔
2476
        """
2477
        Wrapper around gen3authz's grant_user_policies with additional logging
2478

2479
        Args:
2480
            username (str): username of user in Arborist who policy should be
2481
                            granted to
2482
            policy_ids (set[str]): Arborist policy ids
2483

2484
        Return:
2485
            bool: True if granting of policies was successful, False otherwise
2486
        """
2487
        try:
1✔
2488
            response_json = self.arborist_client.grant_bulk_user_policy(
1✔
2489
                username, policy_ids, expires
2490
            )
2491
        except ArboristError as e:
×
2492
            self.logger.error(
×
2493
                "could not grant bulk policies  to user `{}`: {}".format(username, e)
2494
            )
2495
            return False
×
2496
        return True
1✔
2497

2498
    def _determine_arborist_resource(self, dbgap_study, dbgap_config):
1✔
2499
        """
2500
        Determine the arborist resource path and add it to
2501
        _self._dbgap_study_to_resources
2502

2503
        Args:
2504
            dbgap_study (str): study phs identifier
2505
            dbgap_config (dict): dictionary of config for dbgap server
2506

2507
        """
2508
        default_namespaces = dbgap_config.get("study_to_resource_namespaces", {}).get(
1✔
2509
            "_default", ["/"]
2510
        )
2511
        namespaces = dbgap_config.get("study_to_resource_namespaces", {}).get(
1✔
2512
            dbgap_study, default_namespaces
2513
        )
2514

2515
        self.logger.debug(f"dbgap study namespaces: {namespaces}")
1✔
2516

2517
        arborist_resource_namespaces = [
1✔
2518
            namespace.rstrip("/") + "/programs/" for namespace in namespaces
2519
        ]
2520

2521
        for resource_namespace in arborist_resource_namespaces:
1✔
2522
            full_resource_path = resource_namespace + dbgap_study
1✔
2523
            if dbgap_study not in self._dbgap_study_to_resources:
1✔
2524
                self._dbgap_study_to_resources[dbgap_study] = []
1✔
2525
            self._dbgap_study_to_resources[dbgap_study].append(full_resource_path)
1✔
2526
        return arborist_resource_namespaces
1✔
2527

2528
    def _is_arborist_healthy(self):
1✔
2529
        if not self.arborist_client:
1✔
2530
            self.logger.warning("no arborist client set; skipping arborist dbgap sync")
×
2531
            return False
×
2532
        if not self.arborist_client.healthy():
1✔
2533
            # TODO (rudyardrichter, 2019-01-07): add backoff/retry here
2534
            self.logger.error(
×
2535
                "arborist service is unavailable; skipping main arborist dbgap sync"
2536
            )
2537
            return False
×
2538
        return True
1✔
2539

2540
    def _pick_sync_type(self, visa):
1✔
2541
        """
2542
        Pick type of visa to parse according to the visa provider
2543
        """
2544
        sync_client = None
1✔
2545
        if visa.type in self.visa_types["ras"]:
1✔
2546
            sync_client = self.ras_sync_client
1✔
2547
        else:
2548
            raise Exception(
×
2549
                "Visa type {} not recognized. Configure in fence-config".format(
2550
                    visa.type
2551
                )
2552
            )
2553
        if not sync_client:
1✔
2554
            raise Exception("Sync client for {} not configured".format(visa.type))
×
2555

2556
        return sync_client
1✔
2557

2558
    def sync_single_user_visas(
        self, user, ga4gh_visas, sess=None, expires=None, skip_google_updates=False
    ):
        """
        Sync a single user's visas during login or DRS/data access

        IMPORTANT NOTE: THIS DOES NOT VALIDATE THE VISA. ENSURE THIS IS DONE
                        BEFORE THIS.

        Steps, in order:
            1. load user.yaml from self.sync_from_local_yaml_file
            2. parse every visa with the sync client chosen by
               _pick_sync_type; visas whose parsing raises are logged and
               skipped
            3. merge the parsed projects, run parse_projects and
               _process_user_projects on them (plus
               _grant_all_consents_to_c999_users when consent codes are
               parsed)
            4. sync to the storage backend if any projects remain
            5. sync authorization to Arborist if an arborist client is set

        Args:
            user (userdatamodel.user.User): Fence user whose visas'
                                            authz info is being synced
            ga4gh_visas (list): a list of fence.models.GA4GHVisaV1 objects
                                that are ALREADY VALIDATED
            sess (sqlalchemy.orm.session.Session): database session
            expires (int): time at which synced Arborist policies and
                           inclusion in any GBAG are set to expire
            skip_google_updates (bool): True if google group updates should be skipped. False if otherwise.

        Return:
            list of successfully parsed visas

        Raises:
            EnvironmentError, AssertionError: re-raised (after logging) when
                                              user.yaml cannot be loaded
            Exception: from _pick_sync_type when a visa type is not recognized
        """
        self.ras_sync_client = RASVisa(logger=self.logger)
        # only the first dbGaP configuration entry is used here
        dbgap_config = self.dbGaP[0]
        parse_consent_code = self._get_parse_consent_code(dbgap_config)
        cea_access_enabled = dbgap_config.get(
            "enable_common_exchange_area_access", False
        )
        cea_by_study = dbgap_config.get("study_common_exchange_areas", {})

        try:
            user_yaml = UserYAML.from_file(
                self.sync_from_local_yaml_file, encrypted=False, logger=self.logger
            )
        except (EnvironmentError, AssertionError) as err:
            self.logger.error(str(err))
            self.logger.error("aborting early")
            raise

        merged_projects = {}
        user_info = {}
        parsed_visas = []

        for visa in ga4gh_visas:
            visa_parser = self._pick_sync_type(visa)
            encoded_visa = visa.ga4gh_visa

            try:
                visa_projects, user_info = visa_parser._parse_single_visa(
                    user,
                    encoded_visa,
                    visa.expires,
                    parse_consent_code,
                )
            except Exception:
                self.logger.warning(
                    f"ignoring unsuccessfully parsed or expired visa: {encoded_visa}"
                )
                continue
            else:
                # projects from later visas override same-named earlier ones
                merged_projects = {**merged_projects, **visa_projects}
                parsed_visas.append(visa)

        # user_info is the info dict of the last successfully parsed visa
        # (or an empty dict when none parsed)
        user_info["user_id"] = user.id
        user_info["username"] = user.username

        user_projects = self.parse_projects({user.username: merged_projects})

        if parse_consent_code and cea_access_enabled:
            self.logger.info(
                f"using study to common exchange area mapping: {cea_by_study}"
            )

        self._process_user_projects(
            user_projects,
            cea_access_enabled,
            cea_by_study,
            dbgap_config,
            sess,
        )

        if parse_consent_code:
            self._grant_all_consents_to_c999_users(
                user_projects, user_yaml.project_to_resource
            )

        if not user_projects:
            self.logger.info("No users for syncing")
        else:
            self.sync_to_storage_backend(
                user_projects,
                user_info,
                sess,
                expires=expires,
                skip_google_updates=skip_google_updates,
            )

        # update arborist db (user access)
        if not self.arborist_client:
            self.logger.error("No arborist client set; skipping arborist sync")
            return parsed_visas

        self.logger.info("Synchronizing arborist with authorization info...")
        synced = self._update_authz_in_arborist(
            sess,
            user_projects,
            user_yaml=user_yaml,
            single_user_sync=True,
            expires=expires,
        )
        if synced:
            self.logger.info("Finished synchronizing authorization info to arborist")
        else:
            self.logger.error(
                "Could not synchronize authorization info successfully to arborist"
            )

        return parsed_visas
1✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc