• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

uc-cdis / fence / 15540989374

09 Jun 2025 05:49PM UTC coverage: 74.809% (-0.02%) from 74.827%
15540989374

push

github

BinamB
check for anon policies

8116 of 10849 relevant lines covered (74.81%)

0.75 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

81.36
fence/sync/sync_users.py
1
import backoff
1✔
2
import glob
1✔
3
import jwt
1✔
4
import os
1✔
5
import re
1✔
6
import subprocess as sp
1✔
7
import yaml
1✔
8
import copy
1✔
9
import datetime
1✔
10
import uuid
1✔
11
import collections
1✔
12
import hashlib
1✔
13

14
from contextlib import contextmanager
1✔
15
from collections import defaultdict
1✔
16
from csv import DictReader
1✔
17
from io import StringIO
1✔
18
from stat import S_ISDIR
1✔
19

20
import paramiko
1✔
21
from cdislogging import get_logger
1✔
22
from email_validator import validate_email, EmailNotValidError
1✔
23
from gen3authz.client.arborist.errors import ArboristError
1✔
24
from gen3users.validation import validate_user_yaml
1✔
25
from paramiko.proxy import ProxyCommand
1✔
26
from sqlalchemy.exc import IntegrityError
1✔
27
from sqlalchemy import func
1✔
28

29
from fence.config import config
1✔
30
from fence.models import (
1✔
31
    AccessPrivilege,
32
    AuthorizationProvider,
33
    Project,
34
    Tag,
35
    User,
36
    query_for_user,
37
    Client,
38
    IdentityProvider,
39
    get_project_to_authz_mapping,
40
)
41
from fence.resources.google.utils import get_or_create_proxy_group_id
1✔
42
from fence.resources.storage import StorageManager
1✔
43
from fence.resources.google.access_utils import update_google_groups_for_users
1✔
44
from fence.resources.google.access_utils import GoogleUpdateException
1✔
45
from fence.sync import utils
1✔
46
from fence.sync.passport_sync.ras_sync import RASVisa
1✔
47
from fence.utils import get_SQLAlchemyDriver, DEFAULT_BACKOFF_SETTINGS
1✔
48

49

50
def _format_policy_id(path, privilege):
1✔
51
    resource = ".".join(name for name in path.split("/") if name)
1✔
52
    return "{}-{}".format(resource, privilege)
1✔
53

54

55
def download_dir(sftp, remote_dir, local_dir):
    """
    Recursively download every file under remote_dir into local_dir.

    Args:
        sftp: SFTP client object; only `listdir_attr` and `get` are called
        remote_dir (str): remote directory to copy from
        local_dir (str): local directory to copy into; created if missing

    Returns:
        None
    """
    # Nested remote directories have no local counterpart yet; create the
    # destination first so that writing the downloaded files into it works.
    # An empty local_dir means "current directory" for os.path.join, so there
    # is nothing to create in that case.
    if local_dir:
        os.makedirs(local_dir, exist_ok=True)

    for item in sftp.listdir_attr(remote_dir):
        remote_path = remote_dir + "/" + item.filename
        local_path = os.path.join(local_dir, item.filename)
        if S_ISDIR(item.st_mode):
            download_dir(sftp, remote_path, local_path)
        else:
            sftp.get(remote_path, local_path)
×
72

73

74
def arborist_role_for_permission(permission):
    """
    Build the arborist role document for a single permission.

    For the programs/projects in the existing fence access control model, a
    policy is generated for each combination of program/project and privilege
    so that arborist can be used for permission checks. Each role therefore
    holds exactly one permission, for one privilege of the project access
    model; the role ID, the permission ID and the action method are all the
    given permission, and the action applies to every service ("*").

    Args:
        permission: privilege name used as role ID, permission ID and method

    Returns:
        dict: role with keys "id" and "permissions"
    """
    action = dict(service="*", method=permission)
    only_permission = dict(id=permission, action=action)
    return dict(id=permission, permissions=[only_permission])
87

88

89
@contextmanager
1✔
90
def _read_file(filepath, encrypted=True, key=None, logger=None):
1✔
91
    """
92
    Context manager for reading and optionally decrypting file it only
93
    decrypts files encrypted by unix 'crypt' tool which is used by dbGaP.
94

95
    Args:
96
        filepath (str): path to the file
97
        encrypted (bool): whether the file is encrypted
98

99
    Returns:
100
        Generator[file-like class]: file like object for the file
101
    """
102
    if encrypted:
1✔
103
        p = sp.Popen(
×
104
            [
105
                "ccdecrypt",
106
                "-u",
107
                "-K",
108
                key,
109
                filepath,
110
            ],
111
            stdout=sp.PIPE,
112
            stderr=open(os.devnull, "w"),
113
            universal_newlines=True,
114
        )
115
        try:
×
116
            yield StringIO(p.communicate()[0])
×
117
        except UnicodeDecodeError:
×
118
            logger.error("Could not decode file. Check the decryption key.")
×
119
    else:
120
        f = open(filepath, "r")
1✔
121
        yield f
1✔
122
        f.close()
1✔
123

124

125
class UserYAML(object):
    """
    Representation of the information in a YAML file describing user, project, and ABAC
    information for access control.
    """

    def __init__(
        self,
        projects=None,
        user_info=None,
        policies=None,
        clients=None,
        authz=None,
        project_to_resource=None,
        logger=None,
        user_abac=None,
    ):
        """
        Store the parsed sections of a user YAML file.

        Every mapping argument that is omitted (or falsy) is replaced by a
        fresh empty dict; `logger` is stored as given.
        """
        self.projects = projects or {}
        self.user_info = user_info or {}
        self.user_abac = user_abac or {}
        self.policies = policies or {}
        self.clients = clients or {}
        self.authz = authz or {}
        self.project_to_resource = project_to_resource or {}
        self.logger = logger

    @classmethod
    def from_file(cls, filepath, encrypted=True, key=None, logger=None):
        """
        Parse a user YAML file into a UserYAML instance.

        Add access by "auth_id" to "self.projects" to update the Fence DB.
        Add access by "resource" to "self.user_abac" to update Arborist.

        Args:
            filepath (str): path to the YAML file; when falsy nothing is read
                and an empty UserYAML is returned
            encrypted (bool): whether the file must be decrypted before reading
            key (str): decryption key, used only when `encrypted` is true
            logger: optional logger; all logging is skipped when it is falsy

        Returns:
            UserYAML: the parsed representation

        Raises:
            EnvironmentError: if a username is encountered more than once
        """
        data = {}
        if filepath:
            with _read_file(filepath, encrypted=encrypted, key=key, logger=logger) as f:
                file_contents = f.read()
                validate_user_yaml(file_contents)  # run user.yaml validation tests
                # safe_load returns None for an empty document; fall back to
                # an empty mapping so the lookups below keep working
                data = yaml.safe_load(file_contents) or {}
        elif logger:
            logger.info("Did not sync a user.yaml, no file path provided.")

        projects = dict()
        user_info = dict()
        policies = dict()

        # resources should be the resource tree to construct in arborist
        user_abac = dict()

        # Fall back on rbac block if no authz. Remove when rbac in useryaml fully deprecated.
        if not data.get("authz") and data.get("rbac"):
            if logger:
                logger.info(
                    "No authz block found but rbac block present. Using rbac block"
                )
            data["authz"] = data["rbac"]

        # get user project mapping to arborist resources if it exists
        project_to_resource = data.get("authz", dict()).get(
            "user_project_to_resource", dict()
        )

        # read projects and privileges for each user
        users = data.get("users", {})
        for username, details in users.items():
            # users should occur only once each; skip if already processed
            if username in projects:
                msg = "invalid yaml file: user `{}` occurs multiple times".format(
                    username
                )
                if logger:
                    logger.error(msg)
                raise EnvironmentError(msg)

            privileges = {}
            resource_permissions = dict()
            for project in details.get("projects", {}):
                try:
                    privileges[project["auth_id"]] = set(project["privilege"])
                except KeyError as e:
                    # entries lacking `auth_id` or `privilege` are reported
                    # and ignored rather than aborting the whole sync
                    if logger:
                        logger.error("project {} missing field: {}".format(project, e))
                    continue

                # project may not have `resource` field.
                # prefer resource field;
                # if no resource or mapping, assume auth_id is resource.
                resource = project.get("resource", project["auth_id"])

                # an explicit mapping from the authz block wins over the
                # per-project resource
                if project["auth_id"] not in project_to_resource:
                    project_to_resource[project["auth_id"]] = resource
                resource_permissions[resource] = set(project["privilege"])

            user_info[username] = {
                "email": details.get("email", ""),
                "display_name": details.get("display_name", ""),
                "phone_number": details.get("phone_number", ""),
                "tags": details.get("tags", {}),
                "admin": details.get("admin", False),
            }
            if not details.get("email"):
                # no explicit email: use the username itself when it is a
                # syntactically valid email address
                try:
                    valid = validate_email(
                        username, allow_smtputf8=False, check_deliverability=False
                    )
                    user_info[username]["email"] = valid.email
                except EmailNotValidError:
                    pass
            projects[username] = privileges
            user_abac[username] = resource_permissions

            # list of policies we want to grant to this user, which get sent to arborist
            # to check if they're allowed to do certain things
            policies[username] = details.get("policies", [])

        if logger:
            logger.info(
                "Got user project to arborist resource mapping:\n{}".format(
                    str(project_to_resource)
                )
            )

        authz = data.get("authz", dict())
        if not authz:
            # older version: resources in root, no `authz` section or `rbac` section
            if logger:
                logger.warning(
                    "access control YAML file is using old format (missing `authz`/`rbac`"
                    " section in the root); assuming that if it exists `resources` will"
                    " be on the root level, and continuing"
                )
            # we're going to throw it into the `authz` dictionary anyways, so the rest of
            # the code can pretend it's in the normal place that we expect
            resources = data.get("resources", [])
            # keep authz empty dict if resources is not specified
            if resources:
                authz["resources"] = resources

        clients = data.get("clients", {})

        return cls(
            projects=projects,
            user_info=user_info,
            user_abac=user_abac,
            policies=policies,
            clients=clients,
            authz=authz,
            project_to_resource=project_to_resource,
            logger=logger,
        )

    def persist_project_to_resource(self, db_session):
        """
        Store the mappings from Project.auth_id to authorization resource (Project.authz)

        The mapping comes from an external source, this function persists what was parsed
        into memory into the database for future use. Existing projects get
        their `authz` updated; missing ones are created with `name` equal to
        the auth_id. A single commit is issued after all mappings are handled.

        Args:
            db_session: database session used for the queries and the commit
        """
        for auth_id, authz_resource in self.project_to_resource.items():
            project = (
                db_session.query(Project).filter(Project.auth_id == auth_id).first()
            )
            if project:
                project.authz = authz_resource
            else:
                project = Project(name=auth_id, auth_id=auth_id, authz=authz_resource)
                db_session.add(project)
        db_session.commit()
×
292
        db_session.commit()
1✔
293

294

295
class UserSyncer(object):
1✔
296
    def __init__(
        self,
        dbGaP,
        DB,
        project_mapping,
        storage_credentials=None,
        db_session=None,
        is_sync_from_dbgap_server=False,
        sync_from_local_csv_dir=None,
        sync_from_local_yaml_file=None,
        arborist=None,
        folder=None,
    ):
        """
        Syncs ACL files from dbGap to auth database and storage backends
        Args:
            dbGaP: a list of dict containing creds to access dbgap sftp; each
                entry may carry a `parent_to_child_studies_mapping`
            DB: database connection string
            project_mapping: a dict containing how dbgap ids map to projects
            storage_credentials: a dict containing creds for storage backends;
                a StorageManager is only created when this is truthy
            db_session: database session, stored as `self.session`
            is_sync_from_dbgap_server: stored flag (presumably selects
                syncing from the dbGaP server; verify against callers)
            sync_from_local_csv_dir: stored path (presumably a local dir of
                CSV files to sync from instead of dbGaP; verify)
            sync_from_local_yaml_file: stored path (presumably a local user
                YAML file to sync from; verify)
            arborist:
                ArboristClient instance if the syncer should also create
                resources in arborist
            folder: a local folder where dbgap telemetry files will sync to
        """
        # where to sync from
        self.dbGaP = dbGaP
        self.is_sync_from_dbgap_server = is_sync_from_dbgap_server
        self.sync_from_local_csv_dir = sync_from_local_csv_dir
        self.sync_from_local_yaml_file = sync_from_local_yaml_file
        self.folder = folder

        # database access
        self.session = db_session
        self.driver = get_SQLAlchemyDriver(DB)

        # bookkeeping filled in while syncing
        self.project_mapping = project_mapping or {}
        self._projects = dict()
        self._created_roles = set()
        self._created_policies = set()
        self._dbgap_study_to_resources = dict()

        log_level = "debug" if config["DEBUG"] is True else "info"
        self.logger = get_logger("user_syncer", log_level=log_level)
        self.arborist_client = arborist

        # auth_source used for logging. username : [source1, source2]
        self.auth_source = defaultdict(set)
        self.visa_types = config.get("USERSYNC", {}).get("visa_types", {})

        # merge the parent -> child study mappings of all dbGaP configs;
        # later configs override earlier ones for the same parent
        self.parent_to_child_studies_mapping = {}
        for server_config in dbGaP:
            child_mapping = server_config.get("parent_to_child_studies_mapping", {})
            self.parent_to_child_studies_mapping.update(child_mapping)

        if storage_credentials:
            self.storage_manager = StorageManager(
                storage_credentials, logger=self.logger
            )
        self.id_patterns = []
1✔
353

354
    @staticmethod
1✔
355
    def _match_pattern(filepath, id_patterns, encrypted=True):
1✔
356
        """
357
        Check if the filename matches dbgap access control file pattern
358

359
        Args:
360
            filepath (str): path to file
361
            encrypted (bool): whether the file is encrypted
362

363
        Returns:
364
            bool: whether the pattern matches
365
        """
366
        id_patterns.append(r"authentication_file_phs(\d{6}).(csv|txt)")
1✔
367
        for pattern in id_patterns:
1✔
368
            if encrypted:
1✔
369
                pattern += r".enc"
×
370
            pattern += r"$"
1✔
371
            # when converting the YAML from fence-config,
372
            # python reads it as Python string literal. So "\" turns into "\\"
373
            # which messes with the regex match
374
            pattern.replace("\\\\", "\\")
1✔
375
            if re.match(pattern, os.path.basename(filepath)):
1✔
376
                return True
1✔
377
        return False
1✔
378

379
    def _get_from_sftp_with_proxy(self, server, path):
        """
        Download all data from sftp server to a local dir

        When the server config names a non-empty `proxy`, the SSH connection
        is tunnelled through it with an `ssh ... nc` proxy command. The proxy
        command is closed when this method returns or raises.

        Args:
            server (dict) : dictionary containing info to access sftp server
                (keys read: host, port, username, password, proxy, proxy_user)
            path (str): path to local directory

        Returns:
            None
        """
        proxy = None
        if server.get("proxy", "") != "":
            command = "ssh -oHostKeyAlgorithms=+ssh-rsa -i ~/.ssh/id_rsa {user}@{proxy} nc {host} {port}".format(
                user=server.get("proxy_user", ""),
                proxy=server.get("proxy", ""),
                host=server.get("host", ""),
                port=server.get("port", 22),
            )
            self.logger.info("SSH proxy command: {}".format(command))

            proxy = ProxyCommand(command)

        try:
            with paramiko.SSHClient() as client:
                client.set_log_channel(self.logger.name)

                # NOTE(review): AutoAddPolicy accepts host keys that are not
                # already known; confirm this is acceptable for this
                # deployment.
                client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
                parameters = {
                    "hostname": str(server.get("host", "")),
                    "username": str(server.get("username", "")),
                    "password": str(server.get("password", "")),
                    "port": int(server.get("port", 22)),
                }
                if proxy:
                    parameters["sock"] = proxy

                self.logger.info(
                    "SSH connection hostname:port {}:{}".format(
                        parameters.get("hostname", "unknown"),
                        parameters.get("port", "unknown"),
                    )
                )
                self._connect_with_ssh(ssh_client=client, parameters=parameters)
                with client.open_sftp() as sftp:
                    download_dir(sftp, "./", path)
        finally:
            # release the proxy command even when connecting or downloading
            # failed
            if proxy:
                proxy.close()
×
427

428
    @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
    def _connect_with_ssh(self, ssh_client, parameters):
        """
        Connect the given SSH client, retrying on any exception.

        Retries use exponential backoff configured by
        DEFAULT_BACKOFF_SETTINGS.

        Args:
            ssh_client: client object whose `connect` method is called
            parameters (dict): keyword arguments forwarded to `connect`

        Returns:
            None
        """
        connect_kwargs = dict(parameters)
        ssh_client.connect(**connect_kwargs)
1✔
431

432
    def _get_from_ftp_with_proxy(self, server, path):
        """
        Download data from ftp server to a local dir

        Runs the external `lftp` command to mirror the remote working
        directory into `path` through an HTTP proxy. The command is started
        without a shell, so characters in the credentials, host or path that
        are special to a shell are passed through literally.

        Args:
            server (dict): dictionary containing information for accessing server
                (keys read: username, password, host, proxy)
            path(str): path to local files

        Returns:
            None
        """
        credentials = "{},{}".format(
            server.get("username", ""), server.get("password", "")
        )
        lftp_commands = "set ftp:proxy http://{}; mirror . {}; exit".format(
            server.get("proxy", ""), path
        )

        args = ["lftp", "-u", credentials]
        host = str(server.get("host", ""))
        if host:
            # a missing host produced no argument at all in the shell form
            args.append(host)
        args += ["-e", lftp_commands]

        try:
            # the exit status is ignored, as it was with os.system
            sp.call(args)
        except OSError as e:
            # os.system never raised when lftp could not be started; keep the
            # call best-effort but make the failure visible
            self.logger.error("Could not run lftp: {}".format(e))
×
453

454
    def _get_parse_consent_code(self, dbgap_config={}):
1✔
455
        return dbgap_config.get(
1✔
456
            "parse_consent_code", True
457
        )  # Should this really be true?
458

459
    def _parse_csv(self, file_dict, sess, dbgap_config={}, encrypted=True):
1✔
460
        """
461
        parse csv files to python dict
462

463
        Args:
464
            file_dict: a dictionary with key(file path) and value(privileges)
465
            sess: sqlalchemy session
466
            dbgap_config: a dictionary containing information about the dbGaP sftp server
467
                (comes from fence config)
468
            encrypted: boolean indicating whether those files are encrypted
469

470

471
        Return:
472
            Tuple[[dict, dict]]:
473
                (user_project, user_info) where user_project is a mapping from
474
                usernames to project permissions and user_info is a mapping
475
                from usernames to user details, such as email
476

477
        Example:
478

479
            (
480
                {
481
                    username: {
482
                        'project1': {'read-storage','write-storage'},
483
                        'project2': {'read-storage'},
484
                    }
485
                },
486
                {
487
                    username: {
488
                        'email': 'email@mail.com',
489
                        'display_name': 'display name',
490
                        'phone_number': '123-456-789',
491
                        'tags': {'dbgap_role': 'PI'}
492
                    }
493
                },
494
            )
495

496
        """
497
        user_projects = dict()
1✔
498
        user_info = defaultdict(dict)
1✔
499

500
        # parse dbGaP sftp server information
501
        dbgap_key = dbgap_config.get("decrypt_key", None)
1✔
502

503
        self.id_patterns += (
1✔
504
            [
505
                item.replace("\\\\", "\\")
506
                for item in dbgap_config.get("allowed_whitelist_patterns", [])
507
            ]
508
            if dbgap_config.get("allow_non_dbGaP_whitelist", False)
509
            else []
510
        )
511

512
        enable_common_exchange_area_access = dbgap_config.get(
1✔
513
            "enable_common_exchange_area_access", False
514
        )
515
        study_common_exchange_areas = dbgap_config.get(
1✔
516
            "study_common_exchange_areas", {}
517
        )
518
        parse_consent_code = self._get_parse_consent_code(dbgap_config)
1✔
519

520
        if parse_consent_code and enable_common_exchange_area_access:
1✔
521
            self.logger.info(
1✔
522
                f"using study to common exchange area mapping: {study_common_exchange_areas}"
523
            )
524

525
        project_id_patterns = [r"phs(\d{6})"]
1✔
526
        if "additional_allowed_project_id_patterns" in dbgap_config:
1✔
527
            patterns = dbgap_config.get("additional_allowed_project_id_patterns")
1✔
528
            patterns = [
1✔
529
                pattern.replace("\\\\", "\\") for pattern in patterns
530
            ]  # when converting the YAML from fence-config, python reads it as Python string literal. So "\" turns into "\\" which messes with the regex match
531
            project_id_patterns += patterns
1✔
532

533
        self.logger.info(f"Using these file paths: {file_dict.items()}")
1✔
534
        for filepath, privileges in file_dict.items():
1✔
535
            self.logger.info("Reading file {}".format(filepath))
1✔
536
            if os.stat(filepath).st_size == 0:
1✔
537
                self.logger.warning("Empty file {}".format(filepath))
×
538
                continue
×
539
            if not self._match_pattern(
1✔
540
                filepath, id_patterns=self.id_patterns, encrypted=encrypted
541
            ):
542
                self.logger.warning(
1✔
543
                    "Filename {} does not match dbgap access control filename pattern;"
544
                    " this could mean that the filename has an invalid format, or has"
545
                    " an unexpected .enc extension, or lacks the .enc extension where"
546
                    " expected. This file is NOT being processed by usersync!".format(
547
                        filepath
548
                    )
549
                )
550
                continue
1✔
551

552
            with _read_file(
1✔
553
                filepath, encrypted=encrypted, key=dbgap_key, logger=self.logger
554
            ) as f:
555
                csv = DictReader(f, quotechar='"', skipinitialspace=True)
1✔
556

557
                for row in csv:
1✔
558
                    username = row.get("login") or ""
1✔
559
                    if username == "":
1✔
560
                        continue
×
561

562
                    if dbgap_config.get("allow_non_dbGaP_whitelist", False):
1✔
563
                        phsid = (
1✔
564
                            row.get("phsid") or (row.get("project_id") or "")
565
                        ).split(".")
566
                    else:
567
                        phsid = (row.get("phsid") or "").split(".")
1✔
568

569
                    dbgap_project = phsid[0]
1✔
570
                    # There are issues where dbgap has a wrong entry in their whitelist. Since we do a bulk arborist request, there are wrong entries in it that invalidates the whole request causing other correct entries not to be added
571
                    skip = False
1✔
572
                    for pattern in project_id_patterns:
1✔
573
                        self.logger.debug(
1✔
574
                            "Checking pattern:{} with project_id:{}".format(
575
                                pattern, dbgap_project
576
                            )
577
                        )
578
                        if re.match(pattern, dbgap_project):
1✔
579
                            skip = False
1✔
580
                            break
1✔
581
                        else:
582
                            skip = True
1✔
583
                    if skip:
1✔
584
                        self.logger.warning(
1✔
585
                            "Skip processing from file {}, user {} with project {}".format(
586
                                filepath,
587
                                username,
588
                                dbgap_project,
589
                            )
590
                        )
591
                        continue
1✔
592
                    if len(phsid) > 1 and parse_consent_code:
1✔
593
                        consent_code = phsid[-1]
1✔
594

595
                        # c999 indicates full access to all consents and access
596
                        # to a study-specific exchange area
597
                        # access to at least one study-specific exchange area implies access
598
                        # to the parent study's common exchange area
599
                        #
600
                        # NOTE: Handling giving access to all consents is done at
601
                        #       a later time, when we have full information about possible
602
                        #       consents
603
                        self.logger.debug(
1✔
604
                            f"got consent code {consent_code} from dbGaP project "
605
                            f"{dbgap_project}"
606
                        )
607
                        if (
1✔
608
                            consent_code == "c999"
609
                            and enable_common_exchange_area_access
610
                            and dbgap_project in study_common_exchange_areas
611
                        ):
612
                            self.logger.info(
1✔
613
                                "found study with consent c999 and Fence "
614
                                "is configured to parse exchange area data. Giving user "
615
                                f"{username} {privileges} privileges in project: "
616
                                f"{study_common_exchange_areas[dbgap_project]}."
617
                            )
618
                            self._add_dbgap_project_for_user(
1✔
619
                                study_common_exchange_areas[dbgap_project],
620
                                privileges,
621
                                username,
622
                                sess,
623
                                user_projects,
624
                                dbgap_config,
625
                            )
626

627
                        dbgap_project += "." + consent_code
1✔
628

629
                    self._add_children_for_dbgap_project(
1✔
630
                        dbgap_project,
631
                        privileges,
632
                        username,
633
                        sess,
634
                        user_projects,
635
                        dbgap_config,
636
                    )
637

638
                    display_name = row.get("user name") or ""
1✔
639
                    tags = {"dbgap_role": row.get("role") or ""}
1✔
640

641
                    # some dbgap telemetry files have information about a researchers PI
642
                    if "downloader for" in row:
1✔
643
                        tags["pi"] = row["downloader for"]
1✔
644

645
                    # prefer name over previous "downloader for" if it exists
646
                    if "downloader for names" in row:
1✔
647
                        tags["pi"] = row["downloader for names"]
×
648

649
                    user_info[username] = {
1✔
650
                        "email": row.get("email")
651
                        or user_info[username].get("email")
652
                        or "",
653
                        "display_name": display_name,
654
                        "phone_number": row.get("phone")
655
                        or user_info[username].get("phone_number")
656
                        or "",
657
                        "tags": tags,
658
                    }
659

660
                    self._process_dbgap_project(
1✔
661
                        dbgap_project,
662
                        privileges,
663
                        username,
664
                        sess,
665
                        user_projects,
666
                        dbgap_config,
667
                    )
668

669
        return user_projects, user_info
1✔
670

671
    def _get_children(self, dbgap_project):
1✔
672
        return self.parent_to_child_studies_mapping.get(dbgap_project.split(".")[0])
1✔
673

674
    def _add_children_for_dbgap_project(
1✔
675
        self, dbgap_project, privileges, username, sess, user_projects, dbgap_config
676
    ):
677
        """
678
        Adds the configured child studies for the given dbgap_project, adding it to the provided user_projects. If
679
        parse_consent_code is true, then the consents granted in the provided dbgap_project will also be granted to the
680
        child studies.
681
        """
682
        parent_phsid = dbgap_project
1✔
683
        parse_consent_code = self._get_parse_consent_code(dbgap_config)
1✔
684
        child_suffix = ""
1✔
685
        if parse_consent_code and re.match(
1✔
686
            config["DBGAP_ACCESSION_WITH_CONSENT_REGEX"], dbgap_project
687
        ):
688
            parent_phsid_parts = dbgap_project.split(".")
1✔
689
            parent_phsid = parent_phsid_parts[0]
1✔
690
            child_suffix = "." + parent_phsid_parts[1]
1✔
691

692
        if parent_phsid not in self.parent_to_child_studies_mapping:
1✔
693
            return
1✔
694

695
        self.logger.info(
1✔
696
            f"found parent study {parent_phsid} and Fence "
697
            "is configured to provide additional access to child studies. Giving user "
698
            f"{username} {privileges} privileges in projects: "
699
            f"{{k + child_suffix: v + child_suffix for k, v in self.parent_to_child_studies_mapping.items()}}."
700
        )
701
        child_studies = self.parent_to_child_studies_mapping.get(parent_phsid, [])
1✔
702
        for child_study in child_studies:
1✔
703
            self._add_dbgap_project_for_user(
1✔
704
                child_study + child_suffix,
705
                privileges,
706
                username,
707
                sess,
708
                user_projects,
709
                dbgap_config,
710
            )
711

712
    def _add_dbgap_project_for_user(
1✔
713
        self, dbgap_project, privileges, username, sess, user_projects, dbgap_config
714
    ):
715
        """
716
        Helper function for csv parsing that adds a given dbgap project to Fence/Arborist
717
        and then updates the dictionary containing all user's project access
718
        """
719
        if dbgap_project not in self._projects:
1✔
720
            self.logger.debug(
1✔
721
                "creating Project in fence for dbGaP study: {}".format(dbgap_project)
722
            )
723

724
            project = self._get_or_create(sess, Project, auth_id=dbgap_project)
1✔
725

726
            # need to add dbgap project to arborist
727
            if self.arborist_client:
1✔
728
                self._determine_arborist_resource(dbgap_project, dbgap_config)
1✔
729

730
            if project.name is None:
1✔
731
                project.name = dbgap_project
1✔
732
            self._projects[dbgap_project] = project
1✔
733
        phsid_privileges = {dbgap_project: set(privileges)}
1✔
734
        if username in user_projects:
1✔
735
            user_projects[username].update(phsid_privileges)
1✔
736
        else:
737
            user_projects[username] = phsid_privileges
1✔
738

739
    @staticmethod
1✔
740
    def sync_two_user_info_dict(user_info1, user_info2):
1✔
741
        """
742
        Merge user_info1 into user_info2. Values in user_info2 are overriden
743
        by values in user_info1. user_info2 ends up containing the merged dict.
744

745
        Args:
746
            user_info1 (dict): nested dict
747
            user_info2 (dict): nested dict
748

749
            Example:
750
            {username: {'email': 'abc@email.com'}}
751

752
        Returns:
753
            None
754
        """
755
        user_info2.update(user_info1)
1✔
756

757
    def sync_two_phsids_dict(
        self,
        phsids1,
        phsids2,
        source1=None,
        source2=None,
        phsids2_overrides_phsids1=True,
    ):
        """
        Merge phsids1 into phsids2 in place; phsids2 ends up holding the result.
        `source1` and `source2` are recorded in `self.auth_source` per user.

        Args:
            phsids1, phsids2: nested dicts mapping phsids to sets of permissions

            source1, source2: source of authz information (eg. dbgap, user_yaml, visas)

            phsids2_overrides_phsids1 (bool): controls what happens for a user
                that already has a non-empty entry in phsids2 (see below)

            Example:
            {
                username: {
                    phsid1: {'read-storage','write-storage'},
                    phsid2: {'read-storage'},
                }
            }

        Return:
            None

        Behavior, per user found in phsids1:
            - user missing from phsids2 (or mapped to an empty value):
              phsids2[user] becomes the very same dict object as phsids1[user]
              (no copy is made) and source1 is recorded.
            - user present in phsids2 and `phsids2_overrides_phsids1` is True:
              both sources are recorded; projects only in phsids1 are added and
              projects present in both get the union of their privileges.
            - user present in phsids2 and `phsids2_overrides_phsids1` is False:
              phsids2[user] is left unchanged; only source2 is recorded.
        """
        for user, incoming in phsids1.items():
            existing = phsids2.get(user)

            if not existing:
                if source1:
                    self.auth_source[user].add(source1)
                phsids2[user] = incoming
                continue

            if not phsids2_overrides_phsids1:
                if source2:
                    self.auth_source[user].add(source2)
                continue

            for source in (source1, source2):
                if source:
                    self.auth_source[user].add(source)
            for phsid, privileges in incoming.items():
                existing.setdefault(phsid, set()).update(privileges)
821

822
    def sync_to_db_and_storage_backend(
        self,
        user_project,
        user_info,
        sess,
        do_not_revoke_from_db_and_storage=False,
        expires=None,
    ):
        """
        sync user access control to database and storage backend

        The (username, project) pairs being synced are compared, with lowercased
        usernames, against the AccessPrivilege rows in the db: pairs only in the
        db are revoked, pairs only in the sync are granted, pairs in both are
        re-granted in storage and updated in the db.

        Args:
            user_project (dict): a dictionary of

                {
                    username: {
                        'project1': {'read-storage','write-storage'},
                        'project2': {'read-storage'}
                    }
                }

            user_info (dict): a dictionary of {username: user_info{}}
            sess: a sqlalchemy session
            do_not_revoke_from_db_and_storage (bool): when True, nothing is
                revoked from storage or db and the admin validation is skipped
            expires: forwarded to `_grant_from_storage`

        Return:
            None
        """
        google_bulk_mapping = {} if config["GOOGLE_BULK_UPDATES"] else None

        self._init_projects(user_project, sess)

        # order matters: _grant_from_db uses index 0 for dbGaP, index 1 for fence
        dbgap_provider = self._get_or_create(sess, AuthorizationProvider, name="dbGaP")
        fence_provider = self._get_or_create(sess, AuthorizationProvider, name="fence")
        auth_provider_list = [dbgap_provider, fence_provider]

        db_pairs = set()
        for ua in sess.query(AccessPrivilege).all():
            db_pairs.add((ua.user.username.lower(), ua.project.auth_id))

        # we need to compare db -> whitelist case-insensitively for username.
        # db stores case-sensitively, but we need to query case-insensitively
        user_project_lowercase = {
            username.lower(): projects for username, projects in user_project.items()
        }
        syncing_pairs = {
            (username.lower(), project)
            for username, projects in user_project.items()
            for project in projects
        }
        user_info_lowercase = {
            username.lower(): info for username, info in user_info.items()
        }

        to_delete = db_pairs - syncing_pairs
        to_add = syncing_pairs - db_pairs
        to_update = db_pairs & syncing_pairs

        # when updating users we want to maintain case sensitivity in the
        # username so pass the original, non-lowered user_info dict
        self._upsert_userinfo(sess, user_info)

        if not do_not_revoke_from_db_and_storage:
            self._revoke_from_storage(
                to_delete, sess, google_bulk_mapping=google_bulk_mapping
            )
            self._revoke_from_db(sess, to_delete)

        # brand new access: storage first, then db
        self._grant_from_storage(
            to_add,
            user_project_lowercase,
            sess,
            google_bulk_mapping=google_bulk_mapping,
            expires=expires,
        )
        self._grant_from_db(
            sess,
            to_add,
            user_info_lowercase,
            user_project_lowercase,
            auth_provider_list,
        )

        # re-grant access that already existed
        self._grant_from_storage(
            to_update,
            user_project_lowercase,
            sess,
            google_bulk_mapping=google_bulk_mapping,
            expires=expires,
        )
        self._update_from_db(sess, to_update, user_project_lowercase)

        if not do_not_revoke_from_db_and_storage:
            self._validate_and_update_user_admin(sess, user_info_lowercase)

        sess.commit()

        if config["GOOGLE_BULK_UPDATES"]:
            self.logger.info("Doing bulk Google update...")
            update_google_groups_for_users(google_bulk_mapping)
            self.logger.info("Bulk Google update done!")

        sess.commit()
931

932
    def sync_to_storage_backend(
        self, user_project, user_info, sess, expires, skip_google_updates=False
    ):
        """
        sync user access control to storage backend with given expiration

        Args:
            user_project (dict): a dictionary of

                {
                    username: {
                        'project1': {'read-storage','write-storage'},
                        'project2': {'read-storage'}
                    }
                }

            user_info (dict): a dictionary of attributes for a user; the
                   "username" key is always read, "user_id" is read when
                   GOOGLE_BULK_UPDATES is enabled
            sess: a sqlalchemy session
            expires (int): time at which synced Arborist policies and
                   inclusion in any GBAG are set to expire
            skip_google_updates (bool): True if google group updates should be skipped. False if otherwise.
        Return:
            None

        Raises:
            Exception: if `expires` is falsy
        """
        if not expires:
            raise Exception(
                f"sync to storage backend requires an expiration. you provided: {expires}"
            )

        google_group_user_mapping = None
        if config["GOOGLE_BULK_UPDATES"]:
            google_group_user_mapping = {}
            get_or_create_proxy_group_id(
                expires=expires,
                user_id=user_info["user_id"],
                username=user_info["username"],
                session=sess,
                storage_manager=self.storage_manager,
            )

        # TODO: eventually it'd be nice to remove this step but it's required
        #       so that grant_from_storage can determine what storage backends
        #       are needed for a project.
        self._init_projects(user_project, sess)

        # usernames are lowercased so lookups are case-insensitive
        user_project_lowercase = {
            username.lower(): projects for username, projects in user_project.items()
        }
        to_add = {
            (username.lower(), project)
            for username, projects in user_project.items()
            for project in projects
        }

        # the single user's info is upserted under its lowercased username
        self._upsert_userinfo(sess, {user_info["username"].lower(): user_info})

        if not skip_google_updates:
            self._grant_from_storage(
                to_add,
                user_project_lowercase,
                sess,
                google_bulk_mapping=google_group_user_mapping,
                expires=expires,
            )

            if config["GOOGLE_BULK_UPDATES"]:
                self.logger.info("Updating user's google groups ...")
                update_google_groups_for_users(google_group_user_mapping)
                self.logger.info("Google groups update done!!")

        sess.commit()
1006

1007
    def _revoke_from_db(self, sess, to_delete):
1✔
1008
        """
1009
        Revoke user access to projects in the auth database
1010

1011
        Args:
1012
            sess: sqlalchemy session
1013
            to_delete: a set of (username, project.auth_id) to be revoked from db
1014
        Return:
1015
            None
1016
        """
1017
        for username, project_auth_id in to_delete:
1✔
1018
            q = (
1✔
1019
                sess.query(AccessPrivilege)
1020
                .filter(AccessPrivilege.project.has(auth_id=project_auth_id))
1021
                .join(AccessPrivilege.user)
1022
                .filter(func.lower(User.username) == username)
1023
                .all()
1024
            )
1025
            for access in q:
1✔
1026
                self.logger.info(
1✔
1027
                    "revoke {} access to {} in db".format(username, project_auth_id)
1028
                )
1029
                sess.delete(access)
1✔
1030

1031
    def _validate_and_update_user_admin(self, sess, user_info):
        """
        Remove the admin flag from every db admin whose lowercased username is
        not a key of `user_info` (i.e. is not in the yaml/csv files).

        Only key membership is checked; the 'admin' value inside `user_info`
        is not consulted here.

        Args:
            sess: sqlalchemy session
            user_info: a dict of
            {
                username: {
                    'email': email,
                    'display_name': display_name,
                    'phone_number': phonenum,
                    'tags': {'k1':'v1', 'k2': 'v2'}
                    'admin': is_admin
                }
            }
        Returns:
            None
        """
        current_admins = sess.query(User).filter_by(is_admin=True).all()
        for admin_user in current_admins:
            if admin_user.username.lower() in user_info:
                continue

            admin_user.is_admin = False
            sess.add(admin_user)
            self.logger.info(
                "remove admin access from {} in db".format(admin_user.username.lower())
            )
1059

1060
    def _update_from_db(self, sess, to_update, user_project):
1✔
1061
        """
1062
        Update user access to projects in the auth database
1063

1064
        Args:
1065
            sess: sqlalchemy session
1066
            to_update:
1067
                a set of (username, project.auth_id) to be updated from db
1068

1069
        Return:
1070
            None
1071
        """
1072

1073
        for username, project_auth_id in to_update:
1✔
1074
            q = (
1✔
1075
                sess.query(AccessPrivilege)
1076
                .filter(AccessPrivilege.project.has(auth_id=project_auth_id))
1077
                .join(AccessPrivilege.user)
1078
                .filter(func.lower(User.username) == username)
1079
                .all()
1080
            )
1081
            for access in q:
1✔
1082
                access.privilege = user_project[username][project_auth_id]
1✔
1083
                self.logger.info(
1✔
1084
                    "update {} with {} access to {} in db".format(
1085
                        username, access.privilege, project_auth_id
1086
                    )
1087
                )
1088

1089
    def _grant_from_db(self, sess, to_add, user_info, user_project, auth_provider_list):
        """
        Grant user access to projects in the auth database

        Args:
            sess: sqlalchemy session
            to_add: a set of (username, project.auth_id) to be granted
            user_info:
                a dictionary of {username: {..., 'tags': {...}}}; a user whose
                tags contain "dbgap_role" is attributed to the first provider
            user_project:
                a dictionary of {username: {project: {'read','write'}}
            auth_provider_list:
                two AuthorizationProvider objects; index 0 is used for users
                with a "dbgap_role" tag, index 1 for everyone else
        Return:
            None
        """
        for username, project_auth_id in to_add:
            u = query_for_user(session=sess, username=username)

            # "tags" is optional in user_info (see _upsert_userinfo); a missing
            # or None value means the user has no dbGaP role
            tags = user_info[username].get("tags") or {}
            if "dbgap_role" in tags:
                auth_provider = auth_provider_list[0]
            else:
                auth_provider = auth_provider_list[1]

            user_access = AccessPrivilege(
                user=u,
                project=self._projects[project_auth_id],
                privilege=list(user_project[username][project_auth_id]),
                auth_provider=auth_provider,
            )
            self.logger.info(
                "grant user {} to {} with access {}".format(
                    username, user_access.project, user_access.privilege
                )
            )
            sess.add(user_access)
1118

1119
    def _upsert_userinfo(self, sess, user_info):
        """
        Create or update users in the database from `user_info`.

        For each username: the User is created if missing, registered in
        Arborist when a client is configured, and its email, display name,
        phone number and admin flag are overwritten (absent keys default to
        "" / False). The identity provider is only set when the user has none
        yet. Tags are synced only when `user_info` provides a non-empty "tags"
        value: db tags missing from it are removed, the others are updated or
        created.

        Args:
            sess: sqlalchemy session
            user_info:
                a dict of {username: {display_name, phone_number, tags, admin}

        Return:
            None
        """
        for username, info in user_info.items():
            u = query_for_user(session=sess, username=username)

            if u is None:
                self.logger.info("create user {}".format(username))
                u = User(username=username)
                sess.add(u)

            if self.arborist_client:
                self.arborist_client.create_user({"name": username})

            u.email = info.get("email", "")
            u.display_name = info.get("display_name", "")
            u.phone_number = info.get("phone_number", "")
            u.is_admin = info.get("admin", False)

            idp_name = info.get("idp_name", "")
            if idp_name and not u.identity_provider:
                idp = (
                    sess.query(IdentityProvider)
                    .filter(IdentityProvider.name == idp_name)
                    .first()
                )
                if not idp:
                    idp = IdentityProvider(name=idp_name)
                u.identity_provider = idp

            # do not update if there is no tag
            new_tags = info.get("tags")
            if not new_tags:
                continue

            # remove user db tags if they are not shown in new tags.
            # Iterate over a snapshot: removing from u.tags while iterating it
            # directly skips the element that follows each removed one, which
            # would leave stale tags behind.
            for tag in list(u.tags):
                if tag.key not in new_tags:
                    u.tags.remove(tag)

            # sync
            for k, v in new_tags.items():
                found = False
                for tag in u.tags:
                    if tag.key == k:
                        found = True
                        tag.value = v
                # create new tag if not found
                if not found:
                    u.tags.append(Tag(key=k, value=v))
1179

1180
    def _revoke_from_storage(self, to_delete, sess, google_bulk_mapping=None):
1✔
1181
        """
1182
        If a project have storage backend, revoke user's access to buckets in
1183
        the storage backend.
1184

1185
        Args:
1186
            to_delete: a set of (username, project.auth_id) to be revoked
1187

1188
        Return:
1189
            None
1190
        """
1191
        for username, project_auth_id in to_delete:
1✔
1192
            project = (
1✔
1193
                sess.query(Project).filter(Project.auth_id == project_auth_id).first()
1194
            )
1195
            for sa in project.storage_access:
1✔
1196
                if not hasattr(self, "storage_manager"):
1✔
1197
                    self.logger.error(
×
1198
                        (
1199
                            "CANNOT revoke {} access to {} in {} because there is NO "
1200
                            "configured storage accesses at all. See configuration. "
1201
                            "Continuing anyway..."
1202
                        ).format(username, project_auth_id, sa.provider.name)
1203
                    )
1204
                    continue
×
1205

1206
                self.logger.info(
1✔
1207
                    "revoke {} access to {} in {}".format(
1208
                        username, project_auth_id, sa.provider.name
1209
                    )
1210
                )
1211
                self.storage_manager.revoke_access(
1✔
1212
                    provider=sa.provider.name,
1213
                    username=username,
1214
                    project=project,
1215
                    session=sess,
1216
                    google_bulk_mapping=google_bulk_mapping,
1217
                )
1218

1219
    def _grant_from_storage(
1✔
1220
        self, to_add, user_project, sess, google_bulk_mapping=None, expires=None
1221
    ):
1222
        """
1223
        If a project have storage backend, grant user's access to buckets in
1224
        the storage backend.
1225

1226
        Args:
1227
            to_add: a set of (username, project.auth_id)  to be granted
1228
            user_project: a dictionary like:
1229

1230
                    {username: {phsid: {'read-storage','write-storage'}}}
1231

1232
        Return:
1233
            dict of the users' storage usernames to their user_projects and the respective storage access.
1234
        """
1235
        storage_user_to_sa_and_user_project = defaultdict()
1✔
1236
        for username, project_auth_id in to_add:
1✔
1237
            project = self._projects[project_auth_id]
1✔
1238
            for sa in project.storage_access:
1✔
1239
                access = list(user_project[username][project_auth_id])
1✔
1240
                if not hasattr(self, "storage_manager"):
1✔
1241
                    self.logger.error(
×
1242
                        (
1243
                            "CANNOT grant {} access {} to {} in {} because there is NO "
1244
                            "configured storage accesses at all. See configuration. "
1245
                            "Continuing anyway..."
1246
                        ).format(username, access, project_auth_id, sa.provider.name)
1247
                    )
1248
                    continue
×
1249

1250
                self.logger.info(
1✔
1251
                    "grant {} access {} to {} in {}".format(
1252
                        username, access, project_auth_id, sa.provider.name
1253
                    )
1254
                )
1255
                storage_username = self.storage_manager.grant_access(
1✔
1256
                    provider=sa.provider.name,
1257
                    username=username,
1258
                    project=project,
1259
                    access=access,
1260
                    session=sess,
1261
                    google_bulk_mapping=google_bulk_mapping,
1262
                    expires=expires,
1263
                )
1264

1265
                storage_user_to_sa_and_user_project[storage_username] = (sa, project)
1✔
1266
        return storage_user_to_sa_and_user_project
1✔
1267

1268
    def _init_projects(self, user_project, sess):
1✔
1269
        """
1270
        initialize projects
1271
        """
1272

1273
        if self.project_mapping:
1✔
1274
            for projects in list(self.project_mapping.values()):
1✔
1275
                for p in projects:
1✔
1276
                    self.logger.debug(
1✔
1277
                        "creating Project with info from project_mapping: {}".format(p)
1278
                    )
1279
                    project = self._get_or_create(sess, Project, **p)
1✔
1280
                    self._projects[p["auth_id"]] = project
1✔
1281
        for _, projects in user_project.items():
1✔
1282
            for auth_id in list(projects.keys()):
1✔
1283
                project = sess.query(Project).filter(Project.auth_id == auth_id).first()
1✔
1284
                if not project:
1✔
1285
                    data = {"name": auth_id, "auth_id": auth_id}
1✔
1286
                    try:
1✔
1287
                        project = self._get_or_create(sess, Project, **data)
1✔
1288
                    except IntegrityError as e:
×
1289
                        sess.rollback()
×
1290
                        self.logger.error(
×
1291
                            f"Project {auth_id} already exists. Detail {str(e)}"
1292
                        )
1293
                        raise Exception(
×
1294
                            "Project {} already exists. Detail {}. Please contact your system administrator.".format(
1295
                                auth_id, str(e)
1296
                            )
1297
                        )
1298
                if auth_id not in self._projects:
1✔
1299
                    self._projects[auth_id] = project
1✔
1300

1301
    @staticmethod
1✔
1302
    def _get_or_create(sess, model, **kwargs):
1✔
1303
        instance = sess.query(model).filter_by(**kwargs).first()
1✔
1304
        if not instance:
1✔
1305
            instance = model(**kwargs)
1✔
1306
            sess.add(instance)
1✔
1307
        return instance
1✔
1308

1309
    def _process_dbgap_files(self, dbgap_config, sess):
1✔
1310
        """
1311
        Args:
1312
            dbgap_config : a dictionary containing information about a single
1313
                           dbgap sftp server (from fence config)
1314
            sess: database session
1315

1316
        Return:
1317
            user_projects (dict)
1318
            user_info (dict)
1319
        """
1320
        dbgap_file_list = []
1✔
1321
        hostname = dbgap_config["info"]["host"]
1✔
1322
        username = dbgap_config["info"]["username"]
1✔
1323
        encrypted = dbgap_config["info"].get("encrypted", True)
1✔
1324
        folderdir = os.path.join(str(self.folder), str(hostname), str(username))
1✔
1325

1326
        try:
1✔
1327
            if os.path.exists(folderdir):
1✔
1328
                dbgap_file_list = glob.glob(
×
1329
                    os.path.join(folderdir, "*")
1330
                )  # get lists of file from folder
1331
            else:
1332
                self.logger.info("Downloading files from: {}".format(hostname))
1✔
1333
                dbgap_file_list = self._download(dbgap_config)
1✔
1334
        except Exception as e:
1✔
1335
            self.logger.error(e)
1✔
1336
            exit(1)
1✔
1337
        self.logger.info("dbgap files: {}".format(dbgap_file_list))
×
1338
        user_projects, user_info = self._get_user_permissions_from_csv_list(
×
1339
            dbgap_file_list,
1340
            encrypted=encrypted,
1341
            session=sess,
1342
            dbgap_config=dbgap_config,
1343
        )
1344

1345
        user_projects = self.parse_projects(user_projects)
×
1346
        return user_projects, user_info
×
1347

1348
    def _get_user_permissions_from_csv_list(
1✔
1349
        self, file_list, encrypted, session, dbgap_config={}
1350
    ):
1351
        """
1352
        Args:
1353
            file_list: list of files (represented as strings)
1354
            encrypted: boolean indicating whether those files are encrypted
1355
            session: sqlalchemy session
1356
            dbgap_config: a dictionary containing information about the dbGaP sftp server
1357
                    (comes from fence config)
1358

1359
        Return:
1360
            user_projects (dict)
1361
            user_info (dict)
1362
        """
1363
        permissions = [{"read-storage", "read"} for _ in file_list]
1✔
1364
        user_projects, user_info = self._parse_csv(
1✔
1365
            dict(list(zip(file_list, permissions))),
1366
            sess=session,
1367
            dbgap_config=dbgap_config,
1368
            encrypted=encrypted,
1369
        )
1370
        return user_projects, user_info
1✔
1371

1372
    def _merge_multiple_local_csv_files(
1✔
1373
        self, dbgap_file_list, encrypted, dbgap_configs, session
1374
    ):
1375
        """
1376
        Args:
1377
            dbgap_file_list (list): a list of whitelist file locations stored locally
1378
            encrypted (bool): whether the file is encrypted (comes from fence config)
1379
            dbgap_configs (list): list of dictionaries containing information about the dbgap server (comes from fence config)
1380
            session (sqlalchemy.Session): database session
1381

1382
        Return:
1383
            merged_user_projects (dict)
1384
            merged_user_info (dict)
1385
        """
1386
        merged_user_projects = {}
1✔
1387
        merged_user_info = {}
1✔
1388

1389
        for dbgap_config in dbgap_configs:
1✔
1390
            user_projects, user_info = self._get_user_permissions_from_csv_list(
1✔
1391
                dbgap_file_list,
1392
                encrypted,
1393
                session=session,
1394
                dbgap_config=dbgap_config,
1395
            )
1396
            self.sync_two_user_info_dict(user_info, merged_user_info)
1✔
1397
            self.sync_two_phsids_dict(user_projects, merged_user_projects)
1✔
1398
        return merged_user_projects, merged_user_info
1✔
1399

1400
    def _merge_multiple_dbgap_sftp(self, dbgap_servers, sess):
1✔
1401
        """
1402
        Args:
1403
            dbgap_servers : a list of dictionaries each containging config on
1404
                           dbgap sftp server (comes from fence config)
1405
            sess: database session
1406

1407
        Return:
1408
            merged_user_projects (dict)
1409
            merged_user_info (dict)
1410
        """
1411
        merged_user_projects = {}
1✔
1412
        merged_user_info = {}
1✔
1413
        for dbgap in dbgap_servers:
1✔
1414
            user_projects, user_info = self._process_dbgap_files(dbgap, sess)
1✔
1415
            # merge into merged_user_info
1416
            # user_info overrides original info in merged_user_info
1417
            self.sync_two_user_info_dict(user_info, merged_user_info)
1✔
1418

1419
            # merge all access info dicts into "merged_user_projects".
1420
            # the access info is combined - if the user_projects access is
1421
            # ["read"] and the merged_user_projects is ["read-storage"], the
1422
            # resulting access is ["read", "read-storage"].
1423
            self.sync_two_phsids_dict(user_projects, merged_user_projects)
1✔
1424
        return merged_user_projects, merged_user_info
1✔
1425

1426
    def parse_projects(self, user_projects):
        """
        Return a new dict with every top-level key (username) lowercased.

        Values are reused as-is; when two keys collapse to the same lowercase
        form, the one iterated last wins.
        """
        lowered = {}
        for username, projects in user_projects.items():
            lowered[username.lower()] = projects
        return lowered
1431

1432
    def _process_dbgap_project(
1✔
1433
        self, dbgap_project, privileges, username, sess, user_projects, dbgap_config
1434
    ):
1435
        if dbgap_project not in self.project_mapping:
1✔
1436
            self._add_dbgap_project_for_user(
1✔
1437
                dbgap_project,
1438
                privileges,
1439
                username,
1440
                sess,
1441
                user_projects,
1442
                dbgap_config,
1443
            )
1444

1445
        for element_dict in self.project_mapping.get(dbgap_project, []):
1✔
1446
            try:
1✔
1447
                phsid_privileges = {element_dict["auth_id"]: set(privileges)}
1✔
1448

1449
                # need to add dbgap project to arborist
1450
                if self.arborist_client:
1✔
1451
                    self._determine_arborist_resource(
1✔
1452
                        element_dict["auth_id"], dbgap_config
1453
                    )
1454

1455
                if username not in user_projects:
1✔
1456
                    user_projects[username] = {}
1✔
1457
                user_projects[username].update(phsid_privileges)
1✔
1458

1459
            except ValueError as e:
×
1460
                self.logger.info(e)
×
1461

1462
    def _process_user_projects(
1✔
1463
        self,
1464
        user_projects,
1465
        enable_common_exchange_area_access,
1466
        study_common_exchange_areas,
1467
        dbgap_config,
1468
        sess,
1469
    ):
1470
        user_projects_to_modify = copy.deepcopy(user_projects)
1✔
1471
        for username in user_projects.keys():
1✔
1472
            for project in user_projects[username].keys():
1✔
1473
                phsid = project.split(".")
1✔
1474
                dbgap_project = phsid[0]
1✔
1475
                privileges = user_projects[username][project]
1✔
1476
                if len(phsid) > 1 and self._get_parse_consent_code(dbgap_config):
1✔
1477
                    consent_code = phsid[-1]
1✔
1478

1479
                    # c999 indicates full access to all consents and access
1480
                    # to a study-specific exchange area
1481
                    # access to at least one study-specific exchange area implies access
1482
                    # to the parent study's common exchange area
1483
                    #
1484
                    # NOTE: Handling giving access to all consents is done at
1485
                    #       a later time, when we have full information about possible
1486
                    #       consents
1487
                    self.logger.debug(
1✔
1488
                        f"got consent code {consent_code} from dbGaP project "
1489
                        f"{dbgap_project}"
1490
                    )
1491
                    if (
1✔
1492
                        consent_code == "c999"
1493
                        and enable_common_exchange_area_access
1494
                        and dbgap_project in study_common_exchange_areas
1495
                    ):
1496
                        self.logger.info(
1✔
1497
                            "found study with consent c999 and Fence "
1498
                            "is configured to parse exchange area data. Giving user "
1499
                            f"{username} {privileges} privileges in project: "
1500
                            f"{study_common_exchange_areas[dbgap_project]}."
1501
                        )
1502
                        self._add_dbgap_project_for_user(
1✔
1503
                            study_common_exchange_areas[dbgap_project],
1504
                            privileges,
1505
                            username,
1506
                            sess,
1507
                            user_projects_to_modify,
1508
                            dbgap_config,
1509
                        )
1510

1511
                    dbgap_project += "." + consent_code
1✔
1512

1513
                self._process_dbgap_project(
1✔
1514
                    dbgap_project,
1515
                    privileges,
1516
                    username,
1517
                    sess,
1518
                    user_projects_to_modify,
1519
                    dbgap_config,
1520
                )
1521
        for user in user_projects_to_modify.keys():
1✔
1522
            user_projects[user] = user_projects_to_modify[user]
1✔
1523

1524
    def sync(self):
        """
        Run ``_sync`` with ``self.session`` when one is set; otherwise open a
        session from ``self.driver`` for the duration of the sync.
        """
        if not self.session:
            with self.driver.session as db_session:
                self._sync(db_session)
            return

        self._sync(self.session)
1530

1531
    def download(self):
        """Call ``_download`` once for each server configuration in ``self.dbGaP``."""
        for server_config in self.dbGaP:
            self._download(server_config)
1534

1535
    def _download(self, dbgap_config):
1✔
1536
        """
1537
        Download files from dbgap server.
1538
        """
1539
        server = dbgap_config["info"]
1✔
1540
        protocol = dbgap_config["protocol"]
1✔
1541
        hostname = server["host"]
1✔
1542
        username = server["username"]
1✔
1543
        folderdir = os.path.join(str(self.folder), str(hostname), str(username))
1✔
1544

1545
        if not os.path.exists(folderdir):
1✔
1546
            os.makedirs(folderdir)
1✔
1547

1548
        self.logger.info("Download from server")
1✔
1549
        try:
1✔
1550
            if protocol == "sftp":
1✔
1551
                self._get_from_sftp_with_proxy(server, folderdir)
1✔
1552
            else:
1553
                self._get_from_ftp_with_proxy(server, folderdir)
×
1554
            dbgap_files = glob.glob(os.path.join(folderdir, "*"))
×
1555
            return dbgap_files
×
1556
        except Exception as e:
1✔
1557
            self.logger.error(e)
1✔
1558
            raise
1✔
1559

1560
    def _sync(self, sess):
        """
        Collect files from dbgap server(s), sync csv and yaml files to storage
        backend and fence DB

        Access information from the dbGaP servers, the local CSV directory and
        the user.yaml is merged, written to the Fence DB / storage backend and
        then pushed to Arborist.

        Args:
            sess: database session handed to every helper that touches the DB

        Raises:
            EnvironmentError, AssertionError: re-raised after logging when the
                user.yaml cannot be loaded
            EnvironmentError: the yaml has an ``authz`` section but no arborist
                client is configured
            SystemExit: with status 1 when either Arborist step reports failure
            GoogleUpdateException: raised last, after all remaining sync work
                has finished
        """

        # get all dbgap files
        user_projects = {}
        user_info = {}
        if self.is_sync_from_dbgap_server:
            self.logger.debug(
                f"Pulling telemetry files from {len(self.dbGaP)} dbgap sftp servers"
            )
            user_projects, user_info = self._merge_multiple_dbgap_sftp(self.dbGaP, sess)

        local_csv_file_list = []
        if self.sync_from_local_csv_dir:
            # Sort the list so the order of files is consistent across platforms
            local_csv_file_list = sorted(
                glob.glob(os.path.join(self.sync_from_local_csv_dir, "*"))
            )

        user_projects_csv, user_info_csv = self._merge_multiple_local_csv_files(
            local_csv_file_list,
            encrypted=False,
            session=sess,
            dbgap_configs=self.dbGaP,
        )

        try:
            user_yaml = UserYAML.from_file(
                self.sync_from_local_yaml_file, encrypted=False, logger=self.logger
            )
        except (EnvironmentError, AssertionError) as e:
            self.logger.error(str(e))
            self.logger.error("aborting early")
            raise

        # parse all projects
        user_projects_csv = self.parse_projects(user_projects_csv)
        user_projects = self.parse_projects(user_projects)
        user_yaml.projects = self.parse_projects(user_yaml.projects)

        # merge all user info dicts into "user_info".
        # the user info (such as email) in the user.yaml files
        # overrides the user info from the CSV files.
        self.sync_two_user_info_dict(user_info_csv, user_info)
        self.sync_two_user_info_dict(user_yaml.user_info, user_info)

        # merge all access info dicts into "user_projects".
        # the access info is combined - if the user.yaml access is
        # ["read"] and the CSV file access is ["read-storage"], the
        # resulting access is ["read", "read-storage"].
        self.sync_two_phsids_dict(
            user_projects_csv, user_projects, source1="local_csv", source2="dbgap"
        )
        self.sync_two_phsids_dict(
            user_yaml.projects, user_projects, source1="user_yaml", source2="dbgap"
        )

        # The grant takes no per-server input, so a single call is enough as
        # soon as any configured dbGaP server asks for consent codes to be
        # parsed (repeating it with the same arguments adds nothing).
        if any(
            self._get_parse_consent_code(dbgap_config) for dbgap_config in self.dbGaP
        ):
            self._grant_all_consents_to_c999_users(
                user_projects, user_yaml.project_to_resource
            )

        google_update_ex = None

        try:
            # update the Fence DB
            if user_projects:
                self.logger.info("Sync to db and storage backend")
                self.sync_to_db_and_storage_backend(user_projects, user_info, sess)
                self.logger.info("Finish syncing to db and storage backend")
            else:
                self.logger.info("No users for syncing")
        except GoogleUpdateException as ex:
            # save this to reraise later after all non-Google syncing has finished
            # this way, any issues with Google only affect Google data access and don't
            # cascade problems into non-Google AWS or Azure access
            google_update_ex = ex

        # update the Arborist DB (resources, roles, policies, groups)
        if user_yaml.authz:
            if not self.arborist_client:
                raise EnvironmentError(
                    "yaml file contains authz section but sync is not configured with"
                    " arborist client--did you run sync with --arborist <arborist client> arg?"
                )
            self.logger.info("Synchronizing arborist...")
            if not self._update_arborist(sess, user_yaml):
                self.logger.error("Could not synchronize successfully")
                # SystemExit directly: the `exit` helper is injected by the
                # `site` module and is meant for interactive sessions only
                raise SystemExit(1)
            self.logger.info("Finished synchronizing arborist")
        else:
            self.logger.info("No `authz` section; skipping arborist sync")

        # update the Arborist DB (user access)
        if self.arborist_client:
            self.logger.info("Synchronizing arborist with authorization info...")
            if not self._update_authz_in_arborist(sess, user_projects, user_yaml):
                self.logger.error(
                    "Could not synchronize authorization info successfully to arborist"
                )
                raise SystemExit(1)
            self.logger.info("Finished synchronizing authorization info to arborist")
        else:
            self.logger.error("No arborist client set; skipping arborist sync")

        # Logging authz source
        for u, s in self.auth_source.items():
            self.logger.info(f"Access for user {u} from {s}")

        self.logger.info(
            f"Persisting authz mapping to database: {user_yaml.project_to_resource}"
        )
        user_yaml.persist_project_to_resource(db_session=sess)

        # Google problems are surfaced only now, after everything else is synced
        if google_update_ex is not None:
            raise google_update_ex
1691

1692
    def _grant_all_consents_to_c999_users(
        self, user_projects, user_yaml_project_to_resources
    ):
        """
        Give every user holding a ``c999`` consent on a study access to all
        known consent-qualified accessions of that study.

        Known accessions are gathered from ``self._projects`` and the user.yaml
        project mapping; each parent's consents are also assigned to the studies
        returned by ``self._get_children``.

        Args:
            user_projects (dict): username -> {project: privileges}; updated in
                place with ``{"read-storage", "read"}`` for each granted accession
            user_yaml_project_to_resources (dict): project -> resource mapping
                from the user.yaml; only its keys are read here
        """
        access_number_matcher = re.compile(config["DBGAP_ACCESSION_WITH_CONSENT_REGEX"])
        # combine dbgap/user.yaml projects into one big set (in case not all consents
        # are in either)
        all_projects = set(self._projects) | set(user_yaml_project_to_resources)

        self.logger.debug(f"all projects: {all_projects}")

        # construct a mapping from phsid (without consent) to all accessions with consent
        consent_mapping = {}
        for project in all_projects:
            phs_match = access_number_matcher.match(project)
            if not phs_match:
                continue
            accession_number = phs_match.groupdict()
            phsid = accession_number["phsid"]
            consent = accession_number["consent"]

            # TODO: This is not handling the .v1.p1 at all
            consent_mapping.setdefault(phsid, set()).add(".".join([phsid, consent]))

            # Assign parent consent to child study
            for child_phs in self._get_children(phsid) or []:
                consent_mapping.setdefault(child_phs, set()).add(
                    ".".join([child_phs, consent])
                )

        self.logger.debug(f"consent mapping: {consent_mapping}")

        # go through existing access and find any c999's and make sure to give access to
        # all accessions with consent for that phsid
        for username, user_project_info in user_projects.items():
            # Snapshot the keys: the grants below add entries to this same dict,
            # and only the access present before this call should be inspected.
            for project in list(user_project_info):
                phs_match = access_number_matcher.match(project)
                if not phs_match:
                    continue
                accession_number = phs_match.groupdict()
                if accession_number["consent"] != "c999":
                    continue

                # give access to all consents
                all_phsids_with_consent = consent_mapping.get(
                    accession_number["phsid"], []
                )
                self.logger.info(
                    f"user {username} has c999 consent group for: {project}. "
                    f"Granting access to all consents: {all_phsids_with_consent}"
                )
                # NOTE: every granted accession gets both "read-storage" and "read"
                for phsid_with_consent in all_phsids_with_consent:
                    user_project_info[phsid_with_consent] = {"read-storage", "read"}
1746

1747
    def _update_arborist(self, session, user_yaml):
        """
        Create roles, resources, policies, groups in arborist from the information in
        ``user_yaml``.

        The projects are sent to arborist as resources with paths like
        ``/projects/{project}``. Roles are created with just the original names
        for the privileges like ``"read-storage", "read"`` etc.

        ``ArboristError`` on an individual resource/role/policy/group is logged
        and the sync moves on to the next item.

        Args:
            session (sqlalchemy.Session)
            user_yaml (UserYAML): its ``authz`` dict is read but not modified

        Return:
            bool: False when arborist is not healthy, True otherwise
        """
        if not self._is_arborist_healthy():
            return False

        authz = user_yaml.authz
        builtin_groups = ("anonymous", "logged-in")

        # Set up the resource tree in arborist by combining provided resources with any
        # dbgap resources that were created before this.
        #
        # Why add dbgap resources if they've already been created?
        #   B/C Arborist's PUT update will override existing subresources. So if a dbgap
        #   resources was created under `/programs/phs000178` anything provided in
        #   user.yaml under `/programs` would completely wipe it out.
        resources = authz.get("resources", [])

        dbgap_resource_paths = []
        for path_list in self._dbgap_study_to_resources.values():
            dbgap_resource_paths.extend(path_list)

        self.logger.debug(f"user_yaml resources: {resources}")
        self.logger.debug(f"dbgap resource paths: {dbgap_resource_paths}")

        combined_resources = utils.combine_provided_and_dbgap_resources(
            resources, dbgap_resource_paths
        )

        for resource in combined_resources:
            try:
                self.logger.debug(
                    f"attempting to update arborist resource: {resource}"
                )
                self.arborist_client.update_resource("/", resource, merge=True)
            except ArboristError as e:
                self.logger.error(e)
                # keep going; maybe just some conflicts from things existing already

        # update roles, falling back to creation when the update is rejected
        for role in authz.get("roles", []):
            try:
                if self.arborist_client.update_role(role["id"], role):
                    self._created_roles.add(role["id"])
            except ArboristError as e:
                self.logger.info(f"couldn't update role '{e}', creating instead")
                try:
                    if self.arborist_client.create_role(role):
                        self._created_roles.add(role["id"])
                except ArboristError as e:
                    self.logger.error(e)
                    # keep going; maybe just some conflicts from things existing already

        # update policies
        for policy in authz.get("policies", []):
            # Pop "id" from a shallow copy: removing it from the caller's dict
            # would leave user_yaml.authz without policy ids, and a second pass
            # over the same user_yaml would then fail with KeyError.
            policy_body = dict(policy)
            policy_id = policy_body.pop("id")
            try:
                self.logger.debug(f"Trying to upsert policy with id {policy_id}")
                response = self.arborist_client.update_policy(
                    policy_id, policy_body, create_if_not_exist=True
                )
            except ArboristError as e:
                self.logger.error(e)
                # keep going; maybe just some conflicts from things existing already
            else:
                if response:
                    self.logger.debug(f"Upserted policy with id {policy_id}")
                    self._created_policies.add(policy_id)

        # update groups
        groups = authz.get("groups", [])

        # delete from arborist the groups that have been deleted
        # from the user.yaml
        arborist_groups = {
            g["name"] for g in self.arborist_client.list_groups().get("groups", [])
        }
        useryaml_groups = {g["name"] for g in groups}
        for deleted_group in arborist_groups - useryaml_groups:
            # do not try to delete built in groups
            if deleted_group not in builtin_groups:
                self.arborist_client.delete_group(deleted_group)

        # create/update the groups defined in the user.yaml
        for group in groups:
            missing = {"name", "users", "policies"}.difference(group.keys())
            if missing:
                name = group.get("name", "{MISSING NAME}")
                self.logger.error(
                    f"group {name} missing required field(s): {list(missing)}"
                )
                continue
            try:
                self.arborist_client.put_group(
                    group["name"],
                    # Arborist doesn't handle group descriptions deff
                    # description=group.get("description", ""),
                    users=group["users"],
                    policies=group["policies"],
                )
            except ArboristError as e:
                self.logger.info(f"couldn't put group: {e}")

        # Update policies for built-in (`anonymous` and `logged-in`) groups

        # First recreate these groups in order to clear out old, possibly deleted policies
        for builtin_group in builtin_groups:
            try:
                self.arborist_client.put_group(builtin_group)
            except ArboristError as e:
                self.logger.info(f"couldn't put group: {e}")

        # Now add back policies that are in the user.yaml
        for policy in authz.get("anonymous_policies", []):
            self.arborist_client.grant_group_policy("anonymous", policy)

        for policy in authz.get("all_users_policies", []):
            self.arborist_client.grant_group_policy("logged-in", policy)

        return True
1886

1887
    def _revoke_all_policies_preserve_mfa(self, username, idp=None):
        """
        Revoke all of the user's policies in arborist; when MFA is enabled for
        the user's idp and the user held ``mfa_policy`` beforehand, grant
        ``mfa_policy`` back afterwards.

        MFA counts as enabled when ``multifactor_auth_claim_info`` is present in
        the ``OPENID_CONNECT`` config entry for ``idp``.

        Args:
            username (str): user whose policies are revoked
            idp (str): name of the user's identity provider, used to look up the
                ``OPENID_CONNECT`` entry; no entry means MFA is not enabled
        """
        idp_settings = config["OPENID_CONNECT"].get(idp, {})

        if "multifactor_auth_claim_info" not in idp_settings:
            # TODO This should be a diff, not a revocation of all policies.
            self.arborist_client.revoke_all_policies_for_user(username)
            return

        policies = []
        try:
            policies = self.arborist_client.get_user(username)["policies"]
        except Exception as e:
            self.logger.error(
                f"Could not retrieve user's policies, revoking all policies anyway. {e}"
            )
        finally:
            # TODO This should be a diff, not a revocation of all policies.
            self.arborist_client.revoke_all_policies_for_user(username)

        # Entries are accepted both as plain policy ids and as dicts carrying the
        # id under "policy" (the shape _grant_arborist_policies reads from the
        # same get_user call). NOTE(review): confirm which shape the deployed
        # arborist returns.
        had_mfa_policy = any(
            (entry.get("policy") if isinstance(entry, dict) else entry)
            == "mfa_policy"
            for entry in policies
        )
        if had_mfa_policy:
            self.arborist_client.grant_user_policy(username, "mfa_policy")
1916

1917
    def _grant_arborist_policies(
1✔
1918
        self, username, incoming_policies, user_yaml, expires=None
1919
    ):
1920
        """
1921
        Find the difference between the existing policies for a user and the incoming policies,
1922
        and decide whether to add, remove, or keep policies.
1923

1924
        Args:
1925
            user_existing_policies (_type_): _description_
1926
            incoming_policies (_type_): _description_
1927
        """
1928
        user_existing_policies = set()
1✔
1929
        to_keep = set()
1✔
1930
        to_add = set()
1✔
1931
        to_remove = set()
1✔
1932
        is_revoke_all = False
1✔
1933

1934
        try:
1✔
1935
            user_existing_policies = set(
1✔
1936
                policy["policy"]
1937
                for policy in self.arborist_client.get_user(username)["policies"]
1938
            )
1939
        except ArboristError as e:
1✔
1940
            self.logger.error(
1✔
1941
                f"Could not get user {username} policies from Arborist: {e}"
1942
            )
1943
            # if getting existing policies fails, revoke all policies and re-apply
1944
            is_revoke_all = True
1✔
1945

1946
        if is_revoke_all is False and len(incoming_policies) > 0:
1✔
1947
            to_keep = (
1✔
1948
                incoming_policies & user_existing_policies
1949
            )  # policies that remain unchanged
1950
            to_add = (
1✔
1951
                incoming_policies - user_existing_policies
1952
            )  # policies that need to be added
1953
            to_remove = (
1✔
1954
                user_existing_policies - incoming_policies
1955
            )  # policies that need to be removed
1956
            for policy in to_remove:
1✔
1957
                if policy in user_yaml.authz.get(
×
1958
                    "anonymous_policies", []
1959
                ) or policy in user_yaml.authz.get("all_users_policies", []):
1960
                    self.logger.warning(
×
1961
                        f"Policy {policy} is an anonymous policy, not revoking it for user {username}."
1962
                    )
1963
                    to_remove.remove(policy)
×
1964
        else:
1965
            # if incoming_policies is empty, we revoke all policies
1966
            is_revoke_all = True
1✔
1967

1968
        print("--------prints------")
1✔
1969
        print(to_keep)
1✔
1970
        print(to_add)
1✔
1971
        print(to_remove)
1✔
1972
        print(is_revoke_all)
1✔
1973

1974
        if not is_revoke_all:
1✔
1975
            try:
1✔
1976
                if to_remove:
1✔
1977
                    print("----------polices to remove: {}".format(to_remove))
×
1978
                    for policy in to_remove:
×
1979
                        self.logger.info(
×
1980
                            f"Revoking policy {policy} for user {username}."
1981
                        )
1982
                        self.arborist_client.revoke_user_policy(username, policy)
×
1983
            except ArboristError as e:
×
1984
                self.logger.error(
×
1985
                    f"Could not revoke user {username} policy {policy}. Revoking all instead: {e}"
1986
                )
1987
                is_revoke_all = True
×
1988

1989
        if is_revoke_all:
1✔
1990
            try:
1✔
1991
                self.logger.info(f"Revoking all policies for user {username}.")
1✔
1992
                self.arborist_client.revoke_all_policies_for_user(username)
1✔
1993
            except ArboristError as e:
×
1994
                self.logger.error(
×
1995
                    f"Could not revoke all policies for user {username}. Error: {e}"
1996
                )
1997
                return False
×
1998
            to_add = incoming_policies  # if we revoke all, we need to add all incoming policies
1✔
1999

2000
        if (
1✔
2001
            "mfa_policy" not in incoming_policies
2002
            and "mfa_policy" in user_existing_policies
2003
        ):
2004
            to_add.add("mfa_policy")
×
2005

2006
        if to_add:
1✔
2007
            try:
1✔
2008
                response_json = self.arborist_client.grant_bulk_user_policy(
1✔
2009
                    username, list(to_add)
2010
                )
2011
                # TODO: When gen3authz 2.3.0 is released, uncomment this and delete the above call.
2012
                # response_json = self.arborist_client.grant_bulk_user_policy(
2013
                #     username, policy_ids, expires
2014
                # )
2015
            except ArboristError as e:
×
2016
                self.logger.error(
×
2017
                    f"Could not grant user {username} policies {to_add}. Error: {e}"
2018
                )
2019
                return False
×
2020

2021
        return True
1✔
2022

2023
    def _update_authz_in_arborist(
1✔
2024
        self,
2025
        session,
2026
        user_projects,
2027
        user_yaml=None,
2028
        single_user_sync=False,
2029
        expires=None,
2030
    ):
2031
        """
2032
        Assign users policies in arborist from the information in
2033
        ``user_projects`` and optionally a ``user_yaml``.
2034

2035
        The projects are sent to arborist as resources with paths like
2036
        ``/projects/{project}``. Roles are created with just the original names
2037
        for the privileges like ``"read-storage", "read"`` etc.
2038

2039
        Args:
2040
            user_projects (dict)
2041
            user_yaml (UserYAML) optional, if there are policies for users in a user.yaml
2042
            single_user_sync (bool) whether authz update is for a single user
2043
            expires (int) time at which authz info in Arborist should expire
2044

2045
        Return:
2046
            bool: success
2047
        """
2048
        healthy = self._is_arborist_healthy()
1✔
2049
        if not healthy:
1✔
2050
            return False
×
2051

2052
        self.logger.debug("user_projects: {}".format(user_projects))
1✔
2053

2054
        if user_yaml:
1✔
2055
            self.logger.debug(
1✔
2056
                "useryaml abac before lowering usernames: {}".format(
2057
                    user_yaml.user_abac
2058
                )
2059
            )
2060
            user_yaml.user_abac = {
1✔
2061
                key.lower(): value for key, value in user_yaml.user_abac.items()
2062
            }
2063
            # update the project info with `projects` specified in user.yaml
2064
            self.sync_two_phsids_dict(user_yaml.user_abac, user_projects)
1✔
2065

2066
        # get list of users from arborist to make sure users that are completely removed
2067
        # from authorization sources get policies revoked
2068

2069
        arborist_user_projects = {}
1✔
2070
        if not single_user_sync:
1✔
2071
            arborist_users_auth_mapping = {}
1✔
2072
            # to_add, to_remove, to_delete = self._compare_policies(
2073
            #     arborist_users_auth_mapping, user_projects
2074
            # )
2075

2076
            try:
1✔
2077
                arborist_users = self.arborist_client.get_users().json["users"]
1✔
2078

2079
                # construct user information, NOTE the lowering of the username. when adding/
2080
                # removing access, the case in the Fence db is used. For combining access, it is
2081
                # case-insensitive, so we lower
2082
                arborist_user_projects = {
1✔
2083
                    user["name"].lower(): {} for user in arborist_users
2084
                }
2085
            except (ArboristError, KeyError, AttributeError) as error:
×
2086
                # TODO usersync should probably exit with non-zero exit code at the end,
2087
                #      but sync should continue from this point so there are no partial
2088
                #      updates
2089
                self.logger.warning(
×
2090
                    "Could not get list of users in Arborist, continuing anyway. "
2091
                    "WARNING: this sync will NOT remove access for users no longer in "
2092
                    f"authorization sources. Error: {error}"
2093
                )
2094

2095
            # Get auth mapping for users
2096
            for user in arborist_users:
1✔
2097
                username = user["name"]
×
2098
                try:
×
2099
                    arborist_users_auth_mapping[
×
2100
                        username
2101
                    ] = self.arborist_client.auth_mapping(username)
2102

2103
                except (ArboristError, KeyError, AttributeError) as error:
×
2104
                    self.logger.warning(
×
2105
                        "Could not get auth mapping of users in Arborist, continuing anyway. "
2106
                        "WARNING: this sync will NOT remove access for users no longer in "
2107
                        f"authorization sources. Error: {error}"
2108
                    )
2109

2110
            # update the project info with users from arborist
2111
            self.sync_two_phsids_dict(arborist_user_projects, user_projects)
1✔
2112

2113
        policy_id_list = []
1✔
2114
        policies = []
1✔
2115

2116
        # prefer in-memory if available from user_yaml, if not, get from database
2117
        if user_yaml and user_yaml.project_to_resource:
1✔
2118
            project_to_authz_mapping = user_yaml.project_to_resource
1✔
2119
            self.logger.debug(
1✔
2120
                f"using in-memory project to authz resource mapping from "
2121
                f"user.yaml (instead of database): {project_to_authz_mapping}"
2122
            )
2123
        else:
2124
            project_to_authz_mapping = get_project_to_authz_mapping(session)
1✔
2125
            self.logger.debug(
1✔
2126
                f"using persisted project to authz resource mapping from database "
2127
                f"(instead of user.yaml - as it may not be available): {project_to_authz_mapping}"
2128
            )
2129

2130
        self.logger.debug(
1✔
2131
            f"_dbgap_study_to_resources: {self._dbgap_study_to_resources}"
2132
        )
2133
        all_resources = [
1✔
2134
            r
2135
            for resources in self._dbgap_study_to_resources.values()
2136
            for r in resources
2137
        ]
2138
        all_resources.extend(r for r in project_to_authz_mapping.values())
1✔
2139
        self._create_arborist_resources(all_resources)
1✔
2140

2141
        for username, user_project_info in user_projects.items():
1✔
2142
            self.logger.info("processing user `{}`".format(username))
1✔
2143
            user = query_for_user(session=session, username=username)
1✔
2144
            idp = None
1✔
2145
            if user:
1✔
2146
                username = user.username
1✔
2147
                idp = user.identity_provider.name if user.identity_provider else None
1✔
2148

2149
            self.arborist_client.create_user_if_not_exist(username)
1✔
2150

2151
            # as of 2/11/2022, for single_user_sync, as RAS visa parsing has
2152
            # previously mapped each project to the same set of privileges
2153
            # (i.e.{'read', 'read-storage'}), unique_policies will just be a
2154
            # single policy with ('read', 'read-storage') being the single
2155
            # key
2156
            unique_policies = self._determine_unique_policies(
1✔
2157
                user_project_info, project_to_authz_mapping
2158
            )
2159
            for roles in unique_policies.keys():
1✔
2160
                for role in roles:
1✔
2161
                    self._create_arborist_role(role)
1✔
2162

2163
            incoming_policies = set()  # set of policies for current user.
1✔
2164

2165
            if single_user_sync:
1✔
2166
                for ordered_roles, ordered_resources in unique_policies.items():
1✔
2167
                    policy_hash = self._hash_policy_contents(
1✔
2168
                        ordered_roles, ordered_resources
2169
                    )
2170
                    self._create_arborist_policy(
1✔
2171
                        policy_hash,
2172
                        ordered_roles,
2173
                        ordered_resources,
2174
                        skip_if_exists=True,
2175
                    )
2176
                    # return here as it is not expected single_user_sync
2177
                    # will need any of the remaining user_yaml operations
2178
                    # left in _update_authz_in_arborist
2179
                    return self._grant_arborist_policy(
1✔
2180
                        username, policy_hash, expires=expires
2181
                    )
2182
            else:
2183

2184
                for roles, resources in unique_policies.items():
1✔
2185
                    for role in roles:
1✔
2186
                        for resource in resources:
1✔
2187
                            # grant a policy to this user which is a single
2188
                            # role on a single resource
2189

2190
                            # format project '/x/y/z' -> 'x.y.z'
2191
                            # so the policy id will be something like 'x.y.z-create'
2192
                            policy_id = _format_policy_id(resource, role)
1✔
2193
                            incoming_policies.add(policy_id)
1✔
2194
                            if policy_id not in self._created_policies:
1✔
2195
                                try:
1✔
2196
                                    self.arborist_client.update_policy(
1✔
2197
                                        policy_id,
2198
                                        {
2199
                                            "description": "policy created by fence sync",
2200
                                            "role_ids": [role],
2201
                                            "resource_paths": [resource],
2202
                                        },
2203
                                        create_if_not_exist=True,
2204
                                    )
2205
                                except ArboristError as e:
×
2206
                                    self.logger.info(
×
2207
                                        "not creating policy in arborist; {}".format(
2208
                                            str(e)
2209
                                        )
2210
                                    )
2211
                                self._created_policies.add(policy_id)
1✔
2212
            if user_yaml:
1✔
2213
                user_yaml_policies = set(user_yaml.policies.get(username, []))
1✔
2214
                incoming_policies = (
1✔
2215
                    incoming_policies | user_yaml_policies
2216
                )  # add policies from whitelist and useryaml
2217

2218
            self._grant_arborist_policies(
1✔
2219
                username, incoming_policies, user_yaml, expires=expires
2220
            )
2221

2222
            # if user_yaml:
2223
            #     for policy in user_yaml.policies.get(username, []):
2224
            #         self.arborist_client.grant_user_policy(
2225
            #             username,
2226
            #             policy,
2227
            #             expires_at=expires,
2228
            #         )
2229

2230
        if user_yaml:
1✔
2231
            for client_name, client_details in user_yaml.clients.items():
1✔
2232
                client_policies = client_details.get("policies", [])
×
2233
                clients = session.query(Client).filter_by(name=client_name).all()
×
2234
                # update existing clients, do not create new ones
2235
                if not clients:
×
2236
                    self.logger.warning(
×
2237
                        "client to update (`{}`) does not exist in fence: skipping".format(
2238
                            client_name
2239
                        )
2240
                    )
2241
                    continue
×
2242
                self.logger.debug(
×
2243
                    "updating client `{}` (found {} client IDs)".format(
2244
                        client_name, len(clients)
2245
                    )
2246
                )
2247
                # there may be more than 1 client with this name if credentials are being rotated,
2248
                # so we grant access to each client ID
2249
                for client in clients:
×
2250
                    try:
×
2251
                        self.arborist_client.update_client(
×
2252
                            client.client_id, client_policies
2253
                        )
2254
                    except ArboristError as e:
×
2255
                        self.logger.info(
×
2256
                            "not granting policies {} to client `{}` (`{}`); {}".format(
2257
                                client_policies, client_name, client.client_id, str(e)
2258
                            )
2259
                        )
2260

2261
        return True
1✔
2262

2263
    def _determine_unique_policies(self, user_project_info, project_to_authz_mapping):
1✔
2264
        """
2265
        Determine and return a dictionary of unique policies.
2266

2267
        Args (examples):
2268
            user_project_info (dict):
2269
            {
2270
                'phs000002.c1': { 'read-storage', 'read' },
2271
                'phs000001.c1': { 'read', 'read-storage' },
2272
                'phs000004.c1': { 'write', 'read' },
2273
                'phs000003.c1': { 'read', 'write' },
2274
                'phs000006.c1': { 'write-storage', 'write', 'read-storage', 'read' }
2275
                'phs000005.c1': { 'read', 'read-storage', 'write', 'write-storage' },
2276
            }
2277
            project_to_authz_mapping (dict):
2278
            {
2279
                'phs000001.c1': '/programs/DEV/projects/phs000001.c1'
2280
            }
2281

2282
        Return (for examples):
2283
            dict:
2284
            {
2285
                ('read', 'read-storage'): ('phs000001.c1', 'phs000002.c1'),
2286
                ('read', 'write'): ('phs000003.c1', 'phs000004.c1'),
2287
                ('read', 'read-storage', 'write', 'write-storage'): ('phs000005.c1', 'phs000006.c1'),
2288
            }
2289
        """
2290
        roles_to_resources = collections.defaultdict(list)
1✔
2291
        for study, roles in user_project_info.items():
1✔
2292
            ordered_roles = tuple(sorted(roles))
1✔
2293
            study_authz_paths = self._dbgap_study_to_resources.get(study, [study])
1✔
2294
            if study in project_to_authz_mapping:
1✔
2295
                study_authz_paths = [project_to_authz_mapping[study]]
1✔
2296
            roles_to_resources[ordered_roles].extend(study_authz_paths)
1✔
2297

2298
        policies = {}
1✔
2299
        for ordered_roles, unordered_resources in roles_to_resources.items():
1✔
2300
            policies[ordered_roles] = tuple(sorted(unordered_resources))
1✔
2301
        return policies
1✔
2302

2303
    def _create_arborist_role(self, role):
1✔
2304
        """
2305
        Wrapper around gen3authz's create_role with additional logging
2306

2307
        Args:
2308
            role (str): what the Arborist identity should be of the created role
2309

2310
        Return:
2311
            bool: True if the role was created successfully or it already
2312
                  exists. False otherwise
2313
        """
2314
        if role in self._created_roles:
1✔
2315
            return True
1✔
2316
        try:
1✔
2317
            response_json = self.arborist_client.create_role(
1✔
2318
                arborist_role_for_permission(role)
2319
            )
2320
        except ArboristError as e:
×
2321
            self.logger.error(
×
2322
                "could not create `{}` role in Arborist: {}".format(role, e)
2323
            )
2324
            return False
×
2325
        self._created_roles.add(role)
1✔
2326

2327
        if response_json is None:
1✔
2328
            self.logger.info("role `{}` already exists in Arborist".format(role))
×
2329
        else:
2330
            self.logger.info("created role `{}` in Arborist".format(role))
1✔
2331
        return True
1✔
2332

2333
    def _create_arborist_resources(self, resources):
        """
        Create (merge) the given resources into Arborist under the root `/`.

        Args:
            resources (list): full Arborist resource paths to create, e.g.
            [
                "/programs/DEV/projects/phs000001.c1",
                "/programs/DEV/projects/phs000002.c1",
                "/programs/DEV/projects/phs000003.c1"
            ]

        Return:
            bool: True if every request succeeded. False as soon as one
                  request raises an ArboristError (remaining requests are
                  not attempted)

        One request is sent per top-level object returned by
        utils.combine_provided_and_dbgap_resources({}, resources).

        As of 2/11/2022, paths sharing a root, such as the example above,
        collapse into a single nested object:
        [
            { 'name': 'programs', 'subresources': [
                { 'name': 'DEV', 'subresources': [
                    { 'name': 'projects', 'subresources': [
                        { 'name': 'phs000001.c1', 'subresources': []},
                        { 'name': 'phs000002.c1', 'subresources': []},
                        { 'name': 'phs000003.c1', 'subresources': []}
                    ]}
                ]}
            ]}
        ]
        so only one network request is made. Root-level paths such as
        ["/phs000001.c1", "/phs000002.c1", "/phs000003.c1"] yield one object
        each:
        [
            {'name': 'phs000001.c1', 'subresources': []},
            {'name': 'phs000002.c1', 'subresources': []},
            {'name': 'phs000003.c1', 'subresources': []}
        ]
        and therefore 3 network requests.

        As a practical matter, for sync_single_user_visas, studies should be
        nested under the `/programs` resource as in the former example (i.e.
        only one network request gets made).

        TODO for the sake of simplicity, it would be nice if only one network
        request was made no matter the input.
        """
        request_bodies = utils.combine_provided_and_dbgap_resources({}, resources)
        for body in request_bodies:
            try:
                self.arborist_client.update_resource("/", body, merge=True)
            except ArboristError as err:
                self.logger.error(
                    "could not create Arborist resources using request body `{}`. error: {}".format(
                        body, err
                    )
                )
                return False

        resource_count = len(resources)
        self.logger.debug(
            "created {} resource(s) in Arborist: `{}`".format(
                resource_count, resources
            )
        )
        return True
1✔
2398

2399
    def _create_arborist_policy(
        self, policy_id, roles, resources, skip_if_exists=False
    ):
        """
        Create a policy in Arborist through the arborist client, with logging.

        Args:
            policy_id (str): id the created policy should have in Arborist
            roles (iterable): role ids the policy should contain
            resources (iterable): resource paths the policy should contain
            skip_if_exists (bool): forwarded to the client's create_policy;
                                   if True, an already existing policy is
                                   not treated as an error

        Return:
            bool: True if the policy was created or already exists. False if
                  Arborist raised an ArboristError
        """
        policy_body = {
            "id": policy_id,
            "role_ids": roles,
            "resource_paths": resources,
        }
        try:
            created = self.arborist_client.create_policy(
                policy_body, skip_if_exists=skip_if_exists
            )
        except ArboristError as err:
            self.logger.error(
                "could not create policy `{}` in Arborist: {}".format(policy_id, err)
            )
            return False

        # a None response is reported as "already exists"
        outcome = (
            "policy `{}` already exists in Arborist"
            if created is None
            else "created policy `{}` in Arborist"
        )
        self.logger.info(outcome.format(policy_id))
        return True
1✔
2435

2436
    def _hash_policy_contents(self, ordered_roles, ordered_resources):
1✔
2437
        """
2438
        Generate a sha256 hexdigest representing ordered_roles and ordered_resources.
2439

2440
        Args:
2441
            ordered_roles (iterable): policy roles in sorted order
2442
            ordered_resources (iterable): policy resources in sorted order
2443

2444
        Return:
2445
            str: SHA256 hex digest
2446
        """
2447

2448
        def escape(s):
1✔
2449
            return s.replace(",", "\,")
1✔
2450

2451
        canonical_roles = ",".join(escape(r) for r in ordered_roles)
1✔
2452
        canonical_resources = ",".join(escape(r) for r in ordered_resources)
1✔
2453
        canonical_policy = f"{canonical_roles},,f{canonical_resources}"
1✔
2454
        policy_hash = hashlib.sha256(canonical_policy.encode("utf-8")).hexdigest()
1✔
2455

2456
        return policy_hash
1✔
2457

2458
    def _compare_policies(self, existing_policies, incoming_policies):
1✔
2459
        """
2460
        Compares a user's existing policies with incoming policies from either user_yaml or dbgap whitelist
2461

2462
        Args:
2463
            existing_policies (_type_): user's existing policies pulled with arborist_client.auth_mapping(username)
2464
            incoming_policies (_type_): user's policies as dictated by authz source
2465

2466
        Return:
2467
            policies_to_add (dict): policies to be added to arborist
2468
            policies_to_remove (dict): policies to be removed from arborist
2469
        """
2470
        pass
×
2471

2472
    def _grant_arborist_policy(self, username, policy_id, expires=None):
        """
        Grant one policy to one user through the arborist client, with logging.

        Args:
            username (str): username of the Arborist user receiving the policy
            policy_id (str): Arborist policy id
            expires (int): POSIX timestamp for when the policy should expire;
                           forwarded to the client as `expires_at`

        Return:
            bool: True if the grant succeeded, False if Arborist raised an
                  ArboristError
        """
        try:
            self.arborist_client.grant_user_policy(
                username, policy_id, expires_at=expires
            )
        except ArboristError as err:
            self.logger.error(
                "could not grant policy `{}` to user `{}`: {}".format(
                    policy_id, username, err
                )
            )
            return False
        else:
            self.logger.debug(
                "granted policy `{}` to user `{}`".format(policy_id, username)
            )
            return True
1✔
2503

2504
    def _determine_arborist_resource(self, dbgap_study, dbgap_config):
1✔
2505
        """
2506
        Determine the arborist resource path and add it to
2507
        _self._dbgap_study_to_resources
2508

2509
        Args:
2510
            dbgap_study (str): study phs identifier
2511
            dbgap_config (dict): dictionary of config for dbgap server
2512

2513
        """
2514
        default_namespaces = dbgap_config.get("study_to_resource_namespaces", {}).get(
1✔
2515
            "_default", ["/"]
2516
        )
2517
        namespaces = dbgap_config.get("study_to_resource_namespaces", {}).get(
1✔
2518
            dbgap_study, default_namespaces
2519
        )
2520

2521
        self.logger.debug(f"dbgap study namespaces: {namespaces}")
1✔
2522

2523
        arborist_resource_namespaces = [
1✔
2524
            namespace.rstrip("/") + "/programs/" for namespace in namespaces
2525
        ]
2526

2527
        for resource_namespace in arborist_resource_namespaces:
1✔
2528
            full_resource_path = resource_namespace + dbgap_study
1✔
2529
            if dbgap_study not in self._dbgap_study_to_resources:
1✔
2530
                self._dbgap_study_to_resources[dbgap_study] = []
1✔
2531
            self._dbgap_study_to_resources[dbgap_study].append(full_resource_path)
1✔
2532
        return arborist_resource_namespaces
1✔
2533

2534
    def _is_arborist_healthy(self):
1✔
2535
        if not self.arborist_client:
1✔
2536
            self.logger.warning("no arborist client set; skipping arborist dbgap sync")
×
2537
            return False
×
2538
        if not self.arborist_client.healthy():
1✔
2539
            # TODO (rudyardrichter, 2019-01-07): add backoff/retry here
2540
            self.logger.error(
×
2541
                "arborist service is unavailable; skipping main arborist dbgap sync"
2542
            )
2543
            return False
×
2544
        return True
1✔
2545

2546
    def _pick_sync_type(self, visa):
1✔
2547
        """
2548
        Pick type of visa to parse according to the visa provider
2549
        """
2550
        sync_client = None
1✔
2551
        if visa.type in self.visa_types["ras"]:
1✔
2552
            sync_client = self.ras_sync_client
1✔
2553
        else:
2554
            raise Exception(
×
2555
                "Visa type {} not recognized. Configure in fence-config".format(
2556
                    visa.type
2557
                )
2558
            )
2559
        if not sync_client:
1✔
2560
            raise Exception("Sync client for {} not configured".format(visa.type))
×
2561

2562
        return sync_client
1✔
2563

2564
    def sync_single_user_visas(
        self, user, ga4gh_visas, sess=None, expires=None, skip_google_updates=False
    ):
        """
        Sync a single user's visas during login or DRS/data access

        IMPORTANT NOTE: THIS DOES NOT VALIDATE THE VISA. ENSURE THIS IS DONE
                        BEFORE THIS.

        Steps, in order: parse every visa (visas that fail to parse are
        logged and skipped), merge the resulting projects, process the user's
        projects, sync to the storage backend, then update Arborist if an
        arborist client is set.

        Args:
            user (userdatamodel.user.User): Fence user whose visas'
                                            authz info is being synced
            ga4gh_visas (list): a list of fence.models.GA4GHVisaV1 objects
                                that are ALREADY VALIDATED
            sess (sqlalchemy.orm.session.Session): database session
            expires (int): time at which synced Arborist policies and
                           inclusion in any GBAG are set to expire
            skip_google_updates (bool): True if google group updates should
                                        be skipped. False if otherwise.

        Return:
            list of successfully parsed visas

        Raises:
            EnvironmentError, AssertionError: re-raised after logging when
                the local user yaml file cannot be loaded
        """
        self.ras_sync_client = RASVisa(logger=self.logger)

        # only the first configured dbGaP entry drives single-user syncs
        dbgap_config = self.dbGaP[0]
        parse_consent_code = self._get_parse_consent_code(dbgap_config)
        enable_common_exchange_area_access = dbgap_config.get(
            "enable_common_exchange_area_access", False
        )
        study_common_exchange_areas = dbgap_config.get(
            "study_common_exchange_areas", {}
        )

        try:
            user_yaml = UserYAML.from_file(
                self.sync_from_local_yaml_file, encrypted=False, logger=self.logger
            )
        except (EnvironmentError, AssertionError) as e:
            self.logger.error(str(e))
            self.logger.error("aborting early")
            raise

        merged_projects = {}
        info = {}
        parsed_visas = []

        for visa in ga4gh_visas:
            sync_client = self._pick_sync_type(visa)
            encoded_visa = visa.ga4gh_visa

            try:
                visa_projects, info = sync_client._parse_single_visa(
                    user,
                    encoded_visa,
                    visa.expires,
                    parse_consent_code,
                )
            except Exception:
                self.logger.warning(
                    f"ignoring unsuccessfully parsed or expired visa: {encoded_visa}"
                )
                continue

            # later visas override earlier ones on identical project keys
            merged_projects = {**merged_projects, **visa_projects}
            parsed_visas.append(visa)

        info["user_id"] = user.id
        info["username"] = user.username

        user_projects = self.parse_projects({user.username: merged_projects})

        if parse_consent_code and enable_common_exchange_area_access:
            self.logger.info(
                f"using study to common exchange area mapping: {study_common_exchange_areas}"
            )

        self._process_user_projects(
            user_projects,
            enable_common_exchange_area_access,
            study_common_exchange_areas,
            dbgap_config,
            sess,
        )

        if parse_consent_code:
            self._grant_all_consents_to_c999_users(
                user_projects, user_yaml.project_to_resource
            )

        if user_projects:
            self.sync_to_storage_backend(
                user_projects,
                info,
                sess,
                expires=expires,
                skip_google_updates=skip_google_updates,
            )
        else:
            self.logger.info("No users for syncing")

        # update arborist db (user access)
        if not self.arborist_client:
            self.logger.error("No arborist client set; skipping arborist sync")
            return parsed_visas

        self.logger.info("Synchronizing arborist with authorization info...")
        success = self._update_authz_in_arborist(
            sess,
            user_projects,
            user_yaml=user_yaml,
            single_user_sync=True,
            expires=expires,
        )
        if success:
            self.logger.info("Finished synchronizing authorization info to arborist")
        else:
            self.logger.error(
                "Could not synchronize authorization info successfully to arborist"
            )

        return parsed_visas
1✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc