• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

uc-cdis / fence / 22198628345

19 Feb 2026 08:21PM UTC coverage: 75.0% (-0.002%) from 75.002%
22198628345

Pull #1334

github

mpsolano
DEV-3876: Add pubkey AuthN
Pull Request #1334: DEV-3876: Add pubkey AuthN

8442 of 11256 relevant lines covered (75.0%)

0.75 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

81.78
fence/sync/sync_users.py
1
import paramiko.ssh_exception
1✔
2
import backoff
1✔
3
import glob
1✔
4

5
import httpx
1✔
6
import jwt
1✔
7
import os
1✔
8
import re
1✔
9
import subprocess as sp
1✔
10
import yaml
1✔
11
import copy
1✔
12
import datetime
1✔
13
import uuid
1✔
14
import collections
1✔
15
import hashlib
1✔
16

17
from contextlib import contextmanager
1✔
18
from collections import defaultdict
1✔
19
from csv import DictReader
1✔
20
from io import StringIO
1✔
21
from stat import S_ISDIR
1✔
22

23
import paramiko
1✔
24
from cdislogging import get_logger
1✔
25
from email_validator import validate_email, EmailNotValidError
1✔
26
from gen3authz.client.arborist.errors import ArboristError, ArboristTimeoutError
1✔
27
from gen3users.validation import validate_user_yaml
1✔
28
from paramiko.proxy import ProxyCommand
1✔
29
from sqlalchemy.exc import IntegrityError
1✔
30
from sqlalchemy import func
1✔
31

32
from fence.config import config
1✔
33
from fence.models import (
1✔
34
    AccessPrivilege,
35
    AuthorizationProvider,
36
    Project,
37
    Tag,
38
    User,
39
    query_for_user,
40
    Client,
41
    IdentityProvider,
42
    get_project_to_authz_mapping,
43
)
44
from fence.resources.google.utils import get_or_create_proxy_group_id
1✔
45
from fence.resources.storage import StorageManager
1✔
46
from fence.resources.google.access_utils import update_google_groups_for_users
1✔
47
from fence.resources.google.access_utils import GoogleUpdateException
1✔
48
from fence.sync import utils
1✔
49
from fence.sync.passport_sync.ras_sync import RASVisa
1✔
50
from fence.utils import get_SQLAlchemyDriver, DEFAULT_BACKOFF_SETTINGS
1✔
51

52

53
def _format_policy_id(path, privilege):
1✔
54
    resource = ".".join(name for name in path.split("/") if name)
1✔
55
    return "{}-{}".format(resource, privilege)
1✔
56

57

58
def download_dir(sftp, remote_dir, local_dir):
    """
    Recursively download every file under remote_dir into local_dir

    Args:
        sftp: SFTP client exposing ``listdir_attr(path)`` and
            ``get(remote_path, local_path)``
        remote_dir(str): directory on the remote server to copy from
        local_dir(str): local directory receiving the files; it is created
            (including parents) when it does not exist yet
    Returns: None
    """
    # Files are written straight to `local_path`, so the destination directory
    # has to exist first -- this matters most for the recursive calls, whose
    # target is a sub-directory that nothing else creates.
    os.makedirs(local_dir, exist_ok=True)

    for item in sftp.listdir_attr(remote_dir):
        remote_path = remote_dir + "/" + item.filename
        local_path = os.path.join(local_dir, item.filename)
        if S_ISDIR(item.st_mode):
            download_dir(sftp, remote_path, local_path)
        else:
            sftp.get(remote_path, local_path)
×
75

76

77
def arborist_role_for_permission(permission):
    """
    Build the arborist role that carries exactly one permission.

    For the programs/projects in the existing fence access control model, a policy
    is generated for each combination of program/project and privilege. Each role
    involved holds a single permission whose id and method are both ``permission``
    and whose service is the wildcard "*".
    """
    action = {"service": "*", "method": permission}
    sole_permission = {"id": permission, "action": action}
    return {"id": permission, "permissions": [sole_permission]}
90

91

92
@contextmanager
1✔
93
def _read_file(filepath, encrypted=True, key=None, logger=None):
1✔
94
    """
95
    Context manager for reading and optionally decrypting file it only
96
    decrypts files encrypted by unix 'crypt' tool which is used by dbGaP.
97

98
    Args:
99
        filepath (str): path to the file
100
        encrypted (bool): whether the file is encrypted
101

102
    Returns:
103
        Generator[file-like class]: file like object for the file
104
    """
105
    if encrypted:
1✔
106
        p = sp.Popen(
×
107
            [
108
                "ccdecrypt",
109
                "-u",
110
                "-K",
111
                key,
112
                filepath,
113
            ],
114
            stdout=sp.PIPE,
115
            stderr=open(os.devnull, "w"),
116
            universal_newlines=True,
117
        )
118
        try:
×
119
            yield StringIO(p.communicate()[0])
×
120
        except UnicodeDecodeError:
×
121
            logger.error("Could not decode file. Check the decryption key.")
×
122
    else:
123
        f = open(filepath, "r")
1✔
124
        yield f
1✔
125
        f.close()
1✔
126

127

128
class UserYAML(object):
    """
    Representation of the information in a YAML file describing user, project, and ABAC
    information for access control.

    Every dictionary attribute falls back to an empty dict when the matching
    constructor argument is omitted or falsy.
    """

    def __init__(
        self,
        projects=None,
        user_info=None,
        policies=None,
        clients=None,
        authz=None,
        project_to_resource=None,
        logger=None,
        user_abac=None,
    ):
        # username -> {auth_id -> set of privileges}
        self.projects = projects or {}
        # username -> details (email, display_name, phone_number, tags, admin)
        self.user_info = user_info or {}
        # username -> {resource -> set of privileges}
        self.user_abac = user_abac or {}
        # username -> list of policies granted to that user
        self.policies = policies or {}
        self.clients = clients or {}
        self.authz = authz or {}
        # auth_id -> resource
        self.project_to_resource = project_to_resource or {}
        self.logger = logger

    @classmethod
    def from_file(cls, filepath, encrypted=True, key=None, logger=None):
        """
        Build a UserYAML from the user.yaml file at ``filepath``.

        Add access by "auth_id" to "self.projects" to update the Fence DB.
        Add access by "resource" to "self.user_abac" to update Arborist.

        Args:
            filepath (str): path to the user.yaml; when falsy nothing is read
                and an empty UserYAML is returned
            encrypted (bool): whether the file has to be decrypted first
            key (str): decryption key, only used when ``encrypted`` is true
            logger: optional logger; nothing is logged when it is None

        Returns:
            UserYAML: the parsed content of the file

        Raises:
            EnvironmentError: if a username is encountered more than once
        """
        data = {}
        if filepath:
            with _read_file(filepath, encrypted=encrypted, key=key, logger=logger) as f:
                file_contents = f.read()
                validate_user_yaml(file_contents)  # run user.yaml validation tests
                # safe_load returns None for an empty document; normalise to a
                # dict so the lookups below keep working
                data = yaml.safe_load(file_contents) or {}
        elif logger:
            logger.info("Did not sync a user.yaml, no file path provided.")

        projects = dict()
        user_info = dict()
        policies = dict()

        # resources should be the resource tree to construct in arborist
        user_abac = dict()

        # Fall back on rbac block if no authz. Remove when rbac in useryaml fully deprecated.
        if not data.get("authz") and data.get("rbac"):
            if logger:
                logger.info(
                    "No authz block found but rbac block present. Using rbac block"
                )
            data["authz"] = data["rbac"]

        # get user project mapping to arborist resources if it exists
        # (`or` also covers keys that are present but set to null)
        authz_block = data.get("authz") or dict()
        project_to_resource = authz_block.get("user_project_to_resource") or dict()

        # read projects and privileges for each user
        users = data.get("users") or {}
        for username, details in users.items():
            # a user listed without any details is parsed as None
            details = details or {}

            # users should occur only once each; skip if already processed
            # NOTE: `users` is a mapping, so its keys are already unique; this
            # guard is kept purely as a defensive check.
            if username in projects:
                msg = "invalid yaml file: user `{}` occurs multiple times".format(
                    username
                )
                if logger:
                    logger.error(msg)
                raise EnvironmentError(msg)

            privileges = {}
            resource_permissions = dict()
            for project in details.get("projects") or []:
                try:
                    granted = set(project["privilege"])
                    auth_id = project["auth_id"]
                except KeyError as e:
                    if logger:
                        logger.error("project {} missing field: {}".format(project, e))
                    continue
                privileges[auth_id] = granted

                # project may not have `resource` field.
                # prefer resource field;
                # if no resource or mapping, assume auth_id is resource.
                resource = project.get("resource", auth_id)

                if auth_id not in project_to_resource:
                    project_to_resource[auth_id] = resource
                # separate set object so the two mappings never share state
                resource_permissions[resource] = set(granted)

            user_info[username] = {
                "email": details.get("email", ""),
                "display_name": details.get("display_name", ""),
                "phone_number": details.get("phone_number", ""),
                "tags": details.get("tags", {}),
                "admin": details.get("admin", False),
            }
            if not details.get("email"):
                # no explicit email: use the username when it is a valid address
                try:
                    valid = validate_email(
                        username, allow_smtputf8=False, check_deliverability=False
                    )
                    user_info[username]["email"] = valid.email
                except EmailNotValidError:
                    pass
            projects[username] = privileges
            user_abac[username] = resource_permissions

            # list of policies we want to grant to this user, which get sent to arborist
            # to check if they're allowed to do certain things
            policies[username] = details.get("policies", [])

        if logger:
            logger.info(
                "Got user project to arborist resource mapping:\n{}".format(
                    str(project_to_resource)
                )
            )

        authz = data.get("authz") or dict()
        if not authz:
            # older version: resources in root, no `authz` section or `rbac` section
            if logger:
                logger.warning(
                    "access control YAML file is using old format (missing `authz`/`rbac`"
                    " section in the root); assuming that if it exists `resources` will"
                    " be on the root level, and continuing"
                )
            # we're going to throw it into the `authz` dictionary anyways, so the rest of
            # the code can pretend it's in the normal place that we expect
            resources = data.get("resources", [])
            # keep authz empty dict if resources is not specified
            if resources:
                authz["resources"] = resources

        clients = data.get("clients", {})

        return cls(
            projects=projects,
            user_info=user_info,
            user_abac=user_abac,
            policies=policies,
            clients=clients,
            authz=authz,
            project_to_resource=project_to_resource,
            logger=logger,
        )

    def persist_project_to_resource(self, db_session):
        """
        Store the mappings from Project.auth_id to authorization resource (Project.authz)

        The mapping comes from an external source, this function persists what was parsed
        into memory into the database for future use. Existing projects get their
        ``authz`` updated, missing ones are created, and the session is committed
        once at the end.

        Args:
            db_session: database session used to query, add and commit projects
        """
        for auth_id, authz_resource in self.project_to_resource.items():
            project = (
                db_session.query(Project).filter(Project.auth_id == auth_id).first()
            )
            if project:
                project.authz = authz_resource
            else:
                project = Project(name=auth_id, auth_id=auth_id, authz=authz_resource)
                db_session.add(project)
        db_session.commit()
1✔
296

297

298
class UserSyncer(object):
1✔
299
    def __init__(
        self,
        dbGaP,
        DB,
        project_mapping,
        storage_credentials=None,
        db_session=None,
        is_sync_from_dbgap_server=False,
        sync_from_local_csv_dir=None,
        sync_from_local_yaml_file=None,
        arborist=None,
        folder=None,
    ):
        """
        Syncs ACL files from dbGap to auth database and storage backends
        Args:
            dbGaP: a list of dict containing creds to access dbgap sftp; the
                "parent_to_child_studies_mapping" of every entry is merged
                into one mapping
            DB: database connection string
            project_mapping: a dict containing how dbgap ids map to projects
            storage_credentials: a dict containing creds for storage backends;
                a storage manager is only created when this is truthy
            db_session: database session, stored as ``self.session``
            is_sync_from_dbgap_server: stored as-is on the instance
            sync_from_local_csv_dir: stored as-is on the instance
            sync_from_local_yaml_file: stored as-is on the instance
            arborist:
                ArboristClient instance if the syncer should also create
                resources in arborist
            folder: a local folder where dbgap telemetry files will sync to
        """
        # where the access information comes from
        self.dbGaP = dbGaP
        self.is_sync_from_dbgap_server = is_sync_from_dbgap_server
        self.sync_from_local_csv_dir = sync_from_local_csv_dir
        self.sync_from_local_yaml_file = sync_from_local_yaml_file
        self.folder = folder

        # database / authorization handles
        self.session = db_session
        self.driver = get_SQLAlchemyDriver(DB)
        self.arborist_client = arborist
        self.project_mapping = project_mapping if project_mapping else {}

        # caches filled while syncing
        self._projects = {}
        self._created_roles = set()
        self._created_policies = set()
        self._dbgap_study_to_resources = {}

        log_level = "debug" if config["DEBUG"] is True else "info"
        self.logger = get_logger("user_syncer", log_level=log_level)

        # auth_source used for logging. username : [source1, source2]
        self.auth_source = defaultdict(set)
        self.visa_types = config.get("USERSYNC", {}).get("visa_types", {})

        # later dbGaP entries win when the same parent study appears twice
        merged_mapping = {}
        for dbgap_config in dbGaP:
            merged_mapping.update(
                dbgap_config.get("parent_to_child_studies_mapping", {})
            )
        self.parent_to_child_studies_mapping = merged_mapping

        if storage_credentials:
            self.storage_manager = StorageManager(
                storage_credentials, logger=self.logger
            )
        self.id_patterns = []
1✔
356

357
    @staticmethod
1✔
358
    def _match_pattern(filepath, id_patterns, encrypted=True):
1✔
359
        """
360
        Check if the filename matches dbgap access control file pattern
361

362
        Args:
363
            filepath (str): path to file
364
            encrypted (bool): whether the file is encrypted
365

366
        Returns:
367
            bool: whether the pattern matches
368
        """
369
        id_patterns.append(r"authentication_file_phs(\d{6}).(csv|txt)")
1✔
370
        for pattern in id_patterns:
1✔
371
            if encrypted:
1✔
372
                pattern += r".enc"
×
373
            pattern += r"$"
1✔
374
            # when converting the YAML from fence-config,
375
            # python reads it as Python string literal. So "\" turns into "\\"
376
            # which messes with the regex match
377
            pattern.replace("\\\\", "\\")
1✔
378
            if re.match(pattern, os.path.basename(filepath)):
1✔
379
                return True
1✔
380
        return False
1✔
381

382
    def _get_from_sftp_with_proxy(self, server, path):
        """
        Download all data from sftp sever to a local dir

        Unknown host keys are rejected, so the server's key has to be present
        in ``~/.ssh/known_hosts``. SSH errors are logged and not re-raised.

        Args:
            server (dict) : dictionary containing info to access sftp server.
                Keys read here: "host", "port" (default 22), "username",
                "private_key_filename" or "password", and optionally "proxy"
                and "proxy_user" to tunnel through a proxy host
            path (str): path to local directory

        Returns:
            None
        """
        proxy = None
        if server.get("proxy", "") != "":
            command = "ssh -oHostKeyAlgorithms=+ssh-rsa -i ~/.ssh/id_rsa {user}@{proxy} nc {host} {port}".format(
                user=server.get("proxy_user", ""),
                proxy=server.get("proxy", ""),
                host=server.get("host", ""),
                port=server.get("port", 22),
            )
            self.logger.info("SSH proxy command: {}".format(command))

            proxy = ProxyCommand(command)

        # try/finally so the proxy is released even when something other than
        # an SSHException propagates out of the block
        try:
            with paramiko.SSHClient() as client:
                client.set_log_channel(self.logger.name)

                # Load known host keys
                known_hosts_path = os.path.expanduser("~/.ssh/known_hosts")
                if os.path.exists(known_hosts_path):
                    client.load_host_keys(known_hosts_path)
                else:
                    self.logger.error(
                        "No known_hosts file found — rejecting unknown hosts - make sure the SFTP host key is present in known_hosts before attempting connection."
                    )

                client.set_missing_host_key_policy(paramiko.RejectPolicy())
                parameters = {
                    "hostname": str(server.get("host", "")),
                    "username": str(server.get("username", "")),
                    "port": int(server.get("port", 22)),
                }
                # key-based authentication wins over password authentication
                if server.get("private_key_filename"):
                    parameters["key_filename"] = str(
                        server.get("private_key_filename")
                    )
                else:
                    parameters["password"] = str(server.get("password", ""))
                if proxy:
                    parameters["sock"] = proxy

                self.logger.info(
                    "SSH connection hostname:port {}:{}".format(
                        parameters.get("hostname", "unknown"),
                        parameters.get("port", "unknown"),
                    )
                )
                try:
                    self._connect_with_ssh(ssh_client=client, parameters=parameters)

                    with client.open_sftp() as sftp:
                        download_dir(sftp, "./", path)
                except paramiko.ssh_exception.SSHException as e:
                    self.logger.error(f"SSH connection failed, error: {e}")
        finally:
            if proxy:
                proxy.close()
×
446

447
    @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
    def _connect_with_ssh(self, ssh_client, parameters):
        """
        Open the SSH connection, retried by the backoff decorator on any exception.

        Args:
            ssh_client: client object whose ``connect`` method is called
            parameters (dict): keyword arguments forwarded to ``connect``
        """
        connect_kwargs = dict(parameters)
        ssh_client.connect(**connect_kwargs)
1✔
450

451
    def _get_from_ftp_with_proxy(self, server, path):
        """
        Download data from ftp sever to a local dir

        Runs ``lftp`` with an http proxy and mirrors the remote working
        directory into ``path``. Failures are logged, not raised.

        Args:
            server (dict): dictionary containing information for accessing server.
                Keys read here: "username", "password", "host", "proxy"
            path(str): path to local files

        Returns:
            None
        """
        lftp_script = "set ftp:proxy http://{}; mirror . {}; exit".format(
            server.get("proxy", ""),
            path,
        )
        # An argument list (no shell) keeps credentials and host values from
        # being interpreted by a shell, unlike a formatted command string.
        argv = [
            "lftp",
            "-u",
            "{},{}".format(server.get("username", ""), server.get("password", "")),
        ]
        host = str(server.get("host", ""))
        if host:
            # an empty host contributed no argument in the shell form either
            argv.append(host)
        argv += ["-e", lftp_script]

        try:
            completed = sp.run(argv)
        except OSError as e:
            # e.g. lftp is not installed; keep this best-effort like before
            self.logger.error("Could not run lftp: {}".format(e))
            return
        if completed.returncode != 0:
            self.logger.error(
                "lftp exited with status {}".format(completed.returncode)
            )
×
472

473
    def _get_parse_consent_code(self, dbgap_config={}):
1✔
474
        return dbgap_config.get(
1✔
475
            "parse_consent_code", True
476
        )  # Should this really be true?
477

478
    def _parse_csv(self, file_dict, sess, dbgap_config={}, encrypted=True):
1✔
479
        """
480
        parse csv files to python dict
481

482
        Args:
483
            file_dict: a dictionary with key(file path) and value(privileges)
484
            sess: sqlalchemy session
485
            dbgap_config: a dictionary containing information about the dbGaP sftp server
486
                (comes from fence config)
487
            encrypted: boolean indicating whether those files are encrypted
488

489

490
        Return:
491
            Tuple[[dict, dict]]:
492
                (user_project, user_info) where user_project is a mapping from
493
                usernames to project permissions and user_info is a mapping
494
                from usernames to user details, such as email
495

496
        Example:
497

498
            (
499
                {
500
                    username: {
501
                        'project1': {'read-storage','write-storage'},
502
                        'project2': {'read-storage'},
503
                    }
504
                },
505
                {
506
                    username: {
507
                        'email': 'email@mail.com',
508
                        'display_name': 'display name',
509
                        'phone_number': '123-456-789',
510
                        'tags': {'dbgap_role': 'PI'}
511
                    }
512
                },
513
            )
514

515
        """
516
        user_projects = dict()
1✔
517
        user_info = defaultdict(dict)
1✔
518

519
        # parse dbGaP sftp server information
520
        dbgap_key = dbgap_config.get("decrypt_key", None)
1✔
521

522
        self.id_patterns += (
1✔
523
            [
524
                item.replace("\\\\", "\\")
525
                for item in dbgap_config.get("allowed_whitelist_patterns", [])
526
            ]
527
            if dbgap_config.get("allow_non_dbGaP_whitelist", False)
528
            else []
529
        )
530

531
        enable_common_exchange_area_access = dbgap_config.get(
1✔
532
            "enable_common_exchange_area_access", False
533
        )
534
        study_common_exchange_areas = dbgap_config.get(
1✔
535
            "study_common_exchange_areas", {}
536
        )
537
        parse_consent_code = self._get_parse_consent_code(dbgap_config)
1✔
538

539
        if parse_consent_code and enable_common_exchange_area_access:
1✔
540
            self.logger.info(
1✔
541
                f"using study to common exchange area mapping: {study_common_exchange_areas}"
542
            )
543

544
        project_id_patterns = [r"phs(\d{6})"]
1✔
545
        if "additional_allowed_project_id_patterns" in dbgap_config:
1✔
546
            patterns = dbgap_config.get("additional_allowed_project_id_patterns")
1✔
547
            patterns = [
1✔
548
                pattern.replace("\\\\", "\\") for pattern in patterns
549
            ]  # when converting the YAML from fence-config, python reads it as Python string literal. So "\" turns into "\\" which messes with the regex match
550
            project_id_patterns += patterns
1✔
551

552
        self.logger.info(f"Using these file paths: {file_dict.items()}")
1✔
553
        for filepath, privileges in file_dict.items():
1✔
554
            self.logger.info("Reading file {}".format(filepath))
1✔
555
            if os.stat(filepath).st_size == 0:
1✔
556
                self.logger.warning("Empty file {}".format(filepath))
×
557
                continue
×
558
            if not self._match_pattern(
1✔
559
                filepath, id_patterns=self.id_patterns, encrypted=encrypted
560
            ):
561
                self.logger.warning(
1✔
562
                    "Filename {} does not match dbgap access control filename pattern;"
563
                    " this could mean that the filename has an invalid format, or has"
564
                    " an unexpected .enc extension, or lacks the .enc extension where"
565
                    " expected. This file is NOT being processed by usersync!".format(
566
                        filepath
567
                    )
568
                )
569
                continue
1✔
570

571
            with _read_file(
1✔
572
                filepath, encrypted=encrypted, key=dbgap_key, logger=self.logger
573
            ) as f:
574
                csv = DictReader(f, quotechar='"', skipinitialspace=True)
1✔
575

576
                for row in csv:
1✔
577
                    username = row.get("login") or ""
1✔
578
                    if username == "":
1✔
579
                        continue
×
580

581
                    if dbgap_config.get("allow_non_dbGaP_whitelist", False):
1✔
582
                        phsid = (
1✔
583
                            row.get("phsid") or (row.get("project_id") or "")
584
                        ).split(".")
585
                    else:
586
                        phsid = (row.get("phsid") or "").split(".")
1✔
587

588
                    dbgap_project = phsid[0]
1✔
589
                    # There are issues where dbgap has a wrong entry in their whitelist. Since we do a bulk arborist request, there are wrong entries in it that invalidates the whole request causing other correct entries not to be added
590
                    skip = False
1✔
591
                    for pattern in project_id_patterns:
1✔
592
                        self.logger.debug(
1✔
593
                            "Checking pattern:{} with project_id:{}".format(
594
                                pattern, dbgap_project
595
                            )
596
                        )
597
                        if re.match(pattern, dbgap_project):
1✔
598
                            skip = False
1✔
599
                            break
1✔
600
                        else:
601
                            skip = True
1✔
602
                    if skip:
1✔
603
                        self.logger.warning(
1✔
604
                            "Skip processing from file {}, user {} with project {}".format(
605
                                filepath,
606
                                username,
607
                                dbgap_project,
608
                            )
609
                        )
610
                        continue
1✔
611
                    if len(phsid) > 1 and parse_consent_code:
1✔
612
                        consent_code = phsid[-1]
1✔
613

614
                        # c999 indicates full access to all consents and access
615
                        # to a study-specific exchange area
616
                        # access to at least one study-specific exchange area implies access
617
                        # to the parent study's common exchange area
618
                        #
619
                        # NOTE: Handling giving access to all consents is done at
620
                        #       a later time, when we have full information about possible
621
                        #       consents
622
                        self.logger.debug(
1✔
623
                            f"got consent code {consent_code} from dbGaP project "
624
                            f"{dbgap_project}"
625
                        )
626
                        if (
1✔
627
                            consent_code == "c999"
628
                            and enable_common_exchange_area_access
629
                            and dbgap_project in study_common_exchange_areas
630
                        ):
631
                            self.logger.info(
1✔
632
                                "found study with consent c999 and Fence "
633
                                "is configured to parse exchange area data. Giving user "
634
                                f"{username} {privileges} privileges in project: "
635
                                f"{study_common_exchange_areas[dbgap_project]}."
636
                            )
637
                            self._add_dbgap_project_for_user(
1✔
638
                                study_common_exchange_areas[dbgap_project],
639
                                privileges,
640
                                username,
641
                                sess,
642
                                user_projects,
643
                                dbgap_config,
644
                            )
645

646
                        dbgap_project += "." + consent_code
1✔
647

648
                    self._add_children_for_dbgap_project(
1✔
649
                        dbgap_project,
650
                        privileges,
651
                        username,
652
                        sess,
653
                        user_projects,
654
                        dbgap_config,
655
                    )
656

657
                    display_name = row.get("user name") or ""
1✔
658
                    tags = {"dbgap_role": row.get("role") or ""}
1✔
659

660
                    # some dbgap telemetry files have information about a researchers PI
661
                    if "downloader for" in row:
1✔
662
                        tags["pi"] = row["downloader for"]
1✔
663

664
                    # prefer name over previous "downloader for" if it exists
665
                    if "downloader for names" in row:
1✔
666
                        tags["pi"] = row["downloader for names"]
×
667

668
                    user_info[username] = {
1✔
669
                        "email": row.get("email")
670
                        or user_info[username].get("email")
671
                        or "",
672
                        "display_name": display_name,
673
                        "phone_number": row.get("phone")
674
                        or user_info[username].get("phone_number")
675
                        or "",
676
                        "tags": tags,
677
                    }
678

679
                    self._process_dbgap_project(
1✔
680
                        dbgap_project,
681
                        privileges,
682
                        username,
683
                        sess,
684
                        user_projects,
685
                        dbgap_config,
686
                    )
687

688
        return user_projects, user_info
1✔
689

690
    def _get_children(self, dbgap_project):
1✔
691
        return self.parent_to_child_studies_mapping.get(dbgap_project.split(".")[0])
1✔
692

693
    def _add_children_for_dbgap_project(
1✔
694
        self, dbgap_project, privileges, username, sess, user_projects, dbgap_config
695
    ):
696
        """
697
        Adds the configured child studies for the given dbgap_project, adding it to the provided user_projects. If
698
        parse_consent_code is true, then the consents granted in the provided dbgap_project will also be granted to the
699
        child studies.
700
        """
701
        parent_phsid = dbgap_project
1✔
702
        parse_consent_code = self._get_parse_consent_code(dbgap_config)
1✔
703
        child_suffix = ""
1✔
704
        if parse_consent_code and re.match(
1✔
705
            config["DBGAP_ACCESSION_WITH_CONSENT_REGEX"], dbgap_project
706
        ):
707
            parent_phsid_parts = dbgap_project.split(".")
1✔
708
            parent_phsid = parent_phsid_parts[0]
1✔
709
            child_suffix = "." + parent_phsid_parts[1]
1✔
710

711
        if parent_phsid not in self.parent_to_child_studies_mapping:
1✔
712
            return
1✔
713

714
        self.logger.info(
1✔
715
            f"found parent study {parent_phsid} and Fence "
716
            "is configured to provide additional access to child studies. Giving user "
717
            f"{username} {privileges} privileges in projects: "
718
            f"{{k + child_suffix: v + child_suffix for k, v in self.parent_to_child_studies_mapping.items()}}."
719
        )
720
        child_studies = self.parent_to_child_studies_mapping.get(parent_phsid, [])
1✔
721
        for child_study in child_studies:
1✔
722
            self._add_dbgap_project_for_user(
1✔
723
                child_study + child_suffix,
724
                privileges,
725
                username,
726
                sess,
727
                user_projects,
728
                dbgap_config,
729
            )
730

731
    def _add_dbgap_project_for_user(
1✔
732
        self, dbgap_project, privileges, username, sess, user_projects, dbgap_config
733
    ):
734
        """
735
        Helper function for csv parsing that adds a given dbgap project to Fence/Arborist
736
        and then updates the dictionary containing all user's project access
737
        """
738
        if dbgap_project not in self._projects:
1✔
739
            self.logger.debug(
1✔
740
                "creating Project in fence for dbGaP study: {}".format(dbgap_project)
741
            )
742

743
            project = self._get_or_create(sess, Project, auth_id=dbgap_project)
1✔
744

745
            # need to add dbgap project to arborist
746
            if self.arborist_client:
1✔
747
                self._determine_arborist_resource(dbgap_project, dbgap_config)
1✔
748

749
            if project.name is None:
1✔
750
                project.name = dbgap_project
1✔
751
            self._projects[dbgap_project] = project
1✔
752
        phsid_privileges = {dbgap_project: set(privileges)}
1✔
753
        if username in user_projects:
1✔
754
            user_projects[username].update(phsid_privileges)
1✔
755
        else:
756
            user_projects[username] = phsid_privileges
1✔
757

758
    @staticmethod
1✔
759
    def sync_two_user_info_dict(user_info1, user_info2):
1✔
760
        """
761
        Merge user_info1 into user_info2. Values in user_info2 are overriden
762
        by values in user_info1. user_info2 ends up containing the merged dict.
763

764
        Args:
765
            user_info1 (dict): nested dict
766
            user_info2 (dict): nested dict
767

768
            Example:
769
            {username: {'email': 'abc@email.com'}}
770

771
        Returns:
772
            None
773
        """
774
        user_info2.update(user_info1)
1✔
775

776
    def sync_two_phsids_dict(
        self,
        phsids1,
        phsids2,
        source1=None,
        source2=None,
        phsids2_overrides_phsids1=True,
    ):
        """
        Merge phsids1 into phsids2 in place; phsids2 ends up containing the
        merged dict. `source1` and `source2` are recorded in `self.auth_source`
        for the affected users.

        Args:
            phsids1, phsids2: nested dicts mapping phsids to sets of permissions

            source1, source2: source of authz information (eg. dbgap, user_yaml, visas)

            phsids2_overrides_phsids1 (bool): only matters for users that already
                have a non-empty entry in phsids2 (see below)

            Example:
            {
                username: {
                    phsid1: {'read-storage','write-storage'},
                    phsid2: {'read-storage'},
                }
            }

        Return:
            None

        Explanation, for each user in phsids1:

            - user missing from phsids2 (or mapped to an empty value): the
              user's dict from phsids1 is stored in phsids2 as-is (same object,
              not a copy) and source1 is recorded.

            - user present in phsids2 and `phsids2_overrides_phsids1` is true:
              both sources are recorded and the privileges are combined, i.e.

                {user1: {phsid1: privilege1}} + {user1: {phsid2: privilege2}}

                phsid1 != phsid2 -> {user1: {phsid1: privilege1, phsid2: privilege2}}
                phsid1 == phsid2 -> {user1: {phsid1: union(privilege1, privilege2)}}

            - user present in phsids2 and `phsids2_overrides_phsids1` is false:
              phsids2 is left untouched for that user; only source2 is recorded.
        """
        for user, incoming_projects in phsids1.items():
            existing_projects = phsids2.get(user)

            if not existing_projects:
                if source1:
                    self.auth_source[user].add(source1)
                phsids2[user] = incoming_projects
                continue

            if not phsids2_overrides_phsids1:
                if source2:
                    self.auth_source[user].add(source2)
                continue

            for source in (source1, source2):
                if source:
                    self.auth_source[user].add(source)
            for phsid, privileges in incoming_projects.items():
                existing_projects.setdefault(phsid, set()).update(privileges)
×
840

841
    def sync_to_db_and_storage_backend(
        self,
        user_project,
        user_info,
        sess,
        do_not_revoke_from_db_and_storage=False,
        expires=None,
    ):
        """
        Sync user access control to the database and the storage backend.

        Args:
            user_project (dict): a dictionary of

                {
                    username: {
                        'project1': {'read-storage','write-storage'},
                        'project2': {'read-storage'}
                    }
                }

            user_info (dict): a dictionary of {username: user_info{}}
            sess: a sqlalchemy session
            do_not_revoke_from_db_and_storage (bool): when true, the revoke
                steps and the admin validation step are skipped
            expires: forwarded to the storage grant calls

        Return:
            None
        """
        # accumulator for Google group changes; only created in bulk mode
        google_bulk_mapping = {} if config["GOOGLE_BULK_UPDATES"] else None

        self._init_projects(user_project, sess)

        # index 0 -> dbGaP, index 1 -> fence
        auth_provider_list = [
            self._get_or_create(sess, AuthorizationProvider, name=provider_name)
            for provider_name in ("dbGaP", "fence")
        ]

        existing_pairs = {
            (privilege.user.username.lower(), privilege.project.auth_id)
            for privilege in sess.query(AccessPrivilege).all()
        }

        # usernames are compared case-insensitively against the db, so build
        # lowercased views of what is being synced
        user_project_lowercase = {
            username.lower(): projects for username, projects in user_project.items()
        }
        requested_pairs = {
            (username.lower(), project)
            for username, projects in user_project.items()
            for project in projects
        }
        user_info_lowercase = {
            username.lower(): info for username, info in user_info.items()
        }

        to_delete = existing_pairs - requested_pairs
        to_add = requested_pairs - existing_pairs
        to_update = existing_pairs & requested_pairs

        # user records keep their original casing, so the non-lowered
        # user_info dict is used here
        self._upsert_userinfo(sess, user_info)

        if not do_not_revoke_from_db_and_storage:
            self._revoke_from_storage(
                to_delete, sess, google_bulk_mapping=google_bulk_mapping
            )
            self._revoke_from_db(sess, to_delete)

        # brand new access: storage first, then db
        self._grant_from_storage(
            to_add,
            user_project_lowercase,
            sess,
            google_bulk_mapping=google_bulk_mapping,
            expires=expires,
        )
        self._grant_from_db(
            sess,
            to_add,
            user_info_lowercase,
            user_project_lowercase,
            auth_provider_list,
        )

        # access that already existed is re-granted and refreshed
        self._grant_from_storage(
            to_update,
            user_project_lowercase,
            sess,
            google_bulk_mapping=google_bulk_mapping,
            expires=expires,
        )
        self._update_from_db(sess, to_update, user_project_lowercase)

        if not do_not_revoke_from_db_and_storage:
            self._validate_and_update_user_admin(sess, user_info_lowercase)

        sess.commit()

        if config["GOOGLE_BULK_UPDATES"]:
            self.logger.info("Doing bulk Google update...")
            update_google_groups_for_users(google_bulk_mapping)
            self.logger.info("Bulk Google update done!")

        sess.commit()
1✔
950

951
    def sync_to_storage_backend(
        self, user_project, user_info, sess, expires, skip_google_updates=False
    ):
        """
        Sync user access control to the storage backend with a given expiration.

        Args:
            user_project (dict): a dictionary of

                {
                    username: {
                        'project1': {'read-storage','write-storage'},
                        'project2': {'read-storage'}
                    }
                }

            user_info (dict): a dictionary of attributes for a user.
            sess: a sqlalchemy session
            expires (int): time at which synced Arborist policies and
                   inclusion in any GBAG are set to expire
            skip_google_updates (bool): True if google group updates should be skipped. False if otherwise.
        Return:
            None

        Raises:
            Exception: when `expires` is falsy
        """
        if not expires:
            raise Exception(
                f"sync to storage backend requires an expiration. you provided: {expires}"
            )

        google_group_user_mapping = None
        if config["GOOGLE_BULK_UPDATES"]:
            google_group_user_mapping = {}
            get_or_create_proxy_group_id(
                expires=expires,
                user_id=user_info["user_id"],
                username=user_info["username"],
                session=sess,
                storage_manager=self.storage_manager,
            )

        # TODO: eventually it'd be nice to remove this step but it's required
        #       so that grant_from_storage can determine what storage backends
        #       are needed for a project.
        self._init_projects(user_project, sess)

        # usernames are handled case-insensitively: key everything by the
        # lowercased username
        user_project_lowercase = {
            username.lower(): projects for username, projects in user_project.items()
        }
        to_add = {
            (username.lower(), project)
            for username, projects in user_project.items()
            for project in projects
        }

        # the single user's info is upserted under its lowercased username
        lowered_username = user_info["username"].lower()
        self._upsert_userinfo(sess, {lowered_username: user_info})

        if not skip_google_updates:
            self._grant_from_storage(
                to_add,
                user_project_lowercase,
                sess,
                google_bulk_mapping=google_group_user_mapping,
                expires=expires,
            )

            if config["GOOGLE_BULK_UPDATES"]:
                self.logger.info("Updating user's google groups ...")
                update_google_groups_for_users(google_group_user_mapping)
                self.logger.info("Google groups update done!!")

        sess.commit()
1✔
1025

1026
    def _revoke_from_db(self, sess, to_delete):
1✔
1027
        """
1028
        Revoke user access to projects in the auth database
1029

1030
        Args:
1031
            sess: sqlalchemy session
1032
            to_delete: a set of (username, project.auth_id) to be revoked from db
1033
        Return:
1034
            None
1035
        """
1036
        for username, project_auth_id in to_delete:
1✔
1037
            q = (
1✔
1038
                sess.query(AccessPrivilege)
1039
                .filter(AccessPrivilege.project.has(auth_id=project_auth_id))
1040
                .join(AccessPrivilege.user)
1041
                .filter(func.lower(User.username) == username)
1042
                .all()
1043
            )
1044
            for access in q:
1✔
1045
                self.logger.info(
1✔
1046
                    "revoke {} access to {} in db".format(username, project_auth_id)
1047
                )
1048
                sess.delete(access)
1✔
1049

1050
    def _validate_and_update_user_admin(self, sess, user_info):
        """
        Strip the admin flag from every admin user in the db whose lowercased
        username is not a key of `user_info` (i.e. not in the yaml/csv files).

        Args:
            sess: sqlalchemy session
            user_info: a dict of
            {
                username: {
                    'email': email,
                    'display_name': display_name,
                    'phone_number': phonenum,
                    'tags': {'k1':'v1', 'k2': 'v2'}
                    'admin': is_admin
                }
            }
        Returns:
            None
        """
        current_admins = sess.query(User).filter_by(is_admin=True).all()
        for admin_user in current_admins:
            if admin_user.username.lower() in user_info:
                continue

            admin_user.is_admin = False
            sess.add(admin_user)
            self.logger.info(
                "remove admin access from {} in db".format(
                    admin_user.username.lower()
                )
            )
1078

1079
    def _update_from_db(self, sess, to_update, user_project):
1✔
1080
        """
1081
        Update user access to projects in the auth database
1082

1083
        Args:
1084
            sess: sqlalchemy session
1085
            to_update:
1086
                a set of (username, project.auth_id) to be updated from db
1087

1088
        Return:
1089
            None
1090
        """
1091

1092
        for username, project_auth_id in to_update:
1✔
1093
            q = (
1✔
1094
                sess.query(AccessPrivilege)
1095
                .filter(AccessPrivilege.project.has(auth_id=project_auth_id))
1096
                .join(AccessPrivilege.user)
1097
                .filter(func.lower(User.username) == username)
1098
                .all()
1099
            )
1100
            for access in q:
1✔
1101
                access.privilege = user_project[username][project_auth_id]
1✔
1102
                self.logger.info(
1✔
1103
                    "update {} with {} access to {} in db".format(
1104
                        username, access.privilege, project_auth_id
1105
                    )
1106
                )
1107

1108
    def _grant_from_db(self, sess, to_add, user_info, user_project, auth_provider_list):
1✔
1109
        """
1110
        Grant user access to projects in the auth database
1111
        Args:
1112
            sess: sqlalchemy session
1113
            to_add: a set of (username, project.auth_id) to be granted
1114
            user_project:
1115
                a dictionary of {username: {project: {'read','write'}}
1116
        Return:
1117
            None
1118
        """
1119
        for username, project_auth_id in to_add:
1✔
1120
            u = query_for_user(session=sess, username=username)
1✔
1121

1122
            auth_provider = auth_provider_list[0]
1✔
1123
            if "dbgap_role" not in user_info[username]["tags"]:
1✔
1124
                auth_provider = auth_provider_list[1]
1✔
1125
            user_access = AccessPrivilege(
1✔
1126
                user=u,
1127
                project=self._projects[project_auth_id],
1128
                privilege=list(user_project[username][project_auth_id]),
1129
                auth_provider=auth_provider,
1130
            )
1131
            self.logger.info(
1✔
1132
                "grant user {} to {} with access {}".format(
1133
                    username, user_access.project, user_access.privilege
1134
                )
1135
            )
1136
            sess.add(user_access)
1✔
1137

1138
    def _upsert_userinfo(self, sess, user_info):
1✔
1139
        """
1140
        update user info to database.
1141

1142
        Args:
1143
            sess: sqlalchemy session
1144
            user_info:
1145
                a dict of {username: {display_name, phone_number, tags, admin}
1146

1147
        Return:
1148
            None
1149
        """
1150

1151
        for username in user_info:
1✔
1152
            u = query_for_user(session=sess, username=username)
1✔
1153

1154
            if u is None:
1✔
1155
                self.logger.info("create user {}".format(username))
1✔
1156
                u = User(username=username)
1✔
1157
                sess.add(u)
1✔
1158

1159
            if self.arborist_client:
1✔
1160
                self.arborist_client.create_user({"name": username})
1✔
1161

1162
            u.email = user_info[username].get("email", "")
1✔
1163
            u.display_name = user_info[username].get("display_name", "")
1✔
1164
            u.phone_number = user_info[username].get("phone_number", "")
1✔
1165
            u.is_admin = user_info[username].get("admin", False)
1✔
1166

1167
            idp_name = user_info[username].get("idp_name", "")
1✔
1168
            if idp_name and not u.identity_provider:
1✔
1169
                idp = (
×
1170
                    sess.query(IdentityProvider)
1171
                    .filter(IdentityProvider.name == idp_name)
1172
                    .first()
1173
                )
1174
                if not idp:
×
1175
                    idp = IdentityProvider(name=idp_name)
×
1176
                u.identity_provider = idp
×
1177

1178
            # do not update if there is no tag
1179
            if not user_info[username].get("tags"):
1✔
1180
                continue
1✔
1181

1182
            # remove user db tags if they are not shown in new tags
1183
            for tag in u.tags:
1✔
1184
                if tag.key not in user_info[username]["tags"]:
1✔
1185
                    u.tags.remove(tag)
1✔
1186

1187
            # sync
1188
            for k, v in user_info[username]["tags"].items():
1✔
1189
                found = False
1✔
1190
                for tag in u.tags:
1✔
1191
                    if tag.key == k:
1✔
1192
                        found = True
1✔
1193
                        tag.value = v
1✔
1194
                # create new tag if not found
1195
                if not found:
1✔
1196
                    tag = Tag(key=k, value=v)
1✔
1197
                    u.tags.append(tag)
1✔
1198

1199
    def _revoke_from_storage(self, to_delete, sess, google_bulk_mapping=None):
1✔
1200
        """
1201
        If a project have storage backend, revoke user's access to buckets in
1202
        the storage backend.
1203

1204
        Args:
1205
            to_delete: a set of (username, project.auth_id) to be revoked
1206

1207
        Return:
1208
            None
1209
        """
1210
        for username, project_auth_id in to_delete:
1✔
1211
            project = (
1✔
1212
                sess.query(Project).filter(Project.auth_id == project_auth_id).first()
1213
            )
1214
            for sa in project.storage_access:
1✔
1215
                if not hasattr(self, "storage_manager"):
1✔
1216
                    self.logger.error(
×
1217
                        (
1218
                            "CANNOT revoke {} access to {} in {} because there is NO "
1219
                            "configured storage accesses at all. See configuration. "
1220
                            "Continuing anyway..."
1221
                        ).format(username, project_auth_id, sa.provider.name)
1222
                    )
1223
                    continue
×
1224

1225
                self.logger.info(
1✔
1226
                    "revoke {} access to {} in {}".format(
1227
                        username, project_auth_id, sa.provider.name
1228
                    )
1229
                )
1230
                self.storage_manager.revoke_access(
1✔
1231
                    provider=sa.provider.name,
1232
                    username=username,
1233
                    project=project,
1234
                    session=sess,
1235
                    google_bulk_mapping=google_bulk_mapping,
1236
                )
1237

1238
    def _grant_from_storage(
1✔
1239
        self, to_add, user_project, sess, google_bulk_mapping=None, expires=None
1240
    ):
1241
        """
1242
        If a project have storage backend, grant user's access to buckets in
1243
        the storage backend.
1244

1245
        Args:
1246
            to_add: a set of (username, project.auth_id)  to be granted
1247
            user_project: a dictionary like:
1248

1249
                    {username: {phsid: {'read-storage','write-storage'}}}
1250

1251
        Return:
1252
            dict of the users' storage usernames to their user_projects and the respective storage access.
1253
        """
1254
        storage_user_to_sa_and_user_project = defaultdict()
1✔
1255
        for username, project_auth_id in to_add:
1✔
1256
            project = self._projects[project_auth_id]
1✔
1257
            for sa in project.storage_access:
1✔
1258
                access = list(user_project[username][project_auth_id])
1✔
1259
                if not hasattr(self, "storage_manager"):
1✔
1260
                    self.logger.error(
×
1261
                        (
1262
                            "CANNOT grant {} access {} to {} in {} because there is NO "
1263
                            "configured storage accesses at all. See configuration. "
1264
                            "Continuing anyway..."
1265
                        ).format(username, access, project_auth_id, sa.provider.name)
1266
                    )
1267
                    continue
×
1268

1269
                self.logger.info(
1✔
1270
                    "grant {} access {} to {} in {}".format(
1271
                        username, access, project_auth_id, sa.provider.name
1272
                    )
1273
                )
1274
                storage_username = self.storage_manager.grant_access(
1✔
1275
                    provider=sa.provider.name,
1276
                    username=username,
1277
                    project=project,
1278
                    access=access,
1279
                    session=sess,
1280
                    google_bulk_mapping=google_bulk_mapping,
1281
                    expires=expires,
1282
                )
1283

1284
                storage_user_to_sa_and_user_project[storage_username] = (sa, project)
1✔
1285
        return storage_user_to_sa_and_user_project
1✔
1286

1287
    def _init_projects(self, user_project, sess):
1✔
1288
        """
1289
        initialize projects
1290
        """
1291

1292
        if self.project_mapping:
1✔
1293
            for projects in list(self.project_mapping.values()):
1✔
1294
                for p in projects:
1✔
1295
                    self.logger.debug(
1✔
1296
                        "creating Project with info from project_mapping: {}".format(p)
1297
                    )
1298
                    project = self._get_or_create(sess, Project, **p)
1✔
1299
                    self._projects[p["auth_id"]] = project
1✔
1300
        for _, projects in user_project.items():
1✔
1301
            for auth_id in list(projects.keys()):
1✔
1302
                project = sess.query(Project).filter(Project.auth_id == auth_id).first()
1✔
1303
                if not project:
1✔
1304
                    data = {"name": auth_id, "auth_id": auth_id}
1✔
1305
                    try:
1✔
1306
                        project = self._get_or_create(sess, Project, **data)
1✔
1307
                    except IntegrityError as e:
×
1308
                        sess.rollback()
×
1309
                        self.logger.error(
×
1310
                            f"Project {auth_id} already exists. Detail {str(e)}"
1311
                        )
1312
                        raise Exception(
×
1313
                            "Project {} already exists. Detail {}. Please contact your system administrator.".format(
1314
                                auth_id, str(e)
1315
                            )
1316
                        )
1317
                if auth_id not in self._projects:
1✔
1318
                    self._projects[auth_id] = project
1✔
1319

1320
    @staticmethod
1✔
1321
    def _get_or_create(sess, model, **kwargs):
1✔
1322
        instance = sess.query(model).filter_by(**kwargs).first()
1✔
1323
        if not instance:
1✔
1324
            instance = model(**kwargs)
1✔
1325
            sess.add(instance)
1✔
1326
        return instance
1✔
1327

1328
    def _process_dbgap_files(self, dbgap_config, sess):
1✔
1329
        """
1330
        Args:
1331
            dbgap_config : a dictionary containing information about a single
1332
                           dbgap sftp server (from fence config)
1333
            sess: database session
1334

1335
        Return:
1336
            user_projects (dict)
1337
            user_info (dict)
1338
        """
1339
        dbgap_file_list = []
1✔
1340
        hostname = dbgap_config["info"]["host"]
1✔
1341
        username = dbgap_config["info"]["username"]
1✔
1342
        encrypted = dbgap_config["info"].get("encrypted", True)
1✔
1343
        folderdir = os.path.join(str(self.folder), str(hostname), str(username))
1✔
1344

1345
        try:
1✔
1346
            if os.path.exists(folderdir):
1✔
1347
                dbgap_file_list = glob.glob(
×
1348
                    os.path.join(folderdir, "*")
1349
                )  # get lists of file from folder
1350
            else:
1351
                self.logger.info("Downloading files from: {}".format(hostname))
1✔
1352
                dbgap_file_list = self._download(dbgap_config)
1✔
1353
        except Exception as e:
1✔
1354
            self.logger.error(e)
1✔
1355
            exit(1)
1✔
1356
        self.logger.info("dbgap files: {}".format(dbgap_file_list))
×
1357
        user_projects, user_info = self._get_user_permissions_from_csv_list(
×
1358
            dbgap_file_list,
1359
            encrypted=encrypted,
1360
            session=sess,
1361
            dbgap_config=dbgap_config,
1362
        )
1363

1364
        user_projects = self.parse_projects(user_projects)
×
1365
        return user_projects, user_info
×
1366

1367
    def _get_user_permissions_from_csv_list(
1✔
1368
        self, file_list, encrypted, session, dbgap_config={}
1369
    ):
1370
        """
1371
        Args:
1372
            file_list: list of files (represented as strings)
1373
            encrypted: boolean indicating whether those files are encrypted
1374
            session: sqlalchemy session
1375
            dbgap_config: a dictionary containing information about the dbGaP sftp server
1376
                    (comes from fence config)
1377

1378
        Return:
1379
            user_projects (dict)
1380
            user_info (dict)
1381
        """
1382
        permissions = [{"read-storage", "read"} for _ in file_list]
1✔
1383
        user_projects, user_info = self._parse_csv(
1✔
1384
            dict(list(zip(file_list, permissions))),
1385
            sess=session,
1386
            dbgap_config=dbgap_config,
1387
            encrypted=encrypted,
1388
        )
1389
        return user_projects, user_info
1✔
1390

1391
    def _merge_multiple_local_csv_files(
1✔
1392
        self, dbgap_file_list, encrypted, dbgap_configs, session
1393
    ):
1394
        """
1395
        Args:
1396
            dbgap_file_list (list): a list of whitelist file locations stored locally
1397
            encrypted (bool): whether the file is encrypted (comes from fence config)
1398
            dbgap_configs (list): list of dictionaries containing information about the dbgap server (comes from fence config)
1399
            session (sqlalchemy.Session): database session
1400

1401
        Return:
1402
            merged_user_projects (dict)
1403
            merged_user_info (dict)
1404
        """
1405
        merged_user_projects = {}
1✔
1406
        merged_user_info = {}
1✔
1407

1408
        for dbgap_config in dbgap_configs:
1✔
1409
            user_projects, user_info = self._get_user_permissions_from_csv_list(
1✔
1410
                dbgap_file_list,
1411
                encrypted,
1412
                session=session,
1413
                dbgap_config=dbgap_config,
1414
            )
1415
            self.sync_two_user_info_dict(user_info, merged_user_info)
1✔
1416
            self.sync_two_phsids_dict(user_projects, merged_user_projects)
1✔
1417
        return merged_user_projects, merged_user_info
1✔
1418

1419
    def _merge_multiple_dbgap_sftp(self, dbgap_servers, sess):
1✔
1420
        """
1421
        Args:
1422
            dbgap_servers : a list of dictionaries each containging config on
1423
                           dbgap sftp server (comes from fence config)
1424
            sess: database session
1425

1426
        Return:
1427
            merged_user_projects (dict)
1428
            merged_user_info (dict)
1429
        """
1430
        merged_user_projects = {}
1✔
1431
        merged_user_info = {}
1✔
1432
        for dbgap in dbgap_servers:
1✔
1433
            user_projects, user_info = self._process_dbgap_files(dbgap, sess)
1✔
1434
            # merge into merged_user_info
1435
            # user_info overrides original info in merged_user_info
1436
            self.sync_two_user_info_dict(user_info, merged_user_info)
1✔
1437

1438
            # merge all access info dicts into "merged_user_projects".
1439
            # the access info is combined - if the user_projects access is
1440
            # ["read"] and the merged_user_projects is ["read-storage"], the
1441
            # resulting access is ["read", "read-storage"].
1442
            self.sync_two_phsids_dict(user_projects, merged_user_projects)
1✔
1443
        return merged_user_projects, merged_user_info
1✔
1444

1445
    def parse_projects(self, user_projects):
        """
        Return a copy of `user_projects` whose keys are lowercased; values are
        passed through untouched.
        """
        lowered = {}
        for key, value in user_projects.items():
            lowered[key.lower()] = value
        return lowered
1✔
1450

1451
    def _process_dbgap_project(
1✔
1452
        self, dbgap_project, privileges, username, sess, user_projects, dbgap_config
1453
    ):
1454
        if dbgap_project not in self.project_mapping:
1✔
1455
            self._add_dbgap_project_for_user(
1✔
1456
                dbgap_project,
1457
                privileges,
1458
                username,
1459
                sess,
1460
                user_projects,
1461
                dbgap_config,
1462
            )
1463

1464
        for element_dict in self.project_mapping.get(dbgap_project, []):
1✔
1465
            try:
1✔
1466
                phsid_privileges = {element_dict["auth_id"]: set(privileges)}
1✔
1467

1468
                # need to add dbgap project to arborist
1469
                if self.arborist_client:
1✔
1470
                    self._determine_arborist_resource(
1✔
1471
                        element_dict["auth_id"], dbgap_config
1472
                    )
1473

1474
                if username not in user_projects:
1✔
1475
                    user_projects[username] = {}
1✔
1476
                user_projects[username].update(phsid_privileges)
1✔
1477

1478
            except ValueError as e:
×
1479
                self.logger.info(e)
×
1480

1481
    def _process_user_projects(
1✔
1482
        self,
1483
        user_projects,
1484
        enable_common_exchange_area_access,
1485
        study_common_exchange_areas,
1486
        dbgap_config,
1487
        sess,
1488
    ):
1489
        user_projects_to_modify = copy.deepcopy(user_projects)
1✔
1490
        for username in user_projects.keys():
1✔
1491
            for project in user_projects[username].keys():
1✔
1492
                phsid = project.split(".")
1✔
1493
                dbgap_project = phsid[0]
1✔
1494
                privileges = user_projects[username][project]
1✔
1495
                if len(phsid) > 1 and self._get_parse_consent_code(dbgap_config):
1✔
1496
                    consent_code = phsid[-1]
1✔
1497

1498
                    # c999 indicates full access to all consents and access
1499
                    # to a study-specific exchange area
1500
                    # access to at least one study-specific exchange area implies access
1501
                    # to the parent study's common exchange area
1502
                    #
1503
                    # NOTE: Handling giving access to all consents is done at
1504
                    #       a later time, when we have full information about possible
1505
                    #       consents
1506
                    self.logger.debug(
1✔
1507
                        f"got consent code {consent_code} from dbGaP project "
1508
                        f"{dbgap_project}"
1509
                    )
1510
                    if (
1✔
1511
                        consent_code == "c999"
1512
                        and enable_common_exchange_area_access
1513
                        and dbgap_project in study_common_exchange_areas
1514
                    ):
1515
                        self.logger.info(
1✔
1516
                            "found study with consent c999 and Fence "
1517
                            "is configured to parse exchange area data. Giving user "
1518
                            f"{username} {privileges} privileges in project: "
1519
                            f"{study_common_exchange_areas[dbgap_project]}."
1520
                        )
1521
                        self._add_dbgap_project_for_user(
1✔
1522
                            study_common_exchange_areas[dbgap_project],
1523
                            privileges,
1524
                            username,
1525
                            sess,
1526
                            user_projects_to_modify,
1527
                            dbgap_config,
1528
                        )
1529

1530
                    dbgap_project += "." + consent_code
1✔
1531

1532
                self._process_dbgap_project(
1✔
1533
                    dbgap_project,
1534
                    privileges,
1535
                    username,
1536
                    sess,
1537
                    user_projects_to_modify,
1538
                    dbgap_config,
1539
                )
1540
        for user in user_projects_to_modify.keys():
1✔
1541
            user_projects[user] = user_projects_to_modify[user]
1✔
1542

1543
    def sync(self):
        """
        Run the sync with ``self.session`` when one is set; otherwise open a
        session from ``self.driver`` for the duration of the sync.
        """
        if not self.session:
            with self.driver.session as new_session:
                self._sync(new_session)
            return
        self._sync(self.session)
1549

1550
    def download(self):
        """
        Download files from every server configured in ``self.dbGaP``, in
        order. The file lists returned by ``_download`` are discarded.
        """
        for server_config in self.dbGaP:
            self._download(server_config)
1553

1554
    def _download(self, dbgap_config):
1✔
1555
        """
1556
        Download files from dbgap server.
1557
        """
1558
        server = dbgap_config["info"]
1✔
1559
        protocol = dbgap_config["protocol"]
1✔
1560
        hostname = server["host"]
1✔
1561
        username = server["username"]
1✔
1562
        folderdir = os.path.join(str(self.folder), str(hostname), str(username))
1✔
1563

1564
        if not os.path.exists(folderdir):
1✔
1565
            os.makedirs(folderdir)
1✔
1566

1567
        self.logger.info("Download from server")
1✔
1568
        try:
1✔
1569
            if protocol == "sftp":
1✔
1570
                self._get_from_sftp_with_proxy(server, folderdir)
1✔
1571
            else:
1572
                self._get_from_ftp_with_proxy(server, folderdir)
×
1573
            dbgap_files = glob.glob(os.path.join(folderdir, "*"))
×
1574
            return dbgap_files
×
1575
        except Exception as e:
1✔
1576
            self.logger.error(e)
1✔
1577
            raise
1✔
1578

1579
    def _sync(self, sess):
        """
        Collect files from dbgap server(s), sync csv and yaml files to storage
        backend and fence DB, then sync the resulting authorization to arborist.

        Args:
            sess: database session passed to every sync step

        Raises:
            EnvironmentError, AssertionError: re-raised when the user.yaml file
                cannot be loaded
            EnvironmentError: the user.yaml has an ``authz`` section but no
                arborist client is configured
            SystemExit: exit code 1 when an arborist synchronization step
                reports failure
            GoogleUpdateException: re-raised at the very end when the DB and
                storage sync failed on Google, after all other syncing is done
        """
        # get all dbgap files
        user_projects = {}
        user_info = {}
        if self.is_sync_from_dbgap_server:
            self.logger.debug(
                "Pulling telemetry files from {} dbgap sftp servers".format(
                    len(self.dbGaP)
                )
            )
            user_projects, user_info = self._merge_multiple_dbgap_sftp(self.dbGaP, sess)

        local_csv_file_list = []
        if self.sync_from_local_csv_dir:
            local_csv_file_list = glob.glob(
                os.path.join(self.sync_from_local_csv_dir, "*")
            )
            # Sort the list so the order of files is consistent across platforms
            local_csv_file_list.sort()

        user_projects_csv, user_info_csv = self._merge_multiple_local_csv_files(
            local_csv_file_list,
            encrypted=False,
            session=sess,
            dbgap_configs=self.dbGaP,
        )

        try:
            user_yaml = UserYAML.from_file(
                self.sync_from_local_yaml_file, encrypted=False, logger=self.logger
            )
        except (EnvironmentError, AssertionError) as e:
            self.logger.error(str(e))
            self.logger.error("aborting early")
            raise

        # parse all projects
        user_projects_csv = self.parse_projects(user_projects_csv)
        user_projects = self.parse_projects(user_projects)
        user_yaml.projects = self.parse_projects(user_yaml.projects)

        # merge all user info dicts into "user_info".
        # the user info (such as email) in the user.yaml files
        # overrides the user info from the CSV files.
        self.sync_two_user_info_dict(user_info_csv, user_info)
        self.sync_two_user_info_dict(user_yaml.user_info, user_info)

        # merge all access info dicts into "user_projects".
        # the access info is combined - if the user.yaml access is
        # ["read"] and the CSV file access is ["read-storage"], the
        # resulting access is ["read", "read-storage"].
        self.sync_two_phsids_dict(
            user_projects_csv, user_projects, source1="local_csv", source2="dbgap"
        )
        self.sync_two_phsids_dict(
            user_yaml.projects, user_projects, source1="user_yaml", source2="dbgap"
        )

        # Every configured dbgap server is checked: the c999 grant runs once
        # for each server config that has consent code parsing enabled.
        for dbgap_config in self.dbGaP:
            if self._get_parse_consent_code(dbgap_config):
                self._grant_all_consents_to_c999_users(
                    user_projects, user_yaml.project_to_resource
                )

        google_update_ex = None

        try:
            # update the Fence DB
            if user_projects:
                self.logger.info("Sync to db and storage backend")
                self.sync_to_db_and_storage_backend(user_projects, user_info, sess)
                self.logger.info("Finish syncing to db and storage backend")
            else:
                self.logger.info("No users for syncing")
        except GoogleUpdateException as ex:
            # save this to reraise later after all non-Google syncing has finished
            # this way, any issues with Google only affect Google data access and don't
            # cascade problems into non-Google AWS or Azure access
            google_update_ex = ex

        # update the Arborist DB (resources, roles, policies, groups)
        if user_yaml.authz:
            if not self.arborist_client:
                raise EnvironmentError(
                    "yaml file contains authz section but sync is not configured with"
                    " arborist client--did you run sync with --arborist <arborist client> arg?"
                )
            self.logger.info("Synchronizing arborist...")
            success = self._update_arborist(sess, user_yaml)
            if success:
                self.logger.info("Finished synchronizing arborist")
            else:
                self.logger.error("Could not synchronize successfully")
                # raise SystemExit directly: the `exit` builtin is injected by
                # the `site` module for interactive use and may be absent
                raise SystemExit(1)
        else:
            self.logger.info("No `authz` section; skipping arborist sync")

        # update the Arborist DB (user access)
        if self.arborist_client:
            self.logger.info("Synchronizing arborist with authorization info...")
            success = self._update_authz_in_arborist(sess, user_projects, user_yaml)
            if success:
                self.logger.info(
                    "Finished synchronizing authorization info to arborist"
                )
            else:
                self.logger.error(
                    "Could not synchronize authorization info successfully to arborist"
                )
                raise SystemExit(1)
        else:
            self.logger.error("No arborist client set; skipping arborist sync")

        # Logging authz source
        for u, s in self.auth_source.items():
            self.logger.info("Access for user {} from {}".format(u, s))

        self.logger.info(
            f"Persisting authz mapping to database: {user_yaml.project_to_resource}"
        )
        user_yaml.persist_project_to_resource(db_session=sess)
        if google_update_ex is not None:
            raise google_update_ex
1709

1710
    def _grant_all_consents_to_c999_users(
        self, user_projects, user_yaml_project_to_resources
    ):
        """
        Give every user holding a ``c999`` consent on a study access to all
        known accessions-with-consent of that study.

        Known accessions come from ``self._projects`` and the keys of
        ``user_yaml_project_to_resources``; each one matching
        ``config["DBGAP_ACCESSION_WITH_CONSENT_REGEX"]`` is grouped by phsid,
        and its consent is also registered for every child study returned by
        ``self._get_children``. ``user_projects`` is updated in place: each
        granted accession is set to ``{"read-storage", "read"}``.
        """
        consent_regex = re.compile(config["DBGAP_ACCESSION_WITH_CONSENT_REGEX"])

        # pool dbgap and user.yaml projects, since neither is guaranteed to
        # list every consent
        all_projects = set(
            [*self._projects.keys(), *user_yaml_project_to_resources.keys()]
        )

        self.logger.debug(f"all projects: {all_projects}")

        # phsid (without consent) -> all of its accessions with consent
        consent_mapping = {}
        for project in all_projects:
            matched = consent_regex.match(project)
            if not matched:
                continue
            parts = matched.groupdict()
            phsid = parts["phsid"]

            # TODO: This is not handling the .v1.p1 at all
            consent_mapping.setdefault(phsid, set()).add(
                ".".join([phsid, parts["consent"]])
            )
            # child studies inherit the consent of their parent study
            for child_phs in self._get_children(phsid) or []:
                consent_mapping.setdefault(child_phs, set()).add(
                    ".".join([child_phs, parts["consent"]])
                )

        self.logger.debug(f"consent mapping: {consent_mapping}")

        # walk a snapshot of the current access so that the grants written
        # into user_projects below do not disturb the iteration
        for username, project_info in copy.deepcopy(user_projects).items():
            for project in project_info:
                matched = consent_regex.match(project)
                if not matched:
                    continue
                parts = matched.groupdict()
                if parts["consent"] != "c999":
                    continue

                granted_accessions = consent_mapping.get(parts["phsid"], [])
                self.logger.info(
                    f"user {username} has c999 consent group for: {project}. "
                    f"Granting access to all consents: {granted_accessions}"
                )
                for phsid_with_consent in granted_accessions:
                    user_projects[username][phsid_with_consent] = {
                        "read-storage",
                        "read",
                    }
1764

1765
    def _update_arborist(self, session, user_yaml):
        """
        Create roles, resources, policies, groups in arborist from the information in
        ``user_yaml``.

        The projects are sent to arborist as resources with paths like
        ``/projects/{project}``. Roles are created with just the original names
        for the privileges like ``"read-storage", "read"`` etc.

        ``user_yaml.authz`` is only read; in particular the policy entries keep
        their ``id`` key, so the same ``user_yaml`` can be processed again.
        Most arborist errors on individual items are logged and skipped.

        Args:
            session (sqlalchemy.Session): not used by this method
            user_yaml (UserYAML)

        Return:
            bool: success; False only when arborist is not healthy
        """
        healthy = self._is_arborist_healthy()
        if not healthy:
            return False

        # Set up the resource tree in arborist by combining provided resources with any
        # dbgap resources that were created before this.
        #
        # Why add dbgap resources if they've already been created?
        #   B/C Arborist's PUT update will override existing subresources. So if a dbgap
        #   resources was created under `/programs/phs000178` anything provided in
        #   user.yaml under `/programs` would completely wipe it out.
        resources = user_yaml.authz.get("resources", [])

        dbgap_resource_paths = []
        for path_list in self._dbgap_study_to_resources.values():
            dbgap_resource_paths.extend(path_list)

        self.logger.debug("user_yaml resources: {}".format(resources))
        self.logger.debug("dbgap resource paths: {}".format(dbgap_resource_paths))

        combined_resources = utils.combine_provided_and_dbgap_resources(
            resources, dbgap_resource_paths
        )

        for resource in combined_resources:
            try:
                self.logger.debug(
                    "attempting to update arborist resource: {}".format(resource)
                )
                self.arborist_client.update_resource("/", resource, merge=True)
            except ArboristError as e:
                self.logger.error(e)
                # keep going; maybe just some conflicts from things existing already

        # update roles
        roles = user_yaml.authz.get("roles", [])
        for role in roles:
            try:
                response = self.arborist_client.update_role(role["id"], role)
                if response:
                    self._created_roles.add(role["id"])
            except ArboristError as e:
                self.logger.info(
                    "couldn't update role '{}', creating instead".format(str(e))
                )
                try:
                    response = self.arborist_client.create_role(role)
                    if response:
                        self._created_roles.add(role["id"])
                except ArboristError as e:
                    self.logger.error(e)
                    # keep going; maybe just some conflicts from things existing already

        # update policies
        policies = user_yaml.authz.get("policies", [])
        for policy in policies:
            # pop the id from a shallow copy: the policy body is sent without
            # its id, but the entry inside user_yaml must stay intact
            policy_body = dict(policy)
            policy_id = policy_body.pop("id")
            try:
                self.logger.debug(
                    "Trying to upsert policy with id {}".format(policy_id)
                )
                response = self.arborist_client.update_policy(
                    policy_id, policy_body, create_if_not_exist=True
                )
            except ArboristError as e:
                self.logger.error(e)
                # keep going; maybe just some conflicts from things existing already
            else:
                if response:
                    self.logger.debug("Upserted policy with id {}".format(policy_id))
                    self._created_policies.add(policy_id)

        # update groups
        groups = user_yaml.authz.get("groups", [])

        # delete from arborist the groups that have been deleted
        # from the user.yaml
        arborist_groups = set(
            g["name"] for g in self.arborist_client.list_groups().get("groups", [])
        )
        useryaml_groups = set(g["name"] for g in groups)
        for deleted_group in arborist_groups.difference(useryaml_groups):
            # do not try to delete built in groups
            if deleted_group not in ["anonymous", "logged-in"]:
                self.arborist_client.delete_group(deleted_group)

        # create/update the groups defined in the user.yaml
        for group in groups:
            missing = {"name", "users", "policies"}.difference(set(group.keys()))
            if missing:
                name = group.get("name", "{MISSING NAME}")
                self.logger.error(
                    "group {} missing required field(s): {}".format(name, list(missing))
                )
                continue
            try:
                self.arborist_client.put_group(
                    group["name"],
                    # Arborist doesn't handle group descriptions yet
                    # description=group.get("description", ""),
                    users=group["users"],
                    policies=group["policies"],
                )
            except ArboristError as e:
                self.logger.info("couldn't put group: {}".format(str(e)))

        # Update policies for built-in (`anonymous` and `logged-in`) groups

        # First recreate these groups in order to clear out old, possibly deleted policies
        for builtin_group in ["anonymous", "logged-in"]:
            try:
                self.arborist_client.put_group(builtin_group)
            except ArboristError as e:
                self.logger.info("couldn't put group: {}".format(str(e)))

        # Now add back policies that are in the user.yaml
        for policy in user_yaml.authz.get("anonymous_policies", []):
            self.arborist_client.grant_group_policy("anonymous", policy)

        for policy in user_yaml.authz.get("all_users_policies", []):
            self.arborist_client.grant_group_policy("logged-in", policy)

        return True
1904

1905
    def _revoke_all_policies_preserve_mfa(self, username, idp=None):
        """
        Revoke all of the user's policies in arborist and, if MFA is enabled for
        the user's idp, grant ``mfa_policy`` back when the user held it before.

        MFA counts as enabled when ``config["OPENID_CONNECT"][idp]`` contains a
        ``multifactor_auth_claim_info`` entry.

        Args:
            username (str): user whose policies are revoked
            idp (str): name of the user's identity provider, if known
        """

        is_mfa_enabled = "multifactor_auth_claim_info" in config["OPENID_CONNECT"].get(
            idp, {}
        )

        if not is_mfa_enabled:
            # TODO This should be a diff, not a revocation of all policies.
            self.arborist_client.revoke_all_policies_for_user(username)
            return

        policies = []
        try:
            user_data_from_arborist = self.arborist_client.get_user(username)
            policies = user_data_from_arborist["policies"]
        except Exception as e:
            self.logger.error(
                f"Could not retrieve user's policies, revoking all policies anyway. {e}"
            )
        finally:
            # TODO This should be a diff, not a revocation of all policies.
            self.arborist_client.revoke_all_policies_for_user(username)

        # A policy entry may be a bare policy id or an object carrying the id
        # under "policy" (the shape _grant_arborist_policies reads); accept
        # both so the MFA policy is restored either way.
        had_mfa_policy = any(
            (entry.get("policy") if isinstance(entry, dict) else entry) == "mfa_policy"
            for entry in policies
        )
        if had_mfa_policy:
            self.arborist_client.grant_user_policy(username, "mfa_policy")
1934

1935
    def _grant_arborist_policies(
1✔
1936
        self, username, incoming_policies, user_yaml, expires=None
1937
    ):
1938
        """
1939
        Find the difference between the existing policies for a user and the incoming policies,
1940
        and decide whether to add, remove, or keep policies.
1941

1942
        Args:
1943
            username (str): the username of the user
1944
            incoming_policies (set): set of policies to be applied to the user
1945
            user_yaml (UserYAML): UserYAML object containing authz information
1946
            expires (int): time at which authz info in Arborist should expire
1947

1948
        Return:
1949
            bool: True if policies were successfully updated, False otherwise
1950
        """
1951
        user_existing_policies = set()
1✔
1952
        to_add = set()
1✔
1953
        to_remove = set()
1✔
1954
        is_revoke_all = False
1✔
1955

1956
        try:
1✔
1957
            user_existing_policies = set(
1✔
1958
                policy["policy"]
1959
                for policy in self.arborist_client.get_user(username)["policies"]
1960
            )
1961
            self.logger.info(
1✔
1962
                f"Fetched user {username} existing policies: {user_existing_policies}"
1963
            )
1964
        except ArboristError as e:
1✔
1965
            self.logger.error(
1✔
1966
                f"Could not get user {username} policies from Arborist: {e} Revoking all policies..."
1967
            )
1968
            # if getting existing policies fails, revoke all policies and re-apply
1969
            is_revoke_all = True
1✔
1970

1971
        if is_revoke_all is False and len(incoming_policies) > 0:
1✔
1972
            to_add = incoming_policies - user_existing_policies
1✔
1973
            to_remove = user_existing_policies - incoming_policies
1✔
1974

1975
            if user_yaml:
1✔
1976
                anonymous_policies = set()
1✔
1977
                for policy in to_remove:
1✔
1978
                    if policy in user_yaml.authz.get(
×
1979
                        "anonymous_policies", []
1980
                    ) or policy in user_yaml.authz.get("all_users_policies", []):
1981
                        self.logger.warning(
×
1982
                            f"Policy {policy} is an anonymous policy, not revoking it for user {username}."
1983
                        )
1984
                        anonymous_policies.add(policy)
×
1985
                to_remove -= anonymous_policies
1✔
1986
        else:
1987
            # if incoming_policies is empty, we revoke all policies
1988
            is_revoke_all = True
1✔
1989

1990
        if not is_revoke_all:
1✔
1991
            try:
1✔
1992
                if to_remove:
1✔
1993
                    for policy in to_remove:
1✔
1994
                        self.logger.info(
1✔
1995
                            f"Revoking policy {policy} for user {username}."
1996
                        )
1997
                        self.arborist_client.revoke_user_policy(username, policy)
1✔
1998
            except ArboristError as e:
×
1999
                self.logger.error(
×
2000
                    f"Could not revoke user {username} policy {policy}. Revoking all instead: {e}"
2001
                )
2002
                is_revoke_all = True
×
2003

2004
        if is_revoke_all:
1✔
2005
            try:
1✔
2006
                self.logger.info(f"Revoking all policies for user {username}.")
1✔
2007
                self.arborist_client.revoke_all_policies_for_user(username)
1✔
2008
            except ArboristError as e:
×
2009
                self.logger.error(
×
2010
                    f"Could not revoke all policies for user {username}. Error: {e}"
2011
                )
2012
                return False
×
2013
            to_add = incoming_policies  # if we revoke all, we need to add all incoming policies
1✔
2014

2015
        if (
1✔
2016
            "mfa_policy" not in incoming_policies
2017
            and "mfa_policy" in user_existing_policies
2018
        ):
2019
            to_add.add("mfa_policy")
×
2020

2021
        if to_add:
1✔
2022
            self.logger.info(f"Bulk granting user {username} policies {to_add}.")
1✔
2023
            return self._grant_bulk_user_policies(username, to_add, expires)
1✔
2024

2025
        return True
1✔
2026

2027
    def _update_authz_in_arborist(
1✔
2028
        self,
2029
        session,
2030
        user_projects,
2031
        user_yaml=None,
2032
        single_user_sync=False,
2033
        expires=None,
2034
    ):
2035
        """
2036
        Assign users policies in arborist from the information in
2037
        ``user_projects`` and optionally a ``user_yaml``.
2038

2039
        The projects are sent to arborist as resources with paths like
2040
        ``/projects/{project}``. Roles are created with just the original names
2041
        for the privileges like ``"read-storage", "read"`` etc.
2042

2043
        Args:
2044
            user_projects (dict)
2045
            user_yaml (UserYAML) optional, if there are policies for users in a user.yaml
2046
            single_user_sync (bool) whether authz update is for a single user
2047
            expires (int) time at which authz info in Arborist should expire
2048

2049
        Return:
2050
            bool: success
2051
        """
2052
        healthy = self._is_arborist_healthy()
1✔
2053
        if not healthy:
1✔
2054
            return False
×
2055

2056
        self.logger.debug("user_projects: {}".format(user_projects))
1✔
2057

2058
        if user_yaml:
1✔
2059
            self.logger.debug(
1✔
2060
                "useryaml abac before lowering usernames: {}".format(
2061
                    user_yaml.user_abac
2062
                )
2063
            )
2064
            user_yaml.user_abac = {
1✔
2065
                key.lower(): value for key, value in user_yaml.user_abac.items()
2066
            }
2067
            # update the project info with `projects` specified in user.yaml
2068
            self.sync_two_phsids_dict(user_yaml.user_abac, user_projects)
1✔
2069

2070
        # get list of users from arborist to make sure users that are completely removed
2071
        # from authorization sources get policies revoked
2072

2073
        arborist_user_projects = {}
1✔
2074
        if not single_user_sync:
1✔
2075

2076
            try:
1✔
2077
                arborist_users = self.arborist_client.get_users().json["users"]
1✔
2078

2079
                # construct user information, NOTE the lowering of the username. when adding/
2080
                # removing access, the case in the Fence db is used. For combining access, it is
2081
                # case-insensitive, so we lower
2082
                arborist_user_projects = {
1✔
2083
                    user["name"].lower(): {} for user in arborist_users
2084
                }
2085
            except (ArboristError, KeyError, AttributeError) as error:
×
2086
                # TODO usersync should probably exit with non-zero exit code at the end,
2087
                #      but sync should continue from this point so there are no partial
2088
                #      updates
2089
                self.logger.warning(
×
2090
                    "Could not get list of users in Arborist, continuing anyway. "
2091
                    "WARNING: this sync will NOT remove access for users no longer in "
2092
                    f"authorization sources. Error: {error}"
2093
                )
2094

2095
            # update the project info with users from arborist
2096
            self.sync_two_phsids_dict(arborist_user_projects, user_projects)
1✔
2097

2098
        # prefer in-memory if available from user_yaml, if not, get from database
2099
        if user_yaml and user_yaml.project_to_resource:
1✔
2100
            project_to_authz_mapping = user_yaml.project_to_resource
1✔
2101
            self.logger.debug(
1✔
2102
                f"using in-memory project to authz resource mapping from "
2103
                f"user.yaml (instead of database): {project_to_authz_mapping}"
2104
            )
2105
        else:
2106
            project_to_authz_mapping = get_project_to_authz_mapping(session)
1✔
2107
            self.logger.debug(
1✔
2108
                f"using persisted project to authz resource mapping from database "
2109
                f"(instead of user.yaml - as it may not be available): {project_to_authz_mapping}"
2110
            )
2111

2112
        self.logger.debug(
1✔
2113
            f"_dbgap_study_to_resources: {self._dbgap_study_to_resources}"
2114
        )
2115
        all_resources = [
1✔
2116
            r
2117
            for resources in self._dbgap_study_to_resources.values()
2118
            for r in resources
2119
        ]
2120
        all_resources.extend(r for r in project_to_authz_mapping.values())
1✔
2121
        self._create_arborist_resources(all_resources)
1✔
2122

2123
        for username, user_project_info in user_projects.items():
1✔
2124
            self.logger.info("processing user `{}`".format(username))
1✔
2125
            user = query_for_user(session=session, username=username)
1✔
2126
            idp = None
1✔
2127
            if user:
1✔
2128
                username = user.username
1✔
2129
                idp = user.identity_provider.name if user.identity_provider else None
1✔
2130

2131
            self.arborist_client.create_user_if_not_exist(username)
1✔
2132

2133
            # as of 2/11/2022, for single_user_sync, as RAS visa parsing has
2134
            # previously mapped each project to the same set of privileges
2135
            # (i.e.{'read', 'read-storage'}), unique_policies will just be a
2136
            # single policy with ('read', 'read-storage') being the single
2137
            # key
2138
            unique_policies = self._determine_unique_policies(
1✔
2139
                user_project_info, project_to_authz_mapping
2140
            )
2141
            for roles in unique_policies.keys():
1✔
2142
                for role in roles:
1✔
2143
                    self._create_arborist_role(role)
1✔
2144

2145
            incoming_policies = set()  # set of policies for current user.
1✔
2146

2147
            if single_user_sync:
1✔
2148
                for ordered_roles, ordered_resources in unique_policies.items():
1✔
2149
                    policy_hash = self._hash_policy_contents(
1✔
2150
                        ordered_roles, ordered_resources
2151
                    )
2152
                    self._create_arborist_policy(
1✔
2153
                        policy_hash,
2154
                        ordered_roles,
2155
                        ordered_resources,
2156
                        skip_if_exists=True,
2157
                    )
2158
                    # return here as it is not expected single_user_sync
2159
                    # will need any of the remaining user_yaml operations
2160
                    # left in _update_authz_in_arborist
2161
                    return self._grant_arborist_policy(
1✔
2162
                        username, policy_hash, expires=expires
2163
                    )
2164
            else:
2165
                policy_ids_to_grant = set()
1✔
2166
                for roles, resources in unique_policies.items():
1✔
2167
                    for role in roles:
1✔
2168
                        for resource in resources:
1✔
2169
                            # grant a policy to this user which is a single
2170
                            # role on a single resource
2171

2172
                            # format project '/x/y/z' -> 'x.y.z'
2173
                            # so the policy id will be something like 'x.y.z-create'
2174
                            policy_id = _format_policy_id(resource, role)
1✔
2175
                            incoming_policies.add(policy_id)
1✔
2176
                            if policy_id not in self._created_policies:
1✔
2177
                                try:
1✔
2178
                                    self.arborist_client.update_policy(
1✔
2179
                                        policy_id,
2180
                                        {
2181
                                            "description": "policy created by fence sync",
2182
                                            "role_ids": [role],
2183
                                            "resource_paths": [resource],
2184
                                        },
2185
                                        create_if_not_exist=True,
2186
                                    )
2187
                                except ArboristError as e:
×
2188
                                    self.logger.info(
×
2189
                                        "not creating policy in arborist; {}".format(
2190
                                            str(e)
2191
                                        )
2192
                                    )
2193
                                self._created_policies.add(policy_id)
1✔
2194
                            policy_ids_to_grant.add(policy_id)
1✔
2195
                self._grant_arborist_policies(
1✔
2196
                    username, policy_ids_to_grant, user_yaml=None, expires=expires
2197
                )
2198

2199
            if user_yaml:
1✔
2200
                user_yaml_policies = set(user_yaml.policies.get(username, []))
1✔
2201
                incoming_policies = (
1✔
2202
                    incoming_policies | user_yaml_policies
2203
                )  # add policies from whitelist and useryaml
2204

2205
            self._grant_arborist_policies(
1✔
2206
                username, incoming_policies, user_yaml, expires=expires
2207
            )
2208

2209
        if user_yaml:
1✔
2210
            for client_name, client_details in user_yaml.clients.items():
1✔
2211
                client_policies = client_details.get("policies", [])
×
2212
                clients = session.query(Client).filter_by(name=client_name).all()
×
2213
                # update existing clients, do not create new ones
2214
                if not clients:
×
2215
                    self.logger.warning(
×
2216
                        "client to update (`{}`) does not exist in fence: skipping".format(
2217
                            client_name
2218
                        )
2219
                    )
2220
                    continue
×
2221
                self.logger.debug(
×
2222
                    "updating client `{}` (found {} client IDs)".format(
2223
                        client_name, len(clients)
2224
                    )
2225
                )
2226
                # there may be more than 1 client with this name if credentials are being rotated,
2227
                # so we grant access to each client ID
2228
                for client in clients:
×
2229
                    try:
×
2230
                        self.arborist_client.update_client(
×
2231
                            client.client_id, client_policies
2232
                        )
2233
                    except ArboristError as e:
×
2234
                        self.logger.info(
×
2235
                            "not granting policies {} to client `{}` (`{}`); {}".format(
2236
                                client_policies, client_name, client.client_id, str(e)
2237
                            )
2238
                        )
2239

2240
        return True
1✔
2241

2242
    def _determine_unique_policies(self, user_project_info, project_to_authz_mapping):
1✔
2243
        """
2244
        Determine and return a dictionary of unique policies.
2245

2246
        Args (examples):
2247
            user_project_info (dict):
2248
            {
2249
                'phs000002.c1': { 'read-storage', 'read' },
2250
                'phs000001.c1': { 'read', 'read-storage' },
2251
                'phs000004.c1': { 'write', 'read' },
2252
                'phs000003.c1': { 'read', 'write' },
2253
                'phs000006.c1': { 'write-storage', 'write', 'read-storage', 'read' }
2254
                'phs000005.c1': { 'read', 'read-storage', 'write', 'write-storage' },
2255
            }
2256
            project_to_authz_mapping (dict):
2257
            {
2258
                'phs000001.c1': '/programs/DEV/projects/phs000001.c1'
2259
            }
2260

2261
        Return (for examples):
2262
            dict:
2263
            {
2264
                ('read', 'read-storage'): ('phs000001.c1', 'phs000002.c1'),
2265
                ('read', 'write'): ('phs000003.c1', 'phs000004.c1'),
2266
                ('read', 'read-storage', 'write', 'write-storage'): ('phs000005.c1', 'phs000006.c1'),
2267
            }
2268
        """
2269
        roles_to_resources = collections.defaultdict(list)
1✔
2270
        for study, roles in user_project_info.items():
1✔
2271
            ordered_roles = tuple(sorted(roles))
1✔
2272
            study_authz_paths = self._dbgap_study_to_resources.get(study, [study])
1✔
2273
            if study in project_to_authz_mapping:
1✔
2274
                study_authz_paths = [project_to_authz_mapping[study]]
1✔
2275
            roles_to_resources[ordered_roles].extend(study_authz_paths)
1✔
2276

2277
        policies = {}
1✔
2278
        for ordered_roles, unordered_resources in roles_to_resources.items():
1✔
2279
            policies[ordered_roles] = tuple(sorted(unordered_resources))
1✔
2280
        return policies
1✔
2281

2282
    def _create_arborist_role(self, role):
1✔
2283
        """
2284
        Wrapper around gen3authz's create_role with additional logging
2285

2286
        Args:
2287
            role (str): what the Arborist identity should be of the created role
2288

2289
        Return:
2290
            bool: True if the role was created successfully or it already
2291
                  exists. False otherwise
2292
        """
2293
        if role in self._created_roles:
1✔
2294
            return True
1✔
2295
        try:
1✔
2296
            response_json = self.arborist_client.create_role(
1✔
2297
                arborist_role_for_permission(role)
2298
            )
2299
        except ArboristError as e:
×
2300
            self.logger.error(
×
2301
                "could not create `{}` role in Arborist: {}".format(role, e)
2302
            )
2303
            return False
×
2304
        self._created_roles.add(role)
1✔
2305

2306
        if response_json is None:
1✔
2307
            self.logger.info("role `{}` already exists in Arborist".format(role))
×
2308
        else:
2309
            self.logger.info("created role `{}` in Arborist".format(role))
1✔
2310
        return True
1✔
2311

2312
    def _create_arborist_resources(self, resources):
        """
        Create the given resource paths in Arborist.

        The paths are first handed to
        utils.combine_provided_and_dbgap_resources({}, resources); one
        update_resource("/", body, merge=True) request is then sent for
        every element that helper returns.

        Args:
            resources (list): full Arborist resource paths to create, e.g.
            [
                "/programs/DEV/projects/phs000001.c1",
                "/programs/DEV/projects/phs000002.c1",
                "/programs/DEV/projects/phs000003.c1"
            ]

        Return:
            bool: True if every request succeeded, False as soon as one
                  raises an ArboristError (remaining requests are not sent)

        Notes on request count (recorded as of 2/11/2022; confirm against
        utils.combine_provided_and_dbgap_resources):
          * paths sharing a top-level segment, like the example above, are
            combined into a single nested object
            { 'name': 'programs', 'subresources': [
                { 'name': 'DEV', 'subresources': [
                    { 'name': 'projects', 'subresources': [ ... ]}
                ]}
            ]}
            so only one request is sent;
          * top-level paths such as
            ["/phs000001.c1", "/phs000002.c1", "/phs000003.c1"] yield one
            object each, so three requests are sent.

        For sync_single_user_visas, studies are expected to be nested under
        `/programs`, i.e. the single-request case.

        TODO for the sake of simplicity, it would be nice if only one network
        request was made no matter the input.
        """
        payloads = utils.combine_provided_and_dbgap_resources({}, resources)
        for payload in payloads:
            try:
                self.arborist_client.update_resource("/", payload, merge=True)
            except ArboristError as err:
                self.logger.error(
                    "could not create Arborist resources using request body `{}`. error: {}".format(
                        payload, err
                    )
                )
                return False

        self.logger.debug(
            "created {} resource(s) in Arborist: `{}`".format(len(resources), resources)
        )
        return True
1✔
2377

2378
    def _create_arborist_policy(
1✔
2379
        self, policy_id, roles, resources, skip_if_exists=False
2380
    ):
2381
        """
2382
        Wrapper around gen3authz's create_policy with additional logging
2383

2384
        Args:
2385
            policy_id (str): what the Arborist identity should be of the created policy
2386
            roles (iterable): what roles the create policy should have
2387
            resources (iterable): what resources the created policy should have
2388
            skip_if_exists (bool): if True, this function will not treat an already
2389
                                   existent policy as an error
2390

2391
        Return:
2392
            bool: True if policy creation was successful. False otherwise
2393
        """
2394
        try:
1✔
2395
            response_json = self.arborist_client.create_policy(
1✔
2396
                {
2397
                    "id": policy_id,
2398
                    "role_ids": roles,
2399
                    "resource_paths": resources,
2400
                },
2401
                skip_if_exists=skip_if_exists,
2402
            )
2403
        except ArboristError as e:
×
2404
            self.logger.error(
×
2405
                "could not create policy `{}` in Arborist: {}".format(policy_id, e)
2406
            )
2407
            return False
×
2408

2409
        if response_json is None:
1✔
2410
            self.logger.info("policy `{}` already exists in Arborist".format(policy_id))
×
2411
        else:
2412
            self.logger.info("created policy `{}` in Arborist".format(policy_id))
1✔
2413
        return True
1✔
2414

2415
    def _hash_policy_contents(self, ordered_roles, ordered_resources):
1✔
2416
        """
2417
        Generate a sha256 hexdigest representing ordered_roles and ordered_resources.
2418

2419
        Args:
2420
            ordered_roles (iterable): policy roles in sorted order
2421
            ordered_resources (iterable): policy resources in sorted order
2422

2423
        Return:
2424
            str: SHA256 hex digest
2425
        """
2426

2427
        def escape(s):
1✔
2428
            return s.replace(",", "\\,")
1✔
2429

2430
        canonical_roles = ",".join(escape(r) for r in ordered_roles)
1✔
2431
        canonical_resources = ",".join(escape(r) for r in ordered_resources)
1✔
2432
        canonical_policy = f"{canonical_roles},,f{canonical_resources}"
1✔
2433
        policy_hash = hashlib.sha256(canonical_policy.encode("utf-8")).hexdigest()
1✔
2434

2435
        return policy_hash
1✔
2436

2437
    def _grant_arborist_policy(self, username, policy_id, expires=None):
1✔
2438
        """
2439
        Wrapper around gen3authz's grant_user_policy with additional logging
2440

2441
        Args:
2442
            username (str): username of user in Arborist who policy should be
2443
                            granted to
2444
            policy_id (str): Arborist policy id
2445
            expires (int): POSIX timestamp for when policy should expire
2446

2447
        Return:
2448
            bool: True if granting of policy was successful, False otherwise
2449
        """
2450
        try:
1✔
2451
            response_json = self.arborist_client.grant_user_policy(
1✔
2452
                username,
2453
                policy_id,
2454
                expires_at=expires,
2455
            )
2456
        except ArboristError as e:
×
2457
            self.logger.error(
×
2458
                "could not grant policy `{}` to user `{}`: {}".format(
2459
                    policy_id, username, e
2460
                )
2461
            )
2462
            return False
×
2463

2464
        self.logger.debug(
1✔
2465
            "granted policy `{}` to user `{}`".format(policy_id, username)
2466
        )
2467
        return True
1✔
2468

2469
    def _grant_bulk_user_policies(self, username, policy_ids, expires=None):
1✔
2470
        """
2471
        Wrapper around gen3authz's grant_user_policies with additional logging
2472

2473
        Args:
2474
            username (str): username of user in Arborist who policy should be
2475
                            granted to
2476
            policy_ids (set[str]): Arborist policy ids
2477

2478
        Return:
2479
            bool: True if granting of policies was successful, False otherwise
2480
        """
2481
        try:
1✔
2482
            response_json = self.arborist_client.grant_bulk_user_policy(
1✔
2483
                username, policy_ids, expires
2484
            )
2485
        except ArboristError as e:
×
2486
            self.logger.error(
×
2487
                "could not grant bulk policies  to user `{}`: {}".format(username, e)
2488
            )
2489
            return False
×
2490
        except ArboristTimeoutError as e:
×
2491
            self.logger.error(
×
2492
                f"Timeout waiting for response to grant bulk policies  to user `{username}`: {e}"
2493
                "This user will be skipped and usersync will continue."
2494
                "As long as the timeout is not a pool/connection timeout, then "
2495
            )
2496
            return False
×
2497
        return True
1✔
2498

2499
    def _determine_arborist_resource(self, dbgap_study, dbgap_config):
1✔
2500
        """
2501
        Determine the arborist resource path and add it to
2502
        _self._dbgap_study_to_resources
2503

2504
        Args:
2505
            dbgap_study (str): study phs identifier
2506
            dbgap_config (dict): dictionary of config for dbgap server
2507

2508
        """
2509
        default_namespaces = dbgap_config.get("study_to_resource_namespaces", {}).get(
1✔
2510
            "_default", ["/"]
2511
        )
2512
        namespaces = dbgap_config.get("study_to_resource_namespaces", {}).get(
1✔
2513
            dbgap_study, default_namespaces
2514
        )
2515

2516
        self.logger.debug(f"dbgap study namespaces: {namespaces}")
1✔
2517

2518
        arborist_resource_namespaces = [
1✔
2519
            namespace.rstrip("/") + "/programs/" for namespace in namespaces
2520
        ]
2521

2522
        for resource_namespace in arborist_resource_namespaces:
1✔
2523
            full_resource_path = resource_namespace + dbgap_study
1✔
2524
            if dbgap_study not in self._dbgap_study_to_resources:
1✔
2525
                self._dbgap_study_to_resources[dbgap_study] = []
1✔
2526
            self._dbgap_study_to_resources[dbgap_study].append(full_resource_path)
1✔
2527
        return arborist_resource_namespaces
1✔
2528

2529
    def _is_arborist_healthy(self):
1✔
2530
        if not self.arborist_client:
1✔
2531
            self.logger.warning("no arborist client set; skipping arborist dbgap sync")
×
2532
            return False
×
2533
        if not self.arborist_client.healthy():
1✔
2534
            # TODO (rudyardrichter, 2019-01-07): add backoff/retry here
2535
            self.logger.error(
×
2536
                "arborist service is unavailable; skipping main arborist dbgap sync"
2537
            )
2538
            return False
×
2539
        return True
1✔
2540

2541
    def _pick_sync_type(self, visa):
1✔
2542
        """
2543
        Pick type of visa to parse according to the visa provider
2544
        """
2545
        sync_client = None
1✔
2546
        if visa.type in self.visa_types["ras"]:
1✔
2547
            sync_client = self.ras_sync_client
1✔
2548
        else:
2549
            raise Exception(
×
2550
                "Visa type {} not recognized. Configure in fence-config".format(
2551
                    visa.type
2552
                )
2553
            )
2554
        if not sync_client:
1✔
2555
            raise Exception("Sync client for {} not configured".format(visa.type))
×
2556

2557
        return sync_client
1✔
2558

2559
    def sync_single_user_visas(
        self, user, ga4gh_visas, sess=None, expires=None, skip_google_updates=False
    ):
        """
        Sync the authz info carried by one user's visas (during login or
        DRS/data access).

        IMPORTANT NOTE: THIS DOES NOT VALIDATE THE VISA. ENSURE THIS IS DONE
                        BEFORE THIS.

        Steps, in order:
          1. read the settings from the first dbGaP config entry and load
             the local user yaml file;
          2. parse each visa, skipping (with a warning) those whose parsing
             raises, and merge the projects of the others;
          3. process the merged projects, then sync them to the storage
             backend and, if an arborist client is set, to Arborist.

        Args:
            user (userdatamodel.user.User): Fence user whose visas'
                                            authz info is being synced
            ga4gh_visas (list): a list of fence.models.GA4GHVisaV1 objects
                                that are ALREADY VALIDATED
            sess (sqlalchemy.orm.session.Session): database session
            expires (int): time at which synced Arborist policies and
                           inclusion in any GBAG are set to expire
            skip_google_updates (bool): True if google group updates should
                                        be skipped, False otherwise

        Return:
            list of successfully parsed visas

        Raises:
            EnvironmentError, AssertionError: re-raised after logging when
                the user yaml file cannot be loaded
        """
        self.ras_sync_client = RASVisa(logger=self.logger)

        # only the first dbGaP config entry is consulted here
        dbgap_config = self.dbGaP[0]
        parse_consent_code = self._get_parse_consent_code(dbgap_config)
        cea_access_enabled = dbgap_config.get(
            "enable_common_exchange_area_access", False
        )
        cea_by_study = dbgap_config.get("study_common_exchange_areas", {})

        try:
            user_yaml = UserYAML.from_file(
                self.sync_from_local_yaml_file, encrypted=False, logger=self.logger
            )
        except (EnvironmentError, AssertionError) as err:
            self.logger.error(str(err))
            self.logger.error("aborting early")
            raise

        merged_projects = {}
        info = {}
        accepted_visas = []

        for visa in ga4gh_visas:
            sync_client = self._pick_sync_type(visa)
            visa_token = visa.ga4gh_visa

            try:
                visa_projects, info = sync_client._parse_single_visa(
                    user,
                    visa_token,
                    visa.expires,
                    parse_consent_code,
                )
            except Exception:
                # a visa that cannot be parsed is skipped, not fatal
                self.logger.warning(
                    f"ignoring unsuccessfully parsed or expired visa: {visa_token}"
                )
                continue

            # later visas override earlier ones on conflicting project keys
            merged_projects = {**merged_projects, **visa_projects}
            accepted_visas.append(visa)

        # `info` is whatever the last successfully parsed visa produced (or
        # empty if none parsed), completed with the user's identity
        info["user_id"] = user.id
        info["username"] = user.username

        user_projects = self.parse_projects({user.username: merged_projects})

        if parse_consent_code and cea_access_enabled:
            self.logger.info(
                f"using study to common exchange area mapping: {cea_by_study}"
            )

        self._process_user_projects(
            user_projects,
            cea_access_enabled,
            cea_by_study,
            dbgap_config,
            sess,
        )

        if parse_consent_code:
            self._grant_all_consents_to_c999_users(
                user_projects, user_yaml.project_to_resource
            )

        if not user_projects:
            self.logger.info("No users for syncing")
        else:
            self.sync_to_storage_backend(
                user_projects,
                info,
                sess,
                expires=expires,
                skip_google_updates=skip_google_updates,
            )

        # update arborist db (user access)
        if not self.arborist_client:
            self.logger.error("No arborist client set; skipping arborist sync")
            return accepted_visas

        self.logger.info("Synchronizing arborist with authorization info...")
        synced = self._update_authz_in_arborist(
            sess,
            user_projects,
            user_yaml=user_yaml,
            single_user_sync=True,
            expires=expires,
        )
        if synced:
            self.logger.info("Finished synchronizing authorization info to arborist")
        else:
            self.logger.error(
                "Could not synchronize authorization info successfully to arborist"
            )

        return accepted_visas
1✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc