• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

uc-cdis / fence / 24741067907

21 Apr 2026 07:03PM UTC coverage: 75.08% (+0.002%) from 75.078%
24741067907

push

github

web-flow
Merge pull request #1346 from uc-cdis/fix/casc_authz_passports_no_sftp

Apply cascading authz for visas

8466 of 11276 relevant lines covered (75.08%)

0.75 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

82.07
fence/sync/sync_users.py
1
import paramiko.ssh_exception
1✔
2
import backoff
1✔
3
import glob
1✔
4

5
import httpx
1✔
6
import jwt
1✔
7
import os
1✔
8
import re
1✔
9
import subprocess as sp
1✔
10
import yaml
1✔
11
import copy
1✔
12
import datetime
1✔
13
import uuid
1✔
14
import collections
1✔
15
import hashlib
1✔
16

17
from contextlib import contextmanager
1✔
18
from collections import defaultdict
1✔
19
from csv import DictReader
1✔
20
from io import StringIO
1✔
21
from stat import S_ISDIR
1✔
22

23
import paramiko
1✔
24
from cdislogging import get_logger
1✔
25
from email_validator import validate_email, EmailNotValidError
1✔
26
from gen3authz.client.arborist.errors import ArboristError, ArboristTimeoutError
1✔
27
from gen3users.validation import validate_user_yaml
1✔
28
from paramiko.proxy import ProxyCommand
1✔
29
from sqlalchemy.exc import IntegrityError
1✔
30
from sqlalchemy import func
1✔
31

32
from fence.config import config
1✔
33
from fence.models import (
1✔
34
    AccessPrivilege,
35
    AuthorizationProvider,
36
    Project,
37
    Tag,
38
    User,
39
    query_for_user,
40
    Client,
41
    IdentityProvider,
42
    get_project_to_authz_mapping,
43
)
44
from fence.resources.google.utils import get_or_create_proxy_group_id
1✔
45
from fence.resources.storage import StorageManager
1✔
46
from fence.resources.google.access_utils import update_google_groups_for_users
1✔
47
from fence.resources.google.access_utils import GoogleUpdateException
1✔
48
from fence.sync import utils
1✔
49
from fence.sync.passport_sync.ras_sync import RASVisa
1✔
50
from fence.utils import get_SQLAlchemyDriver, DEFAULT_BACKOFF_SETTINGS
1✔
51

52

53
def _format_policy_id(path, privilege):
1✔
54
    resource = ".".join(name for name in path.split("/") if name)
1✔
55
    return "{}-{}".format(resource, privilege)
1✔
56

57

58
def download_dir(sftp, remote_dir, local_dir):
    """
    Recursively download every file under remote_dir into local_dir.

    Args:
        sftp: object exposing ``listdir_attr(path)`` and ``get(remote, local)``;
            the caller in this file passes the result of ``client.open_sftp()``
        remote_dir(str): directory on the remote side to copy from
        local_dir(str): local directory to copy into; created if it is missing
    Returns: None
    """
    # The destination must exist before any file is written into it. Without
    # this, files inside a remote sub-directory had no local directory to land in.
    os.makedirs(local_dir, exist_ok=True)

    for item in sftp.listdir_attr(remote_dir):
        remote_path = remote_dir + "/" + item.filename
        local_path = os.path.join(local_dir, item.filename)
        if S_ISDIR(item.st_mode):
            download_dir(sftp, remote_path, local_path)
        else:
            sftp.get(remote_path, local_path)
×
75

76

77
def arborist_role_for_permission(permission):
    """
    Build the arborist role for a single privilege.

    For the programs/projects in the existing fence access control model, a policy
    is generated for each combination of program/project and privilege so that
    arborist can be used for checking permissions. Each role produced here holds
    exactly one permission: the given privilege as the method, on any service.
    """
    action = {"service": "*", "method": permission}
    single_permission = {"id": permission, "action": action}
    return {"id": permission, "permissions": [single_permission]}
90

91

92
@contextmanager
1✔
93
def _read_file(filepath, encrypted=True, key=None, logger=None):
1✔
94
    """
95
    Context manager for reading and optionally decrypting file it only
96
    decrypts files encrypted by unix 'crypt' tool which is used by dbGaP.
97

98
    Args:
99
        filepath (str): path to the file
100
        encrypted (bool): whether the file is encrypted
101

102
    Returns:
103
        Generator[file-like class]: file like object for the file
104
    """
105
    if encrypted:
1✔
106
        p = sp.Popen(
×
107
            [
108
                "ccdecrypt",
109
                "-u",
110
                "-K",
111
                key,
112
                filepath,
113
            ],
114
            stdout=sp.PIPE,
115
            stderr=open(os.devnull, "w"),
116
            universal_newlines=True,
117
        )
118
        try:
×
119
            yield StringIO(p.communicate()[0])
×
120
        except UnicodeDecodeError:
×
121
            logger.error("Could not decode file. Check the decryption key.")
×
122
    else:
123
        f = open(filepath, "r")
1✔
124
        yield f
1✔
125
        f.close()
1✔
126

127

128
class UserYAML(object):
    """
    Representation of the information in a YAML file describing user, project, and ABAC
    information for access control.
    """

    def __init__(
        self,
        projects=None,
        user_info=None,
        policies=None,
        clients=None,
        authz=None,
        project_to_resource=None,
        logger=None,
        user_abac=None,
    ):
        """
        Store the parsed sections of a user.yaml.

        Every mapping argument that is None (or empty) is replaced by a fresh
        empty dict; ``logger`` is stored as given.
        """
        self.projects = projects or {}
        self.user_info = user_info or {}
        self.user_abac = user_abac or {}
        self.policies = policies or {}
        self.clients = clients or {}
        self.authz = authz or {}
        self.project_to_resource = project_to_resource or {}
        self.logger = logger

    @classmethod
    def from_file(cls, filepath, encrypted=True, key=None, logger=None):
        """
        Parse a user.yaml file into a UserYAML instance.

        Add access by "auth_id" to "self.projects" to update the Fence DB.
        Add access by "resource" to "self.user_abac" to update Arborist.

        Args:
            filepath (str): path to the YAML file; when falsy nothing is read and
                an empty UserYAML is returned
            encrypted (bool): forwarded to ``_read_file``
            key (str): forwarded to ``_read_file``
            logger: optional logger; every log call is skipped when it is None

        Returns:
            UserYAML: instance holding projects, user_info, user_abac, policies,
            clients, authz and project_to_resource read from the file

        Raises:
            EnvironmentError: if a username is encountered more than once
        """
        data = {}
        if filepath:
            with _read_file(filepath, encrypted=encrypted, key=key, logger=logger) as f:
                file_contents = f.read()
                validate_user_yaml(file_contents)  # run user.yaml validation tests
                # safe_load returns None for an empty document
                data = yaml.safe_load(file_contents) or {}
        elif logger:
            logger.info("Did not sync a user.yaml, no file path provided.")

        projects = dict()
        user_info = dict()
        policies = dict()

        # resources should be the resource tree to construct in arborist
        user_abac = dict()

        # Fall back on rbac block if no authz. Remove when rbac in useryaml fully deprecated.
        if not data.get("authz") and data.get("rbac"):
            if logger:
                logger.info(
                    "No authz block found but rbac block present. Using rbac block"
                )
            data["authz"] = data["rbac"]

        # a key that is present but null in the YAML comes back as None, not as
        # the `.get` default, so normalise explicitly
        authz = data.get("authz")
        if authz is None:
            authz = dict()

        # get user project mapping to arborist resources if it exists
        # (kept as the same dict object as in `authz`, which is filled in below)
        project_to_resource = authz.get("user_project_to_resource")
        if project_to_resource is None:
            project_to_resource = dict()

        # read projects and privileges for each user
        users = data.get("users") or {}
        for username, details in users.items():
            details = details or {}

            # users should occur only once each; skip if already processed
            if username in projects:
                msg = "invalid yaml file: user `{}` occurs multiple times".format(
                    username
                )
                if logger:
                    logger.error(msg)
                raise EnvironmentError(msg)

            privileges = {}
            resource_permissions = dict()
            for project in details.get("projects") or []:
                try:
                    privileges[project["auth_id"]] = set(project["privilege"])
                except KeyError as e:
                    if logger:
                        logger.error("project {} missing field: {}".format(project, e))
                    continue

                # project may not have `resource` field.
                # prefer resource field;
                # if no resource, assume auth_id is resource.
                resource = project.get("resource", project["auth_id"])

                if project["auth_id"] not in project_to_resource:
                    project_to_resource[project["auth_id"]] = resource
                resource_permissions[resource] = set(project["privilege"])

            user_info[username] = {
                "email": details.get("email", ""),
                "display_name": details.get("display_name", ""),
                "phone_number": details.get("phone_number", ""),
                "tags": details.get("tags", {}),
                "admin": details.get("admin", False),
            }
            if not details.get("email"):
                # no explicit email: use the username itself when it is a valid address
                try:
                    valid = validate_email(
                        username, allow_smtputf8=False, check_deliverability=False
                    )
                    user_info[username]["email"] = valid.email
                except EmailNotValidError:
                    pass
            projects[username] = privileges
            user_abac[username] = resource_permissions

            # list of policies we want to grant to this user, which get sent to arborist
            # to check if they're allowed to do certain things
            policies[username] = details.get("policies", [])

        if logger:
            logger.info(
                "Got user project to arborist resource mapping:\n{}".format(
                    str(project_to_resource)
                )
            )

        if not authz:
            # older version: resources in root, no `authz` section or `rbac` section
            if logger:
                logger.warning(
                    "access control YAML file is using old format (missing `authz`/`rbac`"
                    " section in the root); assuming that if it exists `resources` will"
                    " be on the root level, and continuing"
                )
            # we're going to throw it into the `authz` dictionary anyways, so the rest of
            # the code can pretend it's in the normal place that we expect
            resources = data.get("resources", [])
            # keep authz empty dict if resources is not specified
            if resources:
                authz["resources"] = resources

        clients = data.get("clients", {})

        return cls(
            projects=projects,
            user_info=user_info,
            user_abac=user_abac,
            policies=policies,
            clients=clients,
            authz=authz,
            project_to_resource=project_to_resource,
            logger=logger,
        )

    def persist_project_to_resource(self, db_session):
        """
        Store the mappings from Project.auth_id to authorization resource (Project.authz)

        The mapping comes from an external source, this function persists what was parsed
        into memory into the database for future use. Existing Project rows get their
        ``authz`` updated; missing ones are created with ``name`` equal to the auth_id.
        Everything is committed once at the end.
        """
        for auth_id, authz_resource in self.project_to_resource.items():
            project = (
                db_session.query(Project).filter(Project.auth_id == auth_id).first()
            )
            if project:
                project.authz = authz_resource
            else:
                project = Project(name=auth_id, auth_id=auth_id, authz=authz_resource)
                db_session.add(project)
        db_session.commit()
1✔
296

297

298
class UserSyncer(object):
1✔
299
    def __init__(
        self,
        dbGaP,
        DB,
        project_mapping,
        storage_credentials=None,
        db_session=None,
        is_sync_from_dbgap_server=False,
        sync_from_local_csv_dir=None,
        sync_from_local_yaml_file=None,
        arborist=None,
        folder=None,
    ):
        """
        Syncs ACL files from dbGap to auth database and storage backends
        Args:
            dbGaP: a list of dict containing creds to access dbgap sftp; each
                   entry's optional `parent_to_child_studies_mapping` is merged
                   into one mapping on this instance
            DB: database connection string
            project_mapping: a dict containing how dbgap ids map to projects
            storage_credentials: a dict containing creds for storage backends
            db_session: stored as `self.session`
            is_sync_from_dbgap_server: stored as-is (presumably toggles syncing
                   from the dbGaP server; confirm against the sync entry point)
            sync_from_local_csv_dir: stored as-is (presumably a local dir of CSV
                   files to sync from instead of dbGaP; confirm against callers)
            sync_from_local_yaml_file: stored as-is (presumably a local user.yaml
                   path; confirm against callers)
            arborist:
                ArboristClient instance if the syncer should also create
                resources in arborist
            folder: a local folder where dbgap telemetry files will sync to
        """
        # sync sources
        self.dbGaP = dbGaP
        self.is_sync_from_dbgap_server = is_sync_from_dbgap_server
        self.sync_from_local_csv_dir = sync_from_local_csv_dir
        self.sync_from_local_yaml_file = sync_from_local_yaml_file
        self.folder = folder

        # database access
        self.session = db_session
        self.driver = get_SQLAlchemyDriver(DB)

        # bookkeeping filled in while syncing
        self.project_mapping = project_mapping or {}
        self._projects = dict()
        self._created_roles = set()
        self._created_policies = set()
        self._dbgap_study_to_resources = dict()
        self.id_patterns = []

        log_level = "debug" if config["DEBUG"] is True else "info"
        self.logger = get_logger("user_syncer", log_level=log_level)
        self.arborist_client = arborist

        # auth_source used for logging. username : [source1, source2]
        self.auth_source = defaultdict(set)
        self.visa_types = config.get("USERSYNC", {}).get("visa_types", {})

        # later dbGaP entries win when the same parent study appears twice
        self.parent_to_child_studies_mapping = {}
        for single_dbgap_config in dbGaP:
            child_mapping = single_dbgap_config.get(
                "parent_to_child_studies_mapping", {}
            )
            self.parent_to_child_studies_mapping.update(child_mapping)

        # NOTE: `storage_manager` is only defined when credentials are supplied
        if storage_credentials:
            self.storage_manager = StorageManager(
                storage_credentials, logger=self.logger
            )
1✔
356

357
    @staticmethod
1✔
358
    def _match_pattern(filepath, id_patterns, encrypted=True):
1✔
359
        """
360
        Check if the filename matches dbgap access control file pattern
361

362
        Args:
363
            filepath (str): path to file
364
            encrypted (bool): whether the file is encrypted
365

366
        Returns:
367
            bool: whether the pattern matches
368
        """
369
        id_patterns.append(r"authentication_file_phs(\d{6}).(csv|txt)")
1✔
370
        for pattern in id_patterns:
1✔
371
            if encrypted:
1✔
372
                pattern += r".enc"
×
373
            pattern += r"$"
1✔
374
            # when converting the YAML from fence-config,
375
            # python reads it as Python string literal. So "\" turns into "\\"
376
            # which messes with the regex match
377
            pattern.replace("\\\\", "\\")
1✔
378
            if re.match(pattern, os.path.basename(filepath)):
1✔
379
                return True
1✔
380
        return False
1✔
381

382
    def _get_from_sftp_with_proxy(self, server, path):
        """
        Download all data from sftp server to a local dir

        Host keys are loaded from ``~/.ssh/known_hosts`` and unknown hosts are
        rejected. An SSH failure is logged and swallowed, so the caller gets no
        signal other than the missing files.

        Args:
            server (dict) : dictionary containing info to access sftp server
                (keys read here: host, port, username, password, proxy, proxy_user)
            path (str): path to local directory

        Returns:
            None
        """
        proxy = None
        if server.get("proxy", "") != "":
            command = "ssh -oHostKeyAlgorithms=+ssh-rsa -i ~/.ssh/id_rsa {user}@{proxy} nc {host} {port}".format(
                user=server.get("proxy_user", ""),
                proxy=server.get("proxy", ""),
                host=server.get("host", ""),
                port=server.get("port", 22),
            )
            self.logger.info("SSH proxy command: {}".format(command))

            proxy = ProxyCommand(command)

        try:
            with paramiko.SSHClient() as client:
                client.set_log_channel(self.logger.name)

                # Load known host keys
                known_hosts_path = os.path.expanduser("~/.ssh/known_hosts")
                if os.path.exists(known_hosts_path):
                    client.load_host_keys(known_hosts_path)
                else:
                    self.logger.error(
                        "No known_hosts file found — rejecting unknown hosts - make sure the SFTP host key is present in known_hosts before attempting connection."
                    )

                client.set_missing_host_key_policy(paramiko.RejectPolicy())
                parameters = {
                    "hostname": str(server.get("host", "")),
                    "username": str(server.get("username", "")),
                    "password": str(server.get("password", "")),
                    "port": int(server.get("port", 22)),
                }
                if proxy:
                    parameters["sock"] = proxy

                self.logger.info(
                    "SSH connection hostname:port {}:{}".format(
                        parameters.get("hostname", "unknown"),
                        parameters.get("port", "unknown"),
                    )
                )
                try:
                    self._connect_with_ssh(ssh_client=client, parameters=parameters)

                    with client.open_sftp() as sftp:
                        download_dir(sftp, "./", path)
                except paramiko.ssh_exception.SSHException as e:
                    self.logger.error(f"SSH connection failed, error: {e}")
        finally:
            # release the proxy subprocess even when a non-SSH error propagates
            if proxy:
                proxy.close()
×
443

444
    @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
    def _connect_with_ssh(self, ssh_client, parameters):
        """
        Call ``ssh_client.connect`` with ``parameters`` expanded as keyword
        arguments. The decorator retries the call with exponential backoff on
        any Exception, configured by DEFAULT_BACKOFF_SETTINGS.
        """
        ssh_client.connect(**parameters)
1✔
447

448
    def _get_from_ftp_with_proxy(self, server, path):
        """
        Download data from ftp server to a local dir

        Runs ``lftp`` to mirror the remote working directory into ``path``
        through an HTTP proxy. The command is started without a shell, so
        credentials or host names containing shell metacharacters are passed
        through verbatim instead of being interpreted.

        Args:
            server (dict): dictionary containing information for accessing server
                (keys read here: username, password, host, proxy)
            path(str): path to local files

        Returns:
            None
        """
        credentials = "{},{}".format(
            server.get("username", ""), server.get("password", "")
        )
        lftp_commands = "set ftp:proxy http://{}; mirror . {}; exit".format(
            server.get("proxy", ""), path
        )
        argv = [
            "lftp",
            "-u",
            credentials,
            str(server.get("host", "")),
            "-e",
            lftp_commands,
        ]
        try:
            completed = sp.run(argv)
        except OSError as e:
            # e.g. lftp is not installed; stay best-effort like the previous
            # os.system call, but leave a trace in the logs
            self.logger.error("Could not run lftp: {}".format(e))
            return
        if completed.returncode != 0:
            # never log argv: it contains the password
            self.logger.error(
                "lftp exited with return code {}".format(completed.returncode)
            )
×
469

470
    def _get_parse_consent_code(self, dbgap_config={}):
1✔
471
        return dbgap_config.get(
1✔
472
            "parse_consent_code", True
473
        )  # Should this really be true?
474

475
    def _parse_csv(self, file_dict, sess, dbgap_config={}, encrypted=True):
1✔
476
        """
477
        parse csv files to python dict
478

479
        Args:
480
            file_dict: a dictionary with key(file path) and value(privileges)
481
            sess: sqlalchemy session
482
            dbgap_config: a dictionary containing information about the dbGaP sftp server
483
                (comes from fence config)
484
            encrypted: boolean indicating whether those files are encrypted
485

486

487
        Return:
488
            Tuple[[dict, dict]]:
489
                (user_project, user_info) where user_project is a mapping from
490
                usernames to project permissions and user_info is a mapping
491
                from usernames to user details, such as email
492

493
        Example:
494

495
            (
496
                {
497
                    username: {
498
                        'project1': {'read-storage','write-storage'},
499
                        'project2': {'read-storage'},
500
                    }
501
                },
502
                {
503
                    username: {
504
                        'email': 'email@mail.com',
505
                        'display_name': 'display name',
506
                        'phone_number': '123-456-789',
507
                        'tags': {'dbgap_role': 'PI'}
508
                    }
509
                },
510
            )
511

512
        """
513
        user_projects = dict()
1✔
514
        user_info = defaultdict(dict)
1✔
515

516
        # parse dbGaP sftp server information
517
        dbgap_key = dbgap_config.get("decrypt_key", None)
1✔
518

519
        self.id_patterns += (
1✔
520
            [
521
                item.replace("\\\\", "\\")
522
                for item in dbgap_config.get("allowed_whitelist_patterns", [])
523
            ]
524
            if dbgap_config.get("allow_non_dbGaP_whitelist", False)
525
            else []
526
        )
527

528
        enable_common_exchange_area_access = dbgap_config.get(
1✔
529
            "enable_common_exchange_area_access", False
530
        )
531
        study_common_exchange_areas = dbgap_config.get(
1✔
532
            "study_common_exchange_areas", {}
533
        )
534
        parse_consent_code = self._get_parse_consent_code(dbgap_config)
1✔
535

536
        if parse_consent_code and enable_common_exchange_area_access:
1✔
537
            self.logger.info(
1✔
538
                f"using study to common exchange area mapping: {study_common_exchange_areas}"
539
            )
540

541
        project_id_patterns = [r"phs(\d{6})"]
1✔
542
        if "additional_allowed_project_id_patterns" in dbgap_config:
1✔
543
            patterns = dbgap_config.get("additional_allowed_project_id_patterns")
1✔
544
            patterns = [
1✔
545
                pattern.replace("\\\\", "\\") for pattern in patterns
546
            ]  # when converting the YAML from fence-config, python reads it as Python string literal. So "\" turns into "\\" which messes with the regex match
547
            project_id_patterns += patterns
1✔
548

549
        self.logger.info(f"Using these file paths: {file_dict.items()}")
1✔
550
        for filepath, privileges in file_dict.items():
1✔
551
            self.logger.info("Reading file {}".format(filepath))
1✔
552
            if os.stat(filepath).st_size == 0:
1✔
553
                self.logger.warning("Empty file {}".format(filepath))
×
554
                continue
×
555
            if not self._match_pattern(
1✔
556
                filepath, id_patterns=self.id_patterns, encrypted=encrypted
557
            ):
558
                self.logger.warning(
1✔
559
                    "Filename {} does not match dbgap access control filename pattern;"
560
                    " this could mean that the filename has an invalid format, or has"
561
                    " an unexpected .enc extension, or lacks the .enc extension where"
562
                    " expected. This file is NOT being processed by usersync!".format(
563
                        filepath
564
                    )
565
                )
566
                continue
1✔
567

568
            with _read_file(
1✔
569
                filepath, encrypted=encrypted, key=dbgap_key, logger=self.logger
570
            ) as f:
571
                csv = DictReader(f, quotechar='"', skipinitialspace=True)
1✔
572

573
                for row in csv:
1✔
574
                    username = row.get("login") or ""
1✔
575
                    if username == "":
1✔
576
                        continue
×
577

578
                    if dbgap_config.get("allow_non_dbGaP_whitelist", False):
1✔
579
                        phsid = (
1✔
580
                            row.get("phsid") or (row.get("project_id") or "")
581
                        ).split(".")
582
                    else:
583
                        phsid = (row.get("phsid") or "").split(".")
1✔
584

585
                    dbgap_project = phsid[0]
1✔
586
                    # There are issues where dbgap has a wrong entry in their whitelist. Since we do a bulk arborist request, there are wrong entries in it that invalidates the whole request causing other correct entries not to be added
587
                    skip = False
1✔
588
                    for pattern in project_id_patterns:
1✔
589
                        self.logger.debug(
1✔
590
                            "Checking pattern:{} with project_id:{}".format(
591
                                pattern, dbgap_project
592
                            )
593
                        )
594
                        if re.match(pattern, dbgap_project):
1✔
595
                            skip = False
1✔
596
                            break
1✔
597
                        else:
598
                            skip = True
1✔
599
                    if skip:
1✔
600
                        self.logger.warning(
1✔
601
                            "Skip processing from file {}, user {} with project {}".format(
602
                                filepath,
603
                                username,
604
                                dbgap_project,
605
                            )
606
                        )
607
                        continue
1✔
608
                    if len(phsid) > 1 and parse_consent_code:
1✔
609
                        consent_code = phsid[-1]
1✔
610

611
                        # c999 indicates full access to all consents and access
612
                        # to a study-specific exchange area
613
                        # access to at least one study-specific exchange area implies access
614
                        # to the parent study's common exchange area
615
                        #
616
                        # NOTE: Handling giving access to all consents is done at
617
                        #       a later time, when we have full information about possible
618
                        #       consents
619
                        self.logger.debug(
1✔
620
                            f"got consent code {consent_code} from dbGaP project "
621
                            f"{dbgap_project}"
622
                        )
623
                        if (
1✔
624
                            consent_code == "c999"
625
                            and enable_common_exchange_area_access
626
                            and dbgap_project in study_common_exchange_areas
627
                        ):
628
                            self.logger.info(
1✔
629
                                "found study with consent c999 and Fence "
630
                                "is configured to parse exchange area data. Giving user "
631
                                f"{username} {privileges} privileges in project: "
632
                                f"{study_common_exchange_areas[dbgap_project]}."
633
                            )
634
                            self._add_dbgap_project_for_user(
1✔
635
                                study_common_exchange_areas[dbgap_project],
636
                                privileges,
637
                                username,
638
                                sess,
639
                                user_projects,
640
                                dbgap_config,
641
                            )
642

643
                        dbgap_project += "." + consent_code
1✔
644

645
                    self._add_children_for_dbgap_project(
1✔
646
                        dbgap_project,
647
                        privileges,
648
                        username,
649
                        sess,
650
                        user_projects,
651
                        dbgap_config,
652
                    )
653

654
                    display_name = row.get("user name") or ""
1✔
655
                    tags = {"dbgap_role": row.get("role") or ""}
1✔
656

657
                    # some dbgap telemetry files have information about a researchers PI
658
                    if "downloader for" in row:
1✔
659
                        tags["pi"] = row["downloader for"]
1✔
660

661
                    # prefer name over previous "downloader for" if it exists
662
                    if "downloader for names" in row:
1✔
663
                        tags["pi"] = row["downloader for names"]
×
664

665
                    user_info[username] = {
1✔
666
                        "email": row.get("email")
667
                        or user_info[username].get("email")
668
                        or "",
669
                        "display_name": display_name,
670
                        "phone_number": row.get("phone")
671
                        or user_info[username].get("phone_number")
672
                        or "",
673
                        "tags": tags,
674
                    }
675

676
                    self._process_dbgap_project(
1✔
677
                        dbgap_project,
678
                        privileges,
679
                        username,
680
                        sess,
681
                        user_projects,
682
                        dbgap_config,
683
                    )
684

685
        return user_projects, user_info
1✔
686

687
    def _get_children(self, dbgap_project):
1✔
688
        return self.parent_to_child_studies_mapping.get(dbgap_project.split(".")[0])
1✔
689

690
    def _add_children_for_dbgap_project(
1✔
691
        self, dbgap_project, privileges, username, sess, user_projects, dbgap_config
692
    ):
693
        """
694
        Adds the configured child studies for the given dbgap_project, adding it to the provided user_projects. If
695
        parse_consent_code is true, then the consents granted in the provided dbgap_project will also be granted to the
696
        child studies.
697
        """
698
        parent_phsid = dbgap_project
1✔
699
        parse_consent_code = self._get_parse_consent_code(dbgap_config)
1✔
700
        child_suffix = ""
1✔
701
        if parse_consent_code and re.match(
1✔
702
            config["DBGAP_ACCESSION_WITH_CONSENT_REGEX"], dbgap_project
703
        ):
704
            parent_phsid_parts = dbgap_project.split(".")
1✔
705
            parent_phsid = parent_phsid_parts[0]
1✔
706
            child_suffix = "." + parent_phsid_parts[1]
1✔
707

708
        if parent_phsid not in self.parent_to_child_studies_mapping:
1✔
709
            return
1✔
710

711
        self.logger.info(
1✔
712
            f"found parent study {parent_phsid} and Fence "
713
            "is configured to provide additional access to child studies. Giving user "
714
            f"{username} {privileges} privileges in projects: "
715
            f"{{k + child_suffix: v + child_suffix for k, v in self.parent_to_child_studies_mapping.items()}}."
716
        )
717
        child_studies = self.parent_to_child_studies_mapping.get(parent_phsid, [])
1✔
718
        for child_study in child_studies:
1✔
719
            self._add_dbgap_project_for_user(
1✔
720
                child_study + child_suffix,
721
                privileges,
722
                username,
723
                sess,
724
                user_projects,
725
                dbgap_config,
726
            )
727

728
    def _add_dbgap_project_for_user(
1✔
729
        self, dbgap_project, privileges, username, sess, user_projects, dbgap_config
730
    ):
731
        """
732
        Helper function for csv parsing that adds a given dbgap project to Fence/Arborist
733
        and then updates the dictionary containing all user's project access
734
        """
735
        if dbgap_project not in self._projects:
1✔
736
            self.logger.debug(
1✔
737
                "creating Project in fence for dbGaP study: {}".format(dbgap_project)
738
            )
739

740
            project = self._get_or_create(sess, Project, auth_id=dbgap_project)
1✔
741

742
            # need to add dbgap project to arborist
743
            if self.arborist_client:
1✔
744
                self._determine_arborist_resource(dbgap_project, dbgap_config)
1✔
745

746
            if project.name is None:
1✔
747
                project.name = dbgap_project
1✔
748
            self._projects[dbgap_project] = project
1✔
749
        phsid_privileges = {dbgap_project: set(privileges)}
1✔
750
        if username in user_projects:
1✔
751
            user_projects[username].update(phsid_privileges)
1✔
752
        else:
753
            user_projects[username] = phsid_privileges
1✔
754

755
    @staticmethod
1✔
756
    def sync_two_user_info_dict(user_info1, user_info2):
1✔
757
        """
758
        Merge user_info1 into user_info2. Values in user_info2 are overriden
759
        by values in user_info1. user_info2 ends up containing the merged dict.
760

761
        Args:
762
            user_info1 (dict): nested dict
763
            user_info2 (dict): nested dict
764

765
            Example:
766
            {username: {'email': 'abc@email.com'}}
767

768
        Returns:
769
            None
770
        """
771
        user_info2.update(user_info1)
1✔
772

773
    def sync_two_phsids_dict(
        self,
        phsids1,
        phsids2,
        source1=None,
        source2=None,
        phsids2_overrides_phsids1=True,
    ):
        """
        Merge phsids1 into phsids2 in place; phsids2 ends up holding the
        merged result. `source1` and `source2` name where each dict came from
        and are recorded per user in self.auth_source.

        Args:
            phsids1, phsids2: nested dicts mapping phsids to sets of permissions

            source1, source2: source of authz information (eg. dbgap, user_yaml, visas)

            phsids2_overrides_phsids1 (bool): controls what happens for a user
                that already has projects in phsids2 (see below)

            Example:
            {
                username: {
                    phsid1: {'read-storage','write-storage'},
                    phsid2: {'read-storage'},
                }
            }

        Return:
            None

        Explanation:
            For every user of phsids1:

            - the user has no (or an empty) entry in phsids2: the user's
              projects dict from phsids1 is stored in phsids2 as is (the same
              object, not a copy) and source1 is recorded.

            - the user has projects in phsids2 and `phsids2_overrides_phsids1`
              is true: both sources are recorded and the projects are combined:

                  {user1: {phsid1: privilege1}} + {user1: {phsid2: privilege2}}
                      -> {user1: {phsid1: privilege1, phsid2: privilege2}}

                  {user1: {phsid1: privilege1}} + {user1: {phsid1: privilege2}}
                      -> {user1: {phsid1: union(privilege1, privilege2)}}

            - the user has projects in phsids2 and `phsids2_overrides_phsids1`
              is false: the user's entry in phsids2 is left untouched and only
              source2 is recorded.
        """
        for user, projects1 in phsids1.items():
            existing_projects = phsids2.get(user)

            if not existing_projects:
                if source1:
                    self.auth_source[user].add(source1)
                phsids2[user] = projects1
                continue

            if not phsids2_overrides_phsids1:
                if source2:
                    self.auth_source[user].add(source2)
                continue

            if source1:
                self.auth_source[user].add(source1)
            if source2:
                self.auth_source[user].add(source2)
            for phsid, privileges in projects1.items():
                existing_projects.setdefault(phsid, set()).update(privileges)
×
837

838
    def sync_to_db_and_storage_backend(
        self,
        user_project,
        user_info,
        sess,
        do_not_revoke_from_db_and_storage=False,
        expires=None,
    ):
        """
        Bring the database and the storage backend in line with the given
        user access.

        The (username, project) pairs currently stored as AccessPrivilege rows
        are compared with the pairs in user_project: pairs only in the db are
        revoked, pairs only in user_project are granted, and pairs in both are
        re-granted in storage and updated in the db.

        Args:
            user_project (dict): a dictionary of

                {
                    username: {
                        'project1': {'read-storage','write-storage'},
                        'project2': {'read-storage'}
                    }
                }

            user_info (dict): a dictionary of {username: user_info{}}
            sess: a sqlalchemy session
            do_not_revoke_from_db_and_storage (bool): when true, the revoke
                steps and the admin validation are skipped
            expires: forwarded to the storage grants

        Return:
            None
        """
        # a dict collects the Google changes only when bulk updates are enabled
        google_bulk_mapping = {} if config["GOOGLE_BULK_UPDATES"] else None

        self._init_projects(user_project, sess)

        auth_provider_list = [
            self._get_or_create(sess, AuthorizationProvider, name=provider_name)
            for provider_name in ("dbGaP", "fence")
        ]

        access_in_db = {
            (privilege.user.username.lower(), privilege.project.auth_id)
            for privilege in sess.query(AccessPrivilege).all()
        }

        # usernames are stored case-sensitively in the db but have to be
        # compared with the whitelist case-insensitively, so everything used
        # for the comparison is keyed by the lowercased username
        user_project_lowercase = {}
        access_to_sync = set()
        for username, projects in user_project.items():
            lowered_username = username.lower()
            user_project_lowercase[lowered_username] = projects
            for project in projects:
                access_to_sync.add((lowered_username, project))

        user_info_lowercase = {
            username.lower(): info for username, info in user_info.items()
        }

        to_delete = access_in_db - access_to_sync
        to_add = access_to_sync - access_in_db
        to_update = access_in_db & access_to_sync

        # user records keep the case of the username, so the original,
        # non-lowered user_info dict is used here
        self._upsert_userinfo(sess, user_info)

        if not do_not_revoke_from_db_and_storage:
            self._revoke_from_storage(
                to_delete, sess, google_bulk_mapping=google_bulk_mapping
            )
            self._revoke_from_db(sess, to_delete)

        # new access
        self._grant_from_storage(
            to_add,
            user_project_lowercase,
            sess,
            google_bulk_mapping=google_bulk_mapping,
            expires=expires,
        )
        self._grant_from_db(
            sess,
            to_add,
            user_info_lowercase,
            user_project_lowercase,
            auth_provider_list,
        )

        # access that already exists is re-granted
        self._grant_from_storage(
            to_update,
            user_project_lowercase,
            sess,
            google_bulk_mapping=google_bulk_mapping,
            expires=expires,
        )
        self._update_from_db(sess, to_update, user_project_lowercase)

        if not do_not_revoke_from_db_and_storage:
            self._validate_and_update_user_admin(sess, user_info_lowercase)

        sess.commit()

        if config["GOOGLE_BULK_UPDATES"]:
            self.logger.info("Doing bulk Google update...")
            update_google_groups_for_users(google_bulk_mapping)
            self.logger.info("Bulk Google update done!")

        sess.commit()
1✔
947

948
    def sync_to_storage_backend(
        self, user_project, user_info, sess, expires, skip_google_updates=False
    ):
        """
        Grant a single user's access in the storage backend with the given
        expiration. Nothing is revoked by this method.

        Args:
            user_project (dict): a dictionary of

                {
                    username: {
                        'project1': {'read-storage','write-storage'},
                        'project2': {'read-storage'}
                    }
                }

            user_info (dict): a dictionary of attributes for a user; the
                   "username" key is read, and "user_id" as well when Google
                   bulk updates are enabled
            sess: a sqlalchemy session
            expires (int): time at which synced Arborist policies and
                   inclusion in any GBAG are set to expire
            skip_google_updates (bool): True if google group updates should be skipped. False if otherwise.
        Return:
            None

        Raises:
            Exception: when no expiration is provided
        """
        if not expires:
            raise Exception(
                f"sync to storage backend requires an expiration. you provided: {expires}"
            )

        google_group_user_mapping = None
        if config["GOOGLE_BULK_UPDATES"]:
            google_group_user_mapping = {}
            get_or_create_proxy_group_id(
                expires=expires,
                user_id=user_info["user_id"],
                username=user_info["username"],
                session=sess,
                storage_manager=self.storage_manager,
            )

        # TODO: eventually it'd be nice to remove this step but it's required
        #       so that grant_from_storage can determine what storage backends
        #       are needed for a project.
        self._init_projects(user_project, sess)

        # usernames are stored case-sensitively in the db but have to be
        # matched case-insensitively, so access is keyed by lowercased username
        user_project_lowercase = {}
        to_add = set()
        for username, projects in user_project.items():
            lowered_username = username.lower()
            user_project_lowercase[lowered_username] = projects
            for project in projects:
                to_add.add((lowered_username, project))

        # the user record is upserted under the lowercased username
        self._upsert_userinfo(sess, {user_info["username"].lower(): user_info})

        if not skip_google_updates:
            self._grant_from_storage(
                to_add,
                user_project_lowercase,
                sess,
                google_bulk_mapping=google_group_user_mapping,
                expires=expires,
            )

            if config["GOOGLE_BULK_UPDATES"]:
                self.logger.info("Updating user's google groups ...")
                update_google_groups_for_users(google_group_user_mapping)
                self.logger.info("Google groups update done!!")

        sess.commit()
1✔
1022

1023
    def _revoke_from_db(self, sess, to_delete):
1✔
1024
        """
1025
        Revoke user access to projects in the auth database
1026

1027
        Args:
1028
            sess: sqlalchemy session
1029
            to_delete: a set of (username, project.auth_id) to be revoked from db
1030
        Return:
1031
            None
1032
        """
1033
        for username, project_auth_id in to_delete:
1✔
1034
            q = (
1✔
1035
                sess.query(AccessPrivilege)
1036
                .filter(AccessPrivilege.project.has(auth_id=project_auth_id))
1037
                .join(AccessPrivilege.user)
1038
                .filter(func.lower(User.username) == username)
1039
                .all()
1040
            )
1041
            for access in q:
1✔
1042
                self.logger.info(
1✔
1043
                    "revoke {} access to {} in db".format(username, project_auth_id)
1044
                )
1045
                sess.delete(access)
1✔
1046

1047
    def _validate_and_update_user_admin(self, sess, user_info):
        """
        Remove the admin flag from every admin user in the database whose
        lowercased username is not a key of user_info (i.e. is not listed in
        the yaml/csv files).

        Args:
            sess: sqlalchemy session
            user_info: a dict of
            {
                username: {
                    'email': email,
                    'display_name': display_name,
                    'phone_number': phonenum,
                    'tags': {'k1':'v1', 'k2': 'v2'}
                    'admin': is_admin
                }
            }
        Returns:
            None
        """
        current_admins = sess.query(User).filter_by(is_admin=True).all()
        for admin_user in current_admins:
            lowered_username = admin_user.username.lower()
            if lowered_username in user_info:
                continue

            admin_user.is_admin = False
            sess.add(admin_user)
            self.logger.info(
                "remove admin access from {} in db".format(lowered_username)
            )
1075

1076
    def _update_from_db(self, sess, to_update, user_project):
1✔
1077
        """
1078
        Update user access to projects in the auth database
1079

1080
        Args:
1081
            sess: sqlalchemy session
1082
            to_update:
1083
                a set of (username, project.auth_id) to be updated from db
1084

1085
        Return:
1086
            None
1087
        """
1088

1089
        for username, project_auth_id in to_update:
1✔
1090
            q = (
1✔
1091
                sess.query(AccessPrivilege)
1092
                .filter(AccessPrivilege.project.has(auth_id=project_auth_id))
1093
                .join(AccessPrivilege.user)
1094
                .filter(func.lower(User.username) == username)
1095
                .all()
1096
            )
1097
            for access in q:
1✔
1098
                access.privilege = user_project[username][project_auth_id]
1✔
1099
                self.logger.info(
1✔
1100
                    "update {} with {} access to {} in db".format(
1101
                        username, access.privilege, project_auth_id
1102
                    )
1103
                )
1104

1105
    def _grant_from_db(self, sess, to_add, user_info, user_project, auth_provider_list):
1✔
1106
        """
1107
        Grant user access to projects in the auth database
1108
        Args:
1109
            sess: sqlalchemy session
1110
            to_add: a set of (username, project.auth_id) to be granted
1111
            user_project:
1112
                a dictionary of {username: {project: {'read','write'}}
1113
        Return:
1114
            None
1115
        """
1116
        for username, project_auth_id in to_add:
1✔
1117
            u = query_for_user(session=sess, username=username)
1✔
1118

1119
            auth_provider = auth_provider_list[0]
1✔
1120
            if "dbgap_role" not in user_info[username]["tags"]:
1✔
1121
                auth_provider = auth_provider_list[1]
1✔
1122
            user_access = AccessPrivilege(
1✔
1123
                user=u,
1124
                project=self._projects[project_auth_id],
1125
                privilege=list(user_project[username][project_auth_id]),
1126
                auth_provider=auth_provider,
1127
            )
1128
            self.logger.info(
1✔
1129
                "grant user {} to {} with access {}".format(
1130
                    username, user_access.project, user_access.privilege
1131
                )
1132
            )
1133
            sess.add(user_access)
1✔
1134

1135
    def _upsert_userinfo(self, sess, user_info):
1✔
1136
        """
1137
        update user info to database.
1138

1139
        Args:
1140
            sess: sqlalchemy session
1141
            user_info:
1142
                a dict of {username: {display_name, phone_number, tags, admin}
1143

1144
        Return:
1145
            None
1146
        """
1147

1148
        for username in user_info:
1✔
1149
            u = query_for_user(session=sess, username=username)
1✔
1150

1151
            if u is None:
1✔
1152
                self.logger.info("create user {}".format(username))
1✔
1153
                u = User(username=username)
1✔
1154
                sess.add(u)
1✔
1155

1156
            if self.arborist_client:
1✔
1157
                self.arborist_client.create_user({"name": username})
1✔
1158

1159
            u.email = user_info[username].get("email", "")
1✔
1160
            u.display_name = user_info[username].get("display_name", "")
1✔
1161
            u.phone_number = user_info[username].get("phone_number", "")
1✔
1162
            u.is_admin = user_info[username].get("admin", False)
1✔
1163

1164
            idp_name = user_info[username].get("idp_name", "")
1✔
1165
            if idp_name and not u.identity_provider:
1✔
1166
                idp = (
×
1167
                    sess.query(IdentityProvider)
1168
                    .filter(IdentityProvider.name == idp_name)
1169
                    .first()
1170
                )
1171
                if not idp:
×
1172
                    idp = IdentityProvider(name=idp_name)
×
1173
                u.identity_provider = idp
×
1174

1175
            # do not update if there is no tag
1176
            if not user_info[username].get("tags"):
1✔
1177
                continue
1✔
1178

1179
            # remove user db tags if they are not shown in new tags
1180
            for tag in u.tags:
1✔
1181
                if tag.key not in user_info[username]["tags"]:
1✔
1182
                    u.tags.remove(tag)
1✔
1183

1184
            # sync
1185
            for k, v in user_info[username]["tags"].items():
1✔
1186
                found = False
1✔
1187
                for tag in u.tags:
1✔
1188
                    if tag.key == k:
1✔
1189
                        found = True
1✔
1190
                        tag.value = v
1✔
1191
                # create new tag if not found
1192
                if not found:
1✔
1193
                    tag = Tag(key=k, value=v)
1✔
1194
                    u.tags.append(tag)
1✔
1195

1196
    def _revoke_from_storage(self, to_delete, sess, google_bulk_mapping=None):
1✔
1197
        """
1198
        If a project have storage backend, revoke user's access to buckets in
1199
        the storage backend.
1200

1201
        Args:
1202
            to_delete: a set of (username, project.auth_id) to be revoked
1203

1204
        Return:
1205
            None
1206
        """
1207
        for username, project_auth_id in to_delete:
1✔
1208
            project = (
1✔
1209
                sess.query(Project).filter(Project.auth_id == project_auth_id).first()
1210
            )
1211
            for sa in project.storage_access:
1✔
1212
                if not hasattr(self, "storage_manager"):
1✔
1213
                    self.logger.error(
×
1214
                        (
1215
                            "CANNOT revoke {} access to {} in {} because there is NO "
1216
                            "configured storage accesses at all. See configuration. "
1217
                            "Continuing anyway..."
1218
                        ).format(username, project_auth_id, sa.provider.name)
1219
                    )
1220
                    continue
×
1221

1222
                self.logger.info(
1✔
1223
                    "revoke {} access to {} in {}".format(
1224
                        username, project_auth_id, sa.provider.name
1225
                    )
1226
                )
1227
                self.storage_manager.revoke_access(
1✔
1228
                    provider=sa.provider.name,
1229
                    username=username,
1230
                    project=project,
1231
                    session=sess,
1232
                    google_bulk_mapping=google_bulk_mapping,
1233
                )
1234

1235
    def _grant_from_storage(
1✔
1236
        self, to_add, user_project, sess, google_bulk_mapping=None, expires=None
1237
    ):
1238
        """
1239
        If a project have storage backend, grant user's access to buckets in
1240
        the storage backend.
1241

1242
        Args:
1243
            to_add: a set of (username, project.auth_id)  to be granted
1244
            user_project: a dictionary like:
1245

1246
                    {username: {phsid: {'read-storage','write-storage'}}}
1247

1248
        Return:
1249
            dict of the users' storage usernames to their user_projects and the respective storage access.
1250
        """
1251
        storage_user_to_sa_and_user_project = defaultdict()
1✔
1252
        for username, project_auth_id in to_add:
1✔
1253
            project = self._projects[project_auth_id]
1✔
1254
            for sa in project.storage_access:
1✔
1255
                access = list(user_project[username][project_auth_id])
1✔
1256
                if not hasattr(self, "storage_manager"):
1✔
1257
                    self.logger.error(
×
1258
                        (
1259
                            "CANNOT grant {} access {} to {} in {} because there is NO "
1260
                            "configured storage accesses at all. See configuration. "
1261
                            "Continuing anyway..."
1262
                        ).format(username, access, project_auth_id, sa.provider.name)
1263
                    )
1264
                    continue
×
1265

1266
                self.logger.info(
1✔
1267
                    "grant {} access {} to {} in {}".format(
1268
                        username, access, project_auth_id, sa.provider.name
1269
                    )
1270
                )
1271
                storage_username = self.storage_manager.grant_access(
1✔
1272
                    provider=sa.provider.name,
1273
                    username=username,
1274
                    project=project,
1275
                    access=access,
1276
                    session=sess,
1277
                    google_bulk_mapping=google_bulk_mapping,
1278
                    expires=expires,
1279
                )
1280

1281
                storage_user_to_sa_and_user_project[storage_username] = (sa, project)
1✔
1282
        return storage_user_to_sa_and_user_project
1✔
1283

1284
    def _init_projects(self, user_project, sess):
1✔
1285
        """
1286
        initialize projects
1287
        """
1288

1289
        if self.project_mapping:
1✔
1290
            for projects in list(self.project_mapping.values()):
1✔
1291
                for p in projects:
1✔
1292
                    self.logger.debug(
1✔
1293
                        "creating Project with info from project_mapping: {}".format(p)
1294
                    )
1295
                    project = self._get_or_create(sess, Project, **p)
1✔
1296
                    self._projects[p["auth_id"]] = project
1✔
1297
        for _, projects in user_project.items():
1✔
1298
            for auth_id in list(projects.keys()):
1✔
1299
                project = sess.query(Project).filter(Project.auth_id == auth_id).first()
1✔
1300
                if not project:
1✔
1301
                    data = {"name": auth_id, "auth_id": auth_id}
1✔
1302
                    try:
1✔
1303
                        project = self._get_or_create(sess, Project, **data)
1✔
1304
                    except IntegrityError as e:
×
1305
                        sess.rollback()
×
1306
                        self.logger.error(
×
1307
                            f"Project {auth_id} already exists. Detail {str(e)}"
1308
                        )
1309
                        raise Exception(
×
1310
                            "Project {} already exists. Detail {}. Please contact your system administrator.".format(
1311
                                auth_id, str(e)
1312
                            )
1313
                        )
1314
                if auth_id not in self._projects:
1✔
1315
                    self._projects[auth_id] = project
1✔
1316

1317
    @staticmethod
1✔
1318
    def _get_or_create(sess, model, **kwargs):
1✔
1319
        instance = sess.query(model).filter_by(**kwargs).first()
1✔
1320
        if not instance:
1✔
1321
            instance = model(**kwargs)
1✔
1322
            sess.add(instance)
1✔
1323
        return instance
1✔
1324

1325
    def _process_dbgap_files(self, dbgap_config, sess):
1✔
1326
        """
1327
        Args:
1328
            dbgap_config : a dictionary containing information about a single
1329
                           dbgap sftp server (from fence config)
1330
            sess: database session
1331

1332
        Return:
1333
            user_projects (dict)
1334
            user_info (dict)
1335
        """
1336
        dbgap_file_list = []
1✔
1337
        hostname = dbgap_config["info"]["host"]
1✔
1338
        username = dbgap_config["info"]["username"]
1✔
1339
        encrypted = dbgap_config["info"].get("encrypted", True)
1✔
1340
        folderdir = os.path.join(str(self.folder), str(hostname), str(username))
1✔
1341

1342
        try:
1✔
1343
            if os.path.exists(folderdir):
1✔
1344
                dbgap_file_list = glob.glob(
×
1345
                    os.path.join(folderdir, "*")
1346
                )  # get lists of file from folder
1347
            else:
1348
                self.logger.info("Downloading files from: {}".format(hostname))
1✔
1349
                dbgap_file_list = self._download(dbgap_config)
1✔
1350
        except Exception as e:
1✔
1351
            self.logger.error(e)
1✔
1352
            exit(1)
1✔
1353
        self.logger.info("dbgap files: {}".format(dbgap_file_list))
×
1354
        user_projects, user_info = self._get_user_permissions_from_csv_list(
×
1355
            dbgap_file_list,
1356
            encrypted=encrypted,
1357
            session=sess,
1358
            dbgap_config=dbgap_config,
1359
        )
1360

1361
        user_projects = self.parse_projects(user_projects)
×
1362
        return user_projects, user_info
×
1363

1364
    def _get_user_permissions_from_csv_list(
1✔
1365
        self, file_list, encrypted, session, dbgap_config={}
1366
    ):
1367
        """
1368
        Args:
1369
            file_list: list of files (represented as strings)
1370
            encrypted: boolean indicating whether those files are encrypted
1371
            session: sqlalchemy session
1372
            dbgap_config: a dictionary containing information about the dbGaP sftp server
1373
                    (comes from fence config)
1374

1375
        Return:
1376
            user_projects (dict)
1377
            user_info (dict)
1378
        """
1379
        permissions = [{"read-storage", "read"} for _ in file_list]
1✔
1380
        user_projects, user_info = self._parse_csv(
1✔
1381
            dict(list(zip(file_list, permissions))),
1382
            sess=session,
1383
            dbgap_config=dbgap_config,
1384
            encrypted=encrypted,
1385
        )
1386
        return user_projects, user_info
1✔
1387

1388
    def _merge_multiple_local_csv_files(
1✔
1389
        self, dbgap_file_list, encrypted, dbgap_configs, session
1390
    ):
1391
        """
1392
        Args:
1393
            dbgap_file_list (list): a list of whitelist file locations stored locally
1394
            encrypted (bool): whether the file is encrypted (comes from fence config)
1395
            dbgap_configs (list): list of dictionaries containing information about the dbgap server (comes from fence config)
1396
            session (sqlalchemy.Session): database session
1397

1398
        Return:
1399
            merged_user_projects (dict)
1400
            merged_user_info (dict)
1401
        """
1402
        merged_user_projects = {}
1✔
1403
        merged_user_info = {}
1✔
1404

1405
        for dbgap_config in dbgap_configs:
1✔
1406
            user_projects, user_info = self._get_user_permissions_from_csv_list(
1✔
1407
                dbgap_file_list,
1408
                encrypted,
1409
                session=session,
1410
                dbgap_config=dbgap_config,
1411
            )
1412
            self.sync_two_user_info_dict(user_info, merged_user_info)
1✔
1413
            self.sync_two_phsids_dict(user_projects, merged_user_projects)
1✔
1414
        return merged_user_projects, merged_user_info
1✔
1415

1416
    def _merge_multiple_dbgap_sftp(self, dbgap_servers, sess):
1✔
1417
        """
1418
        Args:
1419
            dbgap_servers : a list of dictionaries each containging config on
1420
                           dbgap sftp server (comes from fence config)
1421
            sess: database session
1422

1423
        Return:
1424
            merged_user_projects (dict)
1425
            merged_user_info (dict)
1426
        """
1427
        merged_user_projects = {}
1✔
1428
        merged_user_info = {}
1✔
1429
        for dbgap in dbgap_servers:
1✔
1430
            user_projects, user_info = self._process_dbgap_files(dbgap, sess)
1✔
1431
            # merge into merged_user_info
1432
            # user_info overrides original info in merged_user_info
1433
            self.sync_two_user_info_dict(user_info, merged_user_info)
1✔
1434

1435
            # merge all access info dicts into "merged_user_projects".
1436
            # the access info is combined - if the user_projects access is
1437
            # ["read"] and the merged_user_projects is ["read-storage"], the
1438
            # resulting access is ["read", "read-storage"].
1439
            self.sync_two_phsids_dict(user_projects, merged_user_projects)
1✔
1440
        return merged_user_projects, merged_user_info
1✔
1441

1442
    def parse_projects(self, user_projects):
        """
        Return a copy of user_projects whose keys are lowercased.

        When several keys only differ by case, the value of the last one
        (in iteration order) is kept.
        """
        lowered = {}
        for key, value in user_projects.items():
            lowered[key.lower()] = value
        return lowered
1✔
1447

1448
    def _process_dbgap_project(
1✔
1449
        self, dbgap_project, privileges, username, sess, user_projects, dbgap_config
1450
    ):
1451
        if dbgap_project not in self.project_mapping:
1✔
1452
            self._add_dbgap_project_for_user(
1✔
1453
                dbgap_project,
1454
                privileges,
1455
                username,
1456
                sess,
1457
                user_projects,
1458
                dbgap_config,
1459
            )
1460

1461
        for element_dict in self.project_mapping.get(dbgap_project, []):
1✔
1462
            try:
1✔
1463
                phsid_privileges = {element_dict["auth_id"]: set(privileges)}
1✔
1464

1465
                # need to add dbgap project to arborist
1466
                if self.arborist_client:
1✔
1467
                    self._determine_arborist_resource(
1✔
1468
                        element_dict["auth_id"], dbgap_config
1469
                    )
1470

1471
                if username not in user_projects:
1✔
1472
                    user_projects[username] = {}
1✔
1473
                user_projects[username].update(phsid_privileges)
1✔
1474

1475
            except ValueError as e:
×
1476
                self.logger.info(e)
×
1477

1478
    def _process_user_projects(
1✔
1479
        self,
1480
        user_projects,
1481
        enable_common_exchange_area_access,
1482
        study_common_exchange_areas,
1483
        dbgap_config,
1484
        sess,
1485
    ):
1486
        user_projects_to_modify = copy.deepcopy(user_projects)
1✔
1487
        for username in user_projects.keys():
1✔
1488
            for project in user_projects[username].keys():
1✔
1489
                phsid = project.split(".")
1✔
1490
                dbgap_project = phsid[0]
1✔
1491
                privileges = user_projects[username][project]
1✔
1492
                if len(phsid) > 1 and self._get_parse_consent_code(dbgap_config):
1✔
1493
                    consent_code = phsid[-1]
1✔
1494

1495
                    # c999 indicates full access to all consents and access
1496
                    # to a study-specific exchange area
1497
                    # access to at least one study-specific exchange area implies access
1498
                    # to the parent study's common exchange area
1499
                    #
1500
                    # NOTE: Handling giving access to all consents is done at
1501
                    #       a later time, when we have full information about possible
1502
                    #       consents
1503
                    self.logger.debug(
1✔
1504
                        f"got consent code {consent_code} from dbGaP project "
1505
                        f"{dbgap_project}"
1506
                    )
1507
                    if (
1✔
1508
                        consent_code == "c999"
1509
                        and enable_common_exchange_area_access
1510
                        and dbgap_project in study_common_exchange_areas
1511
                    ):
1512
                        self.logger.info(
1✔
1513
                            "found study with consent c999 and Fence "
1514
                            "is configured to parse exchange area data. Giving user "
1515
                            f"{username} {privileges} privileges in project: "
1516
                            f"{study_common_exchange_areas[dbgap_project]}."
1517
                        )
1518
                        self._add_dbgap_project_for_user(
1✔
1519
                            study_common_exchange_areas[dbgap_project],
1520
                            privileges,
1521
                            username,
1522
                            sess,
1523
                            user_projects_to_modify,
1524
                            dbgap_config,
1525
                        )
1526

1527
                    dbgap_project += "." + consent_code
1✔
1528

1529
                self._add_children_for_dbgap_project(
1✔
1530
                    dbgap_project,
1531
                    privileges,
1532
                    username,
1533
                    sess,
1534
                    user_projects_to_modify,
1535
                    dbgap_config,
1536
                )
1537

1538
                self._process_dbgap_project(
1✔
1539
                    dbgap_project,
1540
                    privileges,
1541
                    username,
1542
                    sess,
1543
                    user_projects_to_modify,
1544
                    dbgap_config,
1545
                )
1546
        for user in user_projects_to_modify.keys():
1✔
1547
            user_projects[user] = user_projects_to_modify[user]
1✔
1548

1549
    def sync(self):
        """
        Run ``_sync`` with ``self.session`` when one is set; otherwise open a
        session from ``self.driver`` for the duration of the sync.
        """
        if not self.session:
            with self.driver.session as db_session:
                self._sync(db_session)
            return
        self._sync(self.session)
×
1555

1556
    def download(self):
        """
        Download the files of every dbGaP server configured in ``self.dbGaP``.

        The file lists returned by ``_download`` are discarded.
        """
        for server_config in self.dbGaP:
            self._download(server_config)
×
1559

1560
    def _download(self, dbgap_config):
1✔
1561
        """
1562
        Download files from dbgap server.
1563
        """
1564
        server = dbgap_config["info"]
1✔
1565
        protocol = dbgap_config["protocol"]
1✔
1566
        hostname = server["host"]
1✔
1567
        username = server["username"]
1✔
1568
        folderdir = os.path.join(str(self.folder), str(hostname), str(username))
1✔
1569

1570
        if not os.path.exists(folderdir):
1✔
1571
            os.makedirs(folderdir)
1✔
1572

1573
        self.logger.info("Download from server")
1✔
1574
        try:
1✔
1575
            if protocol == "sftp":
1✔
1576
                self._get_from_sftp_with_proxy(server, folderdir)
1✔
1577
            else:
1578
                self._get_from_ftp_with_proxy(server, folderdir)
×
1579
            dbgap_files = glob.glob(os.path.join(folderdir, "*"))
×
1580
            return dbgap_files
×
1581
        except Exception as e:
1✔
1582
            self.logger.error(e)
1✔
1583
            raise
1✔
1584

1585
    def _sync(self, sess):
        """
        Collect files from dbgap server(s), sync csv and yaml files to storage
        backend and fence DB

        Access information is gathered from three sources (dbGaP servers, local CSV
        files, the local user.yaml), merged, written to the Fence DB / storage
        backend and then to Arborist.

        Args:
            sess: database session handed to every helper that touches the Fence DB

        Raises:
            EnvironmentError, AssertionError: re-raised when the user.yaml cannot be
                loaded; EnvironmentError is also raised when the yaml has an
                ``authz`` section but no arborist client is configured
            SystemExit: with code 1 when one of the Arborist sync steps reports failure
            GoogleUpdateException: caught during the DB/storage sync and re-raised
                only after all remaining steps have run
        """
        # get all dbgap files
        user_projects = {}
        user_info = {}
        if self.is_sync_from_dbgap_server:
            self.logger.debug(
                "Pulling telemetry files from {} dbgap sftp servers".format(
                    len(self.dbGaP)
                )
            )
            user_projects, user_info = self._merge_multiple_dbgap_sftp(self.dbGaP, sess)

        local_csv_file_list = []
        if self.sync_from_local_csv_dir:
            local_csv_file_list = glob.glob(
                os.path.join(self.sync_from_local_csv_dir, "*")
            )
            # Sort the list so the order of files is consistent across platforms
            local_csv_file_list.sort()

        user_projects_csv, user_info_csv = self._merge_multiple_local_csv_files(
            local_csv_file_list,
            encrypted=False,
            session=sess,
            dbgap_configs=self.dbGaP,
        )

        try:
            user_yaml = UserYAML.from_file(
                self.sync_from_local_yaml_file, encrypted=False, logger=self.logger
            )
        except (EnvironmentError, AssertionError) as e:
            self.logger.error(str(e))
            self.logger.error("aborting early")
            raise

        # parse all projects
        user_projects_csv = self.parse_projects(user_projects_csv)
        user_projects = self.parse_projects(user_projects)
        user_yaml.projects = self.parse_projects(user_yaml.projects)

        # merge all user info dicts into "user_info".
        # the user info (such as email) in the user.yaml files
        # overrides the user info from the CSV files.
        self.sync_two_user_info_dict(user_info_csv, user_info)
        self.sync_two_user_info_dict(user_yaml.user_info, user_info)

        # merge all access info dicts into "user_projects".
        # the access info is combined - if the user.yaml access is
        # ["read"] and the CSV file access is ["read-storage"], the
        # resulting access is ["read", "read-storage"].
        self.sync_two_phsids_dict(
            user_projects_csv, user_projects, source1="local_csv", source2="dbgap"
        )
        self.sync_two_phsids_dict(
            user_yaml.projects, user_projects, source1="user_yaml", source2="dbgap"
        )

        # Note: every configured dbgap server is checked, and the c999 expansion
        # runs once for each config that has consent code parsing enabled
        for dbgap_config in self.dbGaP:
            if self._get_parse_consent_code(dbgap_config):
                self._grant_all_consents_to_c999_users(
                    user_projects, user_yaml.project_to_resource
                )

        google_update_ex = None

        try:
            # update the Fence DB
            if user_projects:
                self.logger.info("Sync to db and storage backend")
                self.sync_to_db_and_storage_backend(user_projects, user_info, sess)
                self.logger.info("Finish syncing to db and storage backend")
            else:
                self.logger.info("No users for syncing")
        except GoogleUpdateException as ex:
            # save this to reraise later after all non-Google syncing has finished
            # this way, any issues with Google only affect Google data access and don't
            # cascade problems into non-Google AWS or Azure access
            google_update_ex = ex

        # update the Arborist DB (resources, roles, policies, groups)
        if user_yaml.authz:
            if not self.arborist_client:
                raise EnvironmentError(
                    "yaml file contains authz section but sync is not configured with"
                    " arborist client--did you run sync with --arborist <arborist client> arg?"
                )
            self.logger.info("Synchronizing arborist...")
            success = self._update_arborist(user_yaml)
            if success:
                self.logger.info("Finished synchronizing arborist")
            else:
                self.logger.error("Could not synchronize successfully")
                # SystemExit is raised directly: the `exit` builtin is installed by
                # the `site` module for interactive use and is not always available
                raise SystemExit(1)
        else:
            self.logger.info("No `authz` section; skipping arborist sync")

        # update the Arborist DB (user access)
        if self.arborist_client:
            self.logger.info("Synchronizing arborist with authorization info...")
            success = self._update_authz_in_arborist(sess, user_projects, user_yaml)
            if success:
                self.logger.info(
                    "Finished synchronizing authorization info to arborist"
                )
            else:
                self.logger.error(
                    "Could not synchronize authorization info successfully to arborist"
                )
                raise SystemExit(1)
        else:
            self.logger.error("No arborist client set; skipping arborist sync")

        # Logging authz source
        for u, s in self.auth_source.items():
            self.logger.info("Access for user {} from {}".format(u, s))

        self.logger.info(
            f"Persisting authz mapping to database: {user_yaml.project_to_resource}"
        )
        user_yaml.persist_project_to_resource(db_session=sess)
        if google_update_ex is not None:
            # surface the Google failure now that everything else has been synced
            raise google_update_ex
1✔
1715

1716
    def _grant_all_consents_to_c999_users(
        self, user_projects, user_yaml_project_to_resources
    ):
        """
        Give users holding a ``c999`` consent access to every known consent of that study.

        Known accessions are taken from ``self._projects`` and from the keys of
        ``user_yaml_project_to_resources``; only names matching the
        ``DBGAP_ACCESSION_WITH_CONSENT_REGEX`` config pattern are considered.

        Args:
            user_projects (dict): username -> {project: privileges}; updated in place
            user_yaml_project_to_resources (dict): project -> resource mapping from
                the user.yaml (only its keys are used here)
        """
        accession_pattern = re.compile(config["DBGAP_ACCESSION_WITH_CONSENT_REGEX"])

        # combine dbgap/user.yaml projects into one big list (in case not all consents
        # are in either)
        known_projects = list(self._projects.keys())
        known_projects = known_projects + list(user_yaml_project_to_resources.keys())
        all_projects = set(known_projects)

        self.logger.debug(f"all projects: {all_projects}")

        # construct a mapping from phsid (without consent) to all accessions with consent
        consent_mapping = {}
        for project in all_projects:
            matched = accession_pattern.match(project)
            if not matched:
                continue
            accession = matched.groupdict()
            phsid = accession["phsid"]
            consent = accession["consent"]

            # TODO: This is not handling the .v1.p1 at all
            consent_mapping.setdefault(phsid, set()).add(".".join([phsid, consent]))

            child_studies = self._get_children(phsid)
            if not child_studies:
                continue
            for child_phs in child_studies:
                # Assign parent consent to child study
                consent_mapping.setdefault(child_phs, set()).add(
                    ".".join([child_phs, consent])
                )

        self.logger.debug(f"consent mapping: {consent_mapping}")

        # go through existing access and find any c999's and make sure to give access to
        # all accessions with consent for that phsid. A deep copy is iterated because
        # user_projects itself is modified inside the loop.
        for username, granted_projects in copy.deepcopy(user_projects).items():
            for project in granted_projects:
                matched = accession_pattern.match(project)
                if not matched or matched.groupdict()["consent"] != "c999":
                    continue

                # give access to all consents
                all_phsids_with_consent = consent_mapping.get(
                    matched.groupdict()["phsid"], []
                )
                self.logger.info(
                    f"user {username} has c999 consent group for: {project}. "
                    f"Granting access to all consents: {all_phsids_with_consent}"
                )
                # NOTE: each accession gets exactly read-storage and read; this
                #       replaces whatever privileges were stored for it before
                for phsid_with_consent in all_phsids_with_consent:
                    user_projects[username][phsid_with_consent] = {
                        "read-storage",
                        "read",
                    }
1770

1771
    def _update_arborist(self, user_yaml):
        """
        Create roles, resources, policies, groups in arborist from the information in
        ``user_yaml``.

        The projects are sent to arborist as resources with paths like
        ``/projects/{project}``. Roles are created with just the original names
        for the privileges like ``"read-storage", "read"`` etc.

        ArboristError raised while updating a single resource, role, policy or group
        is logged and the sync continues with the next item.

        Args:
            user_yaml (UserYAML)

        Return:
            bool: success (False only when arborist is not healthy)
        """
        healthy = self._is_arborist_healthy()
        if not healthy:
            return False

        # Set up the resource tree in arborist by combining provided resources with any
        # dbgap resources that were created before this.
        #
        # Why add dbgap resources if they've already been created?
        #   B/C Arborist's PUT update will override existing subresources. So if a dbgap
        #   resources was created under `/programs/phs000178` anything provided in
        #   user.yaml under `/programs` would completely wipe it out.
        resources = user_yaml.authz.get("resources", [])

        dbgap_resource_paths = []
        for path_list in self._dbgap_study_to_resources.values():
            dbgap_resource_paths.extend(path_list)

        self.logger.debug("user_yaml resources: {}".format(resources))
        self.logger.debug("dbgap resource paths: {}".format(dbgap_resource_paths))

        combined_resources = utils.combine_provided_and_dbgap_resources(
            resources, dbgap_resource_paths
        )

        for resource in combined_resources:
            try:
                self.logger.debug(
                    "attempting to update arborist resource: {}".format(resource)
                )
                self.arborist_client.update_resource("/", resource, merge=True)
            except ArboristError as e:
                self.logger.error(e)
                # keep going; maybe just some conflicts from things existing already

        # update roles
        roles = user_yaml.authz.get("roles", [])
        for role in roles:
            try:
                response = self.arborist_client.update_role(role["id"], role)
                if response:
                    self._created_roles.add(role["id"])
            except ArboristError as e:
                self.logger.info(
                    "couldn't update role '{}', creating instead".format(str(e))
                )
                try:
                    response = self.arborist_client.create_role(role)
                    if response:
                        self._created_roles.add(role["id"])
                except ArboristError as e:
                    self.logger.error(e)
                    # keep going; maybe just some conflicts from things existing already

        # update policies
        policies = user_yaml.authz.get("policies", [])
        for policy in policies:
            # The id is passed separately from the policy body. It is removed from a
            # shallow copy so the user.yaml data is left untouched; popping it from
            # the original dict would make a later pass over the same policies fail
            # with KeyError.
            policy_body = dict(policy)
            policy_id = policy_body.pop("id")
            try:
                self.logger.debug(
                    "Trying to upsert policy with id {}".format(policy_id)
                )
                response = self.arborist_client.update_policy(
                    policy_id, policy_body, create_if_not_exist=True
                )
            except ArboristError as e:
                self.logger.error(e)
                # keep going; maybe just some conflicts from things existing already
            else:
                if response:
                    self.logger.debug("Upserted policy with id {}".format(policy_id))
                    self._created_policies.add(policy_id)

        # update groups
        groups = user_yaml.authz.get("groups", [])

        # delete from arborist the groups that have been deleted
        # from the user.yaml
        arborist_groups = set(
            g["name"] for g in self.arborist_client.list_groups().get("groups", [])
        )
        useryaml_groups = set(g["name"] for g in groups)
        for deleted_group in arborist_groups.difference(useryaml_groups):
            # do not try to delete built in groups
            if deleted_group not in ["anonymous", "logged-in"]:
                self.arborist_client.delete_group(deleted_group)

        # create/update the groups defined in the user.yaml
        for group in groups:
            missing = {"name", "users", "policies"}.difference(set(group.keys()))
            if missing:
                name = group.get("name", "{MISSING NAME}")
                self.logger.error(
                    "group {} missing required field(s): {}".format(name, list(missing))
                )
                continue
            try:
                self.arborist_client.put_group(
                    group["name"],
                    # Arborist doesn't handle group descriptions yet
                    # description=group.get("description", ""),
                    users=group["users"],
                    policies=group["policies"],
                )
            except ArboristError as e:
                self.logger.info("couldn't put group: {}".format(str(e)))

        # Update policies for built-in (`anonymous` and `logged-in`) groups

        # First recreate these groups in order to clear out old, possibly deleted policies
        for builtin_group in ["anonymous", "logged-in"]:
            try:
                self.arborist_client.put_group(builtin_group)
            except ArboristError as e:
                self.logger.info("couldn't put group: {}".format(str(e)))

        # Now add back policies that are in the user.yaml
        for policy in user_yaml.authz.get("anonymous_policies", []):
            self.arborist_client.grant_group_policy("anonymous", policy)

        for policy in user_yaml.authz.get("all_users_policies", []):
            self.arborist_client.grant_group_policy("logged-in", policy)

        return True
1✔
1910

1911
    def _grant_arborist_policies(
1✔
1912
        self,
1913
        username,
1914
        incoming_policies,
1915
        user_yaml,
1916
        expires=None,
1917
        remove_users_with_no_policies=True,
1918
    ):
1919
        """
1920
        Find the difference between the existing policies for a user and the incoming policies,
1921
        and decide whether to add, remove, or keep policies.
1922

1923
        Args:
1924
            username (str): the username of the user
1925
            incoming_policies (set): set of policies to be applied to the user
1926
            user_yaml (UserYAML): UserYAML object containing authz information
1927
            expires (int): time at which authz info in Arborist should expire
1928
            remove_users_with_no_policies (bool): whether to delete users with no access from
1929
                the Arborist database
1930
        """
1931
        user_existing_policies = set()
1✔
1932
        to_add = set()
1✔
1933
        to_remove = set()
1✔
1934
        is_revoke_all = False
1✔
1935

1936
        try:
1✔
1937
            user_existing_policies = set(
1✔
1938
                policy["policy"]
1939
                for policy in self.arborist_client.get_user(username)["policies"]
1940
            )
1941
            self.logger.info(
1✔
1942
                f"Fetched user {username} existing policies: {user_existing_policies}"
1943
            )
1944
        except ArboristError as e:
1✔
1945
            self.logger.error(
1✔
1946
                f"Could not get user {username} policies from Arborist: {e}. Revoking all policies..."
1947
            )
1948
            # if getting existing policies fails, revoke all policies and re-apply
1949
            is_revoke_all = True
1✔
1950

1951
        if user_yaml:
1✔
1952
            anonymous_policies = set(
1✔
1953
                user_yaml.authz.get("anonymous_policies", [])
1954
                + user_yaml.authz.get("all_users_policies", [])
1955
            )
1956
            user_existing_policies = user_existing_policies - anonymous_policies
1✔
1957

1958
        if is_revoke_all is False and len(incoming_policies) > 0:
1✔
1959
            to_add = incoming_policies - user_existing_policies
1✔
1960
            to_remove = user_existing_policies - incoming_policies
1✔
1961
        else:
1962
            # if incoming_policies is empty, we revoke all policies
1963
            is_revoke_all = True
1✔
1964

1965
        if not is_revoke_all:
1✔
1966
            success = not to_remove
1✔
1967
            try:
1✔
1968
                if to_remove:
1✔
1969
                    for policy in to_remove:
1✔
1970
                        self.logger.info(
1✔
1971
                            f"Revoking policy {policy} for user {username}."
1972
                        )
1973
                        success = self.arborist_client.revoke_user_policy(
1✔
1974
                            username, policy
1975
                        )
1976
            except ArboristError as e:
×
1977
                self.logger.error(
×
1978
                    f"Could not revoke user {username} policy {policy}: {e}"
1979
                )
1980
            if not success:
1✔
1981
                # `revoke_user_policy` returns None in case of error
1982
                self.logger.error(
×
1983
                    f"Could not revoke user {username} policy. Revoking all instead."
1984
                )
1985
                is_revoke_all = True
×
1986

1987
        if is_revoke_all:
1✔
1988
            if (
1✔
1989
                remove_users_with_no_policies
1990
                and not incoming_policies
1991
                and not user_existing_policies
1992
            ):
1993
                # user without any access (other than anonymous and logged-in groups).
1994
                # cleanup: remove from the arborist DB so we do not check their access again every
1995
                # time this code runs.
1996
                self.logger.info(
1✔
1997
                    f"Deleting user {username} from Arborist (since they have no policies)."
1998
                )
1999
                self.arborist_client.delete_user(username)
1✔
2000
                return
1✔
2001
            success = False
1✔
2002
            try:
1✔
2003
                # Note: If a user only has group policies, we call `revoke_all_policies_for_user`
2004
                # for nothing. Could be fixed by adding a flag to the arborist "get user" endpoint
2005
                # to get the list of policies _excluding_ group policies, or by manually checking
2006
                # which policies are group policies (not worth it atm).
2007
                self.logger.info(f"Revoking all policies for user {username}.")
1✔
2008
                success = self.arborist_client.revoke_all_policies_for_user(username)
1✔
2009
            except ArboristError as e:
×
2010
                self.logger.error(
×
2011
                    f"Could not revoke all policies for user {username}. Error: {e}"
2012
                )
2013
            if not success:
1✔
2014
                # `revoke_all_policies_for_user` returns None in case of error
2015
                raise Exception(f"Could not revoke all policies for user {username}")
×
2016
            to_add = incoming_policies  # if we revoke all, we need to add all incoming policies
1✔
2017

2018
        if (
1✔
2019
            "mfa_policy" not in incoming_policies
2020
            and "mfa_policy" in user_existing_policies
2021
        ):
2022
            to_add.add("mfa_policy")
×
2023

2024
        if to_add:
1✔
2025
            self.logger.info(f"Bulk granting user {username} policies {to_add}.")
1✔
2026
            self._grant_bulk_user_policies(username, to_add, expires)
1✔
2027

2028
    def _update_authz_in_arborist(
1✔
2029
        self,
2030
        session,
2031
        user_projects,
2032
        user_yaml=None,
2033
        single_user_sync=False,
2034
        expires=None,
2035
    ):
2036
        """
2037
        Assign users policies in arborist from the information in
2038
        ``user_projects`` and optionally a ``user_yaml``.
2039

2040
        The projects are sent to arborist as resources with paths like
2041
        ``/projects/{project}``. Roles are created with just the original names
2042
        for the privileges like ``"read-storage", "read"`` etc.
2043

2044
        Args:
2045
            user_projects (dict)
2046
            user_yaml (UserYAML) optional, if there are policies for users in a user.yaml
2047
            single_user_sync (bool) whether authz update is for a single user
2048
            expires (int) time at which authz info in Arborist should expire
2049

2050
        Return:
2051
            bool: success
2052
        """
2053
        healthy = self._is_arborist_healthy()
1✔
2054
        if not healthy:
1✔
2055
            return False
×
2056

2057
        self.logger.debug("user_projects: {}".format(user_projects))
1✔
2058

2059
        if user_yaml:
1✔
2060
            self.logger.debug(
1✔
2061
                "useryaml abac before lowering usernames: {}".format(
2062
                    user_yaml.user_abac
2063
                )
2064
            )
2065
            user_yaml.user_abac = {
1✔
2066
                key.lower(): value for key, value in user_yaml.user_abac.items()
2067
            }
2068
            # update the project info with `projects` specified in user.yaml
2069
            self.sync_two_phsids_dict(user_yaml.user_abac, user_projects)
1✔
2070

2071
        # get list of users from arborist to make sure users that are completely removed
2072
        # from authorization sources get policies revoked
2073

2074
        arborist_user_projects = {}
1✔
2075
        if not single_user_sync:
1✔
2076

2077
            try:
1✔
2078
                arborist_users = self.arborist_client.get_users().json["users"]
1✔
2079

2080
                # construct user information, NOTE the lowering of the username. when adding/
2081
                # removing access, the case in the Fence db is used. For combining access, it is
2082
                # case-insensitive, so we lower
2083
                arborist_user_projects = {
1✔
2084
                    user["name"].lower(): {} for user in arborist_users
2085
                }
2086
            except (ArboristError, KeyError, AttributeError) as error:
×
2087
                # TODO usersync should probably exit with non-zero exit code at the end,
2088
                #      but sync should continue from this point so there are no partial
2089
                #      updates
2090
                self.logger.warning(
×
2091
                    "Could not get list of users in Arborist, continuing anyway. "
2092
                    "WARNING: this sync will NOT remove access for users no longer in "
2093
                    f"authorization sources. Error: {error}"
2094
                )
2095

2096
            # update the project info with users from arborist
2097
            self.sync_two_phsids_dict(arborist_user_projects, user_projects)
1✔
2098

2099
        # prefer in-memory if available from user_yaml, if not, get from database
2100
        if user_yaml and user_yaml.project_to_resource:
1✔
2101
            project_to_authz_mapping = user_yaml.project_to_resource
1✔
2102
            self.logger.debug(
1✔
2103
                f"using in-memory project to authz resource mapping from "
2104
                f"user.yaml (instead of database): {project_to_authz_mapping}"
2105
            )
2106
        else:
2107
            project_to_authz_mapping = get_project_to_authz_mapping(session)
1✔
2108
            self.logger.debug(
1✔
2109
                f"using persisted project to authz resource mapping from database "
2110
                f"(instead of user.yaml - as it may not be available): {project_to_authz_mapping}"
2111
            )
2112

2113
        self.logger.debug(
1✔
2114
            f"_dbgap_study_to_resources: {self._dbgap_study_to_resources}"
2115
        )
2116
        all_resources = [
1✔
2117
            r
2118
            for resources in self._dbgap_study_to_resources.values()
2119
            for r in resources
2120
        ]
2121
        all_resources.extend(r for r in project_to_authz_mapping.values())
1✔
2122
        self._create_arborist_resources(all_resources)
1✔
2123

2124
        for username, user_project_info in user_projects.items():
1✔
2125
            self.logger.info("processing user `{}`".format(username))
1✔
2126
            user = query_for_user(session=session, username=username)
1✔
2127
            idp = None
1✔
2128
            if user:
1✔
2129
                username = user.username
1✔
2130
                idp = user.identity_provider.name if user.identity_provider else None
1✔
2131

2132
            self.arborist_client.create_user_if_not_exist(username)
1✔
2133

2134
            # as of 2/11/2022, for single_user_sync, as RAS visa parsing has
2135
            # previously mapped each project to the same set of privileges
2136
            # (i.e.{'read', 'read-storage'}), unique_policies will just be a
2137
            # single policy with ('read', 'read-storage') being the single
2138
            # key
2139
            unique_policies = self._determine_unique_policies(
1✔
2140
                user_project_info, project_to_authz_mapping
2141
            )
2142
            for roles in unique_policies.keys():
1✔
2143
                for role in roles:
1✔
2144
                    self._create_arborist_role(role)
1✔
2145

2146
            incoming_policies = set()  # set of policies for current user.
1✔
2147

2148
            if single_user_sync:
1✔
2149
                for ordered_roles, ordered_resources in unique_policies.items():
1✔
2150
                    policy_hash = self._hash_policy_contents(
1✔
2151
                        ordered_roles, ordered_resources
2152
                    )
2153
                    self._create_arborist_policy(
1✔
2154
                        policy_hash,
2155
                        ordered_roles,
2156
                        ordered_resources,
2157
                        skip_if_exists=True,
2158
                    )
2159
                    # return here as it is not expected single_user_sync
2160
                    # will need any of the remaining user_yaml operations
2161
                    # left in _update_authz_in_arborist
2162
                    return self._grant_arborist_policy(
1✔
2163
                        username, policy_hash, expires=expires
2164
                    )
2165
            else:
2166
                policy_ids_to_grant = set()
1✔
2167
                for roles, resources in unique_policies.items():
1✔
2168
                    for role in roles:
1✔
2169
                        for resource in resources:
1✔
2170
                            # grant a policy to this user which is a single
2171
                            # role on a single resource
2172

2173
                            # format project '/x/y/z' -> 'x.y.z'
2174
                            # so the policy id will be something like 'x.y.z-create'
2175
                            policy_id = _format_policy_id(resource, role)
1✔
2176
                            incoming_policies.add(policy_id)
1✔
2177
                            if policy_id not in self._created_policies:
1✔
2178
                                try:
1✔
2179
                                    self.arborist_client.update_policy(
1✔
2180
                                        policy_id,
2181
                                        {
2182
                                            "description": "policy created by fence sync",
2183
                                            "role_ids": [role],
2184
                                            "resource_paths": [resource],
2185
                                        },
2186
                                        create_if_not_exist=True,
2187
                                    )
2188
                                except ArboristError as e:
×
2189
                                    self.logger.info(
×
2190
                                        "not creating policy in arborist; {}".format(
2191
                                            str(e)
2192
                                        )
2193
                                    )
2194
                                self._created_policies.add(policy_id)
1✔
2195
                            policy_ids_to_grant.add(policy_id)
1✔
2196
                self._grant_arborist_policies(
1✔
2197
                    username,
2198
                    policy_ids_to_grant,
2199
                    user_yaml=None,
2200
                    expires=expires,
2201
                    remove_users_with_no_policies=False,
2202
                )
2203

2204
            if user_yaml:
1✔
2205
                user_yaml_policies = set(user_yaml.policies.get(username, []))
1✔
2206
                incoming_policies = (
1✔
2207
                    incoming_policies | user_yaml_policies
2208
                )  # add policies from whitelist and useryaml
2209

2210
            self._grant_arborist_policies(
1✔
2211
                username,
2212
                incoming_policies,
2213
                user_yaml,
2214
                expires=expires,
2215
                remove_users_with_no_policies=True,
2216
            )
2217

2218
        if user_yaml:
1✔
2219
            for client_name, client_details in user_yaml.clients.items():
1✔
2220
                client_policies = client_details.get("policies", [])
×
2221
                clients = session.query(Client).filter_by(name=client_name).all()
×
2222
                # update existing clients, do not create new ones
2223
                if not clients:
×
2224
                    self.logger.warning(
×
2225
                        "client to update (`{}`) does not exist in fence: skipping".format(
2226
                            client_name
2227
                        )
2228
                    )
2229
                    continue
×
2230
                self.logger.debug(
×
2231
                    "updating client `{}` (found {} client IDs)".format(
2232
                        client_name, len(clients)
2233
                    )
2234
                )
2235
                # there may be more than 1 client with this name if credentials are being rotated,
2236
                # so we grant access to each client ID
2237
                for client in clients:
×
2238
                    try:
×
2239
                        self.arborist_client.update_client(
×
2240
                            client.client_id, client_policies
2241
                        )
2242
                    except ArboristError as e:
×
2243
                        self.logger.info(
×
2244
                            "not granting policies {} to client `{}` (`{}`); {}".format(
2245
                                client_policies, client_name, client.client_id, str(e)
2246
                            )
2247
                        )
2248

2249
        return True
1✔
2250

2251
    def _determine_unique_policies(self, user_project_info, project_to_authz_mapping):
1✔
2252
        """
2253
        Determine and return a dictionary of unique policies.
2254

2255
        Args (examples):
2256
            user_project_info (dict):
2257
            {
2258
                'phs000002.c1': { 'read-storage', 'read' },
2259
                'phs000001.c1': { 'read', 'read-storage' },
2260
                'phs000004.c1': { 'write', 'read' },
2261
                'phs000003.c1': { 'read', 'write' },
2262
                'phs000006.c1': { 'write-storage', 'write', 'read-storage', 'read' }
2263
                'phs000005.c1': { 'read', 'read-storage', 'write', 'write-storage' },
2264
            }
2265
            project_to_authz_mapping (dict):
2266
            {
2267
                'phs000001.c1': '/programs/DEV/projects/phs000001.c1'
2268
            }
2269

2270
        Return (for examples):
2271
            dict:
2272
            {
2273
                ('read', 'read-storage'): ('phs000001.c1', 'phs000002.c1'),
2274
                ('read', 'write'): ('phs000003.c1', 'phs000004.c1'),
2275
                ('read', 'read-storage', 'write', 'write-storage'): ('phs000005.c1', 'phs000006.c1'),
2276
            }
2277
        """
2278
        roles_to_resources = collections.defaultdict(list)
1✔
2279
        for study, roles in user_project_info.items():
1✔
2280
            ordered_roles = tuple(sorted(roles))
1✔
2281
            study_authz_paths = self._dbgap_study_to_resources.get(study, [study])
1✔
2282
            if study in project_to_authz_mapping:
1✔
2283
                study_authz_paths = [project_to_authz_mapping[study]]
1✔
2284
            roles_to_resources[ordered_roles].extend(study_authz_paths)
1✔
2285

2286
        policies = {}
1✔
2287
        for ordered_roles, unordered_resources in roles_to_resources.items():
1✔
2288
            policies[ordered_roles] = tuple(sorted(unordered_resources))
1✔
2289
        return policies
1✔
2290

2291
    def _create_arborist_role(self, role):
1✔
2292
        """
2293
        Wrapper around gen3authz's create_role with additional logging
2294

2295
        Args:
2296
            role (str): what the Arborist identity should be of the created role
2297

2298
        Return:
2299
            bool: True if the role was created successfully or it already
2300
                  exists. False otherwise
2301
        """
2302
        if role in self._created_roles:
1✔
2303
            return True
1✔
2304
        try:
1✔
2305
            response_json = self.arborist_client.create_role(
1✔
2306
                arborist_role_for_permission(role)
2307
            )
2308
        except ArboristError as e:
×
2309
            self.logger.error(
×
2310
                "could not create `{}` role in Arborist: {}".format(role, e)
2311
            )
2312
            return False
×
2313
        self._created_roles.add(role)
1✔
2314

2315
        if response_json is None:
1✔
2316
            self.logger.info("role `{}` already exists in Arborist".format(role))
×
2317
        else:
2318
            self.logger.info("created role `{}` in Arborist".format(role))
1✔
2319
        return True
1✔
2320

2321
    def _create_arborist_resources(self, resources):
        """
        Create the given resources in Arborist.

        The flat list of paths is first turned into resource trees by
        utils.combine_provided_and_dbgap_resources({}, resources); one
        update_resource("/", tree, merge=True) request is sent per tree.
        Processing stops at the first request that raises ArboristError.

        Args:
            resources (list): a list of full Arborist resource paths to create
            [
                "/programs/DEV/projects/phs000001.c1",
                "/programs/DEV/projects/phs000002.c1",
                "/programs/DEV/projects/phs000003.c1"
            ]

        Return:
            bool: True if the resources were successfully created, False otherwise

        Number of network requests (as of 2/11/2022):

        For the resources above the combined result is a single tree:
        [
            { 'name': 'programs', 'subresources': [
                { 'name': 'DEV', 'subresources': [
                    { 'name': 'projects', 'subresources': [
                        { 'name': 'phs000001.c1', 'subresources': []},
                        { 'name': 'phs000002.c1', 'subresources': []},
                        { 'name': 'phs000003.c1', 'subresources': []}
                    ]}
                ]}
            ]}
        ]
        so only a single network request gets sent to Arborist.

        For resources = ["/phs000001.c1", "/phs000002.c1", "/phs000003.c1"]
        the combined result has three top-level entries:
        [
            {'name': 'phs000001.c1', 'subresources': []},
            {'name': 'phs000002.c1', 'subresources': []},
            {'name': 'phs000003.c1', 'subresources': []}
        ]
        so 3 network requests get sent to Arborist.

        As a practical matter, for sync_single_user_visas, studies should be
        nested under the `/programs` resource as in the first example
        (i.e. only one network request gets made).

        TODO for the sake of simplicity, it would be nice if only one network
        request was made no matter the input.
        """
        resource_trees = utils.combine_provided_and_dbgap_resources({}, resources)
        for tree in resource_trees:
            try:
                self.arborist_client.update_resource("/", tree, merge=True)
            except ArboristError as err:
                self.logger.error(
                    "could not create Arborist resources using request body `{}`. error: {}".format(
                        tree, err
                    )
                )
                return False

        self.logger.debug(
            "created {} resource(s) in Arborist: `{}`".format(len(resources), resources)
        )
        return True
1✔
2386

2387
    def _create_arborist_policy(
1✔
2388
        self, policy_id, roles, resources, skip_if_exists=False
2389
    ):
2390
        """
2391
        Wrapper around gen3authz's create_policy with additional logging
2392

2393
        Args:
2394
            policy_id (str): what the Arborist identity should be of the created policy
2395
            roles (iterable): what roles the create policy should have
2396
            resources (iterable): what resources the created policy should have
2397
            skip_if_exists (bool): if True, this function will not treat an already
2398
                                   existent policy as an error
2399

2400
        Return:
2401
            bool: True if policy creation was successful. False otherwise
2402
        """
2403
        try:
1✔
2404
            response_json = self.arborist_client.create_policy(
1✔
2405
                {
2406
                    "id": policy_id,
2407
                    "role_ids": roles,
2408
                    "resource_paths": resources,
2409
                },
2410
                skip_if_exists=skip_if_exists,
2411
            )
2412
        except ArboristError as e:
×
2413
            self.logger.error(
×
2414
                "could not create policy `{}` in Arborist: {}".format(policy_id, e)
2415
            )
2416
            return False
×
2417

2418
        if response_json is None:
1✔
2419
            self.logger.info("policy `{}` already exists in Arborist".format(policy_id))
×
2420
        else:
2421
            self.logger.info("created policy `{}` in Arborist".format(policy_id))
1✔
2422
        return True
1✔
2423

2424
    def _hash_policy_contents(self, ordered_roles, ordered_resources):
1✔
2425
        """
2426
        Generate a sha256 hexdigest representing ordered_roles and ordered_resources.
2427

2428
        Args:
2429
            ordered_roles (iterable): policy roles in sorted order
2430
            ordered_resources (iterable): policy resources in sorted order
2431

2432
        Return:
2433
            str: SHA256 hex digest
2434
        """
2435

2436
        def escape(s):
1✔
2437
            return s.replace(",", "\\,")
1✔
2438

2439
        canonical_roles = ",".join(escape(r) for r in ordered_roles)
1✔
2440
        canonical_resources = ",".join(escape(r) for r in ordered_resources)
1✔
2441
        canonical_policy = f"{canonical_roles},,f{canonical_resources}"
1✔
2442
        policy_hash = hashlib.sha256(canonical_policy.encode("utf-8")).hexdigest()
1✔
2443

2444
        return policy_hash
1✔
2445

2446
    def _grant_arborist_policy(self, username, policy_id, expires=None):
1✔
2447
        """
2448
        Wrapper around gen3authz's grant_user_policy with additional logging
2449

2450
        Args:
2451
            username (str): username of user in Arborist who policy should be
2452
                            granted to
2453
            policy_id (str): Arborist policy id
2454
            expires (int): POSIX timestamp for when policy should expire
2455

2456
        Return:
2457
            bool: True if granting of policy was successful, False otherwise
2458
        """
2459
        try:
1✔
2460
            resp = self.arborist_client.grant_user_policy(
1✔
2461
                username,
2462
                policy_id,
2463
                expires_at=expires,
2464
            )
2465
            if not resp:
1✔
2466
                self.logger.error(
1✔
2467
                    "could not grant policy `{}` to user `{}`".format(
2468
                        policy_id, username
2469
                    )
2470
                )
2471
                return False
1✔
2472
        except ArboristError as e:
×
2473
            self.logger.error(
×
2474
                "could not grant policy `{}` to user `{}`: {}".format(
2475
                    policy_id, username, e
2476
                )
2477
            )
2478
            return False
×
2479

2480
        self.logger.debug(
1✔
2481
            "granted policy `{}` to user `{}`".format(policy_id, username)
2482
        )
2483
        return True
1✔
2484

2485
    def _grant_bulk_user_policies(self, username, policy_ids, expires=None):
1✔
2486
        """
2487
        Wrapper around gen3authz's grant_user_policies with additional logging
2488

2489
        Args:
2490
            username (str): username of user in Arborist who policy should be
2491
                            granted to
2492
            policy_ids (set[str]): Arborist policy ids
2493

2494
        Return:
2495
            bool: True if granting of policies was successful, False otherwise
2496
        """
2497
        try:
1✔
2498
            resp = self.arborist_client.grant_bulk_user_policy(
1✔
2499
                username, policy_ids, expires
2500
            )
2501
            if not resp:
1✔
2502
                self.logger.error(
×
2503
                    "could not grant bulk policies to user `{}`".format(username)
2504
                )
2505
                return False
×
2506
        except ArboristError as e:
×
2507
            self.logger.error(
×
2508
                "could not grant bulk policies to user `{}`: {}".format(username, e)
2509
            )
2510
            return False
×
2511
        except ArboristTimeoutError as e:
×
2512
            self.logger.error(
×
2513
                f"Timeout waiting for response to grant bulk policies  to user `{username}`: {e}"
2514
                "This user will be skipped and usersync will continue."
2515
                "As long as the timeout is not a pool/connection timeout, then "
2516
            )
2517
            return False
×
2518
        return True
1✔
2519

2520
    def _determine_arborist_resource(self, dbgap_study, dbgap_config):
1✔
2521
        """
2522
        Determine the arborist resource path and add it to
2523
        _self._dbgap_study_to_resources
2524

2525
        Args:
2526
            dbgap_study (str): study phs identifier
2527
            dbgap_config (dict): dictionary of config for dbgap server
2528

2529
        """
2530
        default_namespaces = dbgap_config.get("study_to_resource_namespaces", {}).get(
1✔
2531
            "_default", ["/"]
2532
        )
2533
        namespaces = dbgap_config.get("study_to_resource_namespaces", {}).get(
1✔
2534
            dbgap_study, default_namespaces
2535
        )
2536

2537
        self.logger.debug(f"dbgap study namespaces: {namespaces}")
1✔
2538

2539
        arborist_resource_namespaces = [
1✔
2540
            namespace.rstrip("/") + "/programs/" for namespace in namespaces
2541
        ]
2542

2543
        for resource_namespace in arborist_resource_namespaces:
1✔
2544
            full_resource_path = resource_namespace + dbgap_study
1✔
2545
            if dbgap_study not in self._dbgap_study_to_resources:
1✔
2546
                self._dbgap_study_to_resources[dbgap_study] = []
1✔
2547
            self._dbgap_study_to_resources[dbgap_study].append(full_resource_path)
1✔
2548
        return arborist_resource_namespaces
1✔
2549

2550
    def _is_arborist_healthy(self):
1✔
2551
        if not self.arborist_client:
1✔
2552
            self.logger.warning("no arborist client set; skipping arborist dbgap sync")
×
2553
            return False
×
2554
        if not self.arborist_client.healthy():
1✔
2555
            # TODO (rudyardrichter, 2019-01-07): add backoff/retry here
2556
            self.logger.error(
×
2557
                "arborist service is unavailable; skipping main arborist dbgap sync"
2558
            )
2559
            return False
×
2560
        return True
1✔
2561

2562
    def _pick_sync_type(self, visa):
1✔
2563
        """
2564
        Pick type of visa to parse according to the visa provider
2565
        """
2566
        sync_client = None
1✔
2567
        if visa.type in self.visa_types["ras"]:
1✔
2568
            sync_client = self.ras_sync_client
1✔
2569
        else:
2570
            raise Exception(
×
2571
                "Visa type {} not recognized. Configure in fence-config".format(
2572
                    visa.type
2573
                )
2574
            )
2575
        if not sync_client:
1✔
2576
            raise Exception("Sync client for {} not configured".format(visa.type))
×
2577

2578
        return sync_client
1✔
2579

2580
    def sync_single_user_visas(
        self, user, ga4gh_visas, sess=None, expires=None, skip_google_updates=False
    ):
        """
        Sync a single user's visas during login or DRS/data access

        IMPORTANT NOTE: THIS DOES NOT VALIDATE THE VISA. ENSURE THIS IS DONE
                        BEFORE THIS.

        Steps, in order:
            1. parse every visa into project access; visas that fail to
               parse are logged and skipped
            2. process the resulting projects (self._process_user_projects)
            3. sync to the storage backend
            4. sync to Arborist, if an Arborist client is set

        Only the first entry of self.dbGaP is used as the dbGaP config.

        Args:
            user (userdatamodel.user.User): Fence user whose visas'
                                            authz info is being synced
            ga4gh_visas (list): a list of fence.models.GA4GHVisaV1 objects
                                that are ALREADY VALIDATED
            sess (sqlalchemy.orm.session.Session): database session
            expires (int): time at which synced Arborist policies and
                           inclusion in any GBAG are set to expire
            skip_google_updates (bool): True if google group updates should be skipped. False if otherwise.

        Return:
            list of successfully parsed visas

        Raises:
            EnvironmentError, AssertionError: re-raised (after logging) when
                the local user yaml file cannot be loaded
        """
        self.ras_sync_client = RASVisa(logger=self.logger)

        dbgap_config = self.dbGaP[0]
        parse_consent_code = self._get_parse_consent_code(dbgap_config)
        enable_common_exchange_area_access = dbgap_config.get(
            "enable_common_exchange_area_access", False
        )
        study_common_exchange_areas = dbgap_config.get(
            "study_common_exchange_areas", {}
        )

        try:
            user_yaml = UserYAML.from_file(
                self.sync_from_local_yaml_file, encrypted=False, logger=self.logger
            )
        except (EnvironmentError, AssertionError) as err:
            self.logger.error(str(err))
            self.logger.error("aborting early")
            raise

        projects = {}
        info = {}
        parsed_visas = []

        for visa in ga4gh_visas:
            sync_client = self._pick_sync_type(visa)
            encoded_visa = visa.ga4gh_visa

            try:
                project, info = sync_client._parse_single_visa(
                    user,
                    encoded_visa,
                    visa.expires,
                    parse_consent_code,
                )
            except Exception:
                # best effort: one bad visa must not block the others.
                # NOTE(review): this writes the full encoded visa to the log -
                # confirm that is acceptable for the log destination.
                self.logger.warning(
                    f"ignoring unsuccessfully parsed or expired visa: {encoded_visa}"
                )
                continue

            # on a project present in several visas, the later visa wins
            projects = {**projects, **project}
            parsed_visas.append(visa)

        # `info` holds what the last successfully parsed visa returned (or is
        # empty when none parsed); the user identifiers are always set
        info["user_id"] = user.id
        info["username"] = user.username

        user_projects = self.parse_projects({user.username: projects})

        if parse_consent_code and enable_common_exchange_area_access:
            self.logger.info(
                f"using study to common exchange area mapping: {study_common_exchange_areas}"
            )

        self._process_user_projects(
            user_projects,
            enable_common_exchange_area_access,
            study_common_exchange_areas,
            dbgap_config,
            sess,
        )

        if parse_consent_code:
            self._grant_all_consents_to_c999_users(
                user_projects, user_yaml.project_to_resource
            )

        if user_projects:
            self.sync_to_storage_backend(
                user_projects,
                info,
                sess,
                expires=expires,
                skip_google_updates=skip_google_updates,
            )
        else:
            self.logger.info("No users for syncing")

        # update arborist db (user access)
        if not self.arborist_client:
            self.logger.error("No arborist client set; skipping arborist sync")
            return parsed_visas

        self.logger.info("Synchronizing arborist with authorization info...")
        synced = self._update_authz_in_arborist(
            sess,
            user_projects,
            user_yaml=user_yaml,
            single_user_sync=True,
            expires=expires,
        )
        if synced:
            self.logger.info("Finished synchronizing authorization info to arborist")
        else:
            self.logger.error(
                "Could not synchronize authorization info successfully to arborist"
            )

        return parsed_visas
1✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc