• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

uc-cdis / fence / 14673322298

25 Apr 2025 08:53PM UTC coverage: 74.882% (+0.005%) from 74.877%
14673322298

push

github

BinamB
print unique policies

8097 of 10813 relevant lines covered (74.88%)

0.75 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

82.42
fence/sync/sync_users.py
1
import backoff
1✔
2
import glob
1✔
3
import jwt
1✔
4
import os
1✔
5
import re
1✔
6
import subprocess as sp
1✔
7
import yaml
1✔
8
import copy
1✔
9
import datetime
1✔
10
import uuid
1✔
11
import collections
1✔
12
import hashlib
1✔
13

14
from contextlib import contextmanager
1✔
15
from collections import defaultdict
1✔
16
from csv import DictReader
1✔
17
from io import StringIO
1✔
18
from stat import S_ISDIR
1✔
19

20
import paramiko
1✔
21
from cdislogging import get_logger
1✔
22
from email_validator import validate_email, EmailNotValidError
1✔
23
from gen3authz.client.arborist.errors import ArboristError
1✔
24
from gen3users.validation import validate_user_yaml
1✔
25
from paramiko.proxy import ProxyCommand
1✔
26
from sqlalchemy.exc import IntegrityError
1✔
27
from sqlalchemy import func
1✔
28

29
from fence.config import config
1✔
30
from fence.models import (
1✔
31
    AccessPrivilege,
32
    AuthorizationProvider,
33
    Project,
34
    Tag,
35
    User,
36
    query_for_user,
37
    Client,
38
    IdentityProvider,
39
    get_project_to_authz_mapping,
40
)
41
from fence.resources.google.utils import get_or_create_proxy_group_id
1✔
42
from fence.resources.storage import StorageManager
1✔
43
from fence.resources.google.access_utils import update_google_groups_for_users
1✔
44
from fence.resources.google.access_utils import GoogleUpdateException
1✔
45
from fence.sync import utils
1✔
46
from fence.sync.passport_sync.ras_sync import RASVisa
1✔
47
from fence.utils import get_SQLAlchemyDriver, DEFAULT_BACKOFF_SETTINGS
1✔
48

49

50
def _format_policy_id(path, privilege):
1✔
51
    resource = ".".join(name for name in path.split("/") if name)
1✔
52
    return "{}-{}".format(resource, privilege)
1✔
53

54

55
def download_dir(sftp, remote_dir, local_dir):
    """
    Recursively download files from remote_dir to local_dir

    The local directory (and, while recursing, each local sub-directory) is
    created if it does not exist yet, so files nested in remote
    sub-directories have somewhere to be written.

    Args:
        sftp: object exposing ``listdir_attr(path)`` and ``get(remote, local)``
        remote_dir(str)
        local_dir(str)
    Returns: None
    """
    dir_items = sftp.listdir_attr(remote_dir)

    # sftp.get() writes to an existing directory only; without this, the
    # first file inside a remote sub-directory fails to download.
    os.makedirs(local_dir, exist_ok=True)

    for item in dir_items:
        remote_path = remote_dir + "/" + item.filename
        local_path = os.path.join(local_dir, item.filename)
        if S_ISDIR(item.st_mode):
            download_dir(sftp, remote_path, local_path)
        else:
            sftp.get(remote_path, local_path)
72

73

74
def arborist_role_for_permission(permission):
    """
    Build the arborist role that wraps a single permission.

    For the programs/projects in the existing fence access control model, a
    policy is generated for each combination of program/project and
    privilege. Each role therefore contains exactly one permission, named
    after the privilege, whose action applies to any service ("*") and uses
    the privilege as its method.

    Args:
        permission (str): privilege name from the project access model

    Returns:
        dict: role with keys "id" and "permissions"
    """
    action = {"service": "*", "method": permission}
    single_permission = {"id": permission, "action": action}
    return {"id": permission, "permissions": [single_permission]}
87

88

89
@contextmanager
1✔
90
def _read_file(filepath, encrypted=True, key=None, logger=None):
1✔
91
    """
92
    Context manager for reading and optionally decrypting file it only
93
    decrypts files encrypted by unix 'crypt' tool which is used by dbGaP.
94

95
    Args:
96
        filepath (str): path to the file
97
        encrypted (bool): whether the file is encrypted
98

99
    Returns:
100
        Generator[file-like class]: file like object for the file
101
    """
102
    if encrypted:
1✔
103
        p = sp.Popen(
×
104
            [
105
                "ccdecrypt",
106
                "-u",
107
                "-K",
108
                key,
109
                filepath,
110
            ],
111
            stdout=sp.PIPE,
112
            stderr=open(os.devnull, "w"),
113
            universal_newlines=True,
114
        )
115
        try:
×
116
            yield StringIO(p.communicate()[0])
×
117
        except UnicodeDecodeError:
×
118
            logger.error("Could not decode file. Check the decryption key.")
×
119
    else:
120
        f = open(filepath, "r")
1✔
121
        yield f
1✔
122
        f.close()
1✔
123

124

125
class UserYAML(object):
    """
    Representation of the information in a YAML file describing user, project, and ABAC
    information for access control.
    """

    def __init__(
        self,
        projects=None,
        user_info=None,
        policies=None,
        clients=None,
        authz=None,
        project_to_resource=None,
        logger=None,
        user_abac=None,
    ):
        """
        Store the parsed sections of a user YAML.

        Every section argument that is None (or otherwise falsy) is replaced
        by an empty dict; ``logger`` is stored as given.
        """
        self.projects = projects or {}
        self.user_info = user_info or {}
        self.user_abac = user_abac or {}
        self.policies = policies or {}
        self.clients = clients or {}
        self.authz = authz or {}
        self.project_to_resource = project_to_resource or {}
        self.logger = logger

    @classmethod
    def from_file(cls, filepath, encrypted=True, key=None, logger=None):
        """
        Parse a user YAML file into a UserYAML instance.

        Add access by "auth_id" to "self.projects" to update the Fence DB.
        Add access by "resource" to "self.user_abac" to update Arborist.

        Args:
            filepath (str): path to the YAML file; when falsy nothing is read
                and an empty UserYAML is returned
            encrypted (bool): whether the file must be decrypted before reading
            key (str): decryption key, used only when ``encrypted`` is true
            logger: optional logger for progress, warnings and errors

        Returns:
            UserYAML: instance populated from the file contents
        """
        data = {}
        if filepath:
            with _read_file(filepath, encrypted=encrypted, key=key, logger=logger) as f:
                file_contents = f.read()
                validate_user_yaml(file_contents)  # run user.yaml validation tests
                # safe_load returns None for an empty document; treat that as
                # "no data" instead of crashing on `None.get(...)` below
                data = yaml.safe_load(file_contents) or {}
        elif logger:
            logger.info("Did not sync a user.yaml, no file path provided.")

        projects = dict()
        user_info = dict()
        policies = dict()

        # resources should be the resource tree to construct in arborist
        user_abac = dict()

        # Fall back on rbac block if no authz. Remove when rbac in useryaml fully deprecated.
        if not data.get("authz") and data.get("rbac"):
            if logger:
                logger.info(
                    "No authz block found but rbac block present. Using rbac block"
                )
            data["authz"] = data["rbac"]

        # get user project mapping to arborist resources if it exists
        project_to_resource = data.get("authz", dict()).get(
            "user_project_to_resource", dict()
        )

        # read projects and privileges for each user
        # `users` is iterated as a mapping, so each username is seen exactly
        # once here; a per-user duplicate check at this point can never fire.
        users = data.get("users") or {}
        for username, details in users.items():
            # a user declared with an empty body is parsed as None
            details = details or {}

            privileges = {}
            resource_permissions = dict()
            for project in details.get("projects") or []:
                try:
                    privileges[project["auth_id"]] = set(project["privilege"])
                except KeyError as e:
                    if logger:
                        logger.error("project {} missing field: {}".format(project, e))
                    continue

                # project may not have `resource` field.
                # prefer resource field;
                # if no resource or mapping, assume auth_id is resource.
                resource = project.get("resource", project["auth_id"])

                if project["auth_id"] not in project_to_resource:
                    project_to_resource[project["auth_id"]] = resource
                resource_permissions[resource] = set(project["privilege"])

            user_info[username] = {
                "email": details.get("email", ""),
                "display_name": details.get("display_name", ""),
                "phone_number": details.get("phone_number", ""),
                "tags": details.get("tags", {}),
                "admin": details.get("admin", False),
            }
            if not details.get("email"):
                # no explicit email: use the username itself when it is a
                # syntactically valid email address
                try:
                    valid = validate_email(
                        username, allow_smtputf8=False, check_deliverability=False
                    )
                    user_info[username]["email"] = valid.email
                except EmailNotValidError:
                    pass
            projects[username] = privileges
            user_abac[username] = resource_permissions

            # list of policies we want to grant to this user, which get sent to arborist
            # to check if they're allowed to do certain things
            policies[username] = details.get("policies", [])

        if logger:
            logger.info(
                "Got user project to arborist resource mapping:\n{}".format(
                    str(project_to_resource)
                )
            )

        authz = data.get("authz", dict())
        if not authz:
            # older version: resources in root, no `authz` section or `rbac` section
            if logger:
                logger.warning(
                    "access control YAML file is using old format (missing `authz`/`rbac`"
                    " section in the root); assuming that if it exists `resources` will"
                    " be on the root level, and continuing"
                )
            # we're going to throw it into the `authz` dictionary anyways, so the rest of
            # the code can pretend it's in the normal place that we expect
            resources = data.get("resources", [])
            # keep authz empty dict if resources is not specified
            if resources:
                authz["resources"] = resources

        clients = data.get("clients", {})

        return cls(
            projects=projects,
            user_info=user_info,
            user_abac=user_abac,
            policies=policies,
            clients=clients,
            authz=authz,
            project_to_resource=project_to_resource,
            logger=logger,
        )

    def persist_project_to_resource(self, db_session):
        """
        Store the mappings from Project.auth_id to authorization resource (Project.authz)

        The mapping comes from an external source, this function persists what was parsed
        into memory into the database for future use.

        Existing projects get their ``authz`` updated; missing ones are created
        with ``name`` equal to the auth_id. A single commit is issued at the end.
        """
        for auth_id, authz_resource in self.project_to_resource.items():
            project = (
                db_session.query(Project).filter(Project.auth_id == auth_id).first()
            )
            if project:
                project.authz = authz_resource
            else:
                project = Project(name=auth_id, auth_id=auth_id, authz=authz_resource)
                db_session.add(project)
        db_session.commit()
293

294

295
class UserSyncer(object):
1✔
296
    def __init__(
        self,
        dbGaP,
        DB,
        project_mapping,
        storage_credentials=None,
        db_session=None,
        is_sync_from_dbgap_server=False,
        sync_from_local_csv_dir=None,
        sync_from_local_yaml_file=None,
        arborist=None,
        folder=None,
    ):
        """
        Syncs ACL files from dbGap to auth database and storage backends

        Args:
            dbGaP: a list of dict containing creds to access dbgap sftp; each
                entry may carry a "parent_to_child_studies_mapping" dict,
                all of which are merged together here
            DB: database connection string
            project_mapping: a dict containing how dbgap ids map to projects
            storage_credentials: a dict containing creds for storage backends;
                a StorageManager is only created when this is provided
            db_session: stored as ``self.session``
            is_sync_from_dbgap_server: stored as given
            sync_from_local_csv_dir: stored as given (presumably a local
                directory of CSV files to sync from instead of dbGaP)
            sync_from_local_yaml_file: stored as given (presumably a local
                user YAML file to sync from)
            arborist:
                ArboristClient instance if the syncer should also create
                resources in arborist
            folder: a local folder where dbgap telemetry files will sync to
        """
        # sync sources
        self.dbGaP = dbGaP
        self.is_sync_from_dbgap_server = is_sync_from_dbgap_server
        self.sync_from_local_csv_dir = sync_from_local_csv_dir
        self.sync_from_local_yaml_file = sync_from_local_yaml_file
        self.folder = folder

        # database / authz backends
        self.session = db_session
        self.driver = get_SQLAlchemyDriver(DB)
        self.arborist_client = arborist
        self.project_mapping = project_mapping if project_mapping else {}

        # per-run bookkeeping
        self._projects = {}
        self._created_roles = set()
        self._created_policies = set()
        self._dbgap_study_to_resources = {}

        log_level = "debug" if config["DEBUG"] is True else "info"
        self.logger = get_logger("user_syncer", log_level=log_level)

        # auth_source used for logging. username : [source1, source2]
        self.auth_source = defaultdict(set)
        self.visa_types = config.get("USERSYNC", {}).get("visa_types", {})

        # merge the parent -> child study mapping of every dbGaP config
        merged_children = {}
        for dbgap_config in dbGaP:
            merged_children.update(
                dbgap_config.get("parent_to_child_studies_mapping", {})
            )
        self.parent_to_child_studies_mapping = merged_children

        if storage_credentials:
            self.storage_manager = StorageManager(
                storage_credentials, logger=self.logger
            )
        self.id_patterns = []
353

354
    @staticmethod
1✔
355
    def _match_pattern(filepath, id_patterns, encrypted=True):
1✔
356
        """
357
        Check if the filename matches dbgap access control file pattern
358

359
        Args:
360
            filepath (str): path to file
361
            encrypted (bool): whether the file is encrypted
362

363
        Returns:
364
            bool: whether the pattern matches
365
        """
366
        id_patterns.append(r"authentication_file_phs(\d{6}).(csv|txt)")
1✔
367
        for pattern in id_patterns:
1✔
368
            if encrypted:
1✔
369
                pattern += r".enc"
×
370
            pattern += r"$"
1✔
371
            # when converting the YAML from fence-config,
372
            # python reads it as Python string literal. So "\" turns into "\\"
373
            # which messes with the regex match
374
            pattern.replace("\\\\", "\\")
1✔
375
            if re.match(pattern, os.path.basename(filepath)):
1✔
376
                return True
1✔
377
        return False
1✔
378

379
    def _get_from_sftp_with_proxy(self, server, path):
        """
        Download all data from sftp sever to a local dir

        When ``server`` has a non-empty "proxy" entry the SSH connection is
        tunnelled through an ``ssh ... nc`` proxy command. The proxy is closed
        when the method exits, whether or not the download succeeded.

        Args:
            server (dict) : dictionary containing info to access sftp server
                (reads "host", "port", "username", "password" and optionally
                "proxy" / "proxy_user")
            path (str): path to local directory

        Returns:
            None
        """
        proxy = None
        if server.get("proxy", "") != "":
            command = "ssh -oHostKeyAlgorithms=+ssh-rsa -i ~/.ssh/id_rsa {user}@{proxy} nc {host} {port}".format(
                user=server.get("proxy_user", ""),
                proxy=server.get("proxy", ""),
                host=server.get("host", ""),
                port=server.get("port", 22),
            )
            self.logger.info("SSH proxy command: {}".format(command))

            proxy = ProxyCommand(command)

        try:
            with paramiko.SSHClient() as client:
                client.set_log_channel(self.logger.name)

                # NOTE(review): AutoAddPolicy accepts host keys that are not
                # already known — confirm this is acceptable for this server.
                client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
                parameters = {
                    "hostname": str(server.get("host", "")),
                    "username": str(server.get("username", "")),
                    "password": str(server.get("password", "")),
                    "port": int(server.get("port", 22)),
                }
                if proxy:
                    parameters["sock"] = proxy

                self.logger.info(
                    "SSH connection hostname:port {}:{}".format(
                        parameters.get("hostname", "unknown"),
                        parameters.get("port", "unknown"),
                    )
                )
                self._connect_with_ssh(ssh_client=client, parameters=parameters)
                with client.open_sftp() as sftp:
                    download_dir(sftp, "./", path)
        finally:
            # release the proxy subprocess even when connect/download raised
            if proxy:
                proxy.close()
427

428
    @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
    def _connect_with_ssh(self, ssh_client, parameters):
        """
        Open the SSH connection, retrying on failure.

        Any exception raised by ``connect`` triggers a retry with exponential
        backoff, configured by DEFAULT_BACKOFF_SETTINGS.

        Args:
            ssh_client: client object whose ``connect`` is called
            parameters (dict): keyword arguments forwarded to ``connect``
        """
        ssh_client.connect(**parameters)
431

432
    def _get_from_ftp_with_proxy(self, server, path):
        """
        Download data from ftp sever to a local dir

        Runs the external ``lftp`` command, mirroring the server's current
        directory into ``path`` through an HTTP proxy. The command is executed
        as an argument list without a shell, so credentials, host names or
        paths containing shell metacharacters are passed through verbatim
        instead of being interpreted. The exit status is ignored.

        Args:
            server (dict): dictionary containing information for accessing server
                (reads "username", "password", "host" and "proxy")
            path(str): path to local files

        Returns:
            None
        """
        credentials = "{},{}".format(
            server.get("username", ""), server.get("password", "")
        )
        lftp_script = "set ftp:proxy http://{}; mirror . {}; exit".format(
            server.get("proxy", ""), path
        )

        argv = ["lftp", "-u", credentials]
        host = str(server.get("host", ""))
        if host:
            # an empty host produced no argument in the shell form either
            argv.append(host)
        argv += ["-e", lftp_script]

        sp.run(argv, check=False)
453

454
    def _get_parse_consent_code(self, dbgap_config={}):
1✔
455
        return dbgap_config.get(
1✔
456
            "parse_consent_code", True
457
        )  # Should this really be true?
458

459
    def _parse_csv(self, file_dict, sess, dbgap_config={}, encrypted=True):
1✔
460
        """
461
        parse csv files to python dict
462

463
        Args:
464
            file_dict: a dictionary with key(file path) and value(privileges)
465
            sess: sqlalchemy session
466
            dbgap_config: a dictionary containing information about the dbGaP sftp server
467
                (comes from fence config)
468
            encrypted: boolean indicating whether those files are encrypted
469

470

471
        Return:
472
            Tuple[[dict, dict]]:
473
                (user_project, user_info) where user_project is a mapping from
474
                usernames to project permissions and user_info is a mapping
475
                from usernames to user details, such as email
476

477
        Example:
478

479
            (
480
                {
481
                    username: {
482
                        'project1': {'read-storage','write-storage'},
483
                        'project2': {'read-storage'},
484
                    }
485
                },
486
                {
487
                    username: {
488
                        'email': 'email@mail.com',
489
                        'display_name': 'display name',
490
                        'phone_number': '123-456-789',
491
                        'tags': {'dbgap_role': 'PI'}
492
                    }
493
                },
494
            )
495

496
        """
497
        user_projects = dict()
1✔
498
        user_info = defaultdict(dict)
1✔
499

500
        # parse dbGaP sftp server information
501
        dbgap_key = dbgap_config.get("decrypt_key", None)
1✔
502

503
        self.id_patterns += (
1✔
504
            [
505
                item.replace("\\\\", "\\")
506
                for item in dbgap_config.get("allowed_whitelist_patterns", [])
507
            ]
508
            if dbgap_config.get("allow_non_dbGaP_whitelist", False)
509
            else []
510
        )
511

512
        enable_common_exchange_area_access = dbgap_config.get(
1✔
513
            "enable_common_exchange_area_access", False
514
        )
515
        study_common_exchange_areas = dbgap_config.get(
1✔
516
            "study_common_exchange_areas", {}
517
        )
518
        parse_consent_code = self._get_parse_consent_code(dbgap_config)
1✔
519

520
        if parse_consent_code and enable_common_exchange_area_access:
1✔
521
            self.logger.info(
1✔
522
                f"using study to common exchange area mapping: {study_common_exchange_areas}"
523
            )
524

525
        project_id_patterns = [r"phs(\d{6})"]
1✔
526
        if "additional_allowed_project_id_patterns" in dbgap_config:
1✔
527
            patterns = dbgap_config.get("additional_allowed_project_id_patterns")
1✔
528
            patterns = [
1✔
529
                pattern.replace("\\\\", "\\") for pattern in patterns
530
            ]  # when converting the YAML from fence-config, python reads it as Python string literal. So "\" turns into "\\" which messes with the regex match
531
            project_id_patterns += patterns
1✔
532

533
        self.logger.info(f"Using these file paths: {file_dict.items()}")
1✔
534
        for filepath, privileges in file_dict.items():
1✔
535
            self.logger.info("Reading file {}".format(filepath))
1✔
536
            if os.stat(filepath).st_size == 0:
1✔
537
                self.logger.warning("Empty file {}".format(filepath))
×
538
                continue
×
539
            if not self._match_pattern(
1✔
540
                filepath, id_patterns=self.id_patterns, encrypted=encrypted
541
            ):
542
                self.logger.warning(
1✔
543
                    "Filename {} does not match dbgap access control filename pattern;"
544
                    " this could mean that the filename has an invalid format, or has"
545
                    " an unexpected .enc extension, or lacks the .enc extension where"
546
                    " expected. This file is NOT being processed by usersync!".format(
547
                        filepath
548
                    )
549
                )
550
                continue
1✔
551

552
            with _read_file(
1✔
553
                filepath, encrypted=encrypted, key=dbgap_key, logger=self.logger
554
            ) as f:
555
                csv = DictReader(f, quotechar='"', skipinitialspace=True)
1✔
556

557
                for row in csv:
1✔
558
                    username = row.get("login") or ""
1✔
559
                    if username == "":
1✔
560
                        continue
×
561

562
                    if dbgap_config.get("allow_non_dbGaP_whitelist", False):
1✔
563
                        phsid = (
1✔
564
                            row.get("phsid") or (row.get("project_id") or "")
565
                        ).split(".")
566
                    else:
567
                        phsid = (row.get("phsid") or "").split(".")
1✔
568

569
                    dbgap_project = phsid[0]
1✔
570
                    # There are issues where dbgap has a wrong entry in their whitelist. Since we do a bulk arborist request, there are wrong entries in it that invalidates the whole request causing other correct entries not to be added
571
                    skip = False
1✔
572
                    print("-------project id patterns------")
1✔
573
                    print(project_id_patterns)
1✔
574
                    for pattern in project_id_patterns:
1✔
575
                        self.logger.debug(
1✔
576
                            "Checking pattern:{} with project_id:{}".format(
577
                                pattern, dbgap_project
578
                            )
579
                        )
580
                        if re.match(pattern, dbgap_project):
1✔
581
                            skip = False
1✔
582
                            break
1✔
583
                        else:
584
                            skip = True
1✔
585
                    if skip:
1✔
586
                        self.logger.warning(
1✔
587
                            "Skip processing from file {}, user {} with project {}".format(
588
                                filepath,
589
                                username,
590
                                dbgap_project,
591
                            )
592
                        )
593
                        continue
1✔
594
                    if len(phsid) > 1 and parse_consent_code:
1✔
595
                        consent_code = phsid[-1]
1✔
596

597
                        # c999 indicates full access to all consents and access
598
                        # to a study-specific exchange area
599
                        # access to at least one study-specific exchange area implies access
600
                        # to the parent study's common exchange area
601
                        #
602
                        # NOTE: Handling giving access to all consents is done at
603
                        #       a later time, when we have full information about possible
604
                        #       consents
605
                        self.logger.debug(
1✔
606
                            f"got consent code {consent_code} from dbGaP project "
607
                            f"{dbgap_project}"
608
                        )
609
                        if (
1✔
610
                            consent_code == "c999"
611
                            and enable_common_exchange_area_access
612
                            and dbgap_project in study_common_exchange_areas
613
                        ):
614
                            self.logger.info(
1✔
615
                                "found study with consent c999 and Fence "
616
                                "is configured to parse exchange area data. Giving user "
617
                                f"{username} {privileges} privileges in project: "
618
                                f"{study_common_exchange_areas[dbgap_project]}."
619
                            )
620
                            self._add_dbgap_project_for_user(
1✔
621
                                study_common_exchange_areas[dbgap_project],
622
                                privileges,
623
                                username,
624
                                sess,
625
                                user_projects,
626
                                dbgap_config,
627
                            )
628

629
                        dbgap_project += "." + consent_code
1✔
630

631
                    self._add_children_for_dbgap_project(
1✔
632
                        dbgap_project,
633
                        privileges,
634
                        username,
635
                        sess,
636
                        user_projects,
637
                        dbgap_config,
638
                    )
639

640
                    display_name = row.get("user name") or ""
1✔
641
                    tags = {"dbgap_role": row.get("role") or ""}
1✔
642

643
                    # some dbgap telemetry files have information about a researchers PI
644
                    if "downloader for" in row:
1✔
645
                        tags["pi"] = row["downloader for"]
1✔
646

647
                    # prefer name over previous "downloader for" if it exists
648
                    if "downloader for names" in row:
1✔
649
                        tags["pi"] = row["downloader for names"]
×
650

651
                    user_info[username] = {
1✔
652
                        "email": row.get("email")
653
                        or user_info[username].get("email")
654
                        or "",
655
                        "display_name": display_name,
656
                        "phone_number": row.get("phone")
657
                        or user_info[username].get("phone_number")
658
                        or "",
659
                        "tags": tags,
660
                    }
661

662
                    self._process_dbgap_project(
1✔
663
                        dbgap_project,
664
                        privileges,
665
                        username,
666
                        sess,
667
                        user_projects,
668
                        dbgap_config,
669
                    )
670

671
        return user_projects, user_info
1✔
672

673
    def _get_children(self, dbgap_project):
1✔
674
        return self.parent_to_child_studies_mapping.get(dbgap_project.split(".")[0])
1✔
675

676
    def _add_children_for_dbgap_project(
1✔
677
        self, dbgap_project, privileges, username, sess, user_projects, dbgap_config
678
    ):
679
        """
680
        Adds the configured child studies for the given dbgap_project, adding it to the provided user_projects. If
681
        parse_consent_code is true, then the consents granted in the provided dbgap_project will also be granted to the
682
        child studies.
683
        """
684
        parent_phsid = dbgap_project
1✔
685
        parse_consent_code = self._get_parse_consent_code(dbgap_config)
1✔
686
        child_suffix = ""
1✔
687
        if parse_consent_code and re.match(
1✔
688
            config["DBGAP_ACCESSION_WITH_CONSENT_REGEX"], dbgap_project
689
        ):
690
            parent_phsid_parts = dbgap_project.split(".")
1✔
691
            parent_phsid = parent_phsid_parts[0]
1✔
692
            child_suffix = "." + parent_phsid_parts[1]
1✔
693

694
        if parent_phsid not in self.parent_to_child_studies_mapping:
1✔
695
            return
1✔
696

697
        self.logger.info(
1✔
698
            f"found parent study {parent_phsid} and Fence "
699
            "is configured to provide additional access to child studies. Giving user "
700
            f"{username} {privileges} privileges in projects: "
701
            f"{{k + child_suffix: v + child_suffix for k, v in self.parent_to_child_studies_mapping.items()}}."
702
        )
703
        child_studies = self.parent_to_child_studies_mapping.get(parent_phsid, [])
1✔
704
        for child_study in child_studies:
1✔
705
            self._add_dbgap_project_for_user(
1✔
706
                child_study + child_suffix,
707
                privileges,
708
                username,
709
                sess,
710
                user_projects,
711
                dbgap_config,
712
            )
713

714
    def _add_dbgap_project_for_user(
1✔
715
        self, dbgap_project, privileges, username, sess, user_projects, dbgap_config
716
    ):
717
        """
718
        Helper function for csv parsing that adds a given dbgap project to Fence/Arborist
719
        and then updates the dictionary containing all user's project access
720
        """
721
        if dbgap_project not in self._projects:
1✔
722
            self.logger.debug(
1✔
723
                "creating Project in fence for dbGaP study: {}".format(dbgap_project)
724
            )
725

726
            project = self._get_or_create(sess, Project, auth_id=dbgap_project)
1✔
727

728
            # need to add dbgap project to arborist
729
            if self.arborist_client:
1✔
730
                self._determine_arborist_resource(dbgap_project, dbgap_config)
1✔
731

732
            if project.name is None:
1✔
733
                project.name = dbgap_project
1✔
734
            self._projects[dbgap_project] = project
1✔
735
        phsid_privileges = {dbgap_project: set(privileges)}
1✔
736
        if username in user_projects:
1✔
737
            user_projects[username].update(phsid_privileges)
1✔
738
        else:
739
            user_projects[username] = phsid_privileges
1✔
740

741
    @staticmethod
1✔
742
    def sync_two_user_info_dict(user_info1, user_info2):
1✔
743
        """
744
        Merge user_info1 into user_info2. Values in user_info2 are overriden
745
        by values in user_info1. user_info2 ends up containing the merged dict.
746

747
        Args:
748
            user_info1 (dict): nested dict
749
            user_info2 (dict): nested dict
750

751
            Example:
752
            {username: {'email': 'abc@email.com'}}
753

754
        Returns:
755
            None
756
        """
757
        user_info2.update(user_info1)
1✔
758

759
    def sync_two_phsids_dict(
        self,
        phsids1,
        phsids2,
        source1=None,
        source2=None,
        phsids2_overrides_phsids1=True,
    ):
        """
        Merge phsids1 into phsids2, in place. phsids2 ends up containing the
        merged dict (see explanation below); phsids1 is never modified.
        `source1` and `source2`: recorded in `self.auth_source` per user.

        Args:
            phsids1, phsids2: nested dicts mapping phsids to sets of permissions

            source1, source2: source of authz information (eg. dbgap, user_yaml, visas)

            phsids2_overrides_phsids1 (bool): when True, privileges of a user
                present in both dicts are combined (set union per phsid). When
                False, the user's existing entry in phsids2 is left untouched
                and only `source2` is recorded.

            Example:
            {
                username: {
                    phsid1: {'read-storage','write-storage'},
                    phsid2: {'read-storage'},
                }
            }

        Return:
            None

        Explanation:
            Consider merging projects of the same user:

                {user1: {phsid1: privilege1}}

                {user1: {phsid2: privilege2}}

            case 1: phsid1 != phsid2. Output:

                {user1: {phsid1: privilege1, phsid2: privilege2}}

            case 2: phsid1 == phsid2 and privilege1 != privilege2. Output:

                {user1: {phsid1: union(privilege1, privilege2)}}

            A user with no (or an empty) entry in phsids2 simply receives a
            copy of the entry from phsids1.
        """
        for user, projects1 in phsids1.items():
            if not phsids2.get(user):
                if source1:
                    self.auth_source[user].add(source1)
                # store a copy: sharing the inner dict/sets with phsids1 would
                # let later merges into phsids2 silently modify phsids1
                phsids2[user] = copy.deepcopy(projects1)
            elif phsids2_overrides_phsids1:
                if source1:
                    self.auth_source[user].add(source1)
                if source2:
                    self.auth_source[user].add(source2)
                merged_projects = phsids2[user]
                for phsid1, privilege1 in projects1.items():
                    merged_projects.setdefault(phsid1, set()).update(privilege1)
            elif source2:
                self.auth_source[user].add(source2)
×
823

824
    def sync_to_db_and_storage_backend(
        self,
        user_project,
        user_info,
        sess,
        do_not_revoke_from_db_and_storage=False,
        expires=None,
    ):
        """
        Bring the database and the storage backend in line with the given
        user access.

        Access pairs (username, project) found in the db but not in
        `user_project` are revoked (unless revocation is disabled), new pairs
        are granted, and pairs present on both sides are re-granted/updated.

        Args:
            user_project (dict): a dictionary of

                {
                    username: {
                        'project1': {'read-storage','write-storage'},
                        'project2': {'read-storage'}
                    }
                }

            user_info (dict): a dictionary of {username: user_info{}}
            sess: a sqlalchemy session
            do_not_revoke_from_db_and_storage (bool): when True, nothing is
                revoked and admin flags are not validated
            expires: passed through to `_grant_from_storage`

        Return:
            None
        """
        google_bulk_mapping = {} if config["GOOGLE_BULK_UPDATES"] else None

        self._init_projects(user_project, sess)

        dbgap_provider = self._get_or_create(sess, AuthorizationProvider, name="dbGaP")
        fence_provider = self._get_or_create(sess, AuthorizationProvider, name="fence")
        auth_provider_list = [dbgap_provider, fence_provider]

        existing_pairs = {
            (ua.user.username.lower(), ua.project.auth_id)
            for ua in sess.query(AccessPrivilege).all()
        }

        # usernames are compared case-insensitively against the db, so every
        # lookup structure used below is keyed by the lower-cased username
        user_project_lowercase = {}
        requested_pairs = set()
        for username, projects in user_project.items():
            lowered = username.lower()
            user_project_lowercase[lowered] = projects
            requested_pairs.update((lowered, project) for project in projects)

        user_info_lowercase = {}
        for username, info in user_info.items():
            user_info_lowercase[username.lower()] = info

        to_delete = existing_pairs - requested_pairs
        to_add = requested_pairs - existing_pairs
        to_update = existing_pairs & requested_pairs

        # users are created/updated with the original casing of the username,
        # hence the non-lowered user_info dict
        self._upsert_userinfo(sess, user_info)

        revoke_allowed = not do_not_revoke_from_db_and_storage
        if revoke_allowed:
            self._revoke_from_storage(
                to_delete, sess, google_bulk_mapping=google_bulk_mapping
            )
            self._revoke_from_db(sess, to_delete)

        self._grant_from_storage(
            to_add,
            user_project_lowercase,
            sess,
            google_bulk_mapping=google_bulk_mapping,
            expires=expires,
        )

        self._grant_from_db(
            sess,
            to_add,
            user_info_lowercase,
            user_project_lowercase,
            auth_provider_list,
        )

        # re-grant access that already exists in the db
        self._grant_from_storage(
            to_update,
            user_project_lowercase,
            sess,
            google_bulk_mapping=google_bulk_mapping,
            expires=expires,
        )
        self._update_from_db(sess, to_update, user_project_lowercase)

        if revoke_allowed:
            self._validate_and_update_user_admin(sess, user_info_lowercase)

        sess.commit()

        if config["GOOGLE_BULK_UPDATES"]:
            self.logger.info("Doing bulk Google update...")
            update_google_groups_for_users(google_bulk_mapping)
            self.logger.info("Bulk Google update done!")

        sess.commit()
1✔
933

934
    def sync_to_storage_backend(
        self, user_project, user_info, sess, expires, skip_google_updates=False
    ):
        """
        Grant a single user's access in the storage backend with the given
        expiration.

        Args:
            user_project (dict): a dictionary of

                {
                    username: {
                        'project1': {'read-storage','write-storage'},
                        'project2': {'read-storage'}
                    }
                }

            user_info (dict): a dictionary of attributes for a user.
            sess: a sqlalchemy session
            expires (int): time at which synced Arborist policies and
                   inclusion in any GBAG are set to expire
            skip_google_updates (bool): True if google group updates should be skipped. False if otherwise.
        Return:
            None

        Raises:
            Exception: if `expires` is falsy
        """
        if not expires:
            raise Exception(
                f"sync to storage backend requires an expiration. you provided: {expires}"
            )

        bulk_mapping = None
        if config["GOOGLE_BULK_UPDATES"]:
            bulk_mapping = {}
            get_or_create_proxy_group_id(
                expires=expires,
                user_id=user_info["user_id"],
                username=user_info["username"],
                session=sess,
                storage_manager=self.storage_manager,
            )

        # TODO: eventually it'd be nice to remove this step but it's required
        #       so that grant_from_storage can determine what storage backends
        #       are needed for a project.
        self._init_projects(user_project, sess)

        # usernames are matched case-insensitively, so both the lookup dict
        # and the (username, project) pairs use the lower-cased username
        user_project_lowercase = {}
        to_add = set()
        for username, projects in user_project.items():
            lowered = username.lower()
            user_project_lowercase[lowered] = projects
            for project in projects:
                to_add.add((lowered, project))

        # the user is upserted under the lower-cased username
        self._upsert_userinfo(sess, {user_info["username"].lower(): user_info})

        if not skip_google_updates:
            self._grant_from_storage(
                to_add,
                user_project_lowercase,
                sess,
                google_bulk_mapping=bulk_mapping,
                expires=expires,
            )

            if config["GOOGLE_BULK_UPDATES"]:
                self.logger.info("Updating user's google groups ...")
                update_google_groups_for_users(bulk_mapping)
                self.logger.info("Google groups update done!!")

        sess.commit()
1✔
1008

1009
    def _revoke_from_db(self, sess, to_delete):
1✔
1010
        """
1011
        Revoke user access to projects in the auth database
1012

1013
        Args:
1014
            sess: sqlalchemy session
1015
            to_delete: a set of (username, project.auth_id) to be revoked from db
1016
        Return:
1017
            None
1018
        """
1019
        for username, project_auth_id in to_delete:
1✔
1020
            q = (
1✔
1021
                sess.query(AccessPrivilege)
1022
                .filter(AccessPrivilege.project.has(auth_id=project_auth_id))
1023
                .join(AccessPrivilege.user)
1024
                .filter(func.lower(User.username) == username)
1025
                .all()
1026
            )
1027
            for access in q:
1✔
1028
                self.logger.info(
1✔
1029
                    "revoke {} access to {} in db".format(username, project_auth_id)
1030
                )
1031
                sess.delete(access)
1✔
1032

1033
    def _validate_and_update_user_admin(self, sess, user_info):
        """
        Drop the admin flag of every admin user in the db whose lower-cased
        username is not a key of `user_info` (i.e. not in the yaml/csv files).

        Args:
            sess: sqlalchemy session
            user_info: a dict of
            {
                username: {
                    'email': email,
                    'display_name': display_name,
                    'phone_number': phonenum,
                    'tags': {'k1':'v1', 'k2': 'v2'}
                    'admin': is_admin
                }
            }
        Returns:
            None
        """
        current_admins = sess.query(User).filter_by(is_admin=True).all()
        for admin_user in current_admins:
            if admin_user.username.lower() in user_info:
                continue
            admin_user.is_admin = False
            sess.add(admin_user)
            self.logger.info(
                "remove admin access from {} in db".format(
                    admin_user.username.lower()
                )
            )
1061

1062
    def _update_from_db(self, sess, to_update, user_project):
1✔
1063
        """
1064
        Update user access to projects in the auth database
1065

1066
        Args:
1067
            sess: sqlalchemy session
1068
            to_update:
1069
                a set of (username, project.auth_id) to be updated from db
1070

1071
        Return:
1072
            None
1073
        """
1074

1075
        for username, project_auth_id in to_update:
1✔
1076
            q = (
1✔
1077
                sess.query(AccessPrivilege)
1078
                .filter(AccessPrivilege.project.has(auth_id=project_auth_id))
1079
                .join(AccessPrivilege.user)
1080
                .filter(func.lower(User.username) == username)
1081
                .all()
1082
            )
1083
            for access in q:
1✔
1084
                access.privilege = user_project[username][project_auth_id]
1✔
1085
                self.logger.info(
1✔
1086
                    "update {} with {} access to {} in db".format(
1087
                        username, access.privilege, project_auth_id
1088
                    )
1089
                )
1090

1091
    def _grant_from_db(self, sess, to_add, user_info, user_project, auth_provider_list):
1✔
1092
        """
1093
        Grant user access to projects in the auth database
1094
        Args:
1095
            sess: sqlalchemy session
1096
            to_add: a set of (username, project.auth_id) to be granted
1097
            user_project:
1098
                a dictionary of {username: {project: {'read','write'}}
1099
        Return:
1100
            None
1101
        """
1102
        for username, project_auth_id in to_add:
1✔
1103
            u = query_for_user(session=sess, username=username)
1✔
1104

1105
            auth_provider = auth_provider_list[0]
1✔
1106
            if "dbgap_role" not in user_info[username]["tags"]:
1✔
1107
                auth_provider = auth_provider_list[1]
1✔
1108
            user_access = AccessPrivilege(
1✔
1109
                user=u,
1110
                project=self._projects[project_auth_id],
1111
                privilege=list(user_project[username][project_auth_id]),
1112
                auth_provider=auth_provider,
1113
            )
1114
            self.logger.info(
1✔
1115
                "grant user {} to {} with access {}".format(
1116
                    username, user_access.project, user_access.privilege
1117
                )
1118
            )
1119
            sess.add(user_access)
1✔
1120

1121
    def _upsert_userinfo(self, sess, user_info):
1✔
1122
        """
1123
        update user info to database.
1124

1125
        Args:
1126
            sess: sqlalchemy session
1127
            user_info:
1128
                a dict of {username: {display_name, phone_number, tags, admin}
1129

1130
        Return:
1131
            None
1132
        """
1133

1134
        for username in user_info:
1✔
1135
            u = query_for_user(session=sess, username=username)
1✔
1136

1137
            if u is None:
1✔
1138
                self.logger.info("create user {}".format(username))
1✔
1139
                u = User(username=username)
1✔
1140
                sess.add(u)
1✔
1141

1142
            if self.arborist_client:
1✔
1143
                self.arborist_client.create_user({"name": username})
1✔
1144

1145
            u.email = user_info[username].get("email", "")
1✔
1146
            u.display_name = user_info[username].get("display_name", "")
1✔
1147
            u.phone_number = user_info[username].get("phone_number", "")
1✔
1148
            u.is_admin = user_info[username].get("admin", False)
1✔
1149

1150
            idp_name = user_info[username].get("idp_name", "")
1✔
1151
            if idp_name and not u.identity_provider:
1✔
1152
                idp = (
×
1153
                    sess.query(IdentityProvider)
1154
                    .filter(IdentityProvider.name == idp_name)
1155
                    .first()
1156
                )
1157
                if not idp:
×
1158
                    idp = IdentityProvider(name=idp_name)
×
1159
                u.identity_provider = idp
×
1160

1161
            # do not update if there is no tag
1162
            if not user_info[username].get("tags"):
1✔
1163
                continue
1✔
1164

1165
            # remove user db tags if they are not shown in new tags
1166
            for tag in u.tags:
1✔
1167
                if tag.key not in user_info[username]["tags"]:
1✔
1168
                    u.tags.remove(tag)
1✔
1169

1170
            # sync
1171
            for k, v in user_info[username]["tags"].items():
1✔
1172
                found = False
1✔
1173
                for tag in u.tags:
1✔
1174
                    if tag.key == k:
1✔
1175
                        found = True
1✔
1176
                        tag.value = v
1✔
1177
                # create new tag if not found
1178
                if not found:
1✔
1179
                    tag = Tag(key=k, value=v)
1✔
1180
                    u.tags.append(tag)
1✔
1181

1182
    def _revoke_from_storage(self, to_delete, sess, google_bulk_mapping=None):
1✔
1183
        """
1184
        If a project have storage backend, revoke user's access to buckets in
1185
        the storage backend.
1186

1187
        Args:
1188
            to_delete: a set of (username, project.auth_id) to be revoked
1189

1190
        Return:
1191
            None
1192
        """
1193
        for username, project_auth_id in to_delete:
1✔
1194
            project = (
1✔
1195
                sess.query(Project).filter(Project.auth_id == project_auth_id).first()
1196
            )
1197
            for sa in project.storage_access:
1✔
1198
                if not hasattr(self, "storage_manager"):
1✔
1199
                    self.logger.error(
×
1200
                        (
1201
                            "CANNOT revoke {} access to {} in {} because there is NO "
1202
                            "configured storage accesses at all. See configuration. "
1203
                            "Continuing anyway..."
1204
                        ).format(username, project_auth_id, sa.provider.name)
1205
                    )
1206
                    continue
×
1207

1208
                self.logger.info(
1✔
1209
                    "revoke {} access to {} in {}".format(
1210
                        username, project_auth_id, sa.provider.name
1211
                    )
1212
                )
1213
                self.storage_manager.revoke_access(
1✔
1214
                    provider=sa.provider.name,
1215
                    username=username,
1216
                    project=project,
1217
                    session=sess,
1218
                    google_bulk_mapping=google_bulk_mapping,
1219
                )
1220

1221
    def _grant_from_storage(
1✔
1222
        self, to_add, user_project, sess, google_bulk_mapping=None, expires=None
1223
    ):
1224
        """
1225
        If a project have storage backend, grant user's access to buckets in
1226
        the storage backend.
1227

1228
        Args:
1229
            to_add: a set of (username, project.auth_id)  to be granted
1230
            user_project: a dictionary like:
1231

1232
                    {username: {phsid: {'read-storage','write-storage'}}}
1233

1234
        Return:
1235
            dict of the users' storage usernames to their user_projects and the respective storage access.
1236
        """
1237
        storage_user_to_sa_and_user_project = defaultdict()
1✔
1238
        for username, project_auth_id in to_add:
1✔
1239
            project = self._projects[project_auth_id]
1✔
1240
            for sa in project.storage_access:
1✔
1241
                access = list(user_project[username][project_auth_id])
1✔
1242
                if not hasattr(self, "storage_manager"):
1✔
1243
                    self.logger.error(
×
1244
                        (
1245
                            "CANNOT grant {} access {} to {} in {} because there is NO "
1246
                            "configured storage accesses at all. See configuration. "
1247
                            "Continuing anyway..."
1248
                        ).format(username, access, project_auth_id, sa.provider.name)
1249
                    )
1250
                    continue
×
1251

1252
                self.logger.info(
1✔
1253
                    "grant {} access {} to {} in {}".format(
1254
                        username, access, project_auth_id, sa.provider.name
1255
                    )
1256
                )
1257
                storage_username = self.storage_manager.grant_access(
1✔
1258
                    provider=sa.provider.name,
1259
                    username=username,
1260
                    project=project,
1261
                    access=access,
1262
                    session=sess,
1263
                    google_bulk_mapping=google_bulk_mapping,
1264
                    expires=expires,
1265
                )
1266

1267
                storage_user_to_sa_and_user_project[storage_username] = (sa, project)
1✔
1268
        return storage_user_to_sa_and_user_project
1✔
1269

1270
    def _init_projects(self, user_project, sess):
1✔
1271
        """
1272
        initialize projects
1273
        """
1274
        print("--------project mapping-=------------")
1✔
1275
        print(self.project_mapping)
1✔
1276

1277
        if self.project_mapping:
1✔
1278
            for projects in list(self.project_mapping.values()):
1✔
1279
                for p in projects:
1✔
1280
                    self.logger.debug(
1✔
1281
                        "creating Project with info from project_mapping: {}".format(p)
1282
                    )
1283
                    project = self._get_or_create(sess, Project, **p)
1✔
1284
                    self._projects[p["auth_id"]] = project
1✔
1285
        for _, projects in user_project.items():
1✔
1286
            for auth_id in list(projects.keys()):
1✔
1287
                project = sess.query(Project).filter(Project.auth_id == auth_id).first()
1✔
1288
                if not project:
1✔
1289
                    data = {"name": auth_id, "auth_id": auth_id}
1✔
1290
                    try:
1✔
1291
                        project = self._get_or_create(sess, Project, **data)
1✔
1292
                    except IntegrityError as e:
×
1293
                        sess.rollback()
×
1294
                        self.logger.error(
×
1295
                            f"Project {auth_id} already exists. Detail {str(e)}"
1296
                        )
1297
                        raise Exception(
×
1298
                            "Project {} already exists. Detail {}. Please contact your system administrator.".format(
1299
                                auth_id, str(e)
1300
                            )
1301
                        )
1302
                if auth_id not in self._projects:
1✔
1303
                    self._projects[auth_id] = project
1✔
1304

1305
    @staticmethod
1✔
1306
    def _get_or_create(sess, model, **kwargs):
1✔
1307
        instance = sess.query(model).filter_by(**kwargs).first()
1✔
1308
        if not instance:
1✔
1309
            instance = model(**kwargs)
1✔
1310
            sess.add(instance)
1✔
1311
        return instance
1✔
1312

1313
    def _process_dbgap_files(self, dbgap_config, sess):
1✔
1314
        """
1315
        Args:
1316
            dbgap_config : a dictionary containing information about a single
1317
                           dbgap sftp server (from fence config)
1318
            sess: database session
1319

1320
        Return:
1321
            user_projects (dict)
1322
            user_info (dict)
1323
        """
1324
        dbgap_file_list = []
1✔
1325
        hostname = dbgap_config["info"]["host"]
1✔
1326
        username = dbgap_config["info"]["username"]
1✔
1327
        encrypted = dbgap_config["info"].get("encrypted", True)
1✔
1328
        folderdir = os.path.join(str(self.folder), str(hostname), str(username))
1✔
1329

1330
        try:
1✔
1331
            if os.path.exists(folderdir):
1✔
1332
                dbgap_file_list = glob.glob(
×
1333
                    os.path.join(folderdir, "*")
1334
                )  # get lists of file from folder
1335
            else:
1336
                self.logger.info("Downloading files from: {}".format(hostname))
1✔
1337
                dbgap_file_list = self._download(dbgap_config)
1✔
1338
        except Exception as e:
1✔
1339
            self.logger.error(e)
1✔
1340
            exit(1)
1✔
1341
        self.logger.info("dbgap files: {}".format(dbgap_file_list))
×
1342
        user_projects, user_info = self._get_user_permissions_from_csv_list(
×
1343
            dbgap_file_list,
1344
            encrypted=encrypted,
1345
            session=sess,
1346
            dbgap_config=dbgap_config,
1347
        )
1348

1349
        user_projects = self.parse_projects(user_projects)
×
1350
        return user_projects, user_info
×
1351

1352
    def _get_user_permissions_from_csv_list(
1✔
1353
        self, file_list, encrypted, session, dbgap_config={}
1354
    ):
1355
        """
1356
        Args:
1357
            file_list: list of files (represented as strings)
1358
            encrypted: boolean indicating whether those files are encrypted
1359
            session: sqlalchemy session
1360
            dbgap_config: a dictionary containing information about the dbGaP sftp server
1361
                    (comes from fence config)
1362

1363
        Return:
1364
            user_projects (dict)
1365
            user_info (dict)
1366
        """
1367
        permissions = [{"read-storage", "read"} for _ in file_list]
1✔
1368
        user_projects, user_info = self._parse_csv(
1✔
1369
            dict(list(zip(file_list, permissions))),
1370
            sess=session,
1371
            dbgap_config=dbgap_config,
1372
            encrypted=encrypted,
1373
        )
1374
        return user_projects, user_info
1✔
1375

1376
    def _merge_multiple_local_csv_files(
1✔
1377
        self, dbgap_file_list, encrypted, dbgap_configs, session
1378
    ):
1379
        """
1380
        Args:
1381
            dbgap_file_list (list): a list of whitelist file locations stored locally
1382
            encrypted (bool): whether the file is encrypted (comes from fence config)
1383
            dbgap_configs (list): list of dictionaries containing information about the dbgap server (comes from fence config)
1384
            session (sqlalchemy.Session): database session
1385

1386
        Return:
1387
            merged_user_projects (dict)
1388
            merged_user_info (dict)
1389
        """
1390
        merged_user_projects = {}
1✔
1391
        merged_user_info = {}
1✔
1392

1393
        for dbgap_config in dbgap_configs:
1✔
1394
            user_projects, user_info = self._get_user_permissions_from_csv_list(
1✔
1395
                dbgap_file_list,
1396
                encrypted,
1397
                session=session,
1398
                dbgap_config=dbgap_config,
1399
            )
1400
            self.sync_two_user_info_dict(user_info, merged_user_info)
1✔
1401
            self.sync_two_phsids_dict(user_projects, merged_user_projects)
1✔
1402
        return merged_user_projects, merged_user_info
1✔
1403

1404
    def _merge_multiple_dbgap_sftp(self, dbgap_servers, sess):
1✔
1405
        """
1406
        Args:
1407
            dbgap_servers : a list of dictionaries each containging config on
1408
                           dbgap sftp server (comes from fence config)
1409
            sess: database session
1410

1411
        Return:
1412
            merged_user_projects (dict)
1413
            merged_user_info (dict)
1414
        """
1415
        merged_user_projects = {}
1✔
1416
        merged_user_info = {}
1✔
1417
        for dbgap in dbgap_servers:
1✔
1418
            user_projects, user_info = self._process_dbgap_files(dbgap, sess)
1✔
1419
            # merge into merged_user_info
1420
            # user_info overrides original info in merged_user_info
1421
            self.sync_two_user_info_dict(user_info, merged_user_info)
1✔
1422

1423
            # merge all access info dicts into "merged_user_projects".
1424
            # the access info is combined - if the user_projects access is
1425
            # ["read"] and the merged_user_projects is ["read-storage"], the
1426
            # resulting access is ["read", "read-storage"].
1427
            self.sync_two_phsids_dict(user_projects, merged_user_projects)
1✔
1428
        return merged_user_projects, merged_user_info
1✔
1429

1430
    def parse_projects(self, user_projects):
        """
        Return a copy of `user_projects` whose keys are lower-cased; the
        values are kept as they are.
        """
        lowered = {}
        for key, value in user_projects.items():
            lowered[key.lower()] = value
        return lowered
1✔
1435

1436
    def _process_dbgap_project(
1✔
1437
        self, dbgap_project, privileges, username, sess, user_projects, dbgap_config
1438
    ):
1439
        if dbgap_project not in self.project_mapping:
1✔
1440
            self._add_dbgap_project_for_user(
1✔
1441
                dbgap_project,
1442
                privileges,
1443
                username,
1444
                sess,
1445
                user_projects,
1446
                dbgap_config,
1447
            )
1448

1449
        for element_dict in self.project_mapping.get(dbgap_project, []):
1✔
1450
            try:
1✔
1451
                phsid_privileges = {element_dict["auth_id"]: set(privileges)}
1✔
1452

1453
                # need to add dbgap project to arborist
1454
                if self.arborist_client:
1✔
1455
                    self._determine_arborist_resource(
1✔
1456
                        element_dict["auth_id"], dbgap_config
1457
                    )
1458

1459
                if username not in user_projects:
1✔
1460
                    user_projects[username] = {}
1✔
1461
                user_projects[username].update(phsid_privileges)
1✔
1462

1463
            except ValueError as e:
×
1464
                self.logger.info(e)
×
1465

1466
    def _process_user_projects(
1✔
1467
        self,
1468
        user_projects,
1469
        enable_common_exchange_area_access,
1470
        study_common_exchange_areas,
1471
        dbgap_config,
1472
        sess,
1473
    ):
1474
        user_projects_to_modify = copy.deepcopy(user_projects)
1✔
1475
        for username in user_projects.keys():
1✔
1476
            for project in user_projects[username].keys():
1✔
1477
                phsid = project.split(".")
1✔
1478
                dbgap_project = phsid[0]
1✔
1479
                privileges = user_projects[username][project]
1✔
1480
                if len(phsid) > 1 and self._get_parse_consent_code(dbgap_config):
1✔
1481
                    consent_code = phsid[-1]
1✔
1482

1483
                    # c999 indicates full access to all consents and access
1484
                    # to a study-specific exchange area
1485
                    # access to at least one study-specific exchange area implies access
1486
                    # to the parent study's common exchange area
1487
                    #
1488
                    # NOTE: Handling giving access to all consents is done at
1489
                    #       a later time, when we have full information about possible
1490
                    #       consents
1491
                    self.logger.debug(
1✔
1492
                        f"got consent code {consent_code} from dbGaP project "
1493
                        f"{dbgap_project}"
1494
                    )
1495
                    if (
1✔
1496
                        consent_code == "c999"
1497
                        and enable_common_exchange_area_access
1498
                        and dbgap_project in study_common_exchange_areas
1499
                    ):
1500
                        self.logger.info(
1✔
1501
                            "found study with consent c999 and Fence "
1502
                            "is configured to parse exchange area data. Giving user "
1503
                            f"{username} {privileges} privileges in project: "
1504
                            f"{study_common_exchange_areas[dbgap_project]}."
1505
                        )
1506
                        self._add_dbgap_project_for_user(
1✔
1507
                            study_common_exchange_areas[dbgap_project],
1508
                            privileges,
1509
                            username,
1510
                            sess,
1511
                            user_projects_to_modify,
1512
                            dbgap_config,
1513
                        )
1514

1515
                    dbgap_project += "." + consent_code
1✔
1516

1517
                self._process_dbgap_project(
1✔
1518
                    dbgap_project,
1519
                    privileges,
1520
                    username,
1521
                    sess,
1522
                    user_projects_to_modify,
1523
                    dbgap_config,
1524
                )
1525
        for user in user_projects_to_modify.keys():
1✔
1526
            user_projects[user] = user_projects_to_modify[user]
1✔
1527

1528
    def sync(self):
        """
        Run the sync with ``self.session`` when one is set, otherwise with a
        session opened from ``self.driver`` for the duration of the sync.
        """
        if not self.session:
            # no session was supplied: open one just for this sync
            with self.driver.session as db_session:
                self._sync(db_session)
            return

        self._sync(self.session)
×
1534

1535
    def download(self):
        """
        Download the files of every dbGaP server configuration in
        ``self.dbGaP``, one after the other.
        """
        for server_config in self.dbGaP:
            self._download(server_config)
×
1538

1539
    def _download(self, dbgap_config):
1✔
1540
        """
1541
        Download files from dbgap server.
1542
        """
1543
        server = dbgap_config["info"]
1✔
1544
        protocol = dbgap_config["protocol"]
1✔
1545
        hostname = server["host"]
1✔
1546
        username = server["username"]
1✔
1547
        folderdir = os.path.join(str(self.folder), str(hostname), str(username))
1✔
1548

1549
        if not os.path.exists(folderdir):
1✔
1550
            os.makedirs(folderdir)
1✔
1551

1552
        self.logger.info("Download from server")
1✔
1553
        try:
1✔
1554
            if protocol == "sftp":
1✔
1555
                self._get_from_sftp_with_proxy(server, folderdir)
1✔
1556
            else:
1557
                self._get_from_ftp_with_proxy(server, folderdir)
×
1558
            dbgap_files = glob.glob(os.path.join(folderdir, "*"))
×
1559
            return dbgap_files
×
1560
        except Exception as e:
1✔
1561
            self.logger.error(e)
1✔
1562
            raise
1✔
1563

1564
    def _sync(self, sess):
        """
        Collect files from dbgap server(s), sync csv and yaml files to storage
        backend and fence DB

        Access information is gathered from three sources (dbGaP servers,
        local CSV files and the local user.yaml), merged, written to the Fence
        DB / storage backend and finally pushed to Arborist.

        Args:
            sess: database session used for every DB operation of this sync

        Raises:
            EnvironmentError: if the user.yaml cannot be loaded, or if it has
                an ``authz`` section but no arborist client is configured
            AssertionError: re-raised from loading the user.yaml
            GoogleUpdateException: re-raised at the very end, after all other
                syncing has been attempted
            SystemExit: with code 1 when an Arborist update reports failure
        """

        # get all dbgap files
        user_projects = {}
        user_info = {}
        if self.is_sync_from_dbgap_server:
            self.logger.debug(
                "Pulling telemetry files from {} dbgap sftp servers".format(
                    len(self.dbGaP)
                )
            )
            user_projects, user_info = self._merge_multiple_dbgap_sftp(self.dbGaP, sess)

        local_csv_file_list = []
        if self.sync_from_local_csv_dir:
            local_csv_file_list = glob.glob(
                os.path.join(self.sync_from_local_csv_dir, "*")
            )
            # Sort the list so the order of files is consistent across platforms
            local_csv_file_list.sort()

        user_projects_csv, user_info_csv = self._merge_multiple_local_csv_files(
            local_csv_file_list,
            encrypted=False,
            session=sess,
            dbgap_configs=self.dbGaP,
        )

        try:
            user_yaml = UserYAML.from_file(
                self.sync_from_local_yaml_file, encrypted=False, logger=self.logger
            )
        except (EnvironmentError, AssertionError) as e:
            self.logger.error(str(e))
            self.logger.error("aborting early")
            raise

        # diagnostics go through the logger rather than stdout
        self.logger.debug(
            f"synced from local yaml `{self.sync_from_local_yaml_file}` and "
            f"local csv dir `{self.sync_from_local_csv_dir}`"
        )

        # parse all projects
        user_projects_csv = self.parse_projects(user_projects_csv)
        user_projects = self.parse_projects(user_projects)
        user_yaml.projects = self.parse_projects(user_yaml.projects)

        # merge all user info dicts into "user_info".
        # the user info (such as email) in the user.yaml files
        # overrides the user info from the CSV files.
        self.sync_two_user_info_dict(user_info_csv, user_info)
        self.sync_two_user_info_dict(user_yaml.user_info, user_info)

        # merge all access info dicts into "user_projects".
        # the access info is combined - if the user.yaml access is
        # ["read"] and the CSV file access is ["read-storage"], the
        # resulting access is ["read", "read-storage"].
        self.sync_two_phsids_dict(
            user_projects_csv, user_projects, source1="local_csv", source2="dbgap"
        )
        self.sync_two_phsids_dict(
            user_yaml.projects, user_projects, source1="user_yaml", source2="dbgap"
        )

        # Note: every configured dbgap server is checked here; all consents are
        # granted to c999 users once for each server whose config enables
        # consent-code parsing
        for dbgap_config in self.dbGaP:
            if self._get_parse_consent_code(dbgap_config):
                self._grant_all_consents_to_c999_users(
                    user_projects, user_yaml.project_to_resource
                )

        google_update_ex = None

        try:
            # update the Fence DB
            if user_projects:
                self.logger.info("Sync to db and storage backend")
                self.sync_to_db_and_storage_backend(user_projects, user_info, sess)
                self.logger.info("Finish syncing to db and storage backend")
            else:
                self.logger.info("No users for syncing")
        except GoogleUpdateException as ex:
            # save this to reraise later after all non-Google syncing has finished
            # this way, any issues with Google only affect Google data access and don't
            # cascade problems into non-Google AWS or Azure access
            google_update_ex = ex

        # update the Arborist DB (resources, roles, policies, groups)
        if user_yaml.authz:
            if not self.arborist_client:
                raise EnvironmentError(
                    "yaml file contains authz section but sync is not configured with"
                    " arborist client--did you run sync with --arborist <arborist client> arg?"
                )
            self.logger.info("Synchronizing arborist...")
            success = self._update_arborist(sess, user_yaml)
            if success:
                self.logger.info("Finished synchronizing arborist")
            else:
                self.logger.error("Could not synchronize successfully")
                # raised directly: the ``exit`` builtin is only injected by
                # the ``site`` module and is not guaranteed to exist
                raise SystemExit(1)
        else:
            self.logger.info("No `authz` section; skipping arborist sync")

        # update the Arborist DB (user access)
        if self.arborist_client:
            self.logger.info("Synchronizing arborist with authorization info...")
            success = self._update_authz_in_arborist(sess, user_projects, user_yaml)
            if success:
                self.logger.info(
                    "Finished synchronizing authorization info to arborist"
                )
            else:
                self.logger.error(
                    "Could not synchronize authorization info successfully to arborist"
                )
                raise SystemExit(1)
        else:
            self.logger.error("No arborist client set; skipping arborist sync")

        # Logging authz source
        for u, s in self.auth_source.items():
            self.logger.info("Access for user {} from {}".format(u, s))

        self.logger.info(
            f"Persisting authz mapping to database: {user_yaml.project_to_resource}"
        )
        user_yaml.persist_project_to_resource(db_session=sess)
        if google_update_ex is not None:
            raise google_update_ex
1✔
1698

1699
    def _grant_all_consents_to_c999_users(
        self, user_projects, user_yaml_project_to_resources
    ):
        """
        Give every user who holds a ``c999`` consent on a study access to all
        known accessions-with-consent of that study.

        Known accessions are collected from ``self._projects`` and from the
        keys of ``user_yaml_project_to_resources``. Each consent found on a
        study is also registered for every child study returned by
        ``self._get_children``. ``user_projects`` is updated in place.

        Args:
            user_projects (dict): username -> {project: privileges}
            user_yaml_project_to_resources (dict): mapping whose keys are
                project names (only the keys are read)
        """
        accession_pattern = re.compile(config["DBGAP_ACCESSION_WITH_CONSENT_REGEX"])

        # combine dbgap/user.yaml projects into one big list (in case not all consents
        # are in either)
        all_projects = set(
            [*self._projects.keys(), *user_yaml_project_to_resources.keys()]
        )

        self.logger.debug(f"all projects: {all_projects}")

        # construct a mapping from phsid (without consent) to all accessions with consent
        consent_mapping = {}
        for project in all_projects:
            matched = accession_pattern.match(project)
            if not matched:
                continue

            fields = matched.groupdict()
            phsid, consent = fields["phsid"], fields["consent"]

            # TODO: This is not handling the .v1.p1 at all
            consent_mapping.setdefault(phsid, set()).add(".".join([phsid, consent]))

            # Assign parent consent to child study
            for child_phs in self._get_children(phsid) or ():
                consent_mapping.setdefault(child_phs, set()).add(
                    ".".join([child_phs, consent])
                )

        self.logger.debug(f"consent mapping: {consent_mapping}")

        # go through existing access and find any c999's and make sure to give access to
        # all accessions with consent for that phsid
        # (iterate over a snapshot because ``user_projects`` is updated below)
        snapshot = copy.deepcopy(user_projects)
        for username, user_project_info in snapshot.items():
            for project in user_project_info:
                matched = accession_pattern.match(project)
                if not matched:
                    continue
                fields = matched.groupdict()
                if fields["consent"] != "c999":
                    continue

                # give access to all consents
                all_phsids_with_consent = consent_mapping.get(fields["phsid"], [])
                self.logger.info(
                    f"user {username} has c999 consent group for: {project}. "
                    f"Granting access to all consents: {all_phsids_with_consent}"
                )
                # NOTE: Only giving read-storage at the moment (this is same
                #       permission we give for other dbgap projects)
                for phsid_with_consent in all_phsids_with_consent:
                    user_projects[username][phsid_with_consent] = {
                        "read-storage",
                        "read",
                    }
1753

1754
    def _update_arborist(self, session, user_yaml):
        """
        Create roles, resources, policies, groups in arborist from the information in
        ``user_yaml``.

        The projects are sent to arborist as resources with paths like
        ``/projects/{project}``. Roles are created with just the original names
        for the privileges like ``"read-storage", "read"`` etc.

        Individual Arborist errors while updating resources, roles, policies
        and groups are logged and the update continues. The policy dicts in
        ``user_yaml.authz`` are left unmodified.

        Args:
            session (sqlalchemy.Session)
            user_yaml (UserYAML)

        Return:
            bool: success (``False`` only when arborist is not healthy)
        """
        healthy = self._is_arborist_healthy()
        if not healthy:
            return False

        # groups that are never deleted and whose policies are rebuilt below
        builtin_groups = ("anonymous", "logged-in")

        # Set up the resource tree in arborist by combining provided resources with any
        # dbgap resources that were created before this.
        #
        # Why add dbgap resources if they've already been created?
        #   B/C Arborist's PUT update will override existing subresources. So if a dbgap
        #   resources was created under `/programs/phs000178` anything provided in
        #   user.yaml under `/programs` would completely wipe it out.
        resources = user_yaml.authz.get("resources", [])

        dbgap_resource_paths = []
        for path_list in self._dbgap_study_to_resources.values():
            dbgap_resource_paths.extend(path_list)

        self.logger.debug("user_yaml resources: {}".format(resources))
        self.logger.debug("dbgap resource paths: {}".format(dbgap_resource_paths))

        combined_resources = utils.combine_provided_and_dbgap_resources(
            resources, dbgap_resource_paths
        )

        for resource in combined_resources:
            try:
                self.logger.debug(
                    "attempting to update arborist resource: {}".format(resource)
                )
                self.arborist_client.update_resource("/", resource, merge=True)
            except ArboristError as e:
                self.logger.error(e)
                # keep going; maybe just some conflicts from things existing already

        # update roles
        roles = user_yaml.authz.get("roles", [])
        for role in roles:
            try:
                response = self.arborist_client.update_role(role["id"], role)
                if response:
                    self._created_roles.add(role["id"])
            except ArboristError as update_error:
                self.logger.info(
                    "couldn't update role '{}', creating instead".format(
                        str(update_error)
                    )
                )
                try:
                    response = self.arborist_client.create_role(role)
                    if response:
                        self._created_roles.add(role["id"])
                except ArboristError as create_error:
                    self.logger.error(create_error)
                    # keep going; maybe just some conflicts from things existing already

        # update policies
        policies = user_yaml.authz.get("policies", [])
        for original_policy in policies:
            # work on a shallow copy so that removing "id" does not strip it
            # from the dict owned by ``user_yaml``
            policy = dict(original_policy)
            policy_id = policy.pop("id")
            try:
                self.logger.debug(
                    "Trying to upsert policy with id {}".format(policy_id)
                )
                response = self.arborist_client.update_policy(
                    policy_id, policy, create_if_not_exist=True
                )
            except ArboristError as e:
                self.logger.error(e)
                # keep going; maybe just some conflicts from things existing already
            else:
                if response:
                    self.logger.debug("Upserted policy with id {}".format(policy_id))
                    self._created_policies.add(policy_id)

        # update groups
        groups = user_yaml.authz.get("groups", [])

        # delete from arborist the groups that have been deleted
        # from the user.yaml
        arborist_groups = {
            g["name"] for g in self.arborist_client.list_groups().get("groups", [])
        }
        # groups without a name are skipped here; they are reported by the
        # required-field check below instead of raising a KeyError
        useryaml_groups = {g["name"] for g in groups if "name" in g}
        for deleted_group in arborist_groups.difference(useryaml_groups):
            # do not try to delete built in groups
            if deleted_group not in builtin_groups:
                self.arborist_client.delete_group(deleted_group)

        # create/update the groups defined in the user.yaml
        for group in groups:
            missing = {"name", "users", "policies"}.difference(group.keys())
            if missing:
                name = group.get("name", "{MISSING NAME}")
                self.logger.error(
                    "group {} missing required field(s): {}".format(
                        name, sorted(missing)
                    )
                )
                continue
            try:
                self.arborist_client.put_group(
                    group["name"],
                    # Arborist doesn't handle group descriptions yet
                    # description=group.get("description", ""),
                    users=group["users"],
                    policies=group["policies"],
                )
            except ArboristError as e:
                self.logger.info("couldn't put group: {}".format(str(e)))

        # Update policies for built-in (`anonymous` and `logged-in`) groups

        # First recreate these groups in order to clear out old, possibly deleted policies
        for builtin_group in builtin_groups:
            try:
                self.arborist_client.put_group(builtin_group)
            except ArboristError as e:
                self.logger.info("couldn't put group: {}".format(str(e)))

        # Now add back policies that are in the user.yaml
        for policy in user_yaml.authz.get("anonymous_policies", []):
            self.arborist_client.grant_group_policy("anonymous", policy)

        for policy in user_yaml.authz.get("all_users_policies", []):
            self.arborist_client.grant_group_policy("logged-in", policy)

        return True
1✔
1893

1894
    def _revoke_all_policies_preserve_mfa(self, username, idp=None):
        """
        Revoke all of a user's policies in Arborist, granting ``mfa_policy``
        back afterwards when MFA is enabled for the user's idp and the user
        held that policy before the revocation.

        MFA counts as enabled when the ``OPENID_CONNECT`` config entry for
        ``idp`` contains ``multifactor_auth_claim_info``.

        Args:
            username (str): user whose policies are revoked
            idp (str): name of the user's identity provider, if known
        """
        idp_settings = config["OPENID_CONNECT"].get(idp, {})

        if "multifactor_auth_claim_info" not in idp_settings:
            # TODO This should be a diff, not a revocation of all policies.
            self.arborist_client.revoke_all_policies_for_user(username)
            return

        held_policies = []
        try:
            held_policies = self.arborist_client.get_user(username)["policies"]
        except Exception as error:
            self.logger.error(
                f"Could not retrieve user's policies, revoking all policies anyway. {error}"
            )
        finally:
            # TODO This should be a diff, not a revocation of all policies.
            self.arborist_client.revoke_all_policies_for_user(username)

        if "mfa_policy" in held_policies:
            self.arborist_client.grant_user_policy(username, "mfa_policy")
1✔
1923

1924
    def _update_authz_in_arborist(
        self,
        session,
        user_projects,
        user_yaml=None,
        single_user_sync=False,
        expires=None,
    ):
        """
        Assign users policies in arborist from the information in
        ``user_projects`` and optionally a ``user_yaml``.

        The projects are sent to arborist as resources with paths like
        ``/projects/{project}``. Roles are created with just the original names
        for the privileges like ``"read-storage", "read"`` etc.

        Args:
            session: database session used to look up users, clients and the
                persisted project to authz mapping
            user_projects (dict)
            user_yaml (UserYAML) optional, if there are policies for users in a user.yaml
            single_user_sync (bool) whether authz update is for a single user
            expires (int) time at which authz info in Arborist should expire

        Return:
            bool: success. ``False`` when arborist is not healthy. For a
            single user sync, the result of granting the first policy is
            returned directly.
        """
        healthy = self._is_arborist_healthy()
        if not healthy:
            return False

        self.logger.debug("user_projects: {}".format(user_projects))

        if user_yaml:
            self.logger.debug(
                "useryaml abac before lowering usernames: {}".format(
                    user_yaml.user_abac
                )
            )
            user_yaml.user_abac = {
                key.lower(): value for key, value in user_yaml.user_abac.items()
            }
            # update the project info with `projects` specified in user.yaml
            self.sync_two_phsids_dict(user_yaml.user_abac, user_projects)

        # get list of users from arborist to make sure users that are completely removed
        # from authorization sources get policies revoked
        arborist_user_projects = {}
        arborist_users_auth_mapping = {}
        if not single_user_sync:
            # stays empty when the user list cannot be fetched, so the loop
            # below is skipped instead of failing on an unbound name
            arborist_users = []
            try:
                arborist_users = self.arborist_client.get_users().json["users"]

                # construct user information, NOTE the lowering of the username. when adding/
                # removing access, the case in the Fence db is used. For combining access, it is
                # case-insensitive, so we lower
                arborist_user_projects = {
                    user["name"].lower(): {} for user in arborist_users
                }
            except (ArboristError, KeyError, AttributeError) as error:
                # TODO usersync should probably exit with non-zero exit code at the end,
                #      but sync should continue from this point so there are no partial
                #      updates
                self.logger.warning(
                    "Could not get list of users in Arborist, continuing anyway. "
                    "WARNING: this sync will NOT remove access for users no longer in "
                    f"authorization sources. Error: {error}"
                )

            # Get auth mapping for users
            # NOTE(review): the mapping is currently only used for debug
            # logging - presumably groundwork for diffing incoming vs current
            # policies; confirm before removing the per-user call
            for user in arborist_users:
                username = user["name"]
                try:
                    arborist_users_auth_mapping[
                        username
                    ] = self.arborist_client.auth_mapping(username)
                except (ArboristError, KeyError, AttributeError) as error:
                    self.logger.warning(
                        "Could not get auth mapping of users in Arborist, continuing anyway. "
                        "WARNING: this sync will NOT remove access for users no longer in "
                        f"authorization sources. Error: {error}"
                    )
            self.logger.debug(
                "arborist_users_auth_mapping: {}".format(arborist_users_auth_mapping)
            )

            # update the project info with users from arborist
            self.sync_two_phsids_dict(arborist_user_projects, user_projects)

        # prefer in-memory if available from user_yaml, if not, get from database
        if user_yaml and user_yaml.project_to_resource:
            project_to_authz_mapping = user_yaml.project_to_resource
            self.logger.debug(
                f"using in-memory project to authz resource mapping from "
                f"user.yaml (instead of database): {project_to_authz_mapping}"
            )
        else:
            project_to_authz_mapping = get_project_to_authz_mapping(session)
            self.logger.debug(
                f"using persisted project to authz resource mapping from database "
                f"(instead of user.yaml - as it may not be available): {project_to_authz_mapping}"
            )

        self.logger.debug(
            f"_dbgap_study_to_resources: {self._dbgap_study_to_resources}"
        )
        all_resources = [
            r
            for resources in self._dbgap_study_to_resources.values()
            for r in resources
        ]
        all_resources.extend(project_to_authz_mapping.values())
        self._create_arborist_resources(all_resources)

        for username, user_project_info in user_projects.items():
            self.logger.info("processing user `{}`".format(username))
            user = query_for_user(session=session, username=username)
            idp = None
            if user:
                # use the username as stored in the Fence db from here on
                username = user.username
                idp = user.identity_provider.name if user.identity_provider else None

            self.arborist_client.create_user_if_not_exist(username)
            if not single_user_sync:
                # remove removed policies: everything is revoked here and the
                # current access is granted again below
                self._revoke_all_policies_preserve_mfa(username, idp)

            # as of 2/11/2022, for single_user_sync, as RAS visa parsing has
            # previously mapped each project to the same set of privileges
            # (i.e.{'read', 'read-storage'}), unique_policies will just be a
            # single policy with ('read', 'read-storage') being the single
            # key
            unique_policies = self._determine_unique_policies(
                user_project_info, project_to_authz_mapping
            )
            self.logger.debug(
                "unique policies for user `{}`: {}".format(username, unique_policies)
            )

            for roles in unique_policies.keys():
                for role in roles:
                    self._create_arborist_role(role)

            if single_user_sync:
                for ordered_roles, ordered_resources in unique_policies.items():
                    policy_hash = self._hash_policy_contents(
                        ordered_roles, ordered_resources
                    )
                    self._create_arborist_policy(
                        policy_hash,
                        ordered_roles,
                        ordered_resources,
                        skip_if_exists=True,
                    )
                    # return here as it is not expected single_user_sync
                    # will need any of the remaining user_yaml operations
                    # left in _update_authz_in_arborist
                    return self._grant_arborist_policy(
                        username, policy_hash, expires=expires
                    )
            else:
                for roles, resources in unique_policies.items():
                    for role in roles:
                        for resource in resources:
                            # grant a policy to this user which is a single
                            # role on a single resource

                            # format project '/x/y/z' -> 'x.y.z'
                            # so the policy id will be something like 'x.y.z-create'
                            policy_id = _format_policy_id(resource, role)
                            if policy_id not in self._created_policies:
                                try:
                                    self.arborist_client.update_policy(
                                        policy_id,
                                        {
                                            "description": "policy created by fence sync",
                                            "role_ids": [role],
                                            "resource_paths": [resource],
                                        },
                                        create_if_not_exist=True,
                                    )
                                except ArboristError as e:
                                    self.logger.info(
                                        "not creating policy in arborist; {}".format(
                                            str(e)
                                        )
                                    )
                                self._created_policies.add(policy_id)

                            self._grant_arborist_policy(
                                username, policy_id, expires=expires
                            )

            if user_yaml:
                for policy in user_yaml.policies.get(username, []):
                    self.arborist_client.grant_user_policy(
                        username,
                        policy,
                        expires_at=expires,
                    )

        if user_yaml:
            self.logger.debug("user yaml policies: {}".format(user_yaml.policies))

            for client_name, client_details in user_yaml.clients.items():
                client_policies = client_details.get("policies", [])
                clients = session.query(Client).filter_by(name=client_name).all()
                # update existing clients, do not create new ones
                if not clients:
                    self.logger.warning(
                        "client to update (`{}`) does not exist in fence: skipping".format(
                            client_name
                        )
                    )
                    continue
                self.logger.debug(
                    "updating client `{}` (found {} client IDs)".format(
                        client_name, len(clients)
                    )
                )
                # there may be more than 1 client with this name if credentials are being rotated,
                # so we grant access to each client ID
                for client in clients:
                    try:
                        self.arborist_client.update_client(
                            client.client_id, client_policies
                        )
                    except ArboristError as e:
                        self.logger.info(
                            "not granting policies {} to client `{}` (`{}`); {}".format(
                                client_policies, client_name, client.client_id, str(e)
                            )
                        )

        return True
1✔
2165

2166
    def _determine_unique_policies(self, user_project_info, project_to_authz_mapping):
1✔
2167
        """
2168
        Determine and return a dictionary of unique policies.
2169

2170
        Args (examples):
2171
            user_project_info (dict):
2172
            {
2173
                'phs000002.c1': { 'read-storage', 'read' },
2174
                'phs000001.c1': { 'read', 'read-storage' },
2175
                'phs000004.c1': { 'write', 'read' },
2176
                'phs000003.c1': { 'read', 'write' },
2177
                'phs000006.c1': { 'write-storage', 'write', 'read-storage', 'read' }
2178
                'phs000005.c1': { 'read', 'read-storage', 'write', 'write-storage' },
2179
            }
2180
            project_to_authz_mapping (dict):
2181
            {
2182
                'phs000001.c1': '/programs/DEV/projects/phs000001.c1'
2183
            }
2184

2185
        Return (for examples):
2186
            dict:
2187
            {
2188
                ('read', 'read-storage'): ('phs000001.c1', 'phs000002.c1'),
2189
                ('read', 'write'): ('phs000003.c1', 'phs000004.c1'),
2190
                ('read', 'read-storage', 'write', 'write-storage'): ('phs000005.c1', 'phs000006.c1'),
2191
            }
2192
        """
2193
        roles_to_resources = collections.defaultdict(list)
1✔
2194
        for study, roles in user_project_info.items():
1✔
2195
            ordered_roles = tuple(sorted(roles))
1✔
2196
            study_authz_paths = self._dbgap_study_to_resources.get(study, [study])
1✔
2197
            if study in project_to_authz_mapping:
1✔
2198
                study_authz_paths = [project_to_authz_mapping[study]]
1✔
2199
            roles_to_resources[ordered_roles].extend(study_authz_paths)
1✔
2200

2201
        policies = {}
1✔
2202
        for ordered_roles, unordered_resources in roles_to_resources.items():
1✔
2203
            policies[ordered_roles] = tuple(sorted(unordered_resources))
1✔
2204
        return policies
1✔
2205

2206
    def _create_arborist_role(self, role):
        """
        Create a role in Arborist (via gen3authz's create_role) and log the outcome.

        Roles recorded in self._created_roles are not sent to Arborist
        again; a role is recorded there once create_role returns without
        raising.

        Args:
            role (str): what the Arborist identity should be of the created role

        Return:
            bool: True if the role was created successfully or it already
                  exists. False if Arborist raised an error
        """
        if role not in self._created_roles:
            try:
                response_json = self.arborist_client.create_role(
                    arborist_role_for_permission(role)
                )
            except ArboristError as err:
                self.logger.error(
                    "could not create `{}` role in Arborist: {}".format(role, err)
                )
                return False
            self._created_roles.add(role)

            # a None response is reported as "already exists"
            if response_json is None:
                message = "role `{}` already exists in Arborist"
            else:
                message = "created role `{}` in Arborist"
            self.logger.info(message.format(role))
        return True
1✔
2235

2236
    def _create_arborist_resources(self, resources):
        """
        Create resources in Arborist

        The flat list of paths is first turned into request bodies by
        utils.combine_provided_and_dbgap_resources({}, resources); one
        update_resource call (against "/", with merge=True) is then made per
        request body. Processing stops at the first Arborist error.

        Args:
            resources (list): a list of full Arborist resource paths to create
            [
                "/programs/DEV/projects/phs000001.c1",
                "/programs/DEV/projects/phs000002.c1",
                "/programs/DEV/projects/phs000003.c1"
            ]

        Return:
            bool: True if the resources were successfully created, False otherwise

        Number of network requests (as observed on 2/11/2022):
        for the paths above the helper returns a single nested tree
        [
            { 'name': 'programs', 'subresources': [
                { 'name': 'DEV', 'subresources': [
                    { 'name': 'projects', 'subresources': [
                        { 'name': 'phs000001.c1', 'subresources': []},
                        { 'name': 'phs000002.c1', 'subresources': []},
                        { 'name': 'phs000003.c1', 'subresources': []}
                    ]}
                ]}
            ]}
        ]
        so only one request is sent to Arborist. For
        resources = ["/phs000001.c1", "/phs000002.c1", "/phs000003.c1"] it
        returns three top-level objects
        [
            {'name': 'phs000001.c1', 'subresources': []},
            {'name': 'phs000002.c1', 'subresources': []},
            {'name': 'phs000003.c1', 'subresources': []}
        ]
        so three requests are sent.

        As a practical matter, for sync_single_user_visas, studies
        should be nested under the `/programs` resource as in the former
        example (i.e. only one network request gets made).

        TODO for the sake of simplicity, it would be nice if only one network
        request was made no matter the input.
        """
        request_bodies = utils.combine_provided_and_dbgap_resources({}, resources)
        for request_body in request_bodies:
            try:
                self.arborist_client.update_resource("/", request_body, merge=True)
            except ArboristError as err:
                self.logger.error(
                    "could not create Arborist resources using request body `{}`. error: {}".format(
                        request_body, err
                    )
                )
                return False

        self.logger.debug(
            "created {} resource(s) in Arborist: `{}`".format(len(resources), resources)
        )
        return True
1✔
2301

2302
    def _create_arborist_policy(
1✔
2303
        self, policy_id, roles, resources, skip_if_exists=False
2304
    ):
2305
        """
2306
        Wrapper around gen3authz's create_policy with additional logging
2307

2308
        Args:
2309
            policy_id (str): what the Arborist identity should be of the created policy
2310
            roles (iterable): what roles the create policy should have
2311
            resources (iterable): what resources the created policy should have
2312
            skip_if_exists (bool): if True, this function will not treat an already
2313
                                   existent policy as an error
2314

2315
        Return:
2316
            bool: True if policy creation was successful. False otherwise
2317
        """
2318
        try:
1✔
2319
            response_json = self.arborist_client.create_policy(
1✔
2320
                {
2321
                    "id": policy_id,
2322
                    "role_ids": roles,
2323
                    "resource_paths": resources,
2324
                },
2325
                skip_if_exists=skip_if_exists,
2326
            )
2327
        except ArboristError as e:
×
2328
            self.logger.error(
×
2329
                "could not create policy `{}` in Arborist: {}".format(policy_id, e)
2330
            )
2331
            return False
×
2332

2333
        if response_json is None:
1✔
2334
            self.logger.info("policy `{}` already exists in Arborist".format(policy_id))
×
2335
        else:
2336
            self.logger.info("created policy `{}` in Arborist".format(policy_id))
1✔
2337
        return True
1✔
2338

2339
    def _hash_policy_contents(self, ordered_roles, ordered_resources):
1✔
2340
        """
2341
        Generate a sha256 hexdigest representing ordered_roles and ordered_resources.
2342

2343
        Args:
2344
            ordered_roles (iterable): policy roles in sorted order
2345
            ordered_resources (iterable): policy resources in sorted order
2346

2347
        Return:
2348
            str: SHA256 hex digest
2349
        """
2350

2351
        def escape(s):
1✔
2352
            return s.replace(",", "\,")
1✔
2353

2354
        canonical_roles = ",".join(escape(r) for r in ordered_roles)
1✔
2355
        canonical_resources = ",".join(escape(r) for r in ordered_resources)
1✔
2356
        canonical_policy = f"{canonical_roles},,f{canonical_resources}"
1✔
2357
        policy_hash = hashlib.sha256(canonical_policy.encode("utf-8")).hexdigest()
1✔
2358

2359
        return policy_hash
1✔
2360

2361
    def _compare_policies(self, existing_policies, incoming_policies):
1✔
2362
        """
2363
        Compares a user's existing polivies with incoming policies from either user_yaml or dbgap whitelist
2364

2365

2366
        Args:
2367
            existing_policies (_type_): user's existing policies pulled with arborist_client.auth_mapping(username)
2368
            incoming_policies (_type_): user's policies as dictated by authz source
2369

2370
        Return:
2371
            policies_to_add (dict): policies to be added to arborist
2372
            policies_to_remove (dict): policies to be removed from arborist
2373
        """
2374
        pass
×
2375

2376
    def _grant_arborist_policy(self, username, policy_id, expires=None):
1✔
2377
        """
2378
        Wrapper around gen3authz's grant_user_policy with additional logging
2379

2380
        Args:
2381
            username (str): username of user in Arborist who policy should be
2382
                            granted to
2383
            policy_id (str): Arborist policy id
2384
            expires (int): POSIX timestamp for when policy should expire
2385

2386
        Return:
2387
            bool: True if granting of policy was successful, False otherwise
2388
        """
2389
        try:
1✔
2390
            response_json = self.arborist_client.grant_user_policy(
1✔
2391
                username,
2392
                policy_id,
2393
                expires_at=expires,
2394
            )
2395
        except ArboristError as e:
×
2396
            self.logger.error(
×
2397
                "could not grant policy `{}` to user `{}`: {}".format(
2398
                    policy_id, username, e
2399
                )
2400
            )
2401
            return False
×
2402

2403
        self.logger.debug(
1✔
2404
            "granted policy `{}` to user `{}`".format(policy_id, username)
2405
        )
2406
        return True
1✔
2407

2408
    def _determine_arborist_resource(self, dbgap_study, dbgap_config):
1✔
2409
        """
2410
        Determine the arborist resource path and add it to
2411
        _self._dbgap_study_to_resources
2412

2413
        Args:
2414
            dbgap_study (str): study phs identifier
2415
            dbgap_config (dict): dictionary of config for dbgap server
2416

2417
        """
2418
        default_namespaces = dbgap_config.get("study_to_resource_namespaces", {}).get(
1✔
2419
            "_default", ["/"]
2420
        )
2421
        namespaces = dbgap_config.get("study_to_resource_namespaces", {}).get(
1✔
2422
            dbgap_study, default_namespaces
2423
        )
2424

2425
        self.logger.debug(f"dbgap study namespaces: {namespaces}")
1✔
2426

2427
        arborist_resource_namespaces = [
1✔
2428
            namespace.rstrip("/") + "/programs/" for namespace in namespaces
2429
        ]
2430

2431
        for resource_namespace in arborist_resource_namespaces:
1✔
2432
            full_resource_path = resource_namespace + dbgap_study
1✔
2433
            if dbgap_study not in self._dbgap_study_to_resources:
1✔
2434
                self._dbgap_study_to_resources[dbgap_study] = []
1✔
2435
            self._dbgap_study_to_resources[dbgap_study].append(full_resource_path)
1✔
2436
        return arborist_resource_namespaces
1✔
2437

2438
    def _is_arborist_healthy(self):
1✔
2439
        if not self.arborist_client:
1✔
2440
            self.logger.warning("no arborist client set; skipping arborist dbgap sync")
×
2441
            return False
×
2442
        if not self.arborist_client.healthy():
1✔
2443
            # TODO (rudyardrichter, 2019-01-07): add backoff/retry here
2444
            self.logger.error(
×
2445
                "arborist service is unavailable; skipping main arborist dbgap sync"
2446
            )
2447
            return False
×
2448
        return True
1✔
2449

2450
    def _pick_sync_type(self, visa):
1✔
2451
        """
2452
        Pick type of visa to parse according to the visa provider
2453
        """
2454
        sync_client = None
1✔
2455
        if visa.type in self.visa_types["ras"]:
1✔
2456
            sync_client = self.ras_sync_client
1✔
2457
        else:
2458
            raise Exception(
×
2459
                "Visa type {} not recognized. Configure in fence-config".format(
2460
                    visa.type
2461
                )
2462
            )
2463
        if not sync_client:
1✔
2464
            raise Exception("Sync client for {} not configured".format(visa.type))
×
2465

2466
        return sync_client
1✔
2467

2468
    def sync_single_user_visas(
        self, user, ga4gh_visas, sess=None, expires=None, skip_google_updates=False
    ):
        """
        Sync a single user's visas during login or DRS/data access

        IMPORTANT NOTE: THIS DOES NOT VALIDATE THE VISA. ENSURE THIS IS DONE
                        BEFORE THIS.

        Steps, in order:
          1. parse every visa with the sync client for its type; visas that
             fail to parse are logged and skipped
          2. merge the projects from all parsed visas and run them through
             parse_projects / _process_user_projects
          3. if consent codes are parsed, apply
             _grant_all_consents_to_c999_users
          4. sync to the storage backend (when there is anything to sync)
          5. sync to Arborist (when an arborist client is set)

        Only the first entry of self.dbGaP is used as the dbGaP config.

        Args:
            user (userdatamodel.user.User): Fence user whose visas'
                                            authz info is being synced
            ga4gh_visas (list): a list of fence.models.GA4GHVisaV1 objects
                                that are ALREADY VALIDATED
            sess (sqlalchemy.orm.session.Session): database session
            expires (int): time at which synced Arborist policies and
                           inclusion in any GBAG are set to expire
            skip_google_updates (bool): True if google group updates should be skipped. False if otherwise.

        Return:
            list of successfully parsed visas

        Raises:
            EnvironmentError, AssertionError: re-raised (after logging) if
                the local user yaml file cannot be loaded
        """
        self.ras_sync_client = RASVisa(logger=self.logger)
        dbgap_config = self.dbGaP[0]
        parse_consent_code = self._get_parse_consent_code(dbgap_config)
        cea_access_enabled = dbgap_config.get(
            "enable_common_exchange_area_access", False
        )
        study_common_exchange_areas = dbgap_config.get(
            "study_common_exchange_areas", {}
        )

        try:
            user_yaml = UserYAML.from_file(
                self.sync_from_local_yaml_file, encrypted=False, logger=self.logger
            )
        except (EnvironmentError, AssertionError) as err:
            self.logger.error(str(err))
            self.logger.error("aborting early")
            raise

        merged_projects = {}
        info = {}
        parsed_visas = []

        for visa in ga4gh_visas:
            visa_sync_client = self._pick_sync_type(visa)
            encoded_visa = visa.ga4gh_visa

            try:
                visa_projects, info = visa_sync_client._parse_single_visa(
                    user,
                    encoded_visa,
                    visa.expires,
                    parse_consent_code,
                )
            except Exception:
                # best effort: one bad visa must not block the others
                self.logger.warning(
                    f"ignoring unsuccessfully parsed or expired visa: {encoded_visa}"
                )
                continue

            # later visas win when two visas name the same project
            merged_projects = {**merged_projects, **visa_projects}
            parsed_visas.append(visa)

        info["user_id"] = user.id
        info["username"] = user.username

        user_projects = self.parse_projects({user.username: merged_projects})

        if parse_consent_code and cea_access_enabled:
            self.logger.info(
                f"using study to common exchange area mapping: {study_common_exchange_areas}"
            )

        self._process_user_projects(
            user_projects,
            cea_access_enabled,
            study_common_exchange_areas,
            dbgap_config,
            sess,
        )

        if parse_consent_code:
            self._grant_all_consents_to_c999_users(
                user_projects, user_yaml.project_to_resource
            )

        if not user_projects:
            self.logger.info("No users for syncing")
        else:
            self.sync_to_storage_backend(
                user_projects,
                info,
                sess,
                expires=expires,
                skip_google_updates=skip_google_updates,
            )

        # update arborist db (user access)
        if not self.arborist_client:
            self.logger.error("No arborist client set; skipping arborist sync")
            return parsed_visas

        self.logger.info("Synchronizing arborist with authorization info...")
        synced = self._update_authz_in_arborist(
            sess,
            user_projects,
            user_yaml=user_yaml,
            single_user_sync=True,
            expires=expires,
        )
        if synced:
            self.logger.info("Finished synchronizing authorization info to arborist")
        else:
            self.logger.error(
                "Could not synchronize authorization info successfully to arborist"
            )

        return parsed_visas
1✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc