• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

uc-cdis / fence / 13143677675

04 Feb 2025 07:51PM UTC coverage: 75.273% (-0.005%) from 75.278%
13143677675

Pull #1223

github

web-flow
Merge branch 'master' into chore/update-passport-logging
Pull Request #1223: Add logging for expiration, discovery and caching. Remove some debug logs

7860 of 10442 relevant lines covered (75.27%)

0.75 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

82.02
fence/sync/sync_users.py
1
import backoff
1✔
2
import glob
1✔
3
import jwt
1✔
4
import os
1✔
5
import re
1✔
6
import subprocess as sp
1✔
7
import yaml
1✔
8
import copy
1✔
9
import datetime
1✔
10
import uuid
1✔
11
import collections
1✔
12
import hashlib
1✔
13

14
from contextlib import contextmanager
1✔
15
from collections import defaultdict
1✔
16
from csv import DictReader
1✔
17
from io import StringIO
1✔
18
from stat import S_ISDIR
1✔
19

20
import paramiko
1✔
21
from cdislogging import get_logger
1✔
22
from email_validator import validate_email, EmailNotValidError
1✔
23
from gen3authz.client.arborist.errors import ArboristError
1✔
24
from gen3users.validation import validate_user_yaml
1✔
25
from paramiko.proxy import ProxyCommand
1✔
26
from sqlalchemy.exc import IntegrityError
1✔
27
from sqlalchemy import func
1✔
28

29
from fence.config import config
1✔
30
from fence.models import (
1✔
31
    AccessPrivilege,
32
    AuthorizationProvider,
33
    Project,
34
    Tag,
35
    User,
36
    query_for_user,
37
    Client,
38
    IdentityProvider,
39
    get_project_to_authz_mapping,
40
)
41
from fence.resources.google.utils import get_or_create_proxy_group_id
1✔
42
from fence.resources.storage import StorageManager
1✔
43
from fence.resources.google.access_utils import update_google_groups_for_users
1✔
44
from fence.resources.google.access_utils import GoogleUpdateException
1✔
45
from fence.sync import utils
1✔
46
from fence.sync.passport_sync.ras_sync import RASVisa
1✔
47
from fence.utils import get_SQLAlchemyDriver, DEFAULT_BACKOFF_SETTINGS
1✔
48

49

50
def _format_policy_id(path, privilege):
1✔
51
    resource = ".".join(name for name in path.split("/") if name)
1✔
52
    return "{}-{}".format(resource, privilege)
1✔
53

54

55
def download_dir(sftp, remote_dir, local_dir):
    """
    Recursively download the contents of remote_dir into local_dir.

    Args:
        sftp: SFTP client exposing listdir_attr(path) and get(remote, local)
        remote_dir(str): remote directory to copy from
        local_dir(str): local directory to copy into; created when missing
    Returns: None
    """
    # Nested remote directories have no local counterpart yet; create the
    # target first so the files below it have somewhere to be written.
    os.makedirs(local_dir, exist_ok=True)

    for item in sftp.listdir_attr(remote_dir):
        remote_path = remote_dir + "/" + item.filename
        local_path = os.path.join(local_dir, item.filename)
        if S_ISDIR(item.st_mode):
            download_dir(sftp, remote_path, local_path)
        else:
            sftp.get(remote_path, local_path)
72

73

74
def arborist_role_for_permission(permission):
    """
    Build the arborist role for a single privilege.

    For the programs/projects in the existing fence access control model, a
    policy is generated for each combination of program/project and
    privilege so that arborist can be used for checking permissions. Each
    role produced here therefore contains exactly one permission, whose id
    and method are both the given privilege and whose service is "*".

    Args:
        permission (str): privilege name from the project access model

    Returns:
        dict: role with keys "id" and "permissions"
    """
    action = {"service": "*", "method": permission}
    only_permission = {"id": permission, "action": action}
    return {"id": permission, "permissions": [only_permission]}
87

88

89
@contextmanager
1✔
90
def _read_file(filepath, encrypted=True, key=None, logger=None):
1✔
91
    """
92
    Context manager for reading and optionally decrypting file it only
93
    decrypts files encrypted by unix 'crypt' tool which is used by dbGaP.
94

95
    Args:
96
        filepath (str): path to the file
97
        encrypted (bool): whether the file is encrypted
98

99
    Returns:
100
        Generator[file-like class]: file like object for the file
101
    """
102
    if encrypted:
1✔
103
        has_crypt = sp.call(["which", "mcrypt"])
×
104
        if has_crypt != 0:
×
105
            if logger:
×
106
                logger.error("Need to install mcrypt to decrypt files from dbgap")
×
107
            # TODO (rudyardrichter, 2019-01-08): raise error and move exit out to script
108
            exit(1)
×
109
        p = sp.Popen(
×
110
            [
111
                "mcrypt",
112
                "-a",
113
                "enigma",
114
                "-o",
115
                "scrypt",
116
                "-m",
117
                "stream",
118
                "--bare",
119
                "--key",
120
                key,
121
                "--force",
122
            ],
123
            stdin=open(filepath, "r"),
124
            stdout=sp.PIPE,
125
            stderr=open(os.devnull, "w"),
126
            universal_newlines=True,
127
        )
128
        try:
×
129
            yield StringIO(p.communicate()[0])
×
130
        except UnicodeDecodeError:
×
131
            logger.error("Could not decode file. Check the decryption key.")
×
132
    else:
133
        f = open(filepath, "r")
1✔
134
        yield f
1✔
135
        f.close()
1✔
136

137

138
class UserYAML(object):
    """
    In-memory form of a YAML access-control file: users, their project
    privileges, and the ABAC/authz information that goes with them.
    """

    def __init__(
        self,
        projects=None,
        user_info=None,
        policies=None,
        clients=None,
        authz=None,
        project_to_resource=None,
        logger=None,
        user_abac=None,
    ):
        # every falsy mapping argument is replaced by a fresh empty dict
        self.projects = projects if projects else {}
        self.user_info = user_info if user_info else {}
        self.user_abac = user_abac if user_abac else {}
        self.policies = policies if policies else {}
        self.clients = clients if clients else {}
        self.authz = authz if authz else {}
        self.project_to_resource = project_to_resource if project_to_resource else {}
        self.logger = logger

    @classmethod
    def from_file(cls, filepath, encrypted=True, key=None, logger=None):
        """
        Parse a user.yaml file into a UserYAML instance.

        Access keyed by "auth_id" ends up in "projects" (used to update the
        Fence DB); access keyed by "resource" ends up in "user_abac" (used
        to update Arborist).

        Args:
            filepath (str): path to the YAML file; when falsy nothing is read
                and an empty instance is returned
            encrypted (bool): passed through to the file reader
            key (str): passed through to the file reader
            logger: optional logger

        Returns:
            UserYAML: the parsed representation

        Raises:
            EnvironmentError: if a username is encountered twice
        """
        data = {}
        if not filepath:
            if logger:
                logger.info("Did not sync a user.yaml, no file path provided.")
        else:
            with _read_file(filepath, encrypted=encrypted, key=key, logger=logger) as f:
                file_contents = f.read()
                validate_user_yaml(file_contents)  # run user.yaml validation tests
                data = yaml.safe_load(file_contents)

        projects = {}
        user_info = {}
        policies = {}
        # resources should be the resource tree to construct in arborist
        user_abac = {}

        # Fall back on rbac block if no authz. Remove when rbac in useryaml fully deprecated.
        if data.get("rbac") and not data.get("authz"):
            if logger:
                logger.info(
                    "No authz block found but rbac block present. Using rbac block"
                )
            data["authz"] = data["rbac"]

        # get user project mapping to arborist resources if it exists
        authz_block = data.get("authz", dict())
        project_to_resource = authz_block.get("user_project_to_resource", dict())

        # read projects and privileges for each user
        for username, details in data.get("users", {}).items():
            # users should occur only once each
            if username in projects:
                msg = "invalid yaml file: user `{}` occurs multiple times".format(
                    username
                )
                if logger:
                    logger.error(msg)
                raise EnvironmentError(msg)

            privileges = {}
            resource_permissions = {}
            for project in details.get("projects", {}):
                try:
                    # privilege is looked up before auth_id, as in a single
                    # `d[auth_id] = set(privilege)` assignment
                    granted = set(project["privilege"])
                    auth_id = project["auth_id"]
                except KeyError as e:
                    if logger:
                        logger.error("project {} missing field: {}".format(project, e))
                    continue
                privileges[auth_id] = granted

                # project may not have `resource` field.
                # prefer resource field;
                # if no resource or mapping, assume auth_id is resource.
                resource = project.get("resource", auth_id)
                project_to_resource.setdefault(auth_id, resource)
                resource_permissions[resource] = set(project["privilege"])

            info = {
                "email": details.get("email", ""),
                "display_name": details.get("display_name", ""),
                "phone_number": details.get("phone_number", ""),
                "tags": details.get("tags", {}),
                "admin": details.get("admin", False),
            }
            user_info[username] = info
            if not details.get("email"):
                # no explicit email: use the username when it is a valid address
                try:
                    valid = validate_email(
                        username, allow_smtputf8=False, check_deliverability=False
                    )
                    info["email"] = valid.email
                except EmailNotValidError:
                    pass

            projects[username] = privileges
            user_abac[username] = resource_permissions

            # list of policies we want to grant to this user, which get sent to arborist
            # to check if they're allowed to do certain things
            policies[username] = details.get("policies", [])

        if logger:
            logger.info(
                "Got user project to arborist resource mapping:\n{}".format(
                    str(project_to_resource)
                )
            )

        authz = data.get("authz", dict())
        if not authz:
            # older version: resources in root, no `authz` section or `rbac` section
            if logger:
                logger.warning(
                    "access control YAML file is using old format (missing `authz`/`rbac`"
                    " section in the root); assuming that if it exists `resources` will"
                    " be on the root level, and continuing"
                )
            # root-level resources are moved under `authz` so the rest of the
            # code can look in the usual place; authz stays as-is when there
            # are none
            if data.get("resources", []):
                authz["resources"] = data.get("resources", [])

        return cls(
            projects=projects,
            user_info=user_info,
            user_abac=user_abac,
            policies=policies,
            clients=data.get("clients", {}),
            authz=authz,
            project_to_resource=project_to_resource,
            logger=logger,
        )

    def persist_project_to_resource(self, db_session):
        """
        Write the parsed Project.auth_id -> authorization resource
        (Project.authz) mapping to the database.

        Existing projects get their `authz` updated; unknown auth_ids get a
        new Project whose name equals the auth_id. A single commit is issued
        after all entries are handled.

        Args:
            db_session: database session used for the queries and the commit
        """
        for auth_id, authz_resource in self.project_to_resource.items():
            existing = (
                db_session.query(Project).filter(Project.auth_id == auth_id).first()
            )
            if not existing:
                db_session.add(
                    Project(name=auth_id, auth_id=auth_id, authz=authz_resource)
                )
                continue
            existing.authz = authz_resource
        db_session.commit()
306

307

308
class UserSyncer(object):
1✔
309
    def __init__(
        self,
        dbGaP,
        DB,
        project_mapping,
        storage_credentials=None,
        db_session=None,
        is_sync_from_dbgap_server=False,
        sync_from_local_csv_dir=None,
        sync_from_local_yaml_file=None,
        arborist=None,
        folder=None,
    ):
        """
        Syncs ACL files from dbGap to auth database and storage backends

        Args:
            dbGaP: a list of dict containing creds to access dbgap sftp; each
                entry may carry a "parent_to_child_studies_mapping", and all
                of those are merged into one mapping here
            DB: database connection string
            project_mapping: a dict containing how dbgap ids map to projects
            storage_credentials: a dict containing creds for storage backends;
                a StorageManager is only created when this is truthy
            db_session: stored as `self.session`
            is_sync_from_dbgap_server: stored as-is
            sync_from_local_csv_dir: stored as-is; presumably a local dir to
                sync from instead of dbGaP (verify against callers)
            sync_from_local_yaml_file: stored as-is; presumably the path of a
                local user.yaml (verify against callers)
            arborist:
                ArboristClient instance if the syncer should also create
                resources in arborist
            folder: a local folder where dbgap telemetry files will sync to
        """
        # plain configuration captured from the arguments
        self.sync_from_local_csv_dir = sync_from_local_csv_dir
        self.sync_from_local_yaml_file = sync_from_local_yaml_file
        self.is_sync_from_dbgap_server = is_sync_from_dbgap_server
        self.dbGaP = dbGaP
        self.session = db_session
        self.driver = get_SQLAlchemyDriver(DB)
        self.project_mapping = project_mapping if project_mapping else {}

        # bookkeeping filled in while syncing
        self._projects = {}
        self._created_roles = set()
        self._created_policies = set()
        self._dbgap_study_to_resources = {}

        log_level = "debug" if config["DEBUG"] is True else "info"
        self.logger = get_logger("user_syncer", log_level=log_level)
        self.arborist_client = arborist
        self.folder = folder

        # auth_source used for logging. username : [source1, source2]
        self.auth_source = defaultdict(set)
        self.visa_types = config.get("USERSYNC", {}).get("visa_types", {})

        # later dbGaP entries override earlier ones on duplicate parent ids
        self.parent_to_child_studies_mapping = {}
        for single_dbgap_config in dbGaP:
            child_mapping = single_dbgap_config.get(
                "parent_to_child_studies_mapping", {}
            )
            self.parent_to_child_studies_mapping.update(child_mapping)

        if storage_credentials:
            self.storage_manager = StorageManager(
                storage_credentials, logger=self.logger
            )
        self.id_patterns = []
366

367
    @staticmethod
1✔
368
    def _match_pattern(filepath, id_patterns, encrypted=True):
1✔
369
        """
370
        Check if the filename matches dbgap access control file pattern
371

372
        Args:
373
            filepath (str): path to file
374
            encrypted (bool): whether the file is encrypted
375

376
        Returns:
377
            bool: whether the pattern matches
378
        """
379
        id_patterns.append(r"authentication_file_phs(\d{6}).(csv|txt)")
1✔
380
        for pattern in id_patterns:
1✔
381
            if encrypted:
1✔
382
                pattern += r".enc"
×
383
            pattern += r"$"
1✔
384
            # when converting the YAML from fence-config,
385
            # python reads it as Python string literal. So "\" turns into "\\"
386
            # which messes with the regex match
387
            pattern.replace("\\\\", "\\")
1✔
388
            if re.match(pattern, os.path.basename(filepath)):
1✔
389
                return True
1✔
390
        return False
1✔
391

392
    def _get_from_sftp_with_proxy(self, server, path):
        """
        Download all data from sftp server to a local dir

        When `server` has a non-empty "proxy", the connection is tunnelled
        through an `ssh ... nc` ProxyCommand. The proxy is closed when this
        method exits, whether it succeeded or raised.

        Args:
            server (dict) : dictionary containing info to access sftp server
                (keys read: host, port, username, password, proxy, proxy_user)
            path (str): path to local directory

        Returns:
            None
        """
        proxy = None
        if server.get("proxy", "") != "":
            command = "ssh -i ~/.ssh/id_rsa {user}@{proxy} nc {host} {port}".format(
                user=server.get("proxy_user", ""),
                proxy=server.get("proxy", ""),
                host=server.get("host", ""),
                port=server.get("port", 22),
            )
            self.logger.info("SSH proxy command: {}".format(command))

            proxy = ProxyCommand(command)

        try:
            with paramiko.SSHClient() as client:
                client.set_log_channel(self.logger.name)

                # NOTE(review): AutoAddPolicy accepts unknown host keys
                # without verification; confirm this is acceptable here.
                client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
                parameters = {
                    "hostname": str(server.get("host", "")),
                    "username": str(server.get("username", "")),
                    "password": str(server.get("password", "")),
                    "port": int(server.get("port", 22)),
                }
                if proxy:
                    parameters["sock"] = proxy

                self.logger.info(
                    "SSH connection hostname:port {}:{}".format(
                        parameters.get("hostname", "unknown"),
                        parameters.get("port", "unknown"),
                    )
                )
                self._connect_with_ssh(ssh_client=client, parameters=parameters)
                with client.open_sftp() as sftp:
                    download_dir(sftp, "./", path)
        finally:
            # close the proxy even when connecting or downloading failed
            if proxy:
                proxy.close()
440

441
    @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
    def _connect_with_ssh(self, ssh_client, parameters):
        """
        Open the SSH connection, retrying with exponential backoff on any
        exception as configured by DEFAULT_BACKOFF_SETTINGS.

        Args:
            ssh_client: client object whose `connect` is called
            parameters (dict): keyword arguments forwarded to `connect`

        Returns:
            None
        """
        connect = ssh_client.connect
        connect(**parameters)
444

445
    def _get_from_ftp_with_proxy(self, server, path):
        """
        Download data from ftp server to a local dir

        Runs `lftp` to mirror the remote working directory into `path`
        through an HTTP proxy. lftp is started from an argument list rather
        than a shell command line, so the credentials and host are passed
        verbatim and never interpreted by a shell. Failures to start lftp
        are logged and not raised; lftp's exit status is ignored.

        Args:
            server (dict): dictionary containing information for accessing server
                (keys read: username, password, host, proxy)
            path(str): path to local files

        Returns:
            None
        """
        credentials = "{},{}".format(
            server.get("username", ""), server.get("password", "")
        )
        lftp_commands = "set ftp:proxy http://{}; mirror . {}; exit".format(
            server.get("proxy", ""), path
        )
        host = str(server.get("host", ""))

        arguments = ["lftp", "-u", credentials]
        # an empty host produced no argument at all on the old command line
        if host:
            arguments.append(host)
        arguments += ["-e", lftp_commands]

        try:
            sp.call(arguments)
        except OSError as e:
            # e.g. lftp is not installed; a shell would only have printed an error
            self.logger.error("Could not run lftp: {}".format(e))
466

467
    def _get_parse_consent_code(self, dbgap_config={}):
1✔
468
        return dbgap_config.get(
1✔
469
            "parse_consent_code", True
470
        )  # Should this really be true?
471

472
    def _parse_csv(self, file_dict, sess, dbgap_config=None, encrypted=True):
        """
        parse csv files to python dict

        Files that are empty or whose name does not match an allowed
        pattern are skipped with a warning. Rows without a "login" value and
        rows whose project id matches none of the allowed project id
        patterns are skipped as well.

        Args:
            file_dict: a dictionary with key(file path) and value(privileges)
            sess: sqlalchemy session
            dbgap_config: a dictionary containing information about the dbGaP sftp server
                (comes from fence config). Keys read here: decrypt_key,
                allow_non_dbGaP_whitelist, allowed_whitelist_patterns,
                enable_common_exchange_area_access,
                study_common_exchange_areas, parse_consent_code,
                additional_allowed_project_id_patterns. None or omitted is
                treated as an empty config.
            encrypted: boolean indicating whether those files are encrypted


        Return:
            Tuple[[dict, dict]]:
                (user_project, user_info) where user_project is a mapping from
                usernames to project permissions and user_info is a mapping
                from usernames to user details, such as email

        Example:

            (
                {
                    username: {
                        'project1': {'read-storage','write-storage'},
                        'project2': {'read-storage'},
                    }
                },
                {
                    username: {
                        'email': 'email@mail.com',
                        'display_name': 'display name',
                        'phone_number': '123-456-789',
                        'tags': {'dbgap_role': 'PI'}
                    }
                },
            )

        """
        # None instead of a shared mutable `{}` default
        if dbgap_config is None:
            dbgap_config = {}

        user_projects = dict()
        user_info = defaultdict(dict)

        # parse dbGaP sftp server information
        dbgap_key = dbgap_config.get("decrypt_key", None)

        # extra filename patterns are only honoured when non-dbGaP whitelists
        # are allowed; note that they accumulate on the instance
        self.id_patterns += (
            [
                item.replace("\\\\", "\\")
                for item in dbgap_config.get("allowed_whitelist_patterns", [])
            ]
            if dbgap_config.get("allow_non_dbGaP_whitelist", False)
            else []
        )

        enable_common_exchange_area_access = dbgap_config.get(
            "enable_common_exchange_area_access", False
        )
        study_common_exchange_areas = dbgap_config.get(
            "study_common_exchange_areas", {}
        )
        parse_consent_code = self._get_parse_consent_code(dbgap_config)

        if parse_consent_code and enable_common_exchange_area_access:
            self.logger.info(
                f"using study to common exchange area mapping: {study_common_exchange_areas}"
            )

        project_id_patterns = [r"phs(\d{6})"]
        if "additional_allowed_project_id_patterns" in dbgap_config:
            patterns = dbgap_config.get("additional_allowed_project_id_patterns")
            patterns = [
                pattern.replace("\\\\", "\\") for pattern in patterns
            ]  # when converting the YAML from fence-config, python reads it as Python string literal. So "\" turns into "\\" which messes with the regex match
            project_id_patterns += patterns

        self.logger.info(f"Using these file paths: {file_dict.items()}")
        for filepath, privileges in file_dict.items():
            self.logger.info("Reading file {}".format(filepath))
            if os.stat(filepath).st_size == 0:
                self.logger.warning("Empty file {}".format(filepath))
                continue
            if not self._match_pattern(
                filepath, id_patterns=self.id_patterns, encrypted=encrypted
            ):
                self.logger.warning(
                    "Filename {} does not match dbgap access control filename pattern;"
                    " this could mean that the filename has an invalid format, or has"
                    " an unexpected .enc extension, or lacks the .enc extension where"
                    " expected. This file is NOT being processed by usersync!".format(
                        filepath
                    )
                )
                continue

            with _read_file(
                filepath, encrypted=encrypted, key=dbgap_key, logger=self.logger
            ) as f:
                csv = DictReader(f, quotechar='"', skipinitialspace=True)
                for row in csv:
                    username = row.get("login") or ""
                    if username == "":
                        continue

                    # "<study>.<...>.<consent>" split on dots; project_id is
                    # only a fallback when non-dbGaP whitelists are allowed
                    if dbgap_config.get("allow_non_dbGaP_whitelist", False):
                        phsid = (
                            row.get("phsid") or (row.get("project_id") or "")
                        ).split(".")
                    else:
                        phsid = (row.get("phsid") or "").split(".")

                    dbgap_project = phsid[0]
                    # There are issues where dbgap has a wrong entry in their whitelist. Since we do a bulk arborist request, there are wrong entries in it that invalidates the whole request causing other correct entries not to be added
                    skip = False
                    for pattern in project_id_patterns:
                        self.logger.debug(
                            "Checking pattern:{} with project_id:{}".format(
                                pattern, dbgap_project
                            )
                        )
                        if re.match(pattern, dbgap_project):
                            skip = False
                            break
                        else:
                            skip = True
                    if skip:
                        self.logger.warning(
                            "Skip processing from file {}, user {} with project {}".format(
                                filepath,
                                username,
                                dbgap_project,
                            )
                        )
                        continue
                    if len(phsid) > 1 and parse_consent_code:
                        consent_code = phsid[-1]

                        # c999 indicates full access to all consents and access
                        # to a study-specific exchange area
                        # access to at least one study-specific exchange area implies access
                        # to the parent study's common exchange area
                        #
                        # NOTE: Handling giving access to all consents is done at
                        #       a later time, when we have full information about possible
                        #       consents
                        self.logger.debug(
                            f"got consent code {consent_code} from dbGaP project "
                            f"{dbgap_project}"
                        )
                        if (
                            consent_code == "c999"
                            and enable_common_exchange_area_access
                            and dbgap_project in study_common_exchange_areas
                        ):
                            self.logger.info(
                                "found study with consent c999 and Fence "
                                "is configured to parse exchange area data. Giving user "
                                f"{username} {privileges} privileges in project: "
                                f"{study_common_exchange_areas[dbgap_project]}."
                            )
                            self._add_dbgap_project_for_user(
                                study_common_exchange_areas[dbgap_project],
                                privileges,
                                username,
                                sess,
                                user_projects,
                                dbgap_config,
                            )

                        dbgap_project += "." + consent_code

                    self._add_children_for_dbgap_project(
                        dbgap_project,
                        privileges,
                        username,
                        sess,
                        user_projects,
                        dbgap_config,
                    )

                    display_name = row.get("user name") or ""
                    tags = {"dbgap_role": row.get("role") or ""}

                    # some dbgap telemetry files have information about a researchers PI
                    if "downloader for" in row:
                        tags["pi"] = row["downloader for"]

                    # prefer name over previous "downloader for" if it exists
                    if "downloader for names" in row:
                        tags["pi"] = row["downloader for names"]

                    # email/phone fall back to what an earlier row for the
                    # same user provided; display name and tags are replaced
                    user_info[username] = {
                        "email": row.get("email")
                        or user_info[username].get("email")
                        or "",
                        "display_name": display_name,
                        "phone_number": row.get("phone")
                        or user_info[username].get("phone_number")
                        or "",
                        "tags": tags,
                    }

                    self._process_dbgap_project(
                        dbgap_project,
                        privileges,
                        username,
                        sess,
                        user_projects,
                        dbgap_config,
                    )

        return user_projects, user_info
682

683
    def _get_children(self, dbgap_project):
1✔
684
        return self.parent_to_child_studies_mapping.get(dbgap_project.split(".")[0])
1✔
685

686
    def _add_children_for_dbgap_project(
1✔
687
        self, dbgap_project, privileges, username, sess, user_projects, dbgap_config
688
    ):
689
        """
690
        Adds the configured child studies for the given dbgap_project, adding it to the provided user_projects. If
691
        parse_consent_code is true, then the consents granted in the provided dbgap_project will also be granted to the
692
        child studies.
693
        """
694
        parent_phsid = dbgap_project
1✔
695
        parse_consent_code = self._get_parse_consent_code(dbgap_config)
1✔
696
        child_suffix = ""
1✔
697
        if parse_consent_code and re.match(
1✔
698
            config["DBGAP_ACCESSION_WITH_CONSENT_REGEX"], dbgap_project
699
        ):
700
            parent_phsid_parts = dbgap_project.split(".")
1✔
701
            parent_phsid = parent_phsid_parts[0]
1✔
702
            child_suffix = "." + parent_phsid_parts[1]
1✔
703

704
        if parent_phsid not in self.parent_to_child_studies_mapping:
1✔
705
            return
1✔
706

707
        self.logger.info(
1✔
708
            f"found parent study {parent_phsid} and Fence "
709
            "is configured to provide additional access to child studies. Giving user "
710
            f"{username} {privileges} privileges in projects: "
711
            f"{{k + child_suffix: v + child_suffix for k, v in self.parent_to_child_studies_mapping.items()}}."
712
        )
713
        child_studies = self.parent_to_child_studies_mapping.get(parent_phsid, [])
1✔
714
        for child_study in child_studies:
1✔
715
            self._add_dbgap_project_for_user(
1✔
716
                child_study + child_suffix,
717
                privileges,
718
                username,
719
                sess,
720
                user_projects,
721
                dbgap_config,
722
            )
723

724
    def _add_dbgap_project_for_user(
1✔
725
        self, dbgap_project, privileges, username, sess, user_projects, dbgap_config
726
    ):
727
        """
728
        Helper function for csv parsing that adds a given dbgap project to Fence/Arborist
729
        and then updates the dictionary containing all user's project access
730
        """
731
        if dbgap_project not in self._projects:
1✔
732
            self.logger.debug(
1✔
733
                "creating Project in fence for dbGaP study: {}".format(dbgap_project)
734
            )
735

736
            project = self._get_or_create(sess, Project, auth_id=dbgap_project)
1✔
737

738
            # need to add dbgap project to arborist
739
            if self.arborist_client:
1✔
740
                self._determine_arborist_resource(dbgap_project, dbgap_config)
1✔
741

742
            if project.name is None:
1✔
743
                project.name = dbgap_project
1✔
744
            self._projects[dbgap_project] = project
1✔
745
        phsid_privileges = {dbgap_project: set(privileges)}
1✔
746
        if username in user_projects:
1✔
747
            user_projects[username].update(phsid_privileges)
1✔
748
        else:
749
            user_projects[username] = phsid_privileges
1✔
750

751
    @staticmethod
1✔
752
    def sync_two_user_info_dict(user_info1, user_info2):
1✔
753
        """
754
        Merge user_info1 into user_info2. Values in user_info2 are overriden
755
        by values in user_info1. user_info2 ends up containing the merged dict.
756

757
        Args:
758
            user_info1 (dict): nested dict
759
            user_info2 (dict): nested dict
760

761
            Example:
762
            {username: {'email': 'abc@email.com'}}
763

764
        Returns:
765
            None
766
        """
767
        user_info2.update(user_info1)
1✔
768

769
    def sync_two_phsids_dict(
        self,
        phsids1,
        phsids2,
        source1=None,
        source2=None,
        phsids2_overrides_phsids1=True,
    ):
        """
        Merge the project access in phsids1 into phsids2, in place.

        Args:
            phsids1, phsids2: nested dicts mapping usernames to
                {phsid: set of permissions}, e.g.

                {
                    username: {
                        phsid1: {'read-storage','write-storage'},
                        phsid2: {'read-storage'},
                    }
                }

            source1, source2: source of the authz information (eg. dbgap,
                user_yaml, visas); recorded per user in self.auth_source
            phsids2_overrides_phsids1 (bool): whether users present in both
                dicts get their projects merged (see below)

        Return:
            None

        Explanation:
            For each user of phsids1:

            - user missing from (or empty in) phsids2: phsids2 takes the
              user's projects from phsids1 as-is (the same dict object, not a
              copy) and source1 is recorded.
            - user in both and `phsids2_overrides_phsids1`: both sources are
              recorded and projects are merged; a phsid only in phsids1 is
              added, a phsid in both gets the union of the two privilege sets.
            - user in both and not `phsids2_overrides_phsids1`: phsids2 is
              left unchanged and only source2 is recorded.
        """
        for user, incoming_projects in phsids1.items():
            current_projects = phsids2.get(user)

            if not current_projects:
                if source1:
                    self.auth_source[user].add(source1)
                phsids2[user] = incoming_projects
                continue

            if not phsids2_overrides_phsids1:
                if source2:
                    self.auth_source[user].add(source2)
                continue

            for source in (source1, source2):
                if source:
                    self.auth_source[user].add(source)
            for phsid, privileges in incoming_projects.items():
                current_projects.setdefault(phsid, set()).update(privileges)
×
833

834
    def sync_to_db_and_storage_backend(
        self,
        user_project,
        user_info,
        sess,
        do_not_revoke_from_db_and_storage=False,
        expires=None,
    ):
        """
        Bring the database and the storage backend in line with the given access.

        The (username, project) pairs currently in the db are compared with
        the pairs being synced: pairs only in the db are revoked, pairs only
        in the sync are granted, and pairs in both are re-granted/updated.

        Args:
            user_project (dict): a dictionary of

                {
                    username: {
                        'project1': {'read-storage','write-storage'},
                        'project2': {'read-storage'}
                    }
                }

            user_info (dict): a dictionary of {username: user_info{}}
            sess: a sqlalchemy session
            do_not_revoke_from_db_and_storage (bool): when True, nothing is
                revoked from the db or storage and admin flags are not
                validated
            expires: forwarded to _grant_from_storage

        Return:
            None
        """
        # only collect a bulk mapping when bulk Google updates are enabled
        google_bulk_mapping = {} if config["GOOGLE_BULK_UPDATES"] else None

        self._init_projects(user_project, sess)

        # index 0 is the dbGaP provider, index 1 the fence provider
        auth_provider_list = [
            self._get_or_create(sess, AuthorizationProvider, name=provider_name)
            for provider_name in ("dbGaP", "fence")
        ]

        cur_db_user_project_list = {
            (privilege.user.username.lower(), privilege.project.auth_id)
            for privilege in sess.query(AccessPrivilege).all()
        }

        # usernames are stored case-sensitively in the db, but the comparison
        # db -> whitelist has to be case-insensitive, so work on lowered names
        user_project_lowercase = {}
        syncing_user_project_list = set()
        for username, projects in user_project.items():
            lowered_username = username.lower()
            user_project_lowercase[lowered_username] = projects
            for project in projects:
                syncing_user_project_list.add((lowered_username, project))

        user_info_lowercase = {
            name.lower(): details for name, details in user_info.items()
        }

        to_delete = cur_db_user_project_list - syncing_user_project_list
        to_add = syncing_user_project_list - cur_db_user_project_list
        to_update = cur_db_user_project_list & syncing_user_project_list

        # users are upserted with the original dict so that the case of the
        # username is preserved
        self._upsert_userinfo(sess, user_info)

        if not do_not_revoke_from_db_and_storage:
            self._revoke_from_storage(
                to_delete, sess, google_bulk_mapping=google_bulk_mapping
            )
            self._revoke_from_db(sess, to_delete)

        # brand new access
        self._grant_from_storage(
            to_add,
            user_project_lowercase,
            sess,
            google_bulk_mapping=google_bulk_mapping,
            expires=expires,
        )
        self._grant_from_db(
            sess,
            to_add,
            user_info_lowercase,
            user_project_lowercase,
            auth_provider_list,
        )

        # access that already existed: re-grant in storage, update in db
        self._grant_from_storage(
            to_update,
            user_project_lowercase,
            sess,
            google_bulk_mapping=google_bulk_mapping,
            expires=expires,
        )
        self._update_from_db(sess, to_update, user_project_lowercase)

        if not do_not_revoke_from_db_and_storage:
            self._validate_and_update_user_admin(sess, user_info_lowercase)

        sess.commit()

        if config["GOOGLE_BULK_UPDATES"]:
            self.logger.info("Doing bulk Google update...")
            update_google_groups_for_users(google_bulk_mapping)
            self.logger.info("Bulk Google update done!")

        sess.commit()
1✔
943

944
    def sync_to_storage_backend(
        self, user_project, user_info, sess, expires, skip_google_updates=False
    ):
        """
        Grant a single user's access in the storage backend with an expiration.

        Args:
            user_project (dict): a dictionary of

                {
                    username: {
                        'project1': {'read-storage','write-storage'},
                        'project2': {'read-storage'}
                    }
                }

            user_info (dict): a dictionary of attributes for a user; the
                "username" key is always read, "user_id" is read when bulk
                Google updates are enabled
            sess: a sqlalchemy session
            expires (int): time at which synced Arborist policies and
                   inclusion in any GBAG are set to expire
            skip_google_updates (bool): True if granting storage access (and
                the bulk Google group update) should be skipped

        Return:
            None

        Raises:
            Exception: if `expires` is falsy
        """
        if not expires:
            raise Exception(
                f"sync to storage backend requires an expiration. you provided: {expires}"
            )

        google_group_user_mapping = None
        if config["GOOGLE_BULK_UPDATES"]:
            google_group_user_mapping = {}
            get_or_create_proxy_group_id(
                expires=expires,
                user_id=user_info["user_id"],
                username=user_info["username"],
                session=sess,
                storage_manager=self.storage_manager,
            )

        # TODO: eventually it'd be nice to remove this step but it's required
        #       so that grant_from_storage can determine what storage backends
        #       are needed for a project.
        self._init_projects(user_project, sess)

        # usernames are stored case-sensitively in the db, but are compared
        # case-insensitively, so key everything by the lowered username
        user_project_lowercase = {}
        to_add = set()
        for username, projects in user_project.items():
            lowered_username = username.lower()
            user_project_lowercase[lowered_username] = projects
            for project in projects:
                to_add.add((lowered_username, project))

        # _upsert_userinfo takes {username: info}; the key is the lowered username
        self._upsert_userinfo(sess, {user_info["username"].lower(): user_info})

        if not skip_google_updates:
            self._grant_from_storage(
                to_add,
                user_project_lowercase,
                sess,
                google_bulk_mapping=google_group_user_mapping,
                expires=expires,
            )

            if config["GOOGLE_BULK_UPDATES"]:
                self.logger.info("Updating user's google groups ...")
                update_google_groups_for_users(google_group_user_mapping)
                self.logger.info("Google groups update done!!")

        sess.commit()
1✔
1018

1019
    def _revoke_from_db(self, sess, to_delete):
1✔
1020
        """
1021
        Revoke user access to projects in the auth database
1022

1023
        Args:
1024
            sess: sqlalchemy session
1025
            to_delete: a set of (username, project.auth_id) to be revoked from db
1026
        Return:
1027
            None
1028
        """
1029
        for username, project_auth_id in to_delete:
1✔
1030
            q = (
1✔
1031
                sess.query(AccessPrivilege)
1032
                .filter(AccessPrivilege.project.has(auth_id=project_auth_id))
1033
                .join(AccessPrivilege.user)
1034
                .filter(func.lower(User.username) == username)
1035
                .all()
1036
            )
1037
            for access in q:
1✔
1038
                self.logger.info(
1✔
1039
                    "revoke {} access to {} in db".format(username, project_auth_id)
1040
                )
1041
                sess.delete(access)
1✔
1042

1043
    def _validate_and_update_user_admin(self, sess, user_info):
        """
        Strip the admin flag from every db admin that is absent from user_info.

        Args:
            sess: sqlalchemy session
            user_info: a dict keyed by lowercased username (db usernames are
                lowercased before the lookup), e.g.
            {
                username: {
                    'email': email,
                    'display_name': display_name,
                    'phone_number': phonenum,
                    'tags': {'k1':'v1', 'k2': 'v2'}
                    'admin': is_admin
                }
            }

        Returns:
            None
        """
        current_admins = sess.query(User).filter_by(is_admin=True).all()
        for admin_user in current_admins:
            if admin_user.username.lower() in user_info:
                continue

            admin_user.is_admin = False
            sess.add(admin_user)
            self.logger.info(
                "remove admin access from {} in db".format(admin_user.username.lower())
            )
1071

1072
    def _update_from_db(self, sess, to_update, user_project):
1✔
1073
        """
1074
        Update user access to projects in the auth database
1075

1076
        Args:
1077
            sess: sqlalchemy session
1078
            to_update:
1079
                a set of (username, project.auth_id) to be updated from db
1080

1081
        Return:
1082
            None
1083
        """
1084

1085
        for username, project_auth_id in to_update:
1✔
1086
            q = (
1✔
1087
                sess.query(AccessPrivilege)
1088
                .filter(AccessPrivilege.project.has(auth_id=project_auth_id))
1089
                .join(AccessPrivilege.user)
1090
                .filter(func.lower(User.username) == username)
1091
                .all()
1092
            )
1093
            for access in q:
1✔
1094
                access.privilege = user_project[username][project_auth_id]
1✔
1095
                self.logger.info(
1✔
1096
                    "update {} with {} access to {} in db".format(
1097
                        username, access.privilege, project_auth_id
1098
                    )
1099
                )
1100

1101
    def _grant_from_db(self, sess, to_add, user_info, user_project, auth_provider_list):
1✔
1102
        """
1103
        Grant user access to projects in the auth database
1104
        Args:
1105
            sess: sqlalchemy session
1106
            to_add: a set of (username, project.auth_id) to be granted
1107
            user_project:
1108
                a dictionary of {username: {project: {'read','write'}}
1109
        Return:
1110
            None
1111
        """
1112
        for username, project_auth_id in to_add:
1✔
1113
            u = query_for_user(session=sess, username=username)
1✔
1114

1115
            auth_provider = auth_provider_list[0]
1✔
1116
            if "dbgap_role" not in user_info[username]["tags"]:
1✔
1117
                auth_provider = auth_provider_list[1]
1✔
1118
            user_access = AccessPrivilege(
1✔
1119
                user=u,
1120
                project=self._projects[project_auth_id],
1121
                privilege=list(user_project[username][project_auth_id]),
1122
                auth_provider=auth_provider,
1123
            )
1124
            self.logger.info(
1✔
1125
                "grant user {} to {} with access {}".format(
1126
                    username, user_access.project, user_access.privilege
1127
                )
1128
            )
1129
            sess.add(user_access)
1✔
1130

1131
    def _upsert_userinfo(self, sess, user_info):
1✔
1132
        """
1133
        update user info to database.
1134

1135
        Args:
1136
            sess: sqlalchemy session
1137
            user_info:
1138
                a dict of {username: {display_name, phone_number, tags, admin}
1139

1140
        Return:
1141
            None
1142
        """
1143

1144
        for username in user_info:
1✔
1145
            u = query_for_user(session=sess, username=username)
1✔
1146

1147
            if u is None:
1✔
1148
                self.logger.info("create user {}".format(username))
1✔
1149
                u = User(username=username)
1✔
1150
                sess.add(u)
1✔
1151

1152
            if self.arborist_client:
1✔
1153
                self.arborist_client.create_user({"name": username})
1✔
1154

1155
            u.email = user_info[username].get("email", "")
1✔
1156
            u.display_name = user_info[username].get("display_name", "")
1✔
1157
            u.phone_number = user_info[username].get("phone_number", "")
1✔
1158
            u.is_admin = user_info[username].get("admin", False)
1✔
1159

1160
            idp_name = user_info[username].get("idp_name", "")
1✔
1161
            if idp_name and not u.identity_provider:
1✔
1162
                idp = (
×
1163
                    sess.query(IdentityProvider)
1164
                    .filter(IdentityProvider.name == idp_name)
1165
                    .first()
1166
                )
1167
                if not idp:
×
1168
                    idp = IdentityProvider(name=idp_name)
×
1169
                u.identity_provider = idp
×
1170

1171
            # do not update if there is no tag
1172
            if not user_info[username].get("tags"):
1✔
1173
                continue
1✔
1174

1175
            # remove user db tags if they are not shown in new tags
1176
            for tag in u.tags:
1✔
1177
                if tag.key not in user_info[username]["tags"]:
1✔
1178
                    u.tags.remove(tag)
1✔
1179

1180
            # sync
1181
            for k, v in user_info[username]["tags"].items():
1✔
1182
                found = False
1✔
1183
                for tag in u.tags:
1✔
1184
                    if tag.key == k:
1✔
1185
                        found = True
1✔
1186
                        tag.value = v
1✔
1187
                # create new tag if not found
1188
                if not found:
1✔
1189
                    tag = Tag(key=k, value=v)
1✔
1190
                    u.tags.append(tag)
1✔
1191

1192
    def _revoke_from_storage(self, to_delete, sess, google_bulk_mapping=None):
1✔
1193
        """
1194
        If a project have storage backend, revoke user's access to buckets in
1195
        the storage backend.
1196

1197
        Args:
1198
            to_delete: a set of (username, project.auth_id) to be revoked
1199

1200
        Return:
1201
            None
1202
        """
1203
        for username, project_auth_id in to_delete:
1✔
1204
            project = (
1✔
1205
                sess.query(Project).filter(Project.auth_id == project_auth_id).first()
1206
            )
1207
            for sa in project.storage_access:
1✔
1208
                if not hasattr(self, "storage_manager"):
1✔
1209
                    self.logger.error(
×
1210
                        (
1211
                            "CANNOT revoke {} access to {} in {} because there is NO "
1212
                            "configured storage accesses at all. See configuration. "
1213
                            "Continuing anyway..."
1214
                        ).format(username, project_auth_id, sa.provider.name)
1215
                    )
1216
                    continue
×
1217

1218
                self.logger.info(
1✔
1219
                    "revoke {} access to {} in {}".format(
1220
                        username, project_auth_id, sa.provider.name
1221
                    )
1222
                )
1223
                self.storage_manager.revoke_access(
1✔
1224
                    provider=sa.provider.name,
1225
                    username=username,
1226
                    project=project,
1227
                    session=sess,
1228
                    google_bulk_mapping=google_bulk_mapping,
1229
                )
1230

1231
    def _grant_from_storage(
1✔
1232
        self, to_add, user_project, sess, google_bulk_mapping=None, expires=None
1233
    ):
1234
        """
1235
        If a project have storage backend, grant user's access to buckets in
1236
        the storage backend.
1237

1238
        Args:
1239
            to_add: a set of (username, project.auth_id)  to be granted
1240
            user_project: a dictionary like:
1241

1242
                    {username: {phsid: {'read-storage','write-storage'}}}
1243

1244
        Return:
1245
            dict of the users' storage usernames to their user_projects and the respective storage access.
1246
        """
1247
        storage_user_to_sa_and_user_project = defaultdict()
1✔
1248
        for username, project_auth_id in to_add:
1✔
1249
            project = self._projects[project_auth_id]
1✔
1250
            for sa in project.storage_access:
1✔
1251
                access = list(user_project[username][project_auth_id])
1✔
1252
                if not hasattr(self, "storage_manager"):
1✔
1253
                    self.logger.error(
×
1254
                        (
1255
                            "CANNOT grant {} access {} to {} in {} because there is NO "
1256
                            "configured storage accesses at all. See configuration. "
1257
                            "Continuing anyway..."
1258
                        ).format(username, access, project_auth_id, sa.provider.name)
1259
                    )
1260
                    continue
×
1261

1262
                self.logger.info(
1✔
1263
                    "grant {} access {} to {} in {}".format(
1264
                        username, access, project_auth_id, sa.provider.name
1265
                    )
1266
                )
1267
                storage_username = self.storage_manager.grant_access(
1✔
1268
                    provider=sa.provider.name,
1269
                    username=username,
1270
                    project=project,
1271
                    access=access,
1272
                    session=sess,
1273
                    google_bulk_mapping=google_bulk_mapping,
1274
                    expires=expires,
1275
                )
1276

1277
                storage_user_to_sa_and_user_project[storage_username] = (sa, project)
1✔
1278
        return storage_user_to_sa_and_user_project
1✔
1279

1280
    def _init_projects(self, user_project, sess):
1✔
1281
        """
1282
        initialize projects
1283
        """
1284
        if self.project_mapping:
1✔
1285
            for projects in list(self.project_mapping.values()):
1✔
1286
                for p in projects:
1✔
1287
                    self.logger.debug(
1✔
1288
                        "creating Project with info from project_mapping: {}".format(p)
1289
                    )
1290
                    project = self._get_or_create(sess, Project, **p)
1✔
1291
                    self._projects[p["auth_id"]] = project
1✔
1292
        for _, projects in user_project.items():
1✔
1293
            for auth_id in list(projects.keys()):
1✔
1294
                project = sess.query(Project).filter(Project.auth_id == auth_id).first()
1✔
1295
                if not project:
1✔
1296
                    data = {"name": auth_id, "auth_id": auth_id}
1✔
1297
                    try:
1✔
1298
                        project = self._get_or_create(sess, Project, **data)
1✔
1299
                    except IntegrityError as e:
×
1300
                        sess.rollback()
×
1301
                        self.logger.error(
×
1302
                            f"Project {auth_id} already exists. Detail {str(e)}"
1303
                        )
1304
                        raise Exception(
×
1305
                            "Project {} already exists. Detail {}. Please contact your system administrator.".format(
1306
                                auth_id, str(e)
1307
                            )
1308
                        )
1309
                if auth_id not in self._projects:
1✔
1310
                    self._projects[auth_id] = project
1✔
1311

1312
    @staticmethod
1✔
1313
    def _get_or_create(sess, model, **kwargs):
1✔
1314
        instance = sess.query(model).filter_by(**kwargs).first()
1✔
1315
        if not instance:
1✔
1316
            instance = model(**kwargs)
1✔
1317
            sess.add(instance)
1✔
1318
        return instance
1✔
1319

1320
    def _process_dbgap_files(self, dbgap_config, sess):
1✔
1321
        """
1322
        Args:
1323
            dbgap_config : a dictionary containing information about a single
1324
                           dbgap sftp server (from fence config)
1325
            sess: database session
1326

1327
        Return:
1328
            user_projects (dict)
1329
            user_info (dict)
1330
        """
1331
        dbgap_file_list = []
1✔
1332
        hostname = dbgap_config["info"]["host"]
1✔
1333
        username = dbgap_config["info"]["username"]
1✔
1334
        encrypted = dbgap_config["info"].get("encrypted", True)
1✔
1335
        folderdir = os.path.join(str(self.folder), str(hostname), str(username))
1✔
1336

1337
        try:
1✔
1338
            if os.path.exists(folderdir):
1✔
1339
                dbgap_file_list = glob.glob(
×
1340
                    os.path.join(folderdir, "*")
1341
                )  # get lists of file from folder
1342
            else:
1343
                self.logger.info("Downloading files from: {}".format(hostname))
1✔
1344
                dbgap_file_list = self._download(dbgap_config)
1✔
1345
        except Exception as e:
1✔
1346
            self.logger.error(e)
1✔
1347
            exit(1)
1✔
1348
        self.logger.info("dbgap files: {}".format(dbgap_file_list))
×
1349
        user_projects, user_info = self._get_user_permissions_from_csv_list(
×
1350
            dbgap_file_list,
1351
            encrypted=encrypted,
1352
            session=sess,
1353
            dbgap_config=dbgap_config,
1354
        )
1355

1356
        user_projects = self.parse_projects(user_projects)
×
1357
        return user_projects, user_info
×
1358

1359
    def _get_user_permissions_from_csv_list(
1✔
1360
        self, file_list, encrypted, session, dbgap_config={}
1361
    ):
1362
        """
1363
        Args:
1364
            file_list: list of files (represented as strings)
1365
            encrypted: boolean indicating whether those files are encrypted
1366
            session: sqlalchemy session
1367
            dbgap_config: a dictionary containing information about the dbGaP sftp server
1368
                    (comes from fence config)
1369

1370
        Return:
1371
            user_projects (dict)
1372
            user_info (dict)
1373
        """
1374
        permissions = [{"read-storage", "read"} for _ in file_list]
1✔
1375
        user_projects, user_info = self._parse_csv(
1✔
1376
            dict(list(zip(file_list, permissions))),
1377
            sess=session,
1378
            dbgap_config=dbgap_config,
1379
            encrypted=encrypted,
1380
        )
1381
        return user_projects, user_info
1✔
1382

1383
    def _merge_multiple_local_csv_files(
1✔
1384
        self, dbgap_file_list, encrypted, dbgap_configs, session
1385
    ):
1386
        """
1387
        Args:
1388
            dbgap_file_list (list): a list of whitelist file locations stored locally
1389
            encrypted (bool): whether the file is encrypted (comes from fence config)
1390
            dbgap_configs (list): list of dictionaries containing information about the dbgap server (comes from fence config)
1391
            session (sqlalchemy.Session): database session
1392

1393
        Return:
1394
            merged_user_projects (dict)
1395
            merged_user_info (dict)
1396
        """
1397
        merged_user_projects = {}
1✔
1398
        merged_user_info = {}
1✔
1399

1400
        for dbgap_config in dbgap_configs:
1✔
1401
            user_projects, user_info = self._get_user_permissions_from_csv_list(
1✔
1402
                dbgap_file_list,
1403
                encrypted,
1404
                session=session,
1405
                dbgap_config=dbgap_config,
1406
            )
1407
            self.sync_two_user_info_dict(user_info, merged_user_info)
1✔
1408
            self.sync_two_phsids_dict(user_projects, merged_user_projects)
1✔
1409
        return merged_user_projects, merged_user_info
1✔
1410

1411
    def _merge_multiple_dbgap_sftp(self, dbgap_servers, sess):
1✔
1412
        """
1413
        Args:
1414
            dbgap_servers : a list of dictionaries each containging config on
1415
                           dbgap sftp server (comes from fence config)
1416
            sess: database session
1417

1418
        Return:
1419
            merged_user_projects (dict)
1420
            merged_user_info (dict)
1421
        """
1422
        merged_user_projects = {}
1✔
1423
        merged_user_info = {}
1✔
1424
        for dbgap in dbgap_servers:
1✔
1425
            user_projects, user_info = self._process_dbgap_files(dbgap, sess)
1✔
1426
            # merge into merged_user_info
1427
            # user_info overrides original info in merged_user_info
1428
            self.sync_two_user_info_dict(user_info, merged_user_info)
1✔
1429

1430
            # merge all access info dicts into "merged_user_projects".
1431
            # the access info is combined - if the user_projects access is
1432
            # ["read"] and the merged_user_projects is ["read-storage"], the
1433
            # resulting access is ["read", "read-storage"].
1434
            self.sync_two_phsids_dict(user_projects, merged_user_projects)
1✔
1435
        return merged_user_projects, merged_user_info
1✔
1436

1437
    def parse_projects(self, user_projects):
        """
        Return a copy of user_projects whose top-level keys are lowercased.

        The values are reused as-is (not copied). If two keys differ only by
        case, the one iterated last wins.
        """
        lowered = {}
        for name, projects in user_projects.items():
            lowered[name.lower()] = projects
        return lowered
1✔
1442

1443
    def _process_dbgap_project(
1✔
1444
        self, dbgap_project, privileges, username, sess, user_projects, dbgap_config
1445
    ):
1446
        if dbgap_project not in self.project_mapping:
1✔
1447
            self._add_dbgap_project_for_user(
1✔
1448
                dbgap_project,
1449
                privileges,
1450
                username,
1451
                sess,
1452
                user_projects,
1453
                dbgap_config,
1454
            )
1455

1456
        for element_dict in self.project_mapping.get(dbgap_project, []):
1✔
1457
            try:
1✔
1458
                phsid_privileges = {element_dict["auth_id"]: set(privileges)}
1✔
1459

1460
                # need to add dbgap project to arborist
1461
                if self.arborist_client:
1✔
1462
                    self._determine_arborist_resource(
1✔
1463
                        element_dict["auth_id"], dbgap_config
1464
                    )
1465

1466
                if username not in user_projects:
1✔
1467
                    user_projects[username] = {}
1✔
1468
                user_projects[username].update(phsid_privileges)
1✔
1469

1470
            except ValueError as e:
×
1471
                self.logger.info(e)
×
1472

1473
    def _process_user_projects(
1✔
1474
        self,
1475
        user_projects,
1476
        enable_common_exchange_area_access,
1477
        study_common_exchange_areas,
1478
        dbgap_config,
1479
        sess,
1480
    ):
1481
        user_projects_to_modify = copy.deepcopy(user_projects)
1✔
1482
        for username in user_projects.keys():
1✔
1483
            for project in user_projects[username].keys():
1✔
1484
                phsid = project.split(".")
1✔
1485
                dbgap_project = phsid[0]
1✔
1486
                privileges = user_projects[username][project]
1✔
1487
                if len(phsid) > 1 and self._get_parse_consent_code(dbgap_config):
1✔
1488
                    consent_code = phsid[-1]
1✔
1489

1490
                    # c999 indicates full access to all consents and access
1491
                    # to a study-specific exchange area
1492
                    # access to at least one study-specific exchange area implies access
1493
                    # to the parent study's common exchange area
1494
                    #
1495
                    # NOTE: Handling giving access to all consents is done at
1496
                    #       a later time, when we have full information about possible
1497
                    #       consents
1498
                    self.logger.debug(
1✔
1499
                        f"got consent code {consent_code} from dbGaP project "
1500
                        f"{dbgap_project}"
1501
                    )
1502
                    if (
1✔
1503
                        consent_code == "c999"
1504
                        and enable_common_exchange_area_access
1505
                        and dbgap_project in study_common_exchange_areas
1506
                    ):
1507
                        self.logger.info(
1✔
1508
                            "found study with consent c999 and Fence "
1509
                            "is configured to parse exchange area data. Giving user "
1510
                            f"{username} {privileges} privileges in project: "
1511
                            f"{study_common_exchange_areas[dbgap_project]}."
1512
                        )
1513
                        self._add_dbgap_project_for_user(
1✔
1514
                            study_common_exchange_areas[dbgap_project],
1515
                            privileges,
1516
                            username,
1517
                            sess,
1518
                            user_projects_to_modify,
1519
                            dbgap_config,
1520
                        )
1521

1522
                    dbgap_project += "." + consent_code
1✔
1523

1524
                self._process_dbgap_project(
1✔
1525
                    dbgap_project,
1526
                    privileges,
1527
                    username,
1528
                    sess,
1529
                    user_projects_to_modify,
1530
                    dbgap_config,
1531
                )
1532
        for user in user_projects_to_modify.keys():
1✔
1533
            user_projects[user] = user_projects_to_modify[user]
1✔
1534

1535
    def sync(self):
        """
        Run the full sync, reusing ``self.session`` when one is set and
        otherwise opening a session from ``self.driver`` for the duration
        of the sync.
        """
        if not self.session:
            with self.driver.session as fresh_session:
                self._sync(fresh_session)
            return
        self._sync(self.session)
×
1541

1542
    def download(self):
        """
        Download the files of every server configured in ``self.dbGaP``,
        one server at a time and in configuration order.
        """
        for server_config in self.dbGaP:
            self._download(server_config)
×
1545

1546
    def _download(self, dbgap_config):
1✔
1547
        """
1548
        Download files from dbgap server.
1549
        """
1550
        server = dbgap_config["info"]
1✔
1551
        protocol = dbgap_config["protocol"]
1✔
1552
        hostname = server["host"]
1✔
1553
        username = server["username"]
1✔
1554
        folderdir = os.path.join(str(self.folder), str(hostname), str(username))
1✔
1555

1556
        if not os.path.exists(folderdir):
1✔
1557
            os.makedirs(folderdir)
1✔
1558

1559
        self.logger.info("Download from server")
1✔
1560
        try:
1✔
1561
            if protocol == "sftp":
1✔
1562
                self._get_from_sftp_with_proxy(server, folderdir)
1✔
1563
            else:
1564
                self._get_from_ftp_with_proxy(server, folderdir)
×
1565
            dbgap_files = glob.glob(os.path.join(folderdir, "*"))
×
1566
            return dbgap_files
×
1567
        except Exception as e:
1✔
1568
            self.logger.error(e)
1✔
1569
            raise
1✔
1570

1571
    def _sync(self, sess):
        """
        Collect files from dbgap server(s), sync csv and yaml files to storage
        backend and fence DB

        The access information from the three sources (dbGaP servers, local
        CSV files, user.yaml) is merged, written to the Fence DB / storage
        backend and then pushed to Arborist.

        Args:
            sess: database session handed to every helper that touches the DB

        Raises:
            EnvironmentError: the user.yaml could not be loaded, or it contains
                an ``authz`` section while no arborist client is configured
            AssertionError: re-raised from loading the user.yaml
            GoogleUpdateException: caught during the DB/storage sync and
                re-raised only after all remaining steps have run

        Note:
            The process exits with status 1 when either Arborist update
            reports failure.
        """

        # get all dbgap files
        user_projects = {}
        user_info = {}
        if self.is_sync_from_dbgap_server:
            self.logger.debug(
                "Pulling telemetry files from {} dbgap sftp servers".format(
                    len(self.dbGaP)
                )
            )
            user_projects, user_info = self._merge_multiple_dbgap_sftp(self.dbGaP, sess)

        local_csv_file_list = []
        if self.sync_from_local_csv_dir:
            local_csv_file_list = glob.glob(
                os.path.join(self.sync_from_local_csv_dir, "*")
            )
            # Sort the list so the order of files is consistent across platforms
            local_csv_file_list.sort()

        # called even when the list is empty, so the *_csv names always exist
        user_projects_csv, user_info_csv = self._merge_multiple_local_csv_files(
            local_csv_file_list,
            encrypted=False,
            session=sess,
            dbgap_configs=self.dbGaP,
        )

        try:
            user_yaml = UserYAML.from_file(
                self.sync_from_local_yaml_file, encrypted=False, logger=self.logger
            )
        except (EnvironmentError, AssertionError) as e:
            self.logger.error(str(e))
            self.logger.error("aborting early")
            raise

        # parse all projects
        user_projects_csv = self.parse_projects(user_projects_csv)
        user_projects = self.parse_projects(user_projects)
        user_yaml.projects = self.parse_projects(user_yaml.projects)

        # merge all user info dicts into "user_info".
        # the user info (such as email) in the user.yaml files
        # overrides the user info from the CSV files.
        self.sync_two_user_info_dict(user_info_csv, user_info)
        self.sync_two_user_info_dict(user_yaml.user_info, user_info)

        # merge all access info dicts into "user_projects".
        # the access info is combined - if the user.yaml access is
        # ["read"] and the CSV file access is ["read-storage"], the
        # resulting access is ["read", "read-storage"].
        self.sync_two_phsids_dict(
            user_projects_csv, user_projects, source1="local_csv", source2="dbgap"
        )
        self.sync_two_phsids_dict(
            user_yaml.projects, user_projects, source1="user_yaml", source2="dbgap"
        )

        # Note: if there are multiple dbgap sftp servers configured
        # this parameter is always from the config for the first dbgap sftp server
        # not any additional ones
        # NOTE(review): the loop below checks the setting of *every* configured
        # server and repeats the grant once per server that enables consent
        # parsing, which does not match the note above - confirm which is intended
        for dbgap_config in self.dbGaP:
            if self._get_parse_consent_code(dbgap_config):
                self._grant_all_consents_to_c999_users(
                    user_projects, user_yaml.project_to_resource
                )

        google_update_ex = None

        try:
            # update the Fence DB
            if user_projects:
                self.logger.info("Sync to db and storage backend")
                self.sync_to_db_and_storage_backend(user_projects, user_info, sess)
                self.logger.info("Finish syncing to db and storage backend")
            else:
                self.logger.info("No users for syncing")
        except GoogleUpdateException as ex:
            # save this to reraise later after all non-Google syncing has finished
            # this way, any issues with Google only affect Google data access and don't
            # cascade problems into non-Google AWS or Azure access
            google_update_ex = ex

        # update the Arborist DB (resources, roles, policies, groups)
        if user_yaml.authz:
            if not self.arborist_client:
                raise EnvironmentError(
                    "yaml file contains authz section but sync is not configured with"
                    " arborist client--did you run sync with --arborist <arborist client> arg?"
                )
            self.logger.info("Synchronizing arborist...")
            success = self._update_arborist(sess, user_yaml)
            if success:
                self.logger.info("Finished synchronizing arborist")
            else:
                self.logger.error("Could not synchronize successfully")
                # terminates the process; nothing below runs in this case
                exit(1)
        else:
            self.logger.info("No `authz` section; skipping arborist sync")

        # update the Arborist DB (user access)
        if self.arborist_client:
            self.logger.info("Synchronizing arborist with authorization info...")
            success = self._update_authz_in_arborist(sess, user_projects, user_yaml)
            if success:
                self.logger.info(
                    "Finished synchronizing authorization info to arborist"
                )
            else:
                self.logger.error(
                    "Could not synchronize authorization info successfully to arborist"
                )
                # terminates the process; nothing below runs in this case
                exit(1)
        else:
            self.logger.error("No arborist client set; skipping arborist sync")

        # Logging authz source
        for u, s in self.auth_source.items():
            self.logger.info("Access for user {} from {}".format(u, s))

        self.logger.info(
            f"Persisting authz mapping to database: {user_yaml.project_to_resource}"
        )
        user_yaml.persist_project_to_resource(db_session=sess)
        # surface the Google failure saved earlier, now that everything else ran
        if google_update_ex is not None:
            raise google_update_ex
1✔
1702

1703
    def _grant_all_consents_to_c999_users(
        self, user_projects, user_yaml_project_to_resources
    ):
        """
        Give every user holding a ``c999`` consent on a study access to all
        known consent groups of that study.

        The known accessions are collected from ``self._projects`` and from
        the user.yaml project mapping. Child studies reported by
        ``self._get_children`` inherit the consent codes of their parent.

        Args:
            user_projects (dict): username -> {project: privileges}; mutated
                in place
            user_yaml_project_to_resources (dict): project -> resource path
                mapping from the user.yaml; only its keys are used
        """
        access_number_matcher = re.compile(config["DBGAP_ACCESSION_WITH_CONSENT_REGEX"])
        # combine dbgap/user.yaml projects into one big set (in case not all consents
        # are in either)
        all_projects = set(self._projects).union(user_yaml_project_to_resources)

        self.logger.debug(f"all projects: {all_projects}")

        # construct a mapping from phsid (without consent) to all accessions with consent
        consent_mapping = {}
        for project in all_projects:
            phs_match = access_number_matcher.match(project)
            if not phs_match:
                continue
            accession_number = phs_match.groupdict()
            phsid = accession_number["phsid"]
            consent = accession_number["consent"]

            # TODO: This is not handling the .v1.p1 at all
            consent_mapping.setdefault(phsid, set()).add(".".join([phsid, consent]))

            # Assign parent consent to child studies
            for child_phs in self._get_children(phsid) or ():
                consent_mapping.setdefault(child_phs, set()).add(
                    ".".join([child_phs, consent])
                )

        self.logger.debug(f"consent mapping: {consent_mapping}")

        # go through existing access and find any c999's and make sure to give access to
        # all accessions with consent for that phsid
        for username, user_project_info in user_projects.items():
            # Only the inner dict grows while granting, so a snapshot of its
            # keys is enough; no need to deep-copy the whole access structure.
            for project in list(user_project_info):
                phs_match = access_number_matcher.match(project)
                if not phs_match:
                    continue
                accession_number = phs_match.groupdict()
                if accession_number["consent"] != "c999":
                    continue

                # give access to all consents
                all_phsids_with_consent = consent_mapping.get(
                    accession_number["phsid"], []
                )
                self.logger.info(
                    f"user {username} has c999 consent group for: {project}. "
                    f"Granting access to all consents: {all_phsids_with_consent}"
                )
                # NOTE: grants exactly "read" and "read-storage"; privileges
                #       already recorded for such an accession are replaced
                for phsid_with_consent in all_phsids_with_consent:
                    user_project_info[phsid_with_consent] = {"read-storage", "read"}
1757

1758
    def _update_arborist(self, session, user_yaml):
        """
        Create roles, resources, policies, groups in arborist from the information in
        ``user_yaml``.

        The projects are sent to arborist as resources with paths like
        ``/projects/{project}``. Roles are created with just the original names
        for the privileges like ``"read-storage", "read"`` etc.

        Most individual Arborist failures (``ArboristError``) are logged and
        skipped so the remaining items are still processed.

        Args:
            session (sqlalchemy.Session)
            user_yaml (UserYAML)

        Return:
            bool: success (``False`` only when Arborist is not healthy)
        """
        healthy = self._is_arborist_healthy()
        if not healthy:
            return False

        # Set up the resource tree in arborist by combining provided resources with any
        # dbgap resources that were created before this.
        #
        # Why add dbgap resources if they've already been created?
        #   B/C Arborist's PUT update will override existing subresources. So if a dbgap
        #   resources was created under `/programs/phs000178` anything provided in
        #   user.yaml under `/programs` would completely wipe it out.
        resources = user_yaml.authz.get("resources", [])

        # flatten the per-study lists of resource paths into one list
        dbgap_resource_paths = []
        for path_list in self._dbgap_study_to_resources.values():
            dbgap_resource_paths.extend(path_list)

        self.logger.debug("user_yaml resources: {}".format(resources))
        self.logger.debug("dbgap resource paths: {}".format(dbgap_resource_paths))

        combined_resources = utils.combine_provided_and_dbgap_resources(
            resources, dbgap_resource_paths
        )

        for resource in combined_resources:
            try:
                self.logger.debug(
                    "attempting to update arborist resource: {}".format(resource)
                )
                self.arborist_client.update_resource("/", resource, merge=True)
            except ArboristError as e:
                self.logger.error(e)
                # keep going; maybe just some conflicts from things existing already

        # update roles
        roles = user_yaml.authz.get("roles", [])
        for role in roles:
            try:
                response = self.arborist_client.update_role(role["id"], role)
                if response:
                    self._created_roles.add(role["id"])
            except ArboristError as e:
                # updating failed: fall back to creating the role
                self.logger.info(
                    "couldn't update role '{}', creating instead".format(str(e))
                )
                try:
                    response = self.arborist_client.create_role(role)
                    if response:
                        self._created_roles.add(role["id"])
                except ArboristError as e:
                    self.logger.error(e)
                    # keep going; maybe just some conflicts from things existing already

        # update policies
        policies = user_yaml.authz.get("policies", [])
        for policy in policies:
            # NOTE(review): pop() removes "id" from the dict held by
            # user_yaml.authz, so a second pass over the same user_yaml would
            # raise KeyError here - confirm this mutation is intended
            policy_id = policy.pop("id")
            try:
                self.logger.debug(
                    "Trying to upsert policy with id {}".format(policy_id)
                )
                response = self.arborist_client.update_policy(
                    policy_id, policy, create_if_not_exist=True
                )
            except ArboristError as e:
                self.logger.error(e)
                # keep going; maybe just some conflicts from things existing already
            else:
                if response:
                    self.logger.debug("Upserted policy with id {}".format(policy_id))
                    self._created_policies.add(policy_id)

        # update groups
        groups = user_yaml.authz.get("groups", [])

        # delete from arborist the groups that have been deleted
        # from the user.yaml
        # NOTE(review): list_groups() and delete_group() are not wrapped in a
        # try/except, unlike the other Arborist calls in this method
        arborist_groups = set(
            g["name"] for g in self.arborist_client.list_groups().get("groups", [])
        )
        useryaml_groups = set(g["name"] for g in groups)
        for deleted_group in arborist_groups.difference(useryaml_groups):
            # do not try to delete built in groups
            if deleted_group not in ["anonymous", "logged-in"]:
                self.arborist_client.delete_group(deleted_group)

        # create/update the groups defined in the user.yaml
        for group in groups:
            # a group without all required fields is reported and skipped
            missing = {"name", "users", "policies"}.difference(set(group.keys()))
            if missing:
                name = group.get("name", "{MISSING NAME}")
                self.logger.error(
                    "group {} missing required field(s): {}".format(name, list(missing))
                )
                continue
            try:
                # the returned response is not inspected
                response = self.arborist_client.put_group(
                    group["name"],
                    # Arborist doesn't handle group descriptions yet
                    # description=group.get("description", ""),
                    users=group["users"],
                    policies=group["policies"],
                )
            except ArboristError as e:
                self.logger.info("couldn't put group: {}".format(str(e)))

        # Update policies for built-in (`anonymous` and `logged-in`) groups

        # First recreate these groups in order to clear out old, possibly deleted policies
        for builtin_group in ["anonymous", "logged-in"]:
            try:
                # the returned response is not inspected
                response = self.arborist_client.put_group(builtin_group)
            except ArboristError as e:
                self.logger.info("couldn't put group: {}".format(str(e)))

        # Now add back policies that are in the user.yaml
        for policy in user_yaml.authz.get("anonymous_policies", []):
            self.arborist_client.grant_group_policy("anonymous", policy)

        for policy in user_yaml.authz.get("all_users_policies", []):
            self.arborist_client.grant_group_policy("logged-in", policy)

        return True
1✔
1897

1898
    def _revoke_all_policies_preserve_mfa(self, username, idp=None):
        """
        Revoke every Arborist policy of ``username``; when MFA is configured
        for the user's ``idp`` and the user held ``mfa_policy`` beforehand,
        grant that policy back afterwards.

        Nothing happens when the user cannot be fetched from Arborist.
        """
        try:
            arborist_user = self.arborist_client.get_user(username)
        except ArboristError:
            # user doesn't exist in Arborist, nothing to revoke
            return

        idp_settings = config["OPENID_CONNECT"].get(idp, {})
        if "multifactor_auth_claim_info" not in idp_settings:
            # TODO This should be a diff, not a revocation of all policies.
            self.arborist_client.revoke_all_policies_for_user(username)
            return

        previous_policies = []
        try:
            previous_policies = arborist_user["policies"]
        except Exception as e:
            self.logger.error(
                f"Could not retrieve user's policies, revoking all policies anyway. {e}"
            )
        finally:
            # TODO This should be a diff, not a revocation of all policies.
            self.arborist_client.revoke_all_policies_for_user(username)

        # restore the MFA policy if the user had it before the revocation
        if "mfa_policy" in previous_policies:
            self.arborist_client.grant_user_policy(username, "mfa_policy")
1✔
1931

1932
    def _update_authz_in_arborist(
        self,
        session,
        user_projects,
        user_yaml=None,
        single_user_sync=False,
        expires=None,
    ):
        """
        Assign users policies in arborist from the information in
        ``user_projects`` and optionally a ``user_yaml``.

        The projects are sent to arborist as resources with paths like
        ``/projects/{project}``. Roles are created with just the original names
        for the privileges like ``"read-storage", "read"`` etc.

        Args:
            session: database session used to look up users, clients and the
                persisted project to authz mapping
            user_projects (dict)
            user_yaml (UserYAML) optional, if there are policies for users in a user.yaml
            single_user_sync (bool) whether authz update is for a single user
            expires (int) time at which authz info in Arborist should expire

        Return:
            bool: success. ``False`` when Arborist is not healthy. For a
            single user sync, the result of granting the first policy is
            returned directly.
        """
        healthy = self._is_arborist_healthy()
        if not healthy:
            return False

        self.logger.debug("user_projects: {}".format(user_projects))

        if user_yaml:
            self.logger.debug(
                "useryaml abac before lowering usernames: {}".format(
                    user_yaml.user_abac
                )
            )
            # note: this replaces user_yaml.user_abac with a lower-cased-key copy
            user_yaml.user_abac = {
                key.lower(): value for key, value in user_yaml.user_abac.items()
            }
            # update the project info with `projects` specified in user.yaml
            self.sync_two_phsids_dict(user_yaml.user_abac, user_projects)

        # get list of users from arborist to make sure users that are completely removed
        # from authorization sources get policies revoked
        arborist_user_projects = {}
        if not single_user_sync:
            try:
                arborist_users = self.arborist_client.get_users().json["users"]

                # construct user information, NOTE the lowering of the username. when adding/
                # removing access, the case in the Fence db is used. For combining access, it is
                # case-insensitive, so we lower
                arborist_user_projects = {
                    user["name"].lower(): {} for user in arborist_users
                }
            except (ArboristError, KeyError, AttributeError) as error:
                # TODO usersync should probably exit with non-zero exit code at the end,
                #      but sync should continue from this point so there are no partial
                #      updates
                self.logger.warning(
                    "Could not get list of users in Arborist, continuing anyway. "
                    "WARNING: this sync will NOT remove access for users no longer in "
                    f"authorization sources. Error: {error}"
                )

            # update the project info with users from arborist
            self.sync_two_phsids_dict(arborist_user_projects, user_projects)

        # NOTE(review): policy_id_list and policies are not used again in this method
        policy_id_list = []
        policies = []

        # prefer in-memory if available from user_yaml, if not, get from database
        if user_yaml and user_yaml.project_to_resource:
            project_to_authz_mapping = user_yaml.project_to_resource
            self.logger.debug(
                f"using in-memory project to authz resource mapping from "
                f"user.yaml (instead of database): {project_to_authz_mapping}"
            )
        else:
            project_to_authz_mapping = get_project_to_authz_mapping(session)
            self.logger.debug(
                f"using persisted project to authz resource mapping from database "
                f"(instead of user.yaml - as it may not be available): {project_to_authz_mapping}"
            )

        self.logger.debug(
            f"_dbgap_study_to_resources: {self._dbgap_study_to_resources}"
        )
        # every dbgap resource path plus every mapped resource path
        all_resources = [
            r
            for resources in self._dbgap_study_to_resources.values()
            for r in resources
        ]
        all_resources.extend(r for r in project_to_authz_mapping.values())
        self._create_arborist_resources(all_resources)

        for username, user_project_info in user_projects.items():
            self.logger.info("processing user `{}`".format(username))
            user = query_for_user(session=session, username=username)
            idp = None
            if user:
                # from here on use the username exactly as stored in the Fence db
                username = user.username
                idp = user.identity_provider.name if user.identity_provider else None

            self.arborist_client.create_user_if_not_exist(username)
            # a full sync starts every user from a clean slate
            if not single_user_sync:
                self._revoke_all_policies_preserve_mfa(username, idp)

            # as of 2/11/2022, for single_user_sync, as RAS visa parsing has
            # previously mapped each project to the same set of privileges
            # (i.e.{'read', 'read-storage'}), unique_policies will just be a
            # single policy with ('read', 'read-storage') being the single
            # key
            unique_policies = self._determine_unique_policies(
                user_project_info, project_to_authz_mapping
            )

            for roles in unique_policies.keys():
                for role in roles:
                    self._create_arborist_role(role)

            if single_user_sync:
                for ordered_roles, ordered_resources in unique_policies.items():
                    policy_hash = self._hash_policy_contents(
                        ordered_roles, ordered_resources
                    )
                    self._create_arborist_policy(
                        policy_hash,
                        ordered_roles,
                        ordered_resources,
                        skip_if_exists=True,
                    )
                    # return here as it is not expected single_user_sync
                    # will need any of the remaining user_yaml operations
                    # left in _update_authz_in_arborist
                    # (this returns on the first policy of the first user)
                    return self._grant_arborist_policy(
                        username, policy_hash, expires=expires
                    )
            else:
                for roles, resources in unique_policies.items():
                    for role in roles:
                        for resource in resources:
                            # grant a policy to this user which is a single
                            # role on a single resource

                            # format project '/x/y/z' -> 'x.y.z'
                            # so the policy id will be something like 'x.y.z-create'
                            policy_id = _format_policy_id(resource, role)
                            if policy_id not in self._created_policies:
                                try:
                                    self.arborist_client.update_policy(
                                        policy_id,
                                        {
                                            "description": "policy created by fence sync",
                                            "role_ids": [role],
                                            "resource_paths": [resource],
                                        },
                                        create_if_not_exist=True,
                                    )
                                except ArboristError as e:
                                    self.logger.info(
                                        "not creating policy in arborist; {}".format(
                                            str(e)
                                        )
                                    )
                                # recorded even when the update failed, so the
                                # same policy id is not attempted again
                                self._created_policies.add(policy_id)

                            self._grant_arborist_policy(
                                username, policy_id, expires=expires
                            )

            # policies listed explicitly for this user in the user.yaml
            if user_yaml:
                for policy in user_yaml.policies.get(username, []):
                    self.arborist_client.grant_user_policy(
                        username,
                        policy,
                        expires_at=expires,
                    )

        if user_yaml:
            for client_name, client_details in user_yaml.clients.items():
                client_policies = client_details.get("policies", [])
                clients = session.query(Client).filter_by(name=client_name).all()
                # update existing clients, do not create new ones
                if not clients:
                    self.logger.warning(
                        "client to update (`{}`) does not exist in fence: skipping".format(
                            client_name
                        )
                    )
                    continue
                self.logger.debug(
                    "updating client `{}` (found {} client IDs)".format(
                        client_name, len(clients)
                    )
                )
                # there may be more than 1 client with this name if credentials are being rotated,
                # so we grant access to each client ID
                for client in clients:
                    try:
                        self.arborist_client.update_client(
                            client.client_id, client_policies
                        )
                    except ArboristError as e:
                        self.logger.info(
                            "not granting policies {} to client `{}` (`{}`); {}".format(
                                client_policies, client_name, client.client_id, str(e)
                            )
                        )

        return True
1✔
2144

2145
    def _determine_unique_policies(self, user_project_info, project_to_authz_mapping):
1✔
2146
        """
2147
        Determine and return a dictionary of unique policies.
2148

2149
        Args (examples):
2150
            user_project_info (dict):
2151
            {
2152
                'phs000002.c1': { 'read-storage', 'read' },
2153
                'phs000001.c1': { 'read', 'read-storage' },
2154
                'phs000004.c1': { 'write', 'read' },
2155
                'phs000003.c1': { 'read', 'write' },
2156
                'phs000006.c1': { 'write-storage', 'write', 'read-storage', 'read' }
2157
                'phs000005.c1': { 'read', 'read-storage', 'write', 'write-storage' },
2158
            }
2159
            project_to_authz_mapping (dict):
2160
            {
2161
                'phs000001.c1': '/programs/DEV/projects/phs000001.c1'
2162
            }
2163

2164
        Return (for examples):
2165
            dict:
2166
            {
2167
                ('read', 'read-storage'): ('phs000001.c1', 'phs000002.c1'),
2168
                ('read', 'write'): ('phs000003.c1', 'phs000004.c1'),
2169
                ('read', 'read-storage', 'write', 'write-storage'): ('phs000005.c1', 'phs000006.c1'),
2170
            }
2171
        """
2172
        roles_to_resources = collections.defaultdict(list)
1✔
2173
        for study, roles in user_project_info.items():
1✔
2174
            ordered_roles = tuple(sorted(roles))
1✔
2175
            study_authz_paths = self._dbgap_study_to_resources.get(study, [study])
1✔
2176
            if study in project_to_authz_mapping:
1✔
2177
                study_authz_paths = [project_to_authz_mapping[study]]
1✔
2178
            roles_to_resources[ordered_roles].extend(study_authz_paths)
1✔
2179

2180
        policies = {}
1✔
2181
        for ordered_roles, unordered_resources in roles_to_resources.items():
1✔
2182
            policies[ordered_roles] = tuple(sorted(unordered_resources))
1✔
2183
        return policies
1✔
2184

2185
    def _create_arborist_role(self, role):
1✔
2186
        """
2187
        Wrapper around gen3authz's create_role with additional logging
2188

2189
        Args:
2190
            role (str): what the Arborist identity should be of the created role
2191

2192
        Return:
2193
            bool: True if the role was created successfully or it already
2194
                  exists. False otherwise
2195
        """
2196
        if role in self._created_roles:
1✔
2197
            return True
1✔
2198
        try:
1✔
2199
            response_json = self.arborist_client.create_role(
1✔
2200
                arborist_role_for_permission(role)
2201
            )
2202
        except ArboristError as e:
×
2203
            self.logger.error(
×
2204
                "could not create `{}` role in Arborist: {}".format(role, e)
2205
            )
2206
            return False
×
2207
        self._created_roles.add(role)
1✔
2208

2209
        if response_json is None:
1✔
2210
            self.logger.info("role `{}` already exists in Arborist".format(role))
×
2211
        else:
2212
            self.logger.info("created role `{}` in Arborist".format(role))
1✔
2213
        return True
1✔
2214

2215
    def _create_arborist_resources(self, resources):
        """
        Create resources in Arborist

        Args:
            resources (list): a list of full Arborist resource paths to create
            [
                "/programs/DEV/projects/phs000001.c1",
                "/programs/DEV/projects/phs000002.c1",
                "/programs/DEV/projects/phs000003.c1"
            ]

        Return:
            bool: True if the resources were successfully created, False otherwise

        One request is sent to Arborist per top-level object returned by
        utils.combine_provided_and_dbgap_resources({}, resources).

        As of 2/11/2022, for the resources above that call returns a single
        nested object:
        [
            { 'name': 'programs', 'subresources': [
                { 'name': 'DEV', 'subresources': [
                    { 'name': 'projects', 'subresources': [
                        { 'name': 'phs000001.c1', 'subresources': []},
                        { 'name': 'phs000002.c1', 'subresources': []},
                        { 'name': 'phs000003.c1', 'subresources': []}
                    ]}
                ]}
            ]}
        ]
        so only a single network request gets sent to Arborist.

        However, for resources = ["/phs000001.c1", "/phs000002.c1", "/phs000003.c1"]
        it returns three top-level objects:
        [
            {'name': 'phs000001.c1', 'subresources': []},
            {'name': 'phs000002.c1', 'subresources': []},
            {'name': 'phs000003.c1', 'subresources': []}
        ]
        so 3 network requests get sent to Arborist.

        As a practical matter, for sync_single_user_visas, studies
        should be nested under the `/programs` resource as in the former
        example (i.e. only one network request gets made).

        TODO for the sake of simplicity, it would be nice if only one network
        request was made no matter the input.
        """
        request_bodies = utils.combine_provided_and_dbgap_resources({}, resources)
        for body in request_bodies:
            try:
                self.arborist_client.update_resource("/", body, merge=True)
            except ArboristError as err:
                # stop at the first failed request; remaining bodies are not sent
                self.logger.error(
                    "could not create Arborist resources using request body `{}`. error: {}".format(
                        body, err
                    )
                )
                return False

        self.logger.debug(
            "created {} resource(s) in Arborist: `{}`".format(len(resources), resources)
        )
        return True
1✔
2280

2281
    def _create_arborist_policy(
1✔
2282
        self, policy_id, roles, resources, skip_if_exists=False
2283
    ):
2284
        """
2285
        Wrapper around gen3authz's create_policy with additional logging
2286

2287
        Args:
2288
            policy_id (str): what the Arborist identity should be of the created policy
2289
            roles (iterable): what roles the create policy should have
2290
            resources (iterable): what resources the created policy should have
2291
            skip_if_exists (bool): if True, this function will not treat an already
2292
                                   existent policy as an error
2293

2294
        Return:
2295
            bool: True if policy creation was successful. False otherwise
2296
        """
2297
        try:
1✔
2298
            response_json = self.arborist_client.create_policy(
1✔
2299
                {
2300
                    "id": policy_id,
2301
                    "role_ids": roles,
2302
                    "resource_paths": resources,
2303
                },
2304
                skip_if_exists=skip_if_exists,
2305
            )
2306
        except ArboristError as e:
×
2307
            self.logger.error(
×
2308
                "could not create policy `{}` in Arborist: {}".format(policy_id, e)
2309
            )
2310
            return False
×
2311

2312
        if response_json is None:
1✔
2313
            self.logger.info("policy `{}` already exists in Arborist".format(policy_id))
×
2314
        else:
2315
            self.logger.info("created policy `{}` in Arborist".format(policy_id))
1✔
2316
        return True
1✔
2317

2318
    def _hash_policy_contents(self, ordered_roles, ordered_resources):
1✔
2319
        """
2320
        Generate a sha256 hexdigest representing ordered_roles and ordered_resources.
2321

2322
        Args:
2323
            ordered_roles (iterable): policy roles in sorted order
2324
            ordered_resources (iterable): policy resources in sorted order
2325

2326
        Return:
2327
            str: SHA256 hex digest
2328
        """
2329

2330
        def escape(s):
1✔
2331
            return s.replace(",", "\,")
1✔
2332

2333
        canonical_roles = ",".join(escape(r) for r in ordered_roles)
1✔
2334
        canonical_resources = ",".join(escape(r) for r in ordered_resources)
1✔
2335
        canonical_policy = f"{canonical_roles},,f{canonical_resources}"
1✔
2336
        policy_hash = hashlib.sha256(canonical_policy.encode("utf-8")).hexdigest()
1✔
2337

2338
        return policy_hash
1✔
2339

2340
    def _grant_arborist_policy(self, username, policy_id, expires=None):
1✔
2341
        """
2342
        Wrapper around gen3authz's grant_user_policy with additional logging
2343

2344
        Args:
2345
            username (str): username of user in Arborist who policy should be
2346
                            granted to
2347
            policy_id (str): Arborist policy id
2348
            expires (int): POSIX timestamp for when policy should expire
2349

2350
        Return:
2351
            bool: True if granting of policy was successful, False otherwise
2352
        """
2353
        try:
1✔
2354
            response_json = self.arborist_client.grant_user_policy(
1✔
2355
                username,
2356
                policy_id,
2357
                expires_at=expires,
2358
            )
2359
        except ArboristError as e:
×
2360
            self.logger.error(
×
2361
                "could not grant policy `{}` to user `{}`: {}".format(
2362
                    policy_id, username, e
2363
                )
2364
            )
2365
            return False
×
2366

2367
        self.logger.debug(
1✔
2368
            "granted policy `{}` to user `{}`".format(policy_id, username)
2369
        )
2370
        return True
1✔
2371

2372
    def _determine_arborist_resource(self, dbgap_study, dbgap_config):
1✔
2373
        """
2374
        Determine the arborist resource path and add it to
2375
        _self._dbgap_study_to_resources
2376

2377
        Args:
2378
            dbgap_study (str): study phs identifier
2379
            dbgap_config (dict): dictionary of config for dbgap server
2380

2381
        """
2382
        default_namespaces = dbgap_config.get("study_to_resource_namespaces", {}).get(
1✔
2383
            "_default", ["/"]
2384
        )
2385
        namespaces = dbgap_config.get("study_to_resource_namespaces", {}).get(
1✔
2386
            dbgap_study, default_namespaces
2387
        )
2388

2389
        self.logger.debug(f"dbgap study namespaces: {namespaces}")
1✔
2390

2391
        arborist_resource_namespaces = [
1✔
2392
            namespace.rstrip("/") + "/programs/" for namespace in namespaces
2393
        ]
2394

2395
        for resource_namespace in arborist_resource_namespaces:
1✔
2396
            full_resource_path = resource_namespace + dbgap_study
1✔
2397
            if dbgap_study not in self._dbgap_study_to_resources:
1✔
2398
                self._dbgap_study_to_resources[dbgap_study] = []
1✔
2399
            self._dbgap_study_to_resources[dbgap_study].append(full_resource_path)
1✔
2400
        return arborist_resource_namespaces
1✔
2401

2402
    def _is_arborist_healthy(self):
1✔
2403
        if not self.arborist_client:
1✔
2404
            self.logger.warning("no arborist client set; skipping arborist dbgap sync")
×
2405
            return False
×
2406
        if not self.arborist_client.healthy():
1✔
2407
            # TODO (rudyardrichter, 2019-01-07): add backoff/retry here
2408
            self.logger.error(
×
2409
                "arborist service is unavailable; skipping main arborist dbgap sync"
2410
            )
2411
            return False
×
2412
        return True
1✔
2413

2414
    def _pick_sync_type(self, visa):
1✔
2415
        """
2416
        Pick type of visa to parse according to the visa provider
2417
        """
2418
        sync_client = None
1✔
2419
        if visa.type in self.visa_types["ras"]:
1✔
2420
            sync_client = self.ras_sync_client
1✔
2421
        else:
2422
            raise Exception(
×
2423
                "Visa type {} not recognized. Configure in fence-config".format(
2424
                    visa.type
2425
                )
2426
            )
2427
        if not sync_client:
1✔
2428
            raise Exception("Sync client for {} not configured".format(visa.type))
×
2429

2430
        return sync_client
1✔
2431

2432
    def sync_single_user_visas(
        self, user, ga4gh_visas, sess=None, expires=None, skip_google_updates=False
    ):
        """
        Sync a single user's visas during login or DRS/data access

        IMPORTANT NOTE: THIS DOES NOT VALIDATE THE VISA. ENSURE THIS IS DONE
                        BEFORE THIS.

        Each visa is parsed with the sync client chosen by _pick_sync_type.
        Visas whose parsing raises are logged and ignored. The projects from
        the remaining visas are merged, pushed to the storage backend and,
        when an Arborist client is set, synced to Arborist.

        Args:
            user (userdatamodel.user.User): Fence user whose visas'
                                            authz info is being synced
            ga4gh_visas (list): a list of fence.models.GA4GHVisaV1 objects
                                that are ALREADY VALIDATED
            sess (sqlalchemy.orm.session.Session): database session
            expires (int): time at which synced Arborist policies and
                           inclusion in any GBAG are set to expire
            skip_google_updates (bool): True if google group updates should be skipped. False if otherwise.

        Return:
            list of successfully parsed visas
        """
        self.ras_sync_client = RASVisa(logger=self.logger)

        # only the first dbGaP configuration entry is consulted here
        dbgap_config = self.dbGaP[0]
        parse_consent_code = self._get_parse_consent_code(dbgap_config)
        enable_common_exchange_area_access = dbgap_config.get(
            "enable_common_exchange_area_access", False
        )
        study_common_exchange_areas = dbgap_config.get(
            "study_common_exchange_areas", {}
        )

        try:
            user_yaml = UserYAML.from_file(
                self.sync_from_local_yaml_file, encrypted=False, logger=self.logger
            )
        except (EnvironmentError, AssertionError) as err:
            self.logger.error(str(err))
            self.logger.error("aborting early")
            raise

        merged_projects = {}
        info = {}
        parsed_visas = []

        for visa in ga4gh_visas:
            # an unrecognized visa type raises here and is NOT swallowed
            sync_client = self._pick_sync_type(visa)
            encoded_visa = visa.ga4gh_visa

            try:
                visa_projects, info = sync_client._parse_single_visa(
                    user,
                    encoded_visa,
                    visa.expires,
                    parse_consent_code,
                )
            except Exception:
                self.logger.warning(
                    f"ignoring unsuccessfully parsed or expired visa: {encoded_visa}"
                )
                continue

            # projects from later visas win over earlier ones on key collisions
            merged_projects = {**merged_projects, **visa_projects}
            parsed_visas.append(visa)

        info["user_id"] = user.id
        info["username"] = user.username

        user_projects = self.parse_projects({user.username: merged_projects})

        if parse_consent_code and enable_common_exchange_area_access:
            self.logger.info(
                f"using study to common exchange area mapping: {study_common_exchange_areas}"
            )

        self._process_user_projects(
            user_projects,
            enable_common_exchange_area_access,
            study_common_exchange_areas,
            dbgap_config,
            sess,
        )

        if parse_consent_code:
            self._grant_all_consents_to_c999_users(
                user_projects, user_yaml.project_to_resource
            )

        if not user_projects:
            self.logger.info("No users for syncing")
        else:
            self.sync_to_storage_backend(
                user_projects,
                info,
                sess,
                expires=expires,
                skip_google_updates=skip_google_updates,
            )

        # update arborist db (user access)
        if not self.arborist_client:
            self.logger.error("No arborist client set; skipping arborist sync")
            return parsed_visas

        self.logger.info("Synchronizing arborist with authorization info...")
        success = self._update_authz_in_arborist(
            sess,
            user_projects,
            user_yaml=user_yaml,
            single_user_sync=True,
            expires=expires,
        )
        if success:
            self.logger.info("Finished synchronizing authorization info to arborist")
        else:
            self.logger.error(
                "Could not synchronize authorization info successfully to arborist"
            )

        return parsed_visas
1✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc