uc-cdis / fence / 13164667203

05 Feb 2025 07:03PM UTC coverage: 75.227% (-0.05%) from 75.278%
Pull Request #1207: Docker Image Change
nss10 (github): [WIP]: fix command errors to get it to work

7859 of 10447 relevant lines covered (75.23%)

0.75 hits per line

Source File
fence/sync/sync_users.py (82.04%)
1
import backoff
1✔
2
import glob
1✔
3
import jwt
1✔
4
import os
1✔
5
import re
1✔
6
import subprocess as sp
1✔
7
import yaml
1✔
8
import copy
1✔
9
import datetime
1✔
10
import uuid
1✔
11
import collections
1✔
12
import hashlib
1✔
13

14
from contextlib import contextmanager
1✔
15
from collections import defaultdict
1✔
16
from csv import DictReader
1✔
17
from io import StringIO
1✔
18
from stat import S_ISDIR
1✔
19

20
import paramiko
1✔
21
from cdislogging import get_logger
1✔
22
from email_validator import validate_email, EmailNotValidError
1✔
23
from gen3authz.client.arborist.errors import ArboristError
1✔
24
from gen3users.validation import validate_user_yaml
1✔
25
from paramiko.proxy import ProxyCommand
1✔
26
from sqlalchemy.exc import IntegrityError
1✔
27
from sqlalchemy import func
1✔
28

29
from fence.config import config
1✔
30
from fence.models import (
1✔
31
    AccessPrivilege,
32
    AuthorizationProvider,
33
    Project,
34
    Tag,
35
    User,
36
    query_for_user,
37
    Client,
38
    IdentityProvider,
39
    get_project_to_authz_mapping,
40
)
41
from fence.resources.google.utils import get_or_create_proxy_group_id
1✔
42
from fence.resources.storage import StorageManager
1✔
43
from fence.resources.google.access_utils import update_google_groups_for_users
1✔
44
from fence.resources.google.access_utils import GoogleUpdateException
1✔
45
from fence.sync import utils
1✔
46
from fence.sync.passport_sync.ras_sync import RASVisa
1✔
47
from fence.utils import get_SQLAlchemyDriver, DEFAULT_BACKOFF_SETTINGS
1✔
48

49

50
def _format_policy_id(path, privilege):
1✔
51
    resource = ".".join(name for name in path.split("/") if name)
1✔
52
    return "{}-{}".format(resource, privilege)
1✔
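# Illustrative sketch: with a hypothetical Arborist resource path and privilege,
# the helper above yields a policy ID like:
#
#     _format_policy_id("/programs/phs000123/projects/p1", "read")
#     # -> "programs.phs000123.projects.p1-read"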
53

54

55
def download_dir(sftp, remote_dir, local_dir):
1✔
56
    """
57
    Recursively download files from remote_dir to local_dir
58
    Args:
59
        remote_dir(str)
60
        local_dir(str)
61
    Returns: None
62
    """
63
    dir_items = sftp.listdir_attr(remote_dir)
×
64

65
    for item in dir_items:
×
66
        remote_path = remote_dir + "/" + item.filename
×
67
        local_path = os.path.join(local_dir, item.filename)
×
68
        if S_ISDIR(item.st_mode):
×
69
            download_dir(sftp, remote_path, local_path)
×
70
        else:
71
            sftp.get(remote_path, local_path)
×
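# Illustrative sketch: download_dir is driven from an open paramiko SFTP session,
# for example (local target directory is hypothetical):
#
#     with client.open_sftp() as sftp:
#         download_dir(sftp, "./", "/tmp/dbgap-telemetry")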
72

73

74
def arborist_role_for_permission(permission):
1✔
75
    """
76
    For the programs/projects in the existing fence access control model, in order to
77
    use arborist for checking permissions we generate a policy for each combination of
78
    program/project and privilege. The roles involved all contain only one permission,
79
    for one privilege from the project access model.
80
    """
81
    return {
1✔
82
        "id": permission,
83
        "permissions": [
84
            {"id": permission, "action": {"service": "*", "method": permission}}
85
        ],
86
    }
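# Illustrative sketch: for a hypothetical privilege such as "read-storage", the
# role generated above would be:
#
#     {
#         "id": "read-storage",
#         "permissions": [
#             {"id": "read-storage", "action": {"service": "*", "method": "read-storage"}}
#         ],
#     }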
87

88

89
@contextmanager
1✔
90
def _read_file(filepath, encrypted=True, key=None, logger=None):
1✔
91
    """
92
    Context manager for reading and optionally decrypting a file. It only
93
    decrypts files encrypted by the unix 'crypt' tool, which is used by dbGaP.
94

95
    Args:
96
        filepath (str): path to the file
97
        encrypted (bool): whether the file is encrypted
98

99
    Returns:
100
        Generator[file-like class]: file like object for the file
101
    """
102
    if encrypted:
1✔
103
        has_crypt = sp.call(["which", "ccdecrypt"])
×
104
        if has_crypt != 0:
×
105
            if logger:
×
106
                logger.error("Need to install mcrypt to decrypt files from dbgap")
×
107
            # TODO (rudyardrichter, 2019-01-08): raise error and move exit out to script
108
            exit(1)
×
109
        p = sp.Popen(
×
110
            [
111
                "ccdecrypt",
112
                "-u",
113
                "-K",
114
                key,
115
                filepath,
116
            ]
117
        )
118
        try:
×
119
            yield StringIO(p.communicate()[0])
×
120
        except UnicodeDecodeError:
×
121
            logger.error("Could not decode file. Check the decryption key.")
×
122
    else:
123
        f = open(filepath, "r")
1✔
124
        yield f
1✔
125
        f.close()
1✔
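# Illustrative sketch: reading a plaintext (unencrypted) whitelist with the
# context manager above; the path below is hypothetical, and encrypted files
# would additionally need the dbGaP decryption key via `key`:
#
#     with _read_file("/tmp/authentication_file_phs000123.txt", encrypted=False) as f:
#         rows = list(DictReader(f, quotechar='"', skipinitialspace=True))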
126

127

128
class UserYAML(object):
1✔
129
    """
130
    Representation of the information in a YAML file describing user, project, and ABAC
131
    information for access control.
132
    """
133

134
    def __init__(
1✔
135
        self,
136
        projects=None,
137
        user_info=None,
138
        policies=None,
139
        clients=None,
140
        authz=None,
141
        project_to_resource=None,
142
        logger=None,
143
        user_abac=None,
144
    ):
145
        self.projects = projects or {}
1✔
146
        self.user_info = user_info or {}
1✔
147
        self.user_abac = user_abac or {}
1✔
148
        self.policies = policies or {}
1✔
149
        self.clients = clients or {}
1✔
150
        self.authz = authz or {}
1✔
151
        self.project_to_resource = project_to_resource or {}
1✔
152
        self.logger = logger
1✔
153

154
    @classmethod
1✔
155
    def from_file(cls, filepath, encrypted=True, key=None, logger=None):
1✔
156
        """
157
        Add access by "auth_id" to "self.projects" to update the Fence DB.
158
        Add access by "resource" to "self.user_abac" to update Arborist.
159
        """
160
        data = {}
1✔
161
        if filepath:
1✔
162
            with _read_file(filepath, encrypted=encrypted, key=key, logger=logger) as f:
1✔
163
                file_contents = f.read()
1✔
164
                validate_user_yaml(file_contents)  # run user.yaml validation tests
1✔
165
                data = yaml.safe_load(file_contents)
1✔
166
        else:
167
            if logger:
1✔
168
                logger.info("Did not sync a user.yaml, no file path provided.")
1✔
169

170
        projects = dict()
1✔
171
        user_info = dict()
1✔
172
        policies = dict()
1✔
173

174
        # resources should be the resource tree to construct in arborist
175
        user_abac = dict()
1✔
176

177
        # Fall back on rbac block if no authz. Remove when rbac in useryaml fully deprecated.
178
        if not data.get("authz") and data.get("rbac"):
1✔
179
            if logger:
×
180
                logger.info(
×
181
                    "No authz block found but rbac block present. Using rbac block"
182
                )
183
            data["authz"] = data["rbac"]
×
184

185
        # get user project mapping to arborist resources if it exists
186
        project_to_resource = data.get("authz", dict()).get(
1✔
187
            "user_project_to_resource", dict()
188
        )
189

190
        # read projects and privileges for each user
191
        users = data.get("users", {})
1✔
192
        for username, details in users.items():
1✔
193
            # users should occur only once each; skip if already processed
194
            if username in projects:
1✔
195
                msg = "invalid yaml file: user `{}` occurs multiple times".format(
×
196
                    username
197
                )
198
                if logger:
×
199
                    logger.error(msg)
×
200
                raise EnvironmentError(msg)
×
201

202
            privileges = {}
1✔
203
            resource_permissions = dict()
1✔
204
            for project in details.get("projects", {}):
1✔
205
                try:
1✔
206
                    privileges[project["auth_id"]] = set(project["privilege"])
1✔
207
                except KeyError as e:
×
208
                    if logger:
×
209
                        logger.error("project {} missing field: {}".format(project, e))
×
210
                    continue
×
211

212
                # project may not have `resource` field.
213
                # prefer resource field;
214
                # if no resource or mapping, assume auth_id is resource.
215
                resource = project.get("resource", project["auth_id"])
1✔
216

217
                if project["auth_id"] not in project_to_resource:
1✔
218
                    project_to_resource[project["auth_id"]] = resource
1✔
219
                resource_permissions[resource] = set(project["privilege"])
1✔
220

221
            user_info[username] = {
1✔
222
                "email": details.get("email", ""),
223
                "display_name": details.get("display_name", ""),
224
                "phone_number": details.get("phone_number", ""),
225
                "tags": details.get("tags", {}),
226
                "admin": details.get("admin", False),
227
            }
228
            if not details.get("email"):
1✔
229
                try:
1✔
230
                    valid = validate_email(
1✔
231
                        username, allow_smtputf8=False, check_deliverability=False
232
                    )
233
                    user_info[username]["email"] = valid.email
1✔
234
                except EmailNotValidError:
1✔
235
                    pass
1✔
236
            projects[username] = privileges
1✔
237
            user_abac[username] = resource_permissions
1✔
238

239
            # list of policies we want to grant to this user, which get sent to arborist
240
            # to check if they're allowed to do certain things
241
            policies[username] = details.get("policies", [])
1✔
242

243
        if logger:
1✔
244
            logger.info(
1✔
245
                "Got user project to arborist resource mapping:\n{}".format(
246
                    str(project_to_resource)
247
                )
248
            )
249

250
        authz = data.get("authz", dict())
1✔
251
        if not authz:
1✔
252
            # older version: resources in root, no `authz` section or `rbac` section
253
            if logger:
1✔
254
                logger.warning(
1✔
255
                    "access control YAML file is using old format (missing `authz`/`rbac`"
256
                    " section in the root); assuming that if it exists `resources` will"
257
                    " be on the root level, and continuing"
258
                )
259
            # we're going to throw it into the `authz` dictionary anyways, so the rest of
260
            # the code can pretend it's in the normal place that we expect
261
            resources = data.get("resources", [])
1✔
262
            # keep authz empty dict if resources is not specified
263
            if resources:
1✔
264
                authz["resources"] = data.get("resources", [])
×
265

266
        clients = data.get("clients", {})
1✔
267

268
        return cls(
1✔
269
            projects=projects,
270
            user_info=user_info,
271
            user_abac=user_abac,
272
            policies=policies,
273
            clients=clients,
274
            authz=authz,
275
            project_to_resource=project_to_resource,
276
            logger=logger,
277
        )
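    # Illustrative sketch: a minimal user.yaml fragment that from_file() above can
    # parse (all identifiers and paths are hypothetical):
    #
    #     authz:
    #       user_project_to_resource:
    #         phs000123: /programs/phs000123
    #     users:
    #       someuser@example.com:
    #         admin: false
    #         projects:
    #           - auth_id: phs000123
    #             privilege: [read, read-storage]
    #             resource: /programs/phs000123
    #
    # Access keyed by auth_id ends up in self.projects (for the Fence DB), and
    # access keyed by resource ends up in self.user_abac (for Arborist).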
278

279
    def persist_project_to_resource(self, db_session):
1✔
280
        """
281
        Store the mappings from Project.auth_id to authorization resource (Project.authz)
282

283
        The mapping comes from an external source, this function persists what was parsed
284
        into memory into the database for future use.
285
        """
286
        for auth_id, authz_resource in self.project_to_resource.items():
1✔
287
            project = (
1✔
288
                db_session.query(Project).filter(Project.auth_id == auth_id).first()
289
            )
290
            if project:
1✔
291
                project.authz = authz_resource
1✔
292
            else:
293
                project = Project(name=auth_id, auth_id=auth_id, authz=authz_resource)
×
294
                db_session.add(project)
×
295
        db_session.commit()
1✔
296

297

298
class UserSyncer(object):
1✔
299
    def __init__(
1✔
300
        self,
301
        dbGaP,
302
        DB,
303
        project_mapping,
304
        storage_credentials=None,
305
        db_session=None,
306
        is_sync_from_dbgap_server=False,
307
        sync_from_local_csv_dir=None,
308
        sync_from_local_yaml_file=None,
309
        arborist=None,
310
        folder=None,
311
    ):
312
        """
313
        Syncs ACL files from dbGap to auth database and storage backends
314
        Args:
315
            dbGaP: a list of dicts containing creds to access dbgap sftp
316
            DB: database connection string
317
            project_mapping: a dict containing how dbgap ids map to projects
318
            storage_credentials: a dict containing creds for storage backends
319
            sync_from_dir: path to an alternative dir to sync from instead of
320
                           dbGaP
321
            arborist:
322
                ArboristClient instance if the syncer should also create
323
                resources in arborist
324
            folder: a local folder where dbgap telemetry files will sync to
325
        """
326
        self.sync_from_local_csv_dir = sync_from_local_csv_dir
1✔
327
        self.sync_from_local_yaml_file = sync_from_local_yaml_file
1✔
328
        self.is_sync_from_dbgap_server = is_sync_from_dbgap_server
1✔
329
        self.dbGaP = dbGaP
1✔
330
        self.session = db_session
1✔
331
        self.driver = get_SQLAlchemyDriver(DB)
1✔
332
        self.project_mapping = project_mapping or {}
1✔
333
        self._projects = dict()
1✔
334
        self._created_roles = set()
1✔
335
        self._created_policies = set()
1✔
336
        self._dbgap_study_to_resources = dict()
1✔
337
        self.logger = get_logger(
1✔
338
            "user_syncer", log_level="debug" if config["DEBUG"] is True else "info"
339
        )
340
        self.arborist_client = arborist
1✔
341
        self.folder = folder
1✔
342

343
        self.auth_source = defaultdict(set)
1✔
344
        # auth_source used for logging. username : [source1, source2]
345
        self.visa_types = config.get("USERSYNC", {}).get("visa_types", {})
1✔
346
        self.parent_to_child_studies_mapping = {}
1✔
347
        for dbgap_config in dbGaP:
1✔
348
            self.parent_to_child_studies_mapping.update(
1✔
349
                dbgap_config.get("parent_to_child_studies_mapping", {})
350
            )
351
        if storage_credentials:
1✔
352
            self.storage_manager = StorageManager(
1✔
353
                storage_credentials, logger=self.logger
354
            )
355
        self.id_patterns = []
1✔
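    # Illustrative sketch: one entry of the `dbGaP` list consumed above, using
    # keys this class reads elsewhere (all values are hypothetical):
    #
    #     {
    #         "info": {"host": "sftp.example.org", "username": "user", "encrypted": True},
    #         "decrypt_key": "dummy-key",
    #         "parse_consent_code": True,
    #         "parent_to_child_studies_mapping": {"phs000123": ["phs000456"]},
    #     }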
356

357
    @staticmethod
1✔
358
    def _match_pattern(filepath, id_patterns, encrypted=True):
1✔
359
        """
360
        Check if the filename matches dbgap access control file pattern
361

362
        Args:
363
            filepath (str): path to file
364
            encrypted (bool): whether the file is encrypted
365

366
        Returns:
367
            bool: whether the pattern matches
368
        """
369
        id_patterns.append(r"authentication_file_phs(\d{6}).(csv|txt)")
1✔
370
        for pattern in id_patterns:
1✔
371
            if encrypted:
1✔
372
                pattern += r".enc"
×
373
            pattern += r"$"
1✔
374
            # when converting the YAML from fence-config,
375
            # python reads it as Python string literal. So "\" turns into "\\"
376
            # which messes with the regex match
377
            pattern = pattern.replace("\\\\", "\\")
1✔
378
            if re.match(pattern, os.path.basename(filepath)):
1✔
379
                return True
1✔
380
        return False
1✔
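    # Illustrative sketch of the default pattern appended above (filenames are
    # hypothetical):
    #
    #     UserSyncer._match_pattern("authentication_file_phs000123.txt", [], encrypted=False)     # -> True
    #     UserSyncer._match_pattern("authentication_file_phs000123.csv.enc", [], encrypted=True)  # -> True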
381

382
    def _get_from_sftp_with_proxy(self, server, path):
1✔
383
        """
384
        Download all data from sftp server to a local dir
385

386
        Args:
387
            server (dict) : dictionary containing info to access sftp server
388
            path (str): path to local directory
389

390
        Returns:
391
            None
392
        """
393
        proxy = None
1✔
394
        if server.get("proxy", "") != "":
1✔
395
            command = "ssh -oHostKeyAlgorithms=+ssh-rsa -i ~/.ssh/id_rsa {user}@{proxy} nc {host} {port}".format(
×
396
                user=server.get("proxy_user", ""),
397
                proxy=server.get("proxy", ""),
398
                host=server.get("host", ""),
399
                port=server.get("port", 22),
400
            )
401
            self.logger.info("SSH proxy command: {}".format(command))
×
402

403
            proxy = ProxyCommand(command)
×
404

405
        with paramiko.SSHClient() as client:
1✔
406
            client.set_log_channel(self.logger.name)
1✔
407

408
            client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
1✔
409
            parameters = {
1✔
410
                "hostname": str(server.get("host", "")),
411
                "username": str(server.get("username", "")),
412
                "password": str(server.get("password", "")),
413
                "port": int(server.get("port", 22)),
414
            }
415
            if proxy:
1✔
416
                parameters["sock"] = proxy
×
417

418
            self.logger.info(
1✔
419
                "SSH connection hostname:post {}:{}".format(
420
                    parameters.get("hostname", "unknown"),
421
                    parameters.get("port", "unknown"),
422
                )
423
            )
424
            self._connect_with_ssh(ssh_client=client, parameters=parameters)
1✔
425
            with client.open_sftp() as sftp:
×
426
                download_dir(sftp, "./", path)
1✔
427

428
        if proxy:
×
429
            proxy.close()
×
430

431
    @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
1✔
432
    def _connect_with_ssh(self, ssh_client, parameters):
1✔
433
        ssh_client.connect(**parameters)
1✔
434

435
    def _get_from_ftp_with_proxy(self, server, path):
1✔
436
        """
437
        Download data from ftp server to a local dir
438

439
        Args:
440
            server (dict): dictionary containing information for accessing server
441
            path(str): path to local files
442

443
        Returns:
444
            None
445
        """
446
        execstr = (
×
447
            'lftp -u {},{}  {} -e "set ftp:proxy http://{}; mirror . {}; exit"'.format(
448
                server.get("username", ""),
449
                server.get("password", ""),
450
                server.get("host", ""),
451
                server.get("proxy", ""),
452
                path,
453
            )
454
        )
455
        os.system(execstr)
×
456

457
    def _get_parse_consent_code(self, dbgap_config={}):
1✔
458
        return dbgap_config.get(
1✔
459
            "parse_consent_code", True
460
        )  # Should this really be true?
461

462
    def _parse_csv(self, file_dict, sess, dbgap_config={}, encrypted=True):
1✔
463
        """
464
        parse csv files to python dict
465

466
        Args:
467
            file_dict: a dictionary with key(file path) and value(privileges)
468
            sess: sqlalchemy session
469
            dbgap_config: a dictionary containing information about the dbGaP sftp server
470
                (comes from fence config)
471
            encrypted: boolean indicating whether those files are encrypted
472

473

474
        Return:
475
            Tuple[dict, dict]:
476
                (user_project, user_info) where user_project is a mapping from
477
                usernames to project permissions and user_info is a mapping
478
                from usernames to user details, such as email
479

480
        Example:
481

482
            (
483
                {
484
                    username: {
485
                        'project1': {'read-storage','write-storage'},
486
                        'project2': {'read-storage'},
487
                    }
488
                },
489
                {
490
                    username: {
491
                        'email': 'email@mail.com',
492
                        'display_name': 'display name',
493
                        'phone_number': '123-456-789',
494
                        'tags': {'dbgap_role': 'PI'}
495
                    }
496
                },
497
            )
498

499
        """
500
        user_projects = dict()
1✔
501
        user_info = defaultdict(dict)
1✔
502

503
        # parse dbGaP sftp server information
504
        dbgap_key = dbgap_config.get("decrypt_key", None)
1✔
505

506
        self.id_patterns += (
1✔
507
            [
508
                item.replace("\\\\", "\\")
509
                for item in dbgap_config.get("allowed_whitelist_patterns", [])
510
            ]
511
            if dbgap_config.get("allow_non_dbGaP_whitelist", False)
512
            else []
513
        )
514

515
        enable_common_exchange_area_access = dbgap_config.get(
1✔
516
            "enable_common_exchange_area_access", False
517
        )
518
        study_common_exchange_areas = dbgap_config.get(
1✔
519
            "study_common_exchange_areas", {}
520
        )
521
        parse_consent_code = self._get_parse_consent_code(dbgap_config)
1✔
522

523
        if parse_consent_code and enable_common_exchange_area_access:
1✔
524
            self.logger.info(
1✔
525
                f"using study to common exchange area mapping: {study_common_exchange_areas}"
526
            )
527

528
        project_id_patterns = [r"phs(\d{6})"]
1✔
529
        if "additional_allowed_project_id_patterns" in dbgap_config:
1✔
530
            patterns = dbgap_config.get("additional_allowed_project_id_patterns")
1✔
531
            patterns = [
1✔
532
                pattern.replace("\\\\", "\\") for pattern in patterns
533
            ]  # when converting the YAML from fence-config, python reads it as Python string literal. So "\" turns into "\\" which messes with the regex match
534
            project_id_patterns += patterns
1✔
535

536
        self.logger.info(f"Using these file paths: {file_dict.items()}")
1✔
537
        for filepath, privileges in file_dict.items():
1✔
538
            self.logger.info("Reading file {}".format(filepath))
1✔
539
            if os.stat(filepath).st_size == 0:
1✔
540
                self.logger.warning("Empty file {}".format(filepath))
×
541
                continue
×
542
            if not self._match_pattern(
1✔
543
                filepath, id_patterns=self.id_patterns, encrypted=encrypted
544
            ):
545
                self.logger.warning(
1✔
546
                    "Filename {} does not match dbgap access control filename pattern;"
547
                    " this could mean that the filename has an invalid format, or has"
548
                    " an unexpected .enc extension, or lacks the .enc extension where"
549
                    " expected. This file is NOT being processed by usersync!".format(
550
                        filepath
551
                    )
552
                )
553
                continue
1✔
554

555
            with _read_file(
1✔
556
                filepath, encrypted=encrypted, key=dbgap_key, logger=self.logger
557
            ) as f:
558
                csv = DictReader(f, quotechar='"', skipinitialspace=True)
1✔
559
                for row in csv:
1✔
560
                    username = row.get("login") or ""
1✔
561
                    if username == "":
1✔
562
                        continue
×
563

564
                    if dbgap_config.get("allow_non_dbGaP_whitelist", False):
1✔
565
                        phsid = (
1✔
566
                            row.get("phsid") or (row.get("project_id") or "")
567
                        ).split(".")
568
                    else:
569
                        phsid = (row.get("phsid") or "").split(".")
1✔
570

571
                    dbgap_project = phsid[0]
1✔
572
                    # There are known cases where dbGaP has wrong entries in their whitelist. Since we do a bulk Arborist request, a single bad entry would invalidate the whole request and prevent the correct entries from being added
573
                    skip = False
1✔
574
                    for pattern in project_id_patterns:
1✔
575
                        self.logger.debug(
1✔
576
                            "Checking pattern:{} with project_id:{}".format(
577
                                pattern, dbgap_project
578
                            )
579
                        )
580
                        if re.match(pattern, dbgap_project):
1✔
581
                            skip = False
1✔
582
                            break
1✔
583
                        else:
584
                            skip = True
1✔
585
                    if skip:
1✔
586
                        self.logger.warning(
1✔
587
                            "Skip processing from file {}, user {} with project {}".format(
588
                                filepath,
589
                                username,
590
                                dbgap_project,
591
                            )
592
                        )
593
                        continue
1✔
594
                    if len(phsid) > 1 and parse_consent_code:
1✔
595
                        consent_code = phsid[-1]
1✔
596

597
                        # c999 indicates full access to all consents and access
598
                        # to a study-specific exchange area
599
                        # access to at least one study-specific exchange area implies access
600
                        # to the parent study's common exchange area
601
                        #
602
                        # NOTE: Handling giving access to all consents is done at
603
                        #       a later time, when we have full information about possible
604
                        #       consents
605
                        self.logger.debug(
1✔
606
                            f"got consent code {consent_code} from dbGaP project "
607
                            f"{dbgap_project}"
608
                        )
609
                        if (
1✔
610
                            consent_code == "c999"
611
                            and enable_common_exchange_area_access
612
                            and dbgap_project in study_common_exchange_areas
613
                        ):
614
                            self.logger.info(
1✔
615
                                "found study with consent c999 and Fence "
616
                                "is configured to parse exchange area data. Giving user "
617
                                f"{username} {privileges} privileges in project: "
618
                                f"{study_common_exchange_areas[dbgap_project]}."
619
                            )
620
                            self._add_dbgap_project_for_user(
1✔
621
                                study_common_exchange_areas[dbgap_project],
622
                                privileges,
623
                                username,
624
                                sess,
625
                                user_projects,
626
                                dbgap_config,
627
                            )
628

629
                        dbgap_project += "." + consent_code
1✔
630

631
                    self._add_children_for_dbgap_project(
1✔
632
                        dbgap_project,
633
                        privileges,
634
                        username,
635
                        sess,
636
                        user_projects,
637
                        dbgap_config,
638
                    )
639

640
                    display_name = row.get("user name") or ""
1✔
641
                    tags = {"dbgap_role": row.get("role") or ""}
1✔
642

643
                    # some dbgap telemetry files have information about a researcher's PI
644
                    if "downloader for" in row:
1✔
645
                        tags["pi"] = row["downloader for"]
1✔
646

647
                    # prefer name over previous "downloader for" if it exists
648
                    if "downloader for names" in row:
1✔
649
                        tags["pi"] = row["downloader for names"]
×
650

651
                    user_info[username] = {
1✔
652
                        "email": row.get("email")
653
                        or user_info[username].get("email")
654
                        or "",
655
                        "display_name": display_name,
656
                        "phone_number": row.get("phone")
657
                        or user_info[username].get("phone_number")
658
                        or "",
659
                        "tags": tags,
660
                    }
661

662
                    self._process_dbgap_project(
1✔
663
                        dbgap_project,
664
                        privileges,
665
                        username,
666
                        sess,
667
                        user_projects,
668
                        dbgap_config,
669
                    )
670

671
        return user_projects, user_info
1✔
672

673
    def _get_children(self, dbgap_project):
1✔
674
        return self.parent_to_child_studies_mapping.get(dbgap_project.split(".")[0])
1✔
675

676
    def _add_children_for_dbgap_project(
1✔
677
        self, dbgap_project, privileges, username, sess, user_projects, dbgap_config
678
    ):
679
        """
680
        Adds the configured child studies for the given dbgap_project, adding them to the provided user_projects. If
681
        parse_consent_code is true, then the consents granted in the provided dbgap_project will also be granted to the
682
        child studies.
683
        """
684
        parent_phsid = dbgap_project
1✔
685
        parse_consent_code = self._get_parse_consent_code(dbgap_config)
1✔
686
        child_suffix = ""
1✔
687
        if parse_consent_code and re.match(
1✔
688
            config["DBGAP_ACCESSION_WITH_CONSENT_REGEX"], dbgap_project
689
        ):
690
            parent_phsid_parts = dbgap_project.split(".")
1✔
691
            parent_phsid = parent_phsid_parts[0]
1✔
692
            child_suffix = "." + parent_phsid_parts[1]
1✔
693

694
        if parent_phsid not in self.parent_to_child_studies_mapping:
1✔
695
            return
1✔
696

697
        self.logger.info(
1✔
698
            f"found parent study {parent_phsid} and Fence "
699
            "is configured to provide additional access to child studies. Giving user "
700
            f"{username} {privileges} privileges in projects: "
701
            f"{{k + child_suffix: v + child_suffix for k, v in self.parent_to_child_studies_mapping.items()}}."
702
        )
703
        child_studies = self.parent_to_child_studies_mapping.get(parent_phsid, [])
1✔
704
        for child_study in child_studies:
1✔
705
            self._add_dbgap_project_for_user(
1✔
706
                child_study + child_suffix,
707
                privileges,
708
                username,
709
                sess,
710
                user_projects,
711
                dbgap_config,
712
            )
713

714
    def _add_dbgap_project_for_user(
1✔
715
        self, dbgap_project, privileges, username, sess, user_projects, dbgap_config
716
    ):
717
        """
718
        Helper function for csv parsing that adds a given dbgap project to Fence/Arborist
719
        and then updates the dictionary containing all user's project access
720
        """
721
        if dbgap_project not in self._projects:
1✔
722
            self.logger.debug(
1✔
723
                "creating Project in fence for dbGaP study: {}".format(dbgap_project)
724
            )
725

726
            project = self._get_or_create(sess, Project, auth_id=dbgap_project)
1✔
727

728
            # need to add dbgap project to arborist
729
            if self.arborist_client:
1✔
730
                self._determine_arborist_resource(dbgap_project, dbgap_config)
1✔
731

732
            if project.name is None:
1✔
733
                project.name = dbgap_project
1✔
734
            self._projects[dbgap_project] = project
1✔
735
        phsid_privileges = {dbgap_project: set(privileges)}
1✔
736
        if username in user_projects:
1✔
737
            user_projects[username].update(phsid_privileges)
1✔
738
        else:
739
            user_projects[username] = phsid_privileges
1✔
740

741
    @staticmethod
1✔
742
    def sync_two_user_info_dict(user_info1, user_info2):
1✔
743
        """
744
        Merge user_info1 into user_info2. Values in user_info2 are overridden
745
        by values in user_info1. user_info2 ends up containing the merged dict.
746

747
        Args:
748
            user_info1 (dict): nested dict
749
            user_info2 (dict): nested dict
750

751
            Example:
752
            {username: {'email': 'abc@email.com'}}
753

754
        Returns:
755
            None
756
        """
757
        user_info2.update(user_info1)
1✔
758

759
    def sync_two_phsids_dict(
1✔
760
        self,
761
        phsids1,
762
        phsids2,
763
        source1=None,
764
        source2=None,
765
        phsids2_overrides_phsids1=True,
766
    ):
767
        """
768
        Merge phsids1 into phsids2. If `phsids2_overrides_phsids1`, values in
769
        phsids1 are overridden by values in phsids2. phsids2 ends up containing
770
        the merged dict (see explanation below).
771
        `source1` and `source2`: for logging.
772

773
        Args:
774
            phsids1, phsids2: nested dicts mapping usernames to {phsid: set of permissions}
775

776
            source1, source2: source of authz information (eg. dbgap, user_yaml, visas)
777

778
            Example:
779
            {
780
                username: {
781
                    phsid1: {'read-storage','write-storage'},
782
                    phsid2: {'read-storage'},
783
                }
784
            }
785

786
        Return:
787
            None
788

789
        Explanation:
790
            Consider merging projects of the same user:
791

792
                {user1: {phsid1: privilege1}}
793

794
                {user1: {phsid2: privilege2}}
795

796
            case 1: phsid1 != phsid2. Output:
797

798
                {user1: {phsid1: privilege1, phsid2: privilege2}}
799

800
            case 2: phsid1 == phsid2 and privilege1 != privilege2. Output:
801

802
                {user1: {phsid1: union(privilege1, privilege2)}}
803

804
            For the other cases, just simple addition
805
        """
806

807
        for user, projects1 in phsids1.items():
1✔
808
            if not phsids2.get(user):
1✔
809
                if source1:
1✔
810
                    self.auth_source[user].add(source1)
1✔
811
                phsids2[user] = projects1
1✔
812
            elif phsids2_overrides_phsids1:
1✔
813
                if source1:
1✔
814
                    self.auth_source[user].add(source1)
×
815
                if source2:
1✔
816
                    self.auth_source[user].add(source2)
×
817
                for phsid1, privilege1 in projects1.items():
1✔
818
                    if phsid1 not in phsids2[user]:
1✔
819
                        phsids2[user][phsid1] = set()
1✔
820
                    phsids2[user][phsid1].update(privilege1)
1✔
821
            elif source2:
×
822
                self.auth_source[user].add(source2)
×
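    # Illustrative sketch of the merge above, with hypothetical users/studies and
    # a hypothetical `syncer` instance:
    #
    #     phsids1 = {"user1": {"phs000123.c1": {"read"}}}
    #     phsids2 = {"user1": {"phs000123.c1": {"read-storage"}}}
    #     syncer.sync_two_phsids_dict(phsids1, phsids2, source1="dbgap", source2="user_yaml")
    #     # phsids2 == {"user1": {"phs000123.c1": {"read", "read-storage"}}}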
823

824
    def sync_to_db_and_storage_backend(
1✔
825
        self,
826
        user_project,
827
        user_info,
828
        sess,
829
        do_not_revoke_from_db_and_storage=False,
830
        expires=None,
831
    ):
832
        """
833
        sync user access control to database and storage backend
834

835
        Args:
836
            user_project (dict): a dictionary of
837

838
                {
839
                    username: {
840
                        'project1': {'read-storage','write-storage'},
841
                        'project2': {'read-storage'}
842
                    }
843
                }
844

845
            user_info (dict): a dictionary of {username: user_info{}}
846
            sess: a sqlalchemy session
847

848
        Return:
849
            None
850
        """
851
        google_bulk_mapping = None
1✔
852
        if config["GOOGLE_BULK_UPDATES"]:
1✔
853
            google_bulk_mapping = {}
1✔
854

855
        self._init_projects(user_project, sess)
1✔
856

857
        auth_provider_list = [
1✔
858
            self._get_or_create(sess, AuthorizationProvider, name="dbGaP"),
859
            self._get_or_create(sess, AuthorizationProvider, name="fence"),
860
        ]
861

862
        cur_db_user_project_list = {
1✔
863
            (ua.user.username.lower(), ua.project.auth_id)
864
            for ua in sess.query(AccessPrivilege).all()
865
        }
866

867
        # we need to compare db -> whitelist case-insensitively for username.
868
        # db stores case-sensitively, but we need to query case-insensitively
869
        user_project_lowercase = {}
1✔
870
        syncing_user_project_list = set()
1✔
871
        for username, projects in user_project.items():
1✔
872
            user_project_lowercase[username.lower()] = projects
1✔
873
            for project, _ in projects.items():
1✔
874
                syncing_user_project_list.add((username.lower(), project))
1✔
875

876
        user_info_lowercase = {
1✔
877
            username.lower(): info for username, info in user_info.items()
878
        }
879

880
        to_delete = set.difference(cur_db_user_project_list, syncing_user_project_list)
1✔
881
        to_add = set.difference(syncing_user_project_list, cur_db_user_project_list)
1✔
882
        to_update = set.intersection(
1✔
883
            cur_db_user_project_list, syncing_user_project_list
884
        )
885

886
        # when updating users we want to maintain case sensitivity in the username so
887
        # pass the original, non-lowered user_info dict
888
        self._upsert_userinfo(sess, user_info)
1✔
889

890
        if not do_not_revoke_from_db_and_storage:
1✔
891
            self._revoke_from_storage(
1✔
892
                to_delete, sess, google_bulk_mapping=google_bulk_mapping
893
            )
894
            self._revoke_from_db(sess, to_delete)
1✔
895

896
        self._grant_from_storage(
1✔
897
            to_add,
898
            user_project_lowercase,
899
            sess,
900
            google_bulk_mapping=google_bulk_mapping,
901
            expires=expires,
902
        )
903

904
        self._grant_from_db(
1✔
905
            sess,
906
            to_add,
907
            user_info_lowercase,
908
            user_project_lowercase,
909
            auth_provider_list,
910
        )
911

912
        # re-grant
913
        self._grant_from_storage(
1✔
914
            to_update,
915
            user_project_lowercase,
916
            sess,
917
            google_bulk_mapping=google_bulk_mapping,
918
            expires=expires,
919
        )
920
        self._update_from_db(sess, to_update, user_project_lowercase)
1✔
921

922
        if not do_not_revoke_from_db_and_storage:
1✔
923
            self._validate_and_update_user_admin(sess, user_info_lowercase)
1✔
924

925
        sess.commit()
1✔
926

927
        if config["GOOGLE_BULK_UPDATES"]:
1✔
928
            self.logger.info("Doing bulk Google update...")
1✔
929
            update_google_groups_for_users(google_bulk_mapping)
1✔
930
            self.logger.info("Bulk Google update done!")
×
931

932
        sess.commit()
1✔
933

934
    def sync_to_storage_backend(
1✔
935
        self, user_project, user_info, sess, expires, skip_google_updates=False
936
    ):
937
        """
938
        sync user access control to storage backend with given expiration
939

940
        Args:
941
            user_project (dict): a dictionary of
942

943
                {
944
                    username: {
945
                        'project1': {'read-storage','write-storage'},
946
                        'project2': {'read-storage'}
947
                    }
948
                }
949

950
            user_info (dict): a dictionary of attributes for a user.
951
            sess: a sqlalchemy session
952
            expires (int): time at which synced Arborist policies and
953
                   inclusion in any GBAG are set to expire
954
            skip_google_updates (bool): True if google group updates should be skipped. False if otherwise.
955
        Return:
956
            None
957
        """
958
        if not expires:
1✔
959
            raise Exception(
×
960
                f"sync to storage backend requires an expiration. you provided: {expires}"
961
            )
962

963
        google_group_user_mapping = None
1✔
964
        if config["GOOGLE_BULK_UPDATES"]:
1✔
965
            google_group_user_mapping = {}
×
966
            get_or_create_proxy_group_id(
×
967
                expires=expires,
968
                user_id=user_info["user_id"],
969
                username=user_info["username"],
970
                session=sess,
971
                storage_manager=self.storage_manager,
972
            )
973

974
        # TODO: eventually it'd be nice to remove this step but it's required
975
        #       so that grant_from_storage can determine what storage backends
976
        #       are needed for a project.
977
        self._init_projects(user_project, sess)
1✔
978

979
        # we need to compare db -> whitelist case-insensitively for username.
980
        # db stores case-sensitively, but we need to query case-insensitively
981
        user_project_lowercase = {}
1✔
982
        syncing_user_project_list = set()
1✔
983
        for username, projects in user_project.items():
1✔
984
            user_project_lowercase[username.lower()] = projects
1✔
985
            for project, _ in projects.items():
1✔
986
                syncing_user_project_list.add((username.lower(), project))
1✔
987

988
        to_add = set(syncing_user_project_list)
1✔
989

990
        # when updating users we want to maintain case sensitivity in the username so
991
        # pass the original, non-lowered user_info dict
992
        self._upsert_userinfo(sess, {user_info["username"].lower(): user_info})
1✔
993
        if not skip_google_updates:
1✔
994
            self._grant_from_storage(
1✔
995
                to_add,
996
                user_project_lowercase,
997
                sess,
998
                google_bulk_mapping=google_group_user_mapping,
999
                expires=expires,
1000
            )
1001

1002
            if config["GOOGLE_BULK_UPDATES"]:
1✔
1003
                self.logger.info("Updating user's google groups ...")
×
1004
                update_google_groups_for_users(google_group_user_mapping)
×
1005
                self.logger.info("Google groups update done!!")
×
1006

1007
        sess.commit()
1✔
1008

1009
    def _revoke_from_db(self, sess, to_delete):
1✔
1010
        """
1011
        Revoke user access to projects in the auth database
1012

1013
        Args:
1014
            sess: sqlalchemy session
1015
            to_delete: a set of (username, project.auth_id) to be revoked from db
1016
        Return:
1017
            None
1018
        """
1019
        for username, project_auth_id in to_delete:
1✔
1020
            q = (
1✔
1021
                sess.query(AccessPrivilege)
1022
                .filter(AccessPrivilege.project.has(auth_id=project_auth_id))
1023
                .join(AccessPrivilege.user)
1024
                .filter(func.lower(User.username) == username)
1025
                .all()
1026
            )
1027
            for access in q:
1✔
1028
                self.logger.info(
1✔
1029
                    "revoke {} access to {} in db".format(username, project_auth_id)
1030
                )
1031
                sess.delete(access)
1✔
1032

1033
    def _validate_and_update_user_admin(self, sess, user_info):
1✔
1034
        """
1035
        Make sure there is no admin user that is not in yaml/csv files
1036

1037
        Args:
1038
            sess: sqlalchemy session
1039
            user_info: a dict of
1040
            {
1041
                username: {
1042
                    'email': email,
1043
                    'display_name': display_name,
1044
                    'phone_number': phonenum,
1045
                    'tags': {'k1':'v1', 'k2': 'v2'}
1046
                    'admin': is_admin
1047
                }
1048
            }
1049
        Returns:
1050
            None
1051
        """
1052
        for admin_user in sess.query(User).filter_by(is_admin=True).all():
1✔
1053
            if admin_user.username.lower() not in user_info:
1✔
1054
                admin_user.is_admin = False
×
1055
                sess.add(admin_user)
×
1056
                self.logger.info(
×
1057
                    "remove admin access from {} in db".format(
1058
                        admin_user.username.lower()
1059
                    )
1060
                )
1061

1062
    def _update_from_db(self, sess, to_update, user_project):
1✔
1063
        """
1064
        Update user access to projects in the auth database
1065

1066
        Args:
1067
            sess: sqlalchemy session
1068
            to_update:
1069
                a set of (username, project.auth_id) to be updated from db
1070

1071
        Return:
1072
            None
1073
        """
1074

1075
        for username, project_auth_id in to_update:
1✔
1076
            q = (
1✔
1077
                sess.query(AccessPrivilege)
1078
                .filter(AccessPrivilege.project.has(auth_id=project_auth_id))
1079
                .join(AccessPrivilege.user)
1080
                .filter(func.lower(User.username) == username)
1081
                .all()
1082
            )
1083
            for access in q:
1✔
1084
                access.privilege = user_project[username][project_auth_id]
1✔
1085
                self.logger.info(
1✔
1086
                    "update {} with {} access to {} in db".format(
1087
                        username, access.privilege, project_auth_id
1088
                    )
1089
                )
1090

1091
    def _grant_from_db(self, sess, to_add, user_info, user_project, auth_provider_list):
1✔
1092
        """
1093
        Grant user access to projects in the auth database
1094
        Args:
1095
            sess: sqlalchemy session
1096
            to_add: a set of (username, project.auth_id) to be granted
1097
            user_project:
1098
                a dictionary of {username: {project: {'read','write'}}
1099
        Return:
1100
            None
1101
        """
1102
        for username, project_auth_id in to_add:
1✔
1103
            u = query_for_user(session=sess, username=username)
1✔
1104

1105
            auth_provider = auth_provider_list[0]
1✔
1106
            if "dbgap_role" not in user_info[username]["tags"]:
1✔
1107
                auth_provider = auth_provider_list[1]
1✔
1108
            user_access = AccessPrivilege(
1✔
1109
                user=u,
1110
                project=self._projects[project_auth_id],
1111
                privilege=list(user_project[username][project_auth_id]),
1112
                auth_provider=auth_provider,
1113
            )
1114
            self.logger.info(
1✔
1115
                "grant user {} to {} with access {}".format(
1116
                    username, user_access.project, user_access.privilege
1117
                )
1118
            )
1119
            sess.add(user_access)
1✔
1120

1121
    def _upsert_userinfo(self, sess, user_info):
1✔
1122
        """
1123
        Update user info in the database.
1124

1125
        Args:
1126
            sess: sqlalchemy session
1127
            user_info:
1128
                a dict of {username: {display_name, phone_number, tags, admin}}
1129

1130
        Return:
1131
            None
1132
        """
1133

1134
        for username in user_info:
1✔
1135
            u = query_for_user(session=sess, username=username)
1✔
1136

1137
            if u is None:
1✔
1138
                self.logger.info("create user {}".format(username))
1✔
1139
                u = User(username=username)
1✔
1140
                sess.add(u)
1✔
1141

1142
            if self.arborist_client:
1✔
1143
                self.arborist_client.create_user({"name": username})
1✔
1144

1145
            u.email = user_info[username].get("email", "")
1✔
1146
            u.display_name = user_info[username].get("display_name", "")
1✔
1147
            u.phone_number = user_info[username].get("phone_number", "")
1✔
1148
            u.is_admin = user_info[username].get("admin", False)
1✔
1149

1150
            idp_name = user_info[username].get("idp_name", "")
1✔
1151
            if idp_name and not u.identity_provider:
1✔
1152
                idp = (
×
1153
                    sess.query(IdentityProvider)
1154
                    .filter(IdentityProvider.name == idp_name)
1155
                    .first()
1156
                )
1157
                if not idp:
×
1158
                    idp = IdentityProvider(name=idp_name)
×
1159
                u.identity_provider = idp
×
1160

1161
            # do not update if there is no tag
1162
            if not user_info[username].get("tags"):
1✔
1163
                continue
1✔
1164

1165
            # remove user db tags if they are not shown in new tags
1166
            for tag in u.tags:
1✔
1167
                if tag.key not in user_info[username]["tags"]:
1✔
1168
                    u.tags.remove(tag)
1✔
1169

1170
            # sync
1171
            for k, v in user_info[username]["tags"].items():
1✔
1172
                found = False
1✔
1173
                for tag in u.tags:
1✔
1174
                    if tag.key == k:
1✔
1175
                        found = True
1✔
1176
                        tag.value = v
1✔
1177
                # create new tag if not found
1178
                if not found:
1✔
1179
                    tag = Tag(key=k, value=v)
1✔
1180
                    u.tags.append(tag)
1✔
1181

1182
    def _revoke_from_storage(self, to_delete, sess, google_bulk_mapping=None):
1✔
1183
        """
1184
        If a project has a storage backend, revoke the user's access to buckets in
1185
        the storage backend.
1186

1187
        Args:
1188
            to_delete: a set of (username, project.auth_id) to be revoked
1189

1190
        Return:
1191
            None
1192
        """
1193
        for username, project_auth_id in to_delete:
1✔
1194
            project = (
1✔
1195
                sess.query(Project).filter(Project.auth_id == project_auth_id).first()
1196
            )
1197
            for sa in project.storage_access:
1✔
1198
                if not hasattr(self, "storage_manager"):
1✔
1199
                    self.logger.error(
×
1200
                        (
1201
                            "CANNOT revoke {} access to {} in {} because there is NO "
1202
                            "configured storage accesses at all. See configuration. "
1203
                            "Continuing anyway..."
1204
                        ).format(username, project_auth_id, sa.provider.name)
1205
                    )
1206
                    continue
×
1207

1208
                self.logger.info(
1✔
1209
                    "revoke {} access to {} in {}".format(
1210
                        username, project_auth_id, sa.provider.name
1211
                    )
1212
                )
1213
                self.storage_manager.revoke_access(
1✔
1214
                    provider=sa.provider.name,
1215
                    username=username,
1216
                    project=project,
1217
                    session=sess,
1218
                    google_bulk_mapping=google_bulk_mapping,
1219
                )
1220

1221
    def _grant_from_storage(
1✔
1222
        self, to_add, user_project, sess, google_bulk_mapping=None, expires=None
1223
    ):
1224
        """
1225
        If a project has a storage backend, grant the user's access to buckets in
1226
        the storage backend.
1227

1228
        Args:
1229
            to_add: a set of (username, project.auth_id)  to be granted
1230
            user_project: a dictionary like:
1231

1232
                    {username: {phsid: {'read-storage','write-storage'}}}
1233

1234
        Return:
1235
            dict of the users' storage usernames to their user_projects and the respective storage access.
1236
        """
1237
        storage_user_to_sa_and_user_project = defaultdict()
1✔
1238
        for username, project_auth_id in to_add:
1✔
1239
            project = self._projects[project_auth_id]
1✔
1240
            for sa in project.storage_access:
1✔
1241
                access = list(user_project[username][project_auth_id])
1✔
1242
                if not hasattr(self, "storage_manager"):
1✔
1243
                    self.logger.error(
×
1244
                        (
1245
                            "CANNOT grant {} access {} to {} in {} because there is NO "
1246
                            "configured storage accesses at all. See configuration. "
1247
                            "Continuing anyway..."
1248
                        ).format(username, access, project_auth_id, sa.provider.name)
1249
                    )
1250
                    continue
×
1251

1252
                self.logger.info(
1✔
1253
                    "grant {} access {} to {} in {}".format(
1254
                        username, access, project_auth_id, sa.provider.name
1255
                    )
1256
                )
1257
                storage_username = self.storage_manager.grant_access(
1✔
1258
                    provider=sa.provider.name,
1259
                    username=username,
1260
                    project=project,
1261
                    access=access,
1262
                    session=sess,
1263
                    google_bulk_mapping=google_bulk_mapping,
1264
                    expires=expires,
1265
                )
1266

1267
                storage_user_to_sa_and_user_project[storage_username] = (sa, project)
1✔
1268
        return storage_user_to_sa_and_user_project
1✔
1269

1270
    def _init_projects(self, user_project, sess):
1✔
1271
        """
1272
        initialize projects
1273
        """
1274
        if self.project_mapping:
1✔
1275
            for projects in list(self.project_mapping.values()):
1✔
1276
                for p in projects:
1✔
1277
                    self.logger.debug(
1✔
1278
                        "creating Project with info from project_mapping: {}".format(p)
1279
                    )
1280
                    project = self._get_or_create(sess, Project, **p)
1✔
1281
                    self._projects[p["auth_id"]] = project
1✔
1282
        for _, projects in user_project.items():
1✔
1283
            for auth_id in list(projects.keys()):
1✔
1284
                project = sess.query(Project).filter(Project.auth_id == auth_id).first()
1✔
1285
                if not project:
1✔
1286
                    data = {"name": auth_id, "auth_id": auth_id}
1✔
1287
                    try:
1✔
1288
                        project = self._get_or_create(sess, Project, **data)
1✔
1289
                    except IntegrityError as e:
×
1290
                        sess.rollback()
×
1291
                        self.logger.error(
×
1292
                            f"Project {auth_id} already exists. Detail {str(e)}"
1293
                        )
1294
                        raise Exception(
×
1295
                            "Project {} already exists. Detail {}. Please contact your system administrator.".format(
1296
                                auth_id, str(e)
1297
                            )
1298
                        )
1299
                if auth_id not in self._projects:
1✔
1300
                    self._projects[auth_id] = project
1✔
1301

1302
    @staticmethod
1✔
1303
    def _get_or_create(sess, model, **kwargs):
1✔
1304
        instance = sess.query(model).filter_by(**kwargs).first()
1✔
1305
        if not instance:
1✔
1306
            instance = model(**kwargs)
1✔
1307
            sess.add(instance)
1✔
1308
        return instance
1✔
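    # Illustrative sketch: the helper above either returns the existing row or
    # stages a new one on the session (the auth_id below is hypothetical):
    #
    #     project = self._get_or_create(sess, Project, auth_id="phs000123")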
1309

1310
    def _process_dbgap_files(self, dbgap_config, sess):
1✔
1311
        """
1312
        Args:
1313
            dbgap_config : a dictionary containing information about a single
1314
                           dbgap sftp server (from fence config)
1315
            sess: database session
1316

1317
        Return:
1318
            user_projects (dict)
1319
            user_info (dict)
1320
        """
1321
        dbgap_file_list = []
1✔
1322
        hostname = dbgap_config["info"]["host"]
1✔
1323
        username = dbgap_config["info"]["username"]
1✔
1324
        encrypted = dbgap_config["info"].get("encrypted", True)
1✔
1325
        folderdir = os.path.join(str(self.folder), str(hostname), str(username))
1✔
1326

1327
        try:
1✔
1328
            if os.path.exists(folderdir):
1✔
1329
                dbgap_file_list = glob.glob(
×
1330
                    os.path.join(folderdir, "*")
1331
                )  # get lists of file from folder
1332
            else:
1333
                self.logger.info("Downloading files from: {}".format(hostname))
1✔
1334
                dbgap_file_list = self._download(dbgap_config)
1✔
1335
        except Exception as e:
1✔
1336
            self.logger.error(e)
1✔
1337
            exit(1)
1✔
1338
        self.logger.info("dbgap files: {}".format(dbgap_file_list))
×
1339
        user_projects, user_info = self._get_user_permissions_from_csv_list(
×
1340
            dbgap_file_list,
1341
            encrypted=encrypted,
1342
            session=sess,
1343
            dbgap_config=dbgap_config,
1344
        )
1345

1346
        user_projects = self.parse_projects(user_projects)
×
1347
        return user_projects, user_info
×
1348
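
    # Illustrative note (hostname/username/folder values are hypothetical): with
    # self.folder = "/var/fence/dbgap-sync", host "sftp.example.org" and user "user1",
    # telemetry files are cached under "/var/fence/dbgap-sync/sftp.example.org/user1/".
    # If that directory already exists its files are globbed and reused; otherwise the
    # files are downloaded from the dbGaP server via _download().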

1349
    def _get_user_permissions_from_csv_list(
1✔
1350
        self, file_list, encrypted, session, dbgap_config={}
1351
    ):
1352
        """
1353
        Args:
1354
            file_list: list of files (represented as strings)
1355
            encrypted: boolean indicating whether those files are encrypted
1356
            session: sqlalchemy session
1357
            dbgap_config: a dictionary containing information about the dbGaP sftp server
1358
                    (comes from fence config)
1359

1360
        Return:
1361
            user_projects (dict)
1362
            user_info (dict)
1363
        """
1364
        permissions = [{"read-storage", "read"} for _ in file_list]
1✔
1365
        user_projects, user_info = self._parse_csv(
1✔
1366
            dict(list(zip(file_list, permissions))),
1367
            sess=session,
1368
            dbgap_config=dbgap_config,
1369
            encrypted=encrypted,
1370
        )
1371
        return user_projects, user_info
1✔
1372
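
    # Illustrative sketch of the mapping built above (file name is hypothetical):
    # every whitelist file gets the same default privileges, so _parse_csv receives
    # something like
    #
    #     {"authentication_file_phs000123.txt": {"read", "read-storage"}}
    #
    # i.e. dict(zip(file_list, permissions)) maps each CSV path to {"read", "read-storage"}.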

1373
    def _merge_multiple_local_csv_files(
1✔
1374
        self, dbgap_file_list, encrypted, dbgap_configs, session
1375
    ):
1376
        """
1377
        Args:
1378
            dbgap_file_list (list): a list of whitelist file locations stored locally
1379
            encrypted (bool): whether the file is encrypted (comes from fence config)
1380
            dbgap_configs (list): list of dictionaries containing information about the dbgap server (comes from fence config)
1381
            session (sqlalchemy.Session): database session
1382

1383
        Return:
1384
            merged_user_projects (dict)
1385
            merged_user_info (dict)
1386
        """
1387
        merged_user_projects = {}
1✔
1388
        merged_user_info = {}
1✔
1389

1390
        for dbgap_config in dbgap_configs:
1✔
1391
            user_projects, user_info = self._get_user_permissions_from_csv_list(
1✔
1392
                dbgap_file_list,
1393
                encrypted,
1394
                session=session,
1395
                dbgap_config=dbgap_config,
1396
            )
1397
            self.sync_two_user_info_dict(user_info, merged_user_info)
1✔
1398
            self.sync_two_phsids_dict(user_projects, merged_user_projects)
1✔
1399
        return merged_user_projects, merged_user_info
1✔
1400

1401
    def _merge_multiple_dbgap_sftp(self, dbgap_servers, sess):
1✔
1402
        """
1403
        Args:
1404
            dbgap_servers : a list of dictionaries, each containing config for a
1405
                           dbgap sftp server (comes from fence config)
1406
            sess: database session
1407

1408
        Return:
1409
            merged_user_projects (dict)
1410
            merged_user_info (dict)
1411
        """
1412
        merged_user_projects = {}
1✔
1413
        merged_user_info = {}
1✔
1414
        for dbgap in dbgap_servers:
1✔
1415
            user_projects, user_info = self._process_dbgap_files(dbgap, sess)
1✔
1416
            # merge into merged_user_info
1417
            # user_info overrides original info in merged_user_info
1418
            self.sync_two_user_info_dict(user_info, merged_user_info)
1✔
1419

1420
            # merge all access info dicts into "merged_user_projects".
1421
            # the access info is combined - if the user_projects access is
1422
            # ["read"] and the merged_user_projects is ["read-storage"], the
1423
            # resulting access is ["read", "read-storage"].
1424
            self.sync_two_phsids_dict(user_projects, merged_user_projects)
1✔
1425
        return merged_user_projects, merged_user_info
1✔
1426
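
    # Illustrative sketch of the merge semantics (usernames and study ids are
    # hypothetical): if one server yields
    #     {"alice": {"phs000123.c1": {"read"}}}
    # and another yields
    #     {"alice": {"phs000123.c1": {"read-storage"}}}
    # then sync_two_phsids_dict combines them so "alice" ends up with
    # {"read", "read-storage"} on phs000123.c1, while sync_two_user_info_dict lets the
    # most recently merged user_info override earlier entries.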

1427
    def parse_projects(self, user_projects):
1✔
1428
        """
1429
        helper function for parsing projects
1430
        """
1431
        return {key.lower(): value for key, value in user_projects.items()}
1✔
1432

1433
    def _process_dbgap_project(
1✔
1434
        self, dbgap_project, privileges, username, sess, user_projects, dbgap_config
1435
    ):
1436
        if dbgap_project not in self.project_mapping:
1✔
1437
            self._add_dbgap_project_for_user(
1✔
1438
                dbgap_project,
1439
                privileges,
1440
                username,
1441
                sess,
1442
                user_projects,
1443
                dbgap_config,
1444
            )
1445

1446
        for element_dict in self.project_mapping.get(dbgap_project, []):
1✔
1447
            try:
1✔
1448
                phsid_privileges = {element_dict["auth_id"]: set(privileges)}
1✔
1449

1450
                # need to add dbgap project to arborist
1451
                if self.arborist_client:
1✔
1452
                    self._determine_arborist_resource(
1✔
1453
                        element_dict["auth_id"], dbgap_config
1454
                    )
1455

1456
                if username not in user_projects:
1✔
1457
                    user_projects[username] = {}
1✔
1458
                user_projects[username].update(phsid_privileges)
1✔
1459

1460
            except ValueError as e:
×
1461
                self.logger.info(e)
×
1462

1463
    def _process_user_projects(
1✔
1464
        self,
1465
        user_projects,
1466
        enable_common_exchange_area_access,
1467
        study_common_exchange_areas,
1468
        dbgap_config,
1469
        sess,
1470
    ):
1471
        user_projects_to_modify = copy.deepcopy(user_projects)
1✔
1472
        for username in user_projects.keys():
1✔
1473
            for project in user_projects[username].keys():
1✔
1474
                phsid = project.split(".")
1✔
1475
                dbgap_project = phsid[0]
1✔
1476
                privileges = user_projects[username][project]
1✔
1477
                if len(phsid) > 1 and self._get_parse_consent_code(dbgap_config):
1✔
1478
                    consent_code = phsid[-1]
1✔
1479

1480
                    # c999 indicates full access to all consents and access
1481
                    # to a study-specific exchange area
1482
                    # access to at least one study-specific exchange area implies access
1483
                    # to the parent study's common exchange area
1484
                    #
1485
                    # NOTE: Handling giving access to all consents is done at
1486
                    #       a later time, when we have full information about possible
1487
                    #       consents
1488
                    self.logger.debug(
1✔
1489
                        f"got consent code {consent_code} from dbGaP project "
1490
                        f"{dbgap_project}"
1491
                    )
1492
                    if (
1✔
1493
                        consent_code == "c999"
1494
                        and enable_common_exchange_area_access
1495
                        and dbgap_project in study_common_exchange_areas
1496
                    ):
1497
                        self.logger.info(
1✔
1498
                            "found study with consent c999 and Fence "
1499
                            "is configured to parse exchange area data. Giving user "
1500
                            f"{username} {privileges} privileges in project: "
1501
                            f"{study_common_exchange_areas[dbgap_project]}."
1502
                        )
1503
                        self._add_dbgap_project_for_user(
1✔
1504
                            study_common_exchange_areas[dbgap_project],
1505
                            privileges,
1506
                            username,
1507
                            sess,
1508
                            user_projects_to_modify,
1509
                            dbgap_config,
1510
                        )
1511

1512
                    dbgap_project += "." + consent_code
1✔
1513

1514
                self._process_dbgap_project(
1✔
1515
                    dbgap_project,
1516
                    privileges,
1517
                    username,
1518
                    sess,
1519
                    user_projects_to_modify,
1520
                    dbgap_config,
1521
                )
1522
        for user in user_projects_to_modify.keys():
1✔
1523
            user_projects[user] = user_projects_to_modify[user]
1✔
1524
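
    # Illustrative walk-through of the consent handling above (study id is
    # hypothetical): with consent-code parsing enabled, "phs000123.c2" splits into
    # phsid "phs000123" and consent "c2", and access is recorded for the recombined
    # "phs000123.c2". A "c999" consent additionally grants access to the study's
    # common exchange area when enable_common_exchange_area_access is True and
    # study_common_exchange_areas has an entry for "phs000123"; expanding c999 into
    # every concrete consent happens later, in _grant_all_consents_to_c999_users.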

1525
    def sync(self):
1✔
1526
        if self.session:
1✔
1527
            self._sync(self.session)
1✔
1528
        else:
1529
            with self.driver.session as s:
×
1530
                self._sync(s)
×
1531

1532
    def download(self):
1✔
1533
        for dbgap_server in self.dbGaP:
×
1534
            self._download(dbgap_server)
×
1535

1536
    def _download(self, dbgap_config):
1✔
1537
        """
1538
        Download files from dbgap server.
1539
        """
1540
        server = dbgap_config["info"]
1✔
1541
        protocol = dbgap_config["protocol"]
1✔
1542
        hostname = server["host"]
1✔
1543
        username = server["username"]
1✔
1544
        folderdir = os.path.join(str(self.folder), str(hostname), str(username))
1✔
1545

1546
        if not os.path.exists(folderdir):
1✔
1547
            os.makedirs(folderdir)
1✔
1548

1549
        self.logger.info("Download from server")
1✔
1550
        try:
1✔
1551
            if protocol == "sftp":
1✔
1552
                self._get_from_sftp_with_proxy(server, folderdir)
1✔
1553
            else:
1554
                self._get_from_ftp_with_proxy(server, folderdir)
×
1555
            dbgap_files = glob.glob(os.path.join(folderdir, "*"))
×
1556
            return dbgap_files
×
1557
        except Exception as e:
1✔
1558
            self.logger.error(e)
1✔
1559
            raise
1✔
1560

1561
    def _sync(self, sess):
1✔
1562
        """
1563
        Collect files from dbgap server(s), sync csv and yaml files to storage
1564
        backend and fence DB
1565
        """
1566

1567
        # get all dbgap files
1568
        user_projects = {}
1✔
1569
        user_info = {}
1✔
1570
        if self.is_sync_from_dbgap_server:
1✔
1571
            self.logger.debug(
1✔
1572
                "Pulling telemetry files from {} dbgap sftp servers".format(
1573
                    len(self.dbGaP)
1574
                )
1575
            )
1576
            user_projects, user_info = self._merge_multiple_dbgap_sftp(self.dbGaP, sess)
1✔
1577

1578
        local_csv_file_list = []
1✔
1579
        if self.sync_from_local_csv_dir:
1✔
1580
            local_csv_file_list = glob.glob(
1✔
1581
                os.path.join(self.sync_from_local_csv_dir, "*")
1582
            )
1583
            # Sort the list so the order of files is consistent across platforms
1584
            local_csv_file_list.sort()
1✔
1585

1586
        user_projects_csv, user_info_csv = self._merge_multiple_local_csv_files(
1✔
1587
            local_csv_file_list,
1588
            encrypted=False,
1589
            session=sess,
1590
            dbgap_configs=self.dbGaP,
1591
        )
1592

1593
        try:
1✔
1594
            user_yaml = UserYAML.from_file(
1✔
1595
                self.sync_from_local_yaml_file, encrypted=False, logger=self.logger
1596
            )
1597
        except (EnvironmentError, AssertionError) as e:
1✔
1598
            self.logger.error(str(e))
1✔
1599
            self.logger.error("aborting early")
1✔
1600
            raise
1✔
1601

1602
        # parse all projects
1603
        user_projects_csv = self.parse_projects(user_projects_csv)
1✔
1604
        user_projects = self.parse_projects(user_projects)
1✔
1605
        user_yaml.projects = self.parse_projects(user_yaml.projects)
1✔
1606

1607
        # merge all user info dicts into "user_info".
1608
        # the user info (such as email) in the user.yaml files
1609
        # overrides the user info from the CSV files.
1610
        self.sync_two_user_info_dict(user_info_csv, user_info)
1✔
1611
        self.sync_two_user_info_dict(user_yaml.user_info, user_info)
1✔
1612

1613
        # merge all access info dicts into "user_projects".
1614
        # the access info is combined - if the user.yaml access is
1615
        # ["read"] and the CSV file access is ["read-storage"], the
1616
        # resulting access is ["read", "read-storage"].
1617
        self.sync_two_phsids_dict(
1✔
1618
            user_projects_csv, user_projects, source1="local_csv", source2="dbgap"
1619
        )
1620
        self.sync_two_phsids_dict(
1✔
1621
            user_yaml.projects, user_projects, source1="user_yaml", source2="dbgap"
1622
        )
1623

1624
        # Note: if there are multiple dbgap sftp servers configured
1625
        # this parameter is always from the config for the first dbgap sftp server
1626
        # not any additional ones
1627
        for dbgap_config in self.dbGaP:
1✔
1628
            if self._get_parse_consent_code(dbgap_config):
1✔
1629
                self._grant_all_consents_to_c999_users(
1✔
1630
                    user_projects, user_yaml.project_to_resource
1631
                )
1632

1633
        google_update_ex = None
1✔
1634

1635
        try:
1✔
1636
            # update the Fence DB
1637
            if user_projects:
1✔
1638
                self.logger.info("Sync to db and storage backend")
1✔
1639
                self.sync_to_db_and_storage_backend(user_projects, user_info, sess)
1✔
1640
                self.logger.info("Finish syncing to db and storage backend")
1✔
1641
            else:
1642
                self.logger.info("No users for syncing")
×
1643
        except GoogleUpdateException as ex:
1✔
1644
            # save this to reraise later after all non-Google syncing has finished
1645
            # this way, any issues with Google only affect Google data access and don't
1646
            # cascade problems into non-Google AWS or Azure access
1647
            google_update_ex = ex
1✔
1648

1649
        # update the Arborist DB (resources, roles, policies, groups)
1650
        if user_yaml.authz:
1✔
1651
            if not self.arborist_client:
1✔
1652
                raise EnvironmentError(
×
1653
                    "yaml file contains authz section but sync is not configured with"
1654
                    " arborist client--did you run sync with --arborist <arborist client> arg?"
1655
                )
1656
            self.logger.info("Synchronizing arborist...")
1✔
1657
            success = self._update_arborist(sess, user_yaml)
1✔
1658
            if success:
1✔
1659
                self.logger.info("Finished synchronizing arborist")
1✔
1660
            else:
1661
                self.logger.error("Could not synchronize successfully")
×
1662
                exit(1)
×
1663
        else:
1664
            self.logger.info("No `authz` section; skipping arborist sync")
×
1665

1666
        # update the Arborist DB (user access)
1667
        if self.arborist_client:
1✔
1668
            self.logger.info("Synchronizing arborist with authorization info...")
1✔
1669
            success = self._update_authz_in_arborist(sess, user_projects, user_yaml)
1✔
1670
            if success:
1✔
1671
                self.logger.info(
1✔
1672
                    "Finished synchronizing authorization info to arborist"
1673
                )
1674
            else:
1675
                self.logger.error(
×
1676
                    "Could not synchronize authorization info successfully to arborist"
1677
                )
1678
                exit(1)
×
1679
        else:
1680
            self.logger.error("No arborist client set; skipping arborist sync")
×
1681

1682
        # Logging authz source
1683
        for u, s in self.auth_source.items():
1✔
1684
            self.logger.info("Access for user {} from {}".format(u, s))
1✔
1685

1686
        self.logger.info(
1✔
1687
            f"Persisting authz mapping to database: {user_yaml.project_to_resource}"
1688
        )
1689
        user_yaml.persist_project_to_resource(db_session=sess)
1✔
1690
        if google_update_ex is not None:
1✔
1691
            raise google_update_ex
1✔
1692

1693
    def _grant_all_consents_to_c999_users(
1✔
1694
        self, user_projects, user_yaml_project_to_resources
1695
    ):
1696
        access_number_matcher = re.compile(config["DBGAP_ACCESSION_WITH_CONSENT_REGEX"])
1✔
1697
        # combine dbgap/user.yaml projects into one big list (in case not all consents
1698
        # are in either)
1699
        all_projects = set(
1✔
1700
            list(self._projects.keys()) + list(user_yaml_project_to_resources.keys())
1701
        )
1702

1703
        self.logger.debug(f"all projects: {all_projects}")
1✔
1704

1705
        # construct a mapping from phsid (without consent) to all accessions with consent
1706
        consent_mapping = {}
1✔
1707
        for project in all_projects:
1✔
1708
            phs_match = access_number_matcher.match(project)
1✔
1709
            if phs_match:
1✔
1710
                accession_number = phs_match.groupdict()
1✔
1711

1712
                # TODO: This is not handling the .v1.p1 at all
1713
                consent_mapping.setdefault(accession_number["phsid"], set()).add(
1✔
1714
                    ".".join([accession_number["phsid"], accession_number["consent"]])
1715
                )
1716
                children = self._get_children(accession_number["phsid"])
1✔
1717
                if children:
1✔
1718
                    for child_phs in children:
1✔
1719
                        consent_mapping.setdefault(child_phs, set()).add(
1✔
1720
                            ".".join(
1721
                                [child_phs, accession_number["consent"]]
1722
                            )  # Assign parent consent to child study
1723
                        )
1724

1725
        self.logger.debug(f"consent mapping: {consent_mapping}")
1✔
1726

1727
        # go through existing access and find any c999's and make sure to give access to
1728
        # all accessions with consent for that phsid
1729
        for username, user_project_info in copy.deepcopy(user_projects).items():
1✔
1730
            for project, _ in user_project_info.items():
1✔
1731
                phs_match = access_number_matcher.match(project)
1✔
1732
                if phs_match and phs_match.groupdict()["consent"] == "c999":
1✔
1733
                    # give access to all consents
1734
                    all_phsids_with_consent = consent_mapping.get(
1✔
1735
                        phs_match.groupdict()["phsid"], []
1736
                    )
1737
                    self.logger.info(
1✔
1738
                        f"user {username} has c999 consent group for: {project}. "
1739
                        f"Granting access to all consents: {all_phsids_with_consent}"
1740
                    )
1741
                    # NOTE: Only giving read-storage at the moment (this is same
1742
                    #       permission we give for other dbgap projects)
1743
                    for phsid_with_consent in all_phsids_with_consent:
1✔
1744
                        user_projects[username].update(
1✔
1745
                            {phsid_with_consent: {"read-storage", "read"}}
1746
                        )
1747
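
    # Illustrative sketch (hypothetical study/consents): if a user has access to
    # "phs000123.c999" and the combined project list also contains "phs000123.c1" and
    # "phs000123.c2", consent_mapping maps the bare "phs000123" to every accession
    # seen with a consent, and the user is granted {"read", "read-storage"} on each of
    # them (including child studies, which inherit the parent's consents).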

1748
    def _update_arborist(self, session, user_yaml):
1✔
1749
        """
1750
        Create roles, resources, policies, groups in arborist from the information in
1751
        ``user_yaml``.
1752

1753
        The projects are sent to arborist as resources with paths like
1754
        ``/projects/{project}``. Roles are created with just the original names
1755
        for the privileges like ``"read-storage", "read"`` etc.
1756

1757
        Args:
1758
            session (sqlalchemy.Session)
1759
            user_yaml (UserYAML)
1760

1761
        Return:
1762
            bool: success
1763
        """
1764
        healthy = self._is_arborist_healthy()
1✔
1765
        if not healthy:
1✔
1766
            return False
×
1767

1768
        # Set up the resource tree in arborist by combining provided resources with any
1769
        # dbgap resources that were created before this.
1770
        #
1771
        # Why add dbgap resources if they've already been created?
1772
        #   B/C Arborist's PUT update will override existing subresources. So if a dbgap
1773
        #   resource was created under `/programs/phs000178`, anything provided in
1774
        #   user.yaml under `/programs` would completely wipe it out.
1775
        resources = user_yaml.authz.get("resources", [])
1✔
1776

1777
        dbgap_resource_paths = []
1✔
1778
        for path_list in self._dbgap_study_to_resources.values():
1✔
1779
            dbgap_resource_paths.extend(path_list)
1✔
1780

1781
        self.logger.debug("user_yaml resources: {}".format(resources))
1✔
1782
        self.logger.debug("dbgap resource paths: {}".format(dbgap_resource_paths))
1✔
1783

1784
        combined_resources = utils.combine_provided_and_dbgap_resources(
1✔
1785
            resources, dbgap_resource_paths
1786
        )
1787

1788
        for resource in combined_resources:
1✔
1789
            try:
1✔
1790
                self.logger.debug(
1✔
1791
                    "attempting to update arborist resource: {}".format(resource)
1792
                )
1793
                self.arborist_client.update_resource("/", resource, merge=True)
1✔
1794
            except ArboristError as e:
×
1795
                self.logger.error(e)
×
1796
                # keep going; maybe just some conflicts from things existing already
1797

1798
        # update roles
1799
        roles = user_yaml.authz.get("roles", [])
1✔
1800
        for role in roles:
1✔
1801
            try:
1✔
1802
                response = self.arborist_client.update_role(role["id"], role)
1✔
1803
                if response:
1✔
1804
                    self._created_roles.add(role["id"])
1✔
1805
            except ArboristError as e:
×
1806
                self.logger.info(
×
1807
                    "couldn't update role '{}', creating instead".format(str(e))
1808
                )
1809
                try:
×
1810
                    response = self.arborist_client.create_role(role)
×
1811
                    if response:
×
1812
                        self._created_roles.add(role["id"])
×
1813
                except ArboristError as e:
×
1814
                    self.logger.error(e)
×
1815
                    # keep going; maybe just some conflicts from things existing already
1816

1817
        # update policies
1818
        policies = user_yaml.authz.get("policies", [])
1✔
1819
        for policy in policies:
1✔
1820
            policy_id = policy.pop("id")
1✔
1821
            try:
1✔
1822
                self.logger.debug(
1✔
1823
                    "Trying to upsert policy with id {}".format(policy_id)
1824
                )
1825
                response = self.arborist_client.update_policy(
1✔
1826
                    policy_id, policy, create_if_not_exist=True
1827
                )
1828
            except ArboristError as e:
×
1829
                self.logger.error(e)
×
1830
                # keep going; maybe just some conflicts from things existing already
1831
            else:
1832
                if response:
1✔
1833
                    self.logger.debug("Upserted policy with id {}".format(policy_id))
1✔
1834
                    self._created_policies.add(policy_id)
1✔
1835

1836
        # update groups
1837
        groups = user_yaml.authz.get("groups", [])
1✔
1838

1839
        # delete from arborist the groups that have been deleted
1840
        # from the user.yaml
1841
        arborist_groups = set(
1✔
1842
            g["name"] for g in self.arborist_client.list_groups().get("groups", [])
1843
        )
1844
        useryaml_groups = set(g["name"] for g in groups)
1✔
1845
        for deleted_group in arborist_groups.difference(useryaml_groups):
1✔
1846
            # do not try to delete built in groups
1847
            if deleted_group not in ["anonymous", "logged-in"]:
×
1848
                self.arborist_client.delete_group(deleted_group)
×
1849

1850
        # create/update the groups defined in the user.yaml
1851
        for group in groups:
1✔
1852
            missing = {"name", "users", "policies"}.difference(set(group.keys()))
×
1853
            if missing:
×
1854
                name = group.get("name", "{MISSING NAME}")
×
1855
                self.logger.error(
×
1856
                    "group {} missing required field(s): {}".format(name, list(missing))
1857
                )
1858
                continue
×
1859
            try:
×
1860
                response = self.arborist_client.put_group(
×
1861
                    group["name"],
1862
                    # Arborist doesn't handle group descriptions yet
1863
                    # description=group.get("description", ""),
1864
                    users=group["users"],
1865
                    policies=group["policies"],
1866
                )
1867
            except ArboristError as e:
×
1868
                self.logger.info("couldn't put group: {}".format(str(e)))
×
1869

1870
        # Update policies for built-in (`anonymous` and `logged-in`) groups
1871

1872
        # First recreate these groups in order to clear out old, possibly deleted policies
1873
        for builtin_group in ["anonymous", "logged-in"]:
1✔
1874
            try:
1✔
1875
                response = self.arborist_client.put_group(builtin_group)
1✔
1876
            except ArboristError as e:
×
1877
                self.logger.info("couldn't put group: {}".format(str(e)))
×
1878

1879
        # Now add back policies that are in the user.yaml
1880
        for policy in user_yaml.authz.get("anonymous_policies", []):
1✔
1881
            self.arborist_client.grant_group_policy("anonymous", policy)
×
1882

1883
        for policy in user_yaml.authz.get("all_users_policies", []):
1✔
1884
            self.arborist_client.grant_group_policy("logged-in", policy)
×
1885

1886
        return True
1✔
1887

1888
    def _revoke_all_policies_preserve_mfa(self, username, idp=None):
1✔
1889
        """
1890
        If MFA is enabled for the user's idp, check if they have the /multifactor_auth resource and restore the
1891
        mfa_policy after revoking all policies.
1892
        """
1893
        user_data_from_arborist = None
1✔
1894
        try:
1✔
1895
            user_data_from_arborist = self.arborist_client.get_user(username)
1✔
1896
        except ArboristError:
×
1897
            # user doesn't exist in Arborist, nothing to revoke
1898
            return
×
1899

1900
        is_mfa_enabled = "multifactor_auth_claim_info" in config["OPENID_CONNECT"].get(
1✔
1901
            idp, {}
1902
        )
1903
        if not is_mfa_enabled:
1✔
1904
            # TODO This should be a diff, not a revocation of all policies.
1905
            self.arborist_client.revoke_all_policies_for_user(username)
1✔
1906
            return
1✔
1907

1908
        policies = []
1✔
1909
        try:
1✔
1910
            policies = user_data_from_arborist["policies"]
1✔
1911
        except Exception as e:
×
1912
            self.logger.error(
×
1913
                f"Could not retrieve user's policies, revoking all policies anyway. {e}"
1914
            )
1915
        finally:
1916
            # TODO This should be a diff, not a revocation of all policies.
1917
            self.arborist_client.revoke_all_policies_for_user(username)
1✔
1918

1919
        if "mfa_policy" in policies:
1✔
1920
            status_code = self.arborist_client.grant_user_policy(username, "mfa_policy")
1✔
1921
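
    # Illustrative note on the flow above: MFA is treated as enabled for an idp when
    # config["OPENID_CONNECT"][idp] contains a "multifactor_auth_claim_info" entry.
    # In that case the user's current policies are read first, all policies are
    # revoked, and "mfa_policy" alone is re-granted if the user previously had it;
    # without that config entry, all of the user's policies are simply revoked.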

1922
    def _update_authz_in_arborist(
1✔
1923
        self,
1924
        session,
1925
        user_projects,
1926
        user_yaml=None,
1927
        single_user_sync=False,
1928
        expires=None,
1929
    ):
1930
        """
1931
        Assign users policies in arborist from the information in
1932
        ``user_projects`` and optionally a ``user_yaml``.
1933

1934
        The projects are sent to arborist as resources with paths like
1935
        ``/projects/{project}``. Roles are created with just the original names
1936
        for the privileges like ``"read-storage", "read"`` etc.
1937

1938
        Args:
1939
            user_projects (dict)
1940
            user_yaml (UserYAML) optional, if there are policies for users in a user.yaml
1941
            single_user_sync (bool) whether authz update is for a single user
1942
            expires (int) time at which authz info in Arborist should expire
1943

1944
        Return:
1945
            bool: success
1946
        """
1947
        healthy = self._is_arborist_healthy()
1✔
1948
        if not healthy:
1✔
1949
            return False
×
1950

1951
        self.logger.debug("user_projects: {}".format(user_projects))
1✔
1952

1953
        if user_yaml:
1✔
1954
            self.logger.debug(
1✔
1955
                "useryaml abac before lowering usernames: {}".format(
1956
                    user_yaml.user_abac
1957
                )
1958
            )
1959
            user_yaml.user_abac = {
1✔
1960
                key.lower(): value for key, value in user_yaml.user_abac.items()
1961
            }
1962
            # update the project info with `projects` specified in user.yaml
1963
            self.sync_two_phsids_dict(user_yaml.user_abac, user_projects)
1✔
1964

1965
        # get list of users from arborist to make sure users that are completely removed
1966
        # from authorization sources get policies revoked
1967
        arborist_user_projects = {}
1✔
1968
        if not single_user_sync:
1✔
1969
            try:
1✔
1970
                arborist_users = self.arborist_client.get_users().json["users"]
1✔
1971

1972
                # construct user information, NOTE the lowering of the username. when adding/
1973
                # removing access, the case in the Fence db is used. For combining access, it is
1974
                # case-insensitive, so we lower
1975
                arborist_user_projects = {
1✔
1976
                    user["name"].lower(): {} for user in arborist_users
1977
                }
1978
            except (ArboristError, KeyError, AttributeError) as error:
×
1979
                # TODO usersync should probably exit with non-zero exit code at the end,
1980
                #      but sync should continue from this point so there are no partial
1981
                #      updates
1982
                self.logger.warning(
×
1983
                    "Could not get list of users in Arborist, continuing anyway. "
1984
                    "WARNING: this sync will NOT remove access for users no longer in "
1985
                    f"authorization sources. Error: {error}"
1986
                )
1987

1988
            # update the project info with users from arborist
1989
            self.sync_two_phsids_dict(arborist_user_projects, user_projects)
1✔
1990

1991
        policy_id_list = []
1✔
1992
        policies = []
1✔
1993

1994
        # prefer in-memory if available from user_yaml, if not, get from database
1995
        if user_yaml and user_yaml.project_to_resource:
1✔
1996
            project_to_authz_mapping = user_yaml.project_to_resource
1✔
1997
            self.logger.debug(
1✔
1998
                f"using in-memory project to authz resource mapping from "
1999
                f"user.yaml (instead of database): {project_to_authz_mapping}"
2000
            )
2001
        else:
2002
            project_to_authz_mapping = get_project_to_authz_mapping(session)
1✔
2003
            self.logger.debug(
1✔
2004
                f"using persisted project to authz resource mapping from database "
2005
                f"(instead of user.yaml - as it may not be available): {project_to_authz_mapping}"
2006
            )
2007

2008
        self.logger.debug(
1✔
2009
            f"_dbgap_study_to_resources: {self._dbgap_study_to_resources}"
2010
        )
2011
        all_resources = [
1✔
2012
            r
2013
            for resources in self._dbgap_study_to_resources.values()
2014
            for r in resources
2015
        ]
2016
        all_resources.extend(r for r in project_to_authz_mapping.values())
1✔
2017
        self._create_arborist_resources(all_resources)
1✔
2018

2019
        for username, user_project_info in user_projects.items():
1✔
2020
            self.logger.info("processing user `{}`".format(username))
1✔
2021
            user = query_for_user(session=session, username=username)
1✔
2022
            idp = None
1✔
2023
            if user:
1✔
2024
                username = user.username
1✔
2025
                idp = user.identity_provider.name if user.identity_provider else None
1✔
2026

2027
            self.arborist_client.create_user_if_not_exist(username)
1✔
2028
            if not single_user_sync:
1✔
2029
                self._revoke_all_policies_preserve_mfa(username, idp)
1✔
2030

2031
            # as of 2/11/2022, for single_user_sync, as RAS visa parsing has
2032
            # previously mapped each project to the same set of privileges
2033
            # (i.e.{'read', 'read-storage'}), unique_policies will just be a
2034
            # single policy with ('read', 'read-storage') being the single
2035
            # key
2036
            unique_policies = self._determine_unique_policies(
1✔
2037
                user_project_info, project_to_authz_mapping
2038
            )
2039

2040
            for roles in unique_policies.keys():
1✔
2041
                for role in roles:
1✔
2042
                    self._create_arborist_role(role)
1✔
2043

2044
            if single_user_sync:
1✔
2045
                for ordered_roles, ordered_resources in unique_policies.items():
1✔
2046
                    policy_hash = self._hash_policy_contents(
1✔
2047
                        ordered_roles, ordered_resources
2048
                    )
2049
                    self._create_arborist_policy(
1✔
2050
                        policy_hash,
2051
                        ordered_roles,
2052
                        ordered_resources,
2053
                        skip_if_exists=True,
2054
                    )
2055
                    # return here as it is not expected single_user_sync
2056
                    # will need any of the remaining user_yaml operations
2057
                    # left in _update_authz_in_arborist
2058
                    return self._grant_arborist_policy(
1✔
2059
                        username, policy_hash, expires=expires
2060
                    )
2061
            else:
2062
                for roles, resources in unique_policies.items():
1✔
2063
                    for role in roles:
1✔
2064
                        for resource in resources:
1✔
2065
                            # grant a policy to this user which is a single
2066
                            # role on a single resource
2067

2068
                            # format project '/x/y/z' -> 'x.y.z'
2069
                            # so the policy id will be something like 'x.y.z-create'
2070
                            policy_id = _format_policy_id(resource, role)
1✔
2071
                            if policy_id not in self._created_policies:
1✔
2072
                                try:
1✔
2073
                                    self.arborist_client.update_policy(
1✔
2074
                                        policy_id,
2075
                                        {
2076
                                            "description": "policy created by fence sync",
2077
                                            "role_ids": [role],
2078
                                            "resource_paths": [resource],
2079
                                        },
2080
                                        create_if_not_exist=True,
2081
                                    )
2082
                                except ArboristError as e:
×
2083
                                    self.logger.info(
×
2084
                                        "not creating policy in arborist; {}".format(
2085
                                            str(e)
2086
                                        )
2087
                                    )
2088
                                self._created_policies.add(policy_id)
1✔
2089

2090
                            self._grant_arborist_policy(
1✔
2091
                                username, policy_id, expires=expires
2092
                            )
2093

2094
            if user_yaml:
1✔
2095
                for policy in user_yaml.policies.get(username, []):
1✔
2096
                    self.arborist_client.grant_user_policy(
1✔
2097
                        username,
2098
                        policy,
2099
                        expires_at=expires,
2100
                    )
2101

2102
        if user_yaml:
1✔
2103
            for client_name, client_details in user_yaml.clients.items():
1✔
2104
                client_policies = client_details.get("policies", [])
×
2105
                clients = session.query(Client).filter_by(name=client_name).all()
×
2106
                # update existing clients, do not create new ones
2107
                if not clients:
×
2108
                    self.logger.warning(
×
2109
                        "client to update (`{}`) does not exist in fence: skipping".format(
2110
                            client_name
2111
                        )
2112
                    )
2113
                    continue
×
2114
                self.logger.debug(
×
2115
                    "updating client `{}` (found {} client IDs)".format(
2116
                        client_name, len(clients)
2117
                    )
2118
                )
2119
                # there may be more than 1 client with this name if credentials are being rotated,
2120
                # so we grant access to each client ID
2121
                for client in clients:
×
2122
                    try:
×
2123
                        self.arborist_client.update_client(
×
2124
                            client.client_id, client_policies
2125
                        )
2126
                    except ArboristError as e:
×
2127
                        self.logger.info(
×
2128
                            "not granting policies {} to client `{}` (`{}`); {}".format(
2129
                                client_policies, client_name, client.client_id, str(e)
2130
                            )
2131
                        )
2132

2133
        return True
1✔
2134

2135
    def _determine_unique_policies(self, user_project_info, project_to_authz_mapping):
1✔
2136
        """
2137
        Determine and return a dictionary of unique policies.
2138

2139
        Args (examples):
2140
            user_project_info (dict):
2141
            {
2142
                'phs000002.c1': { 'read-storage', 'read' },
2143
                'phs000001.c1': { 'read', 'read-storage' },
2144
                'phs000004.c1': { 'write', 'read' },
2145
                'phs000003.c1': { 'read', 'write' },
2146
                'phs000006.c1': { 'write-storage', 'write', 'read-storage', 'read' },
2147
                'phs000005.c1': { 'read', 'read-storage', 'write', 'write-storage' },
2148
            }
2149
            project_to_authz_mapping (dict):
2150
            {
2151
                'phs000001.c1': '/programs/DEV/projects/phs000001.c1'
2152
            }
2153

2154
        Return (for examples):
2155
            dict:
2156
            {
2157
                ('read', 'read-storage'): ('phs000001.c1', 'phs000002.c1'),
2158
                ('read', 'write'): ('phs000003.c1', 'phs000004.c1'),
2159
                ('read', 'read-storage', 'write', 'write-storage'): ('phs000005.c1', 'phs000006.c1'),
2160
            }
2161
        """
2162
        roles_to_resources = collections.defaultdict(list)
1✔
2163
        for study, roles in user_project_info.items():
1✔
2164
            ordered_roles = tuple(sorted(roles))
1✔
2165
            study_authz_paths = self._dbgap_study_to_resources.get(study, [study])
1✔
2166
            if study in project_to_authz_mapping:
1✔
2167
                study_authz_paths = [project_to_authz_mapping[study]]
1✔
2168
            roles_to_resources[ordered_roles].extend(study_authz_paths)
1✔
2169

2170
        policies = {}
1✔
2171
        for ordered_roles, unordered_resources in roles_to_resources.items():
1✔
2172
            policies[ordered_roles] = tuple(sorted(unordered_resources))
1✔
2173
        return policies
1✔
2174

2175
    def _create_arborist_role(self, role):
1✔
2176
        """
2177
        Wrapper around gen3authz's create_role with additional logging
2178

2179
        Args:
2180
            role (str): the id the created role should have in Arborist
2181

2182
        Return:
2183
            bool: True if the role was created successfully or it already
2184
                  exists. False otherwise
2185
        """
2186
        if role in self._created_roles:
1✔
2187
            return True
1✔
2188
        try:
1✔
2189
            response_json = self.arborist_client.create_role(
1✔
2190
                arborist_role_for_permission(role)
2191
            )
2192
        except ArboristError as e:
×
2193
            self.logger.error(
×
2194
                "could not create `{}` role in Arborist: {}".format(role, e)
2195
            )
2196
            return False
×
2197
        self._created_roles.add(role)
1✔
2198

2199
        if response_json is None:
1✔
2200
            self.logger.info("role `{}` already exists in Arborist".format(role))
×
2201
        else:
2202
            self.logger.info("created role `{}` in Arborist".format(role))
1✔
2203
        return True
1✔
2204

2205
    def _create_arborist_resources(self, resources):
1✔
2206
        """
2207
        Create resources in Arborist
2208

2209
        Args:
2210
            resources (list): a list of full Arborist resource paths to create
2211
            [
2212
                "/programs/DEV/projects/phs000001.c1",
2213
                "/programs/DEV/projects/phs000002.c1",
2214
                "/programs/DEV/projects/phs000003.c1"
2215
            ]
2216

2217
        Return:
2218
            bool: True if the resources were successfully created, False otherwise
2219

2220

2221
        As of 2/11/2022, for resources above,
2222
        utils.combine_provided_and_dbgap_resources({}, resources) returns:
2223
        [
2224
            { 'name': 'programs', 'subresources': [
2225
                { 'name': 'DEV', 'subresources': [
2226
                    { 'name': 'projects', 'subresources': [
2227
                        { 'name': 'phs000001.c1', 'subresources': []},
2228
                        { 'name': 'phs000002.c1', 'subresources': []},
2229
                        { 'name': 'phs000003.c1', 'subresources': []}
2230
                    ]}
2231
                ]}
2232
            ]}
2233
        ]
2234
        Because this list has a single object, only a single network request gets
2235
        sent to Arborist.
2236

2237
        However, for resources = ["/phs000001.c1", "/phs000002.c1", "/phs000003.c1"],
2238
        utils.combine_provided_and_dbgap_resources({}, resources) returns:
2239
        [
2240
            {'name': 'phs000001.c1', 'subresources': []},
2241
            {'name': 'phs000002.c1', 'subresources': []},
2242
            {'name': 'phs000003.c1', 'subresources': []}
2243
        ]
2244
        Because this list has 3 objects, 3 network requests get sent to Arborist.
2245

2246
        As a practical matter, for sync_single_user_visas, studies
2247
        should be nested under the `/programs` resource as in the former
2248
        example (i.e. only one network request gets made).
2249

2250
        TODO for the sake of simplicity, it would be nice if only one network
2251
        request was made no matter the input.
2252
        """
2253
        for request_body in utils.combine_provided_and_dbgap_resources({}, resources):
1✔
2254
            try:
1✔
2255
                response_json = self.arborist_client.update_resource(
1✔
2256
                    "/", request_body, merge=True
2257
                )
2258
            except ArboristError as e:
×
2259
                self.logger.error(
×
2260
                    "could not create Arborist resources using request body `{}`. error: {}".format(
2261
                        request_body, e
2262
                    )
2263
                )
2264
                return False
×
2265

2266
        self.logger.debug(
1✔
2267
            "created {} resource(s) in Arborist: `{}`".format(len(resources), resources)
2268
        )
2269
        return True
1✔
2270

2271
    def _create_arborist_policy(
1✔
2272
        self, policy_id, roles, resources, skip_if_exists=False
2273
    ):
2274
        """
2275
        Wrapper around gen3authz's create_policy with additional logging
2276

2277
        Args:
2278
            policy_id (str): the id the created policy should have in Arborist
2279
            roles (iterable): what roles the create policy should have
2280
            resources (iterable): what resources the created policy should have
2281
            skip_if_exists (bool): if True, this function will not treat an already
2282
                                   existent policy as an error
2283

2284
        Return:
2285
            bool: True if policy creation was successful. False otherwise
2286
        """
2287
        try:
1✔
2288
            response_json = self.arborist_client.create_policy(
1✔
2289
                {
2290
                    "id": policy_id,
2291
                    "role_ids": roles,
2292
                    "resource_paths": resources,
2293
                },
2294
                skip_if_exists=skip_if_exists,
2295
            )
2296
        except ArboristError as e:
×
2297
            self.logger.error(
×
2298
                "could not create policy `{}` in Arborist: {}".format(policy_id, e)
2299
            )
2300
            return False
×
2301

2302
        if response_json is None:
1✔
2303
            self.logger.info("policy `{}` already exists in Arborist".format(policy_id))
×
2304
        else:
2305
            self.logger.info("created policy `{}` in Arborist".format(policy_id))
1✔
2306
        return True
1✔
2307

2308
    def _hash_policy_contents(self, ordered_roles, ordered_resources):
1✔
2309
        """
2310
        Generate a sha256 hexdigest representing ordered_roles and ordered_resources.
2311

2312
        Args:
2313
            ordered_roles (iterable): policy roles in sorted order
2314
            ordered_resources (iterable): policy resources in sorted order
2315

2316
        Return:
2317
            str: SHA256 hex digest
2318
        """
2319

2320
        def escape(s):
1✔
2321
            return s.replace(",", "\\,")
1✔
2322

2323
        canonical_roles = ",".join(escape(r) for r in ordered_roles)
1✔
2324
        canonical_resources = ",".join(escape(r) for r in ordered_resources)
1✔
2325
        canonical_policy = f"{canonical_roles},,f{canonical_resources}"
1✔
2326
        policy_hash = hashlib.sha256(canonical_policy.encode("utf-8")).hexdigest()
1✔
2327

2328
        return policy_hash
1✔
2329
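
    # Illustrative sketch (hypothetical roles/resources): the policy id used by
    # single-user sync is the sha256 hex digest of a comma-joined, escaped canonical
    # form of the sorted roles and resources, e.g.
    #
    #     policy_hash = self._hash_policy_contents(
    #         ("read", "read-storage"), ("/programs/phs000001.c1",)
    #     )
    #
    # so identical role/resource combinations always map to the same Arborist policy.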

2330
    def _grant_arborist_policy(self, username, policy_id, expires=None):
1✔
2331
        """
2332
        Wrapper around gen3authz's grant_user_policy with additional logging
2333

2334
        Args:
2335
            username (str): username of the user in Arborist that the policy should be
2336
                            granted to
2337
            policy_id (str): Arborist policy id
2338
            expires (int): POSIX timestamp for when policy should expire
2339

2340
        Return:
2341
            bool: True if granting of policy was successful, False otherwise
2342
        """
2343
        try:
1✔
2344
            response_json = self.arborist_client.grant_user_policy(
1✔
2345
                username,
2346
                policy_id,
2347
                expires_at=expires,
2348
            )
2349
        except ArboristError as e:
×
2350
            self.logger.error(
×
2351
                "could not grant policy `{}` to user `{}`: {}".format(
2352
                    policy_id, username, e
2353
                )
2354
            )
2355
            return False
×
2356

2357
        self.logger.debug(
1✔
2358
            "granted policy `{}` to user `{}`".format(policy_id, username)
2359
        )
2360
        return True
1✔
2361

2362
    def _determine_arborist_resource(self, dbgap_study, dbgap_config):
1✔
2363
        """
2364
        Determine the arborist resource path and add it to
2365
        self._dbgap_study_to_resources
2366

2367
        Args:
2368
            dbgap_study (str): study phs identifier
2369
            dbgap_config (dict): dictionary of config for dbgap server
2370

2371
        """
2372
        default_namespaces = dbgap_config.get("study_to_resource_namespaces", {}).get(
1✔
2373
            "_default", ["/"]
2374
        )
2375
        namespaces = dbgap_config.get("study_to_resource_namespaces", {}).get(
1✔
2376
            dbgap_study, default_namespaces
2377
        )
2378

2379
        self.logger.debug(f"dbgap study namespaces: {namespaces}")
1✔
2380

2381
        arborist_resource_namespaces = [
1✔
2382
            namespace.rstrip("/") + "/programs/" for namespace in namespaces
2383
        ]
2384

2385
        for resource_namespace in arborist_resource_namespaces:
1✔
2386
            full_resource_path = resource_namespace + dbgap_study
1✔
2387
            if dbgap_study not in self._dbgap_study_to_resources:
1✔
2388
                self._dbgap_study_to_resources[dbgap_study] = []
1✔
2389
            self._dbgap_study_to_resources[dbgap_study].append(full_resource_path)
1✔
2390
        return arborist_resource_namespaces
1✔
2391
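
    # Illustrative sketch (hypothetical config values): with
    #
    #     study_to_resource_namespaces:
    #       _default: ["/"]
    #       phs000123: ["/orgA/", "/orgB/"]
    #
    # study "phs000123" maps to the resource paths "/orgA/programs/phs000123" and
    # "/orgB/programs/phs000123", while any other study falls back to "_default" and
    # maps to "/programs/<phsid>".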

2392
    def _is_arborist_healthy(self):
1✔
2393
        if not self.arborist_client:
1✔
2394
            self.logger.warning("no arborist client set; skipping arborist dbgap sync")
×
2395
            return False
×
2396
        if not self.arborist_client.healthy():
1✔
2397
            # TODO (rudyardrichter, 2019-01-07): add backoff/retry here
2398
            self.logger.error(
×
2399
                "arborist service is unavailable; skipping main arborist dbgap sync"
2400
            )
2401
            return False
×
2402
        return True
1✔
2403

2404
    def _pick_sync_type(self, visa):
1✔
2405
        """
2406
        Pick type of visa to parse according to the visa provider
2407
        """
2408
        sync_client = None
1✔
2409
        if visa.type in self.visa_types["ras"]:
1✔
2410
            sync_client = self.ras_sync_client
1✔
2411
        else:
2412
            raise Exception(
×
2413
                "Visa type {} not recognized. Configure in fence-config".format(
2414
                    visa.type
2415
                )
2416
            )
2417
        if not sync_client:
1✔
2418
            raise Exception("Sync client for {} not configured".format(visa.type))
×
2419

2420
        return sync_client
1✔
2421

2422
    def sync_single_user_visas(
1✔
2423
        self, user, ga4gh_visas, sess=None, expires=None, skip_google_updates=False
2424
    ):
2425
        """
2426
        Sync a single user's visas during login or DRS/data access
2427

2428
        IMPORTANT NOTE: THIS DOES NOT VALIDATE THE VISA. ENSURE THIS IS DONE
2429
                        BEFORE THIS.
2430

2431
        Args:
2432
            user (userdatamodel.user.User): Fence user whose visas'
2433
                                            authz info is being synced
2434
            ga4gh_visas (list): a list of fence.models.GA4GHVisaV1 objects
2435
                                that are ALREADY VALIDATED
2436
            sess (sqlalchemy.orm.session.Session): database session
2437
            expires (int): time at which synced Arborist policies and
2438
                           inclusion in any GBAG are set to expire
2439
            skip_google_updates (bool): True if google group updates should be skipped, False otherwise.
2440

2441
        Return:
2442
            list of successfully parsed visas
2443
        """
2444
        self.ras_sync_client = RASVisa(logger=self.logger)
1✔
2445
        dbgap_config = self.dbGaP[0]
1✔
2446
        parse_consent_code = self._get_parse_consent_code(dbgap_config)
1✔
2447
        enable_common_exchange_area_access = dbgap_config.get(
1✔
2448
            "enable_common_exchange_area_access", False
2449
        )
2450
        study_common_exchange_areas = dbgap_config.get(
1✔
2451
            "study_common_exchange_areas", {}
2452
        )
2453

2454
        try:
1✔
2455
            user_yaml = UserYAML.from_file(
1✔
2456
                self.sync_from_local_yaml_file, encrypted=False, logger=self.logger
2457
            )
2458
        except (EnvironmentError, AssertionError) as e:
×
2459
            self.logger.error(str(e))
×
2460
            self.logger.error("aborting early")
×
2461
            raise
×
2462

2463
        user_projects = dict()
1✔
2464
        projects = {}
1✔
2465
        info = {}
1✔
2466
        parsed_visas = []
1✔
2467

2468
        for visa in ga4gh_visas:
1✔
2469
            project = {}
1✔
2470
            visa_type = self._pick_sync_type(visa)
1✔
2471
            encoded_visa = visa.ga4gh_visa
1✔
2472

2473
            try:
1✔
2474
                project, info = visa_type._parse_single_visa(
1✔
2475
                    user,
2476
                    encoded_visa,
2477
                    visa.expires,
2478
                    parse_consent_code,
2479
                )
2480
            except Exception:
×
2481
                self.logger.warning(
×
2482
                    f"ignoring unsuccessfully parsed or expired visa: {encoded_visa}"
2483
                )
2484
                continue
×
2485

2486
            projects = {**projects, **project}
1✔
2487
            parsed_visas.append(visa)
1✔
2488

2489
        info["user_id"] = user.id
1✔
2490
        info["username"] = user.username
1✔
2491
        user_projects[user.username] = projects
1✔
2492

2493
        user_projects = self.parse_projects(user_projects)
1✔
2494

2495
        if parse_consent_code and enable_common_exchange_area_access:
1✔
2496
            self.logger.info(
1✔
2497
                f"using study to common exchange area mapping: {study_common_exchange_areas}"
2498
            )
2499

2500
        self._process_user_projects(
1✔
2501
            user_projects,
2502
            enable_common_exchange_area_access,
2503
            study_common_exchange_areas,
2504
            dbgap_config,
2505
            sess,
2506
        )
2507

2508
        if parse_consent_code:
1✔
2509
            self._grant_all_consents_to_c999_users(
1✔
2510
                user_projects, user_yaml.project_to_resource
2511
            )
2512

2513
        if user_projects:
1✔
2514
            self.logger.info("Sync to storage backend [sync_single_user_visas]")
1✔
2515
            self.sync_to_storage_backend(
1✔
2516
                user_projects,
2517
                info,
2518
                sess,
2519
                expires=expires,
2520
                skip_google_updates=skip_google_updates,
2521
            )
2522
        else:
2523
            self.logger.info("No users for syncing")
×
2524

2525
        # update arborist db (user access)
2526
        if self.arborist_client:
1✔
2527
            self.logger.info("Synchronizing arborist with authorization info...")
1✔
2528
            success = self._update_authz_in_arborist(
1✔
2529
                sess,
2530
                user_projects,
2531
                user_yaml=user_yaml,
2532
                single_user_sync=True,
2533
                expires=expires,
2534
            )
2535
            if success:
1✔
2536
                self.logger.info(
1✔
2537
                    "Finished synchronizing authorization info to arborist"
2538
                )
2539
            else:
2540
                self.logger.error(
×
2541
                    "Could not synchronize authorization info successfully to arborist"
2542
                )
2543
        else:
2544
            self.logger.error("No arborist client set; skipping arborist sync")
×
2545

2546
        return parsed_visas
1✔