• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

uc-cdis / fence / 10801747339

10 Sep 2024 10:46PM UTC coverage: 75.321% (+0.007%) from 75.314%
10801747339

Pull #1177

github

k-burt-uch
Removing logs, fixing unit tests
Pull Request #1177: fix(PXP-11385): Adds single user google group updates

7813 of 10373 relevant lines covered (75.32%)

0.75 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

81.65
fence/sync/sync_users.py
1
import backoff
1✔
2
import glob
1✔
3
import jwt
1✔
4
import os
1✔
5
import re
1✔
6
import subprocess as sp
1✔
7
import yaml
1✔
8
import copy
1✔
9
import datetime
1✔
10
import uuid
1✔
11
import collections
1✔
12
import hashlib
1✔
13

14
from contextlib import contextmanager
1✔
15
from collections import defaultdict
1✔
16
from csv import DictReader
1✔
17
from io import StringIO
1✔
18
from stat import S_ISDIR
1✔
19

20
import paramiko
1✔
21
from cdislogging import get_logger
1✔
22
from email_validator import validate_email, EmailNotValidError
1✔
23
from gen3authz.client.arborist.errors import ArboristError
1✔
24
from gen3users.validation import validate_user_yaml
1✔
25
from paramiko.proxy import ProxyCommand
1✔
26
from sqlalchemy.exc import IntegrityError
1✔
27
from sqlalchemy import func
1✔
28

29
from fence.config import config
1✔
30
from fence.models import (
1✔
31
    AccessPrivilege,
32
    AuthorizationProvider,
33
    Project,
34
    Tag,
35
    User,
36
    query_for_user,
37
    Client,
38
    IdentityProvider,
39
    get_project_to_authz_mapping,
40
)
41
from fence.resources.google.utils import get_or_create_proxy_group_id
1✔
42
from fence.resources.storage import StorageManager
1✔
43
from fence.resources.google.access_utils import update_google_groups_for_users
1✔
44
from fence.resources.google.access_utils import GoogleUpdateException
1✔
45
from fence.sync import utils
1✔
46
from fence.sync.passport_sync.ras_sync import RASVisa
1✔
47
from fence.utils import get_SQLAlchemyDriver, DEFAULT_BACKOFF_SETTINGS
1✔
48

49

50
def _format_policy_id(path, privilege):
    """
    Build a policy ID from a resource path and a privilege.

    The non-empty "/"-separated segments of `path` are joined with "." and the
    privilege is appended after a "-".
    """
    segments = [segment for segment in path.split("/") if segment]
    dotted_resource = ".".join(segments)
    return f"{dotted_resource}-{privilege}"
1✔
53

54

55
def download_dir(sftp, remote_dir, local_dir):
    """
    Recursively download every file under remote_dir into local_dir.

    Args:
        sftp: SFTP client exposing `listdir_attr(path)` and
            `get(remote_path, local_path)`
        remote_dir(str): remote directory to copy from
        local_dir(str): local directory to copy into; created if missing
    Returns: None
    """
    dir_items = sftp.listdir_attr(remote_dir)

    # Files are written directly at `local_dir/<name>`, so the directory has to
    # exist before the first `get`; this matters for every nested directory
    # reached through the recursion below.
    os.makedirs(local_dir, exist_ok=True)

    for item in dir_items:
        remote_path = remote_dir + "/" + item.filename
        local_path = os.path.join(local_dir, item.filename)
        if S_ISDIR(item.st_mode):
            download_dir(sftp, remote_path, local_path)
        else:
            sftp.get(remote_path, local_path)
×
72

73

74
def arborist_role_for_permission(permission):
    """
    Build the arborist role document for a single privilege.

    For the programs/projects in the existing fence access control model, a policy
    is generated for each combination of program/project and privilege so that
    arborist can check permissions. Each role therefore holds exactly one
    permission, named after the privilege, applying to every service ("*") with
    the privilege as the method.
    """
    action = {"service": "*", "method": permission}
    only_permission = {"id": permission, "action": action}
    return {"id": permission, "permissions": [only_permission]}
87

88

89
@contextmanager
def _read_file(filepath, encrypted=True, key=None, logger=None):
    """
    Context manager for reading and optionally decrypting file it only
    decrypts files encrypted by unix 'crypt' tool which is used by dbGaP.

    Every file handle opened here is closed when the context exits, including
    when the body of the `with` statement raises.

    Args:
        filepath (str): path to the file
        encrypted (bool): whether the file is encrypted
        key (str): decryption key handed to `mcrypt` (only used when encrypted)
        logger: optional logger used to report errors

    Returns:
        Generator[file-like class]: file like object for the file
    """
    if not encrypted:
        # `with` guarantees the handle is closed even if the caller's block raises
        with open(filepath, "r") as f:
            yield f
        return

    has_crypt = sp.call(["which", "mcrypt"])
    if has_crypt != 0:
        if logger:
            logger.error("Need to install mcrypt to decrypt files from dbgap")
        # TODO (rudyardrichter, 2019-01-08): raise error and move exit out to script
        exit(1)

    with open(filepath, "r") as encrypted_file, open(os.devnull, "w") as devnull:
        p = sp.Popen(
            [
                "mcrypt",
                "-a",
                "enigma",
                "-o",
                "scrypt",
                "-m",
                "stream",
                "--bare",
                "--key",
                key,
                "--force",
            ],
            stdin=encrypted_file,
            stdout=sp.PIPE,
            stderr=devnull,
            universal_newlines=True,
        )
        try:
            yield StringIO(p.communicate()[0])
        except UnicodeDecodeError:
            # logger is optional; do not turn a decode problem into AttributeError
            if logger:
                logger.error("Could not decode file. Check the decryption key.")
1✔
136

137

138
class UserYAML(object):
    """
    Representation of the information in a YAML file describing user, project, and ABAC
    information for access control.
    """

    def __init__(
        self,
        projects=None,
        user_info=None,
        policies=None,
        clients=None,
        authz=None,
        project_to_resource=None,
        logger=None,
        user_abac=None,
    ):
        """
        Store the parsed sections; every omitted (or falsy) section becomes an
        empty dict so the attributes can always be used as mappings.
        """
        self.projects = projects or {}
        self.user_info = user_info or {}
        self.user_abac = user_abac or {}
        self.policies = policies or {}
        self.clients = clients or {}
        self.authz = authz or {}
        self.project_to_resource = project_to_resource or {}
        self.logger = logger

    @classmethod
    def from_file(cls, filepath, encrypted=True, key=None, logger=None):
        """
        Add access by "auth_id" to "self.projects" to update the Fence DB.
        Add access by "resource" to "self.user_abac" to update Arborist.

        Args:
            filepath (str): path to the user.yaml; when falsy nothing is read and
                an empty UserYAML is returned
            encrypted (bool): whether the file is encrypted
            key (str): decryption key, forwarded to the file reader
            logger: optional logger

        Returns:
            UserYAML: instance populated from the file contents

        Raises:
            EnvironmentError: if a username is encountered more than once
        """
        data = {}
        if filepath:
            with _read_file(filepath, encrypted=encrypted, key=key, logger=logger) as f:
                file_contents = f.read()
                validate_user_yaml(file_contents)  # run user.yaml validation tests
                # safe_load returns None for an empty document; keep `data` a dict
                # so the lookups below do not fail on it
                data = yaml.safe_load(file_contents) or {}
        else:
            if logger:
                logger.info("Did not sync a user.yaml, no file path provided.")

        projects = dict()
        user_info = dict()
        policies = dict()

        # resources should be the resource tree to construct in arborist
        user_abac = dict()

        # Fall back on rbac block if no authz. Remove when rbac in useryaml fully deprecated.
        if not data.get("authz") and data.get("rbac"):
            if logger:
                logger.info(
                    "No authz block found but rbac block present. Using rbac block"
                )
            data["authz"] = data["rbac"]

        # get user project mapping to arborist resources if it exists
        # (`or {}` tolerates an `authz:` key that is present but null)
        project_to_resource = (data.get("authz") or {}).get(
            "user_project_to_resource", dict()
        )

        # read projects and privileges for each user
        # (`or {}` tolerates a `users:` key that is present but null)
        users = data.get("users") or {}
        for username, details in users.items():
            # users should occur only once each; skip if already processed
            if username in projects:
                msg = "invalid yaml file: user `{}` occurs multiple times".format(
                    username
                )
                if logger:
                    logger.error(msg)
                raise EnvironmentError(msg)

            privileges = {}
            resource_permissions = dict()
            for project in details.get("projects", {}):
                try:
                    privileges[project["auth_id"]] = set(project["privilege"])
                except KeyError as e:
                    if logger:
                        logger.error("project {} missing field: {}".format(project, e))
                    continue

                # project may not have `resource` field.
                # prefer resource field;
                # if no resource or mapping, assume auth_id is resource.
                resource = project.get("resource", project["auth_id"])

                if project["auth_id"] not in project_to_resource:
                    project_to_resource[project["auth_id"]] = resource
                resource_permissions[resource] = set(project["privilege"])

            user_info[username] = {
                "email": details.get("email", ""),
                "display_name": details.get("display_name", ""),
                "phone_number": details.get("phone_number", ""),
                "tags": details.get("tags", {}),
                "admin": details.get("admin", False),
            }
            if not details.get("email"):
                # no explicit email: use the username itself when it is a valid
                # email address, otherwise leave the email empty
                try:
                    valid = validate_email(
                        username, allow_smtputf8=False, check_deliverability=False
                    )
                    user_info[username]["email"] = valid.email
                except EmailNotValidError:
                    pass
            projects[username] = privileges
            user_abac[username] = resource_permissions

            # list of policies we want to grant to this user, which get sent to arborist
            # to check if they're allowed to do certain things
            policies[username] = details.get("policies", [])

        if logger:
            logger.info(
                "Got user project to arborist resource mapping:\n{}".format(
                    str(project_to_resource)
                )
            )

        authz = data.get("authz") or {}
        if not authz:
            # older version: resources in root, no `authz` section or `rbac` section
            if logger:
                logger.warning(
                    "access control YAML file is using old format (missing `authz`/`rbac`"
                    " section in the root); assuming that if it exists `resources` will"
                    " be on the root level, and continuing"
                )
            # we're going to throw it into the `authz` dictionary anyways, so the rest of
            # the code can pretend it's in the normal place that we expect
            resources = data.get("resources", [])
            # keep authz empty dict if resources is not specified
            if resources:
                authz["resources"] = resources

        clients = data.get("clients", {})

        return cls(
            projects=projects,
            user_info=user_info,
            user_abac=user_abac,
            policies=policies,
            clients=clients,
            authz=authz,
            project_to_resource=project_to_resource,
            logger=logger,
        )

    def persist_project_to_resource(self, db_session):
        """
        Store the mappings from Project.auth_id to authorization resource (Project.authz)

        The mapping comes from an external source, this function persists what was parsed
        into memory into the database for future use. Existing projects are updated,
        missing ones are created (named after their auth_id), and the session is
        committed once at the end.
        """
        for auth_id, authz_resource in self.project_to_resource.items():
            project = (
                db_session.query(Project).filter(Project.auth_id == auth_id).first()
            )
            if project:
                project.authz = authz_resource
            else:
                project = Project(name=auth_id, auth_id=auth_id, authz=authz_resource)
                db_session.add(project)
        db_session.commit()
1✔
306

307

308
class UserSyncer(object):
1✔
309
    def __init__(
        self,
        dbGaP,
        DB,
        project_mapping,
        storage_credentials=None,
        db_session=None,
        is_sync_from_dbgap_server=False,
        sync_from_local_csv_dir=None,
        sync_from_local_yaml_file=None,
        arborist=None,
        folder=None,
    ):
        """
        Syncs ACL files from dbGap to auth database and storage backends
        Args:
            dbGaP: a list of dict containing creds to access dbgap sftp
            DB: database connection string
            project_mapping: a dict containing how dbgap ids map to projects
            storage_credentials: a dict containing creds for storage backends
            db_session: stored as `self.session`
            is_sync_from_dbgap_server: stored flag
            sync_from_local_csv_dir: stored path to a local csv dir
            sync_from_local_yaml_file: stored path to a local yaml file
            arborist:
                ArboristClient instance if the syncer should also create
                resources in arborist
            folder: a local folder where dbgap telemetry files will sync to
        """
        # sync sources
        self.dbGaP = dbGaP
        self.is_sync_from_dbgap_server = is_sync_from_dbgap_server
        self.sync_from_local_csv_dir = sync_from_local_csv_dir
        self.sync_from_local_yaml_file = sync_from_local_yaml_file
        self.folder = folder

        # database access
        self.session = db_session
        self.driver = get_SQLAlchemyDriver(DB)

        # bookkeeping that is filled in while syncing
        self.project_mapping = project_mapping or {}
        self._projects = dict()
        self._created_roles = set()
        self._created_policies = set()
        self._dbgap_study_to_resources = dict()

        if config["DEBUG"] is True:
            log_level = "debug"
        else:
            log_level = "info"
        self.logger = get_logger("user_syncer", log_level=log_level)
        self.arborist_client = arborist

        # auth_source used for logging. username : [source1, source2]
        self.auth_source = defaultdict(set)
        self.visa_types = config.get("USERSYNC", {}).get("visa_types", {})

        # merge the parent -> child study mappings of every dbGaP config;
        # later configs override earlier ones for the same parent
        merged_child_studies = {}
        for server_config in dbGaP:
            merged_child_studies.update(
                server_config.get("parent_to_child_studies_mapping", {})
            )
        self.parent_to_child_studies_mapping = merged_child_studies

        # `storage_manager` is only defined when credentials were provided
        if storage_credentials:
            self.storage_manager = StorageManager(
                storage_credentials, logger=self.logger
            )
        self.id_patterns = []
1✔
366

367
    @staticmethod
1✔
368
    def _match_pattern(filepath, id_patterns, encrypted=True):
1✔
369
        """
370
        Check if the filename matches dbgap access control file pattern
371

372
        Args:
373
            filepath (str): path to file
374
            encrypted (bool): whether the file is encrypted
375

376
        Returns:
377
            bool: whether the pattern matches
378
        """
379
        id_patterns.append(r"authentication_file_phs(\d{6}).(csv|txt)")
1✔
380
        for pattern in id_patterns:
1✔
381
            if encrypted:
1✔
382
                pattern += r".enc"
×
383
            pattern += r"$"
1✔
384
            # when converting the YAML from fence-config,
385
            # python reads it as Python string literal. So "\" turns into "\\"
386
            # which messes with the regex match
387
            pattern.replace("\\\\", "\\")
1✔
388
            if re.match(pattern, os.path.basename(filepath)):
1✔
389
                return True
1✔
390
        return False
1✔
391

392
    def _get_from_sftp_with_proxy(self, server, path):
        """
        Download all data from sftp sever to a local dir

        When `server["proxy"]` is set, the connection is tunnelled through an
        `ssh ... nc` proxy command. The proxy is closed when the download ends,
        whether it succeeded or raised.

        Args:
            server (dict) : dictionary containing info to access sftp server
            path (str): path to local directory

        Returns:
            None
        """
        proxy = None
        if server.get("proxy", "") != "":
            command = "ssh -i ~/.ssh/id_rsa {user}@{proxy} nc {host} {port}".format(
                user=server.get("proxy_user", ""),
                proxy=server.get("proxy", ""),
                host=server.get("host", ""),
                port=server.get("port", 22),
            )
            self.logger.info("SSH proxy command: {}".format(command))

            proxy = ProxyCommand(command)

        try:
            with paramiko.SSHClient() as client:
                client.set_log_channel(self.logger.name)

                # NOTE(review): AutoAddPolicy accepts unknown host keys without
                # verification; confirm this is acceptable for the dbGaP servers.
                client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
                parameters = {
                    "hostname": str(server.get("host", "")),
                    "username": str(server.get("username", "")),
                    "password": str(server.get("password", "")),
                    "port": int(server.get("port", 22)),
                }
                if proxy:
                    parameters["sock"] = proxy

                self.logger.info(
                    "SSH connection hostname:post {}:{}".format(
                        parameters.get("hostname", "unknown"),
                        parameters.get("port", "unknown"),
                    )
                )
                self._connect_with_ssh(ssh_client=client, parameters=parameters)
                with client.open_sftp() as sftp:
                    download_dir(sftp, "./", path)
        finally:
            # release the proxy subprocess even when connecting or downloading failed
            if proxy:
                proxy.close()
×
440

441
    @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
    def _connect_with_ssh(self, ssh_client, parameters):
        """
        Open the SSH connection, retried with exponential backoff on any exception
        (retry limits come from DEFAULT_BACKOFF_SETTINGS).

        Args:
            ssh_client: client object whose `connect` method is called
            parameters (dict): keyword arguments forwarded to `ssh_client.connect`

        Returns:
            None
        """
        ssh_client.connect(**parameters)
1✔
444

445
    def _get_from_ftp_with_proxy(self, server, path):
        """
        Download data from ftp sever to a local dir

        Runs `lftp` to mirror the remote directory through an http proxy. The
        command is passed as an argument list (no shell), so credentials, host
        and proxy values containing shell metacharacters can neither break the
        command nor inject extra ones. The exit status of `lftp` is not checked.

        Args:
            server (dict): dictionary containing information for accessing server
            path(str): path to local files

        Returns:
            None
        """
        lftp_commands = "set ftp:proxy http://{}; mirror . {}; exit".format(
            server.get("proxy", ""),
            path,
        )
        argv = [
            "lftp",
            "-u",
            "{},{}".format(server.get("username", ""), server.get("password", "")),
            str(server.get("host", "")),
            "-e",
            lftp_commands,
        ]
        try:
            sp.call(argv)
        except OSError as e:
            # e.g. lftp is not installed; report it instead of raising so a
            # missing binary is still non-fatal, but no longer silent
            self.logger.error("Could not run lftp: {}".format(e))
×
466

467
    def _get_parse_consent_code(self, dbgap_config={}):
1✔
468
        return dbgap_config.get(
1✔
469
            "parse_consent_code", True
470
        )  # Should this really be true?
471

472
    def _parse_csv(self, file_dict, sess, dbgap_config=None, encrypted=True):
        """
        parse csv files to python dict

        Empty files and files whose name does not match an accepted access
        control filename pattern are skipped with a warning. Rows without a
        `login`, or whose project id matches none of the allowed project id
        patterns, are skipped as well.

        Args:
            file_dict: a dictionary with key(file path) and value(privileges)
            sess: sqlalchemy session
            dbgap_config: a dictionary containing information about the dbGaP sftp server
                (comes from fence config); None is treated as an empty config
            encrypted: boolean indicating whether those files are encrypted


        Return:
            Tuple[[dict, dict]]:
                (user_project, user_info) where user_project is a mapping from
                usernames to project permissions and user_info is a mapping
                from usernames to user details, such as email

        Example:

            (
                {
                    username: {
                        'project1': {'read-storage','write-storage'},
                        'project2': {'read-storage'},
                    }
                },
                {
                    username: {
                        'email': 'email@mail.com',
                        'display_name': 'display name',
                        'phone_number': '123-456-789',
                        'tags': {'dbgap_role': 'PI'}
                    }
                },
            )

        """
        dbgap_config = dbgap_config or {}
        user_projects = dict()
        user_info = defaultdict(dict)

        # parse dbGaP sftp server information
        dbgap_key = dbgap_config.get("decrypt_key", None)
        allow_non_dbgap_whitelist = dbgap_config.get("allow_non_dbGaP_whitelist", False)

        if allow_non_dbgap_whitelist:
            for item in dbgap_config.get("allowed_whitelist_patterns", []):
                whitelist_pattern = item.replace("\\\\", "\\")
                # this method runs once per dbGaP config / sync; only add patterns
                # that are not registered yet so the list does not keep growing
                if whitelist_pattern not in self.id_patterns:
                    self.id_patterns.append(whitelist_pattern)

        enable_common_exchange_area_access = dbgap_config.get(
            "enable_common_exchange_area_access", False
        )
        study_common_exchange_areas = dbgap_config.get(
            "study_common_exchange_areas", {}
        )
        parse_consent_code = self._get_parse_consent_code(dbgap_config)

        if parse_consent_code and enable_common_exchange_area_access:
            self.logger.info(
                f"using study to common exchange area mapping: {study_common_exchange_areas}"
            )

        project_id_patterns = [r"phs(\d{6})"]
        if "additional_allowed_project_id_patterns" in dbgap_config:
            patterns = dbgap_config.get("additional_allowed_project_id_patterns")
            # when converting the YAML from fence-config, python reads it as
            # Python string literal. So "\" turns into "\\" which messes with
            # the regex match
            patterns = [pattern.replace("\\\\", "\\") for pattern in patterns]
            project_id_patterns += patterns

        self.logger.info(f"Using these file paths: {file_dict.items()}")
        for filepath, privileges in file_dict.items():
            self.logger.info("Reading file {}".format(filepath))
            if os.stat(filepath).st_size == 0:
                self.logger.warning("Empty file {}".format(filepath))
                continue
            if not self._match_pattern(
                filepath, id_patterns=self.id_patterns, encrypted=encrypted
            ):
                self.logger.warning(
                    "Filename {} does not match dbgap access control filename pattern;"
                    " this could mean that the filename has an invalid format, or has"
                    " an unexpected .enc extension, or lacks the .enc extension where"
                    " expected. This file is NOT being processed by usersync!".format(
                        filepath
                    )
                )
                continue

            with _read_file(
                filepath, encrypted=encrypted, key=dbgap_key, logger=self.logger
            ) as f:
                csv = DictReader(f, quotechar='"', skipinitialspace=True)
                for row in csv:
                    username = row.get("login") or ""
                    if username == "":
                        continue

                    if allow_non_dbgap_whitelist:
                        phsid = (
                            row.get("phsid") or (row.get("project_id") or "")
                        ).split(".")
                    else:
                        phsid = (row.get("phsid") or "").split(".")

                    dbgap_project = phsid[0]
                    # There are issues where dbgap has a wrong entry in their
                    # whitelist. Since we do a bulk arborist request, there are
                    # wrong entries in it that invalidates the whole request
                    # causing other correct entries not to be added
                    matches_allowed_pattern = False
                    for pattern in project_id_patterns:
                        self.logger.debug(
                            "Checking pattern:{} with project_id:{}".format(
                                pattern, dbgap_project
                            )
                        )
                        if re.match(pattern, dbgap_project):
                            matches_allowed_pattern = True
                            break
                    if not matches_allowed_pattern:
                        self.logger.warning(
                            "Skip processing from file {}, user {} with project {}".format(
                                filepath,
                                username,
                                dbgap_project,
                            )
                        )
                        continue
                    if len(phsid) > 1 and parse_consent_code:
                        consent_code = phsid[-1]

                        # c999 indicates full access to all consents and access
                        # to a study-specific exchange area
                        # access to at least one study-specific exchange area implies access
                        # to the parent study's common exchange area
                        #
                        # NOTE: Handling giving access to all consents is done at
                        #       a later time, when we have full information about possible
                        #       consents
                        self.logger.debug(
                            f"got consent code {consent_code} from dbGaP project "
                            f"{dbgap_project}"
                        )
                        if (
                            consent_code == "c999"
                            and enable_common_exchange_area_access
                            and dbgap_project in study_common_exchange_areas
                        ):
                            self.logger.info(
                                "found study with consent c999 and Fence "
                                "is configured to parse exchange area data. Giving user "
                                f"{username} {privileges} privileges in project: "
                                f"{study_common_exchange_areas[dbgap_project]}."
                            )
                            self._add_dbgap_project_for_user(
                                study_common_exchange_areas[dbgap_project],
                                privileges,
                                username,
                                sess,
                                user_projects,
                                dbgap_config,
                            )

                        dbgap_project += "." + consent_code

                    self._add_children_for_dbgap_project(
                        dbgap_project,
                        privileges,
                        username,
                        sess,
                        user_projects,
                        dbgap_config,
                    )

                    display_name = row.get("user name") or ""
                    tags = {"dbgap_role": row.get("role") or ""}

                    # some dbgap telemetry files have information about a researchers PI
                    if "downloader for" in row:
                        tags["pi"] = row["downloader for"]

                    # prefer name over previous "downloader for" if it exists
                    if "downloader for names" in row:
                        tags["pi"] = row["downloader for names"]

                    # keep the email / phone number seen on an earlier row for
                    # this user when the current row does not provide one
                    user_info[username] = {
                        "email": row.get("email")
                        or user_info[username].get("email")
                        or "",
                        "display_name": display_name,
                        "phone_number": row.get("phone")
                        or user_info[username].get("phone_number")
                        or "",
                        "tags": tags,
                    }

                    self._process_dbgap_project(
                        dbgap_project,
                        privileges,
                        username,
                        sess,
                        user_projects,
                        dbgap_config,
                    )

        return user_projects, user_info
1✔
678

679
    def _get_children(self, dbgap_project):
1✔
680
        return self.parent_to_child_studies_mapping.get(dbgap_project.split(".")[0])
1✔
681

682
    def _add_children_for_dbgap_project(
1✔
683
        self, dbgap_project, privileges, username, sess, user_projects, dbgap_config
684
    ):
685
        """
686
        Adds the configured child studies for the given dbgap_project, adding it to the provided user_projects. If
687
        parse_consent_code is true, then the consents granted in the provided dbgap_project will also be granted to the
688
        child studies.
689
        """
690
        parent_phsid = dbgap_project
1✔
691
        parse_consent_code = self._get_parse_consent_code(dbgap_config)
1✔
692
        child_suffix = ""
1✔
693
        if parse_consent_code and re.match(
1✔
694
            config["DBGAP_ACCESSION_WITH_CONSENT_REGEX"], dbgap_project
695
        ):
696
            parent_phsid_parts = dbgap_project.split(".")
1✔
697
            parent_phsid = parent_phsid_parts[0]
1✔
698
            child_suffix = "." + parent_phsid_parts[1]
1✔
699

700
        if parent_phsid not in self.parent_to_child_studies_mapping:
1✔
701
            return
1✔
702

703
        self.logger.info(
1✔
704
            f"found parent study {parent_phsid} and Fence "
705
            "is configured to provide additional access to child studies. Giving user "
706
            f"{username} {privileges} privileges in projects: "
707
            f"{{k + child_suffix: v + child_suffix for k, v in self.parent_to_child_studies_mapping.items()}}."
708
        )
709
        child_studies = self.parent_to_child_studies_mapping.get(parent_phsid, [])
1✔
710
        for child_study in child_studies:
1✔
711
            self._add_dbgap_project_for_user(
1✔
712
                child_study + child_suffix,
713
                privileges,
714
                username,
715
                sess,
716
                user_projects,
717
                dbgap_config,
718
            )
719

720
    def _add_dbgap_project_for_user(
1✔
721
        self, dbgap_project, privileges, username, sess, user_projects, dbgap_config
722
    ):
723
        """
724
        Helper function for csv parsing that adds a given dbgap project to Fence/Arborist
725
        and then updates the dictionary containing all user's project access
726
        """
727
        if dbgap_project not in self._projects:
1✔
728
            self.logger.debug(
1✔
729
                "creating Project in fence for dbGaP study: {}".format(dbgap_project)
730
            )
731

732
            project = self._get_or_create(sess, Project, auth_id=dbgap_project)
1✔
733

734
            # need to add dbgap project to arborist
735
            if self.arborist_client:
1✔
736
                self._determine_arborist_resource(dbgap_project, dbgap_config)
1✔
737

738
            if project.name is None:
1✔
739
                project.name = dbgap_project
1✔
740
            self._projects[dbgap_project] = project
1✔
741
        phsid_privileges = {dbgap_project: set(privileges)}
1✔
742
        if username in user_projects:
1✔
743
            user_projects[username].update(phsid_privileges)
1✔
744
        else:
745
            user_projects[username] = phsid_privileges
1✔
746

747
    @staticmethod
1✔
748
    def sync_two_user_info_dict(user_info1, user_info2):
1✔
749
        """
750
        Merge user_info1 into user_info2. Values in user_info2 are overriden
751
        by values in user_info1. user_info2 ends up containing the merged dict.
752

753
        Args:
754
            user_info1 (dict): nested dict
755
            user_info2 (dict): nested dict
756

757
            Example:
758
            {username: {'email': 'abc@email.com'}}
759

760
        Returns:
761
            None
762
        """
763
        user_info2.update(user_info1)
1✔
764

765
    def sync_two_phsids_dict(
        self,
        phsids1,
        phsids2,
        source1=None,
        source2=None,
        phsids2_overrides_phsids1=True,
    ):
        """
        Merge phsids1 into phsids2 in place; phsids2 ends up holding the
        merged result and phsids1 is left untouched.

        Per user in phsids1:
          * user missing from phsids2 (or mapped to an empty value): the
            user's projects are copied into phsids2 and `source1` is recorded.
          * user present in both and `phsids2_overrides_phsids1` is true:
            projects are merged; for a project present on both sides the
            privileges are unioned. `source1` and `source2` are recorded.
          * user present in both and `phsids2_overrides_phsids1` is false:
            phsids2 is kept as is for that user; only `source2` is recorded.

        Sources are recorded in `self.auth_source[user]` and only when the
        corresponding `source*` argument is truthy.

        Args:
            phsids1, phsids2: nested dicts mapping phsids to sets of permissions

            source1, source2: source of authz information (eg. dbgap, user_yaml, visas)

            Example:
            {
                username: {
                    phsid1: {'read-storage','write-storage'},
                    phsid2: {'read-storage'},
                }
            }

        Return:
            None
        """
        for user, projects1 in phsids1.items():
            if not phsids2.get(user):
                if source1:
                    self.auth_source[user].add(source1)
                # Store copies rather than the caller's own dict/sets: a later
                # merge into phsids2 updates these collections in place and
                # would otherwise silently modify phsids1 as well.
                phsids2[user] = {
                    phsid: copy.copy(privileges)
                    for phsid, privileges in projects1.items()
                }
            elif phsids2_overrides_phsids1:
                if source1:
                    self.auth_source[user].add(source1)
                if source2:
                    self.auth_source[user].add(source2)
                projects2 = phsids2[user]
                for phsid1, privilege1 in projects1.items():
                    projects2.setdefault(phsid1, set()).update(privilege1)
            elif source2:
                self.auth_source[user].add(source2)
×
829

830
    def sync_to_db_and_storage_backend(
        self,
        user_project,
        user_info,
        sess,
        do_not_revoke_from_db_and_storage=False,
        expires=None,
    ):
        """
        Bring the database and the storage backends in line with the given
        user access.

        The (username, project) pairs in `user_project` are compared,
        case-insensitively on the username, with the AccessPrivilege rows in
        the database. Pairs only in the database are revoked (unless
        `do_not_revoke_from_db_and_storage` is set), pairs only in
        `user_project` are granted, and pairs on both sides are re-granted in
        storage and updated in the database.

        Args:
            user_project (dict): a dictionary of

                {
                    username: {
                        'project1': {'read-storage','write-storage'},
                        'project2': {'read-storage'}
                    }
                }

            user_info (dict): a dictionary of {username: user_info{}}
            sess: a sqlalchemy session
            do_not_revoke_from_db_and_storage (bool): when true, skip revoking
                access and skip the admin clean-up step
            expires: forwarded to `_grant_from_storage`

        Return:
            None
        """
        google_bulk_mapping = {} if config["GOOGLE_BULK_UPDATES"] else None

        self._init_projects(user_project, sess)

        dbgap_provider = self._get_or_create(sess, AuthorizationProvider, name="dbGaP")
        fence_provider = self._get_or_create(sess, AuthorizationProvider, name="fence")
        auth_provider_list = [dbgap_provider, fence_provider]

        access_in_db = {
            (ua.user.username.lower(), ua.project.auth_id)
            for ua in sess.query(AccessPrivilege).all()
        }

        # usernames are stored case-sensitively in the db but have to be
        # compared case-insensitively, so work with lowercased keys
        user_project_lowercase = {
            username.lower(): projects for username, projects in user_project.items()
        }
        access_to_sync = {
            (username.lower(), project)
            for username, projects in user_project.items()
            for project in projects
        }
        user_info_lowercase = {
            username.lower(): info for username, info in user_info.items()
        }

        to_delete = access_in_db - access_to_sync
        to_add = access_to_sync - access_in_db
        to_update = access_in_db & access_to_sync

        # user records keep the username's original case, so the
        # non-lowercased user_info dict is passed here
        self._upsert_userinfo(sess, user_info)

        if not do_not_revoke_from_db_and_storage:
            self._revoke_from_storage(
                to_delete, sess, google_bulk_mapping=google_bulk_mapping
            )
            self._revoke_from_db(sess, to_delete)

        self._grant_from_storage(
            to_add,
            user_project_lowercase,
            sess,
            google_bulk_mapping=google_bulk_mapping,
            expires=expires,
        )
        self._grant_from_db(
            sess,
            to_add,
            user_info_lowercase,
            user_project_lowercase,
            auth_provider_list,
        )

        # access that already exists is granted again in storage and
        # refreshed in the db
        self._grant_from_storage(
            to_update,
            user_project_lowercase,
            sess,
            google_bulk_mapping=google_bulk_mapping,
            expires=expires,
        )
        self._update_from_db(sess, to_update, user_project_lowercase)

        if not do_not_revoke_from_db_and_storage:
            self._validate_and_update_user_admin(sess, user_info_lowercase)

        sess.commit()

        if config["GOOGLE_BULK_UPDATES"]:
            self.logger.info("Doing bulk Google update...")
            update_google_groups_for_users(google_bulk_mapping)
            self.logger.info("Bulk Google update done!")

        sess.commit()
1✔
939

940
    def sync_to_storage_backend(self, user_project, user_info, sess, expires):
        """
        Grant one user's access in the storage backends with an expiration.

        Nothing is revoked here: every (username, project) pair in
        `user_project` is granted.

        Args:
            user_project (dict): a dictionary of

                {
                    username: {
                        'project1': {'read-storage','write-storage'},
                        'project2': {'read-storage'}
                    }
                }

            user_info (dict): a dictionary of attributes for a user; the
                'username' key is read always and 'user_id' when Google bulk
                updates are enabled
            sess: a sqlalchemy session
            expires: expiration passed on to the storage grants; required

        Return:
            None

        Raises:
            Exception: if `expires` is falsy
        """
        if not expires:
            raise Exception(
                f"sync to storage backend requires an expiration. you provided: {expires}"
            )

        google_group_user_mapping = None
        if config["GOOGLE_BULK_UPDATES"]:
            google_group_user_mapping = {}
            get_or_create_proxy_group_id(
                expires=expires,
                user_id=user_info["user_id"],
                username=user_info["username"],
                session=sess,
                storage_manager=self.storage_manager,
            )

        # TODO: eventually it'd be nice to remove this step but it's required
        #       so that grant_from_storage can determine what storage backends
        #       are needed for a project.
        self._init_projects(user_project, sess)

        # usernames are compared case-insensitively, so key everything by the
        # lowercased username
        user_project_lowercase = {
            username.lower(): projects for username, projects in user_project.items()
        }
        to_add = {
            (username.lower(), project)
            for username, projects in user_project.items()
            for project in projects
        }

        # the user record is upserted under the lowercased username
        self._upsert_userinfo(sess, {user_info["username"].lower(): user_info})

        self._grant_from_storage(
            to_add,
            user_project_lowercase,
            sess,
            google_bulk_mapping=google_group_user_mapping,
            expires=expires,
        )

        if config["GOOGLE_BULK_UPDATES"]:
            self.logger.info("Updating user's google groups ...")
            update_google_groups_for_users(google_group_user_mapping)
            self.logger.info("Google groups update done!!")

        sess.commit()
1✔
1013

1014
    def _revoke_from_db(self, sess, to_delete):
1✔
1015
        """
1016
        Revoke user access to projects in the auth database
1017

1018
        Args:
1019
            sess: sqlalchemy session
1020
            to_delete: a set of (username, project.auth_id) to be revoked from db
1021
        Return:
1022
            None
1023
        """
1024
        for username, project_auth_id in to_delete:
1✔
1025
            q = (
1✔
1026
                sess.query(AccessPrivilege)
1027
                .filter(AccessPrivilege.project.has(auth_id=project_auth_id))
1028
                .join(AccessPrivilege.user)
1029
                .filter(func.lower(User.username) == username)
1030
                .all()
1031
            )
1032
            for access in q:
1✔
1033
                self.logger.info(
1✔
1034
                    "revoke {} access to {} in db".format(username, project_auth_id)
1035
                )
1036
                sess.delete(access)
1✔
1037

1038
    def _validate_and_update_user_admin(self, sess, user_info):
        """
        Strip admin rights from every admin user in the db whose lowercased
        username is not a key of `user_info`.

        Only the presence of the username is checked; the 'admin' value in
        `user_info` is not looked at here.

        Args:
            sess: sqlalchemy session
            user_info: a dict, keyed by lowercase username, of
            {
                username: {
                    'email': email,
                    'display_name': display_name,
                    'phone_number': phonenum,
                    'tags': {'k1':'v1', 'k2': 'v2'}
                    'admin': is_admin
                }
            }
        Returns:
            None
        """
        current_admins = sess.query(User).filter_by(is_admin=True).all()
        for admin_user in current_admins:
            if admin_user.username.lower() in user_info:
                continue
            admin_user.is_admin = False
            sess.add(admin_user)
            self.logger.info(
                "remove admin access from {} in db".format(
                    admin_user.username.lower()
                )
            )
1066

1067
    def _update_from_db(self, sess, to_update, user_project):
1✔
1068
        """
1069
        Update user access to projects in the auth database
1070

1071
        Args:
1072
            sess: sqlalchemy session
1073
            to_update:
1074
                a set of (username, project.auth_id) to be updated from db
1075

1076
        Return:
1077
            None
1078
        """
1079

1080
        for username, project_auth_id in to_update:
1✔
1081
            q = (
1✔
1082
                sess.query(AccessPrivilege)
1083
                .filter(AccessPrivilege.project.has(auth_id=project_auth_id))
1084
                .join(AccessPrivilege.user)
1085
                .filter(func.lower(User.username) == username)
1086
                .all()
1087
            )
1088
            for access in q:
1✔
1089
                access.privilege = user_project[username][project_auth_id]
1✔
1090
                self.logger.info(
1✔
1091
                    "update {} with {} access to {} in db".format(
1092
                        username, access.privilege, project_auth_id
1093
                    )
1094
                )
1095

1096
    def _grant_from_db(self, sess, to_add, user_info, user_project, auth_provider_list):
1✔
1097
        """
1098
        Grant user access to projects in the auth database
1099
        Args:
1100
            sess: sqlalchemy session
1101
            to_add: a set of (username, project.auth_id) to be granted
1102
            user_project:
1103
                a dictionary of {username: {project: {'read','write'}}
1104
        Return:
1105
            None
1106
        """
1107
        for username, project_auth_id in to_add:
1✔
1108
            u = query_for_user(session=sess, username=username)
1✔
1109

1110
            auth_provider = auth_provider_list[0]
1✔
1111
            if "dbgap_role" not in user_info[username]["tags"]:
1✔
1112
                auth_provider = auth_provider_list[1]
1✔
1113
            user_access = AccessPrivilege(
1✔
1114
                user=u,
1115
                project=self._projects[project_auth_id],
1116
                privilege=list(user_project[username][project_auth_id]),
1117
                auth_provider=auth_provider,
1118
            )
1119
            self.logger.info(
1✔
1120
                "grant user {} to {} with access {}".format(
1121
                    username, user_access.project, user_access.privilege
1122
                )
1123
            )
1124
            sess.add(user_access)
1✔
1125

1126
    def _upsert_userinfo(self, sess, user_info):
        """
        Create or update a User row for every entry of `user_info`.

        For each username the user is created if missing, registered with
        arborist when a client is configured, and its email, display name,
        phone number and admin flag are overwritten (missing values default
        to "" / False). An identity provider is attached only if the user has
        none yet. Tags are synced only when the entry has a non-empty 'tags'
        value: db tags whose key is not in the new tags are removed, the
        others are updated or created.

        Args:
            sess: sqlalchemy session
            user_info:
                a dict of {username: {display_name, phone_number, tags, admin}

        Return:
            None
        """
        for username in user_info:
            info = user_info[username]
            u = query_for_user(session=sess, username=username)

            if u is None:
                self.logger.info("create user {}".format(username))
                u = User(username=username)
                sess.add(u)

            if self.arborist_client:
                self.arborist_client.create_user({"name": username})

            u.email = info.get("email", "")
            u.display_name = info.get("display_name", "")
            u.phone_number = info.get("phone_number", "")
            u.is_admin = info.get("admin", False)

            idp_name = info.get("idp_name", "")
            if idp_name and not u.identity_provider:
                idp = (
                    sess.query(IdentityProvider)
                    .filter(IdentityProvider.name == idp_name)
                    .first()
                )
                if not idp:
                    idp = IdentityProvider(name=idp_name)
                u.identity_provider = idp

            # do not update if there is no tag
            new_tags = info.get("tags")
            if not new_tags:
                continue

            # remove user db tags if they are not shown in new tags.
            # Iterate over a snapshot: removing from u.tags while looping over
            # it directly skips the element that follows each removal, which
            # left stale tags behind.
            for tag in list(u.tags):
                if tag.key not in new_tags:
                    u.tags.remove(tag)

            # sync
            for k, v in new_tags.items():
                found = False
                for tag in u.tags:
                    if tag.key == k:
                        found = True
                        tag.value = v
                # create new tag if not found
                if not found:
                    u.tags.append(Tag(key=k, value=v))
1✔
1186

1187
    def _revoke_from_storage(self, to_delete, sess, google_bulk_mapping=None):
1✔
1188
        """
1189
        If a project have storage backend, revoke user's access to buckets in
1190
        the storage backend.
1191

1192
        Args:
1193
            to_delete: a set of (username, project.auth_id) to be revoked
1194

1195
        Return:
1196
            None
1197
        """
1198
        for username, project_auth_id in to_delete:
1✔
1199
            project = (
1✔
1200
                sess.query(Project).filter(Project.auth_id == project_auth_id).first()
1201
            )
1202
            for sa in project.storage_access:
1✔
1203
                if not hasattr(self, "storage_manager"):
1✔
1204
                    self.logger.error(
×
1205
                        (
1206
                            "CANNOT revoke {} access to {} in {} because there is NO "
1207
                            "configured storage accesses at all. See configuration. "
1208
                            "Continuing anyway..."
1209
                        ).format(username, project_auth_id, sa.provider.name)
1210
                    )
1211
                    continue
×
1212

1213
                self.logger.info(
1✔
1214
                    "revoke {} access to {} in {}".format(
1215
                        username, project_auth_id, sa.provider.name
1216
                    )
1217
                )
1218
                self.storage_manager.revoke_access(
1✔
1219
                    provider=sa.provider.name,
1220
                    username=username,
1221
                    project=project,
1222
                    session=sess,
1223
                    google_bulk_mapping=google_bulk_mapping,
1224
                )
1225

1226
    def _grant_from_storage(
1✔
1227
        self, to_add, user_project, sess, google_bulk_mapping=None, expires=None
1228
    ):
1229
        """
1230
        If a project have storage backend, grant user's access to buckets in
1231
        the storage backend.
1232

1233
        Args:
1234
            to_add: a set of (username, project.auth_id)  to be granted
1235
            user_project: a dictionary like:
1236

1237
                    {username: {phsid: {'read-storage','write-storage'}}}
1238

1239
        Return:
1240
            dict of the users' storage usernames to their user_projects and the respective storage access.
1241
        """
1242
        storage_user_to_sa_and_user_project = defaultdict()
1✔
1243
        for username, project_auth_id in to_add:
1✔
1244
            project = self._projects[project_auth_id]
1✔
1245
            for sa in project.storage_access:
1✔
1246
                access = list(user_project[username][project_auth_id])
1✔
1247
                if not hasattr(self, "storage_manager"):
1✔
1248
                    self.logger.error(
×
1249
                        (
1250
                            "CANNOT grant {} access {} to {} in {} because there is NO "
1251
                            "configured storage accesses at all. See configuration. "
1252
                            "Continuing anyway..."
1253
                        ).format(username, access, project_auth_id, sa.provider.name)
1254
                    )
1255
                    continue
×
1256

1257
                self.logger.info(
1✔
1258
                    "grant {} access {} to {} in {}".format(
1259
                        username, access, project_auth_id, sa.provider.name
1260
                    )
1261
                )
1262
                storage_username = self.storage_manager.grant_access(
1✔
1263
                    provider=sa.provider.name,
1264
                    username=username,
1265
                    project=project,
1266
                    access=access,
1267
                    session=sess,
1268
                    google_bulk_mapping=google_bulk_mapping,
1269
                    expires=expires,
1270
                )
1271

1272
                storage_user_to_sa_and_user_project[storage_username] = (sa, project)
1✔
1273
        return storage_user_to_sa_and_user_project
1✔
1274

1275
    def _init_projects(self, user_project, sess):
1✔
1276
        """
1277
        initialize projects
1278
        """
1279
        if self.project_mapping:
1✔
1280
            for projects in list(self.project_mapping.values()):
1✔
1281
                for p in projects:
1✔
1282
                    self.logger.debug(
1✔
1283
                        "creating Project with info from project_mapping: {}".format(p)
1284
                    )
1285
                    project = self._get_or_create(sess, Project, **p)
1✔
1286
                    self._projects[p["auth_id"]] = project
1✔
1287
        for _, projects in user_project.items():
1✔
1288
            for auth_id in list(projects.keys()):
1✔
1289
                project = sess.query(Project).filter(Project.auth_id == auth_id).first()
1✔
1290
                if not project:
1✔
1291
                    data = {"name": auth_id, "auth_id": auth_id}
1✔
1292
                    try:
1✔
1293
                        project = self._get_or_create(sess, Project, **data)
1✔
1294
                    except IntegrityError as e:
×
1295
                        sess.rollback()
×
1296
                        self.logger.error(
×
1297
                            f"Project {auth_id} already exists. Detail {str(e)}"
1298
                        )
1299
                        raise Exception(
×
1300
                            "Project {} already exists. Detail {}. Please contact your system administrator.".format(
1301
                                auth_id, str(e)
1302
                            )
1303
                        )
1304
                if auth_id not in self._projects:
1✔
1305
                    self._projects[auth_id] = project
1✔
1306

1307
    @staticmethod
1✔
1308
    def _get_or_create(sess, model, **kwargs):
1✔
1309
        instance = sess.query(model).filter_by(**kwargs).first()
1✔
1310
        if not instance:
1✔
1311
            instance = model(**kwargs)
1✔
1312
            sess.add(instance)
1✔
1313
        return instance
1✔
1314

1315
    def _process_dbgap_files(self, dbgap_config, sess):
1✔
1316
        """
1317
        Args:
1318
            dbgap_config : a dictionary containing information about a single
1319
                           dbgap sftp server (from fence config)
1320
            sess: database session
1321

1322
        Return:
1323
            user_projects (dict)
1324
            user_info (dict)
1325
        """
1326
        dbgap_file_list = []
1✔
1327
        hostname = dbgap_config["info"]["host"]
1✔
1328
        username = dbgap_config["info"]["username"]
1✔
1329
        encrypted = dbgap_config["info"].get("encrypted", True)
1✔
1330
        folderdir = os.path.join(str(self.folder), str(hostname), str(username))
1✔
1331

1332
        try:
1✔
1333
            if os.path.exists(folderdir):
1✔
1334
                dbgap_file_list = glob.glob(
×
1335
                    os.path.join(folderdir, "*")
1336
                )  # get lists of file from folder
1337
            else:
1338
                self.logger.info("Downloading files from: {}".format(hostname))
1✔
1339
                dbgap_file_list = self._download(dbgap_config)
1✔
1340
        except Exception as e:
1✔
1341
            self.logger.error(e)
1✔
1342
            exit(1)
1✔
1343
        self.logger.info("dbgap files: {}".format(dbgap_file_list))
×
1344
        user_projects, user_info = self._get_user_permissions_from_csv_list(
×
1345
            dbgap_file_list,
1346
            encrypted=encrypted,
1347
            session=sess,
1348
            dbgap_config=dbgap_config,
1349
        )
1350

1351
        user_projects = self.parse_projects(user_projects)
×
1352
        return user_projects, user_info
×
1353

1354
    def _get_user_permissions_from_csv_list(
1✔
1355
        self, file_list, encrypted, session, dbgap_config={}
1356
    ):
1357
        """
1358
        Args:
1359
            file_list: list of files (represented as strings)
1360
            encrypted: boolean indicating whether those files are encrypted
1361
            session: sqlalchemy session
1362
            dbgap_config: a dictionary containing information about the dbGaP sftp server
1363
                    (comes from fence config)
1364

1365
        Return:
1366
            user_projects (dict)
1367
            user_info (dict)
1368
        """
1369
        permissions = [{"read-storage", "read"} for _ in file_list]
1✔
1370
        user_projects, user_info = self._parse_csv(
1✔
1371
            dict(list(zip(file_list, permissions))),
1372
            sess=session,
1373
            dbgap_config=dbgap_config,
1374
            encrypted=encrypted,
1375
        )
1376
        return user_projects, user_info
1✔
1377

1378
    def _merge_multiple_local_csv_files(
1✔
1379
        self, dbgap_file_list, encrypted, dbgap_configs, session
1380
    ):
1381
        """
1382
        Args:
1383
            dbgap_file_list (list): a list of whitelist file locations stored locally
1384
            encrypted (bool): whether the file is encrypted (comes from fence config)
1385
            dbgap_configs (list): list of dictionaries containing information about the dbgap server (comes from fence config)
1386
            session (sqlalchemy.Session): database session
1387

1388
        Return:
1389
            merged_user_projects (dict)
1390
            merged_user_info (dict)
1391
        """
1392
        merged_user_projects = {}
1✔
1393
        merged_user_info = {}
1✔
1394

1395
        for dbgap_config in dbgap_configs:
1✔
1396
            user_projects, user_info = self._get_user_permissions_from_csv_list(
1✔
1397
                dbgap_file_list,
1398
                encrypted,
1399
                session=session,
1400
                dbgap_config=dbgap_config,
1401
            )
1402
            self.sync_two_user_info_dict(user_info, merged_user_info)
1✔
1403
            self.sync_two_phsids_dict(user_projects, merged_user_projects)
1✔
1404
        return merged_user_projects, merged_user_info
1✔
1405

1406
    def _merge_multiple_dbgap_sftp(self, dbgap_servers, sess):
1✔
1407
        """
1408
        Args:
1409
            dbgap_servers : a list of dictionaries each containging config on
1410
                           dbgap sftp server (comes from fence config)
1411
            sess: database session
1412

1413
        Return:
1414
            merged_user_projects (dict)
1415
            merged_user_info (dict)
1416
        """
1417
        merged_user_projects = {}
1✔
1418
        merged_user_info = {}
1✔
1419
        for dbgap in dbgap_servers:
1✔
1420
            user_projects, user_info = self._process_dbgap_files(dbgap, sess)
1✔
1421
            # merge into merged_user_info
1422
            # user_info overrides original info in merged_user_info
1423
            self.sync_two_user_info_dict(user_info, merged_user_info)
1✔
1424

1425
            # merge all access info dicts into "merged_user_projects".
1426
            # the access info is combined - if the user_projects access is
1427
            # ["read"] and the merged_user_projects is ["read-storage"], the
1428
            # resulting access is ["read", "read-storage"].
1429
            self.sync_two_phsids_dict(user_projects, merged_user_projects)
1✔
1430
        return merged_user_projects, merged_user_info
1✔
1431

1432
    def parse_projects(self, user_projects):
        """
        Return a new dict with the same values as `user_projects` but with
        every key lowercased. If two keys differ only by case, the one that
        comes later in iteration order wins.
        """
        lowered = {}
        for key, value in user_projects.items():
            lowered[key.lower()] = value
        return lowered
1✔
1437

1438
    def _process_dbgap_project(
1✔
1439
        self, dbgap_project, privileges, username, sess, user_projects, dbgap_config
1440
    ):
1441
        if dbgap_project not in self.project_mapping:
1✔
1442
            self._add_dbgap_project_for_user(
1✔
1443
                dbgap_project,
1444
                privileges,
1445
                username,
1446
                sess,
1447
                user_projects,
1448
                dbgap_config,
1449
            )
1450

1451
        for element_dict in self.project_mapping.get(dbgap_project, []):
1✔
1452
            try:
1✔
1453
                phsid_privileges = {element_dict["auth_id"]: set(privileges)}
1✔
1454

1455
                # need to add dbgap project to arborist
1456
                if self.arborist_client:
1✔
1457
                    self._determine_arborist_resource(
1✔
1458
                        element_dict["auth_id"], dbgap_config
1459
                    )
1460

1461
                if username not in user_projects:
1✔
1462
                    user_projects[username] = {}
1✔
1463
                user_projects[username].update(phsid_privileges)
1✔
1464

1465
            except ValueError as e:
×
1466
                self.logger.info(e)
×
1467

1468
    def _process_user_projects(
1✔
1469
        self,
1470
        user_projects,
1471
        enable_common_exchange_area_access,
1472
        study_common_exchange_areas,
1473
        dbgap_config,
1474
        sess,
1475
    ):
1476
        for username in user_projects.keys():
1✔
1477
            for project in user_projects[username].keys():
1✔
1478
                phsid = project.split(".")
1✔
1479
                dbgap_project = phsid[0]
1✔
1480
                privileges = user_projects[username][project]
1✔
1481
                if len(phsid) > 1 and self._get_parse_consent_code(dbgap_config):
1✔
1482
                    consent_code = phsid[-1]
1✔
1483

1484
                    # c999 indicates full access to all consents and access
1485
                    # to a study-specific exchange area
1486
                    # access to at least one study-specific exchange area implies access
1487
                    # to the parent study's common exchange area
1488
                    #
1489
                    # NOTE: Handling giving access to all consents is done at
1490
                    #       a later time, when we have full information about possible
1491
                    #       consents
1492
                    self.logger.debug(
1✔
1493
                        f"got consent code {consent_code} from dbGaP project "
1494
                        f"{dbgap_project}"
1495
                    )
1496
                    if (
1✔
1497
                        consent_code == "c999"
1498
                        and enable_common_exchange_area_access
1499
                        and dbgap_project in study_common_exchange_areas
1500
                    ):
1501
                        self.logger.info(
×
1502
                            "found study with consent c999 and Fence "
1503
                            "is configured to parse exchange area data. Giving user "
1504
                            f"{username} {privileges} privileges in project: "
1505
                            f"{study_common_exchange_areas[dbgap_project]}."
1506
                        )
1507
                        self._add_dbgap_project_for_user(
×
1508
                            study_common_exchange_areas[dbgap_project],
1509
                            privileges,
1510
                            username,
1511
                            sess,
1512
                            user_projects,
1513
                            dbgap_config,
1514
                        )
1515

1516
                    dbgap_project += "." + consent_code
1✔
1517

1518
                self._process_dbgap_project(
1✔
1519
                    dbgap_project,
1520
                    privileges,
1521
                    username,
1522
                    sess,
1523
                    user_projects,
1524
                    dbgap_config,
1525
                )
1526

1527
    def sync(self):
        """
        Run the sync, reusing ``self.session`` when one is set and otherwise
        opening a session from ``self.driver`` for the duration of the sync.
        """
        if not self.session:
            with self.driver.session as db_session:
                self._sync(db_session)
            return
        self._sync(self.session)
×
1533

1534
    def download(self):
        """
        Download the files of every dbGaP server configured in ``self.dbGaP``,
        one server at a time and in configuration order.
        """
        for server_config in self.dbGaP:
            self._download(server_config)
×
1537

1538
    def _download(self, dbgap_config):
1✔
1539
        """
1540
        Download files from dbgap server.
1541
        """
1542
        server = dbgap_config["info"]
1✔
1543
        protocol = dbgap_config["protocol"]
1✔
1544
        hostname = server["host"]
1✔
1545
        username = server["username"]
1✔
1546
        folderdir = os.path.join(str(self.folder), str(hostname), str(username))
1✔
1547

1548
        if not os.path.exists(folderdir):
1✔
1549
            os.makedirs(folderdir)
1✔
1550

1551
        self.logger.info("Download from server")
1✔
1552
        try:
1✔
1553
            if protocol == "sftp":
1✔
1554
                self._get_from_sftp_with_proxy(server, folderdir)
1✔
1555
            else:
1556
                self._get_from_ftp_with_proxy(server, folderdir)
×
1557
            dbgap_files = glob.glob(os.path.join(folderdir, "*"))
×
1558
            return dbgap_files
×
1559
        except Exception as e:
1✔
1560
            self.logger.error(e)
1✔
1561
            raise
1✔
1562

1563
    def _sync(self, sess):
        """
        Collect files from dbgap server(s), sync csv and yaml files to storage
        backend and fence DB

        Steps, in order:
            1. gather access from the dbGaP servers (when enabled), the local
               CSV directory and the local user.yaml
            2. merge user info and project access into single dicts
            3. sync the merged access to the Fence DB / storage backend
            4. sync resources, roles, policies, groups and user access to
               Arborist (when an Arborist client is configured)
            5. persist the project -> authz resource mapping

        Args:
            sess: database session used for every DB operation of the sync

        Raises:
            EnvironmentError, AssertionError: the user.yaml could not be loaded,
                or it has an ``authz`` section but no Arborist client is set
            SystemExit: an Arborist synchronization step reported failure
            GoogleUpdateException: a Google update failed during the DB sync;
                re-raised only after all the other steps have run
        """

        # get all dbgap files
        user_projects = {}
        user_info = {}
        if self.is_sync_from_dbgap_server:
            self.logger.debug(
                "Pulling telemetry files from {} dbgap sftp servers".format(
                    len(self.dbGaP)
                )
            )
            user_projects, user_info = self._merge_multiple_dbgap_sftp(self.dbGaP, sess)

        local_csv_file_list = []
        if self.sync_from_local_csv_dir:
            local_csv_file_list = glob.glob(
                os.path.join(self.sync_from_local_csv_dir, "*")
            )
            # Sort the list so the order of files is consistent across platforms
            local_csv_file_list.sort()

        user_projects_csv, user_info_csv = self._merge_multiple_local_csv_files(
            local_csv_file_list,
            encrypted=False,
            session=sess,
            dbgap_configs=self.dbGaP,
        )

        try:
            user_yaml = UserYAML.from_file(
                self.sync_from_local_yaml_file, encrypted=False, logger=self.logger
            )
        except (EnvironmentError, AssertionError) as e:
            self.logger.error(str(e))
            self.logger.error("aborting early")
            raise

        # parse all projects
        user_projects_csv = self.parse_projects(user_projects_csv)
        user_projects = self.parse_projects(user_projects)
        user_yaml.projects = self.parse_projects(user_yaml.projects)

        # merge all user info dicts into "user_info".
        # the user info (such as email) in the user.yaml files
        # overrides the user info from the CSV files.
        self.sync_two_user_info_dict(user_info_csv, user_info)
        self.sync_two_user_info_dict(user_yaml.user_info, user_info)

        # merge all access info dicts into "user_projects".
        # the access info is combined - if the user.yaml access is
        # ["read"] and the CSV file access is ["read-storage"], the
        # resulting access is ["read", "read-storage"].
        self.sync_two_phsids_dict(
            user_projects_csv, user_projects, source1="local_csv", source2="dbgap"
        )
        self.sync_two_phsids_dict(
            user_yaml.projects, user_projects, source1="user_yaml", source2="dbgap"
        )

        # Grant all consents to c999 users as soon as any configured dbgap
        # server has consent code parsing enabled. The grant does not depend on
        # which server enabled it, so it only needs to run once.
        if any(
            self._get_parse_consent_code(dbgap_config) for dbgap_config in self.dbGaP
        ):
            self._grant_all_consents_to_c999_users(
                user_projects, user_yaml.project_to_resource
            )

        google_update_ex = None

        try:
            # update the Fence DB
            if user_projects:
                self.logger.info("Sync to db and storage backend")
                self.sync_to_db_and_storage_backend(user_projects, user_info, sess)
                self.logger.info("Finish syncing to db and storage backend")
            else:
                self.logger.info("No users for syncing")
        except GoogleUpdateException as ex:
            # save this to reraise later after all non-Google syncing has finished
            # this way, any issues with Google only affect Google data access and don't
            # cascade problems into non-Google AWS or Azure access
            google_update_ex = ex

        # update the Arborist DB (resources, roles, policies, groups)
        if user_yaml.authz:
            if not self.arborist_client:
                raise EnvironmentError(
                    "yaml file contains authz section but sync is not configured with"
                    " arborist client--did you run sync with --arborist <arborist client> arg?"
                )
            self.logger.info("Synchronizing arborist...")
            success = self._update_arborist(sess, user_yaml)
            if success:
                self.logger.info("Finished synchronizing arborist")
            else:
                self.logger.error("Could not synchronize successfully")
                # raise directly instead of calling the `exit` helper, which
                # is installed by the `site` module for interactive use
                raise SystemExit(1)
        else:
            self.logger.info("No `authz` section; skipping arborist sync")

        # update the Arborist DB (user access)
        if self.arborist_client:
            self.logger.info("Synchronizing arborist with authorization info...")
            success = self._update_authz_in_arborist(sess, user_projects, user_yaml)
            if success:
                self.logger.info(
                    "Finished synchronizing authorization info to arborist"
                )
            else:
                self.logger.error(
                    "Could not synchronize authorization info successfully to arborist"
                )
                raise SystemExit(1)
        else:
            self.logger.error("No arborist client set; skipping arborist sync")

        # Logging authz source
        for u, s in self.auth_source.items():
            self.logger.info("Access for user {} from {}".format(u, s))

        self.logger.info(
            f"Persisting authz mapping to database: {user_yaml.project_to_resource}"
        )
        user_yaml.persist_project_to_resource(db_session=sess)
        if google_update_ex is not None:
            raise google_update_ex
1✔
1694

1695
    def _grant_all_consents_to_c999_users(
        self, user_projects, user_yaml_project_to_resources
    ):
        """
        Give every user holding a ``c999`` consent on a study access to all the
        known consents of that study.

        The known consents are collected from the keys of ``self._projects``
        and of ``user_yaml_project_to_resources`` that match
        ``config["DBGAP_ACCESSION_WITH_CONSENT_REGEX"]``. Each consent found on
        a study is also registered for the studies returned by
        ``self._get_children`` for it.

        Args:
            user_projects (dict): ``{username: {project: privileges}}``;
                updated in place
            user_yaml_project_to_resources (dict): project -> resource mapping
                from the user.yaml; only its keys are used
        """
        access_number_matcher = re.compile(config["DBGAP_ACCESSION_WITH_CONSENT_REGEX"])
        # combine dbgap/user.yaml projects into one big list (in case not all consents
        # are in either)
        all_projects = set(
            list(self._projects.keys()) + list(user_yaml_project_to_resources.keys())
        )

        self.logger.debug(f"all projects: {all_projects}")

        # construct a mapping from phsid (without consent) to all accessions with consent
        consent_mapping = {}
        for project in all_projects:
            phs_match = access_number_matcher.match(project)
            if not phs_match:
                continue
            accession_number = phs_match.groupdict()
            phsid = accession_number["phsid"]
            consent = accession_number["consent"]

            # TODO: This is not handling the .v1.p1 at all
            consent_mapping.setdefault(phsid, set()).add(".".join([phsid, consent]))
            children = self._get_children(phsid)
            if children:
                for child_phs in children:
                    # Assign parent consent to child study
                    consent_mapping.setdefault(child_phs, set()).add(
                        ".".join([child_phs, consent])
                    )

        self.logger.debug(f"consent mapping: {consent_mapping}")

        # go through existing access and find any c999's and make sure to give access to
        # all accessions with consent for that phsid
        #
        # Iterate over snapshots of the keys, since the user's project dict is
        # extended inside the loop. Only the keys are needed, so a deep copy of
        # the whole structure is unnecessary.
        for username in list(user_projects):
            for project in list(user_projects[username]):
                phs_match = access_number_matcher.match(project)
                if not phs_match:
                    continue
                accession_number = phs_match.groupdict()
                if accession_number["consent"] != "c999":
                    continue

                # give access to all consents
                all_phsids_with_consent = consent_mapping.get(
                    accession_number["phsid"], []
                )
                self.logger.info(
                    f"user {username} has c999 consent group for: {project}. "
                    f"Granting access to all consents: {all_phsids_with_consent}"
                )
                # NOTE: Giving read and read-storage. This replaces whatever
                #       privileges the user already had on that accession.
                for phsid_with_consent in all_phsids_with_consent:
                    user_projects[username].update(
                        {phsid_with_consent: {"read-storage", "read"}}
                    )
1749

1750
    def _update_arborist(self, session, user_yaml):
        """
        Create roles, resources, policies, groups in arborist from the information in
        ``user_yaml``.

        The projects are sent to arborist as resources with paths like
        ``/projects/{project}``. Roles are created with just the original names
        for the privileges like ``"read-storage", "read"`` etc.

        Arborist errors on individual resources, roles, policies and groups are
        logged and the sync keeps going. ``user_yaml.authz`` is not modified.

        Args:
            session (sqlalchemy.Session)
            user_yaml (UserYAML)

        Return:
            bool: success (``False`` only when arborist is not healthy)
        """
        healthy = self._is_arborist_healthy()
        if not healthy:
            return False

        # Set up the resource tree in arborist by combining provided resources with any
        # dbgap resources that were created before this.
        #
        # Why add dbgap resources if they've already been created?
        #   B/C Arborist's PUT update will override existing subresources. So if a dbgap
        #   resources was created under `/programs/phs000178` anything provided in
        #   user.yaml under `/programs` would completely wipe it out.
        resources = user_yaml.authz.get("resources", [])

        dbgap_resource_paths = []
        for path_list in self._dbgap_study_to_resources.values():
            dbgap_resource_paths.extend(path_list)

        self.logger.debug("user_yaml resources: {}".format(resources))
        self.logger.debug("dbgap resource paths: {}".format(dbgap_resource_paths))

        combined_resources = utils.combine_provided_and_dbgap_resources(
            resources, dbgap_resource_paths
        )

        for resource in combined_resources:
            try:
                self.logger.debug(
                    "attempting to update arborist resource: {}".format(resource)
                )
                self.arborist_client.update_resource("/", resource, merge=True)
            except ArboristError as e:
                self.logger.error(e)
                # keep going; maybe just some conflicts from things existing already

        # update roles
        roles = user_yaml.authz.get("roles", [])
        for role in roles:
            try:
                response = self.arborist_client.update_role(role["id"], role)
                if response:
                    self._created_roles.add(role["id"])
            except ArboristError as e:
                self.logger.info(
                    "couldn't update role '{}', creating instead".format(str(e))
                )
                try:
                    response = self.arborist_client.create_role(role)
                    if response:
                        self._created_roles.add(role["id"])
                except ArboristError as e:
                    self.logger.error(e)
                    # keep going; maybe just some conflicts from things existing already

        # update policies
        policies = user_yaml.authz.get("policies", [])
        for policy in policies:
            # Work on a copy so that removing the id does not alter the
            # caller's `user_yaml.authz`; otherwise a second run with the same
            # object would fail on the missing "id" key.
            policy = dict(policy)
            policy_id = policy.pop("id")
            try:
                self.logger.debug(
                    "Trying to upsert policy with id {}".format(policy_id)
                )
                response = self.arborist_client.update_policy(
                    policy_id, policy, create_if_not_exist=True
                )
            except ArboristError as e:
                self.logger.error(e)
                # keep going; maybe just some conflicts from things existing already
            else:
                if response:
                    self.logger.debug("Upserted policy with id {}".format(policy_id))
                    self._created_policies.add(policy_id)

        # update groups
        groups = user_yaml.authz.get("groups", [])

        # delete from arborist the groups that have been deleted
        # from the user.yaml
        arborist_groups = set(
            g["name"] for g in self.arborist_client.list_groups().get("groups", [])
        )
        # groups without a name are skipped here; they are reported by the
        # validation in the create/update loop below
        useryaml_groups = set(g["name"] for g in groups if "name" in g)
        for deleted_group in arborist_groups.difference(useryaml_groups):
            # do not try to delete built in groups
            if deleted_group not in ["anonymous", "logged-in"]:
                self.arborist_client.delete_group(deleted_group)

        # create/update the groups defined in the user.yaml
        for group in groups:
            missing = {"name", "users", "policies"}.difference(set(group.keys()))
            if missing:
                name = group.get("name", "{MISSING NAME}")
                self.logger.error(
                    "group {} missing required field(s): {}".format(name, list(missing))
                )
                continue
            try:
                self.arborist_client.put_group(
                    group["name"],
                    # Arborist doesn't handle group descriptions yet
                    # description=group.get("description", ""),
                    users=group["users"],
                    policies=group["policies"],
                )
            except ArboristError as e:
                self.logger.info("couldn't put group: {}".format(str(e)))

        # Update policies for built-in (`anonymous` and `logged-in`) groups

        # First recreate these groups in order to clear out old, possibly deleted policies
        for builtin_group in ["anonymous", "logged-in"]:
            try:
                self.arborist_client.put_group(builtin_group)
            except ArboristError as e:
                self.logger.info("couldn't put group: {}".format(str(e)))

        # Now add back policies that are in the user.yaml
        for policy in user_yaml.authz.get("anonymous_policies", []):
            self.arborist_client.grant_group_policy("anonymous", policy)

        for policy in user_yaml.authz.get("all_users_policies", []):
            self.arborist_client.grant_group_policy("logged-in", policy)

        return True
1✔
1889

1890
    def _revoke_all_policies_preserve_mfa(self, username, idp=None):
        """
        Revoke all of the user's policies in Arborist. When MFA is enabled for
        the user's idp (its ``OPENID_CONNECT`` config has a
        ``multifactor_auth_claim_info`` entry) and the user held ``mfa_policy``
        before the revocation, grant ``mfa_policy`` back afterwards.

        Does nothing when the user cannot be fetched from Arborist.
        """
        try:
            arborist_user = self.arborist_client.get_user(username)
        except ArboristError:
            # user doesn't exist in Arborist, nothing to revoke
            return

        idp_settings = config["OPENID_CONNECT"].get(idp, {})
        if "multifactor_auth_claim_info" not in idp_settings:
            # TODO This should be a diff, not a revocation of all policies.
            self.arborist_client.revoke_all_policies_for_user(username)
            return

        # remember what the user held before everything gets revoked
        held_policies = []
        try:
            held_policies = arborist_user["policies"]
        except Exception as e:
            self.logger.error(
                f"Could not retrieve user's policies, revoking all policies anyway. {e}"
            )
        finally:
            # TODO This should be a diff, not a revocation of all policies.
            self.arborist_client.revoke_all_policies_for_user(username)

        if "mfa_policy" in held_policies:
            self.arborist_client.grant_user_policy(username, "mfa_policy")
1✔
1923

1924
    def _update_authz_in_arborist(
        self,
        session,
        user_projects,
        user_yaml=None,
        single_user_sync=False,
        expires=None,
    ):
        """
        Assign users policies in arborist from the information in
        ``user_projects`` and optionally a ``user_yaml``.

        The projects are sent to arborist as resources with paths like
        ``/projects/{project}``. Roles are created with just the original names
        for the privileges like ``"read-storage", "read"`` etc.

        For a full sync, every user's policies are revoked first (preserving
        the MFA policy) and then re-granted, one policy per role/resource pair.
        For a single user sync nothing is revoked: one policy covering the
        first set of roles is created and granted, and the method returns
        right away.

        Args:
            session (sqlalchemy.Session)
            user_projects (dict): ``{username: {project: privileges}}``;
                updated in place with access from the user.yaml and with the
                users known to arborist
            user_yaml (UserYAML) optional, if there are policies for users in a user.yaml
            single_user_sync (bool) whether authz update is for a single user
            expires (int) time at which authz info in Arborist should expire

        Return:
            bool: success. For a single user sync, the result of granting the
            policy is returned instead.
        """
        healthy = self._is_arborist_healthy()
        if not healthy:
            return False

        self.logger.debug("user_projects: {}".format(user_projects))

        if user_yaml:
            self.logger.debug(
                "useryaml abac before lowering usernames: {}".format(
                    user_yaml.user_abac
                )
            )
            user_yaml.user_abac = {
                key.lower(): value for key, value in user_yaml.user_abac.items()
            }
            # update the project info with `projects` specified in user.yaml
            self.sync_two_phsids_dict(user_yaml.user_abac, user_projects)

        # get list of users from arborist to make sure users that are completely removed
        # from authorization sources get policies revoked
        arborist_user_projects = {}
        if not single_user_sync:
            try:
                arborist_users = self.arborist_client.get_users().json["users"]

                # construct user information, NOTE the lowering of the username. when adding/
                # removing access, the case in the Fence db is used. For combining access, it is
                # case-insensitive, so we lower
                arborist_user_projects = {
                    user["name"].lower(): {} for user in arborist_users
                }
            except (ArboristError, KeyError, AttributeError) as error:
                # TODO usersync should probably exit with non-zero exit code at the end,
                #      but sync should continue from this point so there are no partial
                #      updates
                self.logger.warning(
                    "Could not get list of users in Arborist, continuing anyway. "
                    "WARNING: this sync will NOT remove access for users no longer in "
                    f"authorization sources. Error: {error}"
                )

            # update the project info with users from arborist
            self.sync_two_phsids_dict(arborist_user_projects, user_projects)

        # prefer in-memory if available from user_yaml, if not, get from database
        if user_yaml and user_yaml.project_to_resource:
            project_to_authz_mapping = user_yaml.project_to_resource
            self.logger.debug(
                f"using in-memory project to authz resource mapping from "
                f"user.yaml (instead of database): {project_to_authz_mapping}"
            )
        else:
            project_to_authz_mapping = get_project_to_authz_mapping(session)
            self.logger.debug(
                f"using persisted project to authz resource mapping from database "
                f"(instead of user.yaml - as it may not be available): {project_to_authz_mapping}"
            )

        self.logger.debug(
            f"_dbgap_study_to_resources: {self._dbgap_study_to_resources}"
        )
        all_resources = [
            r
            for resources in self._dbgap_study_to_resources.values()
            for r in resources
        ]
        all_resources.extend(project_to_authz_mapping.values())
        self._create_arborist_resources(all_resources)

        for username, user_project_info in user_projects.items():
            self.logger.info("processing user `{}`".format(username))
            user = query_for_user(session=session, username=username)
            idp = None
            if user:
                # use the username as cased in the Fence db from here on
                username = user.username
                idp = user.identity_provider.name if user.identity_provider else None

            self.arborist_client.create_user_if_not_exist(username)
            if not single_user_sync:
                self._revoke_all_policies_preserve_mfa(username, idp)

            # as of 2/11/2022, for single_user_sync, as RAS visa parsing has
            # previously mapped each project to the same set of privileges
            # (i.e.{'read', 'read-storage'}), unique_policies will just be a
            # single policy with ('read', 'read-storage') being the single
            # key
            unique_policies = self._determine_unique_policies(
                user_project_info, project_to_authz_mapping
            )

            for roles in unique_policies.keys():
                for role in roles:
                    self._create_arborist_role(role)

            if single_user_sync:
                for ordered_roles, ordered_resources in unique_policies.items():
                    policy_hash = self._hash_policy_contents(
                        ordered_roles, ordered_resources
                    )
                    self._create_arborist_policy(
                        policy_hash,
                        ordered_roles,
                        ordered_resources,
                        skip_if_exists=True,
                    )
                    # return here as it is not expected single_user_sync
                    # will need any of the remaining user_yaml operations
                    # left in _update_authz_in_arborist
                    return self._grant_arborist_policy(
                        username, policy_hash, expires=expires
                    )
            else:
                for roles, resources in unique_policies.items():
                    for role in roles:
                        for resource in resources:
                            # grant a policy to this user which is a single
                            # role on a single resource

                            # format project '/x/y/z' -> 'x.y.z'
                            # so the policy id will be something like 'x.y.z-create'
                            policy_id = _format_policy_id(resource, role)
                            if policy_id not in self._created_policies:
                                try:
                                    self.arborist_client.update_policy(
                                        policy_id,
                                        {
                                            "description": "policy created by fence sync",
                                            "role_ids": [role],
                                            "resource_paths": [resource],
                                        },
                                        create_if_not_exist=True,
                                    )
                                except ArboristError as e:
                                    self.logger.info(
                                        "not creating policy in arborist; {}".format(
                                            str(e)
                                        )
                                    )
                                # NOTE(review): the id is recorded even when the
                                # update above failed, so it is not retried for
                                # the following users
                                self._created_policies.add(policy_id)

                            self._grant_arborist_policy(
                                username, policy_id, expires=expires
                            )

            if user_yaml:
                for policy in user_yaml.policies.get(username, []):
                    self.arborist_client.grant_user_policy(
                        username,
                        policy,
                        expires_at=expires,
                    )

        if user_yaml:
            for client_name, client_details in user_yaml.clients.items():
                client_policies = client_details.get("policies", [])
                clients = session.query(Client).filter_by(name=client_name).all()
                # update existing clients, do not create new ones
                if not clients:
                    self.logger.warning(
                        "client to update (`{}`) does not exist in fence: skipping".format(
                            client_name
                        )
                    )
                    continue
                self.logger.debug(
                    "updating client `{}` (found {} client IDs)".format(
                        client_name, len(clients)
                    )
                )
                # there may be more than 1 client with this name if credentials are being rotated,
                # so we grant access to each client ID
                for client in clients:
                    try:
                        self.arborist_client.update_client(
                            client.client_id, client_policies
                        )
                    except ArboristError as e:
                        self.logger.info(
                            "not granting policies {} to client `{}` (`{}`); {}".format(
                                client_policies, client_name, client.client_id, str(e)
                            )
                        )

        return True
1✔
2136

2137
    def _determine_unique_policies(self, user_project_info, project_to_authz_mapping):
1✔
2138
        """
2139
        Determine and return a dictionary of unique policies.
2140

2141
        Args (examples):
2142
            user_project_info (dict):
2143
            {
2144
                'phs000002.c1': { 'read-storage', 'read' },
2145
                'phs000001.c1': { 'read', 'read-storage' },
2146
                'phs000004.c1': { 'write', 'read' },
2147
                'phs000003.c1': { 'read', 'write' },
2148
                'phs000006.c1': { 'write-storage', 'write', 'read-storage', 'read' }
2149
                'phs000005.c1': { 'read', 'read-storage', 'write', 'write-storage' },
2150
            }
2151
            project_to_authz_mapping (dict):
2152
            {
2153
                'phs000001.c1': '/programs/DEV/projects/phs000001.c1'
2154
            }
2155

2156
        Return (for examples):
2157
            dict:
2158
            {
2159
                ('read', 'read-storage'): ('phs000001.c1', 'phs000002.c1'),
2160
                ('read', 'write'): ('phs000003.c1', 'phs000004.c1'),
2161
                ('read', 'read-storage', 'write', 'write-storage'): ('phs000005.c1', 'phs000006.c1'),
2162
            }
2163
        """
2164
        roles_to_resources = collections.defaultdict(list)
1✔
2165
        for study, roles in user_project_info.items():
1✔
2166
            ordered_roles = tuple(sorted(roles))
1✔
2167
            study_authz_paths = self._dbgap_study_to_resources.get(study, [study])
1✔
2168
            if study in project_to_authz_mapping:
1✔
2169
                study_authz_paths = [project_to_authz_mapping[study]]
1✔
2170
            roles_to_resources[ordered_roles].extend(study_authz_paths)
1✔
2171

2172
        policies = {}
1✔
2173
        for ordered_roles, unordered_resources in roles_to_resources.items():
1✔
2174
            policies[ordered_roles] = tuple(sorted(unordered_resources))
1✔
2175
        return policies
1✔
2176

2177
    def _create_arborist_role(self, role):
1✔
2178
        """
2179
        Wrapper around gen3authz's create_role with additional logging
2180

2181
        Args:
2182
            role (str): what the Arborist identity should be of the created role
2183

2184
        Return:
2185
            bool: True if the role was created successfully or it already
2186
                  exists. False otherwise
2187
        """
2188
        if role in self._created_roles:
1✔
2189
            return True
1✔
2190
        try:
1✔
2191
            response_json = self.arborist_client.create_role(
1✔
2192
                arborist_role_for_permission(role)
2193
            )
2194
        except ArboristError as e:
×
2195
            self.logger.error(
×
2196
                "could not create `{}` role in Arborist: {}".format(role, e)
2197
            )
2198
            return False
×
2199
        self._created_roles.add(role)
1✔
2200

2201
        if response_json is None:
1✔
2202
            self.logger.info("role `{}` already exists in Arborist".format(role))
×
2203
        else:
2204
            self.logger.info("created role `{}` in Arborist".format(role))
1✔
2205
        return True
1✔
2206

2207
    def _create_arborist_resources(self, resources):
        """
        Create resources in Arborist

        The paths are turned into request bodies by
        utils.combine_provided_and_dbgap_resources({}, resources); each body is
        sent in its own update_resource("/", body, merge=True) call. The first
        ArboristError stops the loop, so later bodies are not sent.

        Args:
            resources (list): a list of full Arborist resource paths to create
            [
                "/programs/DEV/projects/phs000001.c1",
                "/programs/DEV/projects/phs000002.c1",
                "/programs/DEV/projects/phs000003.c1"
            ]

        Return:
            bool: True if the resources were successfully created, False otherwise


        As of 2/11/2022, for resources above,
        utils.combine_provided_and_dbgap_resources({}, resources) returns:
        [
            { 'name': 'programs', 'subresources': [
                { 'name': 'DEV', 'subresources': [
                    { 'name': 'projects', 'subresources': [
                        { 'name': 'phs000001.c1', 'subresources': []},
                        { 'name': 'phs000002.c1', 'subresources': []},
                        { 'name': 'phs000003.c1', 'subresources': []}
                    ]}
                ]}
            ]}
        ]
        Because this list has a single object, only a single network request gets
        sent to Arborist.

        However, for resources = ["/phs000001.c1", "/phs000002.c1", "/phs000003.c1"],
        utils.combine_provided_and_dbgap_resources({}, resources) returns:
        [
            {'name': 'phs000001.c1', 'subresources': []},
            {'name': 'phs000002.c1', 'subresources': []},
            {'name': 'phs000003.c1', 'subresources': []}
        ]
        Because this list has 3 objects, 3 network requests get sent to Arborist.

        As a practical matter, for sync_single_user_visas, studies
        should be nested under the `/programs` resource as in the former
        example (i.e. only one network request gets made).

        TODO for the sake of simplicity, it would be nice if only one network
        request was made no matter the input.
        """
        for request_body in utils.combine_provided_and_dbgap_resources({}, resources):
            try:
                # The response is not needed; only success or failure matters.
                self.arborist_client.update_resource("/", request_body, merge=True)
            except ArboristError as e:
                self.logger.error(
                    "could not create Arborist resources using request body `{}`. error: {}".format(
                        request_body, e
                    )
                )
                return False

        self.logger.debug(
            "created {} resource(s) in Arborist: `{}`".format(len(resources), resources)
        )
        return True
1✔
2272

2273
    def _create_arborist_policy(
1✔
2274
        self, policy_id, roles, resources, skip_if_exists=False
2275
    ):
2276
        """
2277
        Wrapper around gen3authz's create_policy with additional logging
2278

2279
        Args:
2280
            policy_id (str): what the Arborist identity should be of the created policy
2281
            roles (iterable): what roles the create policy should have
2282
            resources (iterable): what resources the created policy should have
2283
            skip_if_exists (bool): if True, this function will not treat an already
2284
                                   existent policy as an error
2285

2286
        Return:
2287
            bool: True if policy creation was successful. False otherwise
2288
        """
2289
        try:
1✔
2290
            response_json = self.arborist_client.create_policy(
1✔
2291
                {
2292
                    "id": policy_id,
2293
                    "role_ids": roles,
2294
                    "resource_paths": resources,
2295
                },
2296
                skip_if_exists=skip_if_exists,
2297
            )
2298
        except ArboristError as e:
×
2299
            self.logger.error(
×
2300
                "could not create policy `{}` in Arborist: {}".format(policy_id, e)
2301
            )
2302
            return False
×
2303

2304
        if response_json is None:
1✔
2305
            self.logger.info("policy `{}` already exists in Arborist".format(policy_id))
×
2306
        else:
2307
            self.logger.info("created policy `{}` in Arborist".format(policy_id))
1✔
2308
        return True
1✔
2309

2310
    def _hash_policy_contents(self, ordered_roles, ordered_resources):
1✔
2311
        """
2312
        Generate a sha256 hexdigest representing ordered_roles and ordered_resources.
2313

2314
        Args:
2315
            ordered_roles (iterable): policy roles in sorted order
2316
            ordered_resources (iterable): policy resources in sorted order
2317

2318
        Return:
2319
            str: SHA256 hex digest
2320
        """
2321

2322
        def escape(s):
1✔
2323
            return s.replace(",", "\,")
1✔
2324

2325
        canonical_roles = ",".join(escape(r) for r in ordered_roles)
1✔
2326
        canonical_resources = ",".join(escape(r) for r in ordered_resources)
1✔
2327
        canonical_policy = f"{canonical_roles},,f{canonical_resources}"
1✔
2328
        policy_hash = hashlib.sha256(canonical_policy.encode("utf-8")).hexdigest()
1✔
2329

2330
        return policy_hash
1✔
2331

2332
    def _grant_arborist_policy(self, username, policy_id, expires=None):
1✔
2333
        """
2334
        Wrapper around gen3authz's grant_user_policy with additional logging
2335

2336
        Args:
2337
            username (str): username of user in Arborist who policy should be
2338
                            granted to
2339
            policy_id (str): Arborist policy id
2340
            expires (int): POSIX timestamp for when policy should expire
2341

2342
        Return:
2343
            bool: True if granting of policy was successful, False otherwise
2344
        """
2345
        try:
1✔
2346
            response_json = self.arborist_client.grant_user_policy(
1✔
2347
                username,
2348
                policy_id,
2349
                expires_at=expires,
2350
            )
2351
        except ArboristError as e:
×
2352
            self.logger.error(
×
2353
                "could not grant policy `{}` to user `{}`: {}".format(
2354
                    policy_id, username, e
2355
                )
2356
            )
2357
            return False
×
2358

2359
        self.logger.debug(
1✔
2360
            "granted policy `{}` to user `{}`".format(policy_id, username)
2361
        )
2362
        return True
1✔
2363

2364
    def _determine_arborist_resource(self, dbgap_study, dbgap_config):
1✔
2365
        """
2366
        Determine the arborist resource path and add it to
2367
        _self._dbgap_study_to_resources
2368

2369
        Args:
2370
            dbgap_study (str): study phs identifier
2371
            dbgap_config (dict): dictionary of config for dbgap server
2372

2373
        """
2374
        default_namespaces = dbgap_config.get("study_to_resource_namespaces", {}).get(
1✔
2375
            "_default", ["/"]
2376
        )
2377
        namespaces = dbgap_config.get("study_to_resource_namespaces", {}).get(
1✔
2378
            dbgap_study, default_namespaces
2379
        )
2380

2381
        self.logger.debug(f"dbgap study namespaces: {namespaces}")
1✔
2382

2383
        arborist_resource_namespaces = [
1✔
2384
            namespace.rstrip("/") + "/programs/" for namespace in namespaces
2385
        ]
2386

2387
        for resource_namespace in arborist_resource_namespaces:
1✔
2388
            full_resource_path = resource_namespace + dbgap_study
1✔
2389
            if dbgap_study not in self._dbgap_study_to_resources:
1✔
2390
                self._dbgap_study_to_resources[dbgap_study] = []
1✔
2391
            self._dbgap_study_to_resources[dbgap_study].append(full_resource_path)
1✔
2392
        return arborist_resource_namespaces
1✔
2393

2394
    def _is_arborist_healthy(self):
1✔
2395
        if not self.arborist_client:
1✔
2396
            self.logger.warning("no arborist client set; skipping arborist dbgap sync")
×
2397
            return False
×
2398
        if not self.arborist_client.healthy():
1✔
2399
            # TODO (rudyardrichter, 2019-01-07): add backoff/retry here
2400
            self.logger.error(
×
2401
                "arborist service is unavailable; skipping main arborist dbgap sync"
2402
            )
2403
            return False
×
2404
        return True
1✔
2405

2406
    def _pick_sync_type(self, visa):
1✔
2407
        """
2408
        Pick type of visa to parse according to the visa provider
2409
        """
2410
        sync_client = None
1✔
2411
        if visa.type in self.visa_types["ras"]:
1✔
2412
            sync_client = self.ras_sync_client
1✔
2413
        else:
2414
            raise Exception(
×
2415
                "Visa type {} not recognized. Configure in fence-config".format(
2416
                    visa.type
2417
                )
2418
            )
2419
        if not sync_client:
1✔
2420
            raise Exception("Sync client for {} not configured".format(visa.type))
×
2421

2422
        return sync_client
1✔
2423

2424
    def sync_single_user_visas(self, user, ga4gh_visas, sess=None, expires=None):
        """
        Sync a single user's visas during login or DRS/data access

        IMPORTANT NOTE: THIS DOES NOT VALIDATE THE VISA. ENSURE THIS IS DONE
                        BEFORE THIS.

        Visas that fail to parse are logged and skipped. The projects from the
        remaining visas are merged (a later visa overrides an earlier one for
        the same key), processed, pushed to the storage backend when there is
        anything to push, and synced to Arborist when an Arborist client is set.

        Args:
            user (userdatamodel.user.User): Fence user whose visas'
                                            authz info is being synced
            ga4gh_visas (list): a list of fence.models.GA4GHVisaV1 objects
                                that are ALREADY VALIDATED
            sess (sqlalchemy.orm.session.Session): database session
            expires (int): time at which synced Arborist policies and
                           inclusion in any GBAG are set to expire

        Return:
            list of successfully parsed visas

        Raises:
            EnvironmentError, AssertionError: re-raised after logging when the
                local user yaml file cannot be loaded
            Exception: propagated from _pick_sync_type when a visa's type is
                not recognized or its sync client is not set
        """
        self.ras_sync_client = RASVisa(logger=self.logger)

        # Only the first configured dbGaP entry is consulted here.
        primary_dbgap = self.dbGaP[0]
        parse_consent_code = self._get_parse_consent_code(primary_dbgap)
        enable_common_exchange_area_access = primary_dbgap.get(
            "enable_common_exchange_area_access", False
        )
        study_common_exchange_areas = primary_dbgap.get(
            "study_common_exchange_areas", {}
        )

        try:
            user_yaml = UserYAML.from_file(
                self.sync_from_local_yaml_file, encrypted=False, logger=self.logger
            )
        except (EnvironmentError, AssertionError) as err:
            self.logger.error(str(err))
            self.logger.error("aborting early")
            raise

        merged_projects = {}
        info = {}
        parsed_visas = []

        for visa in ga4gh_visas:
            visa_parser = self._pick_sync_type(visa)
            encoded_visa = visa.ga4gh_visa

            try:
                visa_projects, info = visa_parser._parse_single_visa(
                    user,
                    encoded_visa,
                    visa.expires,
                    parse_consent_code,
                )
            except Exception:
                # Best effort: one bad visa must not block the others.
                self.logger.warning(
                    f"ignoring unsuccessfully parsed or expired visa: {encoded_visa}"
                )
                continue

            merged_projects = {**merged_projects, **visa_projects}
            parsed_visas.append(visa)

        # `info` is whatever the last successfully parsed visa returned (or an
        # empty dict); it is stamped with this user's identity either way.
        info["user_id"] = user.id
        info["username"] = user.username

        user_projects = self.parse_projects({user.username: merged_projects})

        if parse_consent_code and enable_common_exchange_area_access:
            self.logger.info(
                f"using study to common exchange area mapping: {study_common_exchange_areas}"
            )

        self._process_user_projects(
            user_projects,
            enable_common_exchange_area_access,
            study_common_exchange_areas,
            primary_dbgap,
            sess,
        )

        if parse_consent_code:
            self._grant_all_consents_to_c999_users(
                user_projects, user_yaml.project_to_resource
            )

        if not user_projects:
            self.logger.info("No users for syncing")
        else:
            self.logger.info("Sync to storage backend [sync_single_user_visas]")
            self.sync_to_storage_backend(
                user_projects, info, sess, expires=expires
            )

        # update arborist db (user access)
        if not self.arborist_client:
            self.logger.error("No arborist client set; skipping arborist sync")
            return parsed_visas

        self.logger.info("Synchronizing arborist with authorization info...")
        synced = self._update_authz_in_arborist(
            sess,
            user_projects,
            user_yaml=user_yaml,
            single_user_sync=True,
            expires=expires,
        )
        if synced:
            self.logger.info(
                "Finished synchronizing authorization info to arborist"
            )
        else:
            self.logger.error(
                "Could not synchronize authorization info successfully to arborist"
            )

        return parsed_visas
1✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc