• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

uc-cdis / fence / 13530643019

25 Feb 2025 08:47PM UTC coverage: 75.268%. Remained the same
13530643019

Pull #1230

github

BinamB
update toml
Pull Request #1230: (Fix): Usersync ccdecrypt output not parsing

7858 of 10440 relevant lines covered (75.27%)

0.75 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

82.59
fence/sync/sync_users.py
1
import backoff
1✔
2
import glob
1✔
3
import jwt
1✔
4
import os
1✔
5
import re
1✔
6
import subprocess as sp
1✔
7
import yaml
1✔
8
import copy
1✔
9
import datetime
1✔
10
import uuid
1✔
11
import collections
1✔
12
import hashlib
1✔
13

14
from contextlib import contextmanager
1✔
15
from collections import defaultdict
1✔
16
from csv import DictReader
1✔
17
from io import StringIO
1✔
18
from stat import S_ISDIR
1✔
19

20
import paramiko
1✔
21
from cdislogging import get_logger
1✔
22
from email_validator import validate_email, EmailNotValidError
1✔
23
from gen3authz.client.arborist.errors import ArboristError
1✔
24
from gen3users.validation import validate_user_yaml
1✔
25
from paramiko.proxy import ProxyCommand
1✔
26
from sqlalchemy.exc import IntegrityError
1✔
27
from sqlalchemy import func
1✔
28

29
from fence.config import config
1✔
30
from fence.models import (
1✔
31
    AccessPrivilege,
32
    AuthorizationProvider,
33
    Project,
34
    Tag,
35
    User,
36
    query_for_user,
37
    Client,
38
    IdentityProvider,
39
    get_project_to_authz_mapping,
40
)
41
from fence.resources.google.utils import get_or_create_proxy_group_id
1✔
42
from fence.resources.storage import StorageManager
1✔
43
from fence.resources.google.access_utils import update_google_groups_for_users
1✔
44
from fence.resources.google.access_utils import GoogleUpdateException
1✔
45
from fence.sync import utils
1✔
46
from fence.sync.passport_sync.ras_sync import RASVisa
1✔
47
from fence.utils import get_SQLAlchemyDriver, DEFAULT_BACKOFF_SETTINGS
1✔
48

49

50
def _format_policy_id(path, privilege):
1✔
51
    resource = ".".join(name for name in path.split("/") if name)
1✔
52
    return "{}-{}".format(resource, privilege)
1✔
53

54

55
def download_dir(sftp, remote_dir, local_dir):
1✔
56
    """
57
    Recursively download file from remote_dir to local_dir
58
    Args:
59
        remote_dir(str)
60
        local_dir(str)
61
    Returns: None
62
    """
63
    dir_items = sftp.listdir_attr(remote_dir)
×
64

65
    for item in dir_items:
×
66
        remote_path = remote_dir + "/" + item.filename
×
67
        local_path = os.path.join(local_dir, item.filename)
×
68
        if S_ISDIR(item.st_mode):
×
69
            download_dir(sftp, remote_path, local_path)
×
70
        else:
71
            sftp.get(remote_path, local_path)
×
72

73

74
def arborist_role_for_permission(permission):
1✔
75
    """
76
    For the programs/projects in the existing fence access control model, in order to
77
    use arborist for checking permissions we generate a policy for each combination of
78
    program/project and privilege. The roles involved all contain only one permission,
79
    for one privilege from the project access model.
80
    """
81
    return {
1✔
82
        "id": permission,
83
        "permissions": [
84
            {"id": permission, "action": {"service": "*", "method": permission}}
85
        ],
86
    }
87

88

89
@contextmanager
1✔
90
def _read_file(filepath, encrypted=True, key=None, logger=None):
1✔
91
    """
92
    Context manager for reading and optionally decrypting file it only
93
    decrypts files encrypted by unix 'crypt' tool which is used by dbGaP.
94

95
    Args:
96
        filepath (str): path to the file
97
        encrypted (bool): whether the file is encrypted
98

99
    Returns:
100
        Generator[file-like class]: file like object for the file
101
    """
102
    if encrypted:
1✔
103
        p = sp.Popen(
×
104
            [
105
                "ccdecrypt",
106
                "-u",
107
                "-K",
108
                key,
109
                filepath,
110
            ],
111
            stdout=sp.PIPE,
112
            stderr=open(os.devnull, "w"),
113
            universal_newlines=True,
114
        )
115
        try:
×
116
            yield StringIO(p.communicate()[0])
×
117
        except UnicodeDecodeError:
×
118
            logger.error("Could not decode file. Check the decryption key.")
×
119
    else:
120
        f = open(filepath, "r")
1✔
121
        yield f
1✔
122
        f.close()
1✔
123

124

125
class UserYAML(object):
1✔
126
    """
127
    Representation of the information in a YAML file describing user, project, and ABAC
128
    information for access control.
129
    """
130

131
    def __init__(
1✔
132
        self,
133
        projects=None,
134
        user_info=None,
135
        policies=None,
136
        clients=None,
137
        authz=None,
138
        project_to_resource=None,
139
        logger=None,
140
        user_abac=None,
141
    ):
142
        self.projects = projects or {}
1✔
143
        self.user_info = user_info or {}
1✔
144
        self.user_abac = user_abac or {}
1✔
145
        self.policies = policies or {}
1✔
146
        self.clients = clients or {}
1✔
147
        self.authz = authz or {}
1✔
148
        self.project_to_resource = project_to_resource or {}
1✔
149
        self.logger = logger
1✔
150

151
    @classmethod
1✔
152
    def from_file(cls, filepath, encrypted=True, key=None, logger=None):
1✔
153
        """
154
        Add access by "auth_id" to "self.projects" to update the Fence DB.
155
        Add access by "resource" to "self.user_abac" to update Arborist.
156
        """
157
        data = {}
1✔
158
        if filepath:
1✔
159
            with _read_file(filepath, encrypted=encrypted, key=key, logger=logger) as f:
1✔
160
                file_contents = f.read()
1✔
161
                validate_user_yaml(file_contents)  # run user.yaml validation tests
1✔
162
                data = yaml.safe_load(file_contents)
1✔
163
        else:
164
            if logger:
1✔
165
                logger.info("Did not sync a user.yaml, no file path provided.")
1✔
166

167
        projects = dict()
1✔
168
        user_info = dict()
1✔
169
        policies = dict()
1✔
170

171
        # resources should be the resource tree to construct in arborist
172
        user_abac = dict()
1✔
173

174
        # Fall back on rbac block if no authz. Remove when rbac in useryaml fully deprecated.
175
        if not data.get("authz") and data.get("rbac"):
1✔
176
            if logger:
×
177
                logger.info(
×
178
                    "No authz block found but rbac block present. Using rbac block"
179
                )
180
            data["authz"] = data["rbac"]
×
181

182
        # get user project mapping to arborist resources if it exists
183
        project_to_resource = data.get("authz", dict()).get(
1✔
184
            "user_project_to_resource", dict()
185
        )
186

187
        # read projects and privileges for each user
188
        users = data.get("users", {})
1✔
189
        for username, details in users.items():
1✔
190
            # users should occur only once each; skip if already processed
191
            if username in projects:
1✔
192
                msg = "invalid yaml file: user `{}` occurs multiple times".format(
×
193
                    username
194
                )
195
                if logger:
×
196
                    logger.error(msg)
×
197
                raise EnvironmentError(msg)
×
198

199
            privileges = {}
1✔
200
            resource_permissions = dict()
1✔
201
            for project in details.get("projects", {}):
1✔
202
                try:
1✔
203
                    privileges[project["auth_id"]] = set(project["privilege"])
1✔
204
                except KeyError as e:
×
205
                    if logger:
×
206
                        logger.error("project {} missing field: {}".format(project, e))
×
207
                    continue
×
208

209
                # project may not have `resource` field.
210
                # prefer resource field;
211
                # if no resource or mapping, assume auth_id is resource.
212
                resource = project.get("resource", project["auth_id"])
1✔
213

214
                if project["auth_id"] not in project_to_resource:
1✔
215
                    project_to_resource[project["auth_id"]] = resource
1✔
216
                resource_permissions[resource] = set(project["privilege"])
1✔
217

218
            user_info[username] = {
1✔
219
                "email": details.get("email", ""),
220
                "display_name": details.get("display_name", ""),
221
                "phone_number": details.get("phone_number", ""),
222
                "tags": details.get("tags", {}),
223
                "admin": details.get("admin", False),
224
            }
225
            if not details.get("email"):
1✔
226
                try:
1✔
227
                    valid = validate_email(
1✔
228
                        username, allow_smtputf8=False, check_deliverability=False
229
                    )
230
                    user_info[username]["email"] = valid.email
1✔
231
                except EmailNotValidError:
1✔
232
                    pass
1✔
233
            projects[username] = privileges
1✔
234
            user_abac[username] = resource_permissions
1✔
235

236
            # list of policies we want to grant to this user, which get sent to arborist
237
            # to check if they're allowed to do certain things
238
            policies[username] = details.get("policies", [])
1✔
239

240
        if logger:
1✔
241
            logger.info(
1✔
242
                "Got user project to arborist resource mapping:\n{}".format(
243
                    str(project_to_resource)
244
                )
245
            )
246

247
        authz = data.get("authz", dict())
1✔
248
        if not authz:
1✔
249
            # older version: resources in root, no `authz` section or `rbac` section
250
            if logger:
1✔
251
                logger.warning(
1✔
252
                    "access control YAML file is using old format (missing `authz`/`rbac`"
253
                    " section in the root); assuming that if it exists `resources` will"
254
                    " be on the root level, and continuing"
255
                )
256
            # we're going to throw it into the `authz` dictionary anyways, so the rest of
257
            # the code can pretend it's in the normal place that we expect
258
            resources = data.get("resources", [])
1✔
259
            # keep authz empty dict if resources is not specified
260
            if resources:
1✔
261
                authz["resources"] = data.get("resources", [])
×
262

263
        clients = data.get("clients", {})
1✔
264

265
        return cls(
1✔
266
            projects=projects,
267
            user_info=user_info,
268
            user_abac=user_abac,
269
            policies=policies,
270
            clients=clients,
271
            authz=authz,
272
            project_to_resource=project_to_resource,
273
            logger=logger,
274
        )
275

276
    def persist_project_to_resource(self, db_session):
1✔
277
        """
278
        Store the mappings from Project.auth_id to authorization resource (Project.authz)
279

280
        The mapping comes from an external source, this function persists what was parsed
281
        into memory into the database for future use.
282
        """
283
        for auth_id, authz_resource in self.project_to_resource.items():
1✔
284
            project = (
1✔
285
                db_session.query(Project).filter(Project.auth_id == auth_id).first()
286
            )
287
            if project:
1✔
288
                project.authz = authz_resource
1✔
289
            else:
290
                project = Project(name=auth_id, auth_id=auth_id, authz=authz_resource)
×
291
                db_session.add(project)
×
292
        db_session.commit()
1✔
293

294

295
class UserSyncer(object):
1✔
296
    def __init__(
1✔
297
        self,
298
        dbGaP,
299
        DB,
300
        project_mapping,
301
        storage_credentials=None,
302
        db_session=None,
303
        is_sync_from_dbgap_server=False,
304
        sync_from_local_csv_dir=None,
305
        sync_from_local_yaml_file=None,
306
        arborist=None,
307
        folder=None,
308
    ):
309
        """
310
        Syncs ACL files from dbGap to auth database and storage backends
311
        Args:
312
            dbGaP: a list of dict containing creds to access dbgap sftp
313
            DB: database connection string
314
            project_mapping: a dict containing how dbgap ids map to projects
315
            storage_credentials: a dict containing creds for storage backends
316
            sync_from_dir: path to an alternative dir to sync from instead of
317
                           dbGaP
318
            arborist:
319
                ArboristClient instance if the syncer should also create
320
                resources in arborist
321
            folder: a local folder where dbgap telemetry files will sync to
322
        """
323
        self.sync_from_local_csv_dir = sync_from_local_csv_dir
1✔
324
        self.sync_from_local_yaml_file = sync_from_local_yaml_file
1✔
325
        self.is_sync_from_dbgap_server = is_sync_from_dbgap_server
1✔
326
        self.dbGaP = dbGaP
1✔
327
        self.session = db_session
1✔
328
        self.driver = get_SQLAlchemyDriver(DB)
1✔
329
        self.project_mapping = project_mapping or {}
1✔
330
        self._projects = dict()
1✔
331
        self._created_roles = set()
1✔
332
        self._created_policies = set()
1✔
333
        self._dbgap_study_to_resources = dict()
1✔
334
        self.logger = get_logger(
1✔
335
            "user_syncer", log_level="debug" if config["DEBUG"] is True else "info"
336
        )
337
        self.arborist_client = arborist
1✔
338
        self.folder = folder
1✔
339

340
        self.auth_source = defaultdict(set)
1✔
341
        # auth_source used for logging. username : [source1, source2]
342
        self.visa_types = config.get("USERSYNC", {}).get("visa_types", {})
1✔
343
        self.parent_to_child_studies_mapping = {}
1✔
344
        for dbgap_config in dbGaP:
1✔
345
            self.parent_to_child_studies_mapping.update(
1✔
346
                dbgap_config.get("parent_to_child_studies_mapping", {})
347
            )
348
        if storage_credentials:
1✔
349
            self.storage_manager = StorageManager(
1✔
350
                storage_credentials, logger=self.logger
351
            )
352
        self.id_patterns = []
1✔
353

354
    @staticmethod
1✔
355
    def _match_pattern(filepath, id_patterns, encrypted=True):
1✔
356
        """
357
        Check if the filename matches dbgap access control file pattern
358

359
        Args:
360
            filepath (str): path to file
361
            encrypted (bool): whether the file is encrypted
362

363
        Returns:
364
            bool: whether the pattern matches
365
        """
366
        id_patterns.append(r"authentication_file_phs(\d{6}).(csv|txt)")
1✔
367
        for pattern in id_patterns:
1✔
368
            if encrypted:
1✔
369
                pattern += r".enc"
×
370
            pattern += r"$"
1✔
371
            # when converting the YAML from fence-config,
372
            # python reads it as Python string literal. So "\" turns into "\\"
373
            # which messes with the regex match
374
            pattern.replace("\\\\", "\\")
1✔
375
            if re.match(pattern, os.path.basename(filepath)):
1✔
376
                return True
1✔
377
        return False
1✔
378

379
    def _get_from_sftp_with_proxy(self, server, path):
1✔
380
        """
381
        Download all data from sftp sever to a local dir
382

383
        Args:
384
            server (dict) : dictionary containing info to access sftp server
385
            path (str): path to local directory
386

387
        Returns:
388
            None
389
        """
390
        proxy = None
1✔
391
        if server.get("proxy", "") != "":
1✔
392
            command = "ssh -oHostKeyAlgorithms=+ssh-rsa -i ~/.ssh/id_rsa {user}@{proxy} nc {host} {port}".format(
×
393
                user=server.get("proxy_user", ""),
394
                proxy=server.get("proxy", ""),
395
                host=server.get("host", ""),
396
                port=server.get("port", 22),
397
            )
398
            self.logger.info("SSH proxy command: {}".format(command))
×
399

400
            proxy = ProxyCommand(command)
×
401

402
        with paramiko.SSHClient() as client:
1✔
403
            client.set_log_channel(self.logger.name)
1✔
404

405
            client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
1✔
406
            parameters = {
1✔
407
                "hostname": str(server.get("host", "")),
408
                "username": str(server.get("username", "")),
409
                "password": str(server.get("password", "")),
410
                "port": int(server.get("port", 22)),
411
            }
412
            if proxy:
1✔
413
                parameters["sock"] = proxy
×
414

415
            self.logger.info(
1✔
416
                "SSH connection hostname:post {}:{}".format(
417
                    parameters.get("hostname", "unknown"),
418
                    parameters.get("port", "unknown"),
419
                )
420
            )
421
            self._connect_with_ssh(ssh_client=client, parameters=parameters)
1✔
422
            with client.open_sftp() as sftp:
×
423
                download_dir(sftp, "./", path)
1✔
424

425
        if proxy:
×
426
            proxy.close()
×
427

428
    @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
1✔
429
    def _connect_with_ssh(self, ssh_client, parameters):
1✔
430
        ssh_client.connect(**parameters)
1✔
431

432
    def _get_from_ftp_with_proxy(self, server, path):
1✔
433
        """
434
        Download data from ftp sever to a local dir
435

436
        Args:
437
            server (dict): dictionary containing information for accessing server
438
            path(str): path to local files
439

440
        Returns:
441
            None
442
        """
443
        execstr = (
×
444
            'lftp -u {},{}  {} -e "set ftp:proxy http://{}; mirror . {}; exit"'.format(
445
                server.get("username", ""),
446
                server.get("password", ""),
447
                server.get("host", ""),
448
                server.get("proxy", ""),
449
                path,
450
            )
451
        )
452
        os.system(execstr)
×
453

454
    def _get_parse_consent_code(self, dbgap_config={}):
1✔
455
        return dbgap_config.get(
1✔
456
            "parse_consent_code", True
457
        )  # Should this really be true?
458

459
    def _parse_csv(self, file_dict, sess, dbgap_config={}, encrypted=True):
1✔
460
        """
461
        parse csv files to python dict
462

463
        Args:
464
            file_dict: a dictionary with key(file path) and value(privileges)
465
            sess: sqlalchemy session
466
            dbgap_config: a dictionary containing information about the dbGaP sftp server
467
                (comes from fence config)
468
            encrypted: boolean indicating whether those files are encrypted
469

470

471
        Return:
472
            Tuple[[dict, dict]]:
473
                (user_project, user_info) where user_project is a mapping from
474
                usernames to project permissions and user_info is a mapping
475
                from usernames to user details, such as email
476

477
        Example:
478

479
            (
480
                {
481
                    username: {
482
                        'project1': {'read-storage','write-storage'},
483
                        'project2': {'read-storage'},
484
                    }
485
                },
486
                {
487
                    username: {
488
                        'email': 'email@mail.com',
489
                        'display_name': 'display name',
490
                        'phone_number': '123-456-789',
491
                        'tags': {'dbgap_role': 'PI'}
492
                    }
493
                },
494
            )
495

496
        """
497
        user_projects = dict()
1✔
498
        user_info = defaultdict(dict)
1✔
499

500
        # parse dbGaP sftp server information
501
        dbgap_key = dbgap_config.get("decrypt_key", None)
1✔
502

503
        self.id_patterns += (
1✔
504
            [
505
                item.replace("\\\\", "\\")
506
                for item in dbgap_config.get("allowed_whitelist_patterns", [])
507
            ]
508
            if dbgap_config.get("allow_non_dbGaP_whitelist", False)
509
            else []
510
        )
511

512
        enable_common_exchange_area_access = dbgap_config.get(
1✔
513
            "enable_common_exchange_area_access", False
514
        )
515
        study_common_exchange_areas = dbgap_config.get(
1✔
516
            "study_common_exchange_areas", {}
517
        )
518
        parse_consent_code = self._get_parse_consent_code(dbgap_config)
1✔
519

520
        if parse_consent_code and enable_common_exchange_area_access:
1✔
521
            self.logger.info(
1✔
522
                f"using study to common exchange area mapping: {study_common_exchange_areas}"
523
            )
524

525
        project_id_patterns = [r"phs(\d{6})"]
1✔
526
        if "additional_allowed_project_id_patterns" in dbgap_config:
1✔
527
            patterns = dbgap_config.get("additional_allowed_project_id_patterns")
1✔
528
            patterns = [
1✔
529
                pattern.replace("\\\\", "\\") for pattern in patterns
530
            ]  # when converting the YAML from fence-config, python reads it as Python string literal. So "\" turns into "\\" which messes with the regex match
531
            project_id_patterns += patterns
1✔
532

533
        self.logger.info(f"Using these file paths: {file_dict.items()}")
1✔
534
        for filepath, privileges in file_dict.items():
1✔
535
            self.logger.info("Reading file {}".format(filepath))
1✔
536
            if os.stat(filepath).st_size == 0:
1✔
537
                self.logger.warning("Empty file {}".format(filepath))
×
538
                continue
×
539
            if not self._match_pattern(
1✔
540
                filepath, id_patterns=self.id_patterns, encrypted=encrypted
541
            ):
542
                self.logger.warning(
1✔
543
                    "Filename {} does not match dbgap access control filename pattern;"
544
                    " this could mean that the filename has an invalid format, or has"
545
                    " an unexpected .enc extension, or lacks the .enc extension where"
546
                    " expected. This file is NOT being processed by usersync!".format(
547
                        filepath
548
                    )
549
                )
550
                continue
1✔
551

552
            with _read_file(
1✔
553
                filepath, encrypted=encrypted, key=dbgap_key, logger=self.logger
554
            ) as f:
555
                csv = DictReader(f, quotechar='"', skipinitialspace=True)
1✔
556

557
                for row in csv:
1✔
558
                    username = row.get("login") or ""
1✔
559
                    if username == "":
1✔
560
                        continue
×
561

562
                    if dbgap_config.get("allow_non_dbGaP_whitelist", False):
1✔
563
                        phsid = (
1✔
564
                            row.get("phsid") or (row.get("project_id") or "")
565
                        ).split(".")
566
                    else:
567
                        phsid = (row.get("phsid") or "").split(".")
1✔
568

569
                    dbgap_project = phsid[0]
1✔
570
                    # There are issues where dbgap has a wrong entry in their whitelist. Since we do a bulk arborist request, there are wrong entries in it that invalidates the whole request causing other correct entries not to be added
571
                    skip = False
1✔
572
                    for pattern in project_id_patterns:
1✔
573
                        self.logger.debug(
1✔
574
                            "Checking pattern:{} with project_id:{}".format(
575
                                pattern, dbgap_project
576
                            )
577
                        )
578
                        if re.match(pattern, dbgap_project):
1✔
579
                            skip = False
1✔
580
                            break
1✔
581
                        else:
582
                            skip = True
1✔
583
                    if skip:
1✔
584
                        self.logger.warning(
1✔
585
                            "Skip processing from file {}, user {} with project {}".format(
586
                                filepath,
587
                                username,
588
                                dbgap_project,
589
                            )
590
                        )
591
                        continue
1✔
592
                    if len(phsid) > 1 and parse_consent_code:
1✔
593
                        consent_code = phsid[-1]
1✔
594

595
                        # c999 indicates full access to all consents and access
596
                        # to a study-specific exchange area
597
                        # access to at least one study-specific exchange area implies access
598
                        # to the parent study's common exchange area
599
                        #
600
                        # NOTE: Handling giving access to all consents is done at
601
                        #       a later time, when we have full information about possible
602
                        #       consents
603
                        self.logger.debug(
1✔
604
                            f"got consent code {consent_code} from dbGaP project "
605
                            f"{dbgap_project}"
606
                        )
607
                        if (
1✔
608
                            consent_code == "c999"
609
                            and enable_common_exchange_area_access
610
                            and dbgap_project in study_common_exchange_areas
611
                        ):
612
                            self.logger.info(
1✔
613
                                "found study with consent c999 and Fence "
614
                                "is configured to parse exchange area data. Giving user "
615
                                f"{username} {privileges} privileges in project: "
616
                                f"{study_common_exchange_areas[dbgap_project]}."
617
                            )
618
                            self._add_dbgap_project_for_user(
1✔
619
                                study_common_exchange_areas[dbgap_project],
620
                                privileges,
621
                                username,
622
                                sess,
623
                                user_projects,
624
                                dbgap_config,
625
                            )
626

627
                        dbgap_project += "." + consent_code
1✔
628

629
                    self._add_children_for_dbgap_project(
1✔
630
                        dbgap_project,
631
                        privileges,
632
                        username,
633
                        sess,
634
                        user_projects,
635
                        dbgap_config,
636
                    )
637

638
                    display_name = row.get("user name") or ""
1✔
639
                    tags = {"dbgap_role": row.get("role") or ""}
1✔
640

641
                    # some dbgap telemetry files have information about a researchers PI
642
                    if "downloader for" in row:
1✔
643
                        tags["pi"] = row["downloader for"]
1✔
644

645
                    # prefer name over previous "downloader for" if it exists
646
                    if "downloader for names" in row:
1✔
647
                        tags["pi"] = row["downloader for names"]
×
648

649
                    user_info[username] = {
1✔
650
                        "email": row.get("email")
651
                        or user_info[username].get("email")
652
                        or "",
653
                        "display_name": display_name,
654
                        "phone_number": row.get("phone")
655
                        or user_info[username].get("phone_number")
656
                        or "",
657
                        "tags": tags,
658
                    }
659

660
                    self._process_dbgap_project(
1✔
661
                        dbgap_project,
662
                        privileges,
663
                        username,
664
                        sess,
665
                        user_projects,
666
                        dbgap_config,
667
                    )
668

669
        return user_projects, user_info
1✔
670

671
    def _get_children(self, dbgap_project):
1✔
672
        return self.parent_to_child_studies_mapping.get(dbgap_project.split(".")[0])
1✔
673

674
    def _add_children_for_dbgap_project(
1✔
675
        self, dbgap_project, privileges, username, sess, user_projects, dbgap_config
676
    ):
677
        """
678
        Adds the configured child studies for the given dbgap_project, adding it to the provided user_projects. If
679
        parse_consent_code is true, then the consents granted in the provided dbgap_project will also be granted to the
680
        child studies.
681
        """
682
        parent_phsid = dbgap_project
1✔
683
        parse_consent_code = self._get_parse_consent_code(dbgap_config)
1✔
684
        child_suffix = ""
1✔
685
        if parse_consent_code and re.match(
1✔
686
            config["DBGAP_ACCESSION_WITH_CONSENT_REGEX"], dbgap_project
687
        ):
688
            parent_phsid_parts = dbgap_project.split(".")
1✔
689
            parent_phsid = parent_phsid_parts[0]
1✔
690
            child_suffix = "." + parent_phsid_parts[1]
1✔
691

692
        if parent_phsid not in self.parent_to_child_studies_mapping:
1✔
693
            return
1✔
694

695
        self.logger.info(
1✔
696
            f"found parent study {parent_phsid} and Fence "
697
            "is configured to provide additional access to child studies. Giving user "
698
            f"{username} {privileges} privileges in projects: "
699
            f"{{k + child_suffix: v + child_suffix for k, v in self.parent_to_child_studies_mapping.items()}}."
700
        )
701
        child_studies = self.parent_to_child_studies_mapping.get(parent_phsid, [])
1✔
702
        for child_study in child_studies:
1✔
703
            self._add_dbgap_project_for_user(
1✔
704
                child_study + child_suffix,
705
                privileges,
706
                username,
707
                sess,
708
                user_projects,
709
                dbgap_config,
710
            )
711

712
    def _add_dbgap_project_for_user(
1✔
713
        self, dbgap_project, privileges, username, sess, user_projects, dbgap_config
714
    ):
715
        """
716
        Helper function for csv parsing that adds a given dbgap project to Fence/Arborist
717
        and then updates the dictionary containing all user's project access
718
        """
719
        if dbgap_project not in self._projects:
1✔
720
            self.logger.debug(
1✔
721
                "creating Project in fence for dbGaP study: {}".format(dbgap_project)
722
            )
723

724
            project = self._get_or_create(sess, Project, auth_id=dbgap_project)
1✔
725

726
            # need to add dbgap project to arborist
727
            if self.arborist_client:
1✔
728
                self._determine_arborist_resource(dbgap_project, dbgap_config)
1✔
729

730
            if project.name is None:
1✔
731
                project.name = dbgap_project
1✔
732
            self._projects[dbgap_project] = project
1✔
733
        phsid_privileges = {dbgap_project: set(privileges)}
1✔
734
        if username in user_projects:
1✔
735
            user_projects[username].update(phsid_privileges)
1✔
736
        else:
737
            user_projects[username] = phsid_privileges
1✔
738

739
    @staticmethod
1✔
740
    def sync_two_user_info_dict(user_info1, user_info2):
1✔
741
        """
742
        Merge user_info1 into user_info2. Values in user_info2 are overriden
743
        by values in user_info1. user_info2 ends up containing the merged dict.
744

745
        Args:
746
            user_info1 (dict): nested dict
747
            user_info2 (dict): nested dict
748

749
            Example:
750
            {username: {'email': 'abc@email.com'}}
751

752
        Returns:
753
            None
754
        """
755
        user_info2.update(user_info1)
1✔
756

757
    def sync_two_phsids_dict(
1✔
758
        self,
759
        phsids1,
760
        phsids2,
761
        source1=None,
762
        source2=None,
763
        phsids2_overrides_phsids1=True,
764
    ):
765
        """
766
        Merge phsids1 into phsids2. If `phsids2_overrides_phsids1`, values in
767
        phsids1 are overriden by values in phsids2. phsids2 ends up containing
768
        the merged dict (see explanation below).
769
        `source1` and `source2`: for logging.
770

771
        Args:
772
            phsids1, phsids2: nested dicts mapping phsids to sets of permissions
773

774
            source1, source2: source of authz information (eg. dbgap, user_yaml, visas)
775

776
            Example:
777
            {
778
                username: {
779
                    phsid1: {'read-storage','write-storage'},
780
                    phsid2: {'read-storage'},
781
                }
782
            }
783

784
        Return:
785
            None
786

787
        Explanation:
788
            Consider merging projects of the same user:
789

790
                {user1: {phsid1: privillege1}}
791

792
                {user1: {phsid2: privillege2}}
793

794
            case 1: phsid1 != phsid2. Output:
795

796
                {user1: {phsid1: privillege1, phsid2: privillege2}}
797

798
            case 2: phsid1 == phsid2 and privillege1! = privillege2. Output:
799

800
                {user1: {phsid1: union(privillege1, privillege2)}}
801

802
            For the other cases, just simple addition
803
        """
804

805
        for user, projects1 in phsids1.items():
1✔
806
            if not phsids2.get(user):
1✔
807
                if source1:
1✔
808
                    self.auth_source[user].add(source1)
1✔
809
                phsids2[user] = projects1
1✔
810
            elif phsids2_overrides_phsids1:
1✔
811
                if source1:
1✔
812
                    self.auth_source[user].add(source1)
×
813
                if source2:
1✔
814
                    self.auth_source[user].add(source2)
×
815
                for phsid1, privilege1 in projects1.items():
1✔
816
                    if phsid1 not in phsids2[user]:
1✔
817
                        phsids2[user][phsid1] = set()
1✔
818
                    phsids2[user][phsid1].update(privilege1)
1✔
819
            elif source2:
×
820
                self.auth_source[user].add(source2)
×
821

822
    def sync_to_db_and_storage_backend(
1✔
823
        self,
824
        user_project,
825
        user_info,
826
        sess,
827
        do_not_revoke_from_db_and_storage=False,
828
        expires=None,
829
    ):
830
        """
831
        sync user access control to database and storage backend
832

833
        Args:
834
            user_project (dict): a dictionary of
835

836
                {
837
                    username: {
838
                        'project1': {'read-storage','write-storage'},
839
                        'project2': {'read-storage'}
840
                    }
841
                }
842

843
            user_info (dict): a dictionary of {username: user_info{}}
844
            sess: a sqlalchemy session
845

846
        Return:
847
            None
848
        """
849
        google_bulk_mapping = None
1✔
850
        if config["GOOGLE_BULK_UPDATES"]:
1✔
851
            google_bulk_mapping = {}
1✔
852

853
        self._init_projects(user_project, sess)
1✔
854

855
        auth_provider_list = [
1✔
856
            self._get_or_create(sess, AuthorizationProvider, name="dbGaP"),
857
            self._get_or_create(sess, AuthorizationProvider, name="fence"),
858
        ]
859

860
        cur_db_user_project_list = {
1✔
861
            (ua.user.username.lower(), ua.project.auth_id)
862
            for ua in sess.query(AccessPrivilege).all()
863
        }
864

865
        # we need to compare db -> whitelist case-insensitively for username.
866
        # db stores case-sensitively, but we need to query case-insensitively
867
        user_project_lowercase = {}
1✔
868
        syncing_user_project_list = set()
1✔
869
        for username, projects in user_project.items():
1✔
870
            user_project_lowercase[username.lower()] = projects
1✔
871
            for project, _ in projects.items():
1✔
872
                syncing_user_project_list.add((username.lower(), project))
1✔
873

874
        user_info_lowercase = {
1✔
875
            username.lower(): info for username, info in user_info.items()
876
        }
877

878
        to_delete = set.difference(cur_db_user_project_list, syncing_user_project_list)
1✔
879
        to_add = set.difference(syncing_user_project_list, cur_db_user_project_list)
1✔
880
        to_update = set.intersection(
1✔
881
            cur_db_user_project_list, syncing_user_project_list
882
        )
883

884
        # when updating users we want to maintain case sesitivity in the username so
885
        # pass the original, non-lowered user_info dict
886
        self._upsert_userinfo(sess, user_info)
1✔
887

888
        if not do_not_revoke_from_db_and_storage:
1✔
889
            self._revoke_from_storage(
1✔
890
                to_delete, sess, google_bulk_mapping=google_bulk_mapping
891
            )
892
            self._revoke_from_db(sess, to_delete)
1✔
893

894
        self._grant_from_storage(
1✔
895
            to_add,
896
            user_project_lowercase,
897
            sess,
898
            google_bulk_mapping=google_bulk_mapping,
899
            expires=expires,
900
        )
901

902
        self._grant_from_db(
1✔
903
            sess,
904
            to_add,
905
            user_info_lowercase,
906
            user_project_lowercase,
907
            auth_provider_list,
908
        )
909

910
        # re-grant
911
        self._grant_from_storage(
1✔
912
            to_update,
913
            user_project_lowercase,
914
            sess,
915
            google_bulk_mapping=google_bulk_mapping,
916
            expires=expires,
917
        )
918
        self._update_from_db(sess, to_update, user_project_lowercase)
1✔
919

920
        if not do_not_revoke_from_db_and_storage:
1✔
921
            self._validate_and_update_user_admin(sess, user_info_lowercase)
1✔
922

923
        sess.commit()
1✔
924

925
        if config["GOOGLE_BULK_UPDATES"]:
1✔
926
            self.logger.info("Doing bulk Google update...")
1✔
927
            update_google_groups_for_users(google_bulk_mapping)
1✔
928
            self.logger.info("Bulk Google update done!")
×
929

930
        sess.commit()
1✔
931

932
    def sync_to_storage_backend(
1✔
933
        self, user_project, user_info, sess, expires, skip_google_updates=False
934
    ):
935
        """
936
        sync user access control to storage backend with given expiration
937

938
        Args:
939
            user_project (dict): a dictionary of
940

941
                {
942
                    username: {
943
                        'project1': {'read-storage','write-storage'},
944
                        'project2': {'read-storage'}
945
                    }
946
                }
947

948
            user_info (dict): a dictionary of attributes for a user.
949
            sess: a sqlalchemy session
950
            expires (int): time at which synced Arborist policies and
951
                   inclusion in any GBAG are set to expire
952
            skip_google_updates (bool): True if google group updates should be skipped. False if otherwise.
953
        Return:
954
            None
955
        """
956
        if not expires:
1✔
957
            raise Exception(
×
958
                f"sync to storage backend requires an expiration. you provided: {expires}"
959
            )
960

961
        google_group_user_mapping = None
1✔
962
        if config["GOOGLE_BULK_UPDATES"]:
1✔
963
            google_group_user_mapping = {}
×
964
            get_or_create_proxy_group_id(
×
965
                expires=expires,
966
                user_id=user_info["user_id"],
967
                username=user_info["username"],
968
                session=sess,
969
                storage_manager=self.storage_manager,
970
            )
971

972
        # TODO: eventually it'd be nice to remove this step but it's required
973
        #       so that grant_from_storage can determine what storage backends
974
        #       are needed for a project.
975
        self._init_projects(user_project, sess)
1✔
976

977
        # we need to compare db -> whitelist case-insensitively for username.
978
        # db stores case-sensitively, but we need to query case-insensitively
979
        user_project_lowercase = {}
1✔
980
        syncing_user_project_list = set()
1✔
981
        for username, projects in user_project.items():
1✔
982
            user_project_lowercase[username.lower()] = projects
1✔
983
            for project, _ in projects.items():
1✔
984
                syncing_user_project_list.add((username.lower(), project))
1✔
985

986
        to_add = set(syncing_user_project_list)
1✔
987

988
        # when updating users we want to maintain case sensitivity in the username so
989
        # pass the original, non-lowered user_info dict
990
        self._upsert_userinfo(sess, {user_info["username"].lower(): user_info})
1✔
991
        if not skip_google_updates:
1✔
992
            self._grant_from_storage(
1✔
993
                to_add,
994
                user_project_lowercase,
995
                sess,
996
                google_bulk_mapping=google_group_user_mapping,
997
                expires=expires,
998
            )
999

1000
            if config["GOOGLE_BULK_UPDATES"]:
1✔
1001
                self.logger.info("Updating user's google groups ...")
×
1002
                update_google_groups_for_users(google_group_user_mapping)
×
1003
                self.logger.info("Google groups update done!!")
×
1004

1005
        sess.commit()
1✔
1006

1007
    def _revoke_from_db(self, sess, to_delete):
1✔
1008
        """
1009
        Revoke user access to projects in the auth database
1010

1011
        Args:
1012
            sess: sqlalchemy session
1013
            to_delete: a set of (username, project.auth_id) to be revoked from db
1014
        Return:
1015
            None
1016
        """
1017
        for username, project_auth_id in to_delete:
1✔
1018
            q = (
1✔
1019
                sess.query(AccessPrivilege)
1020
                .filter(AccessPrivilege.project.has(auth_id=project_auth_id))
1021
                .join(AccessPrivilege.user)
1022
                .filter(func.lower(User.username) == username)
1023
                .all()
1024
            )
1025
            for access in q:
1✔
1026
                self.logger.info(
1✔
1027
                    "revoke {} access to {} in db".format(username, project_auth_id)
1028
                )
1029
                sess.delete(access)
1✔
1030

1031
    def _validate_and_update_user_admin(self, sess, user_info):
1✔
1032
        """
1033
        Make sure there is no admin user that is not in yaml/csv files
1034

1035
        Args:
1036
            sess: sqlalchemy session
1037
            user_info: a dict of
1038
            {
1039
                username: {
1040
                    'email': email,
1041
                    'display_name': display_name,
1042
                    'phone_number': phonenum,
1043
                    'tags': {'k1':'v1', 'k2': 'v2'}
1044
                    'admin': is_admin
1045
                }
1046
            }
1047
        Returns:
1048
            None
1049
        """
1050
        for admin_user in sess.query(User).filter_by(is_admin=True).all():
1✔
1051
            if admin_user.username.lower() not in user_info:
1✔
1052
                admin_user.is_admin = False
×
1053
                sess.add(admin_user)
×
1054
                self.logger.info(
×
1055
                    "remove admin access from {} in db".format(
1056
                        admin_user.username.lower()
1057
                    )
1058
                )
1059

1060
    def _update_from_db(self, sess, to_update, user_project):
1✔
1061
        """
1062
        Update user access to projects in the auth database
1063

1064
        Args:
1065
            sess: sqlalchemy session
1066
            to_update:
1067
                a set of (username, project.auth_id) to be updated from db
1068

1069
        Return:
1070
            None
1071
        """
1072

1073
        for username, project_auth_id in to_update:
1✔
1074
            q = (
1✔
1075
                sess.query(AccessPrivilege)
1076
                .filter(AccessPrivilege.project.has(auth_id=project_auth_id))
1077
                .join(AccessPrivilege.user)
1078
                .filter(func.lower(User.username) == username)
1079
                .all()
1080
            )
1081
            for access in q:
1✔
1082
                access.privilege = user_project[username][project_auth_id]
1✔
1083
                self.logger.info(
1✔
1084
                    "update {} with {} access to {} in db".format(
1085
                        username, access.privilege, project_auth_id
1086
                    )
1087
                )
1088

1089
    def _grant_from_db(self, sess, to_add, user_info, user_project, auth_provider_list):
1✔
1090
        """
1091
        Grant user access to projects in the auth database
1092
        Args:
1093
            sess: sqlalchemy session
1094
            to_add: a set of (username, project.auth_id) to be granted
1095
            user_project:
1096
                a dictionary of {username: {project: {'read','write'}}
1097
        Return:
1098
            None
1099
        """
1100
        for username, project_auth_id in to_add:
1✔
1101
            u = query_for_user(session=sess, username=username)
1✔
1102

1103
            auth_provider = auth_provider_list[0]
1✔
1104
            if "dbgap_role" not in user_info[username]["tags"]:
1✔
1105
                auth_provider = auth_provider_list[1]
1✔
1106
            user_access = AccessPrivilege(
1✔
1107
                user=u,
1108
                project=self._projects[project_auth_id],
1109
                privilege=list(user_project[username][project_auth_id]),
1110
                auth_provider=auth_provider,
1111
            )
1112
            self.logger.info(
1✔
1113
                "grant user {} to {} with access {}".format(
1114
                    username, user_access.project, user_access.privilege
1115
                )
1116
            )
1117
            sess.add(user_access)
1✔
1118

1119
    def _upsert_userinfo(self, sess, user_info):
1✔
1120
        """
1121
        update user info to database.
1122

1123
        Args:
1124
            sess: sqlalchemy session
1125
            user_info:
1126
                a dict of {username: {display_name, phone_number, tags, admin}
1127

1128
        Return:
1129
            None
1130
        """
1131

1132
        for username in user_info:
1✔
1133
            u = query_for_user(session=sess, username=username)
1✔
1134

1135
            if u is None:
1✔
1136
                self.logger.info("create user {}".format(username))
1✔
1137
                u = User(username=username)
1✔
1138
                sess.add(u)
1✔
1139

1140
            if self.arborist_client:
1✔
1141
                self.arborist_client.create_user({"name": username})
1✔
1142

1143
            u.email = user_info[username].get("email", "")
1✔
1144
            u.display_name = user_info[username].get("display_name", "")
1✔
1145
            u.phone_number = user_info[username].get("phone_number", "")
1✔
1146
            u.is_admin = user_info[username].get("admin", False)
1✔
1147

1148
            idp_name = user_info[username].get("idp_name", "")
1✔
1149
            if idp_name and not u.identity_provider:
1✔
1150
                idp = (
×
1151
                    sess.query(IdentityProvider)
1152
                    .filter(IdentityProvider.name == idp_name)
1153
                    .first()
1154
                )
1155
                if not idp:
×
1156
                    idp = IdentityProvider(name=idp_name)
×
1157
                u.identity_provider = idp
×
1158

1159
            # do not update if there is no tag
1160
            if not user_info[username].get("tags"):
1✔
1161
                continue
1✔
1162

1163
            # remove user db tags if they are not shown in new tags
1164
            for tag in u.tags:
1✔
1165
                if tag.key not in user_info[username]["tags"]:
1✔
1166
                    u.tags.remove(tag)
1✔
1167

1168
            # sync
1169
            for k, v in user_info[username]["tags"].items():
1✔
1170
                found = False
1✔
1171
                for tag in u.tags:
1✔
1172
                    if tag.key == k:
1✔
1173
                        found = True
1✔
1174
                        tag.value = v
1✔
1175
                # create new tag if not found
1176
                if not found:
1✔
1177
                    tag = Tag(key=k, value=v)
1✔
1178
                    u.tags.append(tag)
1✔
1179

1180
    def _revoke_from_storage(self, to_delete, sess, google_bulk_mapping=None):
1✔
1181
        """
1182
        If a project have storage backend, revoke user's access to buckets in
1183
        the storage backend.
1184

1185
        Args:
1186
            to_delete: a set of (username, project.auth_id) to be revoked
1187

1188
        Return:
1189
            None
1190
        """
1191
        for username, project_auth_id in to_delete:
1✔
1192
            project = (
1✔
1193
                sess.query(Project).filter(Project.auth_id == project_auth_id).first()
1194
            )
1195
            for sa in project.storage_access:
1✔
1196
                if not hasattr(self, "storage_manager"):
1✔
1197
                    self.logger.error(
×
1198
                        (
1199
                            "CANNOT revoke {} access to {} in {} because there is NO "
1200
                            "configured storage accesses at all. See configuration. "
1201
                            "Continuing anyway..."
1202
                        ).format(username, project_auth_id, sa.provider.name)
1203
                    )
1204
                    continue
×
1205

1206
                self.logger.info(
1✔
1207
                    "revoke {} access to {} in {}".format(
1208
                        username, project_auth_id, sa.provider.name
1209
                    )
1210
                )
1211
                self.storage_manager.revoke_access(
1✔
1212
                    provider=sa.provider.name,
1213
                    username=username,
1214
                    project=project,
1215
                    session=sess,
1216
                    google_bulk_mapping=google_bulk_mapping,
1217
                )
1218

1219
    def _grant_from_storage(
1✔
1220
        self, to_add, user_project, sess, google_bulk_mapping=None, expires=None
1221
    ):
1222
        """
1223
        If a project have storage backend, grant user's access to buckets in
1224
        the storage backend.
1225

1226
        Args:
1227
            to_add: a set of (username, project.auth_id)  to be granted
1228
            user_project: a dictionary like:
1229

1230
                    {username: {phsid: {'read-storage','write-storage'}}}
1231

1232
        Return:
1233
            dict of the users' storage usernames to their user_projects and the respective storage access.
1234
        """
1235
        storage_user_to_sa_and_user_project = defaultdict()
1✔
1236
        for username, project_auth_id in to_add:
1✔
1237
            project = self._projects[project_auth_id]
1✔
1238
            for sa in project.storage_access:
1✔
1239
                access = list(user_project[username][project_auth_id])
1✔
1240
                if not hasattr(self, "storage_manager"):
1✔
1241
                    self.logger.error(
×
1242
                        (
1243
                            "CANNOT grant {} access {} to {} in {} because there is NO "
1244
                            "configured storage accesses at all. See configuration. "
1245
                            "Continuing anyway..."
1246
                        ).format(username, access, project_auth_id, sa.provider.name)
1247
                    )
1248
                    continue
×
1249

1250
                self.logger.info(
1✔
1251
                    "grant {} access {} to {} in {}".format(
1252
                        username, access, project_auth_id, sa.provider.name
1253
                    )
1254
                )
1255
                storage_username = self.storage_manager.grant_access(
1✔
1256
                    provider=sa.provider.name,
1257
                    username=username,
1258
                    project=project,
1259
                    access=access,
1260
                    session=sess,
1261
                    google_bulk_mapping=google_bulk_mapping,
1262
                    expires=expires,
1263
                )
1264

1265
                storage_user_to_sa_and_user_project[storage_username] = (sa, project)
1✔
1266
        return storage_user_to_sa_and_user_project
1✔
1267

1268
    def _init_projects(self, user_project, sess):
1✔
1269
        """
1270
        initialize projects
1271
        """
1272
        if self.project_mapping:
1✔
1273
            for projects in list(self.project_mapping.values()):
1✔
1274
                for p in projects:
1✔
1275
                    self.logger.debug(
1✔
1276
                        "creating Project with info from project_mapping: {}".format(p)
1277
                    )
1278
                    project = self._get_or_create(sess, Project, **p)
1✔
1279
                    self._projects[p["auth_id"]] = project
1✔
1280
        for _, projects in user_project.items():
1✔
1281
            for auth_id in list(projects.keys()):
1✔
1282
                project = sess.query(Project).filter(Project.auth_id == auth_id).first()
1✔
1283
                if not project:
1✔
1284
                    data = {"name": auth_id, "auth_id": auth_id}
1✔
1285
                    try:
1✔
1286
                        project = self._get_or_create(sess, Project, **data)
1✔
1287
                    except IntegrityError as e:
×
1288
                        sess.rollback()
×
1289
                        self.logger.error(
×
1290
                            f"Project {auth_id} already exists. Detail {str(e)}"
1291
                        )
1292
                        raise Exception(
×
1293
                            "Project {} already exists. Detail {}. Please contact your system administrator.".format(
1294
                                auth_id, str(e)
1295
                            )
1296
                        )
1297
                if auth_id not in self._projects:
1✔
1298
                    self._projects[auth_id] = project
1✔
1299

1300
    @staticmethod
1✔
1301
    def _get_or_create(sess, model, **kwargs):
1✔
1302
        instance = sess.query(model).filter_by(**kwargs).first()
1✔
1303
        if not instance:
1✔
1304
            instance = model(**kwargs)
1✔
1305
            sess.add(instance)
1✔
1306
        return instance
1✔
1307

1308
    def _process_dbgap_files(self, dbgap_config, sess):
1✔
1309
        """
1310
        Args:
1311
            dbgap_config : a dictionary containing information about a single
1312
                           dbgap sftp server (from fence config)
1313
            sess: database session
1314

1315
        Return:
1316
            user_projects (dict)
1317
            user_info (dict)
1318
        """
1319
        dbgap_file_list = []
1✔
1320
        hostname = dbgap_config["info"]["host"]
1✔
1321
        username = dbgap_config["info"]["username"]
1✔
1322
        encrypted = dbgap_config["info"].get("encrypted", True)
1✔
1323
        folderdir = os.path.join(str(self.folder), str(hostname), str(username))
1✔
1324

1325
        try:
1✔
1326
            if os.path.exists(folderdir):
1✔
1327
                dbgap_file_list = glob.glob(
×
1328
                    os.path.join(folderdir, "*")
1329
                )  # get lists of file from folder
1330
            else:
1331
                self.logger.info("Downloading files from: {}".format(hostname))
1✔
1332
                dbgap_file_list = self._download(dbgap_config)
1✔
1333
        except Exception as e:
1✔
1334
            self.logger.error(e)
1✔
1335
            exit(1)
1✔
1336
        self.logger.info("dbgap files: {}".format(dbgap_file_list))
×
1337
        user_projects, user_info = self._get_user_permissions_from_csv_list(
×
1338
            dbgap_file_list,
1339
            encrypted=encrypted,
1340
            session=sess,
1341
            dbgap_config=dbgap_config,
1342
        )
1343

1344
        user_projects = self.parse_projects(user_projects)
×
1345
        return user_projects, user_info
×
1346

1347
    def _get_user_permissions_from_csv_list(
1✔
1348
        self, file_list, encrypted, session, dbgap_config={}
1349
    ):
1350
        """
1351
        Args:
1352
            file_list: list of files (represented as strings)
1353
            encrypted: boolean indicating whether those files are encrypted
1354
            session: sqlalchemy session
1355
            dbgap_config: a dictionary containing information about the dbGaP sftp server
1356
                    (comes from fence config)
1357

1358
        Return:
1359
            user_projects (dict)
1360
            user_info (dict)
1361
        """
1362
        permissions = [{"read-storage", "read"} for _ in file_list]
1✔
1363
        user_projects, user_info = self._parse_csv(
1✔
1364
            dict(list(zip(file_list, permissions))),
1365
            sess=session,
1366
            dbgap_config=dbgap_config,
1367
            encrypted=encrypted,
1368
        )
1369
        return user_projects, user_info
1✔
1370

1371
    def _merge_multiple_local_csv_files(
1✔
1372
        self, dbgap_file_list, encrypted, dbgap_configs, session
1373
    ):
1374
        """
1375
        Args:
1376
            dbgap_file_list (list): a list of whitelist file locations stored locally
1377
            encrypted (bool): whether the file is encrypted (comes from fence config)
1378
            dbgap_configs (list): list of dictionaries containing information about the dbgap server (comes from fence config)
1379
            session (sqlalchemy.Session): database session
1380

1381
        Return:
1382
            merged_user_projects (dict)
1383
            merged_user_info (dict)
1384
        """
1385
        merged_user_projects = {}
1✔
1386
        merged_user_info = {}
1✔
1387

1388
        for dbgap_config in dbgap_configs:
1✔
1389
            user_projects, user_info = self._get_user_permissions_from_csv_list(
1✔
1390
                dbgap_file_list,
1391
                encrypted,
1392
                session=session,
1393
                dbgap_config=dbgap_config,
1394
            )
1395
            self.sync_two_user_info_dict(user_info, merged_user_info)
1✔
1396
            self.sync_two_phsids_dict(user_projects, merged_user_projects)
1✔
1397
        return merged_user_projects, merged_user_info
1✔
1398

1399
    def _merge_multiple_dbgap_sftp(self, dbgap_servers, sess):
1✔
1400
        """
1401
        Args:
1402
            dbgap_servers : a list of dictionaries each containging config on
1403
                           dbgap sftp server (comes from fence config)
1404
            sess: database session
1405

1406
        Return:
1407
            merged_user_projects (dict)
1408
            merged_user_info (dict)
1409
        """
1410
        merged_user_projects = {}
1✔
1411
        merged_user_info = {}
1✔
1412
        for dbgap in dbgap_servers:
1✔
1413
            user_projects, user_info = self._process_dbgap_files(dbgap, sess)
1✔
1414
            # merge into merged_user_info
1415
            # user_info overrides original info in merged_user_info
1416
            self.sync_two_user_info_dict(user_info, merged_user_info)
1✔
1417

1418
            # merge all access info dicts into "merged_user_projects".
1419
            # the access info is combined - if the user_projects access is
1420
            # ["read"] and the merged_user_projects is ["read-storage"], the
1421
            # resulting access is ["read", "read-storage"].
1422
            self.sync_two_phsids_dict(user_projects, merged_user_projects)
1✔
1423
        return merged_user_projects, merged_user_info
1✔
1424

1425
    def parse_projects(self, user_projects):
1✔
1426
        """
1427
        helper function for parsing projects
1428
        """
1429
        return {key.lower(): value for key, value in user_projects.items()}
1✔
1430

1431
    def _process_dbgap_project(
1✔
1432
        self, dbgap_project, privileges, username, sess, user_projects, dbgap_config
1433
    ):
1434
        if dbgap_project not in self.project_mapping:
1✔
1435
            self._add_dbgap_project_for_user(
1✔
1436
                dbgap_project,
1437
                privileges,
1438
                username,
1439
                sess,
1440
                user_projects,
1441
                dbgap_config,
1442
            )
1443

1444
        for element_dict in self.project_mapping.get(dbgap_project, []):
1✔
1445
            try:
1✔
1446
                phsid_privileges = {element_dict["auth_id"]: set(privileges)}
1✔
1447

1448
                # need to add dbgap project to arborist
1449
                if self.arborist_client:
1✔
1450
                    self._determine_arborist_resource(
1✔
1451
                        element_dict["auth_id"], dbgap_config
1452
                    )
1453

1454
                if username not in user_projects:
1✔
1455
                    user_projects[username] = {}
1✔
1456
                user_projects[username].update(phsid_privileges)
1✔
1457

1458
            except ValueError as e:
×
1459
                self.logger.info(e)
×
1460

1461
    def _process_user_projects(
1✔
1462
        self,
1463
        user_projects,
1464
        enable_common_exchange_area_access,
1465
        study_common_exchange_areas,
1466
        dbgap_config,
1467
        sess,
1468
    ):
1469
        user_projects_to_modify = copy.deepcopy(user_projects)
1✔
1470
        for username in user_projects.keys():
1✔
1471
            for project in user_projects[username].keys():
1✔
1472
                phsid = project.split(".")
1✔
1473
                dbgap_project = phsid[0]
1✔
1474
                privileges = user_projects[username][project]
1✔
1475
                if len(phsid) > 1 and self._get_parse_consent_code(dbgap_config):
1✔
1476
                    consent_code = phsid[-1]
1✔
1477

1478
                    # c999 indicates full access to all consents and access
1479
                    # to a study-specific exchange area
1480
                    # access to at least one study-specific exchange area implies access
1481
                    # to the parent study's common exchange area
1482
                    #
1483
                    # NOTE: Handling giving access to all consents is done at
1484
                    #       a later time, when we have full information about possible
1485
                    #       consents
1486
                    self.logger.debug(
1✔
1487
                        f"got consent code {consent_code} from dbGaP project "
1488
                        f"{dbgap_project}"
1489
                    )
1490
                    if (
1✔
1491
                        consent_code == "c999"
1492
                        and enable_common_exchange_area_access
1493
                        and dbgap_project in study_common_exchange_areas
1494
                    ):
1495
                        self.logger.info(
1✔
1496
                            "found study with consent c999 and Fence "
1497
                            "is configured to parse exchange area data. Giving user "
1498
                            f"{username} {privileges} privileges in project: "
1499
                            f"{study_common_exchange_areas[dbgap_project]}."
1500
                        )
1501
                        self._add_dbgap_project_for_user(
1✔
1502
                            study_common_exchange_areas[dbgap_project],
1503
                            privileges,
1504
                            username,
1505
                            sess,
1506
                            user_projects_to_modify,
1507
                            dbgap_config,
1508
                        )
1509

1510
                    dbgap_project += "." + consent_code
1✔
1511

1512
                self._process_dbgap_project(
1✔
1513
                    dbgap_project,
1514
                    privileges,
1515
                    username,
1516
                    sess,
1517
                    user_projects_to_modify,
1518
                    dbgap_config,
1519
                )
1520
        for user in user_projects_to_modify.keys():
1✔
1521
            user_projects[user] = user_projects_to_modify[user]
1✔
1522

1523
    def sync(self):
1✔
1524
        if self.session:
1✔
1525
            self._sync(self.session)
1✔
1526
        else:
1527
            with self.driver.session as s:
×
1528
                self._sync(s)
×
1529

1530
    def download(self):
1✔
1531
        for dbgap_server in self.dbGaP:
×
1532
            self._download(dbgap_server)
×
1533

1534
    def _download(self, dbgap_config):
1✔
1535
        """
1536
        Download files from dbgap server.
1537
        """
1538
        server = dbgap_config["info"]
1✔
1539
        protocol = dbgap_config["protocol"]
1✔
1540
        hostname = server["host"]
1✔
1541
        username = server["username"]
1✔
1542
        folderdir = os.path.join(str(self.folder), str(hostname), str(username))
1✔
1543

1544
        if not os.path.exists(folderdir):
1✔
1545
            os.makedirs(folderdir)
1✔
1546

1547
        self.logger.info("Download from server")
1✔
1548
        try:
1✔
1549
            if protocol == "sftp":
1✔
1550
                self._get_from_sftp_with_proxy(server, folderdir)
1✔
1551
            else:
1552
                self._get_from_ftp_with_proxy(server, folderdir)
×
1553
            dbgap_files = glob.glob(os.path.join(folderdir, "*"))
×
1554
            return dbgap_files
×
1555
        except Exception as e:
1✔
1556
            self.logger.error(e)
1✔
1557
            raise
1✔
1558

1559
    def _sync(self, sess):
1✔
1560
        """
1561
        Collect files from dbgap server(s), sync csv and yaml files to storage
1562
        backend and fence DB
1563
        """
1564

1565
        # get all dbgap files
1566
        user_projects = {}
1✔
1567
        user_info = {}
1✔
1568
        if self.is_sync_from_dbgap_server:
1✔
1569
            self.logger.debug(
1✔
1570
                "Pulling telemetry files from {} dbgap sftp servers".format(
1571
                    len(self.dbGaP)
1572
                )
1573
            )
1574
            user_projects, user_info = self._merge_multiple_dbgap_sftp(self.dbGaP, sess)
1✔
1575

1576
        local_csv_file_list = []
1✔
1577
        if self.sync_from_local_csv_dir:
1✔
1578
            local_csv_file_list = glob.glob(
1✔
1579
                os.path.join(self.sync_from_local_csv_dir, "*")
1580
            )
1581
            # Sort the list so the order of of files is consistent across platforms
1582
            local_csv_file_list.sort()
1✔
1583

1584
        user_projects_csv, user_info_csv = self._merge_multiple_local_csv_files(
1✔
1585
            local_csv_file_list,
1586
            encrypted=False,
1587
            session=sess,
1588
            dbgap_configs=self.dbGaP,
1589
        )
1590

1591
        try:
1✔
1592
            user_yaml = UserYAML.from_file(
1✔
1593
                self.sync_from_local_yaml_file, encrypted=False, logger=self.logger
1594
            )
1595
        except (EnvironmentError, AssertionError) as e:
1✔
1596
            self.logger.error(str(e))
1✔
1597
            self.logger.error("aborting early")
1✔
1598
            raise
1✔
1599

1600
        # parse all projects
1601
        user_projects_csv = self.parse_projects(user_projects_csv)
1✔
1602
        user_projects = self.parse_projects(user_projects)
1✔
1603
        user_yaml.projects = self.parse_projects(user_yaml.projects)
1✔
1604

1605
        # merge all user info dicts into "user_info".
1606
        # the user info (such as email) in the user.yaml files
1607
        # overrides the user info from the CSV files.
1608
        self.sync_two_user_info_dict(user_info_csv, user_info)
1✔
1609
        self.sync_two_user_info_dict(user_yaml.user_info, user_info)
1✔
1610

1611
        # merge all access info dicts into "user_projects".
1612
        # the access info is combined - if the user.yaml access is
1613
        # ["read"] and the CSV file access is ["read-storage"], the
1614
        # resulting access is ["read", "read-storage"].
1615
        self.sync_two_phsids_dict(
1✔
1616
            user_projects_csv, user_projects, source1="local_csv", source2="dbgap"
1617
        )
1618
        self.sync_two_phsids_dict(
1✔
1619
            user_yaml.projects, user_projects, source1="user_yaml", source2="dbgap"
1620
        )
1621

1622
        # Note: if there are multiple dbgap sftp servers configured
1623
        # this parameter is always from the config for the first dbgap sftp server
1624
        # not any additional ones
1625
        for dbgap_config in self.dbGaP:
1✔
1626
            if self._get_parse_consent_code(dbgap_config):
1✔
1627
                self._grant_all_consents_to_c999_users(
1✔
1628
                    user_projects, user_yaml.project_to_resource
1629
                )
1630

1631
        google_update_ex = None
1✔
1632

1633
        try:
1✔
1634
            # update the Fence DB
1635
            if user_projects:
1✔
1636
                self.logger.info("Sync to db and storage backend")
1✔
1637
                self.sync_to_db_and_storage_backend(user_projects, user_info, sess)
1✔
1638
                self.logger.info("Finish syncing to db and storage backend")
1✔
1639
            else:
1640
                self.logger.info("No users for syncing")
×
1641
        except GoogleUpdateException as ex:
1✔
1642
            # save this to reraise later after all non-Google syncing has finished
1643
            # this way, any issues with Google only affect Google data access and don't
1644
            # cascade problems into non-Google AWS or Azure access
1645
            google_update_ex = ex
1✔
1646

1647
        # update the Arborist DB (resources, roles, policies, groups)
1648
        if user_yaml.authz:
1✔
1649
            if not self.arborist_client:
1✔
1650
                raise EnvironmentError(
×
1651
                    "yaml file contains authz section but sync is not configured with"
1652
                    " arborist client--did you run sync with --arborist <arborist client> arg?"
1653
                )
1654
            self.logger.info("Synchronizing arborist...")
1✔
1655
            success = self._update_arborist(sess, user_yaml)
1✔
1656
            if success:
1✔
1657
                self.logger.info("Finished synchronizing arborist")
1✔
1658
            else:
1659
                self.logger.error("Could not synchronize successfully")
×
1660
                exit(1)
×
1661
        else:
1662
            self.logger.info("No `authz` section; skipping arborist sync")
×
1663

1664
        # update the Arborist DB (user access)
1665
        if self.arborist_client:
1✔
1666
            self.logger.info("Synchronizing arborist with authorization info...")
1✔
1667
            success = self._update_authz_in_arborist(sess, user_projects, user_yaml)
1✔
1668
            if success:
1✔
1669
                self.logger.info(
1✔
1670
                    "Finished synchronizing authorization info to arborist"
1671
                )
1672
            else:
1673
                self.logger.error(
×
1674
                    "Could not synchronize authorization info successfully to arborist"
1675
                )
1676
                exit(1)
×
1677
        else:
1678
            self.logger.error("No arborist client set; skipping arborist sync")
×
1679

1680
        # Logging authz source
1681
        for u, s in self.auth_source.items():
1✔
1682
            self.logger.info("Access for user {} from {}".format(u, s))
1✔
1683

1684
        self.logger.info(
1✔
1685
            f"Persisting authz mapping to database: {user_yaml.project_to_resource}"
1686
        )
1687
        user_yaml.persist_project_to_resource(db_session=sess)
1✔
1688
        if google_update_ex is not None:
1✔
1689
            raise google_update_ex
1✔
1690

1691
    def _grant_all_consents_to_c999_users(
1✔
1692
        self, user_projects, user_yaml_project_to_resources
1693
    ):
1694
        access_number_matcher = re.compile(config["DBGAP_ACCESSION_WITH_CONSENT_REGEX"])
1✔
1695
        # combine dbgap/user.yaml projects into one big list (in case not all consents
1696
        # are in either)
1697
        all_projects = set(
1✔
1698
            list(self._projects.keys()) + list(user_yaml_project_to_resources.keys())
1699
        )
1700

1701
        self.logger.debug(f"all projects: {all_projects}")
1✔
1702

1703
        # construct a mapping from phsid (without consent) to all accessions with consent
1704
        consent_mapping = {}
1✔
1705
        for project in all_projects:
1✔
1706
            phs_match = access_number_matcher.match(project)
1✔
1707
            if phs_match:
1✔
1708
                accession_number = phs_match.groupdict()
1✔
1709

1710
                # TODO: This is not handling the .v1.p1 at all
1711
                consent_mapping.setdefault(accession_number["phsid"], set()).add(
1✔
1712
                    ".".join([accession_number["phsid"], accession_number["consent"]])
1713
                )
1714
                children = self._get_children(accession_number["phsid"])
1✔
1715
                if children:
1✔
1716
                    for child_phs in children:
1✔
1717
                        consent_mapping.setdefault(child_phs, set()).add(
1✔
1718
                            ".".join(
1719
                                [child_phs, accession_number["consent"]]
1720
                            )  # Assign parent consent to child study
1721
                        )
1722

1723
        self.logger.debug(f"consent mapping: {consent_mapping}")
1✔
1724

1725
        # go through existing access and find any c999's and make sure to give access to
1726
        # all accessions with consent for that phsid
1727
        for username, user_project_info in copy.deepcopy(user_projects).items():
1✔
1728
            for project, _ in user_project_info.items():
1✔
1729
                phs_match = access_number_matcher.match(project)
1✔
1730
                if phs_match and phs_match.groupdict()["consent"] == "c999":
1✔
1731
                    # give access to all consents
1732
                    all_phsids_with_consent = consent_mapping.get(
1✔
1733
                        phs_match.groupdict()["phsid"], []
1734
                    )
1735
                    self.logger.info(
1✔
1736
                        f"user {username} has c999 consent group for: {project}. "
1737
                        f"Granting access to all consents: {all_phsids_with_consent}"
1738
                    )
1739
                    # NOTE: Only giving read-storage at the moment (this is same
1740
                    #       permission we give for other dbgap projects)
1741
                    for phsid_with_consent in all_phsids_with_consent:
1✔
1742
                        user_projects[username].update(
1✔
1743
                            {phsid_with_consent: {"read-storage", "read"}}
1744
                        )
1745

1746
    def _update_arborist(self, session, user_yaml):
1✔
1747
        """
1748
        Create roles, resources, policies, groups in arborist from the information in
1749
        ``user_yaml``.
1750

1751
        The projects are sent to arborist as resources with paths like
1752
        ``/projects/{project}``. Roles are created with just the original names
1753
        for the privileges like ``"read-storage", "read"`` etc.
1754

1755
        Args:
1756
            session (sqlalchemy.Session)
1757
            user_yaml (UserYAML)
1758

1759
        Return:
1760
            bool: success
1761
        """
1762
        healthy = self._is_arborist_healthy()
1✔
1763
        if not healthy:
1✔
1764
            return False
×
1765

1766
        # Set up the resource tree in arborist by combining provided resources with any
1767
        # dbgap resources that were created before this.
1768
        #
1769
        # Why add dbgap resources if they've already been created?
1770
        #   B/C Arborist's PUT update will override existing subresources. So if a dbgap
1771
        #   resources was created under `/programs/phs000178` anything provided in
1772
        #   user.yaml under `/programs` would completely wipe it out.
1773
        resources = user_yaml.authz.get("resources", [])
1✔
1774

1775
        dbgap_resource_paths = []
1✔
1776
        for path_list in self._dbgap_study_to_resources.values():
1✔
1777
            dbgap_resource_paths.extend(path_list)
1✔
1778

1779
        self.logger.debug("user_yaml resources: {}".format(resources))
1✔
1780
        self.logger.debug("dbgap resource paths: {}".format(dbgap_resource_paths))
1✔
1781

1782
        combined_resources = utils.combine_provided_and_dbgap_resources(
1✔
1783
            resources, dbgap_resource_paths
1784
        )
1785

1786
        for resource in combined_resources:
1✔
1787
            try:
1✔
1788
                self.logger.debug(
1✔
1789
                    "attempting to update arborist resource: {}".format(resource)
1790
                )
1791
                self.arborist_client.update_resource("/", resource, merge=True)
1✔
1792
            except ArboristError as e:
×
1793
                self.logger.error(e)
×
1794
                # keep going; maybe just some conflicts from things existing already
1795

1796
        # update roles
1797
        roles = user_yaml.authz.get("roles", [])
1✔
1798
        for role in roles:
1✔
1799
            try:
1✔
1800
                response = self.arborist_client.update_role(role["id"], role)
1✔
1801
                if response:
1✔
1802
                    self._created_roles.add(role["id"])
1✔
1803
            except ArboristError as e:
×
1804
                self.logger.info(
×
1805
                    "couldn't update role '{}', creating instead".format(str(e))
1806
                )
1807
                try:
×
1808
                    response = self.arborist_client.create_role(role)
×
1809
                    if response:
×
1810
                        self._created_roles.add(role["id"])
×
1811
                except ArboristError as e:
×
1812
                    self.logger.error(e)
×
1813
                    # keep going; maybe just some conflicts from things existing already
1814

1815
        # update policies
1816
        policies = user_yaml.authz.get("policies", [])
1✔
1817
        for policy in policies:
1✔
1818
            policy_id = policy.pop("id")
1✔
1819
            try:
1✔
1820
                self.logger.debug(
1✔
1821
                    "Trying to upsert policy with id {}".format(policy_id)
1822
                )
1823
                response = self.arborist_client.update_policy(
1✔
1824
                    policy_id, policy, create_if_not_exist=True
1825
                )
1826
            except ArboristError as e:
×
1827
                self.logger.error(e)
×
1828
                # keep going; maybe just some conflicts from things existing already
1829
            else:
1830
                if response:
1✔
1831
                    self.logger.debug("Upserted policy with id {}".format(policy_id))
1✔
1832
                    self._created_policies.add(policy_id)
1✔
1833

1834
        # update groups
1835
        groups = user_yaml.authz.get("groups", [])
1✔
1836

1837
        # delete from arborist the groups that have been deleted
1838
        # from the user.yaml
1839
        arborist_groups = set(
1✔
1840
            g["name"] for g in self.arborist_client.list_groups().get("groups", [])
1841
        )
1842
        useryaml_groups = set(g["name"] for g in groups)
1✔
1843
        for deleted_group in arborist_groups.difference(useryaml_groups):
1✔
1844
            # do not try to delete built in groups
1845
            if deleted_group not in ["anonymous", "logged-in"]:
×
1846
                self.arborist_client.delete_group(deleted_group)
×
1847

1848
        # create/update the groups defined in the user.yaml
1849
        for group in groups:
1✔
1850
            missing = {"name", "users", "policies"}.difference(set(group.keys()))
×
1851
            if missing:
×
1852
                name = group.get("name", "{MISSING NAME}")
×
1853
                self.logger.error(
×
1854
                    "group {} missing required field(s): {}".format(name, list(missing))
1855
                )
1856
                continue
×
1857
            try:
×
1858
                response = self.arborist_client.put_group(
×
1859
                    group["name"],
1860
                    # Arborist doesn't handle group descriptions yet
1861
                    # description=group.get("description", ""),
1862
                    users=group["users"],
1863
                    policies=group["policies"],
1864
                )
1865
            except ArboristError as e:
×
1866
                self.logger.info("couldn't put group: {}".format(str(e)))
×
1867

1868
        # Update policies for built-in (`anonymous` and `logged-in`) groups
1869

1870
        # First recreate these groups in order to clear out old, possibly deleted policies
1871
        for builtin_group in ["anonymous", "logged-in"]:
1✔
1872
            try:
1✔
1873
                response = self.arborist_client.put_group(builtin_group)
1✔
1874
            except ArboristError as e:
×
1875
                self.logger.info("couldn't put group: {}".format(str(e)))
×
1876

1877
        # Now add back policies that are in the user.yaml
1878
        for policy in user_yaml.authz.get("anonymous_policies", []):
1✔
1879
            self.arborist_client.grant_group_policy("anonymous", policy)
×
1880

1881
        for policy in user_yaml.authz.get("all_users_policies", []):
1✔
1882
            self.arborist_client.grant_group_policy("logged-in", policy)
×
1883

1884
        return True
1✔
1885

1886
    def _revoke_all_policies_preserve_mfa(self, username, idp=None):
1✔
1887
        """
1888
        If MFA is enabled for the user's idp, check if they have the /multifactor_auth resource and restore the
1889
        mfa_policy after revoking all policies.
1890
        """
1891

1892
        is_mfa_enabled = "multifactor_auth_claim_info" in config["OPENID_CONNECT"].get(
1✔
1893
            idp, {}
1894
        )
1895

1896
        if not is_mfa_enabled:
1✔
1897
            # TODO This should be a diff, not a revocation of all policies.
1898
            self.arborist_client.revoke_all_policies_for_user(username)
1✔
1899
            return
1✔
1900

1901
        policies = []
1✔
1902
        try:
1✔
1903
            user_data_from_arborist = self.arborist_client.get_user(username)
1✔
1904
            policies = user_data_from_arborist["policies"]
1✔
1905
        except Exception as e:
×
1906
            self.logger.error(
×
1907
                f"Could not retrieve user's policies, revoking all policies anyway. {e}"
1908
            )
1909
        finally:
1910
            # TODO This should be a diff, not a revocation of all policies.
1911
            self.arborist_client.revoke_all_policies_for_user(username)
1✔
1912

1913
        if "mfa_policy" in policies:
1✔
1914
            self.arborist_client.grant_user_policy(username, "mfa_policy")
1✔
1915

1916
    def _update_authz_in_arborist(
1✔
1917
        self,
1918
        session,
1919
        user_projects,
1920
        user_yaml=None,
1921
        single_user_sync=False,
1922
        expires=None,
1923
    ):
1924
        """
1925
        Assign users policies in arborist from the information in
1926
        ``user_projects`` and optionally a ``user_yaml``.
1927

1928
        The projects are sent to arborist as resources with paths like
1929
        ``/projects/{project}``. Roles are created with just the original names
1930
        for the privileges like ``"read-storage", "read"`` etc.
1931

1932
        Args:
1933
            user_projects (dict)
1934
            user_yaml (UserYAML) optional, if there are policies for users in a user.yaml
1935
            single_user_sync (bool) whether authz update is for a single user
1936
            expires (int) time at which authz info in Arborist should expire
1937

1938
        Return:
1939
            bool: success
1940
        """
1941
        healthy = self._is_arborist_healthy()
1✔
1942
        if not healthy:
1✔
1943
            return False
×
1944

1945
        self.logger.debug("user_projects: {}".format(user_projects))
1✔
1946

1947
        if user_yaml:
1✔
1948
            self.logger.debug(
1✔
1949
                "useryaml abac before lowering usernames: {}".format(
1950
                    user_yaml.user_abac
1951
                )
1952
            )
1953
            user_yaml.user_abac = {
1✔
1954
                key.lower(): value for key, value in user_yaml.user_abac.items()
1955
            }
1956
            # update the project info with `projects` specified in user.yaml
1957
            self.sync_two_phsids_dict(user_yaml.user_abac, user_projects)
1✔
1958

1959
        # get list of users from arborist to make sure users that are completely removed
1960
        # from authorization sources get policies revoked
1961
        arborist_user_projects = {}
1✔
1962
        if not single_user_sync:
1✔
1963
            try:
1✔
1964
                arborist_users = self.arborist_client.get_users().json["users"]
1✔
1965

1966
                # construct user information, NOTE the lowering of the username. when adding/
1967
                # removing access, the case in the Fence db is used. For combining access, it is
1968
                # case-insensitive, so we lower
1969
                arborist_user_projects = {
1✔
1970
                    user["name"].lower(): {} for user in arborist_users
1971
                }
1972
            except (ArboristError, KeyError, AttributeError) as error:
×
1973
                # TODO usersync should probably exit with non-zero exit code at the end,
1974
                #      but sync should continue from this point so there are no partial
1975
                #      updates
1976
                self.logger.warning(
×
1977
                    "Could not get list of users in Arborist, continuing anyway. "
1978
                    "WARNING: this sync will NOT remove access for users no longer in "
1979
                    f"authorization sources. Error: {error}"
1980
                )
1981

1982
            # update the project info with users from arborist
1983
            self.sync_two_phsids_dict(arborist_user_projects, user_projects)
1✔
1984

1985
        policy_id_list = []
1✔
1986
        policies = []
1✔
1987

1988
        # prefer in-memory if available from user_yaml, if not, get from database
1989
        if user_yaml and user_yaml.project_to_resource:
1✔
1990
            project_to_authz_mapping = user_yaml.project_to_resource
1✔
1991
            self.logger.debug(
1✔
1992
                f"using in-memory project to authz resource mapping from "
1993
                f"user.yaml (instead of database): {project_to_authz_mapping}"
1994
            )
1995
        else:
1996
            project_to_authz_mapping = get_project_to_authz_mapping(session)
1✔
1997
            self.logger.debug(
1✔
1998
                f"using persisted project to authz resource mapping from database "
1999
                f"(instead of user.yaml - as it may not be available): {project_to_authz_mapping}"
2000
            )
2001

2002
        self.logger.debug(
1✔
2003
            f"_dbgap_study_to_resources: {self._dbgap_study_to_resources}"
2004
        )
2005
        all_resources = [
1✔
2006
            r
2007
            for resources in self._dbgap_study_to_resources.values()
2008
            for r in resources
2009
        ]
2010
        all_resources.extend(r for r in project_to_authz_mapping.values())
1✔
2011
        self._create_arborist_resources(all_resources)
1✔
2012

2013
        for username, user_project_info in user_projects.items():
1✔
2014
            self.logger.info("processing user `{}`".format(username))
1✔
2015
            user = query_for_user(session=session, username=username)
1✔
2016
            idp = None
1✔
2017
            if user:
1✔
2018
                username = user.username
1✔
2019
                idp = user.identity_provider.name if user.identity_provider else None
1✔
2020

2021
            self.arborist_client.create_user_if_not_exist(username)
1✔
2022
            if not single_user_sync:
1✔
2023
                self._revoke_all_policies_preserve_mfa(username, idp)
1✔
2024

2025
            # as of 2/11/2022, for single_user_sync, as RAS visa parsing has
2026
            # previously mapped each project to the same set of privileges
2027
            # (i.e.{'read', 'read-storage'}), unique_policies will just be a
2028
            # single policy with ('read', 'read-storage') being the single
2029
            # key
2030
            unique_policies = self._determine_unique_policies(
1✔
2031
                user_project_info, project_to_authz_mapping
2032
            )
2033

2034
            for roles in unique_policies.keys():
1✔
2035
                for role in roles:
1✔
2036
                    self._create_arborist_role(role)
1✔
2037

2038
            if single_user_sync:
1✔
2039
                for ordered_roles, ordered_resources in unique_policies.items():
1✔
2040
                    policy_hash = self._hash_policy_contents(
1✔
2041
                        ordered_roles, ordered_resources
2042
                    )
2043
                    self._create_arborist_policy(
1✔
2044
                        policy_hash,
2045
                        ordered_roles,
2046
                        ordered_resources,
2047
                        skip_if_exists=True,
2048
                    )
2049
                    # return here as it is not expected single_user_sync
2050
                    # will need any of the remaining user_yaml operations
2051
                    # left in _update_authz_in_arborist
2052
                    return self._grant_arborist_policy(
1✔
2053
                        username, policy_hash, expires=expires
2054
                    )
2055
            else:
2056
                for roles, resources in unique_policies.items():
1✔
2057
                    for role in roles:
1✔
2058
                        for resource in resources:
1✔
2059
                            # grant a policy to this user which is a single
2060
                            # role on a single resource
2061

2062
                            # format project '/x/y/z' -> 'x.y.z'
2063
                            # so the policy id will be something like 'x.y.z-create'
2064
                            policy_id = _format_policy_id(resource, role)
1✔
2065
                            if policy_id not in self._created_policies:
1✔
2066
                                try:
1✔
2067
                                    self.arborist_client.update_policy(
1✔
2068
                                        policy_id,
2069
                                        {
2070
                                            "description": "policy created by fence sync",
2071
                                            "role_ids": [role],
2072
                                            "resource_paths": [resource],
2073
                                        },
2074
                                        create_if_not_exist=True,
2075
                                    )
2076
                                except ArboristError as e:
×
2077
                                    self.logger.info(
×
2078
                                        "not creating policy in arborist; {}".format(
2079
                                            str(e)
2080
                                        )
2081
                                    )
2082
                                self._created_policies.add(policy_id)
1✔
2083

2084
                            self._grant_arborist_policy(
1✔
2085
                                username, policy_id, expires=expires
2086
                            )
2087

2088
            if user_yaml:
1✔
2089
                for policy in user_yaml.policies.get(username, []):
1✔
2090
                    self.arborist_client.grant_user_policy(
1✔
2091
                        username,
2092
                        policy,
2093
                        expires_at=expires,
2094
                    )
2095

2096
        if user_yaml:
1✔
2097
            for client_name, client_details in user_yaml.clients.items():
1✔
2098
                client_policies = client_details.get("policies", [])
×
2099
                clients = session.query(Client).filter_by(name=client_name).all()
×
2100
                # update existing clients, do not create new ones
2101
                if not clients:
×
2102
                    self.logger.warning(
×
2103
                        "client to update (`{}`) does not exist in fence: skipping".format(
2104
                            client_name
2105
                        )
2106
                    )
2107
                    continue
×
2108
                self.logger.debug(
×
2109
                    "updating client `{}` (found {} client IDs)".format(
2110
                        client_name, len(clients)
2111
                    )
2112
                )
2113
                # there may be more than 1 client with this name if credentials are being rotated,
2114
                # so we grant access to each client ID
2115
                for client in clients:
×
2116
                    try:
×
2117
                        self.arborist_client.update_client(
×
2118
                            client.client_id, client_policies
2119
                        )
2120
                    except ArboristError as e:
×
2121
                        self.logger.info(
×
2122
                            "not granting policies {} to client `{}` (`{}`); {}".format(
2123
                                client_policies, client_name, client.client_id, str(e)
2124
                            )
2125
                        )
2126

2127
        return True
1✔
2128

2129
    def _determine_unique_policies(self, user_project_info, project_to_authz_mapping):
1✔
2130
        """
2131
        Determine and return a dictionary of unique policies.
2132

2133
        Args (examples):
2134
            user_project_info (dict):
2135
            {
2136
                'phs000002.c1': { 'read-storage', 'read' },
2137
                'phs000001.c1': { 'read', 'read-storage' },
2138
                'phs000004.c1': { 'write', 'read' },
2139
                'phs000003.c1': { 'read', 'write' },
2140
                'phs000006.c1': { 'write-storage', 'write', 'read-storage', 'read' }
2141
                'phs000005.c1': { 'read', 'read-storage', 'write', 'write-storage' },
2142
            }
2143
            project_to_authz_mapping (dict):
2144
            {
2145
                'phs000001.c1': '/programs/DEV/projects/phs000001.c1'
2146
            }
2147

2148
        Return (for examples):
2149
            dict:
2150
            {
2151
                ('read', 'read-storage'): ('phs000001.c1', 'phs000002.c1'),
2152
                ('read', 'write'): ('phs000003.c1', 'phs000004.c1'),
2153
                ('read', 'read-storage', 'write', 'write-storage'): ('phs000005.c1', 'phs000006.c1'),
2154
            }
2155
        """
2156
        roles_to_resources = collections.defaultdict(list)
1✔
2157
        for study, roles in user_project_info.items():
1✔
2158
            ordered_roles = tuple(sorted(roles))
1✔
2159
            study_authz_paths = self._dbgap_study_to_resources.get(study, [study])
1✔
2160
            if study in project_to_authz_mapping:
1✔
2161
                study_authz_paths = [project_to_authz_mapping[study]]
1✔
2162
            roles_to_resources[ordered_roles].extend(study_authz_paths)
1✔
2163

2164
        policies = {}
1✔
2165
        for ordered_roles, unordered_resources in roles_to_resources.items():
1✔
2166
            policies[ordered_roles] = tuple(sorted(unordered_resources))
1✔
2167
        return policies
1✔
2168

2169
    def _create_arborist_role(self, role):
1✔
2170
        """
2171
        Wrapper around gen3authz's create_role with additional logging
2172

2173
        Args:
2174
            role (str): what the Arborist identity should be of the created role
2175

2176
        Return:
2177
            bool: True if the role was created successfully or it already
2178
                  exists. False otherwise
2179
        """
2180
        if role in self._created_roles:
1✔
2181
            return True
1✔
2182
        try:
1✔
2183
            response_json = self.arborist_client.create_role(
1✔
2184
                arborist_role_for_permission(role)
2185
            )
2186
        except ArboristError as e:
×
2187
            self.logger.error(
×
2188
                "could not create `{}` role in Arborist: {}".format(role, e)
2189
            )
2190
            return False
×
2191
        self._created_roles.add(role)
1✔
2192

2193
        if response_json is None:
1✔
2194
            self.logger.info("role `{}` already exists in Arborist".format(role))
×
2195
        else:
2196
            self.logger.info("created role `{}` in Arborist".format(role))
1✔
2197
        return True
1✔
2198

2199
    def _create_arborist_resources(self, resources):
1✔
2200
        """
2201
        Create resources in Arborist
2202

2203
        Args:
2204
            resources (list): a list of full Arborist resource paths to create
2205
            [
2206
                "/programs/DEV/projects/phs000001.c1",
2207
                "/programs/DEV/projects/phs000002.c1",
2208
                "/programs/DEV/projects/phs000003.c1"
2209
            ]
2210

2211
        Return:
2212
            bool: True if the resources were successfully created, False otherwise
2213

2214

2215
        As of 2/11/2022, for resources above,
2216
        utils.combine_provided_and_dbgap_resources({}, resources) returns:
2217
        [
2218
            { 'name': 'programs', 'subresources': [
2219
                { 'name': 'DEV', 'subresources': [
2220
                    { 'name': 'projects', 'subresources': [
2221
                        { 'name': 'phs000001.c1', 'subresources': []},
2222
                        { 'name': 'phs000002.c1', 'subresources': []},
2223
                        { 'name': 'phs000003.c1', 'subresources': []}
2224
                    ]}
2225
                ]}
2226
            ]}
2227
        ]
2228
        Because this list has a single object, only a single network request gets
2229
        sent to Arborist.
2230

2231
        However, for resources = ["/phs000001.c1", "/phs000002.c1", "/phs000003.c1"],
2232
        utils.combine_provided_and_dbgap_resources({}, resources) returns:
2233
        [
2234
            {'name': 'phs000001.c1', 'subresources': []},
2235
            {'name': 'phs000002.c1', 'subresources': []},
2236
            {'name': 'phs000003.c1', 'subresources': []}
2237
        ]
2238
        Because this list has 3 objects, 3 network requests get sent to Arborist.
2239

2240
        As a practical matter, for sync_single_user_visas, studies
2241
        should be nested under the `/programs` resource as in the former
2242
        example (i.e. only one network request gets made).
2243

2244
        TODO for the sake of simplicity, it would be nice if only one network
2245
        request was made no matter the input.
2246
        """
2247
        for request_body in utils.combine_provided_and_dbgap_resources({}, resources):
1✔
2248
            try:
1✔
2249
                response_json = self.arborist_client.update_resource(
1✔
2250
                    "/", request_body, merge=True
2251
                )
2252
            except ArboristError as e:
×
2253
                self.logger.error(
×
2254
                    "could not create Arborist resources using request body `{}`. error: {}".format(
2255
                        request_body, e
2256
                    )
2257
                )
2258
                return False
×
2259

2260
        self.logger.debug(
1✔
2261
            "created {} resource(s) in Arborist: `{}`".format(len(resources), resources)
2262
        )
2263
        return True
1✔
2264

2265
    def _create_arborist_policy(
1✔
2266
        self, policy_id, roles, resources, skip_if_exists=False
2267
    ):
2268
        """
2269
        Wrapper around gen3authz's create_policy with additional logging
2270

2271
        Args:
2272
            policy_id (str): what the Arborist identity should be of the created policy
2273
            roles (iterable): what roles the create policy should have
2274
            resources (iterable): what resources the created policy should have
2275
            skip_if_exists (bool): if True, this function will not treat an already
2276
                                   existent policy as an error
2277

2278
        Return:
2279
            bool: True if policy creation was successful. False otherwise
2280
        """
2281
        try:
1✔
2282
            response_json = self.arborist_client.create_policy(
1✔
2283
                {
2284
                    "id": policy_id,
2285
                    "role_ids": roles,
2286
                    "resource_paths": resources,
2287
                },
2288
                skip_if_exists=skip_if_exists,
2289
            )
2290
        except ArboristError as e:
×
2291
            self.logger.error(
×
2292
                "could not create policy `{}` in Arborist: {}".format(policy_id, e)
2293
            )
2294
            return False
×
2295

2296
        if response_json is None:
1✔
2297
            self.logger.info("policy `{}` already exists in Arborist".format(policy_id))
×
2298
        else:
2299
            self.logger.info("created policy `{}` in Arborist".format(policy_id))
1✔
2300
        return True
1✔
2301

2302
    def _hash_policy_contents(self, ordered_roles, ordered_resources):
1✔
2303
        """
2304
        Generate a sha256 hexdigest representing ordered_roles and ordered_resources.
2305

2306
        Args:
2307
            ordered_roles (iterable): policy roles in sorted order
2308
            ordered_resources (iterable): policy resources in sorted order
2309

2310
        Return:
2311
            str: SHA256 hex digest
2312
        """
2313

2314
        def escape(s):
1✔
2315
            return s.replace(",", "\,")
1✔
2316

2317
        canonical_roles = ",".join(escape(r) for r in ordered_roles)
1✔
2318
        canonical_resources = ",".join(escape(r) for r in ordered_resources)
1✔
2319
        canonical_policy = f"{canonical_roles},,f{canonical_resources}"
1✔
2320
        policy_hash = hashlib.sha256(canonical_policy.encode("utf-8")).hexdigest()
1✔
2321

2322
        return policy_hash
1✔
2323

2324
    def _grant_arborist_policy(self, username, policy_id, expires=None):
1✔
2325
        """
2326
        Wrapper around gen3authz's grant_user_policy with additional logging
2327

2328
        Args:
2329
            username (str): username of user in Arborist who policy should be
2330
                            granted to
2331
            policy_id (str): Arborist policy id
2332
            expires (int): POSIX timestamp for when policy should expire
2333

2334
        Return:
2335
            bool: True if granting of policy was successful, False otherwise
2336
        """
2337
        try:
1✔
2338
            response_json = self.arborist_client.grant_user_policy(
1✔
2339
                username,
2340
                policy_id,
2341
                expires_at=expires,
2342
            )
2343
        except ArboristError as e:
×
2344
            self.logger.error(
×
2345
                "could not grant policy `{}` to user `{}`: {}".format(
2346
                    policy_id, username, e
2347
                )
2348
            )
2349
            return False
×
2350

2351
        self.logger.debug(
1✔
2352
            "granted policy `{}` to user `{}`".format(policy_id, username)
2353
        )
2354
        return True
1✔
2355

2356
    def _determine_arborist_resource(self, dbgap_study, dbgap_config):
1✔
2357
        """
2358
        Determine the arborist resource path and add it to
2359
        _self._dbgap_study_to_resources
2360

2361
        Args:
2362
            dbgap_study (str): study phs identifier
2363
            dbgap_config (dict): dictionary of config for dbgap server
2364

2365
        """
2366
        default_namespaces = dbgap_config.get("study_to_resource_namespaces", {}).get(
1✔
2367
            "_default", ["/"]
2368
        )
2369
        namespaces = dbgap_config.get("study_to_resource_namespaces", {}).get(
1✔
2370
            dbgap_study, default_namespaces
2371
        )
2372

2373
        self.logger.debug(f"dbgap study namespaces: {namespaces}")
1✔
2374

2375
        arborist_resource_namespaces = [
1✔
2376
            namespace.rstrip("/") + "/programs/" for namespace in namespaces
2377
        ]
2378

2379
        for resource_namespace in arborist_resource_namespaces:
1✔
2380
            full_resource_path = resource_namespace + dbgap_study
1✔
2381
            if dbgap_study not in self._dbgap_study_to_resources:
1✔
2382
                self._dbgap_study_to_resources[dbgap_study] = []
1✔
2383
            self._dbgap_study_to_resources[dbgap_study].append(full_resource_path)
1✔
2384
        return arborist_resource_namespaces
1✔
2385

2386
    def _is_arborist_healthy(self):
1✔
2387
        if not self.arborist_client:
1✔
2388
            self.logger.warning("no arborist client set; skipping arborist dbgap sync")
×
2389
            return False
×
2390
        if not self.arborist_client.healthy():
1✔
2391
            # TODO (rudyardrichter, 2019-01-07): add backoff/retry here
2392
            self.logger.error(
×
2393
                "arborist service is unavailable; skipping main arborist dbgap sync"
2394
            )
2395
            return False
×
2396
        return True
1✔
2397

2398
    def _pick_sync_type(self, visa):
1✔
2399
        """
2400
        Pick type of visa to parse according to the visa provider
2401
        """
2402
        sync_client = None
1✔
2403
        if visa.type in self.visa_types["ras"]:
1✔
2404
            sync_client = self.ras_sync_client
1✔
2405
        else:
2406
            raise Exception(
×
2407
                "Visa type {} not recognized. Configure in fence-config".format(
2408
                    visa.type
2409
                )
2410
            )
2411
        if not sync_client:
1✔
2412
            raise Exception("Sync client for {} not configured".format(visa.type))
×
2413

2414
        return sync_client
1✔
2415

2416
    def sync_single_user_visas(
1✔
2417
        self, user, ga4gh_visas, sess=None, expires=None, skip_google_updates=False
2418
    ):
2419
        """
2420
        Sync a single user's visas during login or DRS/data access
2421

2422
        IMPORTANT NOTE: THIS DOES NOT VALIDATE THE VISA. ENSURE THIS IS DONE
2423
                        BEFORE THIS.
2424

2425
        Args:
2426
            user (userdatamodel.user.User): Fence user whose visas'
2427
                                            authz info is being synced
2428
            ga4gh_visas (list): a list of fence.models.GA4GHVisaV1 objects
2429
                                that are ALREADY VALIDATED
2430
            sess (sqlalchemy.orm.session.Session): database session
2431
            expires (int): time at which synced Arborist policies and
2432
                           inclusion in any GBAG are set to expire
2433
            skip_google_updates (bool): True if google group updates should be skipped. False if otherwise.
2434

2435
        Return:
2436
            list of successfully parsed visas
2437
        """
2438
        self.ras_sync_client = RASVisa(logger=self.logger)
1✔
2439
        dbgap_config = self.dbGaP[0]
1✔
2440
        parse_consent_code = self._get_parse_consent_code(dbgap_config)
1✔
2441
        enable_common_exchange_area_access = dbgap_config.get(
1✔
2442
            "enable_common_exchange_area_access", False
2443
        )
2444
        study_common_exchange_areas = dbgap_config.get(
1✔
2445
            "study_common_exchange_areas", {}
2446
        )
2447

2448
        try:
1✔
2449
            user_yaml = UserYAML.from_file(
1✔
2450
                self.sync_from_local_yaml_file, encrypted=False, logger=self.logger
2451
            )
2452
        except (EnvironmentError, AssertionError) as e:
×
2453
            self.logger.error(str(e))
×
2454
            self.logger.error("aborting early")
×
2455
            raise
×
2456

2457
        user_projects = dict()
1✔
2458
        projects = {}
1✔
2459
        info = {}
1✔
2460
        parsed_visas = []
1✔
2461

2462
        for visa in ga4gh_visas:
1✔
2463
            project = {}
1✔
2464
            visa_type = self._pick_sync_type(visa)
1✔
2465
            encoded_visa = visa.ga4gh_visa
1✔
2466

2467
            try:
1✔
2468
                project, info = visa_type._parse_single_visa(
1✔
2469
                    user,
2470
                    encoded_visa,
2471
                    visa.expires,
2472
                    parse_consent_code,
2473
                )
2474
            except Exception:
×
2475
                self.logger.warning(
×
2476
                    f"ignoring unsuccessfully parsed or expired visa: {encoded_visa}"
2477
                )
2478
                continue
×
2479

2480
            projects = {**projects, **project}
1✔
2481
            parsed_visas.append(visa)
1✔
2482

2483
        info["user_id"] = user.id
1✔
2484
        info["username"] = user.username
1✔
2485
        user_projects[user.username] = projects
1✔
2486

2487
        user_projects = self.parse_projects(user_projects)
1✔
2488

2489
        if parse_consent_code and enable_common_exchange_area_access:
1✔
2490
            self.logger.info(
1✔
2491
                f"using study to common exchange area mapping: {study_common_exchange_areas}"
2492
            )
2493

2494
        self._process_user_projects(
1✔
2495
            user_projects,
2496
            enable_common_exchange_area_access,
2497
            study_common_exchange_areas,
2498
            dbgap_config,
2499
            sess,
2500
        )
2501

2502
        if parse_consent_code:
1✔
2503
            self._grant_all_consents_to_c999_users(
1✔
2504
                user_projects, user_yaml.project_to_resource
2505
            )
2506

2507
        if user_projects:
1✔
2508
            self.sync_to_storage_backend(
1✔
2509
                user_projects,
2510
                info,
2511
                sess,
2512
                expires=expires,
2513
                skip_google_updates=skip_google_updates,
2514
            )
2515
        else:
2516
            self.logger.info("No users for syncing")
×
2517

2518
        # update arborist db (user access)
2519
        if self.arborist_client:
1✔
2520
            self.logger.info("Synchronizing arborist with authorization info...")
1✔
2521
            success = self._update_authz_in_arborist(
1✔
2522
                sess,
2523
                user_projects,
2524
                user_yaml=user_yaml,
2525
                single_user_sync=True,
2526
                expires=expires,
2527
            )
2528
            if success:
1✔
2529
                self.logger.info(
1✔
2530
                    "Finished synchronizing authorization info to arborist"
2531
                )
2532
            else:
2533
                self.logger.error(
×
2534
                    "Could not synchronize authorization info successfully to arborist"
2535
                )
2536
        else:
2537
            self.logger.error("No arborist client set; skipping arborist sync")
×
2538

2539
        return parsed_visas
1✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc