• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

uc-cdis / fence / 11483296934

23 Oct 2024 03:34PM UTC coverage: 75.27%. Remained the same
11483296934

push

github

web-flow
Update sync_users.py

7813 of 10380 relevant lines covered (75.27%)

0.75 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

81.65
fence/sync/sync_users.py
1
import backoff
1✔
2
import glob
1✔
3
import jwt
1✔
4
import os
1✔
5
import re
1✔
6
import subprocess as sp
1✔
7
import yaml
1✔
8
import copy
1✔
9
import datetime
1✔
10
import uuid
1✔
11
import collections
1✔
12
import hashlib
1✔
13

14
from contextlib import contextmanager
1✔
15
from collections import defaultdict
1✔
16
from csv import DictReader
1✔
17
from io import StringIO
1✔
18
from stat import S_ISDIR
1✔
19

20
import paramiko
1✔
21
from cdislogging import get_logger
1✔
22
from email_validator import validate_email, EmailNotValidError
1✔
23
from gen3authz.client.arborist.errors import ArboristError
1✔
24
from gen3users.validation import validate_user_yaml
1✔
25
from paramiko.proxy import ProxyCommand
1✔
26
from sqlalchemy.exc import IntegrityError
1✔
27
from sqlalchemy import func
1✔
28

29
from fence.config import config
1✔
30
from fence.models import (
1✔
31
    AccessPrivilege,
32
    AuthorizationProvider,
33
    Project,
34
    Tag,
35
    User,
36
    query_for_user,
37
    Client,
38
    IdentityProvider,
39
    get_project_to_authz_mapping,
40
)
41
from fence.resources.google.utils import get_or_create_proxy_group_id
1✔
42
from fence.resources.storage import StorageManager
1✔
43
from fence.resources.google.access_utils import update_google_groups_for_users
1✔
44
from fence.resources.google.access_utils import GoogleUpdateException
1✔
45
from fence.sync import utils
1✔
46
from fence.sync.passport_sync.ras_sync import RASVisa
1✔
47
from fence.utils import get_SQLAlchemyDriver, DEFAULT_BACKOFF_SETTINGS
1✔
48

49

50
def _format_policy_id(path, privilege):
1✔
51
    resource = ".".join(name for name in path.split("/") if name)
1✔
52
    return "{}-{}".format(resource, privilege)
1✔
53

54

55
def download_dir(sftp, remote_dir, local_dir):
    """
    Recursively download every file under remote_dir into local_dir.

    Args:
        sftp: SFTP client exposing ``listdir_attr(path)`` and ``get(remote, local)``
        remote_dir(str): remote directory to copy from
        local_dir(str): local directory to copy into; created if it is missing
    Returns: None
    """
    # The local directory must exist before files are written into it; without
    # this, the first file inside any remote sub-directory fails to download.
    os.makedirs(local_dir, exist_ok=True)

    for item in sftp.listdir_attr(remote_dir):
        remote_path = remote_dir + "/" + item.filename
        local_path = os.path.join(local_dir, item.filename)
        if S_ISDIR(item.st_mode):
            download_dir(sftp, remote_path, local_path)
        else:
            sftp.get(remote_path, local_path)
×
72

73

74
def arborist_role_for_permission(permission):
    """
    Build the arborist role definition for a single privilege.

    For the programs/projects in the existing fence access control model, a policy
    is generated for each combination of program/project and privilege so arborist
    can check permissions. Each role therefore carries exactly one permission, named
    after the privilege and applying to every service ("*").
    """
    single_permission = {
        "id": permission,
        "action": {"service": "*", "method": permission},
    }
    return {"id": permission, "permissions": [single_permission]}
87

88

89
@contextmanager
1✔
90
def _read_file(filepath, encrypted=True, key=None, logger=None):
1✔
91
    """
92
    Context manager for reading and optionally decrypting file it only
93
    decrypts files encrypted by unix 'crypt' tool which is used by dbGaP.
94

95
    Args:
96
        filepath (str): path to the file
97
        encrypted (bool): whether the file is encrypted
98

99
    Returns:
100
        Generator[file-like class]: file like object for the file
101
    """
102
    if encrypted:
1✔
103
        has_crypt = sp.call(["which", "openssl"])
×
104
        if has_crypt != 0:
×
105
            if logger:
×
106
                logger.error("Need to install openssl to decrypt files from dbgap")
×
107
            # TODO (rudyardrichter, 2019-01-08): raise error and move exit out to script
108
            exit(1)
×
109
        p = sp.Popen(
×
110
            [
111
                "openssl",
112
                "enc",
113
                "-aes-256-cbc",  # Encryption algorithm
114
                "-d",  # Decrypt mode, remove for encryption
115
                "-k", key,  # Use the provided key for decryption
116
            ],
117
            stdin=open(filepath, "r"),  # Reading the input file through stdin
118
            stdout=sp.PIPE,  # Capture the output in a pipe
119
            stderr=open(os.devnull, "w"),  # Suppress error output
120
            universal_newlines=True,  # For proper handling of text input/output
121
        )
122
        try:
×
123
            yield StringIO(p.communicate()[0])
×
124
        except UnicodeDecodeError:
×
125
            logger.error("Could not decode file. Check the decryption key.")
×
126
    else:
127
        f = open(filepath, "r")
1✔
128
        yield f
1✔
129
        f.close()
1✔
130

131

132
class UserYAML(object):
    """
    Representation of the information in a YAML file describing user, project, and ABAC
    information for access control.
    """

    def __init__(
        self,
        projects=None,
        user_info=None,
        policies=None,
        clients=None,
        authz=None,
        project_to_resource=None,
        logger=None,
        user_abac=None,
    ):
        """
        Store the parsed sections; every omitted (or falsy) section becomes an
        empty dict.
        """
        self.projects = projects or {}
        self.user_info = user_info or {}
        self.user_abac = user_abac or {}
        self.policies = policies or {}
        self.clients = clients or {}
        self.authz = authz or {}
        self.project_to_resource = project_to_resource or {}
        self.logger = logger

    @classmethod
    def from_file(cls, filepath, encrypted=True, key=None, logger=None):
        """
        Parse a user.yaml file into a UserYAML instance.

        Add access by "auth_id" to "self.projects" to update the Fence DB.
        Add access by "resource" to "self.user_abac" to update Arborist.

        Args:
            filepath (str): path to the YAML file; when falsy nothing is read and
                an empty UserYAML is returned
            encrypted (bool): whether the file is encrypted
            key (str): decryption key for encrypted files
            logger: optional logger

        Returns:
            UserYAML: the parsed representation

        Raises:
            EnvironmentError: if a username is encountered more than once
        """
        data = {}
        if filepath:
            with _read_file(filepath, encrypted=encrypted, key=key, logger=logger) as f:
                file_contents = f.read()
                validate_user_yaml(file_contents)  # run user.yaml validation tests
                # an empty YAML document loads as None; treat it as "no content"
                data = yaml.safe_load(file_contents) or {}
        elif logger:
            logger.info("Did not sync a user.yaml, no file path provided.")

        projects = dict()
        user_info = dict()
        policies = dict()

        # resources should be the resource tree to construct in arborist
        user_abac = dict()

        # Fall back on rbac block if no authz. Remove when rbac in useryaml fully deprecated.
        if not data.get("authz") and data.get("rbac"):
            if logger:
                logger.info(
                    "No authz block found but rbac block present. Using rbac block"
                )
            data["authz"] = data["rbac"]

        # get user project mapping to arborist resources if it exists
        # (`or {}` guards against an `authz:` key that is present but null)
        project_to_resource = (data.get("authz") or {}).get(
            "user_project_to_resource", dict()
        )

        # read projects and privileges for each user
        users = data.get("users") or {}
        for username, details in users.items():
            # users should occur only once each; fail if already processed
            if username in projects:
                msg = "invalid yaml file: user `{}` occurs multiple times".format(
                    username
                )
                if logger:
                    logger.error(msg)
                raise EnvironmentError(msg)

            # a user listed without any details parses as None
            details = details or {}

            privileges = {}
            resource_permissions = dict()
            for project in details.get("projects") or []:
                try:
                    privileges[project["auth_id"]] = set(project["privilege"])
                except KeyError as e:
                    if logger:
                        logger.error("project {} missing field: {}".format(project, e))
                    continue

                # project may not have `resource` field.
                # prefer resource field;
                # if no resource or mapping, assume auth_id is resource.
                resource = project.get("resource", project["auth_id"])

                if project["auth_id"] not in project_to_resource:
                    project_to_resource[project["auth_id"]] = resource
                resource_permissions[resource] = set(project["privilege"])

            user_info[username] = {
                "email": details.get("email", ""),
                "display_name": details.get("display_name", ""),
                "phone_number": details.get("phone_number", ""),
                "tags": details.get("tags", {}),
                "admin": details.get("admin", False),
            }
            if not details.get("email"):
                # no explicit email: use the username itself if it is a valid address
                try:
                    valid = validate_email(
                        username, allow_smtputf8=False, check_deliverability=False
                    )
                    user_info[username]["email"] = valid.email
                except EmailNotValidError:
                    pass
            projects[username] = privileges
            user_abac[username] = resource_permissions

            # list of policies we want to grant to this user, which get sent to arborist
            # to check if they're allowed to do certain things
            policies[username] = details.get("policies", [])

        if logger:
            logger.info(
                "Got user project to arborist resource mapping:\n{}".format(
                    str(project_to_resource)
                )
            )

        authz = data.get("authz") or {}
        if not authz:
            # older version: resources in root, no `authz` section or `rbac` section
            if logger:
                logger.warning(
                    "access control YAML file is using old format (missing `authz`/`rbac`"
                    " section in the root); assuming that if it exists `resources` will"
                    " be on the root level, and continuing"
                )
            # we're going to throw it into the `authz` dictionary anyways, so the rest of
            # the code can pretend it's in the normal place that we expect
            resources = data.get("resources", [])
            # keep authz empty dict if resources is not specified
            if resources:
                authz["resources"] = resources

        clients = data.get("clients", {})

        return cls(
            projects=projects,
            user_info=user_info,
            user_abac=user_abac,
            policies=policies,
            clients=clients,
            authz=authz,
            project_to_resource=project_to_resource,
            logger=logger,
        )

    def persist_project_to_resource(self, db_session):
        """
        Store the mappings from Project.auth_id to authorization resource (Project.authz)

        The mapping comes from an external source, this function persists what was parsed
        into memory into the database for future use. Existing projects are updated,
        missing ones are created (named after their auth_id), and everything is
        committed in a single commit at the end.
        """
        for auth_id, authz_resource in self.project_to_resource.items():
            project = (
                db_session.query(Project).filter(Project.auth_id == auth_id).first()
            )
            if project:
                project.authz = authz_resource
            else:
                project = Project(name=auth_id, auth_id=auth_id, authz=authz_resource)
                db_session.add(project)
        db_session.commit()
1✔
300

301

302
class UserSyncer(object):
1✔
303
    def __init__(
        self,
        dbGaP,
        DB,
        project_mapping,
        storage_credentials=None,
        db_session=None,
        is_sync_from_dbgap_server=False,
        sync_from_local_csv_dir=None,
        sync_from_local_yaml_file=None,
        arborist=None,
        folder=None,
    ):
        """
        Syncs ACL files from dbGap to auth database and storage backends

        Args:
            dbGaP: a list of dict containing creds to access dbgap sftp; each
                entry may also carry a `parent_to_child_studies_mapping`
            DB: database connection string
            project_mapping: a dict containing how dbgap ids map to projects
            storage_credentials: a dict containing creds for storage backends;
                `self.storage_manager` is only created when this is provided
            db_session: stored as `self.session`
            is_sync_from_dbgap_server: stored flag of the same name
            sync_from_local_csv_dir: stored path of the same name
            sync_from_local_yaml_file: stored path of the same name
            arborist:
                ArboristClient instance if the syncer should also create
                resources in arborist
            folder: a local folder where dbgap telemetry files will sync to
        """
        # where to sync from
        self.sync_from_local_csv_dir = sync_from_local_csv_dir
        self.sync_from_local_yaml_file = sync_from_local_yaml_file
        self.is_sync_from_dbgap_server = is_sync_from_dbgap_server
        self.dbGaP = dbGaP

        # database access
        self.session = db_session
        self.driver = get_SQLAlchemyDriver(DB)

        # bookkeeping filled in while syncing
        self.project_mapping = project_mapping or {}
        self._projects = {}
        self._created_roles = set()
        self._created_policies = set()
        self._dbgap_study_to_resources = {}

        debug_enabled = config["DEBUG"] is True
        self.logger = get_logger(
            "user_syncer", log_level="debug" if debug_enabled else "info"
        )
        self.arborist_client = arborist
        self.folder = folder

        # auth_source used for logging. username : [source1, source2]
        self.auth_source = defaultdict(set)
        self.visa_types = config.get("USERSYNC", {}).get("visa_types", {})

        # merge the parent -> child study mappings of every dbGaP config; later
        # configs win on duplicate parents
        self.parent_to_child_studies_mapping = {}
        for single_dbgap_config in dbGaP:
            child_mapping = single_dbgap_config.get(
                "parent_to_child_studies_mapping", {}
            )
            self.parent_to_child_studies_mapping.update(child_mapping)

        if storage_credentials:
            self.storage_manager = StorageManager(
                storage_credentials, logger=self.logger
            )
        self.id_patterns = []
360

361
    @staticmethod
1✔
362
    def _match_pattern(filepath, id_patterns, encrypted=True):
1✔
363
        """
364
        Check if the filename matches dbgap access control file pattern
365

366
        Args:
367
            filepath (str): path to file
368
            encrypted (bool): whether the file is encrypted
369

370
        Returns:
371
            bool: whether the pattern matches
372
        """
373
        id_patterns.append(r"authentication_file_phs(\d{6}).(csv|txt)")
1✔
374
        for pattern in id_patterns:
1✔
375
            if encrypted:
1✔
376
                pattern += r".enc"
×
377
            pattern += r"$"
1✔
378
            # when converting the YAML from fence-config,
379
            # python reads it as Python string literal. So "\" turns into "\\"
380
            # which messes with the regex match
381
            pattern.replace("\\\\", "\\")
1✔
382
            if re.match(pattern, os.path.basename(filepath)):
1✔
383
                return True
1✔
384
        return False
1✔
385

386
    def _get_from_sftp_with_proxy(self, server, path):
        """
        Download all data from sftp sever to a local dir

        When the server config names a proxy, the connection is tunnelled through
        an `ssh ... nc` proxy command; that proxy is always closed afterwards,
        including when connecting or downloading fails.

        Args:
            server (dict) : dictionary containing info to access sftp server
            path (str): path to local directory

        Returns:
            None
        """
        proxy = None
        if server.get("proxy", "") != "":
            command = "ssh -i ~/.ssh/id_rsa {user}@{proxy} nc {host} {port}".format(
                user=server.get("proxy_user", ""),
                proxy=server.get("proxy", ""),
                host=server.get("host", ""),
                port=server.get("port", 22),
            )
            self.logger.info("SSH proxy command: {}".format(command))

            proxy = ProxyCommand(command)

        try:
            with paramiko.SSHClient() as client:
                client.set_log_channel(self.logger.name)

                # NOTE(review): AutoAddPolicy accepts host keys that are not
                # already known; confirm this is acceptable for this deployment.
                client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
                parameters = {
                    "hostname": str(server.get("host", "")),
                    "username": str(server.get("username", "")),
                    "password": str(server.get("password", "")),
                    "port": int(server.get("port", 22)),
                }
                if proxy:
                    parameters["sock"] = proxy

                self.logger.info(
                    "SSH connection hostname:port {}:{}".format(
                        parameters.get("hostname", "unknown"),
                        parameters.get("port", "unknown"),
                    )
                )
                self._connect_with_ssh(ssh_client=client, parameters=parameters)
                with client.open_sftp() as sftp:
                    download_dir(sftp, "./", path)
        finally:
            # release the proxy subprocess even if the transfer failed
            if proxy:
                proxy.close()
×
434

435
    @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
    def _connect_with_ssh(self, ssh_client, parameters):
        """
        Open the SSH connection, retrying with exponential backoff on any exception.

        Args:
            ssh_client: client whose `connect` is called
            parameters (dict): keyword arguments forwarded to `connect`
        """
        ssh_client.connect(**parameters)
1✔
438

439
    def _get_from_ftp_with_proxy(self, server, path):
        """
        Download data from ftp sever to a local dir

        Runs `lftp` to mirror the remote directory into `path` through the
        configured HTTP proxy. Failures are logged, not raised.

        Args:
            server (dict): dictionary containing information for accessing server
            path(str): path to local files

        Returns:
            None
        """
        lftp_script = "set ftp:proxy http://{}; mirror . {}; exit".format(
            server.get("proxy", ""), path
        )
        # Pass an argument list (no shell) so credentials or paths containing
        # shell metacharacters can neither break nor inject into the command.
        command = [
            "lftp",
            "-u",
            "{},{}".format(server.get("username", ""), server.get("password", "")),
            str(server.get("host", "")),
            "-e",
            lftp_script,
        ]
        try:
            exit_code = sp.call(command)
        except OSError as e:
            # e.g. lftp is not installed; os.system would not have raised here either
            self.logger.error("Could not run lftp: {}".format(e))
            return
        if exit_code != 0:
            self.logger.error("lftp exited with status {}".format(exit_code))
×
460

461
    def _get_parse_consent_code(self, dbgap_config={}):
1✔
462
        return dbgap_config.get(
1✔
463
            "parse_consent_code", True
464
        )  # Should this really be true?
465

466
    def _parse_csv(self, file_dict, sess, dbgap_config=None, encrypted=True):
        """
        parse csv files to python dict

        Files that are empty or whose name does not match an accepted whitelist
        filename pattern are skipped with a warning. Rows without a login, or
        whose project id matches none of the allowed project id patterns, are
        skipped as well.

        Args:
            file_dict: a dictionary with key(file path) and value(privileges)
            sess: sqlalchemy session
            dbgap_config: a dictionary containing information about the dbGaP sftp server
                (comes from fence config); treated as empty when omitted
            encrypted: boolean indicating whether those files are encrypted


        Return:
            Tuple[[dict, dict]]:
                (user_project, user_info) where user_project is a mapping from
                usernames to project permissions and user_info is a mapping
                from usernames to user details, such as email

        Example:

            (
                {
                    username: {
                        'project1': {'read-storage','write-storage'},
                        'project2': {'read-storage'},
                    }
                },
                {
                    username: {
                        'email': 'email@mail.com',
                        'display_name': 'display name',
                        'phone_number': '123-456-789',
                        'tags': {'dbgap_role': 'PI'}
                    }
                },
            )

        """
        if dbgap_config is None:
            dbgap_config = {}

        user_projects = dict()
        user_info = defaultdict(dict)

        # parse dbGaP sftp server information
        dbgap_key = dbgap_config.get("decrypt_key", None)

        allow_non_dbgap_whitelist = dbgap_config.get(
            "allow_non_dbGaP_whitelist", False
        )
        if allow_non_dbgap_whitelist:
            for item in dbgap_config.get("allowed_whitelist_patterns", []):
                whitelist_pattern = item.replace("\\\\", "\\")
                # this method runs once per dbGaP config; do not pile up duplicates
                if whitelist_pattern not in self.id_patterns:
                    self.id_patterns.append(whitelist_pattern)

        enable_common_exchange_area_access = dbgap_config.get(
            "enable_common_exchange_area_access", False
        )
        study_common_exchange_areas = dbgap_config.get(
            "study_common_exchange_areas", {}
        )
        parse_consent_code = self._get_parse_consent_code(dbgap_config)

        if parse_consent_code and enable_common_exchange_area_access:
            self.logger.info(
                f"using study to common exchange area mapping: {study_common_exchange_areas}"
            )

        project_id_patterns = [r"phs(\d{6})"]
        # when converting the YAML from fence-config, python reads it as Python
        # string literal. So "\" turns into "\\" which messes with the regex match
        project_id_patterns += [
            pattern.replace("\\\\", "\\")
            for pattern in dbgap_config.get("additional_allowed_project_id_patterns")
            or []
        ]

        self.logger.info(f"Using these file paths: {file_dict.items()}")
        for filepath, privileges in file_dict.items():
            self.logger.info("Reading file {}".format(filepath))
            if os.stat(filepath).st_size == 0:
                self.logger.warning("Empty file {}".format(filepath))
                continue
            if not self._match_pattern(
                filepath, id_patterns=self.id_patterns, encrypted=encrypted
            ):
                self.logger.warning(
                    "Filename {} does not match dbgap access control filename pattern;"
                    " this could mean that the filename has an invalid format, or has"
                    " an unexpected .enc extension, or lacks the .enc extension where"
                    " expected. This file is NOT being processed by usersync!".format(
                        filepath
                    )
                )
                continue

            with _read_file(
                filepath, encrypted=encrypted, key=dbgap_key, logger=self.logger
            ) as f:
                csv = DictReader(f, quotechar='"', skipinitialspace=True)
                for row in csv:
                    username = row.get("login") or ""
                    if username == "":
                        continue

                    if allow_non_dbgap_whitelist:
                        phsid = (
                            row.get("phsid") or (row.get("project_id") or "")
                        ).split(".")
                    else:
                        phsid = (row.get("phsid") or "").split(".")

                    dbgap_project = phsid[0]
                    # There are issues where dbgap has a wrong entry in their
                    # whitelist. Since we do a bulk arborist request, there are wrong
                    # entries in it that invalidates the whole request causing other
                    # correct entries not to be added
                    for pattern in project_id_patterns:
                        self.logger.debug(
                            "Checking pattern:{} with project_id:{}".format(
                                pattern, dbgap_project
                            )
                        )
                        if re.match(pattern, dbgap_project):
                            break
                    else:
                        # no allowed pattern matched this project id
                        self.logger.warning(
                            "Skip processing from file {}, user {} with project {}".format(
                                filepath,
                                username,
                                dbgap_project,
                            )
                        )
                        continue

                    if len(phsid) > 1 and parse_consent_code:
                        consent_code = phsid[-1]

                        # c999 indicates full access to all consents and access
                        # to a study-specific exchange area
                        # access to at least one study-specific exchange area implies access
                        # to the parent study's common exchange area
                        #
                        # NOTE: Handling giving access to all consents is done at
                        #       a later time, when we have full information about possible
                        #       consents
                        self.logger.debug(
                            f"got consent code {consent_code} from dbGaP project "
                            f"{dbgap_project}"
                        )
                        if (
                            consent_code == "c999"
                            and enable_common_exchange_area_access
                            and dbgap_project in study_common_exchange_areas
                        ):
                            self.logger.info(
                                "found study with consent c999 and Fence "
                                "is configured to parse exchange area data. Giving user "
                                f"{username} {privileges} privileges in project: "
                                f"{study_common_exchange_areas[dbgap_project]}."
                            )
                            self._add_dbgap_project_for_user(
                                study_common_exchange_areas[dbgap_project],
                                privileges,
                                username,
                                sess,
                                user_projects,
                                dbgap_config,
                            )

                        dbgap_project += "." + consent_code

                    self._add_children_for_dbgap_project(
                        dbgap_project,
                        privileges,
                        username,
                        sess,
                        user_projects,
                        dbgap_config,
                    )

                    display_name = row.get("user name") or ""
                    tags = {"dbgap_role": row.get("role") or ""}

                    # some dbgap telemetry files have information about a researchers PI
                    if "downloader for" in row:
                        tags["pi"] = row["downloader for"]

                    # prefer name over previous "downloader for" if it exists
                    if "downloader for names" in row:
                        tags["pi"] = row["downloader for names"]

                    # keep an email / phone number seen on an earlier row for this
                    # user when the current row has none
                    previous_info = user_info[username]
                    user_info[username] = {
                        "email": row.get("email") or previous_info.get("email") or "",
                        "display_name": display_name,
                        "phone_number": row.get("phone")
                        or previous_info.get("phone_number")
                        or "",
                        "tags": tags,
                    }

                    self._process_dbgap_project(
                        dbgap_project,
                        privileges,
                        username,
                        sess,
                        user_projects,
                        dbgap_config,
                    )

        return user_projects, user_info
1✔
672

673
    def _get_children(self, dbgap_project):
1✔
674
        return self.parent_to_child_studies_mapping.get(dbgap_project.split(".")[0])
1✔
675

676
    def _add_children_for_dbgap_project(
1✔
677
        self, dbgap_project, privileges, username, sess, user_projects, dbgap_config
678
    ):
679
        """
680
        Adds the configured child studies for the given dbgap_project, adding it to the provided user_projects. If
681
        parse_consent_code is true, then the consents granted in the provided dbgap_project will also be granted to the
682
        child studies.
683
        """
684
        parent_phsid = dbgap_project
1✔
685
        parse_consent_code = self._get_parse_consent_code(dbgap_config)
1✔
686
        child_suffix = ""
1✔
687
        if parse_consent_code and re.match(
1✔
688
            config["DBGAP_ACCESSION_WITH_CONSENT_REGEX"], dbgap_project
689
        ):
690
            parent_phsid_parts = dbgap_project.split(".")
1✔
691
            parent_phsid = parent_phsid_parts[0]
1✔
692
            child_suffix = "." + parent_phsid_parts[1]
1✔
693

694
        if parent_phsid not in self.parent_to_child_studies_mapping:
1✔
695
            return
1✔
696

697
        self.logger.info(
1✔
698
            f"found parent study {parent_phsid} and Fence "
699
            "is configured to provide additional access to child studies. Giving user "
700
            f"{username} {privileges} privileges in projects: "
701
            f"{{k + child_suffix: v + child_suffix for k, v in self.parent_to_child_studies_mapping.items()}}."
702
        )
703
        child_studies = self.parent_to_child_studies_mapping.get(parent_phsid, [])
1✔
704
        for child_study in child_studies:
1✔
705
            self._add_dbgap_project_for_user(
1✔
706
                child_study + child_suffix,
707
                privileges,
708
                username,
709
                sess,
710
                user_projects,
711
                dbgap_config,
712
            )
713

714
    def _add_dbgap_project_for_user(
1✔
715
        self, dbgap_project, privileges, username, sess, user_projects, dbgap_config
716
    ):
717
        """
718
        Helper function for csv parsing that adds a given dbgap project to Fence/Arborist
719
        and then updates the dictionary containing all user's project access
720
        """
721
        if dbgap_project not in self._projects:
1✔
722
            self.logger.debug(
1✔
723
                "creating Project in fence for dbGaP study: {}".format(dbgap_project)
724
            )
725

726
            project = self._get_or_create(sess, Project, auth_id=dbgap_project)
1✔
727

728
            # need to add dbgap project to arborist
729
            if self.arborist_client:
1✔
730
                self._determine_arborist_resource(dbgap_project, dbgap_config)
1✔
731

732
            if project.name is None:
1✔
733
                project.name = dbgap_project
1✔
734
            self._projects[dbgap_project] = project
1✔
735
        phsid_privileges = {dbgap_project: set(privileges)}
1✔
736
        if username in user_projects:
1✔
737
            user_projects[username].update(phsid_privileges)
1✔
738
        else:
739
            user_projects[username] = phsid_privileges
1✔
740

741
    @staticmethod
1✔
742
    def sync_two_user_info_dict(user_info1, user_info2):
1✔
743
        """
744
        Merge user_info1 into user_info2. Values in user_info2 are overriden
745
        by values in user_info1. user_info2 ends up containing the merged dict.
746

747
        Args:
748
            user_info1 (dict): nested dict
749
            user_info2 (dict): nested dict
750

751
            Example:
752
            {username: {'email': 'abc@email.com'}}
753

754
        Returns:
755
            None
756
        """
757
        user_info2.update(user_info1)
1✔
758

759
    def sync_two_phsids_dict(
        self,
        phsids1,
        phsids2,
        source1=None,
        source2=None,
        phsids2_overrides_phsids1=True,
    ):
        """
        Merge phsids1 into phsids2. phsids2 ends up containing the merged
        dict; phsids1 is left untouched.

        `source1` and `source2` are recorded in `self.auth_source[user]` for
        the users whose access they contribute to.

        Args:
            phsids1, phsids2: nested dicts mapping phsids to sets of permissions

            source1, source2: source of authz information (eg. dbgap, user_yaml, visas)

            phsids2_overrides_phsids1 (bool): only matters for users present
                in both dicts (see below)

            Example:
            {
                username: {
                    phsid1: {'read-storage','write-storage'},
                    phsid2: {'read-storage'},
                }
            }

        Return:
            None

        Explanation:
            For each user in phsids1:

            - the user has no (or an empty) entry in phsids2: the user's
              projects are copied from phsids1 and `source1` is recorded.

            - the user is in both dicts and `phsids2_overrides_phsids1` is
              true: both sources are recorded and the privileges are merged
              per project:

                {user1: {phsid1: privilege1}}
                {user1: {phsid2: privilege2}}

              case 1: phsid1 != phsid2. Output:

                {user1: {phsid1: privilege1, phsid2: privilege2}}

              case 2: phsid1 == phsid2. Output:

                {user1: {phsid1: union(privilege1, privilege2)}}

            - the user is in both dicts and `phsids2_overrides_phsids1` is
              false: the phsids2 entry is kept as is and only `source2` is
              recorded.
        """
        for user, projects1 in phsids1.items():
            if not phsids2.get(user):
                if source1:
                    self.auth_source[user].add(source1)
                # Copy instead of aliasing: later merges update the sets held
                # by phsids2 in place and must not leak back into phsids1.
                phsids2[user] = {
                    phsid: copy.copy(privileges)
                    for phsid, privileges in projects1.items()
                }
            elif phsids2_overrides_phsids1:
                if source1:
                    self.auth_source[user].add(source1)
                if source2:
                    self.auth_source[user].add(source2)
                for phsid1, privilege1 in projects1.items():
                    phsids2[user].setdefault(phsid1, set()).update(privilege1)
            elif source2:
                self.auth_source[user].add(source2)
×
823

824
    def sync_to_db_and_storage_backend(
        self,
        user_project,
        user_info,
        sess,
        do_not_revoke_from_db_and_storage=False,
        expires=None,
    ):
        """
        Bring the database and the storage backend in line with the given
        user access.

        Args:
            user_project (dict): a dictionary of

                {
                    username: {
                        'project1': {'read-storage','write-storage'},
                        'project2': {'read-storage'}
                    }
                }

            user_info (dict): a dictionary of {username: user_info{}}
            sess: a sqlalchemy session
            do_not_revoke_from_db_and_storage (bool): when true, access that
                is in the db but not in `user_project` is left in place and
                the admin flag validation is skipped
            expires: forwarded to the storage grants

        Return:
            None
        """
        google_bulk_mapping = {} if config["GOOGLE_BULK_UPDATES"] else None

        self._init_projects(user_project, sess)

        dbgap_provider = self._get_or_create(sess, AuthorizationProvider, name="dbGaP")
        fence_provider = self._get_or_create(sess, AuthorizationProvider, name="fence")
        auth_provider_list = [dbgap_provider, fence_provider]

        access_in_db = {
            (privilege.user.username.lower(), privilege.project.auth_id)
            for privilege in sess.query(AccessPrivilege).all()
        }

        # usernames are stored case-sensitively in the db, but the comparison
        # between db and whitelist has to be case-insensitive
        user_project_lowercase = {
            username.lower(): projects for username, projects in user_project.items()
        }
        access_requested = {
            (username.lower(), project)
            for username, projects in user_project.items()
            for project in projects
        }
        user_info_lowercase = {
            username.lower(): info for username, info in user_info.items()
        }

        to_delete = access_in_db - access_requested
        to_add = access_requested - access_in_db
        to_update = access_in_db & access_requested

        # the original (non-lowered) user_info is used here so the username
        # keeps its case when users are created or updated
        self._upsert_userinfo(sess, user_info)

        if not do_not_revoke_from_db_and_storage:
            self._revoke_from_storage(
                to_delete, sess, google_bulk_mapping=google_bulk_mapping
            )
            self._revoke_from_db(sess, to_delete)

        # brand new access: storage first, then db
        self._grant_from_storage(
            to_add,
            user_project_lowercase,
            sess,
            google_bulk_mapping=google_bulk_mapping,
            expires=expires,
        )
        self._grant_from_db(
            sess,
            to_add,
            user_info_lowercase,
            user_project_lowercase,
            auth_provider_list,
        )

        # access that already exists is granted again in storage and
        # refreshed in the db
        self._grant_from_storage(
            to_update,
            user_project_lowercase,
            sess,
            google_bulk_mapping=google_bulk_mapping,
            expires=expires,
        )
        self._update_from_db(sess, to_update, user_project_lowercase)

        if not do_not_revoke_from_db_and_storage:
            self._validate_and_update_user_admin(sess, user_info_lowercase)

        sess.commit()

        if config["GOOGLE_BULK_UPDATES"]:
            self.logger.info("Doing bulk Google update...")
            update_google_groups_for_users(google_bulk_mapping)
            self.logger.info("Bulk Google update done!")

        sess.commit()
1✔
933

934
    def sync_to_storage_backend(self, user_project, user_info, sess, expires):
        """
        Grant a user's access in the storage backend with a given expiration.

        Args:
            user_project (dict): a dictionary of

                {
                    username: {
                        'project1': {'read-storage','write-storage'},
                        'project2': {'read-storage'}
                    }
                }

            user_info (dict): a dictionary of attributes for a user. The
                'username' key is always read; 'user_id' is read when bulk
                Google updates are enabled.
            sess: a sqlalchemy session
            expires: expiration forwarded to the storage grants; required

        Return:
            None

        Raises:
            Exception: when `expires` is falsy
        """
        if not expires:
            raise Exception(
                f"sync to storage backend requires an expiration. you provided: {expires}"
            )

        google_group_user_mapping = None
        if config["GOOGLE_BULK_UPDATES"]:
            google_group_user_mapping = {}
            get_or_create_proxy_group_id(
                expires=expires,
                user_id=user_info["user_id"],
                username=user_info["username"],
                session=sess,
                storage_manager=self.storage_manager,
            )

        # TODO: eventually it'd be nice to remove this step but it's required
        #       so that grant_from_storage can determine what storage backends
        #       are needed for a project.
        self._init_projects(user_project, sess)

        # usernames are stored case-sensitively in the db, but lookups against
        # the whitelist are done case-insensitively
        user_project_lowercase = {
            username.lower(): projects for username, projects in user_project.items()
        }
        to_add = {
            (username.lower(), project)
            for username, projects in user_project.items()
            for project in projects
        }

        # the user's info is upserted under the lowercased username
        lowered_username = user_info["username"].lower()
        self._upsert_userinfo(sess, {lowered_username: user_info})

        self._grant_from_storage(
            to_add,
            user_project_lowercase,
            sess,
            google_bulk_mapping=google_group_user_mapping,
            expires=expires,
        )

        if config["GOOGLE_BULK_UPDATES"]:
            self.logger.info("Updating user's google groups ...")
            update_google_groups_for_users(google_group_user_mapping)
            self.logger.info("Google groups update done!!")

        sess.commit()
1✔
1007

1008
    def _revoke_from_db(self, sess, to_delete):
1✔
1009
        """
1010
        Revoke user access to projects in the auth database
1011

1012
        Args:
1013
            sess: sqlalchemy session
1014
            to_delete: a set of (username, project.auth_id) to be revoked from db
1015
        Return:
1016
            None
1017
        """
1018
        for username, project_auth_id in to_delete:
1✔
1019
            q = (
1✔
1020
                sess.query(AccessPrivilege)
1021
                .filter(AccessPrivilege.project.has(auth_id=project_auth_id))
1022
                .join(AccessPrivilege.user)
1023
                .filter(func.lower(User.username) == username)
1024
                .all()
1025
            )
1026
            for access in q:
1✔
1027
                self.logger.info(
1✔
1028
                    "revoke {} access to {} in db".format(username, project_auth_id)
1029
                )
1030
                sess.delete(access)
1✔
1031

1032
    def _validate_and_update_user_admin(self, sess, user_info):
        """
        Remove the admin flag from every admin user in the db whose
        lowercased username is not a key of `user_info`.

        Args:
            sess: sqlalchemy session
            user_info: a dict of
            {
                username: {
                    'email': email,
                    'display_name': display_name,
                    'phone_number': phonenum,
                    'tags': {'k1':'v1', 'k2': 'v2'}
                    'admin': is_admin
                }
            }
        Returns:
            None
        """
        current_admins = sess.query(User).filter_by(is_admin=True).all()
        for admin_user in current_admins:
            if admin_user.username.lower() in user_info:
                continue

            admin_user.is_admin = False
            sess.add(admin_user)
            self.logger.info(
                "remove admin access from {} in db".format(
                    admin_user.username.lower()
                )
            )
1060

1061
    def _update_from_db(self, sess, to_update, user_project):
1✔
1062
        """
1063
        Update user access to projects in the auth database
1064

1065
        Args:
1066
            sess: sqlalchemy session
1067
            to_update:
1068
                a set of (username, project.auth_id) to be updated from db
1069

1070
        Return:
1071
            None
1072
        """
1073

1074
        for username, project_auth_id in to_update:
1✔
1075
            q = (
1✔
1076
                sess.query(AccessPrivilege)
1077
                .filter(AccessPrivilege.project.has(auth_id=project_auth_id))
1078
                .join(AccessPrivilege.user)
1079
                .filter(func.lower(User.username) == username)
1080
                .all()
1081
            )
1082
            for access in q:
1✔
1083
                access.privilege = user_project[username][project_auth_id]
1✔
1084
                self.logger.info(
1✔
1085
                    "update {} with {} access to {} in db".format(
1086
                        username, access.privilege, project_auth_id
1087
                    )
1088
                )
1089

1090
    def _grant_from_db(self, sess, to_add, user_info, user_project, auth_provider_list):
1✔
1091
        """
1092
        Grant user access to projects in the auth database
1093
        Args:
1094
            sess: sqlalchemy session
1095
            to_add: a set of (username, project.auth_id) to be granted
1096
            user_project:
1097
                a dictionary of {username: {project: {'read','write'}}
1098
        Return:
1099
            None
1100
        """
1101
        for username, project_auth_id in to_add:
1✔
1102
            u = query_for_user(session=sess, username=username)
1✔
1103

1104
            auth_provider = auth_provider_list[0]
1✔
1105
            if "dbgap_role" not in user_info[username]["tags"]:
1✔
1106
                auth_provider = auth_provider_list[1]
1✔
1107
            user_access = AccessPrivilege(
1✔
1108
                user=u,
1109
                project=self._projects[project_auth_id],
1110
                privilege=list(user_project[username][project_auth_id]),
1111
                auth_provider=auth_provider,
1112
            )
1113
            self.logger.info(
1✔
1114
                "grant user {} to {} with access {}".format(
1115
                    username, user_access.project, user_access.privilege
1116
                )
1117
            )
1118
            sess.add(user_access)
1✔
1119

1120
    def _upsert_userinfo(self, sess, user_info):
1✔
1121
        """
1122
        update user info to database.
1123

1124
        Args:
1125
            sess: sqlalchemy session
1126
            user_info:
1127
                a dict of {username: {display_name, phone_number, tags, admin}
1128

1129
        Return:
1130
            None
1131
        """
1132

1133
        for username in user_info:
1✔
1134
            u = query_for_user(session=sess, username=username)
1✔
1135

1136
            if u is None:
1✔
1137
                self.logger.info("create user {}".format(username))
1✔
1138
                u = User(username=username)
1✔
1139
                sess.add(u)
1✔
1140

1141
            if self.arborist_client:
1✔
1142
                self.arborist_client.create_user({"name": username})
1✔
1143

1144
            u.email = user_info[username].get("email", "")
1✔
1145
            u.display_name = user_info[username].get("display_name", "")
1✔
1146
            u.phone_number = user_info[username].get("phone_number", "")
1✔
1147
            u.is_admin = user_info[username].get("admin", False)
1✔
1148

1149
            idp_name = user_info[username].get("idp_name", "")
1✔
1150
            if idp_name and not u.identity_provider:
1✔
1151
                idp = (
×
1152
                    sess.query(IdentityProvider)
1153
                    .filter(IdentityProvider.name == idp_name)
1154
                    .first()
1155
                )
1156
                if not idp:
×
1157
                    idp = IdentityProvider(name=idp_name)
×
1158
                u.identity_provider = idp
×
1159

1160
            # do not update if there is no tag
1161
            if not user_info[username].get("tags"):
1✔
1162
                continue
1✔
1163

1164
            # remove user db tags if they are not shown in new tags
1165
            for tag in u.tags:
1✔
1166
                if tag.key not in user_info[username]["tags"]:
1✔
1167
                    u.tags.remove(tag)
1✔
1168

1169
            # sync
1170
            for k, v in user_info[username]["tags"].items():
1✔
1171
                found = False
1✔
1172
                for tag in u.tags:
1✔
1173
                    if tag.key == k:
1✔
1174
                        found = True
1✔
1175
                        tag.value = v
1✔
1176
                # create new tag if not found
1177
                if not found:
1✔
1178
                    tag = Tag(key=k, value=v)
1✔
1179
                    u.tags.append(tag)
1✔
1180

1181
    def _revoke_from_storage(self, to_delete, sess, google_bulk_mapping=None):
1✔
1182
        """
1183
        If a project have storage backend, revoke user's access to buckets in
1184
        the storage backend.
1185

1186
        Args:
1187
            to_delete: a set of (username, project.auth_id) to be revoked
1188

1189
        Return:
1190
            None
1191
        """
1192
        for username, project_auth_id in to_delete:
1✔
1193
            project = (
1✔
1194
                sess.query(Project).filter(Project.auth_id == project_auth_id).first()
1195
            )
1196
            for sa in project.storage_access:
1✔
1197
                if not hasattr(self, "storage_manager"):
1✔
1198
                    self.logger.error(
×
1199
                        (
1200
                            "CANNOT revoke {} access to {} in {} because there is NO "
1201
                            "configured storage accesses at all. See configuration. "
1202
                            "Continuing anyway..."
1203
                        ).format(username, project_auth_id, sa.provider.name)
1204
                    )
1205
                    continue
×
1206

1207
                self.logger.info(
1✔
1208
                    "revoke {} access to {} in {}".format(
1209
                        username, project_auth_id, sa.provider.name
1210
                    )
1211
                )
1212
                self.storage_manager.revoke_access(
1✔
1213
                    provider=sa.provider.name,
1214
                    username=username,
1215
                    project=project,
1216
                    session=sess,
1217
                    google_bulk_mapping=google_bulk_mapping,
1218
                )
1219

1220
    def _grant_from_storage(
1✔
1221
        self, to_add, user_project, sess, google_bulk_mapping=None, expires=None
1222
    ):
1223
        """
1224
        If a project have storage backend, grant user's access to buckets in
1225
        the storage backend.
1226

1227
        Args:
1228
            to_add: a set of (username, project.auth_id)  to be granted
1229
            user_project: a dictionary like:
1230

1231
                    {username: {phsid: {'read-storage','write-storage'}}}
1232

1233
        Return:
1234
            dict of the users' storage usernames to their user_projects and the respective storage access.
1235
        """
1236
        storage_user_to_sa_and_user_project = defaultdict()
1✔
1237
        for username, project_auth_id in to_add:
1✔
1238
            project = self._projects[project_auth_id]
1✔
1239
            for sa in project.storage_access:
1✔
1240
                access = list(user_project[username][project_auth_id])
1✔
1241
                if not hasattr(self, "storage_manager"):
1✔
1242
                    self.logger.error(
×
1243
                        (
1244
                            "CANNOT grant {} access {} to {} in {} because there is NO "
1245
                            "configured storage accesses at all. See configuration. "
1246
                            "Continuing anyway..."
1247
                        ).format(username, access, project_auth_id, sa.provider.name)
1248
                    )
1249
                    continue
×
1250

1251
                self.logger.info(
1✔
1252
                    "grant {} access {} to {} in {}".format(
1253
                        username, access, project_auth_id, sa.provider.name
1254
                    )
1255
                )
1256
                storage_username = self.storage_manager.grant_access(
1✔
1257
                    provider=sa.provider.name,
1258
                    username=username,
1259
                    project=project,
1260
                    access=access,
1261
                    session=sess,
1262
                    google_bulk_mapping=google_bulk_mapping,
1263
                    expires=expires,
1264
                )
1265

1266
                storage_user_to_sa_and_user_project[storage_username] = (sa, project)
1✔
1267
        return storage_user_to_sa_and_user_project
1✔
1268

1269
    def _init_projects(self, user_project, sess):
1✔
1270
        """
1271
        initialize projects
1272
        """
1273
        if self.project_mapping:
1✔
1274
            for projects in list(self.project_mapping.values()):
1✔
1275
                for p in projects:
1✔
1276
                    self.logger.debug(
1✔
1277
                        "creating Project with info from project_mapping: {}".format(p)
1278
                    )
1279
                    project = self._get_or_create(sess, Project, **p)
1✔
1280
                    self._projects[p["auth_id"]] = project
1✔
1281
        for _, projects in user_project.items():
1✔
1282
            for auth_id in list(projects.keys()):
1✔
1283
                project = sess.query(Project).filter(Project.auth_id == auth_id).first()
1✔
1284
                if not project:
1✔
1285
                    data = {"name": auth_id, "auth_id": auth_id}
1✔
1286
                    try:
1✔
1287
                        project = self._get_or_create(sess, Project, **data)
1✔
1288
                    except IntegrityError as e:
×
1289
                        sess.rollback()
×
1290
                        self.logger.error(
×
1291
                            f"Project {auth_id} already exists. Detail {str(e)}"
1292
                        )
1293
                        raise Exception(
×
1294
                            "Project {} already exists. Detail {}. Please contact your system administrator.".format(
1295
                                auth_id, str(e)
1296
                            )
1297
                        )
1298
                if auth_id not in self._projects:
1✔
1299
                    self._projects[auth_id] = project
1✔
1300

1301
    @staticmethod
1✔
1302
    def _get_or_create(sess, model, **kwargs):
1✔
1303
        instance = sess.query(model).filter_by(**kwargs).first()
1✔
1304
        if not instance:
1✔
1305
            instance = model(**kwargs)
1✔
1306
            sess.add(instance)
1✔
1307
        return instance
1✔
1308

1309
    def _process_dbgap_files(self, dbgap_config, sess):
1✔
1310
        """
1311
        Args:
1312
            dbgap_config : a dictionary containing information about a single
1313
                           dbgap sftp server (from fence config)
1314
            sess: database session
1315

1316
        Return:
1317
            user_projects (dict)
1318
            user_info (dict)
1319
        """
1320
        dbgap_file_list = []
1✔
1321
        hostname = dbgap_config["info"]["host"]
1✔
1322
        username = dbgap_config["info"]["username"]
1✔
1323
        encrypted = dbgap_config["info"].get("encrypted", True)
1✔
1324
        folderdir = os.path.join(str(self.folder), str(hostname), str(username))
1✔
1325

1326
        try:
1✔
1327
            if os.path.exists(folderdir):
1✔
1328
                dbgap_file_list = glob.glob(
×
1329
                    os.path.join(folderdir, "*")
1330
                )  # get lists of file from folder
1331
            else:
1332
                self.logger.info("Downloading files from: {}".format(hostname))
1✔
1333
                dbgap_file_list = self._download(dbgap_config)
1✔
1334
        except Exception as e:
1✔
1335
            self.logger.error(e)
1✔
1336
            exit(1)
1✔
1337
        self.logger.info("dbgap files: {}".format(dbgap_file_list))
×
1338
        user_projects, user_info = self._get_user_permissions_from_csv_list(
×
1339
            dbgap_file_list,
1340
            encrypted=encrypted,
1341
            session=sess,
1342
            dbgap_config=dbgap_config,
1343
        )
1344

1345
        user_projects = self.parse_projects(user_projects)
×
1346
        return user_projects, user_info
×
1347

1348
    def _get_user_permissions_from_csv_list(
1✔
1349
        self, file_list, encrypted, session, dbgap_config={}
1350
    ):
1351
        """
1352
        Args:
1353
            file_list: list of files (represented as strings)
1354
            encrypted: boolean indicating whether those files are encrypted
1355
            session: sqlalchemy session
1356
            dbgap_config: a dictionary containing information about the dbGaP sftp server
1357
                    (comes from fence config)
1358

1359
        Return:
1360
            user_projects (dict)
1361
            user_info (dict)
1362
        """
1363
        permissions = [{"read-storage", "read"} for _ in file_list]
1✔
1364
        user_projects, user_info = self._parse_csv(
1✔
1365
            dict(list(zip(file_list, permissions))),
1366
            sess=session,
1367
            dbgap_config=dbgap_config,
1368
            encrypted=encrypted,
1369
        )
1370
        return user_projects, user_info
1✔
1371

1372
    def _merge_multiple_local_csv_files(
1✔
1373
        self, dbgap_file_list, encrypted, dbgap_configs, session
1374
    ):
1375
        """
1376
        Args:
1377
            dbgap_file_list (list): a list of whitelist file locations stored locally
1378
            encrypted (bool): whether the file is encrypted (comes from fence config)
1379
            dbgap_configs (list): list of dictionaries containing information about the dbgap server (comes from fence config)
1380
            session (sqlalchemy.Session): database session
1381

1382
        Return:
1383
            merged_user_projects (dict)
1384
            merged_user_info (dict)
1385
        """
1386
        merged_user_projects = {}
1✔
1387
        merged_user_info = {}
1✔
1388

1389
        for dbgap_config in dbgap_configs:
1✔
1390
            user_projects, user_info = self._get_user_permissions_from_csv_list(
1✔
1391
                dbgap_file_list,
1392
                encrypted,
1393
                session=session,
1394
                dbgap_config=dbgap_config,
1395
            )
1396
            self.sync_two_user_info_dict(user_info, merged_user_info)
1✔
1397
            self.sync_two_phsids_dict(user_projects, merged_user_projects)
1✔
1398
        return merged_user_projects, merged_user_info
1✔
1399

1400
    def _merge_multiple_dbgap_sftp(self, dbgap_servers, sess):
1✔
1401
        """
1402
        Args:
1403
            dbgap_servers : a list of dictionaries each containging config on
1404
                           dbgap sftp server (comes from fence config)
1405
            sess: database session
1406

1407
        Return:
1408
            merged_user_projects (dict)
1409
            merged_user_info (dict)
1410
        """
1411
        merged_user_projects = {}
1✔
1412
        merged_user_info = {}
1✔
1413
        for dbgap in dbgap_servers:
1✔
1414
            user_projects, user_info = self._process_dbgap_files(dbgap, sess)
1✔
1415
            # merge into merged_user_info
1416
            # user_info overrides original info in merged_user_info
1417
            self.sync_two_user_info_dict(user_info, merged_user_info)
1✔
1418

1419
            # merge all access info dicts into "merged_user_projects".
1420
            # the access info is combined - if the user_projects access is
1421
            # ["read"] and the merged_user_projects is ["read-storage"], the
1422
            # resulting access is ["read", "read-storage"].
1423
            self.sync_two_phsids_dict(user_projects, merged_user_projects)
1✔
1424
        return merged_user_projects, merged_user_info
1✔
1425

1426
    def parse_projects(self, user_projects):
        """
        Return a copy of `user_projects` whose keys are lowercased. When two
        keys only differ by case, the value of the later one is kept.
        """
        lowered = {}
        for name, value in user_projects.items():
            lowered[name.lower()] = value
        return lowered
1✔
1431

1432
    def _process_dbgap_project(
1✔
1433
        self, dbgap_project, privileges, username, sess, user_projects, dbgap_config
1434
    ):
1435
        if dbgap_project not in self.project_mapping:
1✔
1436
            self._add_dbgap_project_for_user(
1✔
1437
                dbgap_project,
1438
                privileges,
1439
                username,
1440
                sess,
1441
                user_projects,
1442
                dbgap_config,
1443
            )
1444

1445
        for element_dict in self.project_mapping.get(dbgap_project, []):
1✔
1446
            try:
1✔
1447
                phsid_privileges = {element_dict["auth_id"]: set(privileges)}
1✔
1448

1449
                # need to add dbgap project to arborist
1450
                if self.arborist_client:
1✔
1451
                    self._determine_arborist_resource(
1✔
1452
                        element_dict["auth_id"], dbgap_config
1453
                    )
1454

1455
                if username not in user_projects:
1✔
1456
                    user_projects[username] = {}
1✔
1457
                user_projects[username].update(phsid_privileges)
1✔
1458

1459
            except ValueError as e:
×
1460
                self.logger.info(e)
×
1461

1462
    def _process_user_projects(
1✔
1463
        self,
1464
        user_projects,
1465
        enable_common_exchange_area_access,
1466
        study_common_exchange_areas,
1467
        dbgap_config,
1468
        sess,
1469
    ):
1470
        for username in user_projects.keys():
1✔
1471
            for project in user_projects[username].keys():
1✔
1472
                phsid = project.split(".")
1✔
1473
                dbgap_project = phsid[0]
1✔
1474
                privileges = user_projects[username][project]
1✔
1475
                if len(phsid) > 1 and self._get_parse_consent_code(dbgap_config):
1✔
1476
                    consent_code = phsid[-1]
1✔
1477

1478
                    # c999 indicates full access to all consents and access
1479
                    # to a study-specific exchange area
1480
                    # access to at least one study-specific exchange area implies access
1481
                    # to the parent study's common exchange area
1482
                    #
1483
                    # NOTE: Handling giving access to all consents is done at
1484
                    #       a later time, when we have full information about possible
1485
                    #       consents
1486
                    self.logger.debug(
1✔
1487
                        f"got consent code {consent_code} from dbGaP project "
1488
                        f"{dbgap_project}"
1489
                    )
1490
                    if (
1✔
1491
                        consent_code == "c999"
1492
                        and enable_common_exchange_area_access
1493
                        and dbgap_project in study_common_exchange_areas
1494
                    ):
1495
                        self.logger.info(
×
1496
                            "found study with consent c999 and Fence "
1497
                            "is configured to parse exchange area data. Giving user "
1498
                            f"{username} {privileges} privileges in project: "
1499
                            f"{study_common_exchange_areas[dbgap_project]}."
1500
                        )
1501
                        self._add_dbgap_project_for_user(
×
1502
                            study_common_exchange_areas[dbgap_project],
1503
                            privileges,
1504
                            username,
1505
                            sess,
1506
                            user_projects,
1507
                            dbgap_config,
1508
                        )
1509

1510
                    dbgap_project += "." + consent_code
1✔
1511

1512
                self._process_dbgap_project(
1✔
1513
                    dbgap_project,
1514
                    privileges,
1515
                    username,
1516
                    sess,
1517
                    user_projects,
1518
                    dbgap_config,
1519
                )
1520

1521
    def sync(self):
        """
        Run the sync, reusing ``self.session`` when it is set and otherwise
        opening a session from ``self.driver`` for the duration of the run.
        """
        if not self.session:
            # no session was supplied: open one that lives only for this sync
            with self.driver.session as db_session:
                self._sync(db_session)
            return

        self._sync(self.session)
1527

1528
    def download(self):
        """
        Download the files of every dbGaP server listed in ``self.dbGaP``,
        one server after the other. The downloaded file lists are not returned.
        """
        for server_config in self.dbGaP:
            self._download(server_config)
1531

1532
    def _download(self, dbgap_config):
1✔
1533
        """
1534
        Download files from dbgap server.
1535
        """
1536
        server = dbgap_config["info"]
1✔
1537
        protocol = dbgap_config["protocol"]
1✔
1538
        hostname = server["host"]
1✔
1539
        username = server["username"]
1✔
1540
        folderdir = os.path.join(str(self.folder), str(hostname), str(username))
1✔
1541

1542
        if not os.path.exists(folderdir):
1✔
1543
            os.makedirs(folderdir)
1✔
1544

1545
        self.logger.info("Download from server")
1✔
1546
        try:
1✔
1547
            if protocol == "sftp":
1✔
1548
                self._get_from_sftp_with_proxy(server, folderdir)
1✔
1549
            else:
1550
                self._get_from_ftp_with_proxy(server, folderdir)
×
1551
            dbgap_files = glob.glob(os.path.join(folderdir, "*"))
×
1552
            return dbgap_files
×
1553
        except Exception as e:
1✔
1554
            self.logger.error(e)
1✔
1555
            raise
1✔
1556

1557
    def _sync(self, sess):
        """
        Collect files from dbgap server(s), sync csv and yaml files to storage
        backend and fence DB

        Args:
            sess: database session used for every step of the sync

        Raises:
            EnvironmentError, AssertionError: re-raised when the user.yaml
                file cannot be loaded
            EnvironmentError: when the user.yaml has an ``authz`` section but
                no arborist client is configured
            SystemExit: exit code 1 when one of the arborist updates reports
                failure
            GoogleUpdateException: re-raised at the very end, after all the
                other steps have run
        """
        # local import: `exit()` is injected by the `site` module for
        # interactive use and is not guaranteed to exist; sys.exit is the
        # supported way to terminate a program
        import sys

        # get all dbgap files
        user_projects = {}
        user_info = {}
        if self.is_sync_from_dbgap_server:
            self.logger.debug(
                "Pulling telemetry files from {} dbgap sftp servers".format(
                    len(self.dbGaP)
                )
            )
            user_projects, user_info = self._merge_multiple_dbgap_sftp(self.dbGaP, sess)

        local_csv_file_list = []
        if self.sync_from_local_csv_dir:
            local_csv_file_list = glob.glob(
                os.path.join(self.sync_from_local_csv_dir, "*")
            )
            # Sort the list so the order of files is consistent across platforms
            local_csv_file_list.sort()

        user_projects_csv, user_info_csv = self._merge_multiple_local_csv_files(
            local_csv_file_list,
            encrypted=False,
            session=sess,
            dbgap_configs=self.dbGaP,
        )

        try:
            user_yaml = UserYAML.from_file(
                self.sync_from_local_yaml_file, encrypted=False, logger=self.logger
            )
        except (EnvironmentError, AssertionError) as e:
            self.logger.error(str(e))
            self.logger.error("aborting early")
            raise

        # parse all projects
        user_projects_csv = self.parse_projects(user_projects_csv)
        user_projects = self.parse_projects(user_projects)
        user_yaml.projects = self.parse_projects(user_yaml.projects)

        # merge all user info dicts into "user_info".
        # the user info (such as email) in the user.yaml files
        # overrides the user info from the CSV files.
        self.sync_two_user_info_dict(user_info_csv, user_info)
        self.sync_two_user_info_dict(user_yaml.user_info, user_info)

        # merge all access info dicts into "user_projects".
        # the access info is combined - if the user.yaml access is
        # ["read"] and the CSV file access is ["read-storage"], the
        # resulting access is ["read", "read-storage"].
        self.sync_two_phsids_dict(
            user_projects_csv, user_projects, source1="local_csv", source2="dbgap"
        )
        self.sync_two_phsids_dict(
            user_yaml.projects, user_projects, source1="user_yaml", source2="dbgap"
        )

        # Note: every configured dbgap server is checked here; the c999
        # handling runs once for each server config that has consent code
        # parsing enabled
        for dbgap_config in self.dbGaP:
            if self._get_parse_consent_code(dbgap_config):
                self._grant_all_consents_to_c999_users(
                    user_projects, user_yaml.project_to_resource
                )

        google_update_ex = None

        try:
            # update the Fence DB
            if user_projects:
                self.logger.info("Sync to db and storage backend")
                self.sync_to_db_and_storage_backend(user_projects, user_info, sess)
                self.logger.info("Finish syncing to db and storage backend")
            else:
                self.logger.info("No users for syncing")
        except GoogleUpdateException as ex:
            # save this to reraise later after all non-Google syncing has finished
            # this way, any issues with Google only affect Google data access and don't
            # cascade problems into non-Google AWS or Azure access
            google_update_ex = ex

        # update the Arborist DB (resources, roles, policies, groups)
        if user_yaml.authz:
            if not self.arborist_client:
                raise EnvironmentError(
                    "yaml file contains authz section but sync is not configured with"
                    " arborist client--did you run sync with --arborist <arborist client> arg?"
                )
            self.logger.info("Synchronizing arborist...")
            success = self._update_arborist(sess, user_yaml)
            if success:
                self.logger.info("Finished synchronizing arborist")
            else:
                self.logger.error("Could not synchronize successfully")
                sys.exit(1)
        else:
            self.logger.info("No `authz` section; skipping arborist sync")

        # update the Arborist DB (user access)
        if self.arborist_client:
            self.logger.info("Synchronizing arborist with authorization info...")
            success = self._update_authz_in_arborist(sess, user_projects, user_yaml)
            if success:
                self.logger.info(
                    "Finished synchronizing authorization info to arborist"
                )
            else:
                self.logger.error(
                    "Could not synchronize authorization info successfully to arborist"
                )
                sys.exit(1)
        else:
            self.logger.error("No arborist client set; skipping arborist sync")

        # Logging authz source
        for u, s in self.auth_source.items():
            self.logger.info("Access for user {} from {}".format(u, s))

        self.logger.info(
            f"Persisting authz mapping to database: {user_yaml.project_to_resource}"
        )
        user_yaml.persist_project_to_resource(db_session=sess)
        # now that everything else is done, surface the Google failure (if any)
        if google_update_ex is not None:
            raise google_update_ex
1688

1689
    def _grant_all_consents_to_c999_users(
        self, user_projects, user_yaml_project_to_resources
    ):
        """
        Give every user who holds a ``c999`` consent for a study access to all
        the known consents of that study. ``user_projects`` is updated in place.

        The known consents are collected from the keys of ``self._projects``
        and of ``user_yaml_project_to_resources``. Project names are parsed
        with ``config["DBGAP_ACCESSION_WITH_CONSENT_REGEX"]``, which must
        define the named groups ``phsid`` and ``consent``.

        Args:
            user_projects (dict): username -> {project -> privileges}
            user_yaml_project_to_resources (dict): mapping whose keys are the
                project names coming from the user.yaml
        """
        access_number_matcher = re.compile(config["DBGAP_ACCESSION_WITH_CONSENT_REGEX"])
        # combine dbgap/user.yaml projects into one big set (in case not all consents
        # are in either)
        all_projects = set(self._projects.keys()).union(
            user_yaml_project_to_resources.keys()
        )

        self.logger.debug(f"all projects: {all_projects}")

        # construct a mapping from phsid (without consent) to all accessions with consent
        consent_mapping = {}
        for project in all_projects:
            phs_match = access_number_matcher.match(project)
            if not phs_match:
                continue
            accession_number = phs_match.groupdict()
            phsid = accession_number["phsid"]
            consent = accession_number["consent"]

            # TODO: This is not handling the .v1.p1 at all
            consent_mapping.setdefault(phsid, set()).add(".".join([phsid, consent]))
            children = self._get_children(phsid)
            if children:
                for child_phs in children:
                    # Assign parent consent to child study
                    consent_mapping.setdefault(child_phs, set()).add(
                        ".".join([child_phs, consent])
                    )

        self.logger.debug(f"consent mapping: {consent_mapping}")

        # go through existing access and find any c999's and make sure to give access to
        # all accessions with consent for that phsid
        for username, user_project_info in user_projects.items():
            # snapshot the project names: entries are added to this very dict
            # below, and only the projects present before that must be examined
            for project in list(user_project_info):
                phs_match = access_number_matcher.match(project)
                if not phs_match:
                    continue
                accession_number = phs_match.groupdict()
                if accession_number["consent"] != "c999":
                    continue

                # give access to all consents
                all_phsids_with_consent = consent_mapping.get(
                    accession_number["phsid"], []
                )
                self.logger.info(
                    f"user {username} has c999 consent group for: {project}. "
                    f"Granting access to all consents: {all_phsids_with_consent}"
                )
                # NOTE: Giving read and read-storage; any privileges the user
                #       already had on these accessions are replaced
                for phsid_with_consent in all_phsids_with_consent:
                    user_project_info[phsid_with_consent] = {"read-storage", "read"}
1743

1744
    def _update_arborist(self, session, user_yaml):
        """
        Create roles, resources, policies, groups in arborist from the information in
        ``user_yaml``.

        The projects are sent to arborist as resources with paths like
        ``/projects/{project}``. Roles are created with just the original names
        for the privileges like ``"read-storage", "read"`` etc.

        Errors reported by arborist while updating individual resources,
        roles, policies or groups are logged and the update keeps going.
        ``user_yaml.authz`` is left untouched.

        Args:
            session (sqlalchemy.Session)
            user_yaml (UserYAML)

        Return:
            bool: success (False only when arborist is not healthy)
        """
        healthy = self._is_arborist_healthy()
        if not healthy:
            return False

        # Set up the resource tree in arborist by combining provided resources with any
        # dbgap resources that were created before this.
        #
        # Why add dbgap resources if they've already been created?
        #   B/C Arborist's PUT update will override existing subresources. So if a dbgap
        #   resources was created under `/programs/phs000178` anything provided in
        #   user.yaml under `/programs` would completely wipe it out.
        resources = user_yaml.authz.get("resources", [])

        dbgap_resource_paths = []
        for path_list in self._dbgap_study_to_resources.values():
            dbgap_resource_paths.extend(path_list)

        self.logger.debug("user_yaml resources: {}".format(resources))
        self.logger.debug("dbgap resource paths: {}".format(dbgap_resource_paths))

        combined_resources = utils.combine_provided_and_dbgap_resources(
            resources, dbgap_resource_paths
        )

        for resource in combined_resources:
            try:
                self.logger.debug(
                    "attempting to update arborist resource: {}".format(resource)
                )
                self.arborist_client.update_resource("/", resource, merge=True)
            except ArboristError as e:
                self.logger.error(e)
                # keep going; maybe just some conflicts from things existing already

        # update roles
        roles = user_yaml.authz.get("roles", [])
        for role in roles:
            try:
                response = self.arborist_client.update_role(role["id"], role)
                if response:
                    self._created_roles.add(role["id"])
            except ArboristError as e:
                self.logger.info(
                    "couldn't update role '{}', creating instead".format(str(e))
                )
                try:
                    response = self.arborist_client.create_role(role)
                    if response:
                        self._created_roles.add(role["id"])
                except ArboristError as e:
                    self.logger.error(e)
                    # keep going; maybe just some conflicts from things existing already

        # update policies
        policies = user_yaml.authz.get("policies", [])
        for policy in policies:
            # work on a shallow copy: popping "id" from the original dict would
            # strip the ids out of user_yaml.authz and break any later use of it
            policy = dict(policy)
            policy_id = policy.pop("id")
            try:
                self.logger.debug(
                    "Trying to upsert policy with id {}".format(policy_id)
                )
                response = self.arborist_client.update_policy(
                    policy_id, policy, create_if_not_exist=True
                )
            except ArboristError as e:
                self.logger.error(e)
                # keep going; maybe just some conflicts from things existing already
            else:
                if response:
                    self.logger.debug("Upserted policy with id {}".format(policy_id))
                    self._created_policies.add(policy_id)

        # update groups
        groups = user_yaml.authz.get("groups", [])

        # delete from arborist the groups that have been deleted
        # from the user.yaml
        arborist_groups = set(
            g["name"] for g in self.arborist_client.list_groups().get("groups", [])
        )
        # groups without a name are reported (and skipped) by the validation
        # in the create/update loop below, so they must not blow up here
        useryaml_groups = {g["name"] for g in groups if "name" in g}
        for deleted_group in arborist_groups.difference(useryaml_groups):
            # do not try to delete built in groups
            if deleted_group not in ["anonymous", "logged-in"]:
                self.arborist_client.delete_group(deleted_group)

        # create/update the groups defined in the user.yaml
        for group in groups:
            missing = {"name", "users", "policies"}.difference(set(group.keys()))
            if missing:
                name = group.get("name", "{MISSING NAME}")
                self.logger.error(
                    "group {} missing required field(s): {}".format(name, list(missing))
                )
                continue
            try:
                self.arborist_client.put_group(
                    group["name"],
                    # Arborist doesn't handle group descriptions yet
                    # description=group.get("description", ""),
                    users=group["users"],
                    policies=group["policies"],
                )
            except ArboristError as e:
                self.logger.info("couldn't put group: {}".format(str(e)))

        # Update policies for built-in (`anonymous` and `logged-in`) groups

        # First recreate these groups in order to clear out old, possibly deleted policies
        for builtin_group in ["anonymous", "logged-in"]:
            try:
                self.arborist_client.put_group(builtin_group)
            except ArboristError as e:
                self.logger.info("couldn't put group: {}".format(str(e)))

        # Now add back policies that are in the user.yaml
        for policy in user_yaml.authz.get("anonymous_policies", []):
            self.arborist_client.grant_group_policy("anonymous", policy)

        for policy in user_yaml.authz.get("all_users_policies", []):
            self.arborist_client.grant_group_policy("logged-in", policy)

        return True
1883

1884
    def _revoke_all_policies_preserve_mfa(self, username, idp=None):
        """
        Revoke all the policies of ``username`` in arborist. When MFA is
        configured for ``idp`` (its ``OPENID_CONNECT`` settings contain
        ``multifactor_auth_claim_info``) and the user held ``mfa_policy``
        before the revocation, that policy is granted back afterwards.

        Nothing is done when arborist raises an ``ArboristError`` while
        fetching the user.
        """
        try:
            arborist_user = self.arborist_client.get_user(username)
        except ArboristError:
            # user doesn't exist in Arborist, nothing to revoke
            return

        idp_settings = config["OPENID_CONNECT"].get(idp, {})
        if "multifactor_auth_claim_info" not in idp_settings:
            # no MFA for this idp, so there is nothing to preserve
            # TODO This should be a diff, not a revocation of all policies.
            self.arborist_client.revoke_all_policies_for_user(username)
            return

        # remember what the user had before everything gets wiped
        previous_policies = []
        try:
            previous_policies = arborist_user["policies"]
        except Exception as e:
            self.logger.error(
                f"Could not retrieve user's policies, revoking all policies anyway. {e}"
            )
        finally:
            # TODO This should be a diff, not a revocation of all policies.
            self.arborist_client.revoke_all_policies_for_user(username)

        if "mfa_policy" in previous_policies:
            self.arborist_client.grant_user_policy(username, "mfa_policy")
1917

1918
    def _update_authz_in_arborist(
        self,
        session,
        user_projects,
        user_yaml=None,
        single_user_sync=False,
        expires=None,
    ):
        """
        Assign users policies in arborist from the information in
        ``user_projects`` and optionally a ``user_yaml``.

        The projects are sent to arborist as resources with paths like
        ``/projects/{project}``. Roles are created with just the original names
        for the privileges like ``"read-storage", "read"`` etc.

        Args:
            session (sqlalchemy.Session): used to look up users, clients and
                (when not available from ``user_yaml``) the project to authz
                resource mapping
            user_projects (dict)
            user_yaml (UserYAML) optional, if there are policies for users in a user.yaml
            single_user_sync (bool) whether authz update is for a single user
            expires (int) time at which authz info in Arborist should expire

        Return:
            bool: success. NOTE: for ``single_user_sync`` the result of
            granting the first policy is returned instead, right after that
            grant.
        """
        healthy = self._is_arborist_healthy()
        if not healthy:
            return False

        self.logger.debug("user_projects: {}".format(user_projects))

        if user_yaml:
            self.logger.debug(
                "useryaml abac before lowering usernames: {}".format(
                    user_yaml.user_abac
                )
            )
            user_yaml.user_abac = {
                key.lower(): value for key, value in user_yaml.user_abac.items()
            }
            # update the project info with `projects` specified in user.yaml
            self.sync_two_phsids_dict(user_yaml.user_abac, user_projects)

        # get list of users from arborist to make sure users that are completely removed
        # from authorization sources get policies revoked
        arborist_user_projects = {}
        if not single_user_sync:
            try:
                arborist_users = self.arborist_client.get_users().json["users"]

                # construct user information, NOTE the lowering of the username. when adding/
                # removing access, the case in the Fence db is used. For combining access, it is
                # case-insensitive, so we lower
                arborist_user_projects = {
                    user["name"].lower(): {} for user in arborist_users
                }
            except (ArboristError, KeyError, AttributeError) as error:
                # TODO usersync should probably exit with non-zero exit code at the end,
                #      but sync should continue from this point so there are no partial
                #      updates
                self.logger.warning(
                    "Could not get list of users in Arborist, continuing anyway. "
                    "WARNING: this sync will NOT remove access for users no longer in "
                    f"authorization sources. Error: {error}"
                )

            # update the project info with users from arborist
            self.sync_two_phsids_dict(arborist_user_projects, user_projects)

        # prefer in-memory if available from user_yaml, if not, get from database
        if user_yaml and user_yaml.project_to_resource:
            project_to_authz_mapping = user_yaml.project_to_resource
            self.logger.debug(
                f"using in-memory project to authz resource mapping from "
                f"user.yaml (instead of database): {project_to_authz_mapping}"
            )
        else:
            project_to_authz_mapping = get_project_to_authz_mapping(session)
            self.logger.debug(
                f"using persisted project to authz resource mapping from database "
                f"(instead of user.yaml - as it may not be available): {project_to_authz_mapping}"
            )

        self.logger.debug(
            f"_dbgap_study_to_resources: {self._dbgap_study_to_resources}"
        )
        # make sure every resource a policy may point at exists in arborist
        all_resources = [
            r
            for resources in self._dbgap_study_to_resources.values()
            for r in resources
        ]
        all_resources.extend(project_to_authz_mapping.values())
        self._create_arborist_resources(all_resources)

        for username, user_project_info in user_projects.items():
            self.logger.info("processing user `{}`".format(username))
            user = query_for_user(session=session, username=username)
            idp = None
            if user:
                # from here on use the username as spelled in the Fence db
                username = user.username
                idp = user.identity_provider.name if user.identity_provider else None

            self.arborist_client.create_user_if_not_exist(username)
            if not single_user_sync:
                self._revoke_all_policies_preserve_mfa(username, idp)

            # as of 2/11/2022, for single_user_sync, as RAS visa parsing has
            # previously mapped each project to the same set of privileges
            # (i.e.{'read', 'read-storage'}), unique_policies will just be a
            # single policy with ('read', 'read-storage') being the single
            # key
            unique_policies = self._determine_unique_policies(
                user_project_info, project_to_authz_mapping
            )

            for roles in unique_policies.keys():
                for role in roles:
                    self._create_arborist_role(role)

            if single_user_sync:
                for ordered_roles, ordered_resources in unique_policies.items():
                    policy_hash = self._hash_policy_contents(
                        ordered_roles, ordered_resources
                    )
                    self._create_arborist_policy(
                        policy_hash,
                        ordered_roles,
                        ordered_resources,
                        skip_if_exists=True,
                    )
                    # return here as it is not expected single_user_sync
                    # will need any of the remaining user_yaml operations
                    # left in _update_authz_in_arborist
                    return self._grant_arborist_policy(
                        username, policy_hash, expires=expires
                    )
            else:
                for roles, resources in unique_policies.items():
                    for role in roles:
                        for resource in resources:
                            # grant a policy to this user which is a single
                            # role on a single resource

                            # format project '/x/y/z' -> 'x.y.z'
                            # so the policy id will be something like 'x.y.z-create'
                            policy_id = _format_policy_id(resource, role)
                            if policy_id not in self._created_policies:
                                try:
                                    self.arborist_client.update_policy(
                                        policy_id,
                                        {
                                            "description": "policy created by fence sync",
                                            "role_ids": [role],
                                            "resource_paths": [resource],
                                        },
                                        create_if_not_exist=True,
                                    )
                                except ArboristError as e:
                                    self.logger.info(
                                        "not creating policy in arborist; {}".format(
                                            str(e)
                                        )
                                    )
                                # recorded even after a failure, so the same
                                # policy is not attempted again during this sync
                                self._created_policies.add(policy_id)

                            self._grant_arborist_policy(
                                username, policy_id, expires=expires
                            )

            if user_yaml:
                for policy in user_yaml.policies.get(username, []):
                    self.arborist_client.grant_user_policy(
                        username,
                        policy,
                        expires_at=expires,
                    )

        if user_yaml:
            for client_name, client_details in user_yaml.clients.items():
                client_policies = client_details.get("policies", [])
                clients = session.query(Client).filter_by(name=client_name).all()
                # update existing clients, do not create new ones
                if not clients:
                    self.logger.warning(
                        "client to update (`{}`) does not exist in fence: skipping".format(
                            client_name
                        )
                    )
                    continue
                self.logger.debug(
                    "updating client `{}` (found {} client IDs)".format(
                        client_name, len(clients)
                    )
                )
                # there may be more than 1 client with this name if credentials are being rotated,
                # so we grant access to each client ID
                for client in clients:
                    try:
                        self.arborist_client.update_client(
                            client.client_id, client_policies
                        )
                    except ArboristError as e:
                        self.logger.info(
                            "not granting policies {} to client `{}` (`{}`); {}".format(
                                client_policies, client_name, client.client_id, str(e)
                            )
                        )

        return True
2130

2131
    def _determine_unique_policies(self, user_project_info, project_to_authz_mapping):
1✔
2132
        """
2133
        Determine and return a dictionary of unique policies.
2134

2135
        Args (examples):
2136
            user_project_info (dict):
2137
            {
2138
                'phs000002.c1': { 'read-storage', 'read' },
2139
                'phs000001.c1': { 'read', 'read-storage' },
2140
                'phs000004.c1': { 'write', 'read' },
2141
                'phs000003.c1': { 'read', 'write' },
2142
                'phs000006.c1': { 'write-storage', 'write', 'read-storage', 'read' }
2143
                'phs000005.c1': { 'read', 'read-storage', 'write', 'write-storage' },
2144
            }
2145
            project_to_authz_mapping (dict):
2146
            {
2147
                'phs000001.c1': '/programs/DEV/projects/phs000001.c1'
2148
            }
2149

2150
        Return (for examples):
2151
            dict:
2152
            {
2153
                ('read', 'read-storage'): ('phs000001.c1', 'phs000002.c1'),
2154
                ('read', 'write'): ('phs000003.c1', 'phs000004.c1'),
2155
                ('read', 'read-storage', 'write', 'write-storage'): ('phs000005.c1', 'phs000006.c1'),
2156
            }
2157
        """
2158
        roles_to_resources = collections.defaultdict(list)
1✔
2159
        for study, roles in user_project_info.items():
1✔
2160
            ordered_roles = tuple(sorted(roles))
1✔
2161
            study_authz_paths = self._dbgap_study_to_resources.get(study, [study])
1✔
2162
            if study in project_to_authz_mapping:
1✔
2163
                study_authz_paths = [project_to_authz_mapping[study]]
1✔
2164
            roles_to_resources[ordered_roles].extend(study_authz_paths)
1✔
2165

2166
        policies = {}
1✔
2167
        for ordered_roles, unordered_resources in roles_to_resources.items():
1✔
2168
            policies[ordered_roles] = tuple(sorted(unordered_resources))
1✔
2169
        return policies
1✔
2170

2171
    def _create_arborist_role(self, role):
1✔
2172
        """
2173
        Wrapper around gen3authz's create_role with additional logging
2174

2175
        Args:
2176
            role (str): what the Arborist identity should be of the created role
2177

2178
        Return:
2179
            bool: True if the role was created successfully or it already
2180
                  exists. False otherwise
2181
        """
2182
        if role in self._created_roles:
1✔
2183
            return True
1✔
2184
        try:
1✔
2185
            response_json = self.arborist_client.create_role(
1✔
2186
                arborist_role_for_permission(role)
2187
            )
2188
        except ArboristError as e:
×
2189
            self.logger.error(
×
2190
                "could not create `{}` role in Arborist: {}".format(role, e)
2191
            )
2192
            return False
×
2193
        self._created_roles.add(role)
1✔
2194

2195
        if response_json is None:
1✔
2196
            self.logger.info("role `{}` already exists in Arborist".format(role))
×
2197
        else:
2198
            self.logger.info("created role `{}` in Arborist".format(role))
1✔
2199
        return True
1✔
2200

2201
    def _create_arborist_resources(self, resources):
        """
        Create resources in Arborist

        The paths are converted to nested request bodies by
        utils.combine_provided_and_dbgap_resources({}, resources); each body is
        then sent to Arborist as a merge-update of the root resource ("/").
        Processing stops at the first body Arborist rejects.

        Args:
            resources (list): a list of full Arborist resource paths to create
            [
                "/programs/DEV/projects/phs000001.c1",
                "/programs/DEV/projects/phs000002.c1",
                "/programs/DEV/projects/phs000003.c1"
            ]

        Return:
            bool: True if the resources were successfully created, False otherwise


        As of 2/11/2022, for resources above,
        utils.combine_provided_and_dbgap_resources({}, resources) returns:
        [
            { 'name': 'programs', 'subresources': [
                { 'name': 'DEV', 'subresources': [
                    { 'name': 'projects', 'subresources': [
                        { 'name': 'phs000001.c1', 'subresources': []},
                        { 'name': 'phs000002.c1', 'subresources': []},
                        { 'name': 'phs000003.c1', 'subresources': []}
                    ]}
                ]}
            ]}
        ]
        Because this list has a single object, only a single network request gets
        sent to Arborist.

        However, for resources = ["/phs000001.c1", "/phs000002.c1", "/phs000003.c1"],
        utils.combine_provided_and_dbgap_resources({}, resources) returns:
        [
            {'name': 'phs000001.c1', 'subresources': []},
            {'name': 'phs000002.c1', 'subresources': []},
            {'name': 'phs000003.c1', 'subresources': []}
        ]
        Because this list has 3 objects, 3 network requests get sent to Arborist.

        As a practical matter, for sync_single_user_visas, studies
        should be nested under the `/programs` resource as in the former
        example (i.e. only one network request gets made).

        TODO for the sake of simplicity, it would be nice if only one network
        request was made no matter the input.
        """
        for request_body in utils.combine_provided_and_dbgap_resources({}, resources):
            try:
                # the response body is not needed; only failure matters here
                self.arborist_client.update_resource("/", request_body, merge=True)
            except ArboristError as e:
                self.logger.error(
                    "could not create Arborist resources using request body `{}`. error: {}".format(
                        request_body, e
                    )
                )
                return False

        self.logger.debug(
            "created {} resource(s) in Arborist: `{}`".format(len(resources), resources)
        )
        return True
2266

2267
    def _create_arborist_policy(
1✔
2268
        self, policy_id, roles, resources, skip_if_exists=False
2269
    ):
2270
        """
2271
        Wrapper around gen3authz's create_policy with additional logging
2272

2273
        Args:
2274
            policy_id (str): what the Arborist identity should be of the created policy
2275
            roles (iterable): what roles the create policy should have
2276
            resources (iterable): what resources the created policy should have
2277
            skip_if_exists (bool): if True, this function will not treat an already
2278
                                   existent policy as an error
2279

2280
        Return:
2281
            bool: True if policy creation was successful. False otherwise
2282
        """
2283
        try:
1✔
2284
            response_json = self.arborist_client.create_policy(
1✔
2285
                {
2286
                    "id": policy_id,
2287
                    "role_ids": roles,
2288
                    "resource_paths": resources,
2289
                },
2290
                skip_if_exists=skip_if_exists,
2291
            )
2292
        except ArboristError as e:
×
2293
            self.logger.error(
×
2294
                "could not create policy `{}` in Arborist: {}".format(policy_id, e)
2295
            )
2296
            return False
×
2297

2298
        if response_json is None:
1✔
2299
            self.logger.info("policy `{}` already exists in Arborist".format(policy_id))
×
2300
        else:
2301
            self.logger.info("created policy `{}` in Arborist".format(policy_id))
1✔
2302
        return True
1✔
2303

2304
    def _hash_policy_contents(self, ordered_roles, ordered_resources):
1✔
2305
        """
2306
        Generate a sha256 hexdigest representing ordered_roles and ordered_resources.
2307

2308
        Args:
2309
            ordered_roles (iterable): policy roles in sorted order
2310
            ordered_resources (iterable): policy resources in sorted order
2311

2312
        Return:
2313
            str: SHA256 hex digest
2314
        """
2315

2316
        def escape(s):
1✔
2317
            return s.replace(",", "\,")
1✔
2318

2319
        canonical_roles = ",".join(escape(r) for r in ordered_roles)
1✔
2320
        canonical_resources = ",".join(escape(r) for r in ordered_resources)
1✔
2321
        canonical_policy = f"{canonical_roles},,f{canonical_resources}"
1✔
2322
        policy_hash = hashlib.sha256(canonical_policy.encode("utf-8")).hexdigest()
1✔
2323

2324
        return policy_hash
1✔
2325

2326
    def _grant_arborist_policy(self, username, policy_id, expires=None):
1✔
2327
        """
2328
        Wrapper around gen3authz's grant_user_policy with additional logging
2329

2330
        Args:
2331
            username (str): username of user in Arborist who policy should be
2332
                            granted to
2333
            policy_id (str): Arborist policy id
2334
            expires (int): POSIX timestamp for when policy should expire
2335

2336
        Return:
2337
            bool: True if granting of policy was successful, False otherwise
2338
        """
2339
        try:
1✔
2340
            response_json = self.arborist_client.grant_user_policy(
1✔
2341
                username,
2342
                policy_id,
2343
                expires_at=expires,
2344
            )
2345
        except ArboristError as e:
×
2346
            self.logger.error(
×
2347
                "could not grant policy `{}` to user `{}`: {}".format(
2348
                    policy_id, username, e
2349
                )
2350
            )
2351
            return False
×
2352

2353
        self.logger.debug(
1✔
2354
            "granted policy `{}` to user `{}`".format(policy_id, username)
2355
        )
2356
        return True
1✔
2357

2358
    def _determine_arborist_resource(self, dbgap_study, dbgap_config):
1✔
2359
        """
2360
        Determine the arborist resource path and add it to
2361
        _self._dbgap_study_to_resources
2362

2363
        Args:
2364
            dbgap_study (str): study phs identifier
2365
            dbgap_config (dict): dictionary of config for dbgap server
2366

2367
        """
2368
        default_namespaces = dbgap_config.get("study_to_resource_namespaces", {}).get(
1✔
2369
            "_default", ["/"]
2370
        )
2371
        namespaces = dbgap_config.get("study_to_resource_namespaces", {}).get(
1✔
2372
            dbgap_study, default_namespaces
2373
        )
2374

2375
        self.logger.debug(f"dbgap study namespaces: {namespaces}")
1✔
2376

2377
        arborist_resource_namespaces = [
1✔
2378
            namespace.rstrip("/") + "/programs/" for namespace in namespaces
2379
        ]
2380

2381
        for resource_namespace in arborist_resource_namespaces:
1✔
2382
            full_resource_path = resource_namespace + dbgap_study
1✔
2383
            if dbgap_study not in self._dbgap_study_to_resources:
1✔
2384
                self._dbgap_study_to_resources[dbgap_study] = []
1✔
2385
            self._dbgap_study_to_resources[dbgap_study].append(full_resource_path)
1✔
2386
        return arborist_resource_namespaces
1✔
2387

2388
    def _is_arborist_healthy(self):
1✔
2389
        if not self.arborist_client:
1✔
2390
            self.logger.warning("no arborist client set; skipping arborist dbgap sync")
×
2391
            return False
×
2392
        if not self.arborist_client.healthy():
1✔
2393
            # TODO (rudyardrichter, 2019-01-07): add backoff/retry here
2394
            self.logger.error(
×
2395
                "arborist service is unavailable; skipping main arborist dbgap sync"
2396
            )
2397
            return False
×
2398
        return True
1✔
2399

2400
    def _pick_sync_type(self, visa):
1✔
2401
        """
2402
        Pick type of visa to parse according to the visa provider
2403
        """
2404
        sync_client = None
1✔
2405
        if visa.type in self.visa_types["ras"]:
1✔
2406
            sync_client = self.ras_sync_client
1✔
2407
        else:
2408
            raise Exception(
×
2409
                "Visa type {} not recognized. Configure in fence-config".format(
2410
                    visa.type
2411
                )
2412
            )
2413
        if not sync_client:
1✔
2414
            raise Exception("Sync client for {} not configured".format(visa.type))
×
2415

2416
        return sync_client
1✔
2417

2418
    def sync_single_user_visas(self, user, ga4gh_visas, sess=None, expires=None):
        """
        Sync a single user's visas during login or DRS/data access

        IMPORTANT NOTE: THIS DOES NOT VALIDATE THE VISA. ENSURE THIS IS DONE
                        BEFORE THIS.

        Steps, in order:
          1. parse each visa with the sync client chosen for its type; visas
             whose parsing raises are logged and skipped
          2. merge the projects of all parsed visas and pass them through
             parse_projects / _process_user_projects
          3. when consent codes are parsed, grant all consents to c999 users
          4. sync to the storage backend (only if there are projects)
          5. update authorization in Arborist (only if a client is set)

        Args:
            user (userdatamodel.user.User): Fence user whose visas'
                                            authz info is being synced
            ga4gh_visas (list): a list of fence.models.GA4GHVisaV1 objects
                                that are ALREADY VALIDATED
            sess (sqlalchemy.orm.session.Session): database session
            expires (int): time at which synced Arborist policies and
                           inclusion in any GBAG are set to expire

        Return:
            list of successfully parsed visas

        Raises:
            EnvironmentError, AssertionError: re-raised (after logging) when the
                local user yaml file cannot be loaded
        """
        self.ras_sync_client = RASVisa(logger=self.logger)

        # only the first configured dbGaP entry is consulted here
        dbgap_config = self.dbGaP[0]
        parse_consent_code = self._get_parse_consent_code(dbgap_config)
        enable_common_exchange_area_access = dbgap_config.get(
            "enable_common_exchange_area_access", False
        )
        study_common_exchange_areas = dbgap_config.get(
            "study_common_exchange_areas", {}
        )

        try:
            user_yaml = UserYAML.from_file(
                self.sync_from_local_yaml_file, encrypted=False, logger=self.logger
            )
        except (EnvironmentError, AssertionError) as err:
            self.logger.error(str(err))
            self.logger.error("aborting early")
            raise

        merged_projects = {}
        info = {}
        parsed_visas = []

        for visa in ga4gh_visas:
            sync_client = self._pick_sync_type(visa)
            encoded_visa = visa.ga4gh_visa

            try:
                visa_projects, info = sync_client._parse_single_visa(
                    user,
                    encoded_visa,
                    visa.expires,
                    parse_consent_code,
                )
            except Exception:
                self.logger.warning(
                    f"ignoring unsuccessfully parsed or expired visa: {encoded_visa}"
                )
                continue

            # projects from later visas override same-named earlier ones
            merged_projects = {**merged_projects, **visa_projects}
            parsed_visas.append(visa)

        # `info` is whatever the last successfully parsed visa returned
        # (or an empty dict), stamped with this user's identity
        info["user_id"] = user.id
        info["username"] = user.username

        user_projects = self.parse_projects({user.username: merged_projects})

        if parse_consent_code and enable_common_exchange_area_access:
            self.logger.info(
                f"using study to common exchange area mapping: {study_common_exchange_areas}"
            )

        self._process_user_projects(
            user_projects,
            enable_common_exchange_area_access,
            study_common_exchange_areas,
            dbgap_config,
            sess,
        )

        if parse_consent_code:
            self._grant_all_consents_to_c999_users(
                user_projects, user_yaml.project_to_resource
            )

        if not user_projects:
            self.logger.info("No users for syncing")
        else:
            self.logger.info("Sync to storage backend [sync_single_user_visas]")
            self.sync_to_storage_backend(user_projects, info, sess, expires=expires)

        # update arborist db (user access)
        if not self.arborist_client:
            self.logger.error("No arborist client set; skipping arborist sync")
            return parsed_visas

        self.logger.info("Synchronizing arborist with authorization info...")
        success = self._update_authz_in_arborist(
            sess,
            user_projects,
            user_yaml=user_yaml,
            single_user_sync=True,
            expires=expires,
        )
        if success:
            self.logger.info("Finished synchronizing authorization info to arborist")
        else:
            self.logger.error(
                "Could not synchronize authorization info successfully to arborist"
            )

        return parsed_visas
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc