great-expectations / great_expectations / build 4973 (pending completion)

Pull Request #1240: Alexsherstinsky/error messages improvements 03 24 2020
Commit: "Code reuse for exception messages." (via web-flow, CI: travis-ci)

234 of 234 new or added lines in 28 files covered (100.0%)
9453 of 12196 relevant lines covered (77.51%); 2.32 hits per line

Source File (89.36% covered): /great_expectations/data_context/data_context.py

# -*- coding: utf-8 -*-
import copy
import datetime
import errno
import glob
import logging
import os
import shutil
import sys
import warnings
import webbrowser

from marshmallow import ValidationError
from ruamel.yaml import YAML, YAMLError
from six import string_types

from great_expectations.util import verify_dynamic_loading_support
from great_expectations.core import (
    ExpectationSuite,
    get_metric_kwargs_id,
)
from great_expectations.core.id_dict import BatchKwargs
from great_expectations.core.metric import ValidationMetricIdentifier
from great_expectations.core.util import nested_update
from great_expectations.data_context.types.base import (
    DataContextConfig,
    dataContextConfigSchema,
    datasourceConfigSchema,
    DatasourceConfig,
)
from great_expectations.data_context.util import (
    file_relative_path,
    substitute_config_variable,
)
from great_expectations.dataset import Dataset
from great_expectations.profile.basic_dataset_profiler import (
    BasicDatasetProfiler,
)

import great_expectations.exceptions as ge_exceptions

from ..validator.validator import Validator
from .templates import (
    CONFIG_VARIABLES_INTRO,
    CONFIG_VARIABLES_TEMPLATE,
    PROJECT_TEMPLATE,
)
from .types.resource_identifiers import (
    ExpectationSuiteIdentifier,
    ValidationResultIdentifier,
)
from .util import (
    instantiate_class_from_config,
    load_class,
    safe_mmkdir,
    substitute_all_config_variables,
)

try:
    from urllib.parse import urlparse
except ImportError:
    from urlparse import urlparse

try:
    from sqlalchemy.exc import SQLAlchemyError
except ImportError:
    # If sqlalchemy is unavailable, alias SQLAlchemyError to ProfilerError. ProfilerError is
    # caught by an earlier except clause in the profiling code below, so SQLAlchemy errors
    # will just fall through.
    SQLAlchemyError = ge_exceptions.ProfilerError

logger = logging.getLogger(__name__)
yaml = YAML()
yaml.indent(mapping=2, sequence=4, offset=2)
yaml.default_flow_style = False


class BaseDataContext(object):
    """
    This class implements most of the functionality of DataContext, with a few exceptions.

    1. BaseDataContext does not attempt to keep its project_config in sync with a file on disk.
    2. BaseDataContext doesn't attempt to "guess" paths or object types. Instead, that logic is
        pushed into the DataContext class.

    Together, these changes make the BaseDataContext class more testable.
    """

    PROFILING_ERROR_CODE_TOO_MANY_DATA_ASSETS = 2
    PROFILING_ERROR_CODE_SPECIFIED_DATA_ASSETS_NOT_FOUND = 3
    PROFILING_ERROR_CODE_NO_GENERATOR_FOUND = 4
    PROFILING_ERROR_CODE_MULTIPLE_GENERATORS_FOUND = 5
    UNCOMMITTED_DIRECTORIES = ["data_docs", "validations"]
    GE_UNCOMMITTED_DIR = "uncommitted"
    BASE_DIRECTORIES = [
        "expectations",
        "notebooks",
        "plugins",
        GE_UNCOMMITTED_DIR,
    ]
    NOTEBOOK_SUBDIRECTORIES = ["pandas", "spark", "sql"]
    GE_DIR = "great_expectations"
    GE_YML = "great_expectations.yml"
    GE_EDIT_NOTEBOOK_DIR = GE_UNCOMMITTED_DIR

    @classmethod
    def validate_config(cls, project_config):
        if isinstance(project_config, DataContextConfig):
            return True
        try:
            dataContextConfigSchema.load(project_config)
        except ValidationError:
            raise
        return True

    def __init__(self, project_config, context_root_dir=None):
        """DataContext constructor

        Args:
            context_root_dir: location to look for the ``great_expectations.yml`` file. If None, searches for the file \
            based on conventions for project subdirectories.

        Returns:
            None
        """
        if not BaseDataContext.validate_config(project_config):
            raise ge_exceptions.InvalidConfigError(
                "Your project_config is not valid. Try using the CLI check-config command."
            )

        self._project_config = project_config
        if context_root_dir is not None:
            self._context_root_directory = os.path.abspath(context_root_dir)
        else:
            self._context_root_directory = context_root_dir

        # Init plugin support
        if self.plugins_directory is not None:
            sys.path.append(self.plugins_directory)

        # Init data sources
        self._datasources = {}
        for datasource in self._project_config_with_variables_substituted["datasources"].keys():
            self.get_datasource(datasource)

        # Init stores
        self._stores = dict()
        self._init_stores(self._project_config_with_variables_substituted["stores"])

        # Init validation operators
        self.validation_operators = {}
        for validation_operator_name, validation_operator_config in self._project_config_with_variables_substituted["validation_operators"].items():
            self.add_validation_operator(
                validation_operator_name,
                validation_operator_config,
            )

        self._evaluation_parameter_dependencies_compiled = False
        self._evaluation_parameter_dependencies = {}
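
    # Illustrative usage (not part of the original source): a minimal sketch of
    # constructing a context from an in-memory config. The keys shown are the ones
    # __init__ reads above; the assumption that DataContextConfig accepts them as
    # keyword arguments, and the paths/values, are hypothetical.
    #
    #     project_config = DataContextConfig(**{
    #         "datasources": {},
    #         "stores": {},
    #         "validation_operators": {},
    #         "plugins_directory": None,
    #         # ...plus store-name keys such as "expectations_store_name"
    #     })
    #     context = BaseDataContext(project_config, context_root_dir="/path/to/project")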

    def _build_store(self, store_name, store_config):
        module_name = 'great_expectations.data_context.store'
        new_store = instantiate_class_from_config(
            config=store_config,
            runtime_environment={
                "root_directory": self.root_directory,
            },
            config_defaults={
                "module_name": module_name
            }
        )
        if not new_store:
            raise ge_exceptions.ClassInstantiationError(
                module_name=module_name,
                package_name=None,
                class_name=store_config['class_name']
            )
        self._stores[store_name] = new_store
        return new_store

    def _init_stores(self, store_configs):
        """Initialize all Stores for this DataContext.

        Stores are a good fit for reading/writing objects that:
            1. follow a clear key-value pattern, and
            2. are usually edited programmatically, using the Context

        In general, Stores should take over most of the reading and writing to disk that DataContext had previously done.
        As of 9/21/2019, the following Stores had not yet been implemented:
            * great_expectations.yml
            * expectations
            * data documentation
            * config_variables
            * anything accessed via write_resource

        Note that stores do NOT manage plugins.
        """

        for store_name, store_config in store_configs.items():
            self._build_store(store_name, store_config)

    def add_store(self, store_name, store_config):
        """Add a new Store to the DataContext and (for convenience) return the instantiated Store object.

        Args:
            store_name (str): a key for the new Store in self._stores
            store_config (dict): a config for the Store to add

        Returns:
            store (Store)
        """

        self._project_config["stores"][store_name] = store_config
        return self._build_store(store_name, store_config)
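
    # Illustrative usage (not part of the original source): a minimal sketch of
    # registering a store. "class_name" is resolved against the default module_name,
    # great_expectations.data_context.store, per _build_store above; the store name
    # is arbitrary and EvaluationParameterStore is an assumption.
    #
    #     context.add_store(
    #         "my_evaluation_parameter_store",
    #         {"class_name": "EvaluationParameterStore"},
    #     )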

    def add_validation_operator(self, validation_operator_name, validation_operator_config):
        """Add a new ValidationOperator to the DataContext and (for convenience) return the instantiated object.

        Args:
            validation_operator_name (str): a key for the new ValidationOperator in self._validation_operators
            validation_operator_config (dict): a config for the ValidationOperator to add

        Returns:
            validation_operator (ValidationOperator)
        """

        self._project_config["validation_operators"][validation_operator_name] = validation_operator_config
        config = self._project_config_with_variables_substituted["validation_operators"][validation_operator_name]
        module_name = 'great_expectations.validation_operators'
        new_validation_operator = instantiate_class_from_config(
            config=config,
            runtime_environment={
                "data_context": self,
            },
            config_defaults={
                "module_name": module_name
            }
        )
        if not new_validation_operator:
            raise ge_exceptions.ClassInstantiationError(
                module_name=module_name,
                package_name=None,
                class_name=config['class_name']
            )
        self.validation_operators[validation_operator_name] = new_validation_operator
        return new_validation_operator
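
    # Illustrative usage (not part of the original source): a hypothetical operator
    # config. "class_name" is resolved against great_expectations.validation_operators
    # per the config_defaults above; the operator name and empty action_list are
    # assumptions.
    #
    #     context.add_validation_operator(
    #         "action_list_operator",
    #         {
    #             "class_name": "ActionListValidationOperator",
    #             "action_list": [],
    #         },
    #     )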

    def _normalize_absolute_or_relative_path(self, path):
        if path is None:
            return
        if os.path.isabs(path):
            return path
        else:
            return os.path.join(self.root_directory, path)

    def _normalize_store_path(self, resource_store):
        if resource_store["type"] == "filesystem":
            if not os.path.isabs(resource_store["base_directory"]):
                resource_store["base_directory"] = os.path.join(self.root_directory, resource_store["base_directory"])
        return resource_store

    def get_docs_sites_urls(self, resource_identifier=None):
        """
        Get URLs for a resource for all data docs sites.

        This function will return URLs for any configured site even if the sites have not
        been built yet.

        :param resource_identifier: optional. It can be the identifier of an ExpectationSuite,
                a ValidationResult, or another resource that has a typed identifier.
                If not provided, the method will return the URLs of the index page.
        :return: a list of URLs. Each item is the URL for the resource for a data docs site
        """

        site_urls = []

        site_names = None
        sites = self._project_config_with_variables_substituted.get('data_docs_sites', [])
        if sites:
            logger.debug("Found data_docs_sites.")

            for site_name, site_config in sites.items():
                if (site_names and site_name in site_names) or not site_names:
                    complete_site_config = site_config
                    module_name = 'great_expectations.render.renderer.site_builder'
                    site_builder = instantiate_class_from_config(
                        config=complete_site_config,
                        runtime_environment={
                            "data_context": self,
                            "root_directory": self.root_directory
                        },
                        config_defaults={
                            "module_name": module_name
                        }
                    )
                    if not site_builder:
                        raise ge_exceptions.ClassInstantiationError(
                            module_name=module_name,
                            package_name=None,
                            class_name=complete_site_config['class_name']
                        )

                    url = site_builder.get_resource_url(resource_identifier=resource_identifier)

                    site_urls.append(url)

        return site_urls

    def open_data_docs(self, resource_identifier=None):
        """
        A stdlib cross-platform way to open a file in a browser.

        :param resource_identifier: ExpectationSuiteIdentifier, ValidationResultIdentifier
                or any other type's identifier. The argument is optional - when
                not supplied, the method returns the URL of the index page.
        """
        data_docs_urls = self.get_docs_sites_urls(resource_identifier=resource_identifier)
        for url in data_docs_urls:
            logger.debug("Opening Data Docs found here: {}".format(url))
            webbrowser.open(url)

    @property
    def root_directory(self):
        """The root directory for configuration objects in the data context; the location in which
        ``great_expectations.yml`` is located."""
        return self._context_root_directory

    @property
    def plugins_directory(self):
        """The directory in which custom plugin modules should be placed."""
        return self._normalize_absolute_or_relative_path(
            self._project_config_with_variables_substituted["plugins_directory"]
        )

    @property
    def _project_config_with_variables_substituted(self):
        return self.get_config_with_variables_substituted()

    @property
    def stores(self):
        """A single holder for all Stores in this context"""
        return self._stores

    @property
    def datasources(self):
        """A single holder for all Datasources in this context"""
        return self._datasources

    @property
    def expectations_store_name(self):
        return self._project_config_with_variables_substituted["expectations_store_name"]

    #####
    #
    # Internal helper methods
    #
    #####

    def _load_config_variables_file(self):
        """Get all config variables from the default location."""
        if not hasattr(self, "root_directory"):
            # A BaseDataContext does not have a directory in which to look
            return {}

        config_variables_file_path = self.get_config().config_variables_file_path
        if config_variables_file_path:
            try:
                with open(os.path.join(self.root_directory,
                                       substitute_config_variable(config_variables_file_path, {})),
                          "r") as config_variables_file:
                    return yaml.load(config_variables_file) or {}
            except IOError as e:
                if e.errno != errno.ENOENT:
                    raise
                logger.debug("Generating empty config variables file.")
                # TODO this might be the comment problem?
                base_config_variables_store = yaml.load("{}")
                base_config_variables_store.yaml_set_start_comment(CONFIG_VARIABLES_INTRO)
                return base_config_variables_store
        else:
            return {}

    def get_config_with_variables_substituted(self, config=None):
        if not config:
            config = self._project_config

        return substitute_all_config_variables(config, self._load_config_variables_file())

    def save_config_variable(self, config_variable_name, value):
        """Save config variable value

        Args:
            config_variable_name: name of the property
            value: the value to save for the property

        Returns:
            None
        """
        config_variables = self._load_config_variables_file()
        config_variables[config_variable_name] = value
        config_variables_filepath = self.get_config().config_variables_file_path
        if not config_variables_filepath:
            raise ge_exceptions.InvalidConfigError(
                "'config_variables_file_path' property is not found in config - setting it is required to use this feature"
            )

        config_variables_filepath = os.path.join(self.root_directory, config_variables_filepath)

        safe_mmkdir(os.path.dirname(config_variables_filepath), exist_ok=True)
        if not os.path.isfile(config_variables_filepath):
            logger.info("Creating new substitution_variables file at {config_variables_filepath}".format(
                config_variables_filepath=config_variables_filepath)
            )
        with open(config_variables_filepath, "w") as config_variables_file:
            yaml.dump(config_variables, config_variables_file)
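
    # Illustrative usage (not part of the original source): persisting a value into
    # the config variables file and referencing it from the main config via the
    # ${...} syntax handled by substitute_all_config_variables. The variable name
    # and secret are hypothetical.
    #
    #     context.save_config_variable("my_db_password", "s3cret")
    #     # great_expectations.yml may then contain: password: ${my_db_password}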

    def get_available_data_asset_names(self, datasource_names=None, generator_names=None):
        """Inspect datasource and generators to provide available data_asset objects.

        Args:
            datasource_names: list of datasources for which to provide available data_asset_name objects. If None, \
            return available data assets for all datasources.
            generator_names: list of generators for which to provide available data_asset_name objects.

        Returns:
            data_asset_names (dict): Dictionary describing available data assets
            ::

                {
                  datasource_name: {
                    generator_name: [ data_asset_1, data_asset_2, ... ]
                    ...
                  }
                  ...
                }

        """
        data_asset_names = {}
        if datasource_names is None:
            datasource_names = [datasource["name"] for datasource in self.list_datasources()]
        elif isinstance(datasource_names, string_types):
            datasource_names = [datasource_names]
        elif not isinstance(datasource_names, list):
            raise ValueError(
                "Datasource names must be a datasource name, list of datasource names or None (to list all datasources)"
            )

        if generator_names is not None:
            if isinstance(generator_names, string_types):
                generator_names = [generator_names]
            if len(generator_names) == len(datasource_names):  # Iterate over both together
                for idx, datasource_name in enumerate(datasource_names):
                    datasource = self.get_datasource(datasource_name)
                    data_asset_names[datasource_name] = \
                        datasource.get_available_data_asset_names(generator_names[idx])

            elif len(generator_names) == 1:
                datasource = self.get_datasource(datasource_names[0])
                data_asset_names[datasource_names[0]] = datasource.get_available_data_asset_names(generator_names)

            else:
                raise ValueError(
                    "If providing generators, you must either specify one generator for each datasource or only "
                    "one datasource."
                )
        else:  # generator_names is None
            for datasource_name in datasource_names:
                try:
                    datasource = self.get_datasource(datasource_name)
                    data_asset_names[datasource_name] = datasource.get_available_data_asset_names()
                except ValueError:
                    # handle the edge case of a non-existent datasource
                    data_asset_names[datasource_name] = {}

        return data_asset_names

    def build_batch_kwargs(self, datasource, generator, name=None, partition_id=None, **kwargs):
        """Builds batch kwargs using the provided datasource, generator, and batch_parameters.

        Args:
            datasource (str): the name of the datasource for which to build batch_kwargs
            generator (str): the name of the generator to use to build batch_kwargs
            name (str): an optional name batch_parameter
            partition_id (str): an optional partition_id batch_parameter
            **kwargs: additional batch_parameters

        Returns:
            BatchKwargs

        """
        datasource_obj = self.get_datasource(datasource)
        batch_kwargs = datasource_obj.build_batch_kwargs(generator=generator, name=name, partition_id=partition_id,
                                                         **kwargs)
        return batch_kwargs

    def get_batch(self, batch_kwargs, expectation_suite_name, data_asset_type=None, batch_parameters=None):
        """Build a batch of data using batch_kwargs, and return a DataAsset with expectation_suite_name attached. If
        batch_parameters are included, they will be available as attributes of the batch.

        Args:
            batch_kwargs: the batch_kwargs to use; must include a datasource key
            expectation_suite_name: The ExpectationSuite or the name of the expectation_suite to get
            data_asset_type: the type of data_asset to build, with associated expectation implementations. This can
                generally be inferred from the datasource.
            batch_parameters: optional parameters to store as the reference description of the batch. They should
                reflect parameters that would provide the passed BatchKwargs.

        Returns:
            DataAsset
        """
        if isinstance(batch_kwargs, dict):
            batch_kwargs = BatchKwargs(batch_kwargs)

        if not isinstance(batch_kwargs, BatchKwargs):
            raise ge_exceptions.BatchKwargsError("BatchKwargs must be a BatchKwargs object or dictionary.")

        if not isinstance(expectation_suite_name, (ExpectationSuite, ExpectationSuiteIdentifier, string_types)):
            raise ge_exceptions.DataContextError(
                "expectation_suite_name must be an ExpectationSuite, "
                "ExpectationSuiteIdentifier or string."
            )

        if isinstance(expectation_suite_name, ExpectationSuite):
            expectation_suite = expectation_suite_name
        else:
            expectation_suite = self.get_expectation_suite(expectation_suite_name)

        datasource = self.get_datasource(batch_kwargs.get("datasource"))
        batch = datasource.get_batch(batch_kwargs=batch_kwargs, batch_parameters=batch_parameters)
        if data_asset_type is None:
            data_asset_type = datasource.config.get("data_asset_type")
        validator = Validator(
            batch=batch,
            expectation_suite=expectation_suite,
            expectation_engine=data_asset_type
        )
        return validator.get_dataset()
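
    # Illustrative usage (not part of the original source): the two-step flow of
    # building batch_kwargs and fetching a batch with a suite attached. The
    # datasource, generator, asset, and suite names are hypothetical.
    #
    #     batch_kwargs = context.build_batch_kwargs(
    #         "my_datasource", "my_generator", name="my_table"
    #     )
    #     batch = context.get_batch(batch_kwargs, "my_suite")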

    def run_validation_operator(
            self,
            validation_operator_name,
            assets_to_validate,
            run_id=None,
            **kwargs
    ):
        """
        Run a validation operator to validate data assets and to perform the business logic around
        validation that the operator implements.

        Args:
            validation_operator_name: name of the operator, as appears in the context's config file
            assets_to_validate: a list that specifies the data assets that the operator will validate. The members of
                the list can be either batches, or a tuple that will allow the operator to fetch the batch:
                (batch_kwargs, expectation_suite_name)
            run_id: The run_id for the validation; if None, a default value will be used
            **kwargs: Additional kwargs to pass to the validation operator

        Returns:
            ValidationOperatorResult
        """
        if run_id is None:
            run_id = datetime.datetime.utcnow().strftime("%Y%m%dT%H%M%S.%fZ")
            logger.info("Setting run_id to: {}".format(run_id))

        return self.validation_operators[validation_operator_name].run(
            assets_to_validate=assets_to_validate,
            run_id=run_id,
            **kwargs
        )
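
    # Illustrative usage (not part of the original source): running an operator over
    # one (batch_kwargs, expectation_suite_name) tuple and letting run_id default to
    # a UTC timestamp, as implemented above. The operator and suite names are hypothetical.
    #
    #     results = context.run_validation_operator(
    #         "action_list_operator",
    #         assets_to_validate=[(batch_kwargs, "my_suite")],
    #     )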

    def list_validation_operator_names(self):
        if not self.validation_operators:
            return []
        return list(self.validation_operators.keys())

    def add_datasource(self, name, initialize=True, **kwargs):
        """Add a new datasource to the data context, with configuration provided as kwargs.

        Args:
            name: the name for the new datasource to add
            initialize: if False, add the datasource to the config, but do not
                initialize it, for example if a user needs to debug database connectivity.
            kwargs (keyword arguments): the configuration for the new datasource

        Returns:
            datasource (Datasource)
        """
        logger.debug("Starting BaseDataContext.add_datasource for %s" % name)
        module_name = kwargs.get("module_name", "great_expectations.datasource")
        verify_dynamic_loading_support(module_name=module_name, package_name=None)
        class_name = kwargs.get("class_name")
        datasource_class = load_class(
            module_name=module_name,
            class_name=class_name
        )

        # For any class that should be loaded, it may control its configuration construction
        # by implementing a classmethod called build_configuration
        if hasattr(datasource_class, "build_configuration"):
            config = datasource_class.build_configuration(**kwargs)
        else:
            config = kwargs

        config = datasourceConfigSchema.load(config)
        self._project_config["datasources"][name] = config

        # We perform variable substitution in the datasource's config here before using the config
        # to instantiate the datasource object. Variable substitution is a service that the data
        # context provides. Datasources should not see unsubstituted variables in their config.
        if initialize:
            datasource = self._build_datasource_from_config(
                name, self._project_config_with_variables_substituted["datasources"][name])
            self._datasources[name] = datasource
        else:
            datasource = None

        return datasource
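
    # Illustrative usage (not part of the original source): adding a pandas datasource.
    # The kwargs are passed to load_class and (if defined) build_configuration above;
    # the datasource name is hypothetical, and PandasDatasource is assumed to be a
    # class available in great_expectations.datasource.
    #
    #     context.add_datasource(
    #         "my_pandas_datasource",
    #         class_name="PandasDatasource",
    #     )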

    def add_generator(self, datasource_name, generator_name, class_name, **kwargs):
        """Add a generator to the named datasource, using the provided configuration.

        Args:
            datasource_name: name of datasource to which to add the new generator
            generator_name: name of the generator to add
            class_name: class of the generator to add
            **kwargs: generator configuration, provided as kwargs

        Returns:
            generator (Generator)
        """
        datasource_obj = self.get_datasource(datasource_name)
        generator = datasource_obj.add_generator(name=generator_name, class_name=class_name, **kwargs)
        return generator

    def get_config(self):
        return self._project_config

    def _build_datasource_from_config(self, name, config):
        # We convert from the type back to a dictionary for purposes of instantiation
        if isinstance(config, DatasourceConfig):
            config = datasourceConfigSchema.dump(config)
        config.update({
            "name": name
        })
        module_name = 'great_expectations.datasource'
        datasource = instantiate_class_from_config(
            config=config,
            runtime_environment={
                "data_context": self
            },
            config_defaults={
                "module_name": module_name
            }
        )
        if not datasource:
            raise ge_exceptions.ClassInstantiationError(
                module_name=module_name,
                package_name=None,
                class_name=config['class_name']
            )
        return datasource

    def get_datasource(self, datasource_name="default"):
        """Get the named datasource

        Args:
            datasource_name (str): the name of the datasource from the configuration

        Returns:
            datasource (Datasource)
        """
        if datasource_name in self._datasources:
            return self._datasources[datasource_name]
        elif datasource_name in self._project_config_with_variables_substituted["datasources"]:
            datasource_config = copy.deepcopy(
                self._project_config_with_variables_substituted["datasources"][datasource_name])
        else:
            raise ValueError(
                "Unable to load datasource `%s` -- no configuration found or invalid configuration." % datasource_name
            )
        datasource = self._build_datasource_from_config(datasource_name, datasource_config)
        self._datasources[datasource_name] = datasource
        return datasource

    def list_expectation_suites(self):
        """Return a list of available expectation suite names."""
        try:
            keys = self.stores[self.expectations_store_name].list_keys()
        except KeyError as e:
            raise ge_exceptions.InvalidConfigError("Unable to find configured store: %s" % str(e))
        return keys

    def list_datasources(self):
        """List currently-configured datasources on this context.

        Returns:
            List(dict): each dictionary includes "name" and "class_name" keys
        """
        datasources = []
        for key, value in self._project_config_with_variables_substituted["datasources"].items():
            datasources.append({
                "name": key,
                "class_name": value["class_name"]
            })
        return datasources

    def create_expectation_suite(self, expectation_suite_name, overwrite_existing=False):
        """Build a new expectation suite and save it into the data_context expectation store.

        Args:
            expectation_suite_name: The name of the expectation_suite to create
            overwrite_existing (boolean): Whether to overwrite the expectation suite if one with the given name
                already exists.

        Returns:
            A new (empty) expectation suite.
        """
        if not isinstance(overwrite_existing, bool):
            raise ValueError("Parameter overwrite_existing must be of type BOOL")

        expectation_suite = ExpectationSuite(expectation_suite_name=expectation_suite_name)
        key = ExpectationSuiteIdentifier(expectation_suite_name=expectation_suite_name)

        if self._stores[self.expectations_store_name].has_key(key) and not overwrite_existing:
            raise ge_exceptions.DataContextError(
                "expectation_suite with name {} already exists. If you would like to overwrite this "
                "expectation_suite, set overwrite_existing=True.".format(expectation_suite_name)
            )
        else:
            self._stores[self.expectations_store_name].set(key, expectation_suite)

        return expectation_suite
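
    # Illustrative usage (not part of the original source): the typical suite
    # lifecycle against the expectations store, using get_expectation_suite and
    # save_expectation_suite defined below. The suite name is hypothetical.
    #
    #     suite = context.create_expectation_suite("my_suite")
    #     suite = context.get_expectation_suite("my_suite")
    #     context.save_expectation_suite(suite)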

    def get_expectation_suite(self, expectation_suite_name):
        """Get an expectation suite by name.

        Args:
            expectation_suite_name (str): the name for the expectation suite

        Returns:
            expectation_suite
        """
        key = ExpectationSuiteIdentifier(expectation_suite_name=expectation_suite_name)

        if self.stores[self.expectations_store_name].has_key(key):
            return self.stores[self.expectations_store_name].get(key)
        else:
            raise ge_exceptions.DataContextError(
                "expectation_suite %s not found" % expectation_suite_name
            )

    def save_expectation_suite(self, expectation_suite, expectation_suite_name=None):
        """Save the provided expectation suite into the DataContext.

        Args:
            expectation_suite: the suite to save
            expectation_suite_name: the name of this expectation suite. If no name is provided the name will \
                be read from the suite

        Returns:
            None
        """
        if expectation_suite_name is None:
            key = ExpectationSuiteIdentifier(expectation_suite_name=expectation_suite.expectation_suite_name)
        else:
            expectation_suite.expectation_suite_name = expectation_suite_name
            key = ExpectationSuiteIdentifier(expectation_suite_name=expectation_suite_name)

        self.stores[self.expectations_store_name].set(key, expectation_suite)
        self._evaluation_parameter_dependencies_compiled = False

    def _store_metrics(self, requested_metrics, validation_results, target_store_name):
        """
        requested_metrics is a dictionary like this:

              requested_metrics:
                *:  # The asterisk here matches *any* expectation suite name
                  # use the 'kwargs' key to request metrics that are defined by kwargs,
                  # for example because they are defined only for a particular column
                  # - column:
                  #     Age:
                  #        - expect_column_min_to_be_between.result.observed_value
                    - statistics.evaluated_expectations
                    - statistics.successful_expectations

        Args:
            requested_metrics: dict of requested metrics, keyed by expectation suite name (or "*")
            validation_results: the validation result suite from which to read metric values
            target_store_name: the name of the store in which to save the metrics

        Returns:
            None
        """
        expectation_suite_name = validation_results.meta["expectation_suite_name"]
        run_id = validation_results.meta["run_id"]

        for expectation_suite_dependency, metrics_list in requested_metrics.items():
            if (expectation_suite_dependency != "*") and (expectation_suite_dependency != expectation_suite_name):
                continue

            if not isinstance(metrics_list, list):
                raise ge_exceptions.DataContextError("Invalid requested_metrics configuration: metrics requested for "
                                                     "each expectation suite must be a list.")

            for metric_configuration in metrics_list:
                metric_configurations = _get_metric_configuration_tuples(metric_configuration)
                for metric_name, metric_kwargs in metric_configurations:
                    try:
                        metric_value = validation_results.get_metric(metric_name, **metric_kwargs)
                        self.stores[target_store_name].set(
                            ValidationMetricIdentifier(
                                run_id=run_id,
                                expectation_suite_identifier=ExpectationSuiteIdentifier(expectation_suite_name),
                                metric_name=metric_name,
                                metric_kwargs_id=get_metric_kwargs_id(metric_name, metric_kwargs)
                            ),
                            metric_value
                        )
                    except ge_exceptions.UnavailableMetricError:
                        # This will happen frequently in larger pipelines
                        logger.debug("metric {} was requested by another expectation suite but is not available in "
                                     "this validation result.".format(metric_name))

    def store_validation_result_metrics(self, requested_metrics, validation_results, target_store_name):
        self._store_metrics(requested_metrics, validation_results, target_store_name)

    def store_evaluation_parameters(self, validation_results, target_store_name=None):
        if not self._evaluation_parameter_dependencies_compiled:
            self._compile_evaluation_parameter_dependencies()

        if target_store_name is None:
            target_store_name = self.evaluation_parameter_store_name

        self._store_metrics(self._evaluation_parameter_dependencies, validation_results, target_store_name)

    @property
    def evaluation_parameter_store(self):
        return self.stores[self.evaluation_parameter_store_name]

    @property
    def evaluation_parameter_store_name(self):
        return self._project_config_with_variables_substituted["evaluation_parameter_store_name"]

    @property
    def validations_store_name(self):
        return self._project_config_with_variables_substituted["validations_store_name"]

    @property
    def validations_store(self):
        return self.stores[self.validations_store_name]

    def _compile_evaluation_parameter_dependencies(self):
        self._evaluation_parameter_dependencies = {}
        for key in self.stores[self.expectations_store_name].list_keys():
            expectation_suite = self.stores[self.expectations_store_name].get(key)
            dependencies = expectation_suite.get_evaluation_parameter_dependencies()
            if len(dependencies) > 0:
                nested_update(self._evaluation_parameter_dependencies, dependencies)

        self._evaluation_parameter_dependencies_compiled = True

    def get_validation_result(
        self,
        expectation_suite_name,
        run_id=None,
        batch_identifier=None,
        validations_store_name=None,
        failed_only=False,
    ):
        """Get validation results from a configured store.

        Args:
            expectation_suite_name: expectation_suite name for which to get validation result
            run_id: run_id for which to get validation result (if None, fetch the latest result by alphanumeric sort)
            batch_identifier: batch_identifier for which to get validation result (if None, fetch the latest)
            validations_store_name: the name of the store from which to get validation results
            failed_only: if True, filter the result to return only failed expectations

        Returns:
            validation_result

        """
        if validations_store_name is None:
            validations_store_name = self.validations_store_name
        selected_store = self.stores[validations_store_name]

        if run_id is None or batch_identifier is None:
            # Get the most recent run id.
            # NOTE : This method requires a (potentially very inefficient) list_keys call.
            # It should probably move to live in an appropriate Store class,
            # but when we do so, that Store will need to function as more than just a key-value Store.
            key_list = selected_store.list_keys()
            filtered_key_list = []
            for key in key_list:
                if run_id is not None and key.run_id != run_id:
                    continue
                if batch_identifier is not None and key.batch_identifier != batch_identifier:
                    continue
                filtered_key_list.append(key)

            # run_id_set = set([key.run_id for key in filtered_key_list])
            if len(filtered_key_list) == 0:
                logger.warning("No valid run_id values found.")
                return {}

            filtered_key_list = sorted(filtered_key_list, key=lambda x: x.run_id)

            if run_id is None:
                run_id = filtered_key_list[-1].run_id
            if batch_identifier is None:
                batch_identifier = filtered_key_list[-1].batch_identifier

        key = ValidationResultIdentifier(
                expectation_suite_identifier=ExpectationSuiteIdentifier(
                    expectation_suite_name=expectation_suite_name
                ),
                run_id=run_id,
                batch_identifier=batch_identifier
        )
        results_dict = selected_store.get(key)

        # TODO: This should be a convenience method of ValidationResultSuite
        if failed_only:
            failed_results_list = [result for result in results_dict.results if not result.success]
            results_dict.results = failed_results_list
            return results_dict
        else:
            return results_dict
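
    # Illustrative usage (not part of the original source): fetching the most recent
    # validation result for a suite (run_id and batch_identifier default to the latest
    # key, per the sort above) and keeping only failed expectations. The suite name
    # is hypothetical.
    #
    #     result = context.get_validation_result("my_suite", failed_only=True)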

    def update_return_obj(self, data_asset, return_obj):
        """Helper called by data_asset.

        Args:
            data_asset: The data_asset whose validation produced the current return object
            return_obj: the return object to update

        Returns:
            return_obj: the return object, potentially changed into a widget by the configured expectation explorer
        """
        return return_obj

    def build_data_docs(self, site_names=None, resource_identifiers=None):
        """
        Build Data Docs for your project.

        These make it simple to visualize data quality in your project. These
        include Expectations, Validations & Profiles. They are built for all
        Datasources from JSON artifacts in the local repo, including validations
        & profiles from the uncommitted directory.

        :param site_names: if specified, build data docs only for these sites, otherwise,
                            build all the sites specified in the context's config
        :param resource_identifiers: a list of resource identifiers (ExpectationSuiteIdentifier,
                            ValidationResultIdentifier). If specified, rebuild HTML
                            (or other views the data docs sites are rendering) only for
                            the resources in this list. This supports incremental build
                            of data docs sites (e.g., when a new validation result is created)
                            and avoids full rebuild.

        Returns:
            A dictionary with the names of the updated data documentation sites as keys and the location info
            of their index.html files as values
        """
        logger.debug("Starting DataContext.build_data_docs")

        index_page_locator_infos = {}

        sites = self._project_config_with_variables_substituted.get('data_docs_sites', [])
        if sites:
            logger.debug("Found data_docs_sites. Building sites...")

            for site_name, site_config in sites.items():
                logger.debug("Building Data Docs Site %s" % site_name)

                if (site_names and site_name in site_names) or not site_names:
                    complete_site_config = site_config
                    module_name = 'great_expectations.render.renderer.site_builder'
                    site_builder = instantiate_class_from_config(
                        config=complete_site_config,
                        runtime_environment={
                            "data_context": self,
                            "root_directory": self.root_directory,
                            "site_name": site_name
                        },
                        config_defaults={
                            "module_name": module_name
                        }
                    )
                    if not site_builder:
                        raise ge_exceptions.ClassInstantiationError(
                            module_name=module_name,
                            package_name=None,
                            class_name=complete_site_config['class_name']
                        )
                    index_page_resource_identifier_tuple = site_builder.build(resource_identifiers)
                    if index_page_resource_identifier_tuple:
                        index_page_locator_infos[site_name] = index_page_resource_identifier_tuple[0]

        else:
            logger.debug("No data_docs_config found. No site(s) built.")

        return index_page_locator_infos
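
    # Illustrative usage (not part of the original source): a full build of every
    # configured site, then an incremental rebuild scoped to a single result;
    # validation_result_id stands in for a real ValidationResultIdentifier.
    #
    #     context.build_data_docs()
    #     context.build_data_docs(resource_identifiers=[validation_result_id])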

    def profile_datasource(self,
                           datasource_name,
                           generator_name=None,
                           data_assets=None,
                           max_data_assets=20,
                           profile_all_data_assets=True,
                           profiler=BasicDatasetProfiler,
                           dry_run=False,
                           run_id="profiling",
                           additional_batch_kwargs=None):
        """Profile the named datasource using the named profiler.

        Args:
            datasource_name: the name of the datasource for which to profile data_assets
            generator_name: the name of the generator to use to get batches
            data_assets: list of data asset names to profile
            max_data_assets: if the number of data assets the generator yields is greater than this max_data_assets,
                profile_all_data_assets=True is required to profile all
            profile_all_data_assets: when True, all data assets are profiled, regardless of their number
            profiler: the profiler class to use
            dry_run: when True, the method only checks arguments and reports whether profiling can proceed or which
                arguments are missing
            run_id: the run_id under which the profiler's validation results will be stored
            additional_batch_kwargs: Additional keyword arguments to be provided to get_batch when loading the data asset.

        Returns:
            A dictionary::

                {
                    "success": True/False,
                    "results": List of (expectation_suite, EVR) tuples for each of the data_assets found in the datasource
                }

            When success = False, the error details are under the "error" key
        """

        # We don't need the datasource object, but this line serves to check if the datasource by the name passed as
        # an arg exists and raise an error if it does not.
        datasource = self.get_datasource(datasource_name)

        if not dry_run:
            logger.info("Profiling '%s' with '%s'" % (datasource_name, profiler.__name__))

        profiling_results = {}

        # Build the list of available data asset names (each item a tuple of name and type)

        data_asset_names_dict = self.get_available_data_asset_names(datasource_name)

        available_data_asset_name_list = []
        try:
            datasource_data_asset_names_dict = data_asset_names_dict[datasource_name]
        except KeyError:
            # KeyError will happen if there is no datasource with this name
            raise ge_exceptions.ProfilerError(
                "No datasource {} found.".format(datasource_name))

        if generator_name is None:
            # if no generator name is passed as an arg and the datasource has only
            # one generator with data asset names, use it.
            # if ambiguous, raise an exception
            for name in datasource_data_asset_names_dict.keys():
                if generator_name is not None:
                    profiling_results = {
                        'success': False,
                        'error': {
                            'code': DataContext.PROFILING_ERROR_CODE_MULTIPLE_GENERATORS_FOUND
                        }
                    }
                    return profiling_results

                if len(datasource_data_asset_names_dict[name]["names"]) > 0:
                    available_data_asset_name_list = datasource_data_asset_names_dict[name]["names"]
                    generator_name = name

            if generator_name is None:
                profiling_results = {
                    'success': False,
                    'error': {
                        'code': DataContext.PROFILING_ERROR_CODE_NO_GENERATOR_FOUND
                    }
                }
                return profiling_results
        else:
            # if the generator name is passed as an arg, get this generator's available data asset names
            try:
                available_data_asset_name_list = datasource_data_asset_names_dict[generator_name]["names"]
            except KeyError:
                raise ge_exceptions.ProfilerError(
                    "Batch Kwarg Generator {} not found. Specify the name of a generator configured in this datasource".format(generator_name))

        available_data_asset_name_list = sorted(available_data_asset_name_list, key=lambda x: x[0])

        if len(available_data_asset_name_list) == 0:
            raise ge_exceptions.ProfilerError(
                "No Data Assets found in Datasource {}. Used generator: {}.".format(
                    datasource_name,
                    generator_name)
            )
        total_data_assets = len(available_data_asset_name_list)

        data_asset_names_to_profiled = None

        if isinstance(data_assets, list) and len(data_assets) > 0:
            not_found_data_assets = [name for name in data_assets if name not in [da[0] for da in available_data_asset_name_list]]
            if len(not_found_data_assets) > 0:
                profiling_results = {
                    'success': False,
                    'error': {
                        'code': DataContext.PROFILING_ERROR_CODE_SPECIFIED_DATA_ASSETS_NOT_FOUND,
                        'not_found_data_assets': not_found_data_assets,
                        'data_assets': available_data_asset_name_list
                    }
                }
                return profiling_results

            data_assets.sort()
            data_asset_names_to_profiled = data_assets
            total_data_assets = len(available_data_asset_name_list)
            if not dry_run:
                logger.info("Profiling the white-listed data assets: %s, alphabetically." % (",".join(data_assets)))
        else:
            if not profile_all_data_assets:
                if total_data_assets > max_data_assets:
                    profiling_results = {
                        'success': False,
                        'error': {
                            'code': DataContext.PROFILING_ERROR_CODE_TOO_MANY_DATA_ASSETS,
                            'num_data_assets': total_data_assets,
                            'data_assets': available_data_asset_name_list
                        }
                    }
                    return profiling_results

            data_asset_names_to_profiled = [name[0] for name in available_data_asset_name_list]
        if not dry_run:
            logger.info("Profiling all %d data assets from generator %s" % (len(available_data_asset_name_list), generator_name))
        else:
            logger.info("Found %d data assets from generator %s" % (len(available_data_asset_name_list), generator_name))

        profiling_results['success'] = True

        if not dry_run:
            profiling_results['results'] = []
            total_columns, total_expectations, total_rows, skipped_data_assets = 0, 0, 0, 0
            total_start_time = datetime.datetime.now()

            for name in data_asset_names_to_profiled:
                logger.info("\tProfiling '%s'..." % name)
                try:
                    profiling_results['results'].append(
                        self.profile_data_asset(
                            datasource_name=datasource_name,
                            generator_name=generator_name,
                            data_asset_name=name,
                            profiler=profiler,
                            run_id=run_id,
                            additional_batch_kwargs=additional_batch_kwargs
                        )["results"][0]
                    )

                except ge_exceptions.ProfilerError as err:
                    logger.warning(err.message)
                except IOError as err:
                    logger.warning("IOError while profiling %s. (Perhaps a loading error?) Skipping." % name[1])
                    logger.debug(str(err))
                    skipped_data_assets += 1
                except SQLAlchemyError as e:
                    logger.warning("SqlAlchemyError while profiling %s. Skipping." % name[1])
                    logger.debug(str(e))
                    skipped_data_assets += 1

            total_duration = (datetime.datetime.now() - total_start_time).total_seconds()
            logger.info("""
    Profiled %d of %d named data assets, with %d total rows and %d columns in %.2f seconds.
    Generated, evaluated, and stored %d Expectations during profiling. Please review results using data-docs.""" % (
                len(data_asset_names_to_profiled),
                total_data_assets,
                total_rows,
                total_columns,
                total_duration,
                total_expectations,
            ))
            if skipped_data_assets > 0:
                logger.warning("Skipped %d data assets due to errors." % skipped_data_assets)

        profiling_results['success'] = True
        return profiling_results

1181
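    # A minimal usage sketch for profile_datasource (illustrative only; the
    # datasource, generator, and asset names below are hypothetical, not part of
    # this module):
    #
    #     context = DataContext("/path/to/project/great_expectations")
    #     results = context.profile_datasource(
    #         "my_datasource",
    #         generator_name="default",
    #         data_assets=["my_table"],  # optional whitelist; omit to profile all assets
    #         dry_run=True,              # report what would be profiled without profiling
    #     )
    #     print(results["success"])
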
    def profile_data_asset(self,
                           datasource_name,
                           generator_name=None,
                           data_asset_name=None,
                           batch_kwargs=None,
                           expectation_suite_name=None,
                           profiler=BasicDatasetProfiler,
                           run_id="profiling",
                           additional_batch_kwargs=None):
        """
        Profile a data asset

        :param datasource_name: the name of the datasource to which the profiled data asset belongs
        :param generator_name: the name of the generator to use to get batches (only if batch_kwargs are not provided)
        :param data_asset_name: the name of the profiled data asset
        :param batch_kwargs: optional - if set, the method will use the value to fetch the batch to be profiled. If not passed, the generator (generator_name arg) will choose a batch
        :param expectation_suite_name: optional - the name under which the resulting expectation suite will be saved. If not set, a name is constructed from the datasource, generator, data asset, and profiler names
        :param profiler: the profiler class to use
        :param run_id: optional - if set, the validation result created by the profiler will be under the provided run_id
        :param additional_batch_kwargs: optional - additional batch_kwargs merged into the batch_kwargs used to fetch the batch
        :returns
            A dictionary::

                {
                    "success": True/False,
                    "results": List of (expectation_suite, EVR) tuples for each of the data_assets found in the datasource
                }

            When success = False, the error details are under "error" key
        """

        logger.info("Profiling '%s' with '%s'" % (datasource_name, profiler.__name__))

        if not additional_batch_kwargs:
            additional_batch_kwargs = {}

        if batch_kwargs is None:
            try:
                generator = self.get_datasource(datasource_name=datasource_name).get_generator(generator_name=generator_name)
                batch_kwargs = generator.build_batch_kwargs(data_asset_name, **additional_batch_kwargs)
            except ge_exceptions.BatchKwargsError:
                raise ge_exceptions.ProfilerError(
                    "Unable to build batch_kwargs for datasource {}, using generator {} for name {}".format(
                        datasource_name,
                        generator_name,
                        data_asset_name
                    ))
            except ValueError:
                raise ge_exceptions.ProfilerError(
                    "Unable to find datasource {} or generator {}.".format(datasource_name, generator_name)
                )
        else:
            batch_kwargs.update(additional_batch_kwargs)

        profiling_results = {
            "success": False,
            "results": []
        }

        total_columns, total_expectations, total_rows = 0, 0, 0
        total_start_time = datetime.datetime.now()

        name = data_asset_name
        # Initialize the per-batch statistics so the summary log below is
        # well-defined even when the batch is not a Dataset.
        row_count = 0
        new_column_count = 0

        start_time = datetime.datetime.now()

        if expectation_suite_name is None:
            if generator_name is None and data_asset_name is None:
                expectation_suite_name = datasource_name + "." + profiler.__name__ + "." + BatchKwargs(
                    batch_kwargs).to_id()
            else:
                expectation_suite_name = datasource_name + "." + generator_name + "." + data_asset_name + "." + \
                                         profiler.__name__

        self.create_expectation_suite(
            expectation_suite_name=expectation_suite_name,
            overwrite_existing=True
        )

        # TODO: Add batch_parameters
        batch = self.get_batch(
            expectation_suite_name=expectation_suite_name,
            batch_kwargs=batch_kwargs,
        )

        if not profiler.validate(batch):
            raise ge_exceptions.ProfilerError(
                "batch '%s' is not a valid batch for the '%s' profiler" % (name, profiler.__name__)
            )

        # Note: This logic is specific to DatasetProfilers, which profile a single batch. Multi-batch profilers
        # will have more to unpack.
        expectation_suite, validation_results = profiler.profile(batch, run_id=run_id)
        profiling_results['results'].append((expectation_suite, validation_results))

        self.validations_store.set(
            key=ValidationResultIdentifier(
                expectation_suite_identifier=ExpectationSuiteIdentifier(
                    expectation_suite_name=expectation_suite_name
                ),
                run_id=run_id,
                batch_identifier=batch.batch_id
            ),
            value=validation_results
        )

        if isinstance(batch, Dataset):
            # For datasets, we can produce some more detailed statistics
            row_count = batch.get_row_count()
            total_rows += row_count
            new_column_count = len(set([exp.kwargs["column"] for exp in expectation_suite.expectations if "column" in exp.kwargs]))
            total_columns += new_column_count

        new_expectation_count = len(expectation_suite.expectations)
        total_expectations += new_expectation_count

        self.save_expectation_suite(expectation_suite)
        duration = (datetime.datetime.now() - start_time).total_seconds()
        logger.info("\tProfiled %d columns using %d rows from %s (%.3f sec)" %
                    (new_column_count, row_count, name, duration))

        total_duration = (datetime.datetime.now() - total_start_time).total_seconds()
        logger.info("""
Profiled the data asset, with %d total rows and %d columns in %.2f seconds.
Generated, evaluated, and stored %d Expectations during profiling. Please review results using data-docs.""" % (
            total_rows,
            total_columns,
            total_duration,
            total_expectations,
        ))

        profiling_results['success'] = True
        return profiling_results
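
    # A minimal usage sketch for profile_data_asset (illustrative only; the names
    # below are hypothetical). On success, each entry of results["results"] is an
    # (expectation_suite, validation_results) tuple:
    #
    #     context = DataContext("/path/to/project/great_expectations")
    #     results = context.profile_data_asset(
    #         "my_datasource",
    #         generator_name="default",
    #         data_asset_name="my_table",
    #         run_id="profiling",
    #     )
    #     if results["success"]:
    #         expectation_suite, validation_results = results["results"][0]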


class DataContext(BaseDataContext):
    """A DataContext represents a Great Expectations project. It organizes storage and access for
    expectation suites, datasources, notification settings, and data fixtures.

    The DataContext is configured via a yml file stored in a directory called great_expectations; the configuration file
    as well as managed expectation suites should be stored in version control.

    Use the `create` classmethod to create a new empty config, or instantiate the DataContext
    by passing the path to an existing data context root directory.

    DataContexts use data sources you're already familiar with. Generators help introspect data stores and data execution
    frameworks (such as airflow, Nifi, dbt, or dagster) to describe and produce batches of data ready for analysis. This
    enables fetching, validation, profiling, and documentation of your data in a way that is meaningful within your
    existing infrastructure and work environment.

    DataContexts use a datasource-based namespace, where each accessible type of data has a three-part
    normalized *data_asset_name*, consisting of *datasource/generator/generator_asset*.

    - The datasource actually connects to a source of materialized data and returns Great Expectations DataAssets \
      connected to a compute environment and ready for validation.

    - The Generator knows how to introspect datasources and produce identifying "batch_kwargs" that define \
      particular slices of data.

    - The generator_asset is a specific name -- often a table name or other name familiar to users -- that \
      generators can slice into batches.

    An expectation suite is a collection of expectations ready to be applied to a batch of data. Since
    in many projects it is useful to have different expectations evaluate in different contexts--profiling
    vs. testing; warning vs. error; high vs. low compute; ML model or dashboard--suites provide a namespace
    option for selecting which expectations a DataContext returns.

    In many simple projects, the datasource or generator name may be omitted and the DataContext will infer
    the correct name when there is no ambiguity.

    Similarly, if no expectation suite name is provided, the DataContext will assume the name "default".
    """
    @classmethod
    def create(cls, project_root_dir=None):
        """
        Build a new great_expectations directory and DataContext object in the provided project_root_dir.

        `create` will create a new "great_expectations" directory in the provided folder if one does not
        already exist. Then, it will initialize a new DataContext in that folder and write the resulting config.

        Args:
            project_root_dir: path to the root directory in which to create a new great_expectations directory

        Returns:
            DataContext
        """

        if not os.path.isdir(project_root_dir):
            raise ge_exceptions.DataContextError(
                "The project_root_dir must be an existing directory in which "
                "to initialize a new DataContext"
            )

        ge_dir = os.path.join(project_root_dir, cls.GE_DIR)
        safe_mmkdir(ge_dir, exist_ok=True)
        cls.scaffold_directories(ge_dir)

        if os.path.isfile(os.path.join(ge_dir, cls.GE_YML)):
            message = """Warning. An existing `{}` was found here: {}.
    - No action was taken.""".format(cls.GE_YML, ge_dir)
            warnings.warn(message)
        else:
            cls.write_project_template_to_disk(ge_dir)

        if os.path.isfile(os.path.join(ge_dir, "notebooks")):
            message = """Warning. An existing `notebooks` directory was found here: {}.
    - No action was taken.""".format(ge_dir)
            warnings.warn(message)
        else:
            cls.scaffold_notebooks(ge_dir)

        uncommitted_dir = os.path.join(ge_dir, cls.GE_UNCOMMITTED_DIR)
        if os.path.isfile(os.path.join(uncommitted_dir, "config_variables.yml")):
            message = """Warning. An existing `config_variables.yml` was found here: {}.
    - No action was taken.""".format(uncommitted_dir)
            warnings.warn(message)
        else:
            cls.write_config_variables_template_to_disk(uncommitted_dir)

        return cls(ge_dir)

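    # A minimal usage sketch (illustrative; the path is hypothetical):
    #
    #     context = DataContext.create("/path/to/my/project")
    #
    # This scaffolds /path/to/my/project/great_expectations (warning rather than
    # overwriting an existing great_expectations.yml or config_variables.yml)
    # and returns a DataContext rooted there.
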
    @classmethod
    def all_uncommitted_directories_exist(cls, ge_dir):
        """Check if all uncommitted directories exist."""
        uncommitted_dir = os.path.join(ge_dir, cls.GE_UNCOMMITTED_DIR)
        for directory in cls.UNCOMMITTED_DIRECTORIES:
            if not os.path.isdir(os.path.join(uncommitted_dir, directory)):
                return False

        return True

    @classmethod
    def config_variables_yml_exist(cls, ge_dir):
        """Check if the config_variables.yml file exists."""
        path_to_yml = os.path.join(ge_dir, cls.GE_YML)

        # TODO this is so brittle and gross
        with open(path_to_yml, "r") as f:
            config = yaml.load(f)
        config_var_path = config.get("config_variables_file_path")
        config_var_path = os.path.join(ge_dir, config_var_path)
        return os.path.isfile(config_var_path)

    @classmethod
    def write_config_variables_template_to_disk(cls, uncommitted_dir):
        safe_mmkdir(uncommitted_dir)
        config_var_file = os.path.join(uncommitted_dir, "config_variables.yml")
        with open(config_var_file, "w") as template:
            template.write(CONFIG_VARIABLES_TEMPLATE)

    @classmethod
    def write_project_template_to_disk(cls, ge_dir):
        file_path = os.path.join(ge_dir, cls.GE_YML)
        with open(file_path, "w") as template:
            template.write(PROJECT_TEMPLATE)

    @classmethod
    def scaffold_directories(cls, base_dir):
        """Safely create GE directories for a new project."""
        safe_mmkdir(base_dir, exist_ok=True)
        with open(os.path.join(base_dir, ".gitignore"), 'w') as gitignore:
            gitignore.write("uncommitted/")

        for directory in cls.BASE_DIRECTORIES:
            if directory == "plugins":
                plugins_dir = os.path.join(base_dir, directory)
                safe_mmkdir(plugins_dir, exist_ok=True)
                safe_mmkdir(os.path.join(plugins_dir, "custom_data_docs"), exist_ok=True)
                safe_mmkdir(os.path.join(plugins_dir, "custom_data_docs", "views"), exist_ok=True)
                safe_mmkdir(os.path.join(plugins_dir, "custom_data_docs", "renderers"), exist_ok=True)
                safe_mmkdir(os.path.join(plugins_dir, "custom_data_docs", "styles"), exist_ok=True)
                cls.scaffold_custom_data_docs(plugins_dir)
            else:
                safe_mmkdir(os.path.join(base_dir, directory), exist_ok=True)

        uncommitted_dir = os.path.join(base_dir, cls.GE_UNCOMMITTED_DIR)

        for new_directory in cls.UNCOMMITTED_DIRECTORIES:
            new_directory_path = os.path.join(uncommitted_dir, new_directory)
            safe_mmkdir(
                new_directory_path,
                exist_ok=True
            )

        notebook_path = os.path.join(base_dir, "notebooks")
        for subdir in cls.NOTEBOOK_SUBDIRECTORIES:
            safe_mmkdir(os.path.join(notebook_path, subdir), exist_ok=True)

    @classmethod
    def scaffold_custom_data_docs(cls, plugins_dir):
        """Copy custom data docs templates."""
        styles_template = file_relative_path(
            __file__, "../render/view/static/styles/data_docs_custom_styles_template.css")
        styles_destination_path = os.path.join(
            plugins_dir, "custom_data_docs", "styles", "data_docs_custom_styles.css")
        shutil.copyfile(styles_template, styles_destination_path)

    @classmethod
    def scaffold_notebooks(cls, base_dir):
        """Copy template notebooks into the notebooks directory for a project."""
        template_dir = file_relative_path(__file__, "../init_notebooks/")
        notebook_dir = os.path.join(base_dir, "notebooks/")
        for subdir in cls.NOTEBOOK_SUBDIRECTORIES:
            subdir_path = os.path.join(notebook_dir, subdir)
            for notebook in glob.glob(os.path.join(template_dir, subdir, "*.ipynb")):
                notebook_name = os.path.basename(notebook)
                destination_path = os.path.join(subdir_path, notebook_name)
                shutil.copyfile(notebook, destination_path)

    def list_expectation_suite_names(self):
        """Lists the available expectation suite names."""
        sorted_expectation_suite_names = [i.expectation_suite_name for i in self.list_expectation_suites()]
        sorted_expectation_suite_names.sort()
        return sorted_expectation_suite_names

    def __init__(self, context_root_dir=None):

        # Determine the "context root directory" - this is the parent of "great_expectations" dir
        if context_root_dir is None:
            context_root_dir = self.find_context_root_dir()
        context_root_directory = os.path.abspath(os.path.expanduser(context_root_dir))
        self._context_root_directory = context_root_directory

        project_config = self._load_project_config()

        super(DataContext, self).__init__(
            project_config,
            context_root_directory
        )

    def _load_project_config(self):
        """
        Reads the project configuration from the project configuration file.
        The file may contain ${SOME_VARIABLE} variables - see self._project_config_with_variables_substituted
        for how these are substituted.

        :return: the configuration object read from the file
        """
        path_to_yml = os.path.join(self.root_directory, self.GE_YML)
        try:
            with open(path_to_yml, "r") as data:
                config_dict = yaml.load(data)

        except YAMLError as err:
            raise ge_exceptions.InvalidConfigurationYamlError(
                "Your configuration file is not a valid yml file, likely due to a yml syntax error:\n\n{}".format(err)
            )
        except IOError:
            raise ge_exceptions.ConfigNotFoundError()

        try:
            return DataContextConfig.from_commented_map(config_dict)
        except ge_exceptions.InvalidDataContextConfigError:
            # Just to be explicit about what we intended to catch
            raise

    def _save_project_config(self):
        """Save the current project to disk."""
        logger.debug("Starting DataContext._save_project_config")

        config_filepath = os.path.join(self.root_directory, self.GE_YML)
        with open(config_filepath, "w") as outfile:
            self._project_config.to_yaml(outfile)

    def add_store(self, store_name, store_config):
        logger.debug("Starting DataContext.add_store for store %s" % store_name)

        new_store = super(DataContext, self).add_store(store_name, store_config)
        self._save_project_config()
        return new_store

    def add_datasource(self, name, **kwargs):
        logger.debug("Starting DataContext.add_datasource for datasource %s" % name)

        new_datasource = super(DataContext, self).add_datasource(name, **kwargs)
        self._save_project_config()

        return new_datasource

    @classmethod
    def find_context_root_dir(cls):
        result = None
        yml_path = None
        ge_home_environment = os.getenv("GE_HOME", None)
        if ge_home_environment:
            ge_home_environment = os.path.expanduser(ge_home_environment)
            if os.path.isdir(ge_home_environment) and os.path.isfile(
                os.path.join(ge_home_environment, "great_expectations.yml")
            ):
                result = ge_home_environment
        else:
            yml_path = cls.find_context_yml_file()
            if yml_path:
                result = os.path.dirname(yml_path)

        if result is None:
            raise ge_exceptions.ConfigNotFoundError()

        logger.debug("Using project config: {}".format(yml_path))
        return result

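    # For example (illustrative), the context root can be pinned via the GE_HOME
    # environment variable instead of searching upward from the current directory:
    #
    #     export GE_HOME=/path/to/project/great_expectations
    #
    # If GE_HOME names a directory containing great_expectations.yml, it is used
    # as-is; when GE_HOME is not set, find_context_yml_file() walks up from
    # os.getcwd() instead.
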
    @classmethod
    def find_context_yml_file(cls, search_start_dir=None):
        """Search for the yml file starting here and moving upward."""
        yml_path = None
        if search_start_dir is None:
            search_start_dir = os.getcwd()

        for i in range(4):
            logger.debug("Searching for config file {} ({} layers deep)".format(search_start_dir, i))

            potential_ge_dir = os.path.join(search_start_dir, cls.GE_DIR)

            if os.path.isdir(potential_ge_dir):
                potential_yml = os.path.join(potential_ge_dir, cls.GE_YML)
                if os.path.isfile(potential_yml):
                    yml_path = potential_yml
                    logger.debug("Found config file at " + str(yml_path))
                    break
            # move up one directory
            search_start_dir = os.path.dirname(search_start_dir)

        return yml_path

    @classmethod
    def does_config_exist_on_disk(cls, context_root_dir):
        """Return True if the great_expectations.yml exists on disk."""
        return os.path.isfile(os.path.join(context_root_dir, cls.GE_YML))

    @classmethod
    def is_project_initialized(cls, ge_dir):
        """
        Return True if the project is initialized.

        To be considered initialized, all of the following must be true:
        - all project directories exist (including uncommitted directories)
        - a valid great_expectations.yml is on disk
        - a config_variables.yml is on disk
        - the project has at least one datasource
        - the project has at least one suite
        """
        return (
            cls.does_config_exist_on_disk(ge_dir)
            and cls.all_uncommitted_directories_exist(ge_dir)
            and cls.config_variables_yml_exist(ge_dir)
            and cls._does_context_have_at_least_one_datasource(ge_dir)
            and cls._does_context_have_at_least_one_suite(ge_dir)
        )

    @classmethod
    def does_project_have_a_datasource_in_config_file(cls, ge_dir):
        if not cls.does_config_exist_on_disk(ge_dir):
            return False
        return cls._does_context_have_at_least_one_datasource(ge_dir)

    @classmethod
    def _does_context_have_at_least_one_datasource(cls, ge_dir):
        context = cls._attempt_context_instantiation(ge_dir)
        if not isinstance(context, DataContext):
            return False
        return len(context.list_datasources()) >= 1

    @classmethod
    def _does_context_have_at_least_one_suite(cls, ge_dir):
        context = cls._attempt_context_instantiation(ge_dir)
        if not isinstance(context, DataContext):
            return False
        return len(context.list_expectation_suites()) >= 1

    @classmethod
    def _attempt_context_instantiation(cls, ge_dir):
        try:
            context = DataContext(ge_dir)
            return context
        except (
            ge_exceptions.DataContextError,
            ge_exceptions.InvalidDataContextConfigError
        ) as e:
            logger.debug(e)
            # Instantiation failed; fall through and return None explicitly.
            return None


class ExplorerDataContext(DataContext):

    def __init__(self, context_root_dir=None, expectation_explorer=True):
        """
            expectation_explorer: If True, load the expectation explorer manager, which will modify GE return objects \
            to include ipython notebook widgets.
        """

        super(ExplorerDataContext, self).__init__(
            context_root_dir
        )

        self._expectation_explorer = expectation_explorer
        if expectation_explorer:
            from great_expectations.jupyter_ux.expectation_explorer import ExpectationExplorer
            self._expectation_explorer_manager = ExpectationExplorer()

    def update_return_obj(self, data_asset, return_obj):
        """Helper called by data_asset.

        Args:
            data_asset: The data_asset whose validation produced the current return object
            return_obj: the return object to update

        Returns:
            return_obj: the return object, potentially changed into a widget by the configured expectation explorer
        """
        if self._expectation_explorer:
            return self._expectation_explorer_manager.create_expectation_widget(data_asset, return_obj)
        else:
            return return_obj


def _get_metric_configuration_tuples(metric_configuration, base_kwargs=None):
    if base_kwargs is None:
        base_kwargs = {}

    if isinstance(metric_configuration, string_types):
        return [(metric_configuration, base_kwargs)]

    metric_configurations_list = []
    for kwarg_name in metric_configuration.keys():
        if not isinstance(metric_configuration[kwarg_name], dict):
            raise ge_exceptions.DataContextError("Invalid metric_configuration: each key must contain a "
                                                 "dictionary.")
        if kwarg_name == "metric_kwargs_id":  # this special case allows a hash of multiple kwargs
            for metric_kwargs_id in metric_configuration[kwarg_name].keys():
                if base_kwargs != {}:
                    raise ge_exceptions.DataContextError("Invalid metric_configuration: when specifying "
                                                         "metric_kwargs_id, no other keys or values may be defined.")
                if not isinstance(metric_configuration[kwarg_name][metric_kwargs_id], list):
                    raise ge_exceptions.DataContextError("Invalid metric_configuration: each value must contain a "
                                                         "list.")
                metric_configurations_list += [(metric_name, {"metric_kwargs_id": metric_kwargs_id}) for metric_name
                                               in metric_configuration[kwarg_name][metric_kwargs_id]]
        else:
            for kwarg_value in metric_configuration[kwarg_name].keys():
                base_kwargs.update({kwarg_name: kwarg_value})
                if not isinstance(metric_configuration[kwarg_name][kwarg_value], list):
                    raise ge_exceptions.DataContextError("Invalid metric_configuration: each value must contain a "
                                                         "list.")
                for nested_configuration in metric_configuration[kwarg_name][kwarg_value]:
                    metric_configurations_list += _get_metric_configuration_tuples(nested_configuration,
                                                                                   base_kwargs=base_kwargs)

    return metric_configurations_list
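
# A worked sketch of the metric_configuration format this function accepts
# (illustrative; the metric and column names are hypothetical). A plain string
# selects a metric with the inherited kwargs, while nested dictionaries fan out
# over kwarg values:
#
#     _get_metric_configuration_tuples({
#         "column": {
#             "provider_id": ["expect_column_values_to_not_be_null.result.unexpected_count"]
#         }
#     })
#     # -> [("expect_column_values_to_not_be_null.result.unexpected_count",
#     #      {"column": "provider_id"})]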