• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

berserkhmdvhb / charfinder / 15835271300

23 Jun 2025 09:13PM UTC coverage: 99.189% (+0.5%) from 98.704%
15835271300

push

github

berserkhmdvhb
fixed usage of normalization_profile in codebase

9 of 10 new or added lines in 6 files covered. (90.0%)

4 existing lines in 3 files now uncovered.

1467 of 1479 relevant lines covered (99.19%)

7.94 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

99.01
/src/charfinder/validators.py
1
"""
2
Validation utilities for CharFinder configuration and CLI input.
3

4
This module centralizes all validation logic for both core and CLI components,
5
ensuring consistent, safe interpretation of user inputs such as fuzzy algorithm names,
6
thresholds, color modes, and match modes. It also validates cache structures, file paths,
7
and Unicode data configurations.
8

9
Functions:
10
    is_supported_fuzzy_algo(): Check if a fuzzy algorithm name is supported.
11
    _normalize_and_validate_fuzzy_algo():
12
        Internal normalization and strict validation of fuzzy algorithm.
13
    validate_fuzzy_algo(): Public validator for fuzzy algorithm, with CLI/core context handling.
14
    apply_fuzzy_defaults(): Populate missing fuzzy settings in CLI args using fallback config.
15
    _validate_threshold_internal(): Internal float threshold range check.
16
    threshold_range(): Argparse-compatible converter for threshold values.
17
    validate_threshold(): Validate threshold, allowing CLI/default fallback logic.
18
    resolve_effective_threshold(): Resolve final threshold using CLI, env var, or default.
19
    cast_color_mode(): Cast a string to ColorMode type.
20
    validate_color_mode(): Validate and return effective color mode string.
21
    resolve_effective_color_mode(): Determine color mode from CLI or env.
22
    validate_fuzzy_match_mode(): Validate and normalize a fuzzy match mode string.
23
    validate_exact_match_mode(): Validate and normalize an exact match mode string.
24
    validate_dict_str_keys(): Ensure a nested string-keyed dictionary structure (e.g., name cache).
25
    validate_cache_rebuild_flag(): Validate that a rebuild flag is a proper boolean.
26
    validate_normalized_name(): Ensure a normalized name is a valid non-empty string.
27
    validate_unicode_data_url(): Validate that a Unicode data URL is well-formed.
28
    validate_files_and_url(): Validate both the Unicode data URL and the local file path.
29
    validate_cache_file_path(): Normalize a given or default cache file path (need not exist).
30
    validate_unicode_data_file(): Confirm that a given file path points to an existing file.
31
    resolve_cli_settings(): Resolve CLI-derived color mode, use_color flag, and threshold.
32

33
Custom argparse:
34
    ValidateFuzzyAlgoAction: Argparse Action subclass for --fuzzy-algo validation.
35

36
Constants:
37
    ERROR_INVALID_THRESHOLD: Message used for invalid threshold input.
38
    ERROR_INVALID_NAME: Message used when a name is not a valid non-empty string.
39
    ERROR_INVALID_CACHE_PATH: Message used when the cache file path is not valid.
40
    ENV_MATCH_THRESHOLD: Env var key for threshold override.
41
    ENV_COLOR_MODE: Env var key for color mode override.
42

43
This module is shared across CLI and core layers to prevent duplicated validation logic
44
and ensure strict consistency for all user- or config-sourced inputs.
45
"""
46

47
import os
8✔
48
import sys
8✔
49
from argparse import Action, ArgumentParser, ArgumentTypeError, Namespace
8✔
50
from collections.abc import Mapping, Sequence
8✔
51
from pathlib import Path
8✔
52
from typing import Any, Literal, cast
8✔
53
from urllib.parse import urlparse
8✔
54

55
from charfinder.config.aliases import (
8✔
56
    ColorMode,
57
    ExactMatchMode,
58
    FuzzyAlgorithm,
59
    FuzzyMatchMode,
60
    HybridAggFunc,
61
    NormalizationProfile,
62
)
63
from charfinder.config.constants import (
8✔
64
    DEFAULT_COLOR_MODE,
65
    DEFAULT_NORMALIZATION_PROFILE,
66
    DEFAULT_SHOW_SCORE,
67
    DEFAULT_THRESHOLD,
68
    ENV_COLOR_MODE,
69
    ENV_MATCH_THRESHOLD,
70
    ENV_NORMALIZATION_PROFILE,
71
    ENV_SHOW_SCORE,
72
    FUZZY_ALGO_ALIASES,
73
    VALID_COLOR_MODES,
74
    VALID_EXACT_MATCH_MODES,
75
    VALID_FUZZY_MATCH_MODES,
76
    VALID_HYBRID_AGG_FUNCS,
77
    VALID_NORMALIZATION_PROFILES,
78
    VALID_OUTPUT_FORMATS,
79
    VALID_SHOW_SCORES,
80
    VALID_SHOW_SCORES_FALSE,
81
    VALID_SHOW_SCORES_TRUE,
82
)
83
from charfinder.config.messages import (
8✔
84
    MSG_ERROR_EMPTY_FUZZY_ALGO_LIST,
85
    MSG_ERROR_ENV_INVALID_THRESHOLD,
86
    MSG_ERROR_EXPECTED_BOOL,
87
    MSG_ERROR_EXPECTED_DICT,
88
    MSG_ERROR_EXPECTED_DICT_KEY,
89
    MSG_ERROR_EXPECTED_DICT_VAL,
90
    MSG_ERROR_FILE_NOT_FOUND,
91
    MSG_ERROR_INVALID_AGG_FUNC,
92
    MSG_ERROR_INVALID_COLOR_MODE_WITH_VALUE,
93
    MSG_ERROR_INVALID_EXACT_MATCH_MODE,
94
    MSG_ERROR_INVALID_FUZZY_MATCH_MODE,
95
    MSG_ERROR_INVALID_NAME,
96
    MSG_ERROR_INVALID_NORMALIZATION_PROFILE,
97
    MSG_ERROR_INVALID_OUTPUT_FORMAT,
98
    MSG_ERROR_INVALID_PATH_TYPE,
99
    MSG_ERROR_INVALID_SHOW_SCORE_VALUE,
100
    MSG_ERROR_INVALID_THRESHOLD,
101
    MSG_ERROR_INVALID_THRESHOLD_TYPE,
102
    MSG_ERROR_INVALID_URL,
103
    MSG_ERROR_MISSING_FUZZY_ALGO_VALUE,
104
    MSG_ERROR_UNSUPPORTED_ALGO_INPUT,
105
    MSG_ERROR_UNSUPPORTED_URL_SCHEME,
106
    MSG_ERROR_VALIDATION_FAILED,
107
)
108
from charfinder.config.settings import get_cache_file
8✔
109
from charfinder.config.types import (
8✔
110
    FuzzyConfig,
111
    NameCache,
112
)
113
from charfinder.utils.formatter import echo, should_use_color
8✔
114
from charfinder.utils.logger_styles import format_warning
8✔
115

116
# ------------------------------------------------------------------------
117
# Fuzzy Algorithm Validators
118
# ------------------------------------------------------------------------
119

120

121
def _normalize_and_validate_fuzzy_algo(fuzzy_algo: str) -> FuzzyAlgorithm:
    """
    Resolve a user-supplied fuzzy algorithm name to a registered name.

    The input is stripped, lowercased, and has dashes replaced by underscores.
    The cleaned name is then looked up in FUZZY_ALGO_ALIASES (falling back to
    the cleaned name itself) and must be present in FUZZY_ALGORITHM_REGISTRY.

    Args:
        fuzzy_algo (str): Alias or canonical name of the fuzzy algorithm.

    Returns:
        FuzzyAlgorithm: The resolved name found in the registry.

    Raises:
        ValueError: If the resolved name is not in the registry. The message
            lists every alias and registry key, sorted.
    """
    # Imported inside the function (lazy import) rather than at module level.
    from charfinder.fuzzymatchlib import FUZZY_ALGORITHM_REGISTRY  # noqa: PLC0415

    cleaned = fuzzy_algo.strip().lower().replace("-", "_")
    canonical = FUZZY_ALGO_ALIASES.get(cleaned, cleaned)

    if canonical not in FUZZY_ALGORITHM_REGISTRY:
        known_names = sorted({*FUZZY_ALGO_ALIASES, *FUZZY_ALGORITHM_REGISTRY})
        raise ValueError(
            MSG_ERROR_UNSUPPORTED_ALGO_INPUT.format(
                valid_options=", ".join(known_names), name=fuzzy_algo
            )
        )

    return canonical  # type: ignore[return-value]
153

154

155
def validate_fuzzy_algo(
    fuzzy_algo: str, *, source: Literal["cli", "core"] = "core"
) -> FuzzyAlgorithm:
    """
    Return a fuzzy algorithm name suitable for use by the matcher.

    For any source other than "cli" the name is normalized and checked by
    `_normalize_and_validate_fuzzy_algo`. For "cli" the value is returned
    unchanged, on the assumption that argparse already validated it.

    Args:
        fuzzy_algo (str): The fuzzy algorithm name to validate.
        source (Literal["cli", "core"], optional):
            Calling context. "cli" skips validation. Defaults to "core".

    Returns:
        FuzzyAlgorithm: The (possibly normalized) algorithm name.

    Raises:
        ValueError: If the name is unsupported and source is not "cli".
    """
    if source != "cli":
        return _normalize_and_validate_fuzzy_algo(fuzzy_algo)
    # CLI values are trusted as-is; only the static type is narrowed.
    return cast("FuzzyAlgorithm", fuzzy_algo)
8✔
179

180

181
class ValidateFuzzyAlgoAction(Action):
    """
    Argparse action that validates and normalizes a fuzzy algorithm name.

    When argparse invokes the action, the supplied value is passed through
    `_normalize_and_validate_fuzzy_algo` and the result is stored on the
    namespace under this action's `dest`.

    Example:
        parser.add_argument(
            "--fuzzy-algo",
            action=ValidateFuzzyAlgoAction,
            help="Specify the fuzzy matching algorithm to use.",
        )

    Methods:
        __call__: Invoked by argparse to process and validate the argument.
    """

    def __init__(
        self,
        option_strings: Sequence[str],
        dest: str,
        **kwargs: Mapping[str, Any],
    ) -> None:
        """Forward all construction arguments unchanged to `argparse.Action`."""
        super().__init__(option_strings, dest, **kwargs)  # type: ignore[arg-type]

    def __call__(
        self,
        _: ArgumentParser,
        namespace: Namespace,
        values: str | Sequence[str] | None,
        __: str | None = None,
    ) -> None:
        """
        Validate the supplied algorithm name and store it on the namespace.

        Args:
            _ (ArgumentParser): The argument parser (unused).
            namespace (Namespace): The argparse namespace to update.
            values (str | Sequence[str] | None): Raw value(s) for the argument.
                For a non-string sequence only the first element is used.
            __ (str | None): The option string used (unused).

        Raises:
            ValueError: If no value was given, the sequence is empty, or the
                name is not a supported fuzzy algorithm.
        """
        if values is None:
            raise ValueError(MSG_ERROR_MISSING_FUZZY_ALGO_VALUE)

        # A str is itself a Sequence, so it is excluded explicitly.
        is_multi = isinstance(values, Sequence) and not isinstance(values, str)
        if is_multi and not values:
            raise ValueError(MSG_ERROR_EMPTY_FUZZY_ALGO_LIST)

        raw_name = values[0] if is_multi else values
        setattr(namespace, self.dest, _normalize_and_validate_fuzzy_algo(raw_name))
8✔
237

238

239
def apply_fuzzy_defaults(args: Namespace, config: FuzzyConfig) -> None:
    """
    Fill in missing fuzzy settings on the parsed CLI args.

    Nothing happens unless `args.fuzzy` is truthy. When it is, `fuzzy_algo`
    and `fuzzy_match_mode` are each copied from `config` if the attribute is
    absent from `args` or holds a falsy value.

    Args:
        args (Namespace): Parsed CLI arguments; mutated in place.
        config (FuzzyConfig): Source of the fallback values.

    Returns:
        None
    """
    if not args.fuzzy:
        return

    for attr in ("fuzzy_algo", "fuzzy_match_mode"):
        if not getattr(args, attr, None):
            setattr(args, attr, getattr(config, attr))
8✔
259

260

261
# ------------------------------------------------------------------------
262
# Threshold Validators
263
# ------------------------------------------------------------------------
264

265

266
def _validate_threshold_internal(threshold: float) -> float:
8✔
267
    """
268
    Validate that a threshold is within the accepted range [0.0, 1.0].
269

270
    Args:
271
        threshold (float): The threshold value to validate.
272

273
    Returns:
274
        float: The validated threshold.
275

276
    Raises:
277
        TypeError: If the input is not a float or int.
278
        ValueError: If the threshold is outside the range [0.0, 1.0].
279
    """
280
    if not isinstance(threshold, (float, int)):
8✔
281
        raise TypeError(MSG_ERROR_INVALID_THRESHOLD_TYPE)
8✔
282
    if threshold < 0.0 or threshold > 1.0:
8✔
283
        raise ValueError(MSG_ERROR_INVALID_THRESHOLD)
8✔
284
    return float(threshold)
8✔
285

286

287
def threshold_range(value: str) -> float:
    """
    Parse a string as a float threshold and range-check it.

    Intended for use as an argparse `type=...` converter: the string is
    converted with `float()` and the result is validated by
    `_validate_threshold_internal`.

    Args:
        value (str): The string input from the command line.

    Returns:
        float: The validated float threshold value.

    Raises:
        ValueError: If the string cannot be converted to float, or the
            converted value fails the range check.
    """
    try:
        parsed = float(value)
    except ValueError as exc:
        raise ValueError(MSG_ERROR_INVALID_THRESHOLD) from exc
    else:
        return _validate_threshold_internal(parsed)
8✔
308

309

310
def validate_threshold(
    threshold: float | None, *, source: Literal["cli", "core"] = "core"
) -> float:
    """
    Return a usable threshold, substituting the default when none is given.

    Args:
        threshold (float | None): The threshold value to validate.
        source (Literal["cli", "core"], optional):
            Calling context. With "cli" a non-None value is returned as-is
            (argparse is assumed to have validated it); otherwise the value
            is range-checked. Defaults to "core".

    Returns:
        float: `DEFAULT_THRESHOLD` when `threshold` is None, else the
        threshold (validated unless source is "cli").

    Raises:
        TypeError, ValueError: Propagated from `_validate_threshold_internal`
            when source is not "cli" and the value is invalid.
    """
    if threshold is None:
        return DEFAULT_THRESHOLD
    if source == "cli":
        return threshold
    return _validate_threshold_internal(threshold)
8✔
333

334

335
def resolve_effective_threshold(cli_threshold: float | None, *, use_color: bool = True) -> float:
    """
    Pick the threshold to use from CLI input, environment, or the default.

    Priority:
        1. CLI-provided threshold, passed through
           `validate_threshold(..., source="cli")`.
        2. The environment variable named by `ENV_MATCH_THRESHOLD`, if it
           parses as a float and passes the range check.
        3. `DEFAULT_THRESHOLD`.

    Args:
        cli_threshold (float | None): Threshold value from CLI input, if any.
        use_color (bool, optional): Whether the warning message is formatted
            with color. Defaults to True.

    Returns:
        float: The resolved threshold.

    Logs:
        A warning (shown and logged via `echo`) if the environment variable
        is set but cannot be used; the default is returned in that case.
    """
    if cli_threshold is not None:
        return validate_threshold(cli_threshold, source="cli")

    raw_env = os.getenv(ENV_MATCH_THRESHOLD)
    if raw_env is None:
        return DEFAULT_THRESHOLD

    try:
        return _validate_threshold_internal(float(raw_env))
    except ValueError:
        # Both an unparsable string and an out-of-range number land here.
        def _warning_style(text):  # noqa: ANN001, ANN202
            return format_warning(text, use_color=use_color)

        echo(
            msg=MSG_ERROR_ENV_INVALID_THRESHOLD.format(
                env_var=ENV_MATCH_THRESHOLD, value=raw_env
            ),
            style=_warning_style,
            show=True,
            log=True,
            log_method="warning",
        )
    return DEFAULT_THRESHOLD
8✔
375

376

377
# ------------------------------------------------------------------------
378
# Color Mode & Match Mode Validators
379
# ------------------------------------------------------------------------
380
def cast_color_mode(value: str) -> ColorMode:
    """
    Narrow a string to the ColorMode type for the type checker.

    No runtime check is performed; the caller is expected to have verified
    that the value is one of the valid color modes.

    Args:
        value (str): The color mode string.

    Returns:
        ColorMode: The same value, typed as ColorMode.
    """
    as_color_mode = cast("ColorMode", value)
    return as_color_mode
8✔
394

395

396
def validate_color_mode(
    color_mode: str | None, *, source: Literal["cli", "core"] = "core"
) -> ColorMode:
    """
    Validate a color mode string against VALID_COLOR_MODES.

    Args:
        color_mode (str | None): The color mode string to validate.
        source (Literal["cli", "core"]): Context of invocation.

    Returns:
        ColorMode: `color_mode` when it is a valid mode; otherwise
        `DEFAULT_COLOR_MODE` when source is not "core".

    Raises:
        ValueError: If color_mode is invalid and source is "core".
    """
    if color_mode in VALID_COLOR_MODES:
        return cast_color_mode(color_mode)

    # Invalid value: only the core context treats this as an error.
    if source != "core":
        return DEFAULT_COLOR_MODE

    allowed = ", ".join(sorted(VALID_COLOR_MODES))
    raise ValueError(
        MSG_ERROR_INVALID_COLOR_MODE_WITH_VALUE.format(value=color_mode, valid_options=allowed)
    )
8✔
424

425

426
def resolve_effective_color_mode(cli_color_mode: str | None) -> ColorMode:
    """
    Pick the color mode to use from CLI input, environment, or the default.

    Resolution order:
    1. CLI-supplied value, passed through `validate_color_mode(..., source="cli")`.
    2. The environment variable named by `ENV_COLOR_MODE`, if its value is
       in VALID_COLOR_MODES.
    3. `DEFAULT_COLOR_MODE`.

    Args:
        cli_color_mode (str | None): The color mode string from CLI arguments.

    Returns:
        ColorMode: The resolved color mode.
    """
    if cli_color_mode is None:
        from_env = os.getenv(ENV_COLOR_MODE)
        if from_env in VALID_COLOR_MODES:
            return cast_color_mode(from_env)
        return DEFAULT_COLOR_MODE

    return validate_color_mode(cli_color_mode, source="cli")
8✔
449

450

451
def validate_fuzzy_match_mode(mode: str) -> FuzzyMatchMode:
    """
    Lowercase a fuzzy match mode and check it against the supported modes.

    Args:
        mode (str): The fuzzy match mode to validate.

    Returns:
        FuzzyMatchMode: The lowercased, validated fuzzy match mode.

    Raises:
        ValueError: If the lowercased mode is not in VALID_FUZZY_MATCH_MODES.
            The message reports the lowercased value.
    """
    lowered = mode.lower()
    if lowered in VALID_FUZZY_MATCH_MODES:
        return cast("FuzzyMatchMode", lowered)

    allowed = ", ".join(sorted(VALID_FUZZY_MATCH_MODES))
    raise ValueError(
        MSG_ERROR_INVALID_FUZZY_MATCH_MODE.format(value=lowered, valid_options=allowed)
    )
8✔
475

476

477
def validate_exact_match_mode(exact_match_mode: str) -> ExactMatchMode:
    """
    Check an exact match mode against the supported modes.

    The value is compared as given (no case folding or trimming).

    Args:
        exact_match_mode (str): The exact match mode to validate.

    Returns:
        ExactMatchMode: The same value, typed as ExactMatchMode.

    Raises:
        ValueError: If the mode is not one of VALID_EXACT_MATCH_MODES.
    """
    if exact_match_mode in VALID_EXACT_MATCH_MODES:
        return cast("ExactMatchMode", exact_match_mode)

    allowed = ", ".join(sorted(VALID_EXACT_MATCH_MODES))
    raise ValueError(
        MSG_ERROR_INVALID_EXACT_MATCH_MODE.format(value=exact_match_mode, valid_options=allowed)
    )
8✔
500

501

502
# ------------------------------------------------------------------------
503
# Cache Validators
504
# ------------------------------------------------------------------------
505

506

507
def validate_dict_str_keys(name_cache: NameCache) -> NameCache:
    """
    Check that a name cache is a dict of string keys mapping to dicts.

    Args:
        name_cache (NameCache): The cache object to check.

    Returns:
        NameCache: The same object, unchanged, when every check passes.

    Raises:
        TypeError: If `name_cache` is not a dict, a key is not a str, or a
            value is not a dict. Checking stops at the first offending entry.
    """
    if not isinstance(name_cache, dict):
        raise TypeError(MSG_ERROR_EXPECTED_DICT)

    for entry_key, entry in name_cache.items():
        key_is_str = isinstance(entry_key, str)
        if not key_is_str:
            raise TypeError(
                MSG_ERROR_EXPECTED_DICT_KEY.format(type=type(entry_key), key=entry_key)
            )
        entry_is_dict = isinstance(entry, dict)
        if not entry_is_dict:
            raise TypeError(MSG_ERROR_EXPECTED_DICT_VAL.format(type=type(entry), key=entry_key))

    return name_cache
8✔
518

519

520
def validate_cache_rebuild_flag(*, force_rebuild: bool) -> bool:
    """
    Ensure the `force_rebuild` flag is an actual bool.

    Args:
        force_rebuild (bool): A flag indicating whether to force rebuild the cache.

    Raises:
        TypeError: If `force_rebuild` is not a bool instance.

    Returns:
        bool: The flag, unchanged.
    """
    if isinstance(force_rebuild, bool):
        return force_rebuild
    raise TypeError(MSG_ERROR_EXPECTED_BOOL.format(type=type(force_rebuild)))
8✔
536

537

538
def validate_normalized_name(name: str) -> str:
    """
    Ensure a name is a string containing at least one non-whitespace character.

    Args:
        name (str): The name string to validate.

    Raises:
        ValueError: If the name is not a string (including None) or is
            empty/whitespace only.

    Returns:
        str: The name, unchanged (it is not stripped).
    """
    is_usable = isinstance(name, str) and bool(name.strip())
    if not is_usable:
        raise ValueError(MSG_ERROR_INVALID_NAME.format(value=name))
    return name
8✔
557

558

559
# ------------------------------------------------------------------------
560
# Unicode Data Validators
561
# ------------------------------------------------------------------------
562

563

564
def validate_unicode_data_url(url: str) -> bool:
    """
    Check that a string is an HTTP or HTTPS URL with a scheme and a netloc.

    The string is parsed with `urllib.parse.urlparse`; it must yield a
    non-empty scheme and netloc, and the scheme must be "http" or "https".

    Args:
        url (str): The URL string to validate.

    Raises:
        ValueError: If the scheme or netloc is missing, or the scheme is
            neither HTTP nor HTTPS.

    Returns:
        bool: Always True when no exception is raised.
    """
    parts = urlparse(url)
    scheme, host = parts.scheme, parts.netloc

    if not (scheme and host):
        raise ValueError(MSG_ERROR_INVALID_URL.format(url=url))
    if scheme.lower() not in {"http", "https"}:
        raise ValueError(MSG_ERROR_UNSUPPORTED_URL_SCHEME.format(scheme=scheme, url=url))
    return True
8✔
586

587

588
def validate_cache_file_path(cache_file_path: Path | str | None) -> Path:
    """
    Normalize a cache file path argument to a `Path`.

    The file is not required to exist, so this is suitable for paths that
    are about to be created.

    Args:
        cache_file_path (Path | str | None): The path to normalize. None
            selects the value returned by `get_cache_file()`; a str is
            wrapped in `Path`; a Path is returned as-is.

    Raises:
        TypeError: If the input is neither None, a str, nor a Path.

    Returns:
        Path: The resulting path object.
    """
    if cache_file_path is None:
        return get_cache_file()
    if isinstance(cache_file_path, Path):
        return cache_file_path
    if isinstance(cache_file_path, str):
        return Path(cache_file_path)
    raise TypeError(MSG_ERROR_INVALID_PATH_TYPE.format(type=type(cache_file_path)))
8✔
614

615

616
def validate_files_and_url(
    unicode_data_url: str,
    unicode_data_file: Path,
    *,
    show: bool = True,
) -> str | None:
    """
    Run the URL validator and the path validator, reporting a failure as text.

    Calls `validate_unicode_data_url` and then `validate_cache_file_path`.
    A `ValueError` from either is converted into a message that is echoed
    to stderr and returned; other exception types are not caught here.

    Args:
        unicode_data_url (str): The URL for the Unicode data file.
        unicode_data_file (Path): The local path to the Unicode data file.
        show (bool): Forwarded to `echo` as its `show` argument.

    Returns:
        str | None: The failure message, or None if both validators passed.
    """
    try:
        validate_unicode_data_url(unicode_data_url)
        validate_cache_file_path(unicode_data_file)
    except ValueError as exc:
        failure = MSG_ERROR_VALIDATION_FAILED.format(error=exc)
    else:
        return None

    echo(msg=failure, style=format_warning, stream=sys.stderr, show=show)
    return failure
8✔
641

642

643
def validate_unicode_data_file(file_path: Path) -> bool:
    """
    Check that the given path refers to an existing regular file.

    Args:
        file_path (Path): The path to the Unicode data file.

    Raises:
        FileNotFoundError: If `file_path.is_file()` is false.

    Returns:
        bool: Always True when no exception is raised.
    """
    if file_path.is_file():
        return True
    raise FileNotFoundError(MSG_ERROR_FILE_NOT_FOUND.format(path=file_path))
8✔
659

660

661
def resolve_cli_settings(args: Namespace) -> tuple[str, bool, float]:
    """
    Resolve color mode, color usage, and threshold from parsed CLI args.

    The color mode is resolved from `args.color`, turned into a use-color
    flag via `should_use_color`, and that flag is then used when resolving
    the threshold from `args.threshold`.

    Args:
        args (Namespace): The parsed CLI arguments namespace.

    Returns:
        tuple[str, bool, float]: A tuple containing:
            - color_mode (str): The effective color mode.
            - use_color (bool): Whether color output should be used.
            - threshold (float): The effective match threshold.
    """
    effective_mode = resolve_effective_color_mode(args.color)
    colorize = should_use_color(effective_mode)
    effective_threshold = resolve_effective_threshold(args.threshold, use_color=colorize)
    return (effective_mode, colorize, effective_threshold)
8✔
681

682

683
# ------------------------------------------------------------------------
684
# Output Format Validator
685
# ------------------------------------------------------------------------
686

687

688
def validate_output_format(fmt: str) -> str:
    """
    Check an output format string against VALID_OUTPUT_FORMATS.

    Args:
        fmt (str): The output format string to validate.

    Raises:
        ValueError: If the format is not one of the supported values.

    Returns:
        str: The format, unchanged.
    """
    if fmt in VALID_OUTPUT_FORMATS:
        return fmt

    allowed = ", ".join(sorted(VALID_OUTPUT_FORMATS))
    raise ValueError(MSG_ERROR_INVALID_OUTPUT_FORMAT.format(format=fmt, valid_options=allowed))
8✔
710

711

712
def validate_hybrid_agg_fn(fn: str) -> HybridAggFunc:
    """
    Check a hybrid aggregation function name against VALID_HYBRID_AGG_FUNCS.

    Args:
        fn: Aggregation function name.

    Returns:
        HybridAggFunc: The same name, typed as HybridAggFunc.

    Raises:
        ValueError: If the name is not a supported aggregation function.
    """
    if fn in VALID_HYBRID_AGG_FUNCS:
        return cast("HybridAggFunc", fn)

    allowed = ", ".join(sorted(VALID_HYBRID_AGG_FUNCS))
    raise ValueError(MSG_ERROR_INVALID_AGG_FUNC.format(func=fn, valid_options=allowed))
8✔
732

733

734
def validate_name_cache_structure(name_cache: object) -> None:
    """
    Check that a name cache is a dict whose values carry the required keys.

    Every value must be a dict containing both 'original' and 'normalized'.
    Keys of the outer dict are not inspected. Checking stops at the first
    offending entry.

    Args:
        name_cache (object): The name cache object to validate.

    Raises:
        TypeError: If name_cache is not a dict, or an entry is not a dict.
        ValueError: If an entry lacks 'original' or 'normalized'.
    """
    if not isinstance(name_cache, dict):
        message = "Expected name_cache to be a dict."
        raise TypeError(message)

    required_fields = ("original", "normalized")
    for char, info in name_cache.items():
        if not isinstance(info, dict):
            message = f"Invalid entry for {char!r}: expected a dict."
            raise TypeError(message)
        if not all(field in info for field in required_fields):
            message = (
                f"Missing required keys in name_cache entry for {char!r}. "
                "Expected keys: 'original', 'normalized'."
            )
            raise ValueError(message)
8✔
761

762

763
def validate_normalization_profile(
    value: str | None, *, source: Literal["cli", "env"] = "cli"
) -> NormalizationProfile:
    """
    Validate and normalize a normalization profile string input.

    The input is stripped of surrounding whitespace and lowercased before
    being compared against VALID_NORMALIZATION_PROFILES, matching how the
    other string validators in this module treat user input.

    Args:
        value: Input string from CLI or environment. None selects
            DEFAULT_NORMALIZATION_PROFILE.
        source: Indicates input origin ("cli" or "env"); only used in the
            error message.

    Returns:
        A valid NormalizationProfile literal.

    Raises:
        ValueError: If the profile name is not recognized. The message lists
            the valid options in sorted order.
    """
    if value is None:
        return DEFAULT_NORMALIZATION_PROFILE

    cleaned = value.strip().lower()
    if cleaned in VALID_NORMALIZATION_PROFILES:
        return cast("NormalizationProfile", cleaned)

    # Sorted so the message is deterministic even if the options are a set,
    # consistent with the other validators in this module.
    allowed = ", ".join(sorted(VALID_NORMALIZATION_PROFILES))
    raise ValueError(
        MSG_ERROR_INVALID_NORMALIZATION_PROFILE.format(
            value=value, source=source, valid_options=allowed
        )
    )
791

792

793
def resolve_effective_normalization_profile(
    cli_value: str | None,
) -> NormalizationProfile:
    """
    Pick the normalization profile from CLI input, environment, or default.

    A CLI value is validated strictly (an invalid value raises). When no CLI
    value is given, the environment variable named by
    `ENV_NORMALIZATION_PROFILE` is tried; an invalid environment value is
    ignored and the default is used instead.

    Args:
        cli_value: Value from CLI or None.

    Returns:
        A valid NormalizationProfile.
    """
    if cli_value is not None:
        return validate_normalization_profile(cli_value, source="cli")

    candidate = os.getenv(ENV_NORMALIZATION_PROFILE)
    if candidate is None:
        return DEFAULT_NORMALIZATION_PROFILE

    try:
        return validate_normalization_profile(candidate, source="env")
    except ValueError:
        # An unusable environment value falls back to the default.
        return DEFAULT_NORMALIZATION_PROFILE
8✔
816

817

818
def validate_show_score(value: str) -> bool:
    """
    Convert a textual show_score value to a boolean.

    The input is stripped and lowercased before lookup.

    Args:
        value (str): A string representation of a boolean.

    Returns:
        bool: True if the cleaned value is in VALID_SHOW_SCORES_TRUE, False
        if it is in VALID_SHOW_SCORES_FALSE.

    Raises:
        ArgumentTypeError: If the value is in neither set.
    """
    token = value.strip().lower()

    # The "true" set is consulted first, then the "false" set.
    for accepted, outcome in ((VALID_SHOW_SCORES_TRUE, True), (VALID_SHOW_SCORES_FALSE, False)):
        if token in accepted:
            return outcome

    allowed = ", ".join(sorted(VALID_SHOW_SCORES))
    raise ArgumentTypeError(
        MSG_ERROR_INVALID_SHOW_SCORE_VALUE.format(value=value, valid_options=allowed)
    )
841

842

843
def resolve_effective_show_score(*, cli_value: bool | None) -> bool:
    """
    Decide whether scores are shown, from CLI input, environment, or default.

    Resolution order:
    1. CLI-supplied value, when it is not None.
    2. The environment variable named by `ENV_SHOW_SCORE`, parsed with
       `validate_show_score`.
    3. `DEFAULT_SHOW_SCORE` (also used when the environment value is invalid).

    Args:
        cli_value (bool | None): The value from the CLI (already a boolean if set).

    Returns:
        bool: Final resolved value indicating whether to show scores.
    """
    if cli_value is not None:
        return cli_value

    raw_env = os.getenv(ENV_SHOW_SCORE)
    if raw_env is None:
        return DEFAULT_SHOW_SCORE

    try:
        return validate_show_score(raw_env)
    except (ValueError, ArgumentTypeError):
        # An unusable environment value falls back to the default.
        return DEFAULT_SHOW_SCORE
8✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc