• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

berserkhmdvhb / charfinder / 15812117954

22 Jun 2025 11:54PM UTC coverage: 93.405% (+0.9%) from 92.472%
15812117954

push

github

berserkhmdvhb
fixing unit tests

5 of 5 new or added lines in 3 files covered. (100.0%)

85 existing lines in 11 files now uncovered.

1388 of 1486 relevant lines covered (93.41%)

7.47 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

95.52
/src/charfinder/core/unicode_data_loader.py
1
"""
2
Unicode Data Loader for CharFinder.
3

4
This module handles the loading of alternate names from the UnicodeData.txt file. It attempts
5
to load the locally cached copy first and downloads the file from the internet only when no
6
usable local copy exists; if that download fails, an empty mapping is returned.
7

8
Key Features:
9
- Downloads UnicodeData.txt if not found locally.
10
- Reads the local file and parses the content.
11
- Returns a dictionary of characters and their alternate names.
12
- Handles errors and exceptions from file operations and network issues.
13

14
Functions:
15
- load_alternate_names(*, show: bool = True, use_color: bool = True):
16
    Loads alternate names from the UnicodeData.txt file.
17
- download_and_cache_unicode_data
18
    (unicode_data_url: str, unicode_data_file: Path, *, show: bool = True, use_color: bool = True):
19
    Downloads and caches the UnicodeData.txt file if not found locally.
20
- load_unicode_data_from_file(unicode_data_file: Path, *, show: bool = True):
21
    Reads Unicode data from a local file.
22
- parse_unicode_data(text: str, *, show: bool = True):
23
    Parses the Unicode data into a dictionary of alternate names.
24
"""
25

26
# ---------------------------------------------------------------------
27
# Imports
28
# ---------------------------------------------------------------------
29

30
import sys
8✔
31
from pathlib import Path
8✔
32
from urllib.error import URLError
8✔
33
from urllib.request import urlopen
8✔
34

35
from charfinder.config.constants import ALT_NAME_INDEX, EXPECTED_MIN_FIELDS
8✔
36
from charfinder.config.messages import (
8✔
37
    MSG_INFO_DOWNLOAD_SUCCESS,
38
    MSG_INFO_LOAD_LOCAL_FILE,
39
    MSG_WARNING_DOWNLOAD_FAILED,
40
    MSG_WARNING_INVALID_CODE,
41
    MSG_WARNING_MALFORMED_LINE,
42
    MSG_WARNING_READ_FAILED,
43
)
44
from charfinder.config.settings import get_unicode_data_file, get_unicode_data_url
8✔
45
from charfinder.utils.formatter import echo
8✔
46
from charfinder.utils.logger_setup import get_logger
8✔
47
from charfinder.utils.logger_styles import format_info, format_warning
8✔
48
from charfinder.validators import validate_unicode_data_url
8✔
49

50
# Module-level logger obtained from the project's logger setup helper.
logger = get_logger()
8✔
51

52
# Only load_alternate_names is exported by a star import; the other functions
# defined in this module remain importable by explicit name.
__all__ = ["load_alternate_names"]
8✔
53

54

55
def download_and_cache_unicode_data(
    unicode_data_url: str,
    unicode_data_file: Path,
    *,
    show: bool = True,
    use_color: bool = True,
) -> bool:
    """
    Download UnicodeData.txt and cache it at the given path.

    The URL is validated first. The response body is decoded as UTF-8, the
    parent directory of the cache path is created if necessary, and the text
    is written to the cache file. Network, file system and decoding failures
    are reported as a warning and turned into a ``False`` return value.

    Args:
        unicode_data_url (str): The URL to download the file from.
        unicode_data_file (Path): The path where the file should be cached.
        show (bool): If True, show progress messages.
        use_color (bool): If True, colorize the info and warning messages.

    Returns:
        bool: True if the file was downloaded and written, False otherwise.

    Raises:
        ValueError: If the URL is rejected by ``validate_unicode_data_url``
            (e.g. the URL scheme is not HTTP/HTTPS).
    """
    validate_unicode_data_url(unicode_data_url)

    try:
        # Bind the object returned by urlopen first and read from that same
        # object inside the ``with`` block (rather than from ``__enter__``'s
        # return value).
        response = urlopen(unicode_data_url, timeout=5)  # noqa: S310
        with response:
            text = response.read().decode("utf-8")
        unicode_data_file.parent.mkdir(parents=True, exist_ok=True)
        unicode_data_file.write_text(text, encoding="utf-8")
    except (URLError, TimeoutError, OSError, UnicodeDecodeError) as exc:
        # UnicodeDecodeError is a ValueError, not an OSError, so it has to be
        # listed explicitly for a non-UTF-8 response to count as a failed
        # download instead of escaping to the caller.
        echo(
            msg=MSG_WARNING_DOWNLOAD_FAILED.format(error=exc),
            style=lambda m: format_warning(m, use_color=use_color),
            stream=sys.stderr,
            show=show,
            log=True,
            log_method="warning",
        )
        return False
    else:
        # Reported outside the ``try`` so that a failure while emitting the
        # message is not mistaken for a failed download.
        echo(
            msg=MSG_INFO_DOWNLOAD_SUCCESS.format(url=unicode_data_url),
            style=lambda m: format_info(m, use_color=use_color),
            stream=sys.stderr,
            show=show,
            log=True,
            log_method="info",
        )
        return True
105

106

107
def load_unicode_data_from_file(
    unicode_data_file: Path,
    *,
    show: bool = True,
) -> str | None:
    """
    Load the Unicode data from a local file.

    The file is read as UTF-8. A file that cannot be read, or whose bytes are
    not valid UTF-8, is reported as a warning and yields ``None`` so the
    caller can fall back to another source.

    Args:
        unicode_data_file (Path): The file path to read from.
        show (bool): If True, display progress messages.

    Returns:
        str | None: The content of the file or None if reading failed.
    """
    try:
        text = unicode_data_file.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as exc:
        # UnicodeDecodeError is a ValueError, not an OSError; without it a
        # corrupt cache file would raise instead of returning None.
        echo(
            msg=MSG_WARNING_READ_FAILED.format(path=unicode_data_file, error=exc),
            style=lambda m: format_warning(m),
            stream=sys.stderr,
            show=show,
            log=True,
            log_method="warning",
        )
        return None

    echo(
        msg=MSG_INFO_LOAD_LOCAL_FILE.format(path=unicode_data_file),
        style=lambda m: format_info(m),
        stream=sys.stderr,
        show=show,
        log=True,
        log_method="info",
    )
    return text
144

145

146
def parse_unicode_data(text: str, *, show: bool = True) -> dict[str, str]:
    """
    Parse the Unicode data text and return a dictionary of alternate names.

    Each non-blank line that does not start with ``#`` is split on ``;``.
    Lines with fewer than ``EXPECTED_MIN_FIELDS`` fields are reported as
    malformed and skipped. For the remaining lines, the first field is read as
    a hexadecimal code point and the field at ``ALT_NAME_INDEX`` as the
    alternate name; entries with an empty alternate name are ignored, and
    entries whose code point cannot be converted to a character are reported
    and skipped.

    Args:
        text (str): The raw text of the Unicode data.
        show (bool): If True, display progress messages.

    Returns:
        dict[str, str]: A dictionary mapping characters to their alternate names.
    """
    alt_names: dict[str, str] = {}
    for line in text.splitlines():
        stripped_line = line.strip()
        if not stripped_line or stripped_line.startswith("#"):
            continue

        fields = stripped_line.split(";")
        if len(fields) < EXPECTED_MIN_FIELDS:
            echo(
                msg=MSG_WARNING_MALFORMED_LINE.format(line=stripped_line),
                style=lambda m: format_warning(m),
                stream=sys.stderr,
                show=show,
                log=True,
                log_method="warning",
            )
            continue

        alt_name = fields[ALT_NAME_INDEX].strip()
        if not alt_name:
            continue

        code_hex = fields[0]
        try:
            char = chr(int(code_hex, 16))
        except (ValueError, OverflowError) as exc:
            # int() raises ValueError for non-hex text; chr() raises
            # ValueError for code points above 0x10FFFF and OverflowError for
            # values too large for a C int. Either way the line is skipped
            # rather than aborting the whole parse.
            echo(
                msg=MSG_WARNING_INVALID_CODE.format(code_hex=code_hex, error=exc),
                style=lambda m: format_warning(m),
                stream=sys.stderr,
                show=show,
                log=True,
                log_method="warning",
            )
            continue

        alt_names[char] = alt_name
    return alt_names
189

190

191
def load_alternate_names(*, show: bool = True, use_color: bool = True) -> dict[str, str]:
    """
    Return the alternate-name mapping built from UnicodeData.txt.

    The configured cache file is used when it exists and yields non-empty
    text. Otherwise the file is downloaded from the configured URL, cached,
    and read back from the cache. If the download fails, or the freshly cached
    file still yields no text, an empty dictionary is returned.

    Args:
        show (bool): If True, show progress messages to stderr.
        use_color (bool): If True, colorize output. Only forwarded to the
            download step.

    Returns:
        dict[str, str]: Dictionary mapping characters to their alternate names.

    Raises:
        ValueError: Propagated from the download step when the configured URL
            fails validation.
    """
    source_url = get_unicode_data_url()
    cache_path = get_unicode_data_file()

    # Prefer the cached copy; an unreadable or empty file counts as missing.
    cached_text = (
        load_unicode_data_from_file(cache_path, show=show) if cache_path.exists() else None
    )
    if cached_text:
        return parse_unicode_data(cached_text, show=show)

    downloaded = download_and_cache_unicode_data(
        source_url,
        cache_path,
        show=show,
        use_color=use_color,
    )
    if not downloaded:
        return {}

    # Read back what was just cached instead of reusing the in-memory response.
    fresh_text = load_unicode_data_from_file(cache_path, show=show)
    if not fresh_text:
        return {}
    return parse_unicode_data(fresh_text, show=show)
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc