• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

speedyk-005 / chunklet-py / 24798516591

22 Apr 2026 07:19PM UTC coverage: 90.606% (-0.2%) from 90.758%
24798516591

push

github

speedyk-005
refactor: remove redundant type hints from docstrings

- Strip (type) from Args/Returns where signature already has types
- Simplify Returns format to prose description
- Run clean_docstrings.py on src/chunklet (26 files)
- Add ExtractionState TypedDict for type safety (from earlier refactor)

1360 of 1501 relevant lines covered (90.61%)

3.62 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

81.4
/src/chunklet/common/path_utils.py
1
import errno
4✔
2
import mimetypes
4✔
3
import sys
4✔
4
from pathlib import Path
4✔
5

6
import regex as re
4✔
7
# charset_normalizer is imported lazily inside read_text_file()
8

9
from chunklet.common.validation import validate_input
4✔
10
from chunklet.exceptions import FileProcessingError
4✔
11

12

13
# Structural pattern used to decide whether a string is shaped like a
# filesystem path. It is compiled with the third-party `regex` module
# (imported as `re`), which supplies the Unicode property classes
# \p{L} (letters), \p{N} (numbers) and \p{Lu} (uppercase letters).
#
# What the pattern accepts:
#   * An optional root: either a single "/" or one uppercase letter followed
#     by ":\". No case-insensitive flag is set, so a lowercase drive letter
#     or a "C:/" style root does not match.
#   * Zero or more folder segments made of letters, digits, "_", "-", "."
#     and spaces, each terminated by "/" or "\".
#   * A final segment drawn from the same character set. Because "." is
#     already part of that set, the trailing optional-extension group does
#     not widen what is accepted; it only documents intent.
#
# re.VERBOSE allows the inline comments below; the space inside the character
# classes is still matched literally, which is how spaces in names are allowed.
PATH_PATTERN = re.compile(
    r"""
    ^                                   # start of string
    (?:/|[\p{Lu}]:\\)?                 # optional root (Unix or Windows drive)
    (?:[\p{L}\p{N}_\-. ]+[/\\])*       # intermediate folders
    (?:[\p{L}\p{N}_\-. ])+             # file name (hidden or normal)
    (?:\.[\p{L}\p{N}]+)?               # optional extension
    $                                   # end of string
    """,
    re.VERBOSE,
)
24

25

26
def _is_binary_file(path: str | Path) -> bool:
4✔
27
    """
28
    Determine whether a file is binary or text.
29

30
    First tries to guess the file type based on its MIME type derived from
31
    the file extension. If MIME type is unavailable or ambiguous, reads the
32
    first 1024 bytes of the file and checks for null bytes (`b'\0'`), which
33
    indicate binary content.
34

35
    Args:
36
        path: Path to the file.
37

38
    Returns:
39
        True if the file is likely binary, False if text.
40
    """
41
    path = Path(path)
4✔
42
    mime_type, _ = mimetypes.guess_type(path)
4✔
43
    if mime_type:
4✔
44
        if mime_type.startswith("text"):
4✔
45
            return False
4✔
46
        if path.suffix.lower() == ".rtf":
4✔
47
            return False
4✔
48
        return True
4✔
49

50
    with open(path, "rb") as f:
×
51
        chunk = f.read(1024)
×
52
        return b"\0" in chunk
×
53

54

55
@validate_input
def is_path_like(text: str) -> bool:
    """
    Check if a string looks like a filesystem path (file or folder),
    including Unix/Windows paths, hidden files, and scripts without extensions.

    The check is structural: it does not require the path to exist. Empty
    strings and strings containing a newline or a null byte are rejected
    up front. On Windows, strings containing characters that are not allowed
    in file names are rejected as well; the colon of a leading drive
    specifier (``C:``) is exempt from that check.

    Args:
        text: text to check.

    Returns:
        True if string appears to be a filesystem path.

    Examples:
        >>> is_path_like("/home/user/document.txt")
        True
        >>> is_path_like("C:\\Users\\User\\file.pdf")
        True
        >>> is_path_like("folder/subfolder/script.sh")
        True
        >>> is_path_like(".hidden_file")
        True
        >>> is_path_like("no_extension_script")
        True
        >>> is_path_like("path/with/newline\\nchar")
        False
        >>> is_path_like("string_with_null_byte\\x00")
        False
    """
    if not text or "\n" in text or "\0" in text:
        return False

    if sys.platform == "win32":
        # A leading drive specifier legitimately contains a colon, so skip it
        # before scanning; otherwise every "C:\..." path would be rejected.
        has_drive = len(text) >= 2 and text[1] == ":" and text[0].isalpha()
        remainder = text[2:] if has_drive else text
        if any(c in remainder for c in '<>:"|?*'):
            return False

    try:
        # Attempt to call is_file() to trigger OS-level path validation,
        # especially for path length. The result itself is irrelevant.
        Path(text).is_file()
    except OSError as e:
        if e.errno == errno.ENAMETOOLONG:
            return False
        # Any other OSError (e.g. permission denied) is ignored on purpose:
        # the focus is on structural validity, not existence or access, so
        # the regex check below still decides.

    return bool(PATH_PATTERN.match(text))
4✔
102

103

104
@validate_input
def read_text_file(path: str | Path) -> str:
    """Read text file with automatic encoding detection.

    Args:
        path: File path to read.

    Returns:
        File content, or an empty string when no encoding match is found.

    Raises:
        FileProcessingError: If the path does not exist, is not a regular
            file, is detected as binary, or cannot be read.
    """
    # Imported lazily so charset_normalizer is only loaded when a file is
    # actually read.
    from charset_normalizer import from_path

    path = Path(path)

    if not path.exists():
        raise FileProcessingError(f"File does not exist: {path}")

    # Directories and other non-regular files pass exists() but would fail
    # later with a raw OSError when opened.
    if not path.is_file():
        raise FileProcessingError(f"Path is not a file: {path}")

    try:
        is_binary = _is_binary_file(path)
        match = None if is_binary else from_path(str(path)).best()
    except OSError as exc:
        # Surface I/O failures (e.g. permission denied) through the
        # documented exception type, keeping the original error as the cause.
        raise FileProcessingError(f"Could not read file: {path}") from exc

    if is_binary:
        raise FileProcessingError(f"Binary file not supported: {path}")

    return str(match) if match else ""
4✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc