• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

speedyk-005 / chunklet-py / 24647245391

20 Apr 2026 03:37AM UTC coverage: 90.65% (-0.02%) from 90.671%
24647245391

push

github

speedyk-005
feat(document): update SECTION_BREAK_PATTERN with broader sectioning support

1367 of 1508 relevant lines covered (90.65%)

3.63 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

81.4
/src/chunklet/common/path_utils.py
1
import errno
4✔
2
import mimetypes
4✔
3
import sys
4✔
4
from pathlib import Path
4✔
5

6
import regex as re
4✔
7

8
# charset_normalizer is imported lazily inside read_text_file
9

10
from chunklet.common.validation import validate_input
4✔
11
from chunklet.exceptions import FileProcessingError
4✔
12

13

14
# Pattern used to check whether the source argument given to a chunk method
# looks like a filesystem path.
#
# It is compiled with the third-party `regex` package (imported as `re`),
# which provides the \p{...} Unicode property classes used below. Under
# re.VERBOSE, whitespace inside a character class stays literal, so the
# trailing space in `[\p{L}\p{N}_\-. ]` lets path components contain spaces.
#
# The Windows root accepts drive letters in either case ("C:\" and "c:\"),
# since drive letters are not case-sensitive.
PATH_PATTERN = re.compile(
    r"""
    ^                                   # start of string
    (?:/|[\p{Lu}\p{Ll}]:\\)?           # optional root (Unix or Windows drive)
    (?:[\p{L}\p{N}_\-. ]+[/\\])*       # intermediate folders
    (?:[\p{L}\p{N}_\-. ])+             # file name (hidden or normal)
    (?:\.[\p{L}\p{N}]+)?               # optional extension
    $                                   # end of string
    """,
    re.VERBOSE,
)
26

27

28
def _is_binary_file(path: str | Path) -> bool:
4✔
29
    """
30
    Determine whether a file is binary or text.
31

32
    First tries to guess the file type based on its MIME type derived from
33
    the file extension. If MIME type is unavailable or ambiguous, reads the
34
    first 1024 bytes of the file and checks for null bytes (`b'\0'`), which
35
    indicate binary content.
36

37
    Args:
38
        path (str | Path): Path to the file.
39

40
    Returns:
41
        bool: True if the file is likely binary, False if text.
42
    """
43
    path = Path(path)
4✔
44
    mime_type, _ = mimetypes.guess_type(path)
4✔
45
    if mime_type:
4✔
46
        if mime_type.startswith("text"):
4✔
47
            return False
4✔
48
        if path.suffix.lower() == ".rtf":
4✔
49
            return False
4✔
50
        return True
4✔
51

52
    with open(path, "rb") as f:
×
53
        chunk = f.read(1024)
×
54
        return b"\0" in chunk
×
55

56

57
@validate_input
def is_path_like(text: str) -> bool:
    """
    Check if a string looks like a filesystem path (file or folder),
    including Unix/Windows paths, hidden files, and scripts without extensions.

    The check is structural: the path does not have to exist. Strings that
    are empty or contain a newline or null byte are rejected outright. On
    Windows, strings containing reserved characters are rejected as well,
    except for the colon of a leading drive specifier such as ``C:``.

    Args:
        text (str): text to check.

    Returns:
        bool: True if string appears to be a filesystem path.

    Examples:
        >>> is_path_like("/home/user/document.txt")
        True
        >>> is_path_like("C:\\Users\\User\\file.pdf")
        True
        >>> is_path_like("folder/subfolder/script.sh")
        True
        >>> is_path_like(".hidden_file")
        True
        >>> is_path_like("no_extension_script")
        True
        >>> is_path_like("path/with/newline\\nchar")
        False
        >>> is_path_like("string_with_null_byte\\x00")
        False
    """
    if not text or "\n" in text or "\0" in text:
        return False

    if sys.platform == "win32":
        # A colon is legal as part of a leading drive specifier ("C:"), so
        # skip that prefix before scanning for reserved characters.
        # Otherwise every drive-qualified path would be rejected here.
        has_drive = len(text) >= 2 and text[1] == ":" and text[0].isalpha()
        remainder = text[2:] if has_drive else text
        if any(c in remainder for c in '<>:"|?*'):
            return False

    try:
        # Attempt to call is_file() to trigger OS-level path validation,
        # especially for path length. The result itself is not used.
        Path(text).is_file()
    except OSError as e:
        # Only a too-long name disqualifies the string. Other OSErrors
        # (e.g. permission denied) are ignored, as the focus is on
        # structural validity, not existence or access.
        if e.errno == errno.ENAMETOOLONG:
            return False

    return bool(PATH_PATTERN.match(text))
104

105

106
@validate_input
def read_text_file(path: str | Path) -> str:
    """Read text file with automatic encoding detection.

    The encoding is detected with ``charset_normalizer`` (imported lazily
    here) and the best match is decoded to a string. If no encoding match
    is found, an empty string is returned.

    Args:
        path: File path to read.

    Returns:
        str: File content.

    Raises:
        FileProcessingError: If the file does not exist, is detected as
            binary, or cannot be opened or read (e.g. it is a directory or
            access is denied).
    """
    from charset_normalizer import from_path

    path = Path(path)

    if not path.exists():
        raise FileProcessingError(f"File does not exist: {path}")

    # The binary check may need to open the file. Surface OS-level failures
    # as FileProcessingError, as documented, instead of a raw OSError.
    try:
        is_binary = _is_binary_file(path)
    except OSError as err:
        raise FileProcessingError(f"File cannot be read: {path}") from err

    if is_binary:
        raise FileProcessingError(f"Binary file not supported: {path}")

    try:
        match = from_path(str(path)).best()
    except OSError as err:
        raise FileProcessingError(f"File cannot be read: {path}") from err

    return str(match) if match else ""
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc