• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

speedyk-005 / chunklet-py / 24647245391

20 Apr 2026 03:37AM UTC coverage: 90.65% (-0.02%) from 90.671%
24647245391

push

github

speedyk-005
feat(document): update SECTION_BREAK_PATTERN with broader sectioning support

1367 of 1508 relevant lines covered (90.65%)

3.63 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

82.56
/src/chunklet/visualizer/visualizer.py
1
import json
4✔
2
import mimetypes
4✔
3
import traceback
4✔
4
from pathlib import Path
4✔
5
from typing import Callable
4✔
6

7
import aiofiles
4✔
8

9
try:
4✔
10
    import msgpack
4✔
11
    import uvicorn
4✔
12
    from charset_normalizer import detect
4✔
13
    from fastapi import FastAPI, File, Form, HTTPException, UploadFile
4✔
14
    from fastapi.responses import HTMLResponse, Response
4✔
15
    from fastapi.staticfiles import StaticFiles
4✔
16
except ImportError:  # pragma: no cover
17
    # Lambda placeholders prevent "None is not callable" errors when imports fail
18
    # This allows the module to be imported without dependencies, with proper error handling later
19
    msgpack = None
20
    uvicorn = None
21
    detect = None
22
    FastAPI = None
23
    UploadFile = None
24
    File = lambda x: x  # noqa: E731
25
    Form = lambda x: x  # noqa: E731
26
    HTTPException = None
27
    HTMLResponse = lambda x: x  # noqa: E731
28
    Response = lambda x: x  # noqa: E731
29
    StaticFiles = None
30

31
from chunklet.code_chunker import CodeChunker
4✔
32
from chunklet.common.validation import validate_input
4✔
33
from chunklet.document_chunker import DocumentChunker
4✔
34

35

36
class Visualizer:
    """A FastAPI-based web interface for visualizing document and code chunks.

    This server allows users to upload text or code files, processes them with
    Chunklet's `DocumentChunker` or `CodeChunker`, and returns the chunked
    data along with statistics. A minimal frontend interface is served at the
    root endpoint.

    Attributes:
        host (str): Host IP to bind the FastAPI server.
        port (int): Port number to run the server on.
        document_chunker (DocumentChunker): Chunklet document chunker instance.
        code_chunker (CodeChunker): Chunklet code chunker instance.
        app (FastAPI): FastAPI application instance.
        static_dir (Path): Directory (next to this module) holding the
            frontend assets; mounted at ``/static``.
    """

    @validate_input
    def __init__(
        self,
        host: str = "127.0.0.1",
        port: int = 8000,
        token_counter: Callable[[str], int] | None = None,
    ):
        """Initializes the Visualizer server and configures chunkers.

        Args:
            host (str): Host IP to run the server. Defaults to "127.0.0.1".
            port (int): Port number to run the server. Defaults to 8000.
            token_counter (Optional[Callable[[str], int]]): Function to count tokens
                in text/code. Required for chunkers if used with `max_tokens`.

        Raises:
            ImportError: If the optional 'fastapi' or 'msgpack' dependency
                could not be imported.
        """
        # The optional imports at module level fall back to None; fail early
        # with an actionable message instead of a "None is not callable" error.
        if FastAPI is None:
            raise ImportError(
                "The 'fastapi' library is not installed. "
                "Please install it with 'pip install fastapi>=0.115.12' or install the visualization extras "
                "with 'pip install 'chunklet-py[visualization]''"
            )

        if msgpack is None:
            raise ImportError(
                "The 'msgpack' library is not installed. "
                "Please install it with 'pip install msgpack>=1.0.8' or install the visualization extras "
                "with 'pip install 'chunklet-py[visualization]''"
            )

        self.host = host
        self.port = port
        self._token_counter = token_counter

        self.app = FastAPI()

        # Initialize chunkers
        self.document_chunker = DocumentChunker(token_counter=token_counter)
        self.code_chunker = CodeChunker(token_counter=token_counter)

        self.static_dir = Path(__file__).parent / "static"

        self.app.mount(
            "/static", StaticFiles(directory=str(self.static_dir)), name="static"
        )

        # API endpoints (bound methods are registered so handlers can use self)
        self.app.get("/api/token_counter_status")(self._get_token_counter_status)
        self.app.get("/health")(self._get_health_check)
        self.app.get("/")(self._get_index)
        self.app.post("/api/chunk")(self._chunk_file)

    # Instance endpoint methods
    def _get_token_counter_status(self):
        """Reports whether a token counter is currently configured.

        Returns:
            dict: ``{"token_counter_available": bool}``.
        """
        return {"token_counter_available": self.token_counter is not None}

    def _get_health_check(self):
        """Health check endpoint for testing."""
        return {"status": "healthy"}

    async def _get_index(self):
        """Serves the main HTML interface for the visualizer.

        Returns:
            HTMLResponse: The content of index.html if exists, else a default heading.
        """
        index_path = self.static_dir / "index.html"
        if index_path.exists():
            async with aiofiles.open(index_path, "r", encoding="utf-8") as f:
                content = await f.read()
                return HTMLResponse(content=content)
        return HTMLResponse(content="<h1>Text Chunk Visualizer</h1>")

    @validate_input
    async def _chunk_file(
        self,
        file: UploadFile = File(...),
        mode: str = Form("document"),
        params: str = Form("{}"),
    ) -> Response:
        """Processes an uploaded file and returns chunked output.

        Args:
            self: The Visualizer instance.
            file (UploadFile): File uploaded by the client.
            mode (str): Determines which chunker to use. "code" selects the
                code chunker; any other value selects the document chunker.
            params (str): JSON string containing chunking parameters. Must
                decode to a JSON object; entries whose value is null are dropped.

        Returns:
            Response: MessagePack-encoded response with original text, chunks, and stats.

        Raises:
            HTTPException: 400 if `params` is not a valid JSON object, if the
                file name does not map to a ``text/*`` MIME type, or if
                charset-normalizer is unavailable; 500 if chunking fails.
        """
        invalid_params_msg = f"Invalid chunking parameters JSON: {params}"

        # Parse params JSON and filter out None values
        try:
            chunker_params = json.loads(params)
        except (json.JSONDecodeError, TypeError):
            raise HTTPException(400, invalid_params_msg) from None

        # Valid JSON that is not an object (e.g. "[]", "1", "null") has no
        # .items(); reject it as a client error instead of crashing.
        if not isinstance(chunker_params, dict):
            raise HTTPException(400, invalid_params_msg)
        chunker_params = {k: v for k, v in chunker_params.items() if v is not None}

        # Use Python mimetypes instead of browser content_type
        mimetype, _ = mimetypes.guess_type(file.filename or "")
        if not mimetype or not mimetype.startswith("text/"):
            raise HTTPException(400, "Only text files are supported.")

        if detect is None:
            raise HTTPException(
                400,
                "charset-normalizer library is not available. Please install visualization dependencies "
                "with 'pip install 'chunklet-py[visualization]''",
            )

        content = await file.read()
        # `detect` can report an encoding of None when it cannot identify one;
        # dict.get's default does not cover a present-but-None value, so fall
        # back to UTF-8 explicitly.
        encoding = detect(content).get("encoding") or "utf-8"
        text = content.decode(encoding, errors="ignore")
        chunker = self.code_chunker if mode == "code" else self.document_chunker

        try:
            chunks = [
                dict(chunk) for chunk in chunker.chunk_text(text, **chunker_params)
            ]

            response_data = {
                "text": text,
                "chunks": chunks,
                "stats": {
                    "text_length": len(text),
                    "chunk_count": len(chunks),
                    "mode": mode,
                },
            }

            return Response(
                content=msgpack.packb(response_data, use_bin_type=True),
                media_type="application/msgpack",
            )

        except Exception as e:
            # Boundary handler: log the full traceback server-side and return
            # a short summary to the client.
            traceback.print_exc()
            raise HTTPException(
                500,
                f"Chunking failed. Please check the server terminal for specific error details. ({str(e)})",
            ) from None

    @property
    def token_counter(self):
        """Get the current token counter function."""
        return self._token_counter

    @token_counter.setter
    @validate_input
    def token_counter(self, value):
        """Set the token counter and update both chunkers."""
        self._token_counter = value
        self.document_chunker.token_counter = value
        self.code_chunker.token_counter = value

    def serve(self):
        """Starts the FastAPI server and prints the server URL.

        Raises:
            ImportError: If the optional 'uvicorn' dependency could not be imported.
        """
        if uvicorn is None:
            raise ImportError(
                "The 'uvicorn' library is not installed. "
                "Please install it with 'pip install uvicorn>=0.34.0' or install the visualization extras "
                "with 'pip install 'chunklet-py[visualization]''"
            )

        print(" =" * 20)
        print("\nTEXT CHUNK VISUALIZER")
        print("= " * 20)
        print(f"URL: http://{self.host}:{self.port}")

        uvicorn.run(
            self.app,
            host=self.host,
            port=self.port,
            access_log=False,
        )
231

232

233
if __name__ == "__main__":  # pragma: no cover
    # Script entry point: start a Visualizer with its default host/port and
    # no token counter, then hand control to the server.
    visualizer = Visualizer()
    visualizer.serve()
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc