• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

speedyk-005 / chunklet-py / 20378511984

19 Dec 2025 06:09PM UTC coverage: 86.588% (+4.8%) from 81.75%
20378511984

Pull #7

github

web-flow
Merge 81717401a into aeb37fd6a
Pull Request #7: Merge develop branch to main

464 of 550 new or added lines in 17 files covered. (84.36%)

1 existing line in 1 file now uncovered.

1317 of 1521 relevant lines covered (86.59%)

4.33 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

74.26
/src/chunklet/visualizer/visualizer.py
1
import os
5✔
2
import json
5✔
3
import tempfile
5✔
4
import traceback
5✔
5
import mimetypes
5✔
6
from typing import Callable
5✔
7

8
try:
    import uvicorn
    from charset_normalizer import detect
    from fastapi import FastAPI, UploadFile, File, HTTPException, Form
    from fastapi.responses import HTMLResponse
    from fastapi.staticfiles import StaticFiles
except ImportError:
    # The visualization extras are optional. When any of them is missing,
    # bind EVERY name imported above to None (one fallback per name, no
    # duplicates) so later code can test `name is None` instead of hitting
    # a NameError. Visualizer.__init__ and Visualizer.serve() perform such
    # checks and raise an ImportError carrying an install hint.
    uvicorn = None
    detect = None
    FastAPI = None
    UploadFile = None
    File = None
    HTTPException = None
    Form = None
    HTMLResponse = None
    StaticFiles = None
×
23

24
from chunklet.document_chunker import DocumentChunker
5✔
25
from chunklet.code_chunker import CodeChunker
5✔
26
from chunklet.common.validation import validate_input
5✔
27

28

29
class Visualizer:
    """A FastAPI-based web interface for visualizing document and code chunks.

    This server allows users to upload text or code files, processes them with
    Chunklet's `DocumentChunker` or `CodeChunker`, and returns the chunked
    data along with statistics. A minimal frontend interface is served at the
    root endpoint.

    Attributes:
        host (str): Host IP to bind the FastAPI server.
        port (int): Port number to run the server on.
        document_chunker (DocumentChunker): Chunklet document chunker instance.
        code_chunker (CodeChunker): Chunklet code chunker instance.
        app (FastAPI): FastAPI application instance.
    """

    @validate_input
    def __init__(
        self,
        host: str = "127.0.0.1",
        port: int = 8000,
        token_counter: Callable[[str], int] | None = None,
    ):
        """Initializes the Visualizer server and configures chunkers.

        Args:
            host (str): Host IP to run the server. Defaults to "127.0.0.1".
            port (int): Port number to run the server. Defaults to 8000.
            token_counter (Optional[Callable[[str], int]]): Function to count tokens
                in text/code. Required for chunkers if used with `max_tokens`.

        Raises:
            ImportError: If the optional 'fastapi' dependency is not installed.
        """
        if FastAPI is None:
            raise ImportError(
                "The 'fastapi' library is not installed. "
                "Please install it with 'pip install fastapi>=0.115.12' or install the visualization extras "
                "with 'pip install 'chunklet-py[visualization]''"
            )

        self.host = host
        self.port = port
        self._token_counter = token_counter

        self.app = FastAPI()

        # Initialize chunkers
        self.document_chunker = DocumentChunker(token_counter=token_counter)
        self.code_chunker = CodeChunker(token_counter=token_counter)

        # Static assets live in the "static" directory next to this module.
        base_dir = os.path.dirname(os.path.abspath(__file__))
        static_dir = os.path.join(base_dir, "static")

        self.app.mount("/static", StaticFiles(directory=static_dir), name="static")

        # API endpoints: bound methods are registered directly as handlers.
        self.app.get("/api/token_counter_status")(self._get_token_counter_status)
        self.app.get("/health")(self._get_health_check)
        self.app.get("/")(self._get_index)
        self.app.post("/api/chunk")(self._chunk_file)

    # Instance endpoint methods
    async def _get_token_counter_status(self):
        """Reports whether a token counter is currently configured.

        Returns:
            dict: ``{"token_counter_available": bool}``.
        """
        return {"token_counter_available": self.token_counter is not None}

    async def _get_health_check(self):
        """Health check endpoint for testing."""
        return {"status": "healthy"}

    async def _get_index(self):
        """Serves the main HTML interface for the visualizer.

        Returns:
            HTMLResponse: The content of index.html if exists, else a default heading.
        """
        base_dir = os.path.dirname(os.path.abspath(__file__))
        static_dir = os.path.join(base_dir, "static")
        index_path = os.path.join(static_dir, "index.html")
        if os.path.exists(index_path):
            with open(index_path, "r", encoding="utf-8") as f:
                return HTMLResponse(content=f.read())
        return HTMLResponse(content="<h1>Text Chunk Visualizer</h1>")

    @validate_input
    async def _chunk_file(
        self,
        # `File` / `Form` are None when the optional web dependencies are
        # missing. Calling them unguarded would raise TypeError while this
        # class body is executed (i.e. at import time), before __init__ gets
        # the chance to raise its ImportError with the install hint.
        file: UploadFile = File(...) if File is not None else None,
        mode: str = Form("document") if Form is not None else "document",
        params: str = Form("{}") if Form is not None else "{}",
    ) -> dict:
        """Processes an uploaded file and returns chunked output.

        Args:
            self: The Visualizer instance.
            file (UploadFile): File uploaded by the client.
            mode (str): Determines which chunker to use. "code" selects the
                code chunker; any other value selects the document chunker.
            params (str): JSON string containing chunking parameters. Must
                decode to a JSON object; entries whose value is null are dropped.

        Returns:
            dict: Contains original text, chunked content, and statistics.

        Raises:
            HTTPException: 400 if `params` is not a valid JSON object, if the
                file name does not map to a ``text/*`` MIME type, or if
                charset-normalizer is unavailable; 500 if chunking fails.
        """
        # Parse params JSON and filter out None values
        try:
            parsed_params = json.loads(params)
        except (json.JSONDecodeError, TypeError) as err:
            raise HTTPException(
                400, f"Invalid chunking parameters JSON: {params}"
            ) from err

        # Valid JSON that is not an object (e.g. "[]" or "3") has no .items();
        # reject it as a client error instead of failing with AttributeError.
        if not isinstance(parsed_params, dict):
            raise HTTPException(400, f"Invalid chunking parameters JSON: {params}")

        chunker_params = {k: v for k, v in parsed_params.items() if v is not None}

        print(f"Processing: {file.filename} in {mode} mode")

        # Use Python mimetypes instead of browser content_type
        mimetype, _ = mimetypes.guess_type(file.filename or "")
        if not mimetype or not mimetype.startswith("text/"):
            raise HTTPException(400, "Only text files are supported.")

        if detect is None:
            raise HTTPException(
                400,
                "charset-normalizer library is not available. "
                "Please install visualization dependencies "
                "with 'pip install 'chunklet-py[visualization]''",
            )

        content = await file.read()

        # The detection result can carry an explicit None encoding (e.g. when
        # nothing could be detected); `.get(key, default)` would return that
        # None, so fall back with `or` instead.
        detected = detect(content) or {}
        encoding = detected.get("encoding") or "utf-8"
        text = content.decode(encoding, errors="ignore")
        chunker = self.code_chunker if mode == "code" else self.document_chunker

        tmp_path = None
        try:
            # Saved as txt file since they are all plaintext anyway.
            # delete=False keeps the file on disk after the `with` closes it so
            # the chunker can open it by path; the `finally` below removes it.
            with tempfile.NamedTemporaryFile(
                mode="wb", suffix=".txt", delete=False
            ) as tmp:
                tmp_path = tmp.name
                tmp.write(content)

            try:
                chunks = [
                    dict(chunk) for chunk in chunker.chunk(tmp_path, **chunker_params)
                ]
            except Exception as e:
                traceback.print_exc()
                raise HTTPException(
                    500,
                    f"Chunking failed. Please check the server terminal for specific error details. ({str(e)})",
                ) from e

            return {
                "text": text,
                "chunks": chunks,
                "stats": {
                    "text_length": len(text),
                    "chunk_count": len(chunks),
                    "mode": mode,
                },
            }
        finally:
            # Always cleanup temp file (best effort: a failed unlink must not
            # mask the response or the original error).
            if tmp_path is not None:
                try:
                    os.unlink(tmp_path)
                except OSError:
                    pass

    @property
    def token_counter(self):
        """Get the current token counter function."""
        return self._token_counter

    @token_counter.setter
    @validate_input
    def token_counter(self, value):
        """Set the token counter and update both chunkers."""
        self._token_counter = value
        self.document_chunker.token_counter = value
        self.code_chunker.token_counter = value

    def serve(self):
        """Starts the FastAPI server and prints the server URL.

        Raises:
            ImportError: If the optional 'uvicorn' dependency is not installed.
        """
        if uvicorn is None:
            raise ImportError(
                "The 'uvicorn' library is not installed. "
                "Please install it with 'pip install uvicorn>=0.34.0' or install the visualization extras "
                "with 'pip install 'chunklet-py[visualization]''"
            )

        print(" =" * 20)
        print("\nTEXT CHUNK VISUALIZER")
        print("= " * 20)
        print(f"URL: http://{self.host}:{self.port}")

        uvicorn.run(self.app, host=self.host, port=self.port)
5✔
218

219

220
if __name__ == "__main__":  # pragma: no cover
    # Script entry point: build a Visualizer with its default settings and
    # start serving immediately.
    Visualizer().serve()
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc