speedyk-005 / chunklet-py, build 20396856167 (github push)

20 Dec 2025 04:12PM UTC. Coverage: 87.366% (+5.6% from 81.75%)
1307 of 1496 relevant lines covered (87.37%), 3.49 hits per line

Commit (speedyk-005): fix(ci): resolve Coveralls 422 error

- Switch to GITHUB_TOKEN for seamless Coveralls authentication
- Remove manual --service flag to allow auto-detection
- Set explicit job permissions for status reporting
Source File

/src/chunklet/code_chunker/code_chunker.py (97.01% covered)
"""
Author: Speedyk-005 | Copyright (c) 2025 | License: MIT

Language-Agnostic Code Chunking Utility

This module provides a robust, convention-aware engine for segmenting source code into
semantic units ("chunks") such as functions, classes, namespaces, and logical blocks.
Unlike purely heuristic or grammar-dependent parsers, the `CodeChunker` relies on
anchored, multi-language regex patterns and indentation rules to identify structures
consistently across a variety of programming languages.

Limitations
-----------
`CodeChunker` assumes syntactically conventional code. Highly obfuscated, minified,
or macro-generated sources may not fully respect its boundary patterns, though such
cases fall outside its intended domain.

Inspired by:
    - Camel.utils.chunker.CodeChunker (@ CAMEL-AI.org)
    - code-chunker by JimAiMoment
    - whats_that_code by matthewdeanmartin
    - CintraAI Code Chunker
"""

import sys
from pathlib import Path
from typing import Any, Literal, Callable, Generator, Annotated
from functools import partial
from itertools import chain

from more_itertools import unique_everseen
from pydantic import Field
from box import Box

try:
    from charset_normalizer import from_path
    from littletree import Node
    import defusedxml.ElementTree as ET
except ImportError:
    from_path, Node, ET = None, None, None

from loguru import logger

from chunklet.base_chunker import BaseChunker
from chunklet.code_chunker._code_structure_extractor import CodeStructureExtractor
from chunklet.common.path_utils import is_path_like
from chunklet.common.batch_runner import run_in_batch
from chunklet.common.validation import validate_input, restricted_iterable
from chunklet.common.token_utils import count_tokens
from chunklet.exceptions import (
    InvalidInputError,
    MissingTokenCounterError,
    TokenLimitError,
)


class CodeChunker(BaseChunker):
    """
    Language-agnostic code chunking utility for semantic code segmentation.

    Extracts structural units (functions, classes, namespaces) from source code
    across multiple programming languages using pattern-based detection and
    token-aware segmentation.

    Key Features:
        - Cross-language support (Python, C/C++, Java, C#, JavaScript, Go, etc.)
        - Structural analysis with namespace hierarchy tracking
        - Configurable token limits with strict/lenient overflow handling
        - Flexible docstring and comment processing modes
        - Accurate line number preservation and source tracking
        - Parallel batch processing for multiple files
        - Comprehensive logging and progress tracking
    """

    @validate_input
    def __init__(
        self,
        verbose: bool = False,
        token_counter: Callable[[str], int] | None = None,
    ):
        """
        Initialize the CodeChunker with optional token counter and verbosity control.

        Args:
            verbose (bool): Enable verbose logging.
            token_counter (Callable[[str], int] | None): Function that counts tokens in text.
                If None, a counter must be provided when calling the chunking methods.
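
        Example:
            A minimal sketch (illustrative only; the whitespace counter is a
            placeholder for a real tokenizer):

                chunker = CodeChunker(verbose=True, token_counter=lambda s: len(s.split()))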
        """
        self.token_counter = token_counter
        self._verbose = verbose
        self.extractor = CodeStructureExtractor(verbose=self._verbose)

    @property
    def verbose(self) -> bool:
        """Get the verbose setting."""
        return self._verbose

    @verbose.setter
    def verbose(self, value: bool) -> None:
        """Set the verbose setting and propagate to the extractor."""
        self._verbose = value
        self.extractor.verbose = value

    def _merge_tree(self, relations_list: list[list]) -> str:
        """
        Merges multiple sets of parent-child relation dictionaries into a single tree,
        then returns its string representation.

        Args:
            relations_list (list[list]): A list containing relation lists.

        Returns:
            str: The string representation of the tree.
        """
        if not relations_list:
            return "global"

        # Flatten the list of lists into a single iterable
        all_relations_flat = chain.from_iterable(relations_list)

        # Deduplicate relations
        def relation_key(relation: dict):
            return tuple(sorted(relation.items()))

        unique_relations = list(unique_everseen(all_relations_flat, key=relation_key))

        if not unique_relations:
            return "global"

        merged_tree = Node.from_relations(unique_relations, root="global")

        return merged_tree.to_string()

    def _split_oversized(
        self,
        snippet_dict: dict,
        max_tokens: int,
        max_lines: int,
        source: str | Path,
        token_counter: Callable | None,
        cumulative_lengths: tuple[int, ...],
    ):
        """
        Split an oversized structural block into smaller sub-chunks.

        This helper is used when a single code block exceeds the maximum
        token or line limit and `strict` is disabled. It divides the block's
        content into token-bounded fragments while preserving line order
        and basic metadata.

        Args:
            snippet_dict (dict): The oversized snippet to split.
            max_tokens (int): Maximum tokens per sub-chunk.
            max_lines (int): Maximum lines per sub-chunk.
            source (str | Path): The source of the code.
            token_counter (Callable | None): The token counting function.
            cumulative_lengths (tuple[int, ...]): The cumulative lengths of the lines in the source code.

        Returns:
            list[Box]: A list of sub-chunks derived from the original block.
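
        Example:
            Illustrative behaviour: with `max_lines` set to 5, a 15-line block
            is emitted as three 5-line sub-chunks, each carrying its own line
            numbers, character span, and namespace tree.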
        """
        sub_boxes = []
        curr_chunk = []
        token_count = 0
        line_count = 0

        # Iterate through each line in the snippet_dict content
        for line_no, line in enumerate(
            snippet_dict["content"].splitlines(), start=snippet_dict["start_line"]
        ):
            line_tokens = (
                count_tokens(line, token_counter) if max_tokens != sys.maxsize else 0
            )

            # If adding this line would exceed either max_tokens or max_lines, commit current chunk
            if (token_count + line_tokens > max_tokens) or (line_count + 1 > max_lines):
                start_line = line_no - len(curr_chunk)
                end_line = line_no - 1
                start_span = cumulative_lengths[start_line - 1]
                end_span = cumulative_lengths[end_line]
                tree = Node.from_relations(snippet_dict["relations"]).to_string()
                sub_boxes.append(
                    Box(
                        {
                            "content": "\n".join(curr_chunk),
                            "metadata": {
                                "tree": tree,
                                "start_line": start_line,
                                "end_line": end_line,
                                "span": (start_span, end_span),
                                "source": (
                                    str(source)
                                    if isinstance(source, Path)
                                    or (
                                        isinstance(source, str) and is_path_like(source)
                                    )
                                    else "N/A"
                                ),
                            },
                        }
                    )
                )
                curr_chunk = []  # The overflow line is appended below
                token_count = 0
                line_count = 0

            curr_chunk.append(line)
            token_count += line_tokens
            line_count += 1

        # Add any remaining chunk at the end
        if curr_chunk:
            start_line = snippet_dict["end_line"] - len(curr_chunk) + 1
            end_line = snippet_dict["end_line"]
            start_span = cumulative_lengths[start_line - 1]
            end_span = cumulative_lengths[end_line]
            tree = Node.from_relations(snippet_dict["relations"]).to_string()
            sub_boxes.append(
                Box(
                    {
                        "content": "\n".join(curr_chunk),
                        "metadata": {
                            "tree": tree,
                            "start_line": start_line,
                            "end_line": end_line,
                            "span": (start_span, end_span),
                            "source": (
                                str(source)
                                if (isinstance(source, Path) or is_path_like(source))
                                else "N/A"
                            ),
                        },
                    }
                )
            )

        return sub_boxes

    def _format_limit_msg(
        self,
        box_tokens: int,
        max_tokens: int,
        box_lines: int,
        max_lines: int,
        function_count: int,
        max_functions: int,
        content_preview: str,
    ) -> str:
        """
        Format a limit-exceeded error message, only including limits that are not sys.maxsize.

        Args:
            box_tokens: Actual token count in the block.
            max_tokens: Maximum allowed tokens.
            box_lines: Actual line count in the block.
            max_lines: Maximum allowed lines.
            function_count: Actual function count in the block.
            max_functions: Maximum allowed functions.
            content_preview: Preview of the content that exceeded limits.

        Returns:
            Formatted error message with the applicable limits.
        """
        limits = []

        if max_tokens != sys.maxsize:
            limits.append(f"tokens: {box_tokens} > {max_tokens}")
        if max_lines != sys.maxsize:
            limits.append(f"lines: {box_lines} > {max_lines}")
        if max_functions != sys.maxsize:
            limits.append(f"functions: {function_count} > {max_functions}")

        return (
            f"Limits: {', '.join(limits)}\n"
            f"Content starting with: \n```\n{content_preview}...\n```"
        )

    def _group_by_chunk(
        self,
        snippet_dicts: list[dict],
        cumulative_lengths: tuple[int, ...],
        token_counter: Callable[[str], int] | None,
        max_tokens: int,
        max_lines: int,
        max_functions: int,
        strict: bool,
        source: str | Path,
    ) -> list[Box]:
        """
        Group code snippets into chunks based on specified constraints.

        Iteratively merges snippets into chunks while respecting token, line, and function limits.
        Handles oversized snippets by splitting them if strict mode is disabled.

        Args:
            snippet_dicts (list[dict]): List of extracted code snippet dictionaries.
            cumulative_lengths (tuple[int, ...]): Cumulative character lengths for span calculation.
            token_counter (Callable[[str], int] | None): Function to count tokens in text.
            max_tokens (int): Maximum tokens per chunk.
            max_lines (int): Maximum lines per chunk.
            max_functions (int): Maximum functions per chunk.
            strict (bool): If True, raise an error on oversized snippets; if False, split them.
            source (str | Path): Original source for metadata.

        Returns:
            list[Box]: List of chunk boxes with content and metadata.
        """
        source = (
            str(source) if (isinstance(source, Path) or is_path_like(source)) else "N/A"
        )

        merged_content = []
        relations_list = []
        start_line = None
        end_line = None
        token_count = 0
        line_count = 0
        function_count = 0
        result_chunks = []

        index = 0
        while index < len(snippet_dicts):
            snippet_dict = snippet_dicts[index]
            box_tokens = (
                count_tokens(snippet_dict["content"], token_counter)
                if max_tokens != sys.maxsize
                else 0
            )
            box_lines = snippet_dict["content"].count("\n") + bool(
                snippet_dict["content"]
            )
            is_function = bool(snippet_dict.get("func_partial_signature"))

            # Check if adding this snippet exceeds any limits
            token_limit_reached = token_count + box_tokens > max_tokens
            line_limit_reached = line_count + box_lines > max_lines
            function_limit_reached = is_function and (
                function_count + 1 > max_functions
            )

            if not (
                token_limit_reached or line_limit_reached or function_limit_reached
            ):
                # Fits: merge normally
                merged_content.append(snippet_dict["content"])
                relations_list.append(snippet_dict["relations"])
                token_count += box_tokens
                line_count += box_lines
                if is_function:
                    function_count += 1

                if start_line is None:
                    start_line = snippet_dict["start_line"]
                end_line = snippet_dict["end_line"]
                index += 1

            elif not merged_content:
                # Too big and nothing merged yet: handle oversize
                limit_msg = self._format_limit_msg(
                    box_tokens,
                    max_tokens,
                    box_lines,
                    max_lines,
                    function_count,
                    max_functions,
                    snippet_dict["content"][:100],
                )
                if strict:
                    raise TokenLimitError(
                        f"Structural block exceeds maximum limit.\n{limit_msg}\n"
                        "Reason: strict mode prevents splitting inside points of interest (function, class, region, ...)\n"
                        "💡Hint: Consider increasing 'max_tokens', 'max_lines', or 'max_functions', "
                        "refactoring the oversized block, or setting 'strict=False' to allow automatic splitting of oversized blocks."
                    )
                else:  # Split further
                    logger.warning(
                        "Splitting oversized block into sub-chunks.\n({})",
                        limit_msg,
                    )

                    sub_chunks = self._split_oversized(
                        snippet_dict,
                        max_tokens,
                        max_lines,
                        source,
                        token_counter,
                        cumulative_lengths,
                    )

                    for sub_chunk in sub_chunks:
                        sub_chunk.metadata.chunk_num = len(result_chunks) + 1
                        result_chunks.append(sub_chunk)
                    index += 1
            else:
                # Flush current merged content as a chunk
                start_span = cumulative_lengths[start_line - 1]
                end_span = cumulative_lengths[end_line]
                merged_chunk = Box(
                    {
                        "content": "\n".join(merged_content),
                        "metadata": {
                            "chunk_num": len(result_chunks) + 1,
                            "tree": self._merge_tree(relations_list),
                            "start_line": start_line,
                            "end_line": end_line,
                            "span": (start_span, end_span),
                            "source": source,
                        },
                    }
                )
                result_chunks.append(merged_chunk)

                # Reset for next chunk
                merged_content.clear()
                relations_list.clear()
                start_line = None
                end_line = None
                token_count = 0
                line_count = 0
                function_count = 0

        # Flush remaining content
        if merged_content:
            start_span = cumulative_lengths[start_line - 1]
            end_span = cumulative_lengths[end_line]
            merged_chunk = Box(
                {
                    "content": "\n".join(merged_content),
                    "metadata": {
                        "chunk_num": len(result_chunks) + 1,
                        "tree": self._merge_tree(relations_list),
                        "start_line": start_line,
                        "end_line": end_line,
                        "span": (start_span, end_span),
                        "source": source,
                    },
                }
            )
            result_chunks.append(merged_chunk)

        return result_chunks

    def _validate_constraints(
        self,
        max_tokens: int | None,
        max_lines: int | None,
        max_functions: int | None,
        token_counter: Callable[[str], int] | None,
    ):
        """
        Validates that at least one chunking constraint is provided and sets default values.

        Args:
            max_tokens (int | None): Maximum number of tokens per chunk.
            max_lines (int | None): Maximum number of lines per chunk.
            max_functions (int | None): Maximum number of functions per chunk.
            token_counter (Callable[[str], int] | None): Function that counts tokens in text.

        Raises:
            InvalidInputError: If no chunking constraints are provided.
            MissingTokenCounterError: If `max_tokens` is provided but no `token_counter` is available.
        """
        if not any((max_tokens, max_lines, max_functions)):
            raise InvalidInputError(
                "At least one of 'max_tokens', 'max_lines', or 'max_functions' must be provided."
            )

        # If a token_counter is required but not provided
        if max_tokens is not None and not (token_counter or self.token_counter):
            raise MissingTokenCounterError()

    @validate_input
    def chunk(
        self,
        source: str | Path,
        *,
        max_tokens: Annotated[int | None, Field(ge=12)] = None,
        max_lines: Annotated[int | None, Field(ge=5)] = None,
        max_functions: Annotated[int | None, Field(ge=1)] = None,
        token_counter: Callable[[str], int] | None = None,
        include_comments: bool = True,
        docstring_mode: Literal["summary", "all", "excluded"] = "all",
        strict: bool = True,
    ) -> list[Box]:
        """
        Extract semantic code chunks from source using multi-dimensional analysis.

        Processes source code by identifying structural boundaries (functions, classes,
        namespaces) and grouping content based on multiple constraints including
        tokens, lines, and logical units while preserving semantic coherence.

        Args:
            source (str | Path): Raw code string or file path to process.
            max_tokens (int, optional): Maximum tokens per chunk. Must be >= 12.
            max_lines (int, optional): Maximum number of lines per chunk. Must be >= 5.
            max_functions (int, optional): Maximum number of functions per chunk. Must be >= 1.
            token_counter (Callable, optional): Token counting function. Uses the instance
                counter if None. Required for token-based chunking.
            include_comments (bool): Include comments in output chunks. Default: True.
            docstring_mode (Literal["summary", "all", "excluded"]): Docstring processing strategy:
                - "summary": Include only the first line of docstrings
                - "all": Include complete docstrings
                - "excluded": Remove all docstrings
                Defaults to "all".
            strict (bool): If True, raise an error when structural blocks exceed
                max_tokens. If False, split oversized blocks. Default: True.

        Returns:
            list[Box]: List of code chunks. Each Box carries the chunk
                content (str) plus a metadata record with:
                - chunk_num (int): 1-based chunk index
                - tree (str): Namespace hierarchy
                - start_line (int): Starting line in original source
                - end_line (int): Ending line in original source
                - span (tuple[int, int]): Character-level span (start and end offsets) in the original source.
                - source (str): Source file path or "N/A"

        Raises:
            InvalidInputError: Invalid configuration parameters.
            MissingTokenCounterError: No token counter available.
            FileProcessingError: Source file cannot be read.
            TokenLimitError: Structural block exceeds max_tokens in strict mode.
            CallbackError: If the token counter fails or returns an invalid type.
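
        Example:
            A minimal sketch (illustrative only; the whitespace counter is a
            placeholder for a real tokenizer):

                chunker = CodeChunker(token_counter=lambda s: len(s.split()))
                chunks = chunker.chunk("def add(a, b): return a + b", max_lines=5)
                for c in chunks:
                    print(c.metadata.start_line, c.metadata.end_line, c.metadata.tree)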
        """
        self._validate_constraints(max_tokens, max_lines, max_functions, token_counter)

        # Adjust limits for internal use
        if max_tokens is None:
            max_tokens = sys.maxsize
        if max_lines is None:
            max_lines = sys.maxsize
        if max_functions is None:
            max_functions = sys.maxsize

        token_counter = token_counter or self.token_counter

        if isinstance(source, str) and not source.strip():
            self.log_info("Input source is empty. Returning empty list.")
            return []

        self.log_info(
            "Starting chunk processing for {}",
            (
                f"source: {source}"
                if isinstance(source, Path)
                or (isinstance(source, str) and is_path_like(source))
                else f"code starting with:\n```\n{source[:100]}...\n```\n"
            ),
        )

        snippet_dicts, cumulative_lengths = self.extractor.extract_code_structure(
            source, include_comments, docstring_mode
        )

        result_chunks = self._group_by_chunk(
            snippet_dicts=snippet_dicts,
            cumulative_lengths=cumulative_lengths,
            token_counter=token_counter,
            max_tokens=max_tokens,
            max_lines=max_lines,
            max_functions=max_functions,
            strict=strict,
            source=source,
        )

        self.log_info(
            "Generated {} chunk(s) for the {}",
            len(result_chunks),
            (
                f"source: {source}"
                if isinstance(source, Path)
                or (isinstance(source, str) and is_path_like(source))
                else f"code starting with:\n```\n{source[:100]}...\n```\n"
            ),
        )

        return result_chunks

    @validate_input
    def batch_chunk(
        self,
        sources: restricted_iterable(str | Path),
        *,
        max_tokens: Annotated[int | None, Field(ge=12)] = None,
        max_lines: Annotated[int | None, Field(ge=5)] = None,
        max_functions: Annotated[int | None, Field(ge=1)] = None,
        token_counter: Callable[[str], int] | None = None,
        separator: Any = None,
        include_comments: bool = True,
        docstring_mode: Literal["summary", "all", "excluded"] = "all",
        strict: bool = True,
        n_jobs: Annotated[int, Field(ge=1)] | None = None,
        show_progress: bool = True,
        on_errors: Literal["raise", "skip", "break"] = "raise",
    ) -> Generator[Box, None, None]:
        """
        Process multiple source files or code strings in parallel.

        Leverages multiprocessing to efficiently chunk multiple code sources,
        applying consistent chunking rules across all inputs.

        Args:
            sources (restricted_iterable[str | Path]): A restricted iterable of file paths or raw code strings to process.
            max_tokens (int, optional): Maximum tokens per chunk. Must be >= 12.
            max_lines (int, optional): Maximum number of lines per chunk. Must be >= 5.
            max_functions (int, optional): Maximum number of functions per chunk. Must be >= 1.
            token_counter (Callable | None): Token counting function. Uses the instance
                counter if None. Required for token-based chunking.
            separator (Any): A value to be yielded after the chunks of each source are processed.
                Note: None cannot be used as a separator.
            include_comments (bool): Include comments in output chunks. Default: True.
            docstring_mode (Literal["summary", "all", "excluded"]): Docstring processing strategy:
                - "summary": Include only the first line of docstrings
                - "all": Include complete docstrings
                - "excluded": Remove all docstrings
                Defaults to "all".
            strict (bool): If True, raise an error when structural blocks exceed
                max_tokens. If False, split oversized blocks. Default: True.
            n_jobs (int | None): Number of parallel workers. Uses all available CPUs if None.
            show_progress (bool): Display a progress bar during processing. Defaults to True.
            on_errors (Literal["raise", "skip", "break"]):
                How to handle errors during processing. Defaults to "raise".

        Yields:
            Box: A chunk with its content and metadata. Each Box carries the chunk
                content (str) plus a metadata record with:
                - chunk_num (int): 1-based chunk index
                - tree (str): Namespace hierarchy
                - start_line (int): Starting line in original source
                - end_line (int): Ending line in original source
                - span (tuple[int, int]): Character-level span (start and end offsets) in the original source.
                - source (str): Source file path or "N/A"

        Raises:
            InvalidInputError: Invalid input parameters.
            MissingTokenCounterError: No token counter available.
            FileProcessingError: Source file cannot be read.
            TokenLimitError: Structural block exceeds max_tokens in strict mode.
            CallbackError: If the token counter fails or returns an invalid type.
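
        Example:
            An illustrative sketch (the file names are hypothetical):

                chunker = CodeChunker()
                for item in chunker.batch_chunk(
                    ["pkg/a.py", "pkg/b.py"], max_lines=50, separator="<EOF>"
                ):
                    if item == "<EOF>":
                        continue  # boundary between sources
                    print(item.metadata.source, item.metadata.chunk_num)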
        """
        chunk_func = partial(
            self.chunk,
            max_tokens=max_tokens,
            max_lines=max_lines,
            max_functions=max_functions,
            token_counter=token_counter or self.token_counter,
            include_comments=include_comments,
            docstring_mode=docstring_mode,
            strict=strict,
        )

        yield from run_in_batch(
            func=chunk_func,
            iterable_of_args=sources,
            iterable_name="sources",
            separator=separator,
            n_jobs=n_jobs,
            show_progress=show_progress,
            on_errors=on_errors,
            verbose=self.verbose,
        )
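

# ---------------------------------------------------------------------------
# Usage sketch (illustrative only, not part of the library API): chunk this
# very file by line count. The whitespace-based token counter is a stand-in
# for a real tokenizer.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    chunker = CodeChunker(token_counter=lambda text: len(text.split()))
    for code_chunk in chunker.chunk(Path(__file__), max_lines=60, strict=False):
        meta = code_chunk.metadata
        print(f"chunk {meta.chunk_num}: lines {meta.start_line}-{meta.end_line}")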