deepset-ai / haystack, build 16754475749

05 Aug 2025 03:26PM UTC coverage: 91.946% (+0.05%) from 91.901%

Pull Request #9678: Chore/pep585 type hints
Merge 31abaf9ce into 323274e17 (github / web-flow)

12774 of 13893 relevant lines covered (91.95%)
0.92 hits per line

Source File: haystack/components/generators/utils.py (file coverage: 92.0%)
# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
#
# SPDX-License-Identifier: Apache-2.0

import json

from haystack import logging
from haystack.dataclasses import ChatMessage, StreamingChunk, ToolCall

logger = logging.getLogger(__name__)


def print_streaming_chunk(chunk: StreamingChunk) -> None:
    """
    Callback function to handle and display streaming output chunks.

    This function processes a `StreamingChunk` object by:
    - Printing tool call metadata (if any), including function names and arguments, as they arrive.
    - Printing tool call results when available.
    - Printing the main content (e.g., text tokens) of the chunk as it is received.

    The function outputs data directly to stdout and flushes output buffers to ensure immediate display during
    streaming.

    :param chunk: A chunk of streaming data containing content and optional metadata, such as tool calls and
        tool results.
    """
    if chunk.start and chunk.index and chunk.index > 0:
        # If this is the start of a new content block but not the first one, print two newlines to separate it
        print("\n\n", flush=True, end="")

    ## Tool Call streaming
    if chunk.tool_calls:
        # Typically, if there are multiple tool calls in the chunk, they are fully formed and not just deltas.
        for tool_call in chunk.tool_calls:
            # chunk.start being True indicates the beginning of a tool call; the presence of
            # tool_call.tool_name indicates the same.
            if chunk.start:
                # If there is more than one tool call in the chunk, print two newlines to separate them.
                # We know there is more than one tool call when the index of the tool call is greater than the
                # index of the chunk.
                if chunk.index and tool_call.index > chunk.index:
                    print("\n\n", flush=True, end="")

                print(f"[TOOL CALL]\nTool: {tool_call.tool_name} \nArguments: ", flush=True, end="")

            # Print the tool arguments as they stream in
            if tool_call.arguments:
                print(tool_call.arguments, flush=True, end="")

    ## Tool Call Result streaming
    # Print tool call results if available (from ToolInvoker)
    if chunk.tool_call_result:
        # The Tool Call Result is fully formed, so delta accumulation is not needed
        print(f"[TOOL RESULT]\n{chunk.tool_call_result.result}", flush=True, end="")

    ## Normal content streaming
    # Print the main content of the chunk (from ChatGenerator)
    if chunk.content:
        if chunk.start:
            print("[ASSISTANT]\n", flush=True, end="")
        print(chunk.content, flush=True, end="")

    # End of the LLM assistant message, so we add two newlines.
    # This ensures spacing between multiple LLM messages (e.g. from an Agent) or multiple Tool Call Results.
    if chunk.finish_reason is not None:
        print("\n\n", flush=True, end="")
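
A usage sketch (illustrative, not part of the file): `print_streaming_chunk` is designed to be passed as the
`streaming_callback` of a chat generator, which invokes it once per chunk so that text tokens, tool calls, and
tool results are printed as they arrive. Assuming the OpenAI-backed generator from the same package:

from haystack.components.generators.chat import OpenAIChatGenerator
from haystack.dataclasses import ChatMessage

# Each StreamingChunk produced by the generator is routed through print_streaming_chunk
generator = OpenAIChatGenerator(streaming_callback=print_streaming_chunk)
generator.run(messages=[ChatMessage.from_user("Summarize Haystack in one sentence.")])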


def _convert_streaming_chunks_to_chat_message(chunks: list[StreamingChunk]) -> ChatMessage:
    """
    Combines the streaming chunks into a single ChatMessage.

    :param chunks: The list of all `StreamingChunk` objects.

    :returns: The ChatMessage.
    """
    text = "".join([chunk.content for chunk in chunks])
    tool_calls = []

    # Process tool calls if present in any chunk
    tool_call_data: dict[int, dict[str, str]] = {}  # Track tool calls by index
    for chunk in chunks:
        if chunk.tool_calls:
            for tool_call in chunk.tool_calls:
                # We use the index of the tool_call to track the tool call across chunks, since the ID is not
                # always provided
                if tool_call.index not in tool_call_data:
                    tool_call_data[tool_call.index] = {"id": "", "name": "", "arguments": ""}

                # Save the ID if present
                if tool_call.id is not None:
                    tool_call_data[tool_call.index]["id"] = tool_call.id

                if tool_call.tool_name is not None:
                    tool_call_data[tool_call.index]["name"] += tool_call.tool_name
                if tool_call.arguments is not None:
                    tool_call_data[tool_call.index]["arguments"] += tool_call.arguments
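
    # Illustrative state (hypothetical values): after consuming a delta with index=0, id="call_1", and
    # tool_name="search", followed by two deltas with arguments='{"query": ' and arguments='"news"}',
    # tool_call_data holds {0: {"id": "call_1", "name": "search", "arguments": '{"query": "news"}'}}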

    # Convert the accumulated tool call data into ToolCall objects
    sorted_keys = sorted(tool_call_data.keys())
    for key in sorted_keys:
        tool_call_dict = tool_call_data[key]
        try:
            arguments = json.loads(tool_call_dict.get("arguments", "{}")) if tool_call_dict.get("arguments") else {}
            tool_calls.append(ToolCall(id=tool_call_dict["id"], tool_name=tool_call_dict["name"], arguments=arguments))
        except json.JSONDecodeError:
            logger.warning(
                "The LLM provider returned a malformed JSON string for tool call arguments. This tool call "
                "will be skipped. To always generate a valid JSON, set `tools_strict` to `True`. "
                "Tool call ID: {_id}, Tool name: {_name}, Arguments: {_arguments}",
                _id=tool_call_dict["id"],
                _name=tool_call_dict["name"],
                _arguments=tool_call_dict["arguments"],
            )

    # finish_reason can appear in different places, so we look for the last one
    finish_reasons = [chunk.finish_reason for chunk in chunks if chunk.finish_reason]
    finish_reason = finish_reasons[-1] if finish_reasons else None

    meta = {
        "model": chunks[-1].meta.get("model"),
        "index": 0,
        "finish_reason": finish_reason,
        "completion_start_time": chunks[0].meta.get("received_at"),  # time the first chunk was received
        "usage": chunks[-1].meta.get("usage"),  # the last chunk carries the final usage data, if available
    }

    return ChatMessage.from_assistant(text=text or None, tool_calls=tool_calls, meta=meta)
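
A minimal reassembly sketch (hypothetical values; the exact `StreamingChunk` constructor arguments are an
assumption here and may vary across Haystack versions):

# Two text-only chunks: the first carries the receive timestamp, the last carries finish_reason and usage
chunks = [
    StreamingChunk(content="Hello, ", meta={"model": "gpt-4o-mini", "received_at": "2025-08-05T15:26:00+00:00"}),
    StreamingChunk(content="world!", meta={"model": "gpt-4o-mini", "usage": {"total_tokens": 12}}, finish_reason="stop"),
]
message = _convert_streaming_chunks_to_chat_message(chunks)
assert message.text == "Hello, world!"
assert message.meta["finish_reason"] == "stop"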