12744218044

Committed 13 Jan 2025 09:26AM UTC coverage: 91.352% (+0.3%) from 91.099%

Build # 12744218044

Build Type

Pull #8693

github

Committed by

web-flow

Commit Message

Merge 4a3ad897d into db76ae284

Pull Request Pull Request #8693: feat: Add `ComponentTool` to Haystack tools

Run Details

8968 of 9817 relevant lines covered (91.35%)

0.91 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

96.27

haystack/components/generators/chat/openai.py

# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
#
# SPDX-License-Identifier: Apache-2.0

import json
import os
from typing import Any, Callable, Dict, List, Optional, Union

from openai import OpenAI, Stream
from openai.types.chat import ChatCompletion, ChatCompletionChunk, ChatCompletionMessage
from openai.types.chat.chat_completion import Choice
from openai.types.chat.chat_completion_chunk import Choice as ChunkChoice

from haystack import component, default_from_dict, default_to_dict, logging
from haystack.dataclasses import ChatMessage, StreamingChunk, ToolCall
from haystack.tools.tool import Tool, _check_duplicate_tool_names, deserialize_tools_inplace
from haystack.utils import Secret, deserialize_callable, deserialize_secrets_inplace, serialize_callable

# Module-level logger, obtained from Haystack's `logging` module and named after this module.
logger = logging.getLogger(__name__)


# Signature of a streaming callback: it receives one `StreamingChunk` and returns nothing.
StreamingCallbackT = Callable[[StreamingChunk], None]


@component
class OpenAIChatGenerator:
    """
    Completes chats using OpenAI's large language models (LLMs).

    It works with the gpt-4 and gpt-3.5-turbo models and supports streaming responses
    from OpenAI API. It uses [ChatMessage](https://docs.haystack.deepset.ai/docs/chatmessage)
    format in input and output.

    You can customize how the text is generated by passing parameters to the
    OpenAI API. Use the `**generation_kwargs` argument when you initialize
    the component or when you run it. Any parameter that works with
    `openai.ChatCompletion.create` will work here too.

    For details on OpenAI API parameters, see
    [OpenAI documentation](https://platform.openai.com/docs/api-reference/chat).

    ### Usage example

    ```python
    from haystack.components.generators.chat import OpenAIChatGenerator
    from haystack.dataclasses import ChatMessage

    messages = [ChatMessage.from_user("What's Natural Language Processing?")]

    client = OpenAIChatGenerator()
    response = client.run(messages)
    print(response)
    ```
    Output:
    ```
    {'replies':
        [ChatMessage(content='Natural Language Processing (NLP) is a branch of artificial intelligence
            that focuses on enabling computers to understand, interpret, and generate human language in
            a way that is meaningful and useful.',
         role=<ChatRole.ASSISTANT: 'assistant'>, name=None,
         meta={'model': 'gpt-4o-mini', 'index': 0, 'finish_reason': 'stop',
         'usage': {'prompt_tokens': 15, 'completion_tokens': 36, 'total_tokens': 51}})
        ]
    }
    ```
    """

    def __init__(  # pylint: disable=too-many-positional-arguments
        self,
        api_key: Secret = Secret.from_env_var("OPENAI_API_KEY"),
        model: str = "gpt-4o-mini",
        streaming_callback: Optional[StreamingCallbackT] = None,
        api_base_url: Optional[str] = None,
        organization: Optional[str] = None,
        generation_kwargs: Optional[Dict[str, Any]] = None,
        timeout: Optional[float] = None,
        max_retries: Optional[int] = None,
        tools: Optional[List[Tool]] = None,
        tools_strict: bool = False,
    ):
        """
        Sets up the generator and the OpenAI client it talks to. The default model is gpt-4o-mini.

        When `timeout` or `max_retries` is left as `None`, the client is configured from the
        `OPENAI_TIMEOUT` / `OPENAI_MAX_RETRIES` environment variables, or from built-in fallbacks
        if those variables are not set.

        :param api_key:
            The OpenAI API key. By default it is read from the `OPENAI_API_KEY` environment variable.
        :param model:
            The name of the model to use.
        :param streaming_callback:
            A function called with each
            [StreamingChunk](https://docs.haystack.deepset.ai/docs/data-classes#streamingchunk)
            as new tokens arrive from the stream.
        :param api_base_url:
            An optional base URL for the API.
        :param organization:
            Your organization ID, `None` by default. See
            [production best practices](https://platform.openai.com/docs/guides/production-best-practices/setting-up-your-organization).
        :param generation_kwargs:
            Extra parameters forwarded as-is to the OpenAI endpoint. See the OpenAI
            [documentation](https://platform.openai.com/docs/api-reference/chat) for the full list.
            Commonly used ones:
            - `max_tokens`: Upper bound on the number of tokens in the output text.
            - `temperature`: Sampling temperature. Higher values make the model take more risks;
                try 0.9 for creative tasks and 0 (argmax sampling) for tasks with a well-defined answer.
            - `top_p`: Nucleus sampling, an alternative to temperature: only the tokens making up the
                top_p probability mass are considered (0.1 means the top 10%).
            - `n`: Number of completions to generate per prompt. With 3 prompts and n=2 the model
                returns 6 completions in total.
            - `stop`: One or more sequences at which the model stops generating tokens.
            - `presence_penalty`: Penalty applied to a token that is already present at all; larger
                values make repeating that token less likely.
            - `frequency_penalty`: Penalty applied to a token that was already generated in the text;
                larger values make repeating that token less likely.
            - `logit_bias`: A dictionary mapping tokens to the bias added to each of them.
        :param timeout:
            Timeout for OpenAI client calls. If `None`, the `OPENAI_TIMEOUT` environment variable is used,
            or 30 seconds when it is not set.
        :param max_retries:
            Maximum number of retries to contact OpenAI after an internal error. If `None`, the
            `OPENAI_MAX_RETRIES` environment variable is used, or 5 when it is not set.
        :param tools:
            A list of tools for which the model can prepare calls.
        :param tools_strict:
            Whether to enable strict schema adherence for tool calls. With `True` the model follows exactly
            the schema given in the `parameters` field of the tool definition, possibly at the cost of latency.
        """
        # Connection settings are stored exactly as passed (`None` included); the fallbacks resolved
        # further down are only handed to the client.
        self.api_key = api_key
        self.api_base_url = api_base_url
        self.organization = organization
        self.timeout = timeout
        self.max_retries = max_retries

        # Generation settings.
        self.model = model
        self.generation_kwargs = generation_kwargs or {}
        self.streaming_callback = streaming_callback
        self.tools = tools
        self.tools_strict = tools_strict

        _check_duplicate_tool_names(tools)

        client_timeout = timeout if timeout is not None else float(os.environ.get("OPENAI_TIMEOUT", 30.0))
        client_max_retries = (
            max_retries if max_retries is not None else int(os.environ.get("OPENAI_MAX_RETRIES", 5))
        )

        self.client = OpenAI(
            api_key=api_key.resolve_value(),
            organization=organization,
            base_url=api_base_url,
            timeout=client_timeout,
            max_retries=client_max_retries,
        )

    def _get_telemetry_data(self) -> Dict[str, Any]:
        """
        Builds the payload that is sent to Posthog for usage analytics.

        :returns:
            A dictionary with a single `model` entry holding the configured model name.
        """
        telemetry: Dict[str, Any] = {"model": self.model}
        return telemetry

    def to_dict(self) -> Dict[str, Any]:
        """
        Turns this component into a dictionary.

        The streaming callback is passed through `serialize_callable`, and each tool and the API key
        through its own `to_dict()`. An unset callback and a missing or empty tool list are both
        written as `None`.

        :returns:
            The dictionary produced by `default_to_dict` for this component.
        """
        serialized_callback = None
        if self.streaming_callback:
            serialized_callback = serialize_callable(self.streaming_callback)

        serialized_tools = None
        if self.tools:
            serialized_tools = [tool.to_dict() for tool in self.tools]

        return default_to_dict(
            self,
            model=self.model,
            streaming_callback=serialized_callback,
            api_base_url=self.api_base_url,
            organization=self.organization,
            generation_kwargs=self.generation_kwargs,
            api_key=self.api_key.to_dict(),
            timeout=self.timeout,
            max_retries=self.max_retries,
            tools=serialized_tools,
            tools_strict=self.tools_strict,
        )

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "OpenAIChatGenerator":
        """
        Deserialize this component from a dictionary.

        The `api_key`, `tools` and `streaming_callback` entries of `data["init_parameters"]` are
        converted in place before the dictionary is handed to `default_from_dict`.

        :param data: The dictionary representation of this component.
        :returns:
            The deserialized component instance.
        """
        # Look the parameters up once and use the same dictionary everywhere. The previous code
        # subscripted `data["init_parameters"]` before falling back to `.get(..., {})`, so the
        # fallback could never take effect and a missing entry surfaced as a bare KeyError.
        # NOTE(review): assumes the in-place helpers accept a dict that lacks the requested keys - confirm.
        init_params = data.get("init_parameters", {})

        deserialize_secrets_inplace(init_params, keys=["api_key"])
        deserialize_tools_inplace(init_params, key="tools")

        serialized_callback_handler = init_params.get("streaming_callback")
        if serialized_callback_handler:
            init_params["streaming_callback"] = deserialize_callable(serialized_callback_handler)
        return default_from_dict(cls, data)

    @component.output_types(replies=List[ChatMessage])
    def run(
        self,
        messages: List[ChatMessage],
        streaming_callback: Optional[StreamingCallbackT] = None,
        generation_kwargs: Optional[Dict[str, Any]] = None,
        *,
        tools: Optional[List[Tool]] = None,
        tools_strict: Optional[bool] = None,
    ):
        """
        Runs a chat completion for the given messages and returns the generated replies.

        :param messages:
            The input conversation as a list of ChatMessage instances. An empty list produces
            an empty `replies` list without calling the API.
        :param streaming_callback:
            A function called for every chunk received from the stream. When given, it takes
            precedence over the callback set at initialization.
        :param generation_kwargs:
            Additional keyword arguments for text generation. They override the values passed at
            initialization. For details on OpenAI API parameters, see
            [OpenAI documentation](https://platform.openai.com/docs/api-reference/chat/create).
        :param tools:
            A list of tools for which the model can prepare calls. When given, it replaces the
            `tools` set at initialization.
        :param tools_strict:
            Whether to enable strict schema adherence for tool calls. With `True` the model follows
            exactly the schema given in the `parameters` field of the tool definition, possibly at the
            cost of latency. When given, it replaces the `tools_strict` value set at initialization.

        :returns:
            A dictionary with the following key:
            - `replies`: The generated responses as a list of ChatMessage instances.
        """
        if len(messages) == 0:
            return {"replies": []}

        # A callback supplied for this call wins over the one configured on the component.
        effective_callback = streaming_callback or self.streaming_callback

        request_args = self._prepare_api_call(
            messages=messages,
            streaming_callback=effective_callback,
            generation_kwargs=generation_kwargs,
            tools=tools,
            tools_strict=tools_strict,
        )
        response: Union[Stream[ChatCompletionChunk], ChatCompletion] = self.client.chat.completions.create(
            **request_args
        )

        got_stream = isinstance(response, Stream)
        assert got_stream or effective_callback is None

        replies: List[ChatMessage]
        if not got_stream:
            assert isinstance(response, ChatCompletion), "Unexpected response type for non-streaming request."
            replies = []
            for choice in response.choices:
                replies.append(self._convert_chat_completion_to_chat_message(response, choice))
        else:
            replies = self._handle_stream_response(
                response,  # type: ignore
                effective_callback,  # type: ignore
            )

        # Post-processing: warn about replies that were cut short.
        for reply in replies:
            self._check_finish_reason(reply.meta)

        return {"replies": replies}

    def _prepare_api_call(  # noqa: PLR0913
        self,
        *,
        messages: List[ChatMessage],
        streaming_callback: Optional[StreamingCallbackT] = None,
        generation_kwargs: Optional[Dict[str, Any]] = None,
        tools: Optional[List[Tool]] = None,
        tools_strict: Optional[bool] = None,
    ) -> Dict[str, Any]:
        """
        Assembles the keyword arguments for `client.chat.completions.create`.

        :param messages: The conversation to send.
        :param streaming_callback: The callback in effect for this call; streaming is requested
            exactly when it is not `None`.
        :param generation_kwargs: Per-call generation parameters, layered on top of the ones
            stored on the component.
        :param tools: Per-call tools; the component's tools are used when this is empty or `None`.
        :param tools_strict: Per-call strictness flag; the component's flag is used when `None`.
        :returns: The argument dictionary. Entries from the merged generation kwargs are applied
            last, so they replace any other entry with the same key.
        :raises ValueError: If streaming is requested together with `n` greater than 1.
        """
        # Component-level kwargs first, call-level kwargs on top.
        merged_kwargs = dict(self.generation_kwargs)
        merged_kwargs.update(generation_kwargs or {})

        # Convert the ChatMessage objects to the format expected by the OpenAI API.
        formatted_messages = [message.to_openai_dict_format() for message in messages]

        active_tools = tools or self.tools
        strict = self.tools_strict if tools_strict is None else tools_strict
        _check_duplicate_tool_names(active_tools)

        tools_payload: Dict[str, Any] = {}
        if active_tools:
            definitions = []
            for tool in active_tools:
                function_spec = dict(tool.tool_spec)
                # The `strict` entry is only added when strict mode is on.
                if strict:
                    function_spec["strict"] = strict
                definitions.append({"type": "function", "function": function_spec})
            tools_payload["tools"] = definitions

        stream = streaming_callback is not None
        n_responses = merged_kwargs.pop("n", 1)
        if stream and n_responses > 1:
            raise ValueError("Cannot stream multiple responses, please set n=1.")

        api_args: Dict[str, Any] = {
            "model": self.model,
            "messages": formatted_messages,  # type: ignore[arg-type] # openai expects list of specific message types
            "stream": stream,
            "n": n_responses,
        }
        api_args.update(tools_payload)
        api_args.update(merged_kwargs)
        return api_args

    def _handle_stream_response(self, chat_completion: Stream, callback: StreamingCallbackT) -> List[ChatMessage]:
        """
        Consumes a streaming response, forwards each chunk to the callback and builds the final reply.

        :param chat_completion: The stream returned by the OpenAI client.
        :param callback: The function invoked with every converted `StreamingChunk`.
        :returns:
            A list with the single ChatMessage assembled from all chunks, or an empty list when the
            stream delivered no chunk that carries a choice.
        """
        chunks: List[StreamingChunk] = []
        # Last chunk that actually carried a choice; its model and finish reason feed the reply meta.
        last_chunk = None

        for chunk in chat_completion:  # pylint: disable=not-an-iterable
            # With `stream_options={"include_usage": True}` OpenAI sends a final chunk whose `choices`
            # list is empty. It has no delta to convert, so skip it instead of failing the assert below.
            if not chunk.choices:
                continue

            assert len(chunk.choices) == 1, "Streaming responses should have only one choice."
            chunk_delta: StreamingChunk = self._convert_chat_completion_chunk_to_streaming_chunk(chunk)
            chunks.append(chunk_delta)
            last_chunk = chunk

            callback(chunk_delta)

        if last_chunk is None:
            # Nothing to assemble: avoid passing `None` and an empty chunk list downstream,
            # which would fail with an unrelated IndexError.
            logger.warning("The streaming response contained no chunks with choices. No reply was generated.")
            return []

        return [self._convert_streaming_chunks_to_chat_message(last_chunk, chunks)]

    def _check_finish_reason(self, meta: Dict[str, Any]) -> None:
        """
        Logs a warning when a completion was cut short.

        :param meta: The metadata of a reply. Its `finish_reason` entry is always read; `index` is
            read only when a warning is emitted.
        """
        reason = meta["finish_reason"]

        if reason == "length":
            logger.warning(
                "The completion for index {index} has been truncated before reaching a natural stopping point. "
                "Increase the max_tokens parameter to allow for longer completions.",
                index=meta["index"],
                finish_reason=reason,
            )
        elif reason == "content_filter":
            logger.warning(
                "The completion for index {index} has been truncated due to the content filter.",
                index=meta["index"],
                finish_reason=reason,
            )

    def _convert_streaming_chunks_to_chat_message(self, chunk: Any, chunks: List[StreamingChunk]) -> ChatMessage:
        """
        Connects the streaming chunks into a single ChatMessage.

        The text is the concatenation of all chunk contents. Tool calls are rebuilt from the
        `tool_calls` deltas stored in each chunk's meta. A tool call whose accumulated arguments
        are not valid JSON is skipped with a warning.

        :param chunk: The last chunk returned by the OpenAI API; its `model` and the finish reason
            of its first choice go into the reply meta.
        :param chunks: The list of all `StreamingChunk` objects.
        :returns: The assembled assistant ChatMessage.
        """
        text = "".join(streamed.content for streamed in chunks)

        # Accumulate the fragments of each tool call under the delta's own `index`. The position of a
        # delta inside one chunk's list is not a stable identifier: with parallel tool calls, later
        # calls arrive as single-element lists carrying index 1, 2, ... and would otherwise be merged
        # into the first call. Scanning every chunk also covers streams whose first chunk carries no
        # tool call.
        payloads: Dict[Any, Dict[str, str]] = {}
        for streamed in chunks:
            deltas = streamed.meta.get("tool_calls") or []
            for position, delta in enumerate(deltas):
                # Fall back to the list position for delta objects that expose no index.
                key = getattr(delta, "index", None)
                if key is None:
                    key = position

                payload = payloads.setdefault(key, {"id": "", "name": "", "arguments": ""})
                # Keep the last non-empty ID seen for this call.
                payload["id"] = delta.id or payload["id"]
                if delta.function:
                    payload["name"] += delta.function.name or ""
                    payload["arguments"] += delta.function.arguments or ""

        tool_calls = []
        for payload in payloads.values():
            arguments_str = payload["arguments"]
            try:
                arguments = json.loads(arguments_str)
            except json.JSONDecodeError:
                logger.warning(
                    "OpenAI returned a malformed JSON string for tool call arguments. This tool call "
                    "will be skipped. To always generate a valid JSON, set `tools_strict` to `True`. "
                    "Tool call ID: {_id}, Tool name: {_name}, Arguments: {_arguments}",
                    _id=payload["id"],
                    _name=payload["name"],
                    _arguments=arguments_str,
                )
            else:
                tool_calls.append(ToolCall(id=payload["id"], tool_name=payload["name"], arguments=arguments))

        meta = {
            "model": chunk.model,
            "index": 0,
            "finish_reason": chunk.choices[0].finish_reason,
            "usage": {},  # we don't have usage data for streaming responses
        }

        return ChatMessage.from_assistant(text=text, tool_calls=tool_calls, meta=meta)

    def _convert_chat_completion_to_chat_message(self, completion: ChatCompletion, choice: Choice) -> ChatMessage:
        """
        Builds a ChatMessage from one choice of a non-streaming OpenAI response.

        Tool calls whose arguments are not valid JSON are left out and reported with a warning.

        :param completion: The full completion returned by the OpenAI API; provides `model` and `usage`.
        :param choice: The choice to convert; provides the message, `index` and `finish_reason`.
        :return: The resulting assistant ChatMessage.
        """
        openai_message: ChatCompletionMessage = choice.message

        parsed_tool_calls = []
        for call in openai_message.tool_calls or []:
            raw_arguments = call.function.arguments
            try:
                parsed_arguments = json.loads(raw_arguments)
            except json.JSONDecodeError:
                logger.warning(
                    "OpenAI returned a malformed JSON string for tool call arguments. This tool call "
                    "will be skipped. To always generate a valid JSON, set `tools_strict` to `True`. "
                    "Tool call ID: {_id}, Tool name: {_name}, Arguments: {_arguments}",
                    _id=call.id,
                    _name=call.function.name,
                    _arguments=raw_arguments,
                )
                continue
            parsed_tool_calls.append(ToolCall(id=call.id, tool_name=call.function.name, arguments=parsed_arguments))

        reply = ChatMessage.from_assistant(text=openai_message.content, tool_calls=parsed_tool_calls)

        # Attach response-level details to the message metadata.
        response_meta = {
            "model": completion.model,
            "index": choice.index,
            "finish_reason": choice.finish_reason,
            "usage": dict(completion.usage or {}),
        }
        reply._meta.update(response_meta)
        return reply

    def _convert_chat_completion_chunk_to_streaming_chunk(self, chunk: ChatCompletionChunk) -> StreamingChunk:
        """
        Wraps one streaming chunk from the OpenAI API into a StreamingChunk.

        Only the first choice of the chunk is read. Its text delta becomes the content (an empty
        string when there is none), and the tool-call deltas go into the meta so that
        `_convert_streaming_chunks_to_chat_message` can join them later.

        :param chunk: The chunk returned by the OpenAI API.
        :return: The StreamingChunk.
        """
        first_choice: ChunkChoice = chunk.choices[0]
        delta = first_choice.delta

        # The streamed content is the text delta only; tool calls travel in the meta.
        streaming_chunk = StreamingChunk(delta.content or "")

        extra_meta = {
            "model": chunk.model,
            "index": first_choice.index,
            "tool_calls": delta.tool_calls,
            "finish_reason": first_choice.finish_reason,
        }
        streaming_chunk.meta.update(extra_meta)
        return streaming_chunk

1	# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
2	#
3	# SPDX-License-Identifier: Apache-2.0
4
5	import json	1✔
6	import os	1✔
7	from typing import Any, Callable, Dict, List, Optional, Union	1✔
8
9	from openai import OpenAI, Stream	1✔
10	from openai.types.chat import ChatCompletion, ChatCompletionChunk, ChatCompletionMessage	1✔
11	from openai.types.chat.chat_completion import Choice	1✔
12	from openai.types.chat.chat_completion_chunk import Choice as ChunkChoice	1✔
13
14	from haystack import component, default_from_dict, default_to_dict, logging	1✔
15	from haystack.dataclasses import ChatMessage, StreamingChunk, ToolCall	1✔
16	from haystack.tools.tool import Tool, _check_duplicate_tool_names, deserialize_tools_inplace	1✔
17	from haystack.utils import Secret, deserialize_callable, deserialize_secrets_inplace, serialize_callable	1✔
18
19	logger = logging.getLogger(__name__)	1✔
20
21
22	StreamingCallbackT = Callable[[StreamingChunk], None]	1✔
23
24
25	@component	1✔
26	class OpenAIChatGenerator:	1✔
27	"""
28	Completes chats using OpenAI's large language models (LLMs).
29
30	It works with the gpt-4 and gpt-3.5-turbo models and supports streaming responses
31	from OpenAI API. It uses [ChatMessage](https://docs.haystack.deepset.ai/docs/chatmessage)
32	format in input and output.
33
34	You can customize how the text is generated by passing parameters to the
35	OpenAI API. Use the `**generation_kwargs` argument when you initialize
36	the component or when you run it. Any parameter that works with
37	`openai.ChatCompletion.create` will work here too.
38
39	For details on OpenAI API parameters, see
40	[OpenAI documentation](https://platform.openai.com/docs/api-reference/chat).
41
42	### Usage example
43
44	```python
45	from haystack.components.generators.chat import OpenAIChatGenerator
46	from haystack.dataclasses import ChatMessage
47
48	messages = [ChatMessage.from_user("What's Natural Language Processing?")]
49
50	client = OpenAIChatGenerator()
51	response = client.run(messages)
52	print(response)
53	```
54	Output:
55	```
56	{'replies':
57	[ChatMessage(content='Natural Language Processing (NLP) is a branch of artificial intelligence
58	that focuses on enabling computers to understand, interpret, and generate human language in
59	a way that is meaningful and useful.',
60	role=<ChatRole.ASSISTANT: 'assistant'>, name=None,
61	meta={'model': 'gpt-4o-mini', 'index': 0, 'finish_reason': 'stop',
62	'usage': {'prompt_tokens': 15, 'completion_tokens': 36, 'total_tokens': 51}})
63	]
64	}
65	```
66	"""
67
68	def __init__( # pylint: disable=too-many-positional-arguments	1✔
69	self,
70	api_key: Secret = Secret.from_env_var("OPENAI_API_KEY"),
71	model: str = "gpt-4o-mini",
72	streaming_callback: Optional[StreamingCallbackT] = None,
73	api_base_url: Optional[str] = None,
74	organization: Optional[str] = None,
75	generation_kwargs: Optional[Dict[str, Any]] = None,
76	timeout: Optional[float] = None,
77	max_retries: Optional[int] = None,
78	tools: Optional[List[Tool]] = None,
79	tools_strict: bool = False,
80	):
81	"""
82	Creates an instance of OpenAIChatGenerator. Unless specified otherwise in `model`, uses OpenAI's gpt-4o-mini
83
84	Before initializing the component, you can set the 'OPENAI_TIMEOUT' and 'OPENAI_MAX_RETRIES'
85	environment variables to override the `timeout` and `max_retries` parameters respectively
86	in the OpenAI client.
87
88	:param api_key: The OpenAI API key.
89	You can set it with an environment variable `OPENAI_API_KEY`, or pass with this parameter
90	during initialization.
91	:param model: The name of the model to use.
92	:param streaming_callback: A callback function that is called when a new token is received from the stream.
93	The callback function accepts [StreamingChunk](https://docs.haystack.deepset.ai/docs/data-classes#streamingchunk)
94	as an argument.
95	:param api_base_url: An optional base URL.
96	:param organization: Your organization ID, defaults to `None`. See
97	[production best practices](https://platform.openai.com/docs/guides/production-best-practices/setting-up-your-organization).
98	:param generation_kwargs: Other parameters to use for the model. These parameters are sent directly to
99	the OpenAI endpoint. See OpenAI [documentation](https://platform.openai.com/docs/api-reference/chat) for
100	more details.
101	Some of the supported parameters:
102	- `max_tokens`: The maximum number of tokens the output text can have.
103	- `temperature`: What sampling temperature to use. Higher values mean the model will take more risks.
104	Try 0.9 for more creative applications and 0 (argmax sampling) for ones with a well-defined answer.
105	- `top_p`: An alternative to sampling with temperature, called nucleus sampling, where the model
106	considers the results of the tokens with top_p probability mass. For example, 0.1 means only the tokens
107	comprising the top 10% probability mass are considered.
108	- `n`: How many completions to generate for each prompt. For example, if the LLM gets 3 prompts and n is 2,
109	it will generate two completions for each of the three prompts, ending up with 6 completions in total.
110	- `stop`: One or more sequences after which the LLM should stop generating tokens.
111	- `presence_penalty`: What penalty to apply if a token is already present at all. Bigger values mean
112	the model will be less likely to repeat the same token in the text.
113	- `frequency_penalty`: What penalty to apply if a token has already been generated in the text.
114	Bigger values mean the model will be less likely to repeat the same token in the text.
115	- `logit_bias`: Add a logit bias to specific tokens. The keys of the dictionary are tokens, and the
116	values are the bias to add to that token.
117	:param timeout:
118	Timeout for OpenAI client calls. If not set, it defaults to either the
119	`OPENAI_TIMEOUT` environment variable, or 30 seconds.
120	:param max_retries:
121	Maximum number of retries to contact OpenAI after an internal error.
122	If not set, it defaults to either the `OPENAI_MAX_RETRIES` environment variable, or set to 5.
123	:param tools:
124	A list of tools for which the model can prepare calls.
125	:param tools_strict:
126	Whether to enable strict schema adherence for tool calls. If set to `True`, the model will follow exactly
127	the schema provided in the `parameters` field of the tool definition, but this may increase latency.
128	"""
129	self.api_key = api_key	1✔
130	self.model = model	1✔
131	self.generation_kwargs = generation_kwargs or {}	1✔
132	self.streaming_callback = streaming_callback	1✔
133	self.api_base_url = api_base_url	1✔
134	self.organization = organization	1✔
135	self.timeout = timeout	1✔
136	self.max_retries = max_retries	1✔
137	self.tools = tools	1✔
138	self.tools_strict = tools_strict	1✔
139
140	_check_duplicate_tool_names(tools)	1✔
141
142	if timeout is None:	1✔
143	timeout = float(os.environ.get("OPENAI_TIMEOUT", 30.0))	1✔
144	if max_retries is None:	1✔
145	max_retries = int(os.environ.get("OPENAI_MAX_RETRIES", 5))	1✔
146
147	self.client = OpenAI(	1✔
148	api_key=api_key.resolve_value(),
149	organization=organization,
150	base_url=api_base_url,
151	timeout=timeout,
152	max_retries=max_retries,
153	)
154
155	def _get_telemetry_data(self) -> Dict[str, Any]:	1✔
156	"""
157	Data that is sent to Posthog for usage analytics.
158	"""
159	return {"model": self.model}	×
160
161	def to_dict(self) -> Dict[str, Any]:	1✔
162	"""
163	Serialize this component to a dictionary.
164
165	:returns:
166	The serialized component as a dictionary.
167	"""
168	callback_name = serialize_callable(self.streaming_callback) if self.streaming_callback else None	1✔
169	return default_to_dict(	1✔
170	self,
171	model=self.model,
172	streaming_callback=callback_name,
173	api_base_url=self.api_base_url,
174	organization=self.organization,
175	generation_kwargs=self.generation_kwargs,
176	api_key=self.api_key.to_dict(),
177	timeout=self.timeout,
178	max_retries=self.max_retries,
179	tools=[tool.to_dict() for tool in self.tools] if self.tools else None,
180	tools_strict=self.tools_strict,
181	)
182
183	@classmethod	1✔
184	def from_dict(cls, data: Dict[str, Any]) -> "OpenAIChatGenerator":	1✔
185	"""
186	Deserialize this component from a dictionary.
187
188	:param data: The dictionary representation of this component.
189	:returns:
190	The deserialized component instance.
191	"""
192	deserialize_secrets_inplace(data["init_parameters"], keys=["api_key"])	1✔
193	deserialize_tools_inplace(data["init_parameters"], key="tools")	1✔
194	init_params = data.get("init_parameters", {})	1✔
195	serialized_callback_handler = init_params.get("streaming_callback")	1✔
196	if serialized_callback_handler:	1✔
197	data["init_parameters"]["streaming_callback"] = deserialize_callable(serialized_callback_handler)	1✔
198	return default_from_dict(cls, data)	1✔
199
@component.output_types(replies=List[ChatMessage])
def run(
    self,
    messages: List[ChatMessage],
    streaming_callback: Optional[StreamingCallbackT] = None,
    generation_kwargs: Optional[Dict[str, Any]] = None,
    *,
    tools: Optional[List[Tool]] = None,
    tools_strict: Optional[bool] = None,
):
    """
    Runs a chat completion for the given conversation and returns the generated replies.

    :param messages:
        The conversation to complete, as a list of ChatMessage instances.
    :param streaming_callback:
        A callback function that is called with every chunk received from the stream. When it is not
        provided, the callback stored on the component is used instead.
    :param generation_kwargs:
        Additional keyword arguments for text generation. These parameters will
        override the parameters passed during component initialization.
        For details on OpenAI API parameters, see [OpenAI documentation](https://platform.openai.com/docs/api-reference/chat/create).
    :param tools:
        A list of tools for which the model can prepare calls. If set, it will override the `tools` parameter set
        during component initialization.
    :param tools_strict:
        Whether to enable strict schema adherence for tool calls. If set to `True`, the model will follow exactly
        the schema provided in the `parameters` field of the tool definition, but this may increase latency.
        If set, it will override the `tools_strict` parameter set during component initialization.

    :returns:
        A dictionary with the following key:
        - `replies`: A list containing the generated responses as ChatMessage instances.
    """
    # nothing to complete: answer right away without calling the API
    if len(messages) == 0:
        return {"replies": []}

    # a callback passed at call time takes precedence over the one stored on the component
    callback = streaming_callback or self.streaming_callback

    request_kwargs = self._prepare_api_call(
        messages=messages,
        streaming_callback=callback,
        generation_kwargs=generation_kwargs,
        tools=tools,
        tools_strict=tools_strict,
    )
    response: Union[Stream[ChatCompletionChunk], ChatCompletion] = self.client.chat.completions.create(
        **request_kwargs
    )

    streamed = isinstance(response, Stream)
    # a callback is only ever set when a streaming response was requested
    assert streamed or callback is None

    if not streamed:
        assert isinstance(response, ChatCompletion), "Unexpected response type for non-streaming request."
        # one reply per choice returned by the API
        replies = [self._convert_chat_completion_to_chat_message(response, choice) for choice in response.choices]
    else:
        replies = self._handle_stream_response(
            response,  # type: ignore
            callback,  # type: ignore
        )

    # post-processing: warn about replies that did not finish naturally
    for reply in replies:
        self._check_finish_reason(reply.meta)

    return {"replies": replies}
269
def _prepare_api_call(  # noqa: PLR0913
    self,
    *,
    messages: List[ChatMessage],
    streaming_callback: Optional[StreamingCallbackT] = None,
    generation_kwargs: Optional[Dict[str, Any]] = None,
    tools: Optional[List[Tool]] = None,
    tools_strict: Optional[bool] = None,
) -> Dict[str, Any]:
    """
    Builds the keyword arguments passed to the OpenAI chat completions `create` call.

    :param messages:
        The messages to send; each one is converted with `to_openai_dict_format()`.
    :param streaming_callback:
        When not `None`, a streaming response is requested.
    :param generation_kwargs:
        Per-call generation parameters. They are merged on top of `self.generation_kwargs`.
    :param tools:
        Tools to expose to the model. Falls back to `self.tools` when empty or `None`.
    :param tools_strict:
        Whether to add `"strict": True` to each tool definition. Falls back to `self.tools_strict`
        when `None`.
    :returns:
        A dictionary with the `model`, `messages`, `stream` and `n` entries, plus `tools` (when any
        tool is available) and every remaining generation parameter.
    :raises ValueError:
        If streaming is requested while `n` is greater than 1.
    """
    # merge into a fresh dict so that call-time values win and neither input dict is mutated
    # (the `pop` below therefore only affects this local copy)
    generation_kwargs = {**self.generation_kwargs, **(generation_kwargs or {})}

    # adapt ChatMessage(s) to the format expected by the OpenAI API
    openai_formatted_messages = [message.to_openai_dict_format() for message in messages]

    tools = tools or self.tools
    tools_strict = tools_strict if tools_strict is not None else self.tools_strict
    _check_duplicate_tool_names(tools)

    openai_tools = {}
    if tools:
        # the `strict` key is only added to the function definition when strict mode is enabled
        strict_flag = {"strict": tools_strict} if tools_strict else {}
        tool_definitions = [{"type": "function", "function": {**t.tool_spec, **strict_flag}} for t in tools]
        openai_tools = {"tools": tool_definitions}

    is_streaming = streaming_callback is not None
    # `n` is taken out of the generation kwargs so that it is validated and passed exactly once
    num_responses = generation_kwargs.pop("n", 1)
    if is_streaming and num_responses > 1:
        raise ValueError("Cannot stream multiple responses, please set n=1.")

    return {
        "model": self.model,
        "messages": openai_formatted_messages,  # type: ignore[arg-type] # openai expects list of specific message types
        "stream": is_streaming,
        "n": num_responses,
        **openai_tools,
        **generation_kwargs,
    }
310
def _handle_stream_response(self, chat_completion: Stream, callback: StreamingCallbackT) -> List[ChatMessage]:
    """
    Consumes a streaming response and assembles it into a single ChatMessage.

    Every chunk of the stream is converted to a `StreamingChunk`, collected and handed to the callback.

    :param chat_completion: The stream returned by the OpenAI API.
    :param callback: The function called with each converted `StreamingChunk`.
    :returns: A list holding the one ChatMessage built from all the collected chunks.
    """
    collected: List[StreamingChunk] = []
    # after the loop this holds the last raw chunk of the stream (or None if the stream was empty)
    last_chunk = None

    for last_chunk in chat_completion:  # pylint: disable=not-an-iterable
        assert len(last_chunk.choices) == 1, "Streaming responses should have only one choice."
        streaming_chunk: StreamingChunk = self._convert_chat_completion_chunk_to_streaming_chunk(last_chunk)
        collected.append(streaming_chunk)

        callback(streaming_chunk)

    assembled = self._convert_streaming_chunks_to_chat_message(last_chunk, collected)
    return [assembled]
323
def _check_finish_reason(self, meta: Dict[str, Any]) -> None:
    """
    Logs a warning when a completion stopped because of the length limit or the content filter.

    :param meta: The metadata of a reply; its `finish_reason` and `index` entries are read.
    """
    finish_reason = meta["finish_reason"]

    if finish_reason == "length":
        logger.warning(
            "The completion for index {index} has been truncated before reaching a natural stopping point. "
            "Increase the max_tokens parameter to allow for longer completions.",
            index=meta["index"],
            finish_reason=finish_reason,
        )
    elif finish_reason == "content_filter":
        logger.warning(
            "The completion for index {index} has been truncated due to the content filter.",
            index=meta["index"],
            finish_reason=finish_reason,
        )
338
def _convert_streaming_chunks_to_chat_message(self, chunk: Any, chunks: List[StreamingChunk]) -> ChatMessage:
    """
    Connects the streaming chunks into a single ChatMessage.

    The text of the message is the concatenation of the content of all chunks. Tool calls arrive as
    partial deltas spread over several chunks: the deltas are grouped by their `index` field, so that
    each tool call is rebuilt from its own fragments, wherever in the stream they appear.

    :param chunk: The last chunk returned by the OpenAI API. Its `model` and the `finish_reason` of
        its first choice are copied into the message metadata.
    :param chunks: The list of all `StreamingChunk` objects.
    :returns: The assistant ChatMessage with the joined text, the reconstructed tool calls and the
        metadata. Tool calls whose arguments are not valid JSON are skipped with a warning.
    """
    text = "".join(streaming_chunk.content for streaming_chunk in chunks)

    # partial tool call payloads, keyed by the index of the tool call they belong to
    payloads: Dict[int, Dict[str, str]] = {}
    for streaming_chunk in chunks:
        # deltas is a list of ChoiceDeltaToolCall or ChoiceDeltaFunctionCall
        deltas = streaming_chunk.meta.get("tool_calls") or []

        for position, delta in enumerate(deltas):
            # the index identifies the tool call across chunks, since the ID is not repeated in
            # every delta; fall back to the position in the list when no usable index is present
            delta_index = getattr(delta, "index", None)
            key = delta_index if isinstance(delta_index, int) else position

            payload = payloads.setdefault(key, {"id": "", "name": "", "arguments": ""})
            payload["id"] = delta.id or payload["id"]
            if delta.function:
                payload["name"] += delta.function.name or ""
                payload["arguments"] += delta.function.arguments or ""

    tool_calls = []
    for key in sorted(payloads):
        payload = payloads[key]
        arguments_str = payload["arguments"]
        try:
            arguments = json.loads(arguments_str)
            tool_calls.append(ToolCall(id=payload["id"], tool_name=payload["name"], arguments=arguments))
        except json.JSONDecodeError:
            logger.warning(
                "OpenAI returned a malformed JSON string for tool call arguments. This tool call "
                "will be skipped. To always generate a valid JSON, set `tools_strict` to `True`. "
                "Tool call ID: {_id}, Tool name: {_name}, Arguments: {_arguments}",
                _id=payload["id"],
                _name=payload["name"],
                _arguments=arguments_str,
            )

    meta = {
        "model": chunk.model,
        "index": 0,
        "finish_reason": chunk.choices[0].finish_reason,
        "usage": {},  # we don't have usage data for streaming responses
    }

    return ChatMessage.from_assistant(text=text, tool_calls=tool_calls, meta=meta)
388
def _convert_chat_completion_to_chat_message(self, completion: ChatCompletion, choice: Choice) -> ChatMessage:
    """
    Builds a ChatMessage out of one choice of a non-streaming OpenAI response.

    Tool calls whose arguments are not valid JSON are skipped and reported with a warning.

    :param completion: The completion returned by the OpenAI API.
    :param choice: The choice of the completion to convert.
    :return: The ChatMessage.
    """
    openai_message: ChatCompletionMessage = choice.message

    parsed_tool_calls = []
    for call in openai_message.tool_calls or []:
        raw_arguments = call.function.arguments
        try:
            parsed_arguments = json.loads(raw_arguments)
            parsed_tool_calls.append(
                ToolCall(id=call.id, tool_name=call.function.name, arguments=parsed_arguments)
            )
        except json.JSONDecodeError:
            logger.warning(
                "OpenAI returned a malformed JSON string for tool call arguments. This tool call "
                "will be skipped. To always generate a valid JSON, set `tools_strict` to `True`. "
                "Tool call ID: {_id}, Tool name: {_name}, Arguments: {_arguments}",
                _id=call.id,
                _name=call.function.name,
                _arguments=raw_arguments,
            )

    reply = ChatMessage.from_assistant(text=openai_message.content, tool_calls=parsed_tool_calls)

    # attach the response details to the metadata of the message
    response_meta = {
        "model": completion.model,
        "index": choice.index,
        "finish_reason": choice.finish_reason,
        "usage": dict(completion.usage or {}),
    }
    reply._meta.update(response_meta)
    return reply
426
def _convert_chat_completion_chunk_to_streaming_chunk(self, chunk: ChatCompletionChunk) -> StreamingChunk:
    """
    Wraps one chunk of an OpenAI streaming response into a StreamingChunk.

    Only the first choice of the chunk is considered. Its text delta becomes the content of the
    StreamingChunk (an empty string when there is none), while the tool call deltas are stored in the
    metadata, to be joined later by `_convert_streaming_chunks_to_chat_message`.

    :param chunk: The chunk returned by the OpenAI API.
    :return: The StreamingChunk.
    """
    first_choice: ChunkChoice = chunk.choices[0]
    delta = first_choice.delta

    # the content is empty for chunks that only carry a tool or function call
    streaming_chunk = StreamingChunk(delta.content or "")

    chunk_meta = {
        "model": chunk.model,
        "index": first_choice.index,
        "tool_calls": delta.tool_calls,
        "finish_reason": first_choice.finish_reason,
    }
    streaming_chunk.meta.update(chunk_meta)
    return streaming_chunk

deepset-ai / haystack / 12744218044

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous