13972131258

Committed 20 Mar 2025 02:43PM UTC coverage: 90.021% (-0.03%) from 90.054%

Build # 13972131258

Build Type

Pull #9069

github

Committed by

web-flow

Commit Message

Merge 8371761b0 into 67ab3788e

Pull Request #9069: refactor!: `ChatMessage` serialization-deserialization updates

Run Details

9833 of 10923 relevant lines covered (90.02%)

0.9 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

92.45

haystack/components/generators/chat/azure.py

# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
#
# SPDX-License-Identifier: Apache-2.0

import os
from typing import Any, Callable, Dict, List, Optional, Union

from openai.lib.azure import AsyncAzureADTokenProvider, AsyncAzureOpenAI, AzureADTokenProvider, AzureOpenAI

from haystack import component, default_from_dict, default_to_dict
from haystack.components.generators.chat import OpenAIChatGenerator
from haystack.dataclasses import StreamingChunk
from haystack.tools.tool import Tool, _check_duplicate_tool_names, deserialize_tools_inplace
from haystack.utils import Secret, deserialize_callable, deserialize_secrets_inplace, serialize_callable


@component
class AzureOpenAIChatGenerator(OpenAIChatGenerator):
    """
    Generates text using OpenAI's models on Azure.

    It works with the gpt-4 - type models and supports streaming responses
    from OpenAI API. It uses [ChatMessage](https://docs.haystack.deepset.ai/docs/chatmessage)
    format in input and output.

    You can customize how the text is generated by passing parameters to the
    OpenAI API. Use the `**generation_kwargs` argument when you initialize
    the component or when you run it. Any parameter that works with
    `openai.ChatCompletion.create` will work here too.

    For details on OpenAI API parameters, see
    [OpenAI documentation](https://platform.openai.com/docs/api-reference/chat).

    ### Usage example

    ```python
    from haystack.components.generators.chat import AzureOpenAIChatGenerator
    from haystack.dataclasses import ChatMessage
    from haystack.utils import Secret

    messages = [ChatMessage.from_user("What's Natural Language Processing?")]

    client = AzureOpenAIChatGenerator(
        azure_endpoint="<Your Azure endpoint, e.g. https://your-company.azure.openai.com/>",
        api_key=Secret.from_token("<your-api-key>"),
        azure_deployment="<this is a model name, e.g. gpt-4o-mini>")
    response = client.run(messages)
    print(response)
    ```

    ```
    {'replies':
        [ChatMessage(content='Natural Language Processing (NLP) is a branch of artificial intelligence that focuses on
         enabling computers to understand, interpret, and generate human language in a way that is useful.',
         role=<ChatRole.ASSISTANT: 'assistant'>, name=None,
         meta={'model': 'gpt-4o-mini', 'index': 0, 'finish_reason': 'stop',
         'usage': {'prompt_tokens': 15, 'completion_tokens': 36, 'total_tokens': 51}})]
    }
    ```
    """

    # pylint: disable=super-init-not-called
    def __init__(  # pylint: disable=too-many-positional-arguments
        self,
        azure_endpoint: Optional[str] = None,
        api_version: Optional[str] = "2023-05-15",
        azure_deployment: Optional[str] = "gpt-4o-mini",
        api_key: Optional[Secret] = Secret.from_env_var("AZURE_OPENAI_API_KEY", strict=False),
        azure_ad_token: Optional[Secret] = Secret.from_env_var("AZURE_OPENAI_AD_TOKEN", strict=False),
        organization: Optional[str] = None,
        streaming_callback: Optional[Callable[[StreamingChunk], None]] = None,
        timeout: Optional[float] = None,
        max_retries: Optional[int] = None,
        generation_kwargs: Optional[Dict[str, Any]] = None,
        default_headers: Optional[Dict[str, str]] = None,
        tools: Optional[List[Tool]] = None,
        tools_strict: bool = False,
        *,
        azure_ad_token_provider: Optional[Union[AzureADTokenProvider, AsyncAzureADTokenProvider]] = None,
    ):
        """
        Initialize the Azure OpenAI Chat Generator component.

        :param azure_endpoint: The endpoint of the deployed model, for example `"https://example-resource.azure.openai.com/"`.
            If not set, the `AZURE_OPENAI_ENDPOINT` environment variable is used.
        :param api_version: The version of the API to use. Defaults to 2023-05-15.
        :param azure_deployment: The deployment of the model, usually the model name.
        :param api_key: The API key to use for authentication.
        :param azure_ad_token: [Azure Active Directory token](https://www.microsoft.com/en-us/security/business/identity-access/microsoft-entra-id).
        :param organization: Your organization ID, defaults to `None`. For help, see
        [Setting up your organization](https://platform.openai.com/docs/guides/production-best-practices/setting-up-your-organization).
        :param streaming_callback: A callback function called when a new token is received from the stream.
            It accepts [StreamingChunk](https://docs.haystack.deepset.ai/docs/data-classes#streamingchunk)
            as an argument.
        :param timeout: Timeout for OpenAI client calls. If not set (`None`), it defaults to either the
            `OPENAI_TIMEOUT` environment variable, or 30 seconds. An explicit value, including `0`,
            is passed to the client unchanged.
        :param max_retries: Maximum number of retries to contact OpenAI after an internal error.
            If not set (`None`), it defaults to either the `OPENAI_MAX_RETRIES` environment variable, or set to 5.
            An explicit value, including `0`, is passed to the client unchanged.
        :param generation_kwargs: Other parameters to use for the model. These parameters are sent directly to
            the OpenAI endpoint. For details, see [OpenAI documentation](https://platform.openai.com/docs/api-reference/chat).
            Some of the supported parameters:
            - `max_tokens`: The maximum number of tokens the output text can have.
            - `temperature`: The sampling temperature to use. Higher values mean the model takes more risks.
                Try 0.9 for more creative applications and 0 (argmax sampling) for ones with a well-defined answer.
            - `top_p`: Nucleus sampling is an alternative to sampling with temperature, where the model considers
                tokens with a top_p probability mass. For example, 0.1 means only the tokens comprising
                the top 10% probability mass are considered.
            - `n`: The number of completions to generate for each prompt. For example, with 3 prompts and n=2,
                the LLM will generate two completions per prompt, resulting in 6 completions total.
            - `stop`: One or more sequences after which the LLM should stop generating tokens.
            - `presence_penalty`: The penalty applied if a token is already present.
                Higher values make the model less likely to repeat the token.
            - `frequency_penalty`: Penalty applied if a token has already been generated.
                Higher values make the model less likely to repeat the token.
            - `logit_bias`: Adds a logit bias to specific tokens. The keys of the dictionary are tokens, and the
                values are the bias to add to that token.
        :param default_headers: Default headers to use for the AzureOpenAI client.
        :param tools:
            A list of tools for which the model can prepare calls.
        :param tools_strict:
            Whether to enable strict schema adherence for tool calls. If set to `True`, the model will follow exactly
            the schema provided in the `parameters` field of the tool definition, but this may increase latency.
        :param azure_ad_token_provider: A function that returns an Azure Active Directory token, will be invoked on
            every request.
        :raises ValueError: If no Azure endpoint is available (neither the argument nor the
            `AZURE_OPENAI_ENDPOINT` environment variable), or if both `api_key` and `azure_ad_token` are `None`.
        """
        # We intentionally do not call super().__init__ here because we only need to instantiate the client to interact
        # with the API.

        # Why is this here?
        # AzureOpenAI init is forcing us to use an init method that takes either base_url or azure_endpoint as not
        # None init parameters. This way we accommodate the use case where env var AZURE_OPENAI_ENDPOINT is set instead
        # of passing it as a parameter.
        azure_endpoint = azure_endpoint or os.environ.get("AZURE_OPENAI_ENDPOINT")
        if not azure_endpoint:
            raise ValueError("Please provide an Azure endpoint or set the environment variable AZURE_OPENAI_ENDPOINT.")

        if api_key is None and azure_ad_token is None:
            raise ValueError("Please provide an API key or an Azure Active Directory token.")

        # The check above makes mypy incorrectly infer that api_key is never None,
        # which propagates the incorrect type.
        self.api_key = api_key  # type: ignore
        self.azure_ad_token = azure_ad_token
        self.generation_kwargs = generation_kwargs or {}
        self.streaming_callback = streaming_callback
        self.api_version = api_version
        self.azure_endpoint = azure_endpoint
        self.azure_deployment = azure_deployment
        self.organization = organization
        self.model = azure_deployment or "gpt-4o-mini"

        # Fall back to the environment/default only when the caller passed nothing. A plain `or` would also
        # discard an explicit 0 (for example `max_retries=0`) because 0 is falsy.
        if timeout is None:
            timeout = float(os.environ.get("OPENAI_TIMEOUT", "30.0"))
        if max_retries is None:
            max_retries = int(os.environ.get("OPENAI_MAX_RETRIES", "5"))
        self.timeout = timeout
        self.max_retries = max_retries

        self.default_headers = default_headers or {}
        self.azure_ad_token_provider = azure_ad_token_provider

        _check_duplicate_tool_names(tools)
        self.tools = tools
        self.tools_strict = tools_strict

        # The same keyword arguments configure both the sync and the async client.
        client_args: Dict[str, Any] = {
            "api_version": api_version,
            "azure_endpoint": azure_endpoint,
            "azure_deployment": azure_deployment,
            "api_key": api_key.resolve_value() if api_key is not None else None,
            "azure_ad_token": azure_ad_token.resolve_value() if azure_ad_token is not None else None,
            "organization": organization,
            "timeout": self.timeout,
            "max_retries": self.max_retries,
            "default_headers": self.default_headers,
            "azure_ad_token_provider": azure_ad_token_provider,
        }

        self.client = AzureOpenAI(**client_args)
        self.async_client = AsyncAzureOpenAI(**client_args)

    def to_dict(self) -> Dict[str, Any]:
        """
        Serialize this component to a dictionary.

        Callables (`streaming_callback`, `azure_ad_token_provider`) are stored via `serialize_callable`,
        secrets via their own `to_dict()`, and tools via `Tool.to_dict()`. Unset values are stored as `None`.

        :returns:
            The serialized component as a dictionary.
        """
        callback_name = serialize_callable(self.streaming_callback) if self.streaming_callback else None
        azure_ad_token_provider_name = None
        if self.azure_ad_token_provider:
            azure_ad_token_provider_name = serialize_callable(self.azure_ad_token_provider)
        return default_to_dict(
            self,
            azure_endpoint=self.azure_endpoint,
            azure_deployment=self.azure_deployment,
            organization=self.organization,
            api_version=self.api_version,
            streaming_callback=callback_name,
            generation_kwargs=self.generation_kwargs,
            timeout=self.timeout,
            max_retries=self.max_retries,
            api_key=self.api_key.to_dict() if self.api_key is not None else None,
            azure_ad_token=self.azure_ad_token.to_dict() if self.azure_ad_token is not None else None,
            default_headers=self.default_headers,
            tools=[tool.to_dict() for tool in self.tools] if self.tools else None,
            tools_strict=self.tools_strict,
            azure_ad_token_provider=azure_ad_token_provider_name,
        )

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "AzureOpenAIChatGenerator":
        """
        Deserialize this component from a dictionary.

        The `init_parameters` entry of `data` is modified in place: serialized secrets, tools and
        callables are replaced by their deserialized objects before the component is built.

        :param data: The dictionary representation of this component.
        :returns:
            The deserialized component instance.
        """
        # Fetch the parameters once and use the same dict throughout. When the key is present this is the
        # very object stored in `data`, so the in-place updates below are seen by `default_from_dict`.
        init_params = data.get("init_parameters", {})
        deserialize_secrets_inplace(init_params, keys=["api_key", "azure_ad_token"])
        deserialize_tools_inplace(init_params, key="tools")

        serialized_callback_handler = init_params.get("streaming_callback")
        if serialized_callback_handler:
            init_params["streaming_callback"] = deserialize_callable(serialized_callback_handler)

        serialized_azure_ad_token_provider = init_params.get("azure_ad_token_provider")
        if serialized_azure_ad_token_provider:
            init_params["azure_ad_token_provider"] = deserialize_callable(serialized_azure_ad_token_provider)

        return default_from_dict(cls, data)

1	# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
2	#
3	# SPDX-License-Identifier: Apache-2.0
4
5	import os	1✔
6	from typing import Any, Callable, Dict, List, Optional, Union	1✔
7
8	from openai.lib.azure import AsyncAzureADTokenProvider, AsyncAzureOpenAI, AzureADTokenProvider, AzureOpenAI	1✔
9
10	from haystack import component, default_from_dict, default_to_dict	1✔
11	from haystack.components.generators.chat import OpenAIChatGenerator	1✔
12	from haystack.dataclasses import StreamingChunk	1✔
13	from haystack.tools.tool import Tool, _check_duplicate_tool_names, deserialize_tools_inplace	1✔
14	from haystack.utils import Secret, deserialize_callable, deserialize_secrets_inplace, serialize_callable	1✔
15
16
17	@component	1✔
18	class AzureOpenAIChatGenerator(OpenAIChatGenerator):	1✔
19	"""
20	Generates text using OpenAI's models on Azure.
21
22	It works with the gpt-4 - type models and supports streaming responses
23	from OpenAI API. It uses [ChatMessage](https://docs.haystack.deepset.ai/docs/chatmessage)
24	format in input and output.
25
26	You can customize how the text is generated by passing parameters to the
27	OpenAI API. Use the `**generation_kwargs` argument when you initialize
28	the component or when you run it. Any parameter that works with
29	`openai.ChatCompletion.create` will work here too.
30
31	For details on OpenAI API parameters, see
32	[OpenAI documentation](https://platform.openai.com/docs/api-reference/chat).
33
34	### Usage example
35
36	```python
37	from haystack.components.generators.chat import AzureOpenAIChatGenerator
38	from haystack.dataclasses import ChatMessage
39	from haystack.utils import Secret
40
41	messages = [ChatMessage.from_user("What's Natural Language Processing?")]
42
43	client = AzureOpenAIChatGenerator(
44	azure_endpoint="<Your Azure endpoint e.g. `https://your-company.azure.openai.com/>",
45	api_key=Secret.from_token("<your-api-key>"),
46	azure_deployment="<this a model name, e.g. gpt-4o-mini>")
47	response = client.run(messages)
48	print(response)
49	```
50
51	```
52	{'replies':
53	[ChatMessage(content='Natural Language Processing (NLP) is a branch of artificial intelligence that focuses on
54	enabling computers to understand, interpret, and generate human language in a way that is useful.',
55	role=<ChatRole.ASSISTANT: 'assistant'>, name=None,
56	meta={'model': 'gpt-4o-mini', 'index': 0, 'finish_reason': 'stop',
57	'usage': {'prompt_tokens': 15, 'completion_tokens': 36, 'total_tokens': 51}})]
58	}
59	```
60	"""
61
62	# pylint: disable=super-init-not-called
63	def __init__( # pylint: disable=too-many-positional-arguments	1✔
64	self,
65	azure_endpoint: Optional[str] = None,
66	api_version: Optional[str] = "2023-05-15",
67	azure_deployment: Optional[str] = "gpt-4o-mini",
68	api_key: Optional[Secret] = Secret.from_env_var("AZURE_OPENAI_API_KEY", strict=False),
69	azure_ad_token: Optional[Secret] = Secret.from_env_var("AZURE_OPENAI_AD_TOKEN", strict=False),
70	organization: Optional[str] = None,
71	streaming_callback: Optional[Callable[[StreamingChunk], None]] = None,
72	timeout: Optional[float] = None,
73	max_retries: Optional[int] = None,
74	generation_kwargs: Optional[Dict[str, Any]] = None,
75	default_headers: Optional[Dict[str, str]] = None,
76	tools: Optional[List[Tool]] = None,
77	tools_strict: bool = False,
78	*,
79	azure_ad_token_provider: Optional[Union[AzureADTokenProvider, AsyncAzureADTokenProvider]] = None,
80	):
81	"""
82	Initialize the Azure OpenAI Chat Generator component.
83
84	:param azure_endpoint: The endpoint of the deployed model, for example `"https://example-resource.azure.openai.com/"`.
85	:param api_version: The version of the API to use. Defaults to 2023-05-15.
86	:param azure_deployment: The deployment of the model, usually the model name.
87	:param api_key: The API key to use for authentication.
88	:param azure_ad_token: [Azure Active Directory token](https://www.microsoft.com/en-us/security/business/identity-access/microsoft-entra-id).
89	:param organization: Your organization ID, defaults to `None`. For help, see
90	[Setting up your organization](https://platform.openai.com/docs/guides/production-best-practices/setting-up-your-organization).
91	:param streaming_callback: A callback function called when a new token is received from the stream.
92	It accepts [StreamingChunk](https://docs.haystack.deepset.ai/docs/data-classes#streamingchunk)
93	as an argument.
94	:param timeout: Timeout for OpenAI client calls. If not set, it defaults to either the
95	`OPENAI_TIMEOUT` environment variable, or 30 seconds.
96	:param max_retries: Maximum number of retries to contact OpenAI after an internal error.
97	If not set, it defaults to either the `OPENAI_MAX_RETRIES` environment variable, or set to 5.
98	:param generation_kwargs: Other parameters to use for the model. These parameters are sent directly to
99	the OpenAI endpoint. For details, see [OpenAI documentation](https://platform.openai.com/docs/api-reference/chat).
100	Some of the supported parameters:
101	- `max_tokens`: The maximum number of tokens the output text can have.
102	- `temperature`: The sampling temperature to use. Higher values mean the model takes more risks.
103	Try 0.9 for more creative applications and 0 (argmax sampling) for ones with a well-defined answer.
104	- `top_p`: Nucleus sampling is an alternative to sampling with temperature, where the model considers
105	tokens with a top_p probability mass. For example, 0.1 means only the tokens comprising
106	the top 10% probability mass are considered.
107	- `n`: The number of completions to generate for each prompt. For example, with 3 prompts and n=2,
108	the LLM will generate two completions per prompt, resulting in 6 completions total.
109	- `stop`: One or more sequences after which the LLM should stop generating tokens.
110	- `presence_penalty`: The penalty applied if a token is already present.
111	Higher values make the model less likely to repeat the token.
112	- `frequency_penalty`: Penalty applied if a token has already been generated.
113	Higher values make the model less likely to repeat the token.
114	- `logit_bias`: Adds a logit bias to specific tokens. The keys of the dictionary are tokens, and the
115	values are the bias to add to that token.
116	:param default_headers: Default headers to use for the AzureOpenAI client.
117	:param tools:
118	A list of tools for which the model can prepare calls.
119	:param tools_strict:
120	Whether to enable strict schema adherence for tool calls. If set to `True`, the model will follow exactly
121	the schema provided in the `parameters` field of the tool definition, but this may increase latency.
122	:param azure_ad_token_provider: A function that returns an Azure Active Directory token, will be invoked on
123	every request.
124	"""
125	# We intentionally do not call super().__init__ here because we only need to instantiate the client to interact
126	# with the API.
127
128	# Why is this here?
129	# AzureOpenAI init is forcing us to use an init method that takes either base_url or azure_endpoint as not
130	# None init parameters. This way we accommodate the use case where env var AZURE_OPENAI_ENDPOINT is set instead
131	# of passing it as a parameter.
132	azure_endpoint = azure_endpoint or os.environ.get("AZURE_OPENAI_ENDPOINT")	1✔
133	if not azure_endpoint:	1✔
134	raise ValueError("Please provide an Azure endpoint or set the environment variable AZURE_OPENAI_ENDPOINT.")	×
135
136	if api_key is None and azure_ad_token is None:	1✔
137	raise ValueError("Please provide an API key or an Azure Active Directory token.")	×
138
139	# The check above makes mypy incorrectly infer that api_key is never None,
140	# which propagates the incorrect type.
141	self.api_key = api_key # type: ignore	1✔
142	self.azure_ad_token = azure_ad_token	1✔
143	self.generation_kwargs = generation_kwargs or {}	1✔
144	self.streaming_callback = streaming_callback	1✔
145	self.api_version = api_version	1✔
146	self.azure_endpoint = azure_endpoint	1✔
147	self.azure_deployment = azure_deployment	1✔
148	self.organization = organization	1✔
149	self.model = azure_deployment or "gpt-4o-mini"	1✔
150	self.timeout = timeout or float(os.environ.get("OPENAI_TIMEOUT", "30.0"))	1✔
151	self.max_retries = max_retries or int(os.environ.get("OPENAI_MAX_RETRIES", "5"))	1✔
152	self.default_headers = default_headers or {}	1✔
153	self.azure_ad_token_provider = azure_ad_token_provider	1✔
154
155	_check_duplicate_tool_names(tools)	1✔
156	self.tools = tools	1✔
157	self.tools_strict = tools_strict	1✔
158
159	client_args: Dict[str, Any] = {	1✔
160	"api_version": api_version,
161	"azure_endpoint": azure_endpoint,
162	"azure_deployment": azure_deployment,
163	"api_key": api_key.resolve_value() if api_key is not None else None,
164	"azure_ad_token": azure_ad_token.resolve_value() if azure_ad_token is not None else None,
165	"organization": organization,
166	"timeout": self.timeout,
167	"max_retries": self.max_retries,
168	"default_headers": self.default_headers,
169	"azure_ad_token_provider": azure_ad_token_provider,
170	}
171
172	self.client = AzureOpenAI(**client_args)	1✔
173	self.async_client = AsyncAzureOpenAI(**client_args)	1✔
174
175	def to_dict(self) -> Dict[str, Any]:	1✔
176	"""
177	Serialize this component to a dictionary.
178
179	:returns:
180	The serialized component as a dictionary.
181	"""
182	callback_name = serialize_callable(self.streaming_callback) if self.streaming_callback else None	1✔
183	azure_ad_token_provider_name = None	1✔
184	if self.azure_ad_token_provider:	1✔
185	azure_ad_token_provider_name = serialize_callable(self.azure_ad_token_provider)	1✔
186	return default_to_dict(	1✔
187	self,
188	azure_endpoint=self.azure_endpoint,
189	azure_deployment=self.azure_deployment,
190	organization=self.organization,
191	api_version=self.api_version,
192	streaming_callback=callback_name,
193	generation_kwargs=self.generation_kwargs,
194	timeout=self.timeout,
195	max_retries=self.max_retries,
196	api_key=self.api_key.to_dict() if self.api_key is not None else None,
197	azure_ad_token=self.azure_ad_token.to_dict() if self.azure_ad_token is not None else None,
198	default_headers=self.default_headers,
199	tools=[tool.to_dict() for tool in self.tools] if self.tools else None,
200	tools_strict=self.tools_strict,
201	azure_ad_token_provider=azure_ad_token_provider_name,
202	)
203
204	@classmethod	1✔
205	def from_dict(cls, data: Dict[str, Any]) -> "AzureOpenAIChatGenerator":	1✔
206	"""
207	Deserialize this component from a dictionary.
208
209	:param data: The dictionary representation of this component.
210	:returns:
211	The deserialized component instance.
212	"""
213	deserialize_secrets_inplace(data["init_parameters"], keys=["api_key", "azure_ad_token"])	1✔
214	deserialize_tools_inplace(data["init_parameters"], key="tools")	1✔
215	init_params = data.get("init_parameters", {})	1✔
216	serialized_callback_handler = init_params.get("streaming_callback")	1✔
217	if serialized_callback_handler:	1✔
218	data["init_parameters"]["streaming_callback"] = deserialize_callable(serialized_callback_handler)	×
219	serialized_azure_ad_token_provider = init_params.get("azure_ad_token_provider")	1✔
220	if serialized_azure_ad_token_provider:	1✔
221	data["init_parameters"]["azure_ad_token_provider"] = deserialize_callable(	×
222	serialized_azure_ad_token_provider
223	)
224	return default_from_dict(cls, data)	1✔

deepset-ai / haystack / 13972131258

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous