19763653080

Committed 28 Nov 2025 12:20PM UTC coverage: 91.571% (+0.02%) from 91.552%

Build # 19763653080

Build Type

Pull #10156

github

Committed by

web-flow

Commit Message

Merge 3e20eec3d into 108204c07

Pull Request #10156: chore: Update code snippets in docs (audio and builders components)

Run Details

13939 of 15222 relevant lines covered (91.57%)

0.92 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

70.45

haystack/components/audio/whisper_remote.py

# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
#
# SPDX-License-Identifier: Apache-2.0

import io
from pathlib import Path
from typing import Any, Optional, Union

from openai import OpenAI

from haystack import Document, component, default_from_dict, default_to_dict, logging
from haystack.dataclasses import ByteStream
from haystack.utils import Secret, deserialize_secrets_inplace
from haystack.utils.http_client import init_http_client

# Module-level logger named after this module, obtained from the `logging` object imported from `haystack`.
logger = logging.getLogger(__name__)


@component
class RemoteWhisperTranscriber:
    """
    Transcribes audio files using OpenAI's Whisper API.

    The component requires an OpenAI API key, see the
    [OpenAI documentation](https://platform.openai.com/docs/api-reference/authentication) for more details.
    For the supported audio formats, languages, and other parameters, see the
    [Whisper API documentation](https://platform.openai.com/docs/guides/speech-to-text).

    ### Usage example

    ```python
    from haystack.components.audio import RemoteWhisperTranscriber
    from haystack.utils import Secret

    whisper = RemoteWhisperTranscriber(api_key=Secret.from_env_var("OPENAI_API_KEY"), model="whisper-1")
    transcription = whisper.run(sources=["test/test_files/audio/answer.wav"])
    ```
    """

    def __init__(  # pylint: disable=too-many-positional-arguments
        self,
        api_key: Secret = Secret.from_env_var("OPENAI_API_KEY"),
        model: str = "whisper-1",
        api_base_url: Optional[str] = None,
        organization: Optional[str] = None,
        http_client_kwargs: Optional[dict[str, Any]] = None,
        **kwargs,
    ):
        """
        Creates an instance of the RemoteWhisperTranscriber component.

        :param api_key:
            OpenAI API key.
            You can set it with an environment variable `OPENAI_API_KEY`, or pass with this parameter
            during initialization.
        :param model:
            Name of the model to use. Currently accepts only `whisper-1`.
        :param api_base_url:
            An optional URL to use as the API base. For details, see the
            OpenAI [documentation](https://platform.openai.com/docs/api-reference/audio).
        :param organization:
            Your OpenAI organization ID. See OpenAI's documentation on
            [Setting Up Your Organization](https://platform.openai.com/docs/guides/production-best-practices/setting-up-your-organization).
        :param http_client_kwargs:
            A dictionary of keyword arguments to configure a custom `httpx.Client` or `httpx.AsyncClient`.
            For more information, see the [HTTPX documentation](https://www.python-httpx.org/api/#client).
        :param kwargs:
            Other optional parameters for the model. These are sent directly to the OpenAI
            endpoint. See OpenAI [documentation](https://platform.openai.com/docs/api-reference/audio) for more details.
            Some of the supported parameters are:
            - `language`: The language of the input audio.
              Provide the input language in ISO-639-1 format
              to improve transcription accuracy and latency.
            - `prompt`: An optional text to guide the model's
              style or continue a previous audio segment.
              The prompt should match the audio language.
            - `response_format`: The format of the transcript
              output. This component only supports `json`; any other
              value is overwritten with `json` and a warning is logged.
            - `temperature`: The sampling temperature, between 0
              and 1. Higher values like 0.8 make the output more
              random, while lower values like 0.2 make it more
              focused and deterministic. If set to 0, the model
              uses log probability to automatically increase the
              temperature until certain thresholds are hit.
        """

        self.organization = organization
        self.model = model
        self.api_base_url = api_base_url
        self.api_key = api_key
        self.http_client_kwargs = http_client_kwargs

        # Only response_format = "json" is supported: warn when the caller asked for
        # something else, then force the value so `run` can always read `content.text`.
        whisper_params = kwargs
        if whisper_params.get("response_format", "json") != "json":
            logger.warning(
                "RemoteWhisperTranscriber only supports 'response_format: json'. This parameter will be overwritten."
            )
        whisper_params["response_format"] = "json"
        self.whisper_params = whisper_params

        # The client is built once here and reused by every `run` call.
        self.client = OpenAI(
            api_key=api_key.resolve_value(),
            organization=organization,
            base_url=api_base_url,
            http_client=init_http_client(self.http_client_kwargs, async_client=False),
        )

    def to_dict(self) -> dict[str, Any]:
        """
        Serializes the component to a dictionary.

        The extra Whisper parameters given to `__init__` as keyword arguments are
        flattened into the init parameters, next to the named ones.

        :returns:
            Dictionary with serialized data.
        """
        return default_to_dict(
            self,
            api_key=self.api_key.to_dict(),
            model=self.model,
            organization=self.organization,
            api_base_url=self.api_base_url,
            http_client_kwargs=self.http_client_kwargs,
            **self.whisper_params,
        )

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "RemoteWhisperTranscriber":
        """
        Deserializes the component from a dictionary.

        :param data:
            The dictionary to deserialize from. The `api_key` entry of its
            `init_parameters`, when present, is deserialized in place.
        :returns:
            The deserialized component.
        """
        # Tolerate a dictionary without "init_parameters" instead of raising KeyError
        # before `default_from_dict` gets a chance to handle it.
        init_params = data.get("init_parameters", {})
        deserialize_secrets_inplace(init_params, keys=["api_key"])
        return default_from_dict(cls, data)

    @component.output_types(documents=list[Document])
    def run(self, sources: list[Union[str, Path, ByteStream]]):
        """
        Transcribes the list of audio files into a list of documents.

        :param sources:
            A list of file paths or `ByteStream` objects containing the audio files to transcribe.

        :returns: A dictionary with the following keys:
            - `documents`: A list of documents, one document for each file.
                The content of each document is the transcribed text, and its
                metadata is the metadata of the corresponding source.
        """
        documents = []

        for source in sources:
            if isinstance(source, ByteStream):
                stream = source
            else:
                # Paths are loaded into a ByteStream; the original path is kept in the metadata.
                stream = ByteStream.from_file_path(Path(source))
                stream.meta["file_path"] = source

            file = io.BytesIO(stream.data)
            # The in-memory file needs a name; presumably the API client uses it to
            # infer the audio format - TODO confirm. Falls back to a generic `.wav` name.
            file.name = str(stream.meta.get("file_path", "__fallback__.wav"))

            content = self.client.audio.transcriptions.create(file=file, model=self.model, **self.whisper_params)
            documents.append(Document(content=content.text, meta=stream.meta))

        return {"documents": documents}

1	# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
2	#
3	# SPDX-License-Identifier: Apache-2.0
4
5	import io	1✔
6	from pathlib import Path	1✔
7	from typing import Any, Optional, Union	1✔
8
9	from openai import OpenAI	1✔
10
11	from haystack import Document, component, default_from_dict, default_to_dict, logging	1✔
12	from haystack.dataclasses import ByteStream	1✔
13	from haystack.utils import Secret, deserialize_secrets_inplace	1✔
14	from haystack.utils.http_client import init_http_client	1✔
15
16	logger = logging.getLogger(__name__)	1✔
17
18
19	@component	1✔
20	class RemoteWhisperTranscriber:	1✔
21	"""
22	Transcribes audio files using the OpenAI's Whisper API.
23
24	The component requires an OpenAI API key, see the
25	[OpenAI documentation](https://platform.openai.com/docs/api-reference/authentication) for more details.
26	For the supported audio formats, languages, and other parameters, see the
27	[Whisper API documentation](https://platform.openai.com/docs/guides/speech-to-text).
28
29	### Usage example
30
31	```python
32	from haystack.components.audio import RemoteWhisperTranscriber
33	from haystack.utils import Secret
34
35	whisper = RemoteWhisperTranscriber(api_key=Secret.from_env_var("OPENAI_API_KEY"), model="whisper-1")
36	transcription = whisper.run(sources=["test/test_files/audio/answer.wav"])
37	```
38	"""
39
40	def __init__( # pylint: disable=too-many-positional-arguments	1✔
41	self,
42	api_key: Secret = Secret.from_env_var("OPENAI_API_KEY"),
43	model: str = "whisper-1",
44	api_base_url: Optional[str] = None,
45	organization: Optional[str] = None,
46	http_client_kwargs: Optional[dict[str, Any]] = None,
47	**kwargs,
48	):
49	"""
50	Creates an instance of the RemoteWhisperTranscriber component.
51
52	:param api_key:
53	OpenAI API key.
54	You can set it with an environment variable `OPENAI_API_KEY`, or pass with this parameter
55	during initialization.
56	:param model:
57	Name of the model to use. Currently accepts only `whisper-1`.
58	:param organization:
59	Your OpenAI organization ID. See OpenAI's documentation on
60	[Setting Up Your Organization](https://platform.openai.com/docs/guides/production-best-practices/setting-up-your-organization).
61	:param api_base:
62	An optional URL to use as the API base. For details, see the
63	OpenAI [documentation](https://platform.openai.com/docs/api-reference/audio).
64	:param http_client_kwargs:
65	A dictionary of keyword arguments to configure a custom `httpx.Client`or `httpx.AsyncClient`.
66	For more information, see the [HTTPX documentation](https://www.python-httpx.org/api/#client).
67	:param kwargs:
68	Other optional parameters for the model. These are sent directly to the OpenAI
69	endpoint. See OpenAI [documentation](https://platform.openai.com/docs/api-reference/audio) for more details.
70	Some of the supported parameters are:
71	- `language`: The language of the input audio.
72	Provide the input language in ISO-639-1 format
73	to improve transcription accuracy and latency.
74	- `prompt`: An optional text to guide the model's
75	style or continue a previous audio segment.
76	The prompt should match the audio language.
77	- `response_format`: The format of the transcript
78	output. This component only supports `json`.
79	- `temperature`: The sampling temperature, between 0
80	and 1. Higher values like 0.8 make the output more
81	random, while lower values like 0.2 make it more
82	focused and deterministic. If set to 0, the model
83	uses log probability to automatically increase the
84	temperature until certain thresholds are hit.
85	"""
86
87	self.organization = organization	1✔
88	self.model = model	1✔
89	self.api_base_url = api_base_url	1✔
90	self.api_key = api_key	1✔
91	self.http_client_kwargs = http_client_kwargs	1✔
92
93	# Only response_format = "json" is supported
94	whisper_params = kwargs	1✔
95	response_format = whisper_params.get("response_format", "json")	1✔
96	if response_format != "json":	1✔
97	logger.warning(	×
98	"RemoteWhisperTranscriber only supports 'response_format: json'. This parameter will be overwritten."
99	)
100	whisper_params["response_format"] = "json"	1✔
101	self.whisper_params = whisper_params	1✔
102	self.client = OpenAI(	1✔
103	api_key=api_key.resolve_value(),
104	organization=organization,
105	base_url=api_base_url,
106	http_client=init_http_client(self.http_client_kwargs, async_client=False),
107	)
108
109	def to_dict(self) -> dict[str, Any]:	1✔
110	"""
111	Serializes the component to a dictionary.
112
113	:returns:
114	Dictionary with serialized data.
115	"""
116	return default_to_dict(	1✔
117	self,
118	api_key=self.api_key.to_dict(),
119	model=self.model,
120	organization=self.organization,
121	api_base_url=self.api_base_url,
122	http_client_kwargs=self.http_client_kwargs,
123	**self.whisper_params,
124	)
125
126	@classmethod	1✔
127	def from_dict(cls, data: dict[str, Any]) -> "RemoteWhisperTranscriber":	1✔
128	"""
129	Deserializes the component from a dictionary.
130
131	:param data:
132	The dictionary to deserialize from.
133	:returns:
134	The deserialized component.
135	"""
136	deserialize_secrets_inplace(data["init_parameters"], keys=["api_key"])	1✔
137	return default_from_dict(cls, data)	1✔
138
139	@component.output_types(documents=list[Document])	1✔
140	def run(self, sources: list[Union[str, Path, ByteStream]]):	1✔
141	"""
142	Transcribes the list of audio files into a list of documents.
143
144	:param sources:
145	A list of file paths or `ByteStream` objects containing the audio files to transcribe.
146
147	:returns: A dictionary with the following keys:
148	- `documents`: A list of documents, one document for each file.
149	The content of each document is the transcribed text.
150	"""
151	documents = []	×
152
153	for source in sources:	×
154	if not isinstance(source, ByteStream):	×
155	path = source	×
156	source = ByteStream.from_file_path(Path(source))	×
157	source.meta["file_path"] = path	×
158
159	file = io.BytesIO(source.data)	×
160	file.name = str(source.meta["file_path"]) if "file_path" in source.meta else "__fallback__.wav"	×
161
162	content = self.client.audio.transcriptions.create(file=file, model=self.model, **self.whisper_params)	×
163	doc = Document(content=content.text, meta=source.meta)	×
164	documents.append(doc)	×
165
166	return {"documents": documents}	×

deepset-ai / haystack / 19763653080

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous