• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

deepset-ai / haystack / 19763653080

28 Nov 2025 12:20PM UTC coverage: 91.571% (+0.02%) from 91.552%
19763653080

Pull #10156

github

web-flow
Merge 3e20eec3d into 108204c07
Pull Request #10156: chore: Update code snippets in docs (audio and builders components)

13939 of 15222 relevant lines covered (91.57%)

0.92 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

70.45
haystack/components/audio/whisper_remote.py
1
# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
2
#
3
# SPDX-License-Identifier: Apache-2.0
4

5
import io
1✔
6
from pathlib import Path
1✔
7
from typing import Any, Optional, Union
1✔
8

9
from openai import OpenAI
1✔
10

11
from haystack import Document, component, default_from_dict, default_to_dict, logging
1✔
12
from haystack.dataclasses import ByteStream
1✔
13
from haystack.utils import Secret, deserialize_secrets_inplace
1✔
14
from haystack.utils.http_client import init_http_client
1✔
15

16
# Module-level logger; used below to warn when an unsupported `response_format` is requested.
logger = logging.getLogger(__name__)
1✔
17

18

19
@component
class RemoteWhisperTranscriber:
    """
    Transcribes audio files using OpenAI's Whisper API.

    The component requires an OpenAI API key, see the
    [OpenAI documentation](https://platform.openai.com/docs/api-reference/authentication) for more details.
    For the supported audio formats, languages, and other parameters, see the
    [Whisper API documentation](https://platform.openai.com/docs/guides/speech-to-text).

    ### Usage example

    ```python
    from haystack.components.audio import RemoteWhisperTranscriber
    from haystack.utils import Secret

    whisper = RemoteWhisperTranscriber(api_key=Secret.from_env_var("OPENAI_API_KEY"), model="whisper-1")
    transcription = whisper.run(sources=["test/test_files/audio/answer.wav"])
    ```
    """

    def __init__(  # pylint: disable=too-many-positional-arguments
        self,
        api_key: Secret = Secret.from_env_var("OPENAI_API_KEY"),
        model: str = "whisper-1",
        api_base_url: Optional[str] = None,
        organization: Optional[str] = None,
        http_client_kwargs: Optional[dict[str, Any]] = None,
        **kwargs,
    ):
        """
        Creates an instance of the RemoteWhisperTranscriber component.

        :param api_key:
            OpenAI API key.
            You can set it with an environment variable `OPENAI_API_KEY`, or pass with this parameter
            during initialization.
        :param model:
            Name of the model to use. Currently accepts only `whisper-1`.
        :param api_base_url:
            An optional URL to use as the API base. For details, see the
            OpenAI [documentation](https://platform.openai.com/docs/api-reference/audio).
        :param organization:
            Your OpenAI organization ID. See OpenAI's documentation on
            [Setting Up Your Organization](https://platform.openai.com/docs/guides/production-best-practices/setting-up-your-organization).
        :param http_client_kwargs:
            A dictionary of keyword arguments to configure a custom `httpx.Client`
            (this component always creates a synchronous client).
            For more information, see the [HTTPX documentation](https://www.python-httpx.org/api/#client).
        :param kwargs:
            Other optional parameters for the model. These are sent directly to the OpenAI
            endpoint. See OpenAI [documentation](https://platform.openai.com/docs/api-reference/audio) for more details.
            Some of the supported parameters are:
            - `language`: The language of the input audio.
              Provide the input language in ISO-639-1 format
              to improve transcription accuracy and latency.
            - `prompt`: An optional text to guide the model's
              style or continue a previous audio segment.
              The prompt should match the audio language.
            - `response_format`: The format of the transcript
              output. This component only supports `json`; any other
              value is overwritten with `json` and a warning is logged.
            - `temperature`: The sampling temperature, between 0
              and 1. Higher values like 0.8 make the output more
              random, while lower values like 0.2 make it more
              focused and deterministic. If set to 0, the model
              uses log probability to automatically increase the
              temperature until certain thresholds are hit.
        """

        self.organization = organization
        self.model = model
        self.api_base_url = api_base_url
        self.api_key = api_key
        self.http_client_kwargs = http_client_kwargs

        # Only response_format = "json" is supported: warn if the caller asked for
        # something else, then force the value unconditionally.
        whisper_params = kwargs
        if whisper_params.get("response_format", "json") != "json":
            logger.warning(
                "RemoteWhisperTranscriber only supports 'response_format: json'. This parameter will be overwritten."
            )
        whisper_params["response_format"] = "json"
        self.whisper_params = whisper_params

        self.client = OpenAI(
            api_key=api_key.resolve_value(),
            organization=organization,
            base_url=api_base_url,
            http_client=init_http_client(self.http_client_kwargs, async_client=False),
        )

    def to_dict(self) -> dict[str, Any]:
        """
        Serializes the component to a dictionary.

        The extra Whisper parameters (including the enforced `response_format`) are
        flattened into the init parameters so they round-trip through `from_dict` as `**kwargs`.

        :returns:
            Dictionary with serialized data.
        """
        return default_to_dict(
            self,
            api_key=self.api_key.to_dict(),
            model=self.model,
            organization=self.organization,
            api_base_url=self.api_base_url,
            http_client_kwargs=self.http_client_kwargs,
            **self.whisper_params,
        )

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "RemoteWhisperTranscriber":
        """
        Deserializes the component from a dictionary.

        :param data:
            The dictionary to deserialize from. Its `init_parameters` entry is modified in place:
            the serialized `api_key` is turned back into a `Secret`.
        :returns:
            The deserialized component.
        """
        deserialize_secrets_inplace(data["init_parameters"], keys=["api_key"])
        return default_from_dict(cls, data)

    @component.output_types(documents=list[Document])
    def run(self, sources: list[Union[str, Path, ByteStream]]) -> dict[str, list[Document]]:
        """
        Transcribes the list of audio files into a list of documents.

        :param sources:
            A list of file paths or `ByteStream` objects containing the audio files to transcribe.

        :returns: A dictionary with the following keys:
            - `documents`: A list of documents, one document for each file.
                The content of each document is the transcribed text.
        """
        documents = []

        for source in sources:
            if isinstance(source, ByteStream):
                stream = source
            else:
                # Paths are loaded into a ByteStream; the original path is kept in the metadata.
                stream = ByteStream.from_file_path(Path(source))
                stream.meta["file_path"] = source

            file = io.BytesIO(stream.data)
            # NOTE(review): presumably the API relies on the upload's file name (extension) to
            # detect the audio format, hence the ".wav" fallback for unnamed streams - confirm.
            file.name = str(stream.meta["file_path"]) if "file_path" in stream.meta else "__fallback__.wav"

            content = self.client.audio.transcriptions.create(file=file, model=self.model, **self.whisper_params)
            # Copy the metadata so the document does not share a mutable dict with the input stream.
            documents.append(Document(content=content.text, meta=dict(stream.meta)))

        return {"documents": documents}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc