
deepset-ai / haystack / build 13972131258

20 Mar 2025 02:43PM UTC coverage: 90.021% (-0.03%) from 90.054%

Pull Request #9069: refactor!: `ChatMessage` serialization-deserialization updates
Merge 8371761b0 into 67ab3788e (github / web-flow)

9833 of 10923 relevant lines covered (90.02%)
0.9 hits per line

Source File: haystack/components/audio/whisper_local.py (92.06% covered)
Uncovered lines in this file: the two RuntimeError raises guarding a missing warm_up() call, and the ByteStream tempfile fallback in _raw_transcribe.
# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
#
# SPDX-License-Identifier: Apache-2.0

import tempfile
from pathlib import Path
from typing import Any, Dict, List, Literal, Optional, Union, get_args

from haystack import Document, component, default_from_dict, default_to_dict
from haystack.dataclasses import ByteStream
from haystack.lazy_imports import LazyImport
from haystack.utils import ComponentDevice

with LazyImport("Run 'pip install \"openai-whisper>=20231106\"' to install whisper.") as whisper_import:
    import whisper

WhisperLocalModel = Literal[
    "base",
    "base.en",
    "large",
    "large-v1",
    "large-v2",
    "large-v3",
    "medium",
    "medium.en",
    "small",
    "small.en",
    "tiny",
    "tiny.en",
]


@component
class LocalWhisperTranscriber:
    """
    Transcribes audio files using OpenAI's Whisper model on your local machine.

    For the supported audio formats, languages, and other parameters, see the
    [Whisper API documentation](https://platform.openai.com/docs/guides/speech-to-text) and the official Whisper
    [GitHub repository](https://github.com/openai/whisper).

    ### Usage example

    ```python
    from haystack.components.audio import LocalWhisperTranscriber

    whisper = LocalWhisperTranscriber(model="small")
    whisper.warm_up()
    transcription = whisper.run(sources=["path/to/audio/file"])
    ```
    """

    def __init__(
        self,
        model: WhisperLocalModel = "large",
        device: Optional[ComponentDevice] = None,
        whisper_params: Optional[Dict[str, Any]] = None,
    ):
        """
        Creates an instance of the LocalWhisperTranscriber component.

        :param model:
            The name of the model to use. Set to one of the following models:
            "tiny", "base", "small", "medium", "large" (default).
            For details on the models and their variants, see the
            [Whisper documentation](https://github.com/openai/whisper?tab=readme-ov-file#available-models-and-languages).
        :param device:
            The device for loading the model. If `None`, automatically selects the default device.
        :param whisper_params:
            Default keyword arguments to pass to the Whisper `transcribe` call. Can be overridden
            per call through the `whisper_params` argument of `run`.
        """
        whisper_import.check()
        if model not in get_args(WhisperLocalModel):
            raise ValueError(
                f"Model name '{model}' not recognized. Choose one among: {', '.join(get_args(WhisperLocalModel))}."
            )
        self.model = model
        self.whisper_params = whisper_params or {}
        self.device = ComponentDevice.resolve_device(device)
        self._model = None

    def warm_up(self) -> None:
        """
        Loads the model in memory.
        """
        if not self._model:
            self._model = whisper.load_model(self.model, device=self.device.to_torch())

    def to_dict(self) -> Dict[str, Any]:
        """
        Serializes the component to a dictionary.

        :returns:
            Dictionary with serialized data.
        """
        return default_to_dict(self, model=self.model, device=self.device.to_dict(), whisper_params=self.whisper_params)

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "LocalWhisperTranscriber":
        """
        Deserializes the component from a dictionary.

        :param data:
            The dictionary to deserialize from.
        :returns:
            The deserialized component.
        """
        init_params = data["init_parameters"]
        if init_params.get("device") is not None:
            init_params["device"] = ComponentDevice.from_dict(init_params["device"])
        return default_from_dict(cls, data)

    @component.output_types(documents=List[Document])
    def run(self, sources: List[Union[str, Path, ByteStream]], whisper_params: Optional[Dict[str, Any]] = None):
        """
        Transcribes a list of audio files into a list of documents.

        For the supported audio formats, languages, and other parameters, see the
        [Whisper API documentation](https://platform.openai.com/docs/guides/speech-to-text) and the official Whisper
        [GitHub repository](https://github.com/openai/whisper).

        :param sources:
            A list of paths or binary streams to transcribe.

        :returns: A dictionary with the following keys:
            - `documents`: A list of documents where each document is a transcribed audio file. The content of
                the document is the transcription text, and the document's metadata contains the values returned by
                the Whisper model, such as the alignment data and the path to the audio file used
                for the transcription.
        """
        if self._model is None:
            raise RuntimeError(
                "The component LocalWhisperTranscriber was not warmed up. Run 'warm_up()' before calling 'run()'."
            )

        if whisper_params is None:
            whisper_params = self.whisper_params

        documents = self.transcribe(sources, **whisper_params)
        return {"documents": documents}

    def transcribe(self, sources: List[Union[str, Path, ByteStream]], **kwargs) -> List[Document]:
        """
        Transcribes the audio files into a list of Documents, one for each input file.

        For the supported audio formats, languages, and other parameters, see the
        [Whisper API documentation](https://platform.openai.com/docs/guides/speech-to-text) and the official Whisper
        [GitHub repository](https://github.com/openai/whisper).

        :param sources:
            A list of paths or binary streams to transcribe.
        :returns:
            A list of Documents, one for each file.
        """
        transcriptions = self._raw_transcribe(sources, **kwargs)
        documents = []
        for path, transcript in transcriptions.items():
            content = transcript.pop("text")
            doc = Document(content=content, meta={"audio_file": path, **transcript})
            documents.append(doc)
        return documents

    def _raw_transcribe(self, sources: List[Union[str, Path, ByteStream]], **kwargs) -> Dict[Path, Any]:
        """
        Transcribes the given audio files. Returns the output of the model, a dictionary, for each input file.

        For the supported audio formats, languages, and other parameters, see the
        [Whisper API documentation](https://platform.openai.com/docs/guides/speech-to-text) and the official Whisper
        [GitHub repository](https://github.com/openai/whisper).

        :param sources:
            A list of paths or binary streams to transcribe.
        :returns:
            A dictionary mapping 'file_path' to 'transcription'.
        """
        if self._model is None:
            raise RuntimeError("Model is not loaded, please run 'warm_up()' before calling 'run()'")

        return_segments = kwargs.pop("return_segments", False)
        transcriptions = {}

        for source in sources:
            path = Path(source) if not isinstance(source, ByteStream) else source.meta.get("file_path")

            if isinstance(source, ByteStream) and path is None:
                with tempfile.NamedTemporaryFile(delete=False) as fp:
                    path = Path(fp.name)
                    source.to_file(path)

            transcription = self._model.transcribe(str(path), **kwargs)

            if not return_segments:
                transcription.pop("segments", None)

            transcriptions[path] = transcription

        return transcriptions
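
The file above fully defines the component's public API, so a short end-to-end sketch may help tie the pieces together. This is illustrative only: `audio/sample.wav` is a hypothetical path, and it assumes `openai-whisper` is installed locally.

```python
from haystack.components.audio import LocalWhisperTranscriber
from haystack.dataclasses import ByteStream

# "audio/sample.wav" is a placeholder path, not a file shipped with Haystack.
transcriber = LocalWhisperTranscriber(model="tiny", whisper_params={"language": "en"})
transcriber.warm_up()  # loads the Whisper weights; run() raises a RuntimeError without this

# Per-call whisper_params replace (not merge with) the defaults passed at init.
result = transcriber.run(sources=["audio/sample.wav"], whisper_params={"return_segments": True})
for doc in result["documents"]:
    print(doc.content)               # the transcription text
    print(doc.meta["audio_file"])    # path of the transcribed file
    print(doc.meta.get("segments"))  # alignment data, kept because return_segments=True

# Binary streams also work; without a "file_path" entry in meta, the component
# writes the bytes to a temporary file before transcribing.
with open("audio/sample.wav", "rb") as f:
    stream = ByteStream(data=f.read(), meta={"file_path": "audio/sample.wav"})
result = transcriber.run(sources=[stream])

# Serialization round-trip: the device is stored as a dict and restored by from_dict.
restored = LocalWhisperTranscriber.from_dict(transcriber.to_dict())
assert restored.model == "tiny"
```

Note that a per-call `whisper_params` replaces the init-time defaults wholesale rather than merging with them, so pass every option you need on each call.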