• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

scope3data / scope3ai-py / 13040707031

29 Jan 2025 09:23PM UTC coverage: 96.412% (+15.9%) from 80.557%
13040707031

Pull #84

github

24322d
kevdevg
feat: multimodal output for openai/litellm
Pull Request #84: feat: multimodal output audio for OpenAi and Litellm

33 of 34 new or added lines in 3 files covered. (97.06%)

54 existing lines in 10 files now uncovered.

2472 of 2564 relevant lines covered (96.41%)

3.85 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

94.23
/scope3ai/tracers/utils/multimodal.py
1
import base64
4✔
2
import logging
4✔
3
from io import BytesIO
4✔
4

5
from scope3ai.api.types import ImpactRow
4✔
6
from scope3ai.api.typesgen import Image as RootImage
4✔
7
from scope3ai.tracers.utils.audio import MUTAGEN_MAPPING, _get_audio_duration
4✔
8

9

10
def aggregate_multimodal_image(content: dict, row: ImpactRow) -> None:
    """Record the pixel dimensions of an inline image on ``row``.

    Reads ``content["image_url"]["url"]``. When it is a ``data:`` URL, the
    part after the first comma is base64-decoded and opened with Pillow, and
    a ``"<width>x<height>"`` entry is added to ``row.input_images`` (the list
    is created when it is still ``None``). Any other URL is ignored for now.

    Args:
        content: Message part holding an ``image_url`` mapping with a ``url``.
        row: Impact row whose ``input_images`` is updated in place.
    """
    # Function-scope import: Pillow is loaded on first call, not at module import.
    from PIL import Image

    url = content["image_url"]["url"]
    if not url.startswith("data:"):
        # TODO: not supported yet.
        # Should we actually download the file here just to have the size ??
        return

    # example: data:image/jpeg;base64,....
    # Only the payload after the first comma is used; the header is discarded.
    _, payload = url.split(",", 1)
    # The context manager closes the image once its size has been read.
    with Image.open(BytesIO(base64.b64decode(payload))) as image:
        width, height = image.size
    size = RootImage(root=f"{width}x{height}")

    if row.input_images is None:
        row.input_images = [size]
    else:
        row.input_images.append(size)
1✔
32

33

34
def aggregate_multimodal_audio(content: dict, row: ImpactRow) -> None:
    """Add the duration of an inline audio part to ``row.input_audio_seconds``.

    Reads ``format`` and base64 ``data`` from ``content["input_audio"]``,
    decodes the data, and passes both to ``_get_audio_duration``. A truthy
    result is stored in ``row.input_audio_seconds``, or added to the value
    already there.

    Args:
        content: Message part holding an ``input_audio`` mapping.
        row: Impact row whose ``input_audio_seconds`` is updated in place.

    Raises:
        ValueError: If the audio format is not a key of ``MUTAGEN_MAPPING``.
    """
    input_audio = content["input_audio"]
    audio_format = input_audio["format"]
    b64data = input_audio["data"]
    # Explicit check rather than ``assert`` so validation survives ``python -O``.
    if audio_format not in MUTAGEN_MAPPING:
        raise ValueError(f"Unsupported audio format: {audio_format!r}")

    # decode the base64 data
    audio_data = base64.b64decode(b64data)
    # TODO: accept audio duration as float in AiApi
    duration = _get_audio_duration(audio_format, audio_data)

    # A falsy duration (None or 0) leaves the row untouched.
    if duration:
        if row.input_audio_seconds is None:
            row.input_audio_seconds = duration
        else:
            row.input_audio_seconds += duration
4✔
50

51

52
def aggregate_multimodal_audio_content_output(
    content: str, audio_format: str, row: ImpactRow
) -> None:
    """Add the duration of base64 output audio to ``row.output_audio_seconds``.

    Decodes ``content`` from base64 and passes it with ``audio_format`` to
    ``_get_audio_duration``. A truthy result is stored in
    ``row.output_audio_seconds``, or added to the value already there.

    Args:
        content: Base64-encoded audio payload.
        audio_format: Audio format name; must be a key of ``MUTAGEN_MAPPING``.
        row: Impact row whose ``output_audio_seconds`` is updated in place.

    Raises:
        ValueError: If ``audio_format`` is not a key of ``MUTAGEN_MAPPING``.
    """
    # Explicit check rather than ``assert`` so validation survives ``python -O``.
    if audio_format not in MUTAGEN_MAPPING:
        raise ValueError(f"Unsupported audio format: {audio_format!r}")

    audio_data = base64.b64decode(content)
    duration = _get_audio_duration(audio_format, audio_data)
    # A falsy duration (None or 0) leaves the row untouched.
    if duration:
        if row.output_audio_seconds is None:
            row.output_audio_seconds = duration
        else:
            row.output_audio_seconds += duration
×
64

65

66
def aggregate_multimodal_content(
    content: dict, row: ImpactRow, logger: logging.Logger
) -> None:
    """Route one message part to the aggregator matching its ``type``.

    ``image_url`` parts go to ``aggregate_multimodal_image`` and
    ``input_audio`` parts to ``aggregate_multimodal_audio``; any other type
    is ignored. Any exception raised along the way is logged through
    ``logger`` and not re-raised.

    Args:
        content: A single message part; its ``type`` key selects the handler.
        row: Impact row passed through to the selected handler.
        logger: Logger that receives the error message on failure.
    """
    try:
        kind = content.get("type")
        # Checked in order; the first matching type wins.
        for expected, handler in (
            ("image_url", aggregate_multimodal_image),
            ("input_audio", aggregate_multimodal_audio),
        ):
            if kind == expected:
                handler(content, row)
                break
    except Exception as e:
        logger.error(f"Error processing multimodal content: {e}")
×
77

78

79
def aggregate_multimodal(message: dict, row: ImpactRow, logger: logging.Logger) -> None:
    """Aggregate every multimodal part of ``message`` onto ``row``.

    Args:
        message: Chat message; its ``content`` value is inspected.
        row: Impact row passed to ``aggregate_multimodal_content`` for each part.
        logger: Logger forwarded to ``aggregate_multimodal_content``.
    """
    parts = message.get("content", [])
    # Content that is not a tuple/list is plain text: nothing multimodal to do.
    if not isinstance(parts, (tuple, list)):
        return
    for part in parts:
        aggregate_multimodal_content(part, row, logger)
4✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc