12753874046

Committed 13 Jan 2025 06:40PM UTC coverage: 95.076% (+14.5%) from 80.557%

Build # 12753874046

Build Type

Pull #61

github

Specific Base 3a8d3f

Committed by

kevdevg

Commit Message

fix: vision pillow read bytes

Pull Request Pull Request #61: feat(Hugging face): Vision methods - image classification / image segmentation / object detection

Run Details

179 of 189 new or added lines in 5 files covered. (94.71%)

34 existing lines in 9 files now uncovered.

2008 of 2112 relevant lines covered (95.08%)

3.8 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

98.84

/scope3ai/tracers/huggingface/chat.py

import time
from collections.abc import AsyncIterable, Iterable
from dataclasses import asdict, dataclass
from typing import Any, Callable, Optional, Union

import tiktoken
from huggingface_hub import AsyncInferenceClient, InferenceClient  # type: ignore[import-untyped]
from huggingface_hub import ChatCompletionOutput as _ChatCompletionOutput
from huggingface_hub import ChatCompletionStreamOutput as _ChatCompletionStreamOutput
from requests import Response

from scope3ai.api.types import Scope3AIContext, Model, ImpactRow
from scope3ai.constants import PROVIDERS
from scope3ai.lib import Scope3AI
from scope3ai.response_interceptor.requests_interceptor import requests_response_capture

PROVIDER = PROVIDERS.HUGGINGFACE_HUB.value
HUGGING_FACE_CHAT_TASK = "chat"


@dataclass
class ChatCompletionOutput(_ChatCompletionOutput):
    """Hugging Face chat completion output extended with a ``scope3ai`` field."""
    # Filled in by the non-stream chat wrappers in this module with the value
    # returned by ``Scope3AI.get_instance().submit_impact(...)``; ``None`` by default.
    scope3ai: Optional[Scope3AIContext] = None


@dataclass
class ChatCompletionStreamOutput(_ChatCompletionStreamOutput):
    """Hugging Face streamed chat chunk extended with a ``scope3ai`` field."""
    # Filled in by the streaming chat wrappers in this module when
    # ``submit_impact`` returns a non-None value; ``None`` by default.
    scope3ai: Optional[Scope3AIContext] = None


def huggingface_chat_wrapper(
    wrapped: Callable, instance: InferenceClient, args: Any, kwargs: Any
) -> Union[ChatCompletionOutput, Iterable[ChatCompletionStreamOutput]]:
    """Route a synchronous chat call to the streaming or non-streaming tracer.

    Args:
        wrapped: The original callable being traced.
        instance: The client the call was made on.
        args: Positional arguments of the intercepted call.
        kwargs: Keyword arguments of the intercepted call; a truthy
            ``"stream"`` entry selects the streaming tracer.

    Returns:
        Whatever the selected tracer returns: a single
        ``ChatCompletionOutput`` or an iterable of
        ``ChatCompletionStreamOutput`` chunks.
    """
    handler = (
        huggingface_chat_wrapper_stream
        if kwargs.get("stream", False)
        else huggingface_chat_wrapper_non_stream
    )
    return handler(wrapped, instance, args, kwargs)


def huggingface_chat_wrapper_non_stream(
    wrapped: Callable, instance: InferenceClient, args: Any, kwargs: Any
) -> ChatCompletionOutput:
    """Trace a non-streaming synchronous chat completion.

    Calls ``wrapped`` while capturing the underlying HTTP responses, builds an
    ``ImpactRow`` from the token usage reported on the response, submits it,
    and returns a ``ChatCompletionOutput`` copy of the response with the
    submission result attached as ``scope3ai``.

    The request duration comes from the ``x-compute-time`` header of the first
    captured HTTP response when that header is present and numeric (the value
    is multiplied by 1000 to get milliseconds, so it is presumably expressed
    in seconds -- confirm against the service). Otherwise the wall-clock
    latency of the ``wrapped`` call is used.

    Args:
        wrapped: The original callable being traced.
        instance: The client the call was made on.
        args: Positional arguments forwarded to ``wrapped``.
        kwargs: Keyword arguments forwarded to ``wrapped``.

    Returns:
        The chat completion with its ``scope3ai`` field populated.
    """
    timer_start = time.perf_counter()
    http_response: Optional[Response] = None
    with requests_response_capture() as responses:
        response = wrapped(*args, **kwargs)
        # Read the clock immediately so the fallback latency does not include
        # the model lookup below.
        request_latency = time.perf_counter() - timer_start
        http_responses = responses.get()
        if len(http_responses) > 0:
            http_response = http_responses[0]
    model = (
        instance.model
        or kwargs.get("model")
        or instance.get_recommended_model(HUGGING_FACE_CHAT_TASK)
    )
    compute_time = request_latency
    # Compare against None explicitly: the truthiness of a requests Response
    # reflects its HTTP status, not whether a response was captured.
    if http_response is not None:
        header_value = http_response.headers.get("x-compute-time")
        if header_value is not None:
            try:
                compute_time = float(header_value)
            except (TypeError, ValueError):
                # Unparseable header: keep the measured wall-clock latency.
                compute_time = request_latency
    scope3_row = ImpactRow(
        model=Model(id=model),
        input_tokens=response.usage.prompt_tokens,
        output_tokens=response.usage.completion_tokens,
        request_duration_ms=compute_time * 1000,
        managed_service_id=PROVIDER,
    )
    scope3ai_ctx = Scope3AI.get_instance().submit_impact(scope3_row)
    chat = ChatCompletionOutput(**asdict(response))
    chat.scope3ai = scope3ai_ctx
    return chat


def huggingface_chat_wrapper_stream(
    wrapped: Callable, instance: InferenceClient, args: Any, kwargs: Any
) -> Iterable[ChatCompletionStreamOutput]:
    """Trace a streaming synchronous chat completion, chunk by chunk.

    For every chunk produced by ``wrapped`` an ``ImpactRow`` is submitted
    carrying the number of chunks seen so far as ``output_tokens`` and the
    wall-clock time elapsed since the call started. The chunk is re-yielded
    as a ``ChatCompletionStreamOutput`` whose ``scope3ai`` field is set when
    the submission returns a non-None value.

    Args:
        wrapped: The original callable being traced.
        instance: The client the call was made on.
        args: Positional arguments forwarded to ``wrapped``.
        kwargs: Keyword arguments forwarded to ``wrapped``.

    Yields:
        One ``ChatCompletionStreamOutput`` per chunk of the underlying stream.
    """
    started_at = time.perf_counter()
    chunks = wrapped(*args, **kwargs)
    model_id = (
        instance.model
        or kwargs.get("model")
        or instance.get_recommended_model(HUGGING_FACE_CHAT_TASK)
    )
    # Each chunk is counted as one output token.
    for chunks_seen, raw_chunk in enumerate(chunks, start=1):
        elapsed = time.perf_counter() - started_at
        impact = ImpactRow(
            model=Model(id=model_id),
            output_tokens=chunks_seen,
            request_duration_ms=elapsed * 1000,
            managed_service_id=PROVIDER,
        )
        traced_chunk = ChatCompletionStreamOutput(**asdict(raw_chunk))
        ctx = Scope3AI.get_instance().submit_impact(impact)
        if ctx is not None:
            traced_chunk.scope3ai = ctx
        yield traced_chunk


async def huggingface_async_chat_wrapper(
    wrapped: Callable, instance: AsyncInferenceClient, args: Any, kwargs: Any
) -> Union[ChatCompletionOutput, AsyncIterable[ChatCompletionStreamOutput]]:
    """Route an asynchronous chat call to the streaming or non-streaming tracer.

    Args:
        wrapped: The original awaitable callable being traced.
        instance: The async client the call was made on.
        args: Positional arguments of the intercepted call.
        kwargs: Keyword arguments of the intercepted call; a truthy
            ``"stream"`` entry selects the streaming tracer.

    Returns:
        The awaited ``ChatCompletionOutput`` for non-streaming calls, or the
        (un-awaited) async iterable of ``ChatCompletionStreamOutput`` chunks
        for streaming calls.
    """
    if not kwargs.get("stream", False):
        return await huggingface_async_chat_wrapper_non_stream(
            wrapped, instance, args, kwargs
        )
    # The streaming tracer is an async generator, so it is returned as-is.
    return huggingface_async_chat_wrapper_stream(wrapped, instance, args, kwargs)


async def huggingface_async_chat_wrapper_non_stream(
    wrapped: Callable, instance: AsyncInferenceClient, args: Any, kwargs: Any
) -> ChatCompletionOutput:
    """Trace a non-streaming asynchronous chat completion.

    Awaits ``wrapped``, builds an ``ImpactRow`` from the token usage reported
    on the response and the wall-clock latency of the call, submits it, and
    returns a ``ChatCompletionOutput`` copy of the response with the
    submission result attached as ``scope3ai``.

    Output tokens are taken from ``response.usage.completion_tokens``, the
    same source the synchronous tracer uses, rather than being re-counted
    locally: a local re-count uses a tokenizer unrelated to the served model
    and fails when the message has no text content.

    Args:
        wrapped: The original awaitable callable being traced.
        instance: The async client the call was made on.
        args: Positional arguments forwarded to ``wrapped``.
        kwargs: Keyword arguments forwarded to ``wrapped``.

    Returns:
        The chat completion with its ``scope3ai`` field populated.
    """
    timer_start = time.perf_counter()

    response = await wrapped(*args, **kwargs)
    request_latency = time.perf_counter() - timer_start
    model = (
        instance.model
        or kwargs.get("model")
        or instance.get_recommended_model(HUGGING_FACE_CHAT_TASK)
    )
    scope3_row = ImpactRow(
        model=Model(id=model),
        input_tokens=response.usage.prompt_tokens,
        output_tokens=response.usage.completion_tokens,
        request_duration_ms=request_latency * 1000,
        managed_service_id=PROVIDER,
    )

    scope3ai_ctx = Scope3AI.get_instance().submit_impact(scope3_row)
    chat = ChatCompletionOutput(**asdict(response))
    chat.scope3ai = scope3ai_ctx
    return chat


# TODO: work out how response headers can be read for streamed responses.
async def huggingface_async_chat_wrapper_stream(
    wrapped: Callable, instance: AsyncInferenceClient, args: Any, kwargs: Any
) -> AsyncIterable[ChatCompletionStreamOutput]:
    """Trace a streaming asynchronous chat completion, chunk by chunk.

    For every chunk produced by the awaited ``wrapped`` stream an
    ``ImpactRow`` is submitted carrying the number of chunks seen so far as
    ``output_tokens`` and the wall-clock time elapsed since the call started.
    The chunk is re-yielded as a ``ChatCompletionStreamOutput`` whose
    ``scope3ai`` field is set when the submission returns a non-None value.

    The model is resolved like in the other chat tracers: the client's
    model, then the ``model`` keyword argument, then the recommended model
    for the chat task. A call without an explicit model therefore no longer
    fails with ``KeyError``.

    Args:
        wrapped: The original awaitable callable being traced.
        instance: The async client the call was made on.
        args: Positional arguments forwarded to ``wrapped``.
        kwargs: Keyword arguments forwarded to ``wrapped``.

    Yields:
        One ``ChatCompletionStreamOutput`` per chunk of the underlying stream.
    """
    timer_start = time.perf_counter()
    stream = await wrapped(*args, **kwargs)
    token_count = 0
    model_used = (
        instance.model
        or kwargs.get("model")
        or instance.get_recommended_model(HUGGING_FACE_CHAT_TASK)
    )
    async for chunk in stream:
        # Each chunk is counted as one output token.
        token_count += 1
        request_latency = time.perf_counter() - timer_start
        scope3_row = ImpactRow(
            model=Model(id=model_used),
            output_tokens=token_count,
            # TODO: can we get the header that has the processing time
            request_duration_ms=request_latency * 1000,
            managed_service_id=PROVIDER,
        )
        scope3_ctx = Scope3AI.get_instance().submit_impact(scope3_row)
        chunk_data = ChatCompletionStreamOutput(**asdict(chunk))
        if scope3_ctx is not None:
            chunk_data.scope3ai = scope3_ctx
        yield chunk_data

1	import time	4✔
2	from collections.abc import AsyncIterable, Iterable	4✔
3	from dataclasses import asdict, dataclass	4✔
4	from typing import Any, Callable, Optional, Union	4✔
5
6	import tiktoken	4✔
7	from huggingface_hub import AsyncInferenceClient, InferenceClient # type: ignore[import-untyped]	4✔
8	from huggingface_hub import ChatCompletionOutput as _ChatCompletionOutput	4✔
9	from huggingface_hub import ChatCompletionStreamOutput as _ChatCompletionStreamOutput	4✔
10	from requests import Response	4✔
11
12	from scope3ai.api.types import Scope3AIContext, Model, ImpactRow	4✔
13	from scope3ai.constants import PROVIDERS	4✔
14	from scope3ai.lib import Scope3AI	4✔
15	from scope3ai.response_interceptor.requests_interceptor import requests_response_capture	4✔
16
17	PROVIDER = PROVIDERS.HUGGINGFACE_HUB.value	4✔
18	HUGGING_FACE_CHAT_TASK = "chat"	4✔
19
20
21	@dataclass	4✔
22	class ChatCompletionOutput(_ChatCompletionOutput):	4✔
23	scope3ai: Optional[Scope3AIContext] = None	4✔
24
25
26	@dataclass	4✔
27	class ChatCompletionStreamOutput(_ChatCompletionStreamOutput):	4✔
28	scope3ai: Optional[Scope3AIContext] = None	4✔
29
30
31	def huggingface_chat_wrapper(	4✔
32	wrapped: Callable, instance: InferenceClient, args: Any, kwargs: Any
33	) -> Union[ChatCompletionOutput, Iterable[ChatCompletionStreamOutput]]:
34	if kwargs.get("stream", False):	4✔
35	return huggingface_chat_wrapper_stream(wrapped, instance, args, kwargs)	4✔
36	else:
37	return huggingface_chat_wrapper_non_stream(wrapped, instance, args, kwargs)	4✔
38
39
40	def huggingface_chat_wrapper_non_stream(	4✔
41	wrapped: Callable, instance: InferenceClient, args: Any, kwargs: Any
42	) -> ChatCompletionOutput:
43	timer_start = time.perf_counter()	4✔
44	http_response: Response | None = None	4✔
45	with requests_response_capture() as responses:	4✔
46	response = wrapped(*args, **kwargs)	4✔
47	http_responses = responses.get()	4✔
48	if len(http_responses) > 0:	4✔
49	http_response = http_responses[0]	4✔
50	model = (	4✔
51	instance.model
52	or kwargs.get("model")
53	or instance.get_recommended_model(HUGGING_FACE_CHAT_TASK)
54	)
55	if http_response:	4✔
56	compute_time = http_response.headers.get("x-compute-time")	4✔
57	else:
UNCOV 58	compute_time = time.perf_counter() - timer_start	×
59	scope3_row = ImpactRow(	4✔
60	model=Model(id=model),
61	input_tokens=response.usage.prompt_tokens,
62	output_tokens=response.usage.completion_tokens,
63	request_duration_ms=float(compute_time) * 1000,
64	managed_service_id=PROVIDER,
65	)
66	scope3ai_ctx = Scope3AI.get_instance().submit_impact(scope3_row)	4✔
67	chat = ChatCompletionOutput(**asdict(response))	4✔
68	chat.scope3ai = scope3ai_ctx	4✔
69	return chat	4✔
70
71
72	def huggingface_chat_wrapper_stream(	4✔
73	wrapped: Callable, instance: InferenceClient, args: Any, kwargs: Any
74	) -> Iterable[ChatCompletionStreamOutput]:
75	timer_start = time.perf_counter()	4✔
76	stream = wrapped(*args, **kwargs)	4✔
77	token_count = 0	4✔
78	model = (	4✔
79	instance.model
80	or kwargs.get("model")
81	or instance.get_recommended_model(HUGGING_FACE_CHAT_TASK)
82	)
83	for chunk in stream:	4✔
84	token_count += 1	4✔
85	request_latency = time.perf_counter() - timer_start	4✔
86	scope3_row = ImpactRow(	4✔
87	model=Model(id=model),
88	output_tokens=token_count,
89	request_duration_ms=request_latency * 1000,
90	managed_service_id=PROVIDER,
91	)
92	chunk_data = ChatCompletionStreamOutput(**asdict(chunk))	4✔
93	scope3_ctx = Scope3AI.get_instance().submit_impact(scope3_row)	4✔
94	if scope3_ctx is not None:	4✔
95	chunk_data.scope3ai = scope3_ctx	4✔
96	yield chunk_data	4✔
97
98
99	async def huggingface_async_chat_wrapper(	4✔
100	wrapped: Callable, instance: AsyncInferenceClient, args: Any, kwargs: Any
101	) -> Union[ChatCompletionOutput, AsyncIterable[ChatCompletionStreamOutput]]:
102	if kwargs.get("stream", False):	4✔
103	return huggingface_async_chat_wrapper_stream(wrapped, instance, args, kwargs)	4✔
104	else:
105	return await huggingface_async_chat_wrapper_non_stream(	4✔
106	wrapped, instance, args, kwargs
107	)
108
109
110	async def huggingface_async_chat_wrapper_non_stream(	4✔
111	wrapped: Callable, instance: AsyncInferenceClient, args: Any, kwargs: Any
112	) -> ChatCompletionOutput:
113	timer_start = time.perf_counter()	4✔
114
115	response = await wrapped(*args, **kwargs)	4✔
116	request_latency = time.perf_counter() - timer_start	4✔
117	model = (	4✔
118	instance.model or kwargs.get("model") or instance.get_recommended_model("chat")
119	)
120	encoder = tiktoken.get_encoding("cl100k_base")	4✔
121	output_tokens = len(encoder.encode(response.choices[0].message.content))	4✔
122	scope3_row = ImpactRow(	4✔
123	model=Model(id=model),
124	input_tokens=response.usage.prompt_tokens,
125	output_tokens=output_tokens,
126	request_duration_ms=request_latency * 1000,
127	managed_service_id=PROVIDER,
128	)
129
130	scope3ai_ctx = Scope3AI.get_instance().submit_impact(scope3_row)	4✔
131	chat = ChatCompletionOutput(**asdict(response))	4✔
132	chat.scope3ai = scope3ai_ctx	4✔
133	return chat	4✔
134
135
136	# Todo: How headers works for stream
137	async def huggingface_async_chat_wrapper_stream(	4✔
138	wrapped: Callable, instance: AsyncInferenceClient, args: Any, kwargs: Any
139	) -> AsyncIterable[ChatCompletionStreamOutput]:
140	timer_start = time.perf_counter()	4✔
141	stream = await wrapped(*args, **kwargs)	4✔
142	token_count = 0	4✔
143	model_used = instance.model or kwargs["model"]	4✔
144	async for chunk in stream:	4✔
145	token_count += 1	4✔
146	request_latency = time.perf_counter() - timer_start	4✔
147	scope3_row = ImpactRow(	4✔
148	model=Model(id=model_used),
149	output_tokens=token_count,
150	request_duration_ms=request_latency
151	* 1000, # TODO: can we get the header that has the processing time
152	managed_service_id=PROVIDER,
153	)
154	scope3_ctx = Scope3AI.get_instance().submit_impact(scope3_row)	4✔
155	chunk_data = ChatCompletionStreamOutput(**asdict(chunk))	4✔
156	if scope3_ctx is not None:	4✔
157	chunk_data.scope3ai = scope3_ctx	4✔
158	yield chunk_data	4✔

scope3data / scope3ai-py / 12753874046

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous