• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

liqd / roots / 22103202171

17 Feb 2026 02:59PM UTC coverage: 85.026% (-0.01%) from 85.039%
22103202171

push

github

7092 of 8341 relevant lines covered (85.03%)

0.85 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

37.14
/apps/summarization/services.py
1
"""Service for text summarization using AI providers."""
2

1✔
3
import json
1✔
4
import logging
1✔
5
from datetime import timedelta
6

1✔
7
from django.conf import settings
1✔
8
from django.utils import timezone
1✔
9
from pydantic import BaseModel
10
from sentry_sdk import capture_exception
1✔
11

1✔
12
from .models import ProjectSummary
1✔
13
from .providers import AIProvider
1✔
14
from .providers import AIRequest
1✔
15
from .providers import ProviderConfig
1✔
16
from .pydantic_models import DocumentInputItem
17
from .pydantic_models import DocumentSummaryItem
1✔
18
from .pydantic_models import DocumentSummaryResponse
19
from .pydantic_models import ProjectSummaryResponse
20
from .pydantic_models import SummaryItem
1✔
21
from .utils import extract_text_from_document
22

23
# Module-level logger, namespaced to this module per the standard logging convention.
logger = logging.getLogger(__name__)


# Rate-limiting knobs consumed by AIService when generating project summaries.
PROJECT_SUMMARY_RATE_LIMIT_MINUTES = (
    5  # Minimum minutes between summary generations per project
)
SUMMARY_GLOBAL_LIMIT_PER_HOUR = 100  # Maximum summaries per hour across all projects
30

31

32
class AIService:
×
33
    """Service for summarizing text using configured AI provider."""
×
34

35
    def __init__(
        self,
        provider_handle: str | None = None,
        document_provider_handle: str | None = None,
    ):
        """Initialize AI service with a text provider and a document provider.

        Args:
            provider_handle: Handle of the text-summarization provider. Falls
                back to ``settings.AI_PROVIDER`` when omitted.
            document_provider_handle: Handle of the document/vision provider.
                Falls back to ``settings.AI_DOCUMENT_PROVIDER`` when omitted.

        Raises:
            ValueError: If a handle is neither passed nor configured in settings.
        """
        self.provider = self._build_provider(
            provider_handle,
            setting_name="AI_PROVIDER",
            error_message=(
                "No provider configured. "
                "Either pass provider_handle to AIService() or set AI_PROVIDER in settings."
            ),
        )
        self.document_provider = self._build_provider(
            document_provider_handle,
            setting_name="AI_DOCUMENT_PROVIDER",
            error_message=(
                "No document provider configured. "
                "Either pass document_provider_handle to AIService() or set AI_DOCUMENT_PROVIDER in settings."
            ),
        )

    @staticmethod
    def _build_provider(
        handle: str | None, *, setting_name: str, error_message: str
    ) -> AIProvider:
        """Resolve a provider handle (explicit arg or settings) into an AIProvider."""
        if not handle:
            handle = getattr(settings, setting_name, None)
            if not handle:
                raise ValueError(error_message)
        # ProviderConfig loads configuration from settings automatically.
        config = ProviderConfig.from_handle(handle)
        return AIProvider(config)
63

64
    def summarize(
        self,
        text: str,
        prompt: str | None = None,
        result_type: type[BaseModel] = SummaryItem,
    ) -> BaseModel:
        """Run a one-shot summarization of ``text`` through the configured provider.

        ``prompt`` overrides the request's default instructions when given;
        ``result_type`` selects the pydantic model the provider parses into.
        """
        return self.provider.request(
            SummaryRequest(text=text, prompt=prompt), result_type=result_type
        )
74

75
    def _check_cache_and_rate_limits(
        self, project, text: str, latest_project_summary
    ) -> ProjectSummaryResponse | None:
        """Check cache and rate limits, return cached summary if applicable.

        Args:
            project: Project whose summary is requested (only ``project.id``
                is used here, for logging).
            text: Input text that would be summarized.
            latest_project_summary: Most recent ``ProjectSummary`` row for the
                project, or ``None`` if the project has no summaries yet.

        Returns:
            A ``ProjectSummaryResponse`` rebuilt from the cached row when any
            of the three checks below fires, otherwise ``None`` (meaning the
            caller should generate a fresh summary).
        """
        # No cached row at all: nothing to serve, always generate.
        if not latest_project_summary:
            return None

        # Check 1: Exact content match
        # Same input hash means the text is unchanged, so the cached summary
        # is still valid regardless of age.
        current_hash = ProjectSummary.compute_hash(text)
        if latest_project_summary.input_text_hash == current_hash:
            logger.debug(
                f"Cached summary found (exact match via hash comparison) for project {project.id}"
            )
            return ProjectSummaryResponse(**latest_project_summary.response_data)

        # Check 2: Per-project rate limiting
        # Content changed, but regeneration is throttled per project.
        time_since_last = timezone.now() - latest_project_summary.created_at
        if time_since_last < timedelta(minutes=PROJECT_SUMMARY_RATE_LIMIT_MINUTES):
            logger.debug(
                f"Using rate-limited summary from {latest_project_summary.created_at} "
                f"(within {PROJECT_SUMMARY_RATE_LIMIT_MINUTES} min per project) for project {project.id}"
            )
            return ProjectSummaryResponse(**latest_project_summary.response_data)

        # Check 3: Global rate limiting
        # Only consulted when this project generated within the last hour;
        # counts summaries across ALL projects to cap provider usage.
        if time_since_last < timedelta(hours=1):
            global_limit_time = timezone.now() - timedelta(hours=1)
            recent_global_count = ProjectSummary.objects.filter(
                created_at__gte=global_limit_time
            ).count()

            if recent_global_count >= SUMMARY_GLOBAL_LIMIT_PER_HOUR:
                logger.debug(
                    f"Global rate limit reached ({recent_global_count}/{SUMMARY_GLOBAL_LIMIT_PER_HOUR} in last hour), "
                    f"using most recent summary from {latest_project_summary.created_at} for project {project.id}"
                )
                return ProjectSummaryResponse(**latest_project_summary.response_data)

        # All checks passed: caller should generate a new summary.
        return None
114

×
115
    def _try_fallback_cache(
        self, latest_project_summary
    ) -> ProjectSummaryResponse | None:
        """Return a cached summary as a fallback after a generation error.

        The fallback is used only when
        ``settings.PROJECT_SUMMARY_FALLBACK_MAX_AGE_MINUTES`` is non-zero and
        the cached row is no older than that many minutes.
        """
        max_age_minutes = getattr(
            settings, "PROJECT_SUMMARY_FALLBACK_MAX_AGE_MINUTES", 0
        )
        logger.debug(
            f"Fallback check: max_age_minutes={max_age_minutes}, "
            f"latest_project_summary exists={latest_project_summary is not None}"
        )

        # Guard clauses: feature disabled, or nothing cached to fall back to.
        if max_age_minutes == 0:
            logger.debug("Fallback disabled (max_age_minutes=0)")
            return None
        if not latest_project_summary:
            logger.debug("No cached summary available for fallback")
            return None

        age = timezone.now() - latest_project_summary.created_at
        age_minutes = age.total_seconds() / 60

        logger.debug(
            f"Fallback age check: {age_minutes:.1f} min <= {max_age_minutes} min? "
            f"{age_minutes <= max_age_minutes}"
        )

        if age > timedelta(minutes=max_age_minutes):
            logger.debug(
                f"Cached summary too old ({age_minutes:.1f} min > {max_age_minutes} min) - not using fallback"
            )
            return None

        logger.debug(
            f"Using cached fallback summary from {latest_project_summary.created_at} "
            f"(age: {age_minutes:.1f} min, max: {max_age_minutes} min)"
        )
        return ProjectSummaryResponse(**latest_project_summary.response_data)
155

156
    def project_summarize(
        self,
        project,
        text: str,
        prompt: str | None = None,
        result_type: type[BaseModel] = ProjectSummaryResponse,
        is_rate_limit: bool = True,
    ) -> BaseModel:
        """Summarize text for a project with caching and rate limiting support.

        Args:
            project: Project instance (must expose ``id`` and ``slug``).
            text: Project export text to summarize.
            prompt: Optional prompt overriding ``SummaryRequest.DEFAULT_PROMPT``.
            result_type: Pydantic model the provider response is parsed into.
            is_rate_limit: When True, serve a cached summary if the content is
                unchanged or a per-project/global rate limit applies.

        Returns:
            The provider response. Successful ``ProjectSummaryResponse``
            results are persisted to ``ProjectSummary`` for future cache hits.

        Raises:
            Exception: Re-raises the provider error when generation fails and
                no fresh-enough cached fallback exists.
        """
        request = SummaryRequest(text=text, prompt=prompt)
        latest_project_summary = (
            ProjectSummary.objects.filter(project=project)
            .order_by("-created_at")
            .first()
        )

        # Serve from cache / rate limits before paying for a provider call.
        if is_rate_limit:
            cached_response = self._check_cache_and_rate_limits(
                project, text, latest_project_summary
            )
            if cached_response:
                return cached_response

        # Generate new summary
        logger.info(f"Generating new summary for project {project.id} ({project.slug})")
        if logger.isEnabledFor(logging.DEBUG):
            # request.prompt() concatenates the full export text, which can be
            # large — only build the preview when DEBUG logging is active.
            logger.debug(f"Prompt preview: {request.prompt()[:500]}...")
        try:
            response = self.provider.request(request, result_type=result_type)

            if isinstance(response, ProjectSummaryResponse):
                logger.info(
                    f"Created new project summary for project {project.id} ({project.slug})"
                )
                # Persist the response as plain JSON, keyed by the input hash,
                # so identical content can be served from cache next time.
                ProjectSummary.objects.create(
                    project=project,
                    prompt=request.prompt_text,
                    input_text_hash=ProjectSummary.compute_hash(text),
                    response_data=json.loads(response.model_dump_json()),
                )
            return response
        except Exception as e:
            logger.error(
                f"Error during summary generation for project {project.id} ({project.slug}): {str(e)} - NOT CACHING",
                exc_info=True,
            )
            capture_exception(e)
            # Prefer a stale-but-recent cached summary over surfacing the error.
            fallback_response = self._try_fallback_cache(latest_project_summary)
            if fallback_response:
                logger.info(
                    f"Using fallback cache for project {project.id} after error"
                )
                return fallback_response
            logger.warning(
                f"Re-raising exception - no valid fallback available for project {project.id}"
            )
            raise
215

216
    def request_vision(
        self,
        documents: list[DocumentInputItem],
        prompt: str | None = None,
    ) -> DocumentSummaryResponse:
        """Process documents and images, return combined summaries.

        Items the vision provider cannot ingest directly are routed through
        local text extraction; everything else goes to the vision provider.
        """
        extraction_items: list[DocumentInputItem] = []
        vision_items: list[DocumentInputItem] = []

        # Invariant for the whole batch — hoisted out of the loop.
        provider_supports_documents = self.document_provider.config.supports_documents

        for item in documents:
            if not provider_supports_documents and item.is_document():
                extraction_items.append(item)
            else:
                vision_items.append(item)

        document_results = []
        if extraction_items:
            document_results = self.request_documents(
                [item.url for item in extraction_items],
                [item.handle for item in extraction_items],
            ).documents

        image_results = []
        if vision_items:
            image_results = self.request_images(
                [item.url for item in vision_items],
                [item.handle for item in vision_items],
                prompt,
            ).documents

        return DocumentSummaryResponse(documents=image_results + document_results)
249

250
    def request_vision_dict(
        self,
        documents_dict: dict[str, str],
        prompt: str | None = None,
    ) -> DocumentSummaryResponse:
        """Process documents supplied as a handle-to-URL mapping.

        Args:
            documents_dict: Dictionary mapping handles to absolute URLs.
            prompt: Optional prompt for summarization.

        Returns:
            DocumentSummaryResponse with summaries for all documents.
        """
        # Adapt the mapping into DocumentInputItem objects and delegate.
        return self.request_vision(
            documents=[
                DocumentInputItem(handle=handle, url=url)
                for handle, url in documents_dict.items()
            ],
            prompt=prompt,
        )
270

271
    def request_images(
        self,
        image_urls: list[str],
        image_handle_list: list[str],
        prompt: str | None = None,
    ) -> DocumentSummaryResponse:
        """Send the given URLs to the vision provider and return per-handle summaries."""
        # Fall back to a batch prompt that names every handle so the provider
        # can return one summary per document, in order.
        custom_prompt = prompt or (
            f"Summarize each document separately. "
            f"The documents are provided in order with the following handles: {image_handle_list}. "
            f"Return a list of summaries, one for each document in the same order. "
            f"Each summary should include the handle and describe the content and most important information of that document."
        )
        return self.document_provider.request(
            MultimodalSummaryRequest(image_urls=image_urls, prompt=custom_prompt),
            result_type=DocumentSummaryResponse,
        )
294

295
    def request_documents(
        self,
        document_urls: list[str],
        document_handle_list: list[str],
    ) -> DocumentSummaryResponse:
        """Extract text from PDFs and DOCX files, return as summaries.

        Extraction failures are logged and reported to Sentry; the failing
        document is simply omitted from the response rather than aborting
        the whole batch.
        """
        extracted_items = []
        for url, handle in zip(document_urls, document_handle_list):
            try:
                extracted_items.append(
                    DocumentSummaryItem(
                        handle=handle, summary=extract_text_from_document(url)
                    )
                )
            except Exception as e:
                logger.error(
                    f"Failed to extract text from document {handle} ({url}): {str(e)}",
                    exc_info=True,
                )
                capture_exception(e)

        return DocumentSummaryResponse(documents=extracted_items)
317

318

319
# TODO: Move to a providers.py ?
320

321

322
class SummaryRequest(AIRequest):
    """Request model for text summarization."""

    # Default instructions: force the provider to emit raw JSON matching the
    # ProjectSummaryResponse schema — no markdown fences, no commentary.
    DEFAULT_PROMPT = """
        You are a JSON generator. Return ONLY valid JSON. No explanations, no markdown, no code blocks.

        Schema:
        {
        "title": "Summary of participation",
        "stats": {"participants": 0, "contributions": 0, "modules": 0},
        "general_summary": "string",
        "general_goals": ["string"],
        "past_modules": [
            {
            "id": "int",
            "module_id": "int",
            "module_name": "string",
            "purpose": "string",
            "main_sentiments": ["string"],
            "phase_status": "past",
            "link": "string"
            }
        ],
        "current_modules": [
            {
            "id": "int",
            "module_id": "int",
            "module_name": "string",
            "purpose": "string",
            "first_content": ["string"],
            "phase_status": "active",
            "link": "string"
            }
        ],
        "upcoming_modules": [
            {
            "id": "int",
            "module_id": "int",
            "module_name": "string",
            "purpose": "string",
            "phase_status": "upcoming",
            "link": "string"
            }
        ]
        }

        Extract real data from the project export. Use actual numbers and content.
        Respond with ONLY the JSON object.
        """

    def __init__(self, text: str, prompt: str | None = None) -> None:
        """Store the text to summarize and the instructions (default if omitted)."""
        super().__init__()
        self.text = text
        self.prompt_text = prompt or self.DEFAULT_PROMPT

    def prompt(self) -> str:
        """Return the full prompt: instructions followed by the input text."""
        return f"{self.prompt_text}\n\n{self.text}"
379

380

381
# TODO: Move to a providers.py ?
382

383

384
class MultimodalSummaryRequest(AIRequest):
    """AI request carrying image URLs (and optional text) for summarization."""

    # Marks this request as needing a vision-capable provider.
    vision_support = True

    DEFAULT_PROMPT = (
        "Summarize this document/image. "
        "Describe the content and the most important information. "
        "Return your answer as structured JSON that matches the expected format."
    )

    def __init__(
        self,
        image_urls: list[str] | None = None,
        text: str | None = None,
        prompt: str | None = None,
    ) -> None:
        """Capture the image URLs, optional text payload, and prompt override."""
        super().__init__()
        self.text = text
        self.image_urls = image_urls if image_urls else []
        self.prompt_text = prompt if prompt else self.DEFAULT_PROMPT

    def prompt(self) -> str:
        """Return the prompt, appending the text payload when one is present."""
        if not self.text:
            return self.prompt_text
        return f"{self.prompt_text}\n\nText:\n{self.text}"
410

411

412
class DocumentRequest(AIRequest):
    """Request model for summarizing a single document by URL."""

    # Marks this request as needing a vision-capable provider.
    vision_support = True

    # NOTE(review): the original prompt ended mid-sentence
    # ("... with 'summary' field, ") — completed so the provider receives a
    # well-formed instruction.
    DEFAULT_PROMPT = (
        "Summarize this document. "
        "Describe the content and the most important information. "
        "Return your answer as structured JSON with a 'summary' field."
    )

    def __init__(
        self,
        url: str,
        prompt: str | None = None,
    ) -> None:
        """Store the document URL and the prompt (default if omitted)."""
        super().__init__()
        # The document URL is carried in image_urls, which the provider layer
        # presumably reads for multimodal requests — mirrors MultimodalSummaryRequest.
        self.image_urls = [url]
        self.prompt_text = prompt or self.DEFAULT_PROMPT

    def prompt(self) -> str:
        """Return the prompt text (the document itself travels via image_urls)."""
        return self.prompt_text
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc