• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

akvo / akvo-mis / #514

18 Jan 2026 04:24PM UTC coverage: 88.467% (-0.06%) from 88.528%
#514

push

coveralls-python

web-flow
Merge d0d8e91e3 into 6dd41dc61

3824 of 4436 branches covered (86.2%)

Branch coverage included in aggregate %.

7828 of 8735 relevant lines covered (89.62%)

0.9 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

92.24
backend/utils/seeder_answer_processor.py
1
"""
2
Seeder Answer Processor Module
3

4
This module provides answer processing functionality for Flow Complete Seeder.
5
"""
6

7
import logging
1✔
8
import os
1✔
9
import tempfile
1✔
10
import uuid
1✔
11
from typing import Optional, Tuple
1✔
12
from urllib.parse import urlparse
1✔
13

14
import requests
1✔
15

16
from api.v1.v1_forms.models import QuestionTypes
1✔
17
from utils import storage
1✔
18

19
logger = logging.getLogger(__name__)

# MIME types that are accepted without a warning when they appear in the
# Content-Type header of a downloaded photo.
VALID_IMAGE_MIME_TYPES = {
    'image/jpeg',
    'image/png',
    'image/gif',
    'image/webp',
    'image/bmp',
    'image/tiff',
}

# Leading byte signatures ("magic bytes") mapped to the file extension used
# when storing the image. Every format listed in VALID_IMAGE_MIME_TYPES has
# at least one signature here, otherwise a download of that format could
# never pass content validation.
IMAGE_MAGIC_BYTES = {
    b'\xff\xd8\xff': 'jpg',      # JPEG
    b'\x89PNG\r\n\x1a\n': 'png',  # PNG
    b'GIF87a': 'gif',            # GIF87a
    b'GIF89a': 'gif',            # GIF89a
    b'RIFF': 'webp',             # WebP (starts with RIFF)
    b'BM': 'bmp',                # BMP
    b'II*\x00': 'tiff',          # TIFF (little-endian byte order)
    b'MM\x00*': 'tiff',          # TIFF (big-endian byte order)
}
40

41

42
# =============================================================================
43
# Download Photo Processor
44
# =============================================================================
45

46

47
class DownloadPhotoProcessor:
    """Handles downloading and validating photos from URLs."""

    @staticmethod
    def validate_image_content(content: bytes) -> Optional[str]:
        """Validate image content by checking magic bytes.

        Args:
            content: The raw bytes of the downloaded file

        Returns:
            The detected file extension if valid, None otherwise
        """
        if not content:
            return None
        for magic, ext in IMAGE_MAGIC_BYTES.items():
            if not content.startswith(magic):
                continue
            # RIFF is a generic container also used by WAV and AVI files;
            # a WebP file additionally carries the 'WEBP' tag at bytes 8-11.
            if magic == b'RIFF' and content[8:12] != b'WEBP':
                continue
            return ext
        return None

    @staticmethod
    def get_extension_from_url(url: str) -> str:
        """Extract file extension from URL path.

        Only the path component is inspected, so query strings and
        fragments do not affect the result. Matching is case-insensitive.

        Args:
            url: The URL to extract extension from

        Returns:
            The file extension (without dot) or 'jpg' as default
        """
        parsed = urlparse(url)
        path = parsed.path.lower()
        for ext in ['jpg', 'jpeg', 'png', 'gif', 'webp', 'bmp', 'tiff']:
            if path.endswith(f'.{ext}'):
                # Normalise the two JPEG spellings to a single extension.
                return 'jpg' if ext == 'jpeg' else ext
        return 'jpg'

    @staticmethod
    def _read_limited(chunks, max_size: int) -> Optional[bytes]:
        """Concatenate byte chunks, giving up once max_size is exceeded.

        Args:
            chunks: Iterable yielding bytes objects
            max_size: Maximum total number of bytes to accept

        Returns:
            The concatenated bytes, or None if the total exceeds max_size
        """
        # A bytearray grows in place; repeated ``bytes +=`` would copy the
        # whole buffer on every chunk.
        buffer = bytearray()
        for chunk in chunks:
            buffer.extend(chunk)
            if len(buffer) > max_size:
                return None
        return bytes(buffer)

    @staticmethod
    def download_image(url: str, timeout: int = 30) -> Optional[bytes]:
        """Download image from URL with proper error handling.

        The body is streamed and the download is abandoned as soon as it
        exceeds 50MB. An unexpected Content-Type only produces a warning;
        the content itself is validated separately by the caller.

        Args:
            url: The URL to download from
            timeout: Request timeout in seconds

        Returns:
            The image content as bytes, or None on failure
        """
        response = None
        try:
            response = requests.get(
                url,
                timeout=timeout,
                headers={
                    'User-Agent': 'Mozilla/5.0 (compatible; AkvoMIS/1.0)'
                },
                stream=True
            )
            response.raise_for_status()

            # Check content type if available
            content_type = response.headers.get('Content-Type', '').lower()
            if content_type:
                mime_type = content_type.split(';')[0].strip()
                if mime_type and mime_type not in VALID_IMAGE_MIME_TYPES:
                    # Allow unknown content types but log a warning
                    logger.warning(
                        f"Unexpected content type '{mime_type}' for URL: {url}"
                    )

            # Limit download size to 50MB
            max_size = 50 * 1024 * 1024
            content = DownloadPhotoProcessor._read_limited(
                response.iter_content(chunk_size=8192),
                max_size,
            )
            if content is None:
                logger.warning(f"Image too large (>50MB): {url}")
            return content

        except requests.exceptions.Timeout:
            logger.warning(f"Timeout downloading image from: {url}")
            return None
        except requests.exceptions.ConnectionError:
            logger.warning(f"Connection error downloading image from: {url}")
            return None
        except requests.exceptions.HTTPError as e:
            # The exception may have been raised without a response attached.
            status = getattr(e.response, 'status_code', 'unknown')
            logger.warning(f"HTTP error {status} for: {url}")
            return None
        except requests.exceptions.RequestException as e:
            logger.warning(f"Request error downloading image: {e}")
            return None
        finally:
            # With stream=True the connection stays open until the body is
            # fully consumed or the response is closed, so release it on
            # every path (including the early "too large" exit).
            if response is not None:
                response.close()

    @classmethod
    def process(cls, url: str) -> Optional[str]:
        """Download, validate, and store an image from a URL.

        Args:
            url: The URL to download the image from

        Returns:
            The local storage path if successful, None otherwise
        """
        # Download the image
        content = cls.download_image(url)
        if content is None:
            logger.warning(f"Failed to download image: {url}")
            return None

        # Validate image content by checking magic bytes
        extension = cls.validate_image_content(content)
        if extension is None:
            logger.warning(
                f"Downloaded content is not a valid image format: {url}"
            )
            return None

        # Generate unique filename
        unique_id = uuid.uuid4().hex
        filename = f"seeder_{unique_id}.{extension}"

        # Save to temporary file then upload to storage
        tmp_file = None
        try:
            # Create temp file with proper extension
            with tempfile.NamedTemporaryFile(
                suffix=f'.{extension}',
                delete=False
            ) as tmp:
                # Record the path before writing so that a failed write is
                # still cleaned up in the finally clause below.
                tmp_file = tmp.name
                tmp.write(content)

            # Upload to storage/images
            stored_path = storage.upload(
                file=tmp_file,
                folder="images",
                filename=filename
            )

            logger.info(
                f"Successfully downloaded and stored image: {stored_path}"
            )
            return stored_path

        except Exception as e:
            # Best-effort boundary: a storage failure must not abort the
            # caller, which treats None as "keep the original value".
            logger.error(f"Error saving image to storage: {e}")
            return None

        finally:
            # Clean up temp file
            if tmp_file and os.path.exists(tmp_file):
                try:
                    os.remove(tmp_file)
                except OSError:
                    pass
203

204

205
# =============================================================================
206
# Strategy Pattern for Answer Processing
207
# =============================================================================
208

209

210
class AnswerProcessor:
    """Strategy pattern for processing different question types.

    Every ``process_*`` method returns a ``(name, value, options)`` tuple in
    which only the slot relevant to the question type is populated.
    """

    OPTION_TYPES = [
        QuestionTypes.option,
        QuestionTypes.multiple_option,
    ]

    # Class-level cache populated by predownload_photos command
    photo_url_map: dict = {}

    @classmethod
    def set_photo_url_map(cls, url_map: dict):
        """Set the photo URL mapping from pre-download log.

        Args:
            url_map: Dictionary mapping URL to local storage path. None is
                treated as an empty mapping.
        """
        # Guard against None so later ``url in cls.photo_url_map`` lookups
        # cannot fail with a TypeError.
        cls.photo_url_map = url_map if url_map is not None else {}

    @classmethod
    def clear_photo_url_map(cls):
        """Clear the photo URL mapping."""
        cls.photo_url_map = {}

    @staticmethod
    def process_administration(
        row_value,
        administration_id: Optional[int],
    ) -> Tuple[Optional[str], Optional[int], Optional[list]]:
        """Process administration-type questions.

        Args:
            row_value: The value from the CSV row (not used)
            administration_id: Administration ID to use

        Returns:
            Tuple of (name, value, options) with administration_id as value
        """
        return None, administration_id, None

    @staticmethod
    def process_geo(
        row_value,
    ) -> Tuple[Optional[str], Optional[int], Optional[list]]:
        """Process geo-type questions.

        Args:
            row_value: The value from the CSV row, a "|"-separated list of
                numbers

        Returns:
            Tuple of (name, value, options) with the parsed floats as options

        Raises:
            ValueError: If any "|"-separated segment is not a valid float
        """
        if row_value is None:
            return None, None, None
        options = [
            float(g) for g in str(row_value).split("|")
        ]
        return None, None, options

    @staticmethod
    def process_option(
        row_value,
        opt_list: Optional[list] = None,
    ) -> Tuple[Optional[str], Optional[int], Optional[list]]:
        """Process option-type questions.

        Args:
            row_value: The value from the CSV row, a "|"-separated list of
                option values
            opt_list: Allowed option values; anything not in this list is
                discarded. None is treated as an empty list.

        Returns:
            Tuple of (name, value, options); options is None when no
            submitted value is allowed
        """
        if row_value is None:
            return None, None, None
        allowed = opt_list if opt_list is not None else []
        option_values = str(row_value).split("|")
        # find intersection with opt_list to validate options
        option_values = [opt for opt in option_values if opt in allowed]
        if not option_values:
            return None, None, None
        return None, None, option_values

    @staticmethod
    def process_number(
        row_value,
    ) -> Tuple[Optional[str], Optional[float], Optional[list]]:
        """Process number-type questions.

        Args:
            row_value: The value from the CSV row

        Returns:
            Tuple of (name, value, options) with the parsed float as value,
            or all None when row_value cannot be converted to a float
        """
        try:
            number_value = float(row_value)
            return None, number_value, None
        except (ValueError, TypeError):
            return None, None, None

    @classmethod
    def process_photo(
        cls,
        row_value,
    ) -> Tuple[Optional[str], Optional[int], Optional[list]]:
        """Process photo-type questions.

        First checks the pre-downloaded cache (photo_url_map). If not found,
        downloads the photo from the given URL and uploads it to storage.
        Validates that the downloaded content is actually an image.

        Args:
            row_value: The value from the CSV row (expected to be a URL)

        Returns:
            Tuple of (name, value, options) where:
            - name: The local storage path if successful, the original
              value otherwise
            - value: Always None for photos
            - options: Always None for photos
        """
        if row_value is None:
            return None, None, None

        url = str(row_value).strip()

        # If not a URL, return as-is (might be an existing local path)
        if not url.startswith(('http://', 'https://')):
            return row_value, None, None

        # Check pre-downloaded cache first
        if url in cls.photo_url_map:
            return cls.photo_url_map[url], None, None

        # Fallback: download on-demand using DownloadPhotoProcessor
        stored_path = DownloadPhotoProcessor.process(url)
        if stored_path is None:
            # Fall back to original value if download fails
            return row_value, None, None

        return stored_path, None, None

    @staticmethod
    def process_default(
        row_value,
    ) -> Tuple[Optional[str], Optional[int], Optional[list]]:
        """Process default question types.

        Args:
            row_value: The value from the CSV row

        Returns:
            Tuple of (name, value, options) with row_value passed through
            unchanged as name
        """
        return row_value, None, None

    @classmethod
    def process(
        cls,
        question_type: str,
        row_value,
        administration_id: Optional[int] = None,
        opt_list: Optional[list] = None,
    ) -> Tuple[Optional[str], Optional[int], Optional[list]]:
        """Process answer based on question type.

        Args:
            question_type: The type of question
            row_value: The value from the CSV row
            administration_id: Administration ID for admin-type questions
            opt_list: Allowed option values for option-type questions

        Returns:
            Tuple of (name, value, options)
        """
        if question_type == QuestionTypes.administration:
            return cls.process_administration(row_value, administration_id)
        elif question_type == QuestionTypes.geo:
            return cls.process_geo(row_value)
        elif question_type in cls.OPTION_TYPES:
            return cls.process_option(row_value, opt_list)
        elif question_type == QuestionTypes.number:
            return cls.process_number(row_value)
        elif question_type == QuestionTypes.photo:
            return cls.process_photo(row_value)
        else:
            return cls.process_default(row_value)
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc