#514

Committed 18 Jan 2026 04:24PM UTC coverage: 88.467% (-0.06%) from 88.528%

Build # #514

Build Type

push

coveralls-python

Committed by

web-flow

Commit Message

Merge d0d8e91e3 into 6dd41dc61

Run Details

3824 of 4436 branches covered (86.2%)

Branch coverage included in aggregate %.

7828 of 8735 relevant lines covered (89.62%)

0.9 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

92.24

backend/utils/seeder_answer_processor.py

"""
Seeder Answer Processor Module

This module provides answer processing functionality for Flow Complete Seeder.
"""

import logging
import os
import tempfile
import uuid
from typing import Optional, Tuple
from urllib.parse import urlparse

import requests

from api.v1.v1_forms.models import QuestionTypes
from utils import storage

logger = logging.getLogger(__name__)

# MIME types that are accepted without a warning when they appear in the
# Content-Type header of a downloaded photo.
VALID_IMAGE_MIME_TYPES = {
    'image/jpeg',
    'image/png',
    'image/gif',
    'image/webp',
    'image/bmp',
    'image/tiff',
}

# Leading byte signatures ("magic bytes") mapped to the file extension used
# when storing the image. Every format listed in VALID_IMAGE_MIME_TYPES has at
# least one signature here; TIFF needs two because the header encodes the
# byte order of the file.
IMAGE_MAGIC_BYTES = {
    b'\xff\xd8\xff': 'jpg',      # JPEG
    b'\x89PNG\r\n\x1a\n': 'png',  # PNG
    b'GIF87a': 'gif',            # GIF87a
    b'GIF89a': 'gif',            # GIF89a
    # WebP lives in a RIFF container; the 'RIFF' prefix on its own is also
    # used by other RIFF formats such as WAV and AVI.
    b'RIFF': 'webp',
    b'BM': 'bmp',                # BMP
    b'II*\x00': 'tiff',          # TIFF, little-endian ("Intel") byte order
    b'MM\x00*': 'tiff',          # TIFF, big-endian ("Motorola") byte order
}


# =============================================================================
# Download Photo Processor
# =============================================================================


class DownloadPhotoProcessor:
    """Handles downloading and validating photos from URLs.

    The entry point is :meth:`process`, which downloads the bytes, checks
    that they look like an image, and hands them to ``storage.upload``. The
    other methods are the individual steps and can be used on their own.
    """

    @staticmethod
    def validate_image_content(content: bytes) -> Optional[str]:
        """Validate image content by checking magic bytes.

        Args:
            content: The raw bytes of the downloaded file

        Returns:
            The detected file extension if valid, None otherwise
        """
        for magic, ext in IMAGE_MAGIC_BYTES.items():
            if not content.startswith(magic):
                continue
            # 'RIFF' is a generic container prefix shared with WAV, AVI and
            # others; a WebP file additionally carries the 'WEBP' tag at
            # bytes 8-11, so require it before accepting the file as an image.
            if magic == b'RIFF' and content[8:12] != b'WEBP':
                continue
            return ext
        return None

    @staticmethod
    def get_extension_from_url(url: str) -> str:
        """Extract file extension from URL path.

        Only the path component is inspected, so query strings and fragments
        do not affect the result. Matching is case-insensitive.

        Args:
            url: The URL to extract extension from

        Returns:
            The file extension (without dot) or 'jpg' as default
        """
        path = urlparse(url).path.lower()
        for ext in ('jpg', 'jpeg', 'png', 'gif', 'webp', 'bmp', 'tiff'):
            if path.endswith(f'.{ext}'):
                # Normalise the long JPEG spelling to the short one.
                return 'jpg' if ext == 'jpeg' else ext
        return 'jpg'

    @staticmethod
    def download_image(url: str, timeout: int = 30) -> Optional[bytes]:
        """Download image from URL with proper error handling.

        The body is streamed in 8 KiB chunks and the download is abandoned
        as soon as more than 50MB has been received. Every ``requests``
        exception is logged as a warning and turned into a ``None`` result.

        Args:
            url: The URL to download from
            timeout: Request timeout in seconds

        Returns:
            The image content as bytes, or None on failure
        """
        # Limit download size to 50MB
        max_size = 50 * 1024 * 1024

        try:
            response = requests.get(
                url,
                timeout=timeout,
                headers={
                    'User-Agent': 'Mozilla/5.0 (compatible; AkvoMIS/1.0)'
                },
                stream=True
            )
            try:
                response.raise_for_status()

                # Check content type if available
                content_type = response.headers.get(
                    'Content-Type', ''
                ).lower()
                if content_type:
                    mime_type = content_type.split(';')[0].strip()
                    if mime_type and mime_type not in VALID_IMAGE_MIME_TYPES:
                        # Allow unknown content types but log a warning
                        logger.warning(
                            f"Unexpected content type '{mime_type}' "
                            f"for URL: {url}"
                        )

                # Collect chunks in a list and join once at the end; growing
                # a bytes object with += copies the whole buffer every time.
                chunks = []
                received = 0
                for chunk in response.iter_content(chunk_size=8192):
                    chunks.append(chunk)
                    received += len(chunk)
                    if received > max_size:
                        logger.warning(f"Image too large (>50MB): {url}")
                        return None

                return b''.join(chunks)
            finally:
                # With stream=True the connection stays checked out until the
                # body is fully consumed or the response is closed, so close
                # it explicitly on every exit path (including early returns).
                response.close()

        except requests.exceptions.Timeout:
            logger.warning(f"Timeout downloading image from: {url}")
            return None
        except requests.exceptions.ConnectionError:
            logger.warning(f"Connection error downloading image from: {url}")
            return None
        except requests.exceptions.HTTPError as e:
            # HTTPError may be raised without an attached response. Compare
            # against None explicitly: a Response is falsy for 4xx/5xx.
            status = (
                e.response.status_code
                if e.response is not None
                else 'unknown'
            )
            logger.warning(f"HTTP error {status} for: {url}")
            return None
        except requests.exceptions.RequestException as e:
            logger.warning(f"Request error downloading image: {e}")
            return None

    @classmethod
    def process(cls, url: str) -> Optional[str]:
        """Download, validate, and store an image from a URL.

        Args:
            url: The URL to download the image from

        Returns:
            The local storage path if successful, None otherwise
        """
        # Download the image
        content = cls.download_image(url)
        if content is None:
            logger.warning(f"Failed to download image: {url}")
            return None

        # Validate image content by checking magic bytes
        detected_ext = cls.validate_image_content(content)
        if detected_ext is None:
            logger.warning(
                f"Downloaded content is not a valid image format: {url}"
            )
            return None

        # Use detected extension or fall back to URL extension
        extension = detected_ext or cls.get_extension_from_url(url)

        # Generate unique filename
        unique_id = uuid.uuid4().hex
        filename = f"seeder_{unique_id}.{extension}"

        # Save to temporary file then upload to storage
        tmp_file = None
        try:
            # Create temp file with proper extension
            with tempfile.NamedTemporaryFile(
                suffix=f'.{extension}',
                delete=False
            ) as tmp:
                # Remember the path before writing so the cleanup below also
                # removes the file when the write itself fails.
                tmp_file = tmp.name
                tmp.write(content)

            # Upload to storage/images
            stored_path = storage.upload(
                file=tmp_file,
                folder="images",
                filename=filename
            )

            logger.info(
                f"Successfully downloaded and stored image: {stored_path}"
            )
            return stored_path

        except Exception as e:
            # Best-effort boundary: any failure while writing or uploading is
            # reported as "no stored path" rather than propagated.
            logger.error(f"Error saving image to storage: {e}")
            return None

        finally:
            # Clean up temp file
            if tmp_file and os.path.exists(tmp_file):
                try:
                    os.remove(tmp_file)
                except OSError:
                    # Cleanup is best-effort; a leftover temp file must not
                    # mask the result of the upload.
                    pass


# =============================================================================
# Strategy Pattern for Answer Processing
# =============================================================================


class AnswerProcessor:
    """Strategy pattern for processing different question types.

    Every ``process_*`` method returns a ``(name, value, options)`` tuple in
    which only the slot relevant to the question type is populated; the
    remaining slots are ``None``. :meth:`process` selects the method that
    matches the question type.
    """

    OPTION_TYPES = [
        QuestionTypes.option,
        QuestionTypes.multiple_option,
    ]

    # Class-level cache populated by predownload_photos command
    photo_url_map: dict = {}

    @classmethod
    def set_photo_url_map(cls, url_map: dict):
        """Set the photo URL mapping from pre-download log.

        Args:
            url_map: Dictionary mapping URL to local storage path
        """
        cls.photo_url_map = url_map

    @classmethod
    def clear_photo_url_map(cls):
        """Clear the photo URL mapping."""
        cls.photo_url_map = {}

    @staticmethod
    def process_administration(
        row_value,
        administration_id: Optional[int],
    ) -> Tuple[Optional[str], Optional[int], Optional[list]]:
        """Process administration-type questions.

        Args:
            row_value: The value from the CSV row (not used; the answer is
                taken from ``administration_id``)
            administration_id: Administration ID to use

        Returns:
            Tuple of (name, value, options) with the ID in the value slot
        """
        return None, administration_id, None

    @staticmethod
    def process_geo(
        row_value,
    ) -> Tuple[Optional[str], Optional[int], Optional[list]]:
        """Process geo-type questions.

        Args:
            row_value: The value from the CSV row, as "|"-separated numbers

        Returns:
            Tuple of (name, value, options) with the parsed floats in the
            options slot, or all None when the value is missing or cannot
            be parsed
        """
        if row_value is None:
            return None, None, None
        try:
            options = [
                float(g) for g in str(row_value).split("|")
            ]
        except ValueError:
            # An empty or malformed coordinate yields "no answer" instead of
            # aborting, matching how process_number treats bad input.
            return None, None, None
        return None, None, options

    @staticmethod
    def process_option(
        row_value,
        opt_list: Optional[list] = None,
    ) -> Tuple[Optional[str], Optional[int], Optional[list]]:
        """Process option-type questions.

        Args:
            row_value: The value from the CSV row, as "|"-separated options
            opt_list: The allowed option values; entries of ``row_value``
                that are not in this list are dropped. ``None`` is treated
                as an empty list.

        Returns:
            Tuple of (name, value, options) with the accepted options in the
            options slot, or all None when nothing is accepted
        """
        # Without any allowed options nothing can be accepted.
        if row_value is None or not opt_list:
            return None, None, None
        # find intersection with opt_list to validate options
        option_values = [
            opt for opt in str(row_value).split("|") if opt in opt_list
        ]
        if not option_values:
            return None, None, None
        return None, None, option_values

    @staticmethod
    def process_number(
        row_value,
    ) -> Tuple[Optional[str], Optional[float], Optional[list]]:
        """Process number-type questions.

        Args:
            row_value: The value from the CSV row

        Returns:
            Tuple of (name, value, options) with the float in the value
            slot, or all None when the value cannot be converted
        """
        try:
            number_value = float(row_value)
        except (ValueError, TypeError):
            return None, None, None
        return None, number_value, None

    @classmethod
    def process_photo(
        cls,
        row_value,
    ) -> Tuple[Optional[str], Optional[int], Optional[list]]:
        """Process photo-type questions.

        First checks the pre-downloaded cache (photo_url_map). If not found,
        downloads the photo from the given URL and uploads it to storage.
        Validates that the downloaded content is actually an image.

        Args:
            row_value: The value from the CSV row (expected to be a URL)

        Returns:
            Tuple of (name, value, options) where:
            - name: The local storage path if successful, otherwise the
              original value
            - value: Always None for photos
            - options: Always None for photos
        """
        if row_value is None:
            return None, None, None

        url = str(row_value).strip()

        # If not a URL, return as-is (might be an existing local path)
        if not url.startswith(('http://', 'https://')):
            return row_value, None, None

        # Check pre-downloaded cache first
        if url in cls.photo_url_map:
            return cls.photo_url_map[url], None, None

        # Fallback: download on-demand using DownloadPhotoProcessor
        stored_path = DownloadPhotoProcessor.process(url)
        if stored_path is None:
            # Fall back to original value if download fails
            return row_value, None, None

        return stored_path, None, None

    @staticmethod
    def process_default(
        row_value,
    ) -> Tuple[Optional[str], Optional[int], Optional[list]]:
        """Process default question types.

        Args:
            row_value: The value from the CSV row

        Returns:
            Tuple of (name, value, options) with the raw value in the name
            slot
        """
        return row_value, None, None

    @classmethod
    def process(
        cls,
        question_type: str,
        row_value,
        administration_id: Optional[int] = None,
        opt_list: Optional[list] = None,
    ) -> Tuple[Optional[str], Optional[int], Optional[list]]:
        """Process answer based on question type.

        Args:
            question_type: The type of question
            row_value: The value from the CSV row
            administration_id: Administration ID for admin-type questions
            opt_list: Allowed option values for option-type questions

        Returns:
            Tuple of (name, value, options)
        """
        if question_type == QuestionTypes.administration:
            return cls.process_administration(row_value, administration_id)
        elif question_type == QuestionTypes.geo:
            return cls.process_geo(row_value)
        elif question_type in cls.OPTION_TYPES:
            return cls.process_option(row_value, opt_list)
        elif question_type == QuestionTypes.number:
            return cls.process_number(row_value)
        elif question_type == QuestionTypes.photo:
            return cls.process_photo(row_value)
        else:
            return cls.process_default(row_value)

1	"""
2	Seeder Answer Processor Module
3
4	This module provides answer processing functionality for Flow Complete Seeder.
5	"""
6
7	import logging	1✔
8	import os	1✔
9	import tempfile	1✔
10	import uuid	1✔
11	from typing import Optional, Tuple	1✔
12	from urllib.parse import urlparse	1✔
13
14	import requests	1✔
15
16	from api.v1.v1_forms.models import QuestionTypes	1✔
17	from utils import storage	1✔
18
19	logger = logging.getLogger(__name__)	1✔
20
21	# Valid image MIME types
22	VALID_IMAGE_MIME_TYPES = {	1✔
23	'image/jpeg',
24	'image/png',
25	'image/gif',
26	'image/webp',
27	'image/bmp',
28	'image/tiff',
29	}
30
31	# Image magic bytes for validation
32	IMAGE_MAGIC_BYTES = {	1✔
33	b'\xff\xd8\xff': 'jpg', # JPEG
34	b'\x89PNG\r\n\x1a\n': 'png', # PNG
35	b'GIF87a': 'gif', # GIF87a
36	b'GIF89a': 'gif', # GIF89a
37	b'RIFF': 'webp', # WebP (starts with RIFF)
38	b'BM': 'bmp', # BMP
39	}
40
41
42	# =============================================================================
43	# Download Photo Processor
44	# =============================================================================
45
46
47	class DownloadPhotoProcessor:	1✔
48	"""Handles downloading and validating photos from URLs."""
49
50	@staticmethod	1✔
51	def validate_image_content(content: bytes) -> Optional[str]:	1✔
52	"""Validate image content by checking magic bytes.
53
54	Args:
55	content: The raw bytes of the downloaded file
56
57	Returns:
58	The detected file extension if valid, None otherwise
59	"""
60	for magic, ext in IMAGE_MAGIC_BYTES.items():	1✔
61	if content.startswith(magic):	1✔
62	return ext	1✔
63	return None	1✔
64
65	@staticmethod	1✔
66	def get_extension_from_url(url: str) -> str:	1✔
67	"""Extract file extension from URL path.
68
69	Args:
70	url: The URL to extract extension from
71
72	Returns:
73	The file extension (without dot) or 'jpg' as default
74	"""
75	parsed = urlparse(url)	1✔
76	path = parsed.path.lower()	1✔
77	for ext in ['jpg', 'jpeg', 'png', 'gif', 'webp', 'bmp', 'tiff']:	1✔
78	if path.endswith(f'.{ext}'):	1✔
79	return 'jpg' if ext == 'jpeg' else ext	1✔
80	return 'jpg'	1✔
81
82	@staticmethod	1✔
83	def download_image(url: str, timeout: int = 30) -> Optional[bytes]:	1✔
84	"""Download image from URL with proper error handling.
85
86	Args:
87	url: The URL to download from
88	timeout: Request timeout in seconds
89
90	Returns:
91	The image content as bytes, or None on failure
92	"""
93	try:	1✔
94	response = requests.get(	1✔
95	url,
96	timeout=timeout,
97	headers={
98	'User-Agent': 'Mozilla/5.0 (compatible; AkvoMIS/1.0)'
99	},
100	stream=True
101	)
102	response.raise_for_status()	1✔
103
104	# Check content type if available
105	content_type = response.headers.get('Content-Type', '').lower()	1✔
106	if content_type:	1!
107	mime_type = content_type.split(';')[0].strip()	1✔
108	if mime_type and mime_type not in VALID_IMAGE_MIME_TYPES:	1✔
109	# Allow unknown content types but log a warning
110	logger.warning(	1✔
111	f"Unexpected content type '{mime_type}' for URL: {url}"
112	)
113
114	# Limit download size to 50MB
115	max_size = 50 * 1024 * 1024	1✔
116	content = b''	1✔
117	for chunk in response.iter_content(chunk_size=8192):	1✔
118	content += chunk	1✔
119	if len(content) > max_size:	1✔
120	logger.warning(f"Image too large (>50MB): {url}")	1✔
121	return None	1✔
122
123	return content	1✔
124
125	except requests.exceptions.Timeout:	1✔
126	logger.warning(f"Timeout downloading image from: {url}")	1✔
127	return None	1✔
128	except requests.exceptions.ConnectionError:	1✔
129	logger.warning(f"Connection error downloading image from: {url}")	1✔
130	return None	1✔
131	except requests.exceptions.HTTPError as e:	1!
132	logger.warning(f"HTTP error {e.response.status_code} for: {url}")	1✔
133	return None	1✔
134	except requests.exceptions.RequestException as e:	×
135	logger.warning(f"Request error downloading image: {e}")	×
136	return None	×
137
138	@classmethod	1✔
139	def process(cls, url: str) -> Optional[str]:	1✔
140	"""Download, validate, and store an image from a URL.
141
142	Args:
143	url: The URL to download the image from
144
145	Returns:
146	The local storage path if successful, None otherwise
147	"""
148	# Download the image
149	content = cls.download_image(url)	1✔
150	if content is None:	1✔
151	logger.warning(f"Failed to download image: {url}")	1✔
152	return None	1✔
153
154	# Validate image content by checking magic bytes
155	detected_ext = cls.validate_image_content(content)	1✔
156	if detected_ext is None:	1✔
157	logger.warning(	1✔
158	f"Downloaded content is not a valid image format: {url}"
159	)
160	return None	1✔
161
162	# Use detected extension or fall back to URL extension
163	extension = detected_ext or cls.get_extension_from_url(url)	1✔
164
165	# Generate unique filename
166	unique_id = uuid.uuid4().hex	1✔
167	filename = f"seeder_{unique_id}.{extension}"	1✔
168
169	# Save to temporary file then upload to storage
170	tmp_file = None	1✔
171	try:	1✔
172	# Create temp file with proper extension
173	with tempfile.NamedTemporaryFile(	1✔
174	suffix=f'.{extension}',
175	delete=False
176	) as tmp:
177	tmp.write(content)	1✔
178	tmp_file = tmp.name	1✔
179
180	# Upload to storage/images
181	stored_path = storage.upload(	1✔
182	file=tmp_file,
183	folder="images",
184	filename=filename
185	)
186
187	logger.info(	1✔
188	f"Successfully downloaded and stored image: {stored_path}"
189	)
190	return stored_path	1✔
191
192	except Exception as e:	1✔
193	logger.error(f"Error saving image to storage: {e}")	1✔
194	return None	1✔
195
196	finally:
197	# Clean up temp file
198	if tmp_file and os.path.exists(tmp_file):	1!
199	try:	1✔
200	os.remove(tmp_file)	1✔
201	except OSError:	×
202	pass	×
203
204
205	# =============================================================================
206	# Strategy Pattern for Answer Processing
207	# =============================================================================
208
209
210	class AnswerProcessor:	1✔
211	"""Strategy pattern for processing different question types."""
212
213	OPTION_TYPES = [	1✔
214	QuestionTypes.option,
215	QuestionTypes.multiple_option,
216	]
217
218	# Class-level cache populated by predownload_photos command
219	photo_url_map: dict = {}	1✔
220
221	@classmethod	1✔
222	def set_photo_url_map(cls, url_map: dict):	1✔
223	"""Set the photo URL mapping from pre-download log.
224
225	Args:
226	url_map: Dictionary mapping URL to local storage path
227	"""
228	cls.photo_url_map = url_map	×
229
230	@classmethod	1✔
231	def clear_photo_url_map(cls):	1✔
232	"""Clear the photo URL mapping."""
233	cls.photo_url_map = {}	×
234
235	@staticmethod	1✔
236	def process_administration(	1✔
237	row_value,
238	administration_id: Optional[int],
239	) -> Tuple[Optional[str], Optional[int], Optional[list]]:
240	"""Process administration-type questions.
241
242	Args:
243	row_value: The value from the CSV row
244	administration_id: Administration ID to use
245
246	Returns:
247	Tuple of (name, value, options)
248	"""
249	return None, administration_id, None	1✔
250
251	@staticmethod	1✔
252	def process_geo(	1✔
253	row_value,
254	) -> Tuple[Optional[str], Optional[int], Optional[list]]:
255	"""Process geo-type questions.
256
257	Args:
258	row_value: The value from the CSV row
259
260	Returns:
261	Tuple of (name, value, options)
262	"""
263	if row_value is None:	1✔
264	return None, None, None	1✔
265	options = [	1✔
266	float(g) for g in str(row_value).split("|")
267	]
268	return None, None, options	1✔
269
270	@staticmethod	1✔
271	def process_option(	1✔
272	row_value,
273	opt_list: Optional[list] = [],
274	) -> Tuple[Optional[str], Optional[int], Optional[list]]:
275	"""Process option-type questions.
276
277	Args:
278	row_value: The value from the CSV row
279
280	Returns:
281	Tuple of (name, value, options)
282	"""
283	if row_value is None:	1!
284	return None, None, None	×
285	option_values = str(row_value).split("|")	1✔
286	# find intersection with opt_list to validate options
287	option_values = [opt for opt in option_values if opt in opt_list]	1✔
288	if not option_values:	1✔
289	return None, None, None	1✔
290	return None, None, option_values	1✔
291
292	@staticmethod	1✔
293	def process_number(	1✔
294	row_value,
295	) -> Tuple[Optional[str], Optional[int], Optional[list]]:
296	"""Process number-type questions.
297
298	Args:
299	row_value: The value from the CSV row
300
301	Returns:
302	Tuple of (name, value, options)
303	"""
304	try:	1✔
305	number_value = float(row_value)	1✔
306	return None, number_value, None	1✔
307	except (ValueError, TypeError):	1✔
308	return None, None, None	1✔
309
310	@classmethod	1✔
311	def process_photo(	1✔
312	cls,
313	row_value,
314	) -> Tuple[Optional[str], Optional[int], Optional[list]]:
315	"""Process photo-type questions.
316
317	First checks the pre-downloaded cache (photo_url_map). If not found,
318	downloads the photo from the given URL and uploads it to storage.
319	Validates that the downloaded content is actually an image.
320
321	Args:
322	row_value: The value from the CSV row (expected to be a URL)
323
324	Returns:
325	Tuple of (name, value, options) where:
326	- name: The local storage path if successful, original value
327	- value: Always None for photos
328	- options: Always None for photos
329	"""
330	if row_value is None:	1✔
331	return None, None, None	1✔
332
333	url = str(row_value).strip()	1✔
334
335	# If not a URL, return as-is (might be an existing local path)
336	if not url.startswith(('http://', 'https://')):	1✔
337	return row_value, None, None	1✔
338
339	# Check pre-downloaded cache first
340	if url in cls.photo_url_map:	1!
341	return cls.photo_url_map[url], None, None	×
342
343	# Fallback: download on-demand using DownloadPhotoProcessor
344	stored_path = DownloadPhotoProcessor.process(url)	1✔
345	if stored_path is None:	1✔
346	# Fall back to original value if download fails
347	return row_value, None, None	1✔
348
349	return stored_path, None, None	1✔
350
351	@staticmethod	1✔
352	def process_default(	1✔
353	row_value,
354	) -> Tuple[Optional[str], Optional[int], Optional[list]]:
355	"""Process default question types.
356
357	Args:
358	row_value: The value from the CSV row
359
360	Returns:
361	Tuple of (name, value, options)
362	"""
363	return row_value, None, None	1✔
364
365	@classmethod	1✔
366	def process(	1✔
367	cls,
368	question_type: str,
369	row_value,
370	administration_id: Optional[int] = None,
371	opt_list: Optional[list] = [],
372	) -> Tuple[Optional[str], Optional[int], Optional[list]]:
373	"""Process answer based on question type.
374
375	Args:
376	question_type: The type of question
377	row_value: The value from the CSV row
378	administration_id: Administration ID for admin-type questions
379
380	Returns:
381	Tuple of (name, value, options)
382	"""
383	if question_type == QuestionTypes.administration:	1✔
384	return cls.process_administration(row_value, administration_id)	1✔
385	elif question_type == QuestionTypes.geo:	1✔
386	return cls.process_geo(row_value)	1✔
387	elif question_type in cls.OPTION_TYPES:	1✔
388	return cls.process_option(row_value, opt_list)	1✔
389	elif question_type == QuestionTypes.number:	1✔
390	return cls.process_number(row_value)	1✔
391	elif question_type == QuestionTypes.photo:	1✔
392	return cls.process_photo(row_value)	1✔
393	else:
394	return cls.process_default(row_value)	1✔

akvo / akvo-mis / #514

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous