23002117210

Committed 12 Mar 2026 12:32PM UTC coverage: 81.12% (-0.009%) from 81.129%

Build # 23002117210

Build Type

push

github

Committed by

web-flow

Commit Message

apps/summarization/export_utils: reduce size of ratings in export (#91)

Coverage Stats

0 of 4 new or added lines in 1 file covered. (0.0%)

7386 of 9105 relevant lines covered (81.12%)

0.81 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

12.5

/apps/summarization/export_utils/processing/extractors.py

import re


def extract_attachments(text):
    """
    Collect upload links referenced in a piece of HTML text.

    A link is any double-quoted ``href`` or ``src`` attribute value that
    contains ``/uploads/``.

    Args:
        text: HTML markup to scan; falsy values (``None``, ``""``) yield ``[]``.

    Returns:
        List of unique link strings. All ``href`` matches come first, then
        the ``src`` matches; a repeated link keeps its first position.
    """
    if not text:
        return []

    # One pattern per attribute; href matches are gathered before src ones.
    attribute_patterns = (
        r'href="([^"]*?/uploads/[^"]*?)"',
        r'src="([^"]*?/uploads/[^"]*?)"',
    )

    unique_links = []
    already_seen = set()
    for attribute_pattern in attribute_patterns:
        for link in re.findall(attribute_pattern, text):
            # Drop repeats while keeping the first-seen order.
            if link in already_seen:
                continue
            already_seen.add(link)
            unique_links.append(link)

    return unique_links


def extract_comments(queryset, include_ratings=True, include_children=True):
    """
    Serialise a comment queryset into a list of plain dictionaries.

    Comments flagged as removed, censored or blocked are excluded via a
    queryset filter. Child comments are processed by a recursive call, so
    the same exclusion applies to them.

    Args:
        queryset: Comment queryset (e.g., obj.comments.all())
        include_ratings: When true, add a "ratings" list (id and value per
            rating) for comments that have a ``ratings`` attribute
        include_children: When true, add "replies" and "reply_count" for
            comments whose ``child_comments`` relation is not empty

    Returns:
        List of dictionaries, each holding "id" and "text" plus the optional
        "comment_categories", "ratings", "replies" and "reply_count" keys
    """
    visible_comments = queryset.filter(
        is_removed=False, is_censored=False, is_blocked=False
    )

    results = []
    for item in visible_comments:
        # NOTE: "created" (comment.created.isoformat()) is currently left
        # out of the exported entry.
        entry = {"id": item.id, "text": item.comment}

        # Categories are only exported when present and non-empty.
        categories = getattr(item, "comment_categories", None)
        if categories:
            entry["comment_categories"] = categories

        if include_ratings and hasattr(item, "ratings"):
            rating_entries = []
            for rating in item.ratings.all():
                rating_entries.append({"id": rating.id, "value": rating.value})
            entry["ratings"] = rating_entries

        if include_children and hasattr(item, "child_comments"):
            children = item.child_comments.all()
            if children.exists():
                # The recursive call filters the children as well, so the
                # reply list can be empty even though children exist.
                replies = extract_comments(
                    children,
                    include_ratings=include_ratings,
                    include_children=True,
                )
                entry["replies"] = replies
                entry["reply_count"] = len(replies)

        results.append(entry)

    return results


def extract_ratings(queryset):
    """
    Tally ratings by value.

    Args:
        queryset: Iterable of rating objects exposing a ``value`` attribute
            (e.g., obj.ratings.all())

    Returns:
        Dictionary mapping each rating value to the number of times it
        occurs, e.g. {1: 5, -1: 2} for five ratings of 1 and two of -1
    """
    tally = {}
    for value in (entry.value for entry in queryset):
        if value in tally:
            tally[value] += 1
        else:
            tally[value] = 1
    return tally

1	import re	1✔
2
3
4	def extract_attachments(text):	1✔
5	"""Extract upload links from HTML text"""
6	if not text:	×
7	return []	×
8
9	# Find all links containing /uploads/ (both href and src attributes)
10	pattern_href = r'href="([^"]*?/uploads/[^"]*?)"'	×
11	pattern_src = r'src="([^"]*?/uploads/[^"]*?)"'	×
12
13	attachments_href = re.findall(pattern_href, text)	×
14	attachments_src = re.findall(pattern_src, text)	×
15
16	# Combine and deduplicate
17	attachments = list(dict.fromkeys(attachments_href + attachments_src))	×
18
19	return attachments	×
20
21
22	def extract_comments(queryset, include_ratings=True, include_children=True):	1✔
23	"""
24	Extract comments from any model with a 'comments' GenericRelation.
25	Filters out removed, censored, or blocked comments.
26	Recursively includes child comments.
27
28	Args:
29	queryset: Comment queryset (e.g., obj.comments.all())
30	include_ratings: Whether to include ratings on comments
31	include_children: Whether to recursively include child comments
32
33	Returns:
34	List of comment dictionaries with nested 'replies' key
35	"""
36	comments_list = []	×
37
38	# Filter out unwanted comments at the queryset level
39	filtered_queryset = queryset.filter(	×
40	is_removed=False, is_censored=False, is_blocked=False
41	)
42
43	for comment in filtered_queryset:	×
44	comment_data = {	×
45	"id": comment.id,
46	"text": comment.comment,
47	# "created": comment.created.isoformat(),
48	}
49
50	# Optional fields
51	if hasattr(comment, "comment_categories") and comment.comment_categories:	×
52	comment_data["comment_categories"] = comment.comment_categories	×
53
54	if include_ratings and hasattr(comment, "ratings"):	×
55	comment_data["ratings"] = [	×
56	{
57	"id": rating.id,
58	"value": rating.value,
59	}
60	for rating in comment.ratings.all()
61	]
62
63	# Recursively include child comments (they will also be filtered)
64	if include_children and hasattr(comment, "child_comments"):	×
65	child_comments = comment.child_comments.all()	×
66	if child_comments.exists():	×
67	comment_data["replies"] = extract_comments(	×
68	child_comments,
69	include_ratings=include_ratings,
70	include_children=True,
71	)
72	comment_data["reply_count"] = len(comment_data["replies"])	×
73
74	comments_list.append(comment_data)	×
75
76	return comments_list	×
77
78
79	def extract_ratings(queryset):	1✔
80	"""
81	Extract ratings from any model with a 'ratings' GenericRelation.
82
83	Returns a dictionary with value:count format.
84	Example: {1: 5, -1: 2} meaning 5 positive, 2 negative
85	"""
NEW 86	ratings_count = {}	×
87	for rating in queryset:	×
NEW 88	value = rating.value	×
NEW 89	ratings_count[value] = ratings_count.get(value, 0) + 1	×
90
NEW 91	return ratings_count	×

liqd / roots / 23002117210

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous