• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

bloomberg / pybossa / 21253638897

22 Jan 2026 03:12PM UTC coverage: 94.004% (-0.07%) from 94.073%
21253638897

Pull #1084

github

web-flow
Merge a31600754 into 94413386c
Pull Request #1084: RDISCROWD-8411: Filter task data by fields under files

102 of 115 new or added lines in 4 files covered. (88.7%)

7 existing lines in 1 file now uncovered.

17951 of 19096 relevant lines covered (94.0%)

0.94 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

89.92
/pybossa/api/task.py
1
# -*- coding: utf8 -*-
2
# This file is part of PYBOSSA.
3
#
4
# Copyright (C) 2015 Scifabric LTD.
5
#
6
# PYBOSSA is free software: you can redistribute it and/or modify
7
# it under the terms of the GNU Affero General Public License as published by
8
# the Free Software Foundation, either version 3 of the License, or
9
# (at your option) any later version.
10
#
11
# PYBOSSA is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
# GNU Affero General Public License for more details.
15
#
16
# You should have received a copy of the GNU Affero General Public License
17
# along with PYBOSSA.  If not, see <http://www.gnu.org/licenses/>.
18
"""
1✔
19
PYBOSSA api module for exposing domain object Task via an API.
20

21
This package adds GET, POST, PUT and DELETE methods for:
22
    * tasks
23

24
"""
25
from flask import abort, current_app
1✔
26
from flask_login import current_user
1✔
27
from werkzeug.exceptions import BadRequest, Conflict, NotFound
1✔
28
from pybossa.model.task import Task
1✔
29
from pybossa.model.project import Project
1✔
30
from pybossa.core import result_repo
1✔
31
from pybossa.util import sign_task
1✔
32
from .api_base import APIBase
1✔
33
from pybossa.api.pwd_manager import get_pwd_manager
1✔
34
from pybossa.util import get_user_id_or_ip, validate_required_fields
1✔
35
from pybossa.core import task_repo, project_repo
1✔
36
from pybossa.cache.projects import get_project_data
1✔
37
from pybossa.data_access import when_data_access
1✔
38
import hashlib
1✔
39
from flask import url_for
1✔
40
from pybossa.cloud_store_api.s3 import upload_json_data
1✔
41
from pybossa.auth.task import TaskAuth
1✔
42
from pybossa.cache import delete_memoized
1✔
43
from pybossa.cache.task_browse_helpers import get_searchable_columns
1✔
44
import json
1✔
45
import copy
1✔
46
from pybossa.task_creator_helper import get_task_expiration
1✔
47
from pybossa.model import make_timestamp
1✔
48
from pybossa.task_creator_helper import generate_checksum, get_task_contents_for_processing, set_task_filter_fields
1✔
49
from pybossa.cache.projects import get_project_data
1✔
50

51

52
class TaskAPI(APIBase):

    """Class for domain object Task.

    Adds Task-specific behaviour on top of APIBase: payload validation and
    duplicate detection on creation, state bookkeeping on update, and the
    access-control helpers used when returning tasks.
    """

    __class__ = Task
    # Keys that a client is never allowed to send in a payload.
    reserved_keys = {'id', 'created', 'state', 'fav_user_ids', 'calibration'}

    immutable_keys = {'project_id'}

    def _forbidden_attributes(self, data):
        """Reject payloads that carry any reserved key.

        :param data: mapping holding the request payload.
        :raises BadRequest: if at least one key of ``data`` is in
            ``reserved_keys``.
        """
        if any(key in self.reserved_keys for key in data.keys()):
            raise BadRequest("Reserved keys in payload")

    def _update_attribute(self, new, old):
        """Carry over info fields and recompute derived fields on update.

        Keys found in ``old.info`` but missing from ``new.info`` are copied
        over, then ``state``, ``calibration`` and ``expiration`` of ``new``
        are recomputed. ``new`` is modified in place.

        :param new: task holding the updated values.
        :param old: task as it was before the update.
        """
        for key, value in old.info.items():
            new.info.setdefault(key, value)

        gold_task = bool(new.gold_answers)
        n_taskruns = len(new.task_runs)
        if new.state == 'completed':
            # Reopen a completed task when it is a gold task, or when
            # n_answers was raised above the number of existing task runs.
            if gold_task or (old.n_answers < new.n_answers and
                             n_taskruns < new.n_answers):
                new.state = 'ongoing'
        if new.state == 'ongoing':
            # Close a non-gold task that already has enough task runs.
            if not gold_task and (n_taskruns >= new.n_answers):
                new.state = 'completed'
        new.calibration = int(gold_task)
        if new.expiration is not None:
            new.expiration = get_task_expiration(new.expiration, old.created)

    def _preprocess_post_data(self, data):
        """Validate and enrich the payload of a task creation request.

        ``data`` is modified in place: ``dup_checksum`` and ``expiration``
        are always set, ``n_answers`` is taken from the project when absent,
        the ``languages``/``locations``/``assign_user`` lists under
        ``user_pref`` are lower-cased, and a payload with a non-empty dict
        under ``gold_answers`` gets ``calibration`` and ``exported`` set.

        :param data: dict with the task payload; ``project_id`` and ``info``
            are read unconditionally.
        :raises NotFound: if ``project_repo`` has no project with that id.
        :raises BadRequest: if ``info`` holds an HDFS file-proxy path, if
            extracting the task contents, generating the checksum or setting
            the filter fields fails, or if ``validate_required_fields``
            reports invalid fields.
        :raises Conflict: if ``task_repo.find_duplicate`` returns a match;
            the description is a JSON string with ``reason`` and ``task_id``.
        """
        project_id = data["project_id"]
        project = project_repo.get(project_id)
        if not project:
            raise NotFound(f'Non existing project id {project_id}')

        info = data["info"]
        if isinstance(info, dict):
            hdfs_task = any(
                val.startswith("/fileproxy/hdfs/")
                for val in info.values() if isinstance(val, str)
            )
            if hdfs_task:
                raise BadRequest("Invalid task payload. HDFS is not supported")

        # Extract task contents once; the result is shared by the checksum
        # and the filter-field steps below.
        # The log calls use %s for the project id so that a non-integer id
        # cannot break message formatting.
        try:
            task_contents, _ = get_task_contents_for_processing(
                project_id=project_id, task=data)
        except Exception as e:
            current_app.logger.info(
                "Project %s. Error extracting task contents %s",
                project_id, str(e))
            raise BadRequest(str(e)) from e

        try:
            dup_checksum = generate_checksum(
                project_id=project_id, task=data, task_contents=task_contents)
        except Exception as e:
            current_app.logger.info(
                "Project %s. Error generating duplicate task checksum %s",
                project_id, str(e))
            raise BadRequest(str(e)) from e

        # Set task filter fields from file contents (uses same task_contents)
        try:
            set_task_filter_fields(
                project_id=project_id, task=data, task_contents=task_contents)
        except Exception as e:
            current_app.logger.info(
                "Project %s. Error setting task filter fields %s",
                project_id, str(e))
            raise BadRequest(str(e)) from e

        data["dup_checksum"] = dup_checksum
        completed_tasks = project.info.get(
            "duplicate_task_check", {}).get("completed_tasks", False)
        duplicate_task = task_repo.find_duplicate(
            project_id=project_id,
            info=info,
            dup_checksum=dup_checksum,
            completed_tasks=completed_tasks
        )
        if duplicate_task:
            current_app.logger.info(
                "Project %s, task checksum %s. Duplicate task found with task id %s. Ignoring task creation",
                str(project_id), str(dup_checksum), str(duplicate_task))
            message = {
                'reason': 'DUPLICATE_TASK',
                'task_id': duplicate_task
            }
            raise Conflict(json.dumps(message))

        if 'n_answers' not in data:
            data['n_answers'] = project.get_default_n_answers()

        # ``or {}`` also covers a payload that sends "user_pref": null.
        user_pref = data.get('user_pref') or {}
        for pref in ('languages', 'locations', 'assign_user'):
            if user_pref.get(pref):
                user_pref[pref] = [s.lower() for s in user_pref[pref]]

        invalid_fields = validate_required_fields(info)
        if invalid_fields:
            raise BadRequest('Missing or incorrect required fields: {}'
                             .format(','.join(invalid_fields)))

        # Only a non-empty dict marks the task as a gold task.
        # NOTE(review): other truthy gold_answers values pass through
        # untouched, as before — confirm whether they should be rejected.
        gold_answers = data.get('gold_answers')
        if gold_answers and isinstance(gold_answers, dict):
            data['calibration'] = 1
            data['exported'] = True

        create_time = data.get("created") or make_timestamp()
        data["expiration"] = get_task_expiration(data.get('expiration'), create_time)

    def _verify_auth(self, item):
        """Tell whether the current user may access ``item``.

        :param item: object exposing ``project_id``.
        :returns: ``False`` for unauthenticated users, ``True`` for admins
            and subadmins, otherwise ``True`` only when the project's
            password manager reports that no password is needed.
        """
        if not current_user.is_authenticated:
            return False
        if current_user.admin or current_user.subadmin:
            return True
        project = Project(**get_project_data(item.project_id))
        pwd_manager = get_pwd_manager(project)
        return not pwd_manager.password_needed(project, get_user_id_or_ip())

    def _sign_item(self, item):
        """Sign ``item`` when the current user is an admin or project owner.

        :param item: mapping with a ``project_id`` key; passed to
            ``sign_task`` when the user qualifies.
        """
        project_id = item['project_id']
        if current_user.admin or \
           current_user.id in get_project_data(project_id)['owners_ids']:
            sign_task(item)

    def _select_attributes(self, data):
        """Return ``data`` after applying TaskAuth access control.

        :param data: mapping with a ``project_id`` key.
        :returns: the result of ``TaskAuth.apply_access_control`` for the
            current user and the project's cached data.
        """
        return TaskAuth.apply_access_control(
            data,
            user=current_user,
            project_data=get_project_data(data['project_id']))

    def put(self, oid):
        """Clear the memoized searchable columns, then run the base PUT.

        :param oid: identifier forwarded to ``APIBase.put``.
        :returns: whatever ``APIBase.put`` returns.
        """
        # reset cache / memoized
        delete_memoized(get_searchable_columns)
        return super().put(oid)
1✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc