• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

my8100 / scrapydweb / 62a8ca7a-abfc-4e13-b2e2-dc44b8537bd0

06 Oct 2024 03:02PM UTC coverage: 85.817% (-0.05%) from 85.866%
62a8ca7a-abfc-4e13-b2e2-dc44b8537bd0

push

circleci

web-flow
Release v1.5.1 to support scrapyd v1.5.0 (#240)

* Release v1.5.1 to support scrapyd v1.5.0

* Test py312-scrapyd-v143 in circleci

* Set use-scrapyd-v143 default to true in circleci

2 of 2 new or added lines in 2 files covered. (100.0%)

27 existing lines in 3 files now uncovered.

3467 of 4040 relevant lines covered (85.82%)

7.63 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

89.58
/scrapydweb/views/baseview.py
1
# coding: utf-8
2
import logging
9✔
3
import os
9✔
4
import re
9✔
5

6
from flask import current_app as app
9✔
7
from flask import Response, flash, g, request, url_for
9✔
8
from flask.views import View
9✔
9
from logparser import __version__ as LOGPARSER_VERSION
9✔
10
from six import text_type
9✔
11

12
from ..__version__ import __version__ as SCRAPYDWEB_VERSION
9✔
13
from ..common import (get_now_string, get_response_from_view, handle_metadata,
9✔
14
                      handle_slash, json_dumps, session)
15
from ..vars import (ALLOWED_SCRAPYD_LOG_EXTENSIONS, APSCHEDULER_DATABASE_URI,
9✔
16
                    DATA_PATH, DEMO_PROJECTS_PATH, DEPLOY_PATH, PARSE_PATH,
17
                    ALERT_TRIGGER_KEYS, LEGAL_NAME_PATTERN, SCHEDULE_ADDITIONAL,
18
                    SCHEDULE_PATH, STATE_PAUSED, STATE_RUNNING, STATS_PATH, STRICT_NAME_PATTERN)
19
from ..utils.scheduler import scheduler
9✔
20

21

22
class BaseView(View):
9✔
23
    SCRAPYDWEB_VERSION = SCRAPYDWEB_VERSION
9✔
24
    LOGPARSER_VERSION = LOGPARSER_VERSION
9✔
25

26
    DEMO_PROJECTS_PATH = DEMO_PROJECTS_PATH
9✔
27
    DEPLOY_PATH = DEPLOY_PATH
9✔
28
    PARSE_PATH = PARSE_PATH
9✔
29
    SCHEDULE_PATH = SCHEDULE_PATH
9✔
30
    STATS_PATH = STATS_PATH
9✔
31

32
    OK = 'ok'
9✔
33
    ERROR = 'error'
9✔
34
    NA = 'N/A'
9✔
35
    INFO = 'info'
9✔
36
    WARN = 'warning'
9✔
37
    DEFAULT_LATEST_VERSION = 'default: the latest version'
9✔
38
    LEGAL_NAME_PATTERN = LEGAL_NAME_PATTERN
9✔
39
    STRICT_NAME_PATTERN = STRICT_NAME_PATTERN
9✔
40
    ALERT_TRIGGER_KEYS = ALERT_TRIGGER_KEYS
9✔
41

42
    methods = ['GET', 'POST']
9✔
43

44
    def __init__(self, *args, **kwargs):
        """Collect the per-request state shared by the views derived from BaseView.

        Copies settings from ``app.config`` onto the instance, resolves the
        Scrapyd server addressed by the ``node`` view argument, inspects the
        User-Agent header, builds the ``FEATURES`` summary string and finally
        calls ``update_g()``.

        :param args: accepted but not used here
        :param kwargs: accepted but not used here
        :raises AssertionError: if ``node`` is not between 1 and the amount of Scrapyd servers
        """
        self.logger = logging.getLogger(self.__class__.__name__)
        # Not in the config file
        self.DEFAULT_SETTINGS_PY_PATH = app.config['DEFAULT_SETTINGS_PY_PATH']
        self.SCRAPYDWEB_SETTINGS_PY_PATH = app.config['SCRAPYDWEB_SETTINGS_PY_PATH']
        self.MAIN_PID = app.config['MAIN_PID']
        self.LOGPARSER_PID = app.config['LOGPARSER_PID']
        self.POLL_PID = app.config['POLL_PID']

        # System
        self.DEBUG = app.config.get('DEBUG', False)
        self.VERBOSE = app.config.get('VERBOSE', False)
        self.DATA_PATH = DATA_PATH
        self.APSCHEDULER_DATABASE_URI = APSCHEDULER_DATABASE_URI
        self.SQLALCHEMY_DATABASE_URI = app.config['SQLALCHEMY_DATABASE_URI']
        self.SQLALCHEMY_BINDS = app.config['SQLALCHEMY_BINDS']

        _level = logging.DEBUG if self.VERBOSE else logging.INFO
        self.logger.setLevel(_level)
        logging.getLogger("requests").setLevel(_level)
        logging.getLogger("urllib3").setLevel(_level)

        # Log the incoming request (only visible when VERBOSE is set)
        self.logger.debug('view_args of %s\n%s', request.url, self.json_dumps(request.view_args))
        if request.args:
            self.logger.debug('request.args of %s\n%s', request.url, self.json_dumps(request.args))
        if request.form:
            self.logger.debug('request.form from %s\n%s', request.url, self.json_dumps(request.form))
        # silent=True: this lookup exists for logging only, so a malformed or
        # non-JSON body must not abort the request inside the constructor.
        request_json = request.get_json(silent=True)
        if request_json:
            self.logger.debug('request.json from %s\n%s', request.url, self.json_dumps(request_json))
        if request.files:
            self.logger.debug('request.files from %s\n\n    %s\n', request.url, request.files)

        # ScrapydWeb
        self.SCRAPYDWEB_BIND = app.config.get('SCRAPYDWEB_BIND', '0.0.0.0')
        self.SCRAPYDWEB_PORT = app.config.get('SCRAPYDWEB_PORT', 5000)

        self.ENABLE_AUTH = app.config.get('ENABLE_AUTH', False)
        self.USERNAME = app.config.get('USERNAME', '')
        self.PASSWORD = app.config.get('PASSWORD', '')

        self.ENABLE_HTTPS = app.config.get('ENABLE_HTTPS', False)
        self.CERTIFICATE_FILEPATH = app.config.get('CERTIFICATE_FILEPATH', '')
        self.PRIVATEKEY_FILEPATH = app.config.get('PRIVATEKEY_FILEPATH', '')

        self.URL_SCRAPYDWEB = app.config.get('URL_SCRAPYDWEB', 'http://127.0.0.1:5000')

        # Scrapy
        self.SCRAPY_PROJECTS_DIR = app.config.get('SCRAPY_PROJECTS_DIR', '') or self.DEMO_PROJECTS_PATH

        # Scrapyd
        self.SCRAPYD_SERVERS = app.config.get('SCRAPYD_SERVERS', []) or ['127.0.0.1:6800']
        self.SCRAPYD_SERVERS_AMOUNT = len(self.SCRAPYD_SERVERS)
        self.SCRAPYD_SERVERS_GROUPS = app.config.get('SCRAPYD_SERVERS_GROUPS', []) or ['']
        self.SCRAPYD_SERVERS_AUTHS = app.config.get('SCRAPYD_SERVERS_AUTHS', []) or [None]
        self.SCRAPYD_SERVERS_PUBLIC_URLS = (app.config.get('SCRAPYD_SERVERS_PUBLIC_URLS', None)
                                            or [''] * self.SCRAPYD_SERVERS_AMOUNT)

        self.LOCAL_SCRAPYD_SERVER = app.config.get('LOCAL_SCRAPYD_SERVER', '')
        self.LOCAL_SCRAPYD_LOGS_DIR = app.config.get('LOCAL_SCRAPYD_LOGS_DIR', '')
        self.SCRAPYD_LOG_EXTENSIONS = (app.config.get('SCRAPYD_LOG_EXTENSIONS', [])
                                       or ALLOWED_SCRAPYD_LOG_EXTENSIONS)

        # LogParser
        self.ENABLE_LOGPARSER = app.config.get('ENABLE_LOGPARSER', False)
        self.BACKUP_STATS_JSON_FILE = app.config.get('BACKUP_STATS_JSON_FILE', True)

        # Timer Tasks
        self.scheduler = scheduler
        self.JOBS_SNAPSHOT_INTERVAL = app.config.get('JOBS_SNAPSHOT_INTERVAL', 300)

        # Run Spider
        self.SCHEDULE_EXPAND_SETTINGS_ARGUMENTS = app.config.get('SCHEDULE_EXPAND_SETTINGS_ARGUMENTS', False)
        self.SCHEDULE_CUSTOM_USER_AGENT = app.config.get('SCHEDULE_CUSTOM_USER_AGENT', 'Mozilla/5.0')
        self.SCHEDULE_USER_AGENT = app.config.get('SCHEDULE_USER_AGENT', None)
        self.SCHEDULE_ROBOTSTXT_OBEY = app.config.get('SCHEDULE_ROBOTSTXT_OBEY', None)
        self.SCHEDULE_COOKIES_ENABLED = app.config.get('SCHEDULE_COOKIES_ENABLED', None)
        self.SCHEDULE_CONCURRENT_REQUESTS = app.config.get('SCHEDULE_CONCURRENT_REQUESTS', None)
        self.SCHEDULE_DOWNLOAD_DELAY = app.config.get('SCHEDULE_DOWNLOAD_DELAY', None)
        self.SCHEDULE_ADDITIONAL = app.config.get('SCHEDULE_ADDITIONAL', SCHEDULE_ADDITIONAL)

        # Page Display
        self.SHOW_SCRAPYD_ITEMS = app.config.get('SHOW_SCRAPYD_ITEMS', True)
        self.SHOW_JOBS_JOB_COLUMN = app.config.get('SHOW_JOBS_JOB_COLUMN', False)
        self.JOBS_FINISHED_JOBS_LIMIT = app.config.get('JOBS_FINISHED_JOBS_LIMIT', 0)
        self.JOBS_RELOAD_INTERVAL = app.config.get('JOBS_RELOAD_INTERVAL', 300)
        self.DAEMONSTATUS_REFRESH_INTERVAL = app.config.get('DAEMONSTATUS_REFRESH_INTERVAL', 10)

        # Send text
        self.SLACK_TOKEN = app.config.get('SLACK_TOKEN', '')
        self.SLACK_CHANNEL = app.config.get('SLACK_CHANNEL', '') or 'general'
        self.TELEGRAM_TOKEN = app.config.get('TELEGRAM_TOKEN', '')
        self.TELEGRAM_CHAT_ID = app.config.get('TELEGRAM_CHAT_ID', 0)
        self.EMAIL_SUBJECT = app.config.get('EMAIL_SUBJECT', '') or 'Email from #scrapydweb'

        # Monitor & Alert
        self.ENABLE_MONITOR = app.config.get('ENABLE_MONITOR', False)
        self.ENABLE_SLACK_ALERT = app.config.get('ENABLE_SLACK_ALERT', False)
        self.ENABLE_TELEGRAM_ALERT = app.config.get('ENABLE_TELEGRAM_ALERT', False)
        self.ENABLE_EMAIL_ALERT = app.config.get('ENABLE_EMAIL_ALERT', False)

        self.EMAIL_SENDER = app.config.get('EMAIL_SENDER', '')
        self.EMAIL_RECIPIENTS = app.config.get('EMAIL_RECIPIENTS', [])
        self.EMAIL_USERNAME = app.config.get('EMAIL_USERNAME', '') or self.EMAIL_SENDER
        self.EMAIL_PASSWORD = app.config.get('EMAIL_PASSWORD', '')

        self.SMTP_SERVER = app.config.get('SMTP_SERVER', '')
        self.SMTP_PORT = app.config.get('SMTP_PORT', 0)
        self.SMTP_OVER_SSL = app.config.get('SMTP_OVER_SSL', False)
        self.SMTP_CONNECTION_TIMEOUT = app.config.get('SMTP_CONNECTION_TIMEOUT', 30)

        # 'subject' and 'content' are placeholder values set here
        self.EMAIL_KWARGS = dict(
            email_username=self.EMAIL_USERNAME,
            email_password=self.EMAIL_PASSWORD,
            email_sender=self.EMAIL_SENDER,
            email_recipients=self.EMAIL_RECIPIENTS,
            smtp_server=self.SMTP_SERVER,
            smtp_port=self.SMTP_PORT,
            smtp_over_ssl=self.SMTP_OVER_SSL,
            smtp_connection_timeout=self.SMTP_CONNECTION_TIMEOUT,
            subject='subject',
            content='content'
        )

        self.POLL_ROUND_INTERVAL = app.config.get('POLL_ROUND_INTERVAL', 300)
        self.POLL_REQUEST_INTERVAL = app.config.get('POLL_REQUEST_INTERVAL', 10)
        self.ALERT_WORKING_DAYS = app.config.get('ALERT_WORKING_DAYS', [])
        self.ALERT_WORKING_HOURS = app.config.get('ALERT_WORKING_HOURS', [])
        self.ON_JOB_RUNNING_INTERVAL = app.config.get('ON_JOB_RUNNING_INTERVAL', 0)
        self.ON_JOB_FINISHED = app.config.get('ON_JOB_FINISHED', False)
        # ['CRITICAL', 'ERROR', 'WARNING', 'REDIRECT', 'RETRY', 'IGNORE']
        for key in self.ALERT_TRIGGER_KEYS:
            setattr(self, 'LOG_%s_THRESHOLD' % key, app.config.get('LOG_%s_THRESHOLD' % key, 0))
            setattr(self, 'LOG_%s_TRIGGER_STOP' % key, app.config.get('LOG_%s_TRIGGER_STOP' % key, False))
            setattr(self, 'LOG_%s_TRIGGER_FORCESTOP' % key, app.config.get('LOG_%s_TRIGGER_FORCESTOP' % key, False))

        # Other attributes not from config
        self.view_args = request.view_args
        self.node = self.view_args['node']
        assert 0 < self.node <= self.SCRAPYD_SERVERS_AMOUNT, \
            'node index error: %s, which should be between 1 and %s' % (self.node, self.SCRAPYD_SERVERS_AMOUNT)
        # node is 1-based, the per-server lists are 0-based
        self.SCRAPYD_SERVER = self.SCRAPYD_SERVERS[self.node - 1]
        self.IS_LOCAL_SCRAPYD_SERVER = self.SCRAPYD_SERVER == self.LOCAL_SCRAPYD_SERVER
        self.GROUP = self.SCRAPYD_SERVERS_GROUPS[self.node - 1]
        self.AUTH = self.SCRAPYD_SERVERS_AUTHS[self.node - 1]
        self.SCRAPYD_SERVER_PUBLIC_URL = self.SCRAPYD_SERVERS_PUBLIC_URLS[self.node - 1]

        ua = request.headers.get('User-Agent', '')
        self.IS_MOBILE = bool(re.search(r'Android|webOS|iPad|iPhone|iPod|BlackBerry|IEMobile|Opera Mini', ua, re.I))
        self.IS_IPAD = bool(re.search(r'iPad', ua, re.I))

        # http://werkzeug.pocoo.org/docs/0.14/utils/#module-werkzeug.useragents
        # /site-packages/werkzeug/useragents.py
        browser = request.user_agent.browser or ''  # lib requests GET: None
        self.IS_IE_EDGE = bool(browser == 'msie' or re.search(r'Edge', ua, re.I))

        self.USE_MOBILEUI = request.args.get('ui', '') == 'mobile'
        self.UI = 'mobile' if self.USE_MOBILEUI else None
        self.GET = request.method == 'GET'
        self.POST = request.method == 'POST'

        # One marker per feature, '-' when the feature is off
        features = [
            'A' if self.ENABLE_AUTH else '-',
            'D' if handle_metadata().get('jobs_style') == 'database' else 'C',
            'd' if self.SCRAPY_PROJECTS_DIR != self.DEMO_PROJECTS_PATH else '-',
            'L' if self.ENABLE_LOGPARSER else '-',
            'Sl' if self.ENABLE_SLACK_ALERT else '-',
            'Tg' if self.ENABLE_TELEGRAM_ALERT else '-',
            'Em' if self.ENABLE_EMAIL_ALERT else '-',
            'P' if self.IS_MOBILE else '-',
            'M' if self.USE_MOBILEUI else '-',
            'S' if self.ENABLE_HTTPS else '-',
        ]
        # Also read by update_g(), so it has to be set before update_g() is called
        self.any_running_apscheduler_jobs = any(job.next_run_time
                                                for job in self.scheduler.get_jobs(jobstore='default'))
        if self.scheduler.state == STATE_PAUSED:
            features.append('-')
        elif self.any_running_apscheduler_jobs:
            features.append('T')
        else:
            features.append('t')
        if not self.SQLALCHEMY_DATABASE_URI.startswith('sqlite'):
            # First three characters of the database URI
            features.append(self.SQLALCHEMY_DATABASE_URI[:3])
        self.FEATURES = ''.join(features)

        self.template_fail = 'scrapydweb/fail_mobileui.html' if self.USE_MOBILEUI else 'scrapydweb/fail.html'
        self.update_g()
233

234
    @staticmethod
9✔
235
    def get_job_without_ext(job):
9✔
236
        if job.endswith('.tar.gz'):
9✔
237
            return job[:-len('.tar.gz')]
×
238
        else:
239
            return os.path.splitext(job)[0]  # '1.1.log' => ('1.1', '.log')
9✔
240

241
    @staticmethod
    def get_now_string(allow_space=False):
        """Expose the module-level ``get_now_string`` helper on the view class.

        :param allow_space: forwarded unchanged to the helper
        :return: whatever the helper returns
        """
        now_string = get_now_string(allow_space=allow_space)
        return now_string
244

245
    def get_response_from_view(self, url, data=None, as_json=False):
        """Call the module-level ``get_response_from_view`` helper with this view's credentials.

        :param url: forwarded to the helper
        :param data: forwarded to the helper
        :param as_json: forwarded to the helper
        :return: whatever the helper returns
        """
        # Credentials are only passed on when auth is enabled
        if self.ENABLE_AUTH:
            credentials = (self.USERNAME, self.PASSWORD)
        else:
            credentials = None
        return get_response_from_view(url, auth=credentials, data=data, as_json=as_json)
248

249
    def get_selected_nodes(self):
        """Return the 1-based node indexes whose form field equals 'on'.

        The submitted form is looked up by the string form of each index from
        1 to ``SCRAPYD_SERVERS_AMOUNT``.

        :return: list of selected node indexes in ascending order
        """
        node_indexes = range(1, self.SCRAPYD_SERVERS_AMOUNT + 1)
        return [index for index in node_indexes if request.form.get(str(index)) == 'on']
255

256
    @staticmethod
    def handle_slash(string):
        """Expose the module-level ``handle_slash`` helper on the view class.

        :param string: forwarded unchanged to the helper
        :return: whatever the helper returns
        """
        handled = handle_slash(string)
        return handled
259

260
    @staticmethod
    def json_dumps(obj, sort_keys=True, indent=4, ensure_ascii=False, as_response=False):
        """Serialize ``obj`` with the module-level ``json_dumps`` helper.

        :param obj: object to serialize
        :param sort_keys: forwarded to the helper
        :param indent: forwarded to the helper
        :param ensure_ascii: forwarded to the helper
        :param as_response: wrap the serialized text in a flask ``Response`` when True
        :return: the serialized text, or a ``Response`` with mimetype 'application/json'
        """
        serialized = json_dumps(obj, sort_keys=sort_keys, indent=indent, ensure_ascii=ensure_ascii)
        if not as_response:
            return serialized
        # Same purpose as flask.jsonify
        # https://flask.palletsprojects.com/en/1.1.x/config/#JSONIFY_MIMETYPE
        # https://stackoverflow.com/questions/11773348/python-flask-how-to-set-content-type
        # https://stackoverflow.com/questions/9254891/what-does-content-type-application-json-charset-utf-8-really-mean
        return Response(serialized, mimetype='application/json')
272

273
    @staticmethod
9✔
274
    def remove_microsecond(dt):
9✔
275
        return str(dt)[:19]
9✔
276

277
    def make_request(self, url, data=None, auth=None, as_json=True, dumps_json=True, check_status=True, timeout=60):
        """
        Send a GET request, or a POST request when data is given, via the shared session.

        :param url: url to make request
        :param data: None or a dict object to post
        :param auth: None or (username, password) for basic auth
        :param as_json: return a dict object if set True, else text
        :param dumps_json: whether to dumps the json response when as_json is set to True
        :param check_status: whether to log error when status != 'ok'
        :param timeout: timeout when making request, in seconds
        :return: a tuple of (status_code, result). status_code is -1 when the request itself failed;
                 result is a dict when as_json is set True, else text.
        """
        try:
            if 'addversion.json' in url and data:
                # Log the size of the egg instead of its binary content
                self.logger.debug(">>>>> POST %s", url)
                self.logger.debug(self.json_dumps(dict(project=data['project'], version=data['version'],
                                                  egg="%s bytes binary egg file" % len(data['egg']))))
            else:
                self.logger.debug(">>>>> %s %s", 'POST' if data else 'GET', url)
                if data:
                    self.logger.debug("POST data: %s", self.json_dumps(data))

            if data:
                r = session.post(url, data=data, auth=auth, timeout=timeout)
            else:
                r = session.get(url, auth=auth, timeout=timeout)
            r.encoding = 'utf-8'
        except Exception as err:
            # Any failure while sending is reported to the caller as status code -1
            self.logger.error("!!!!! error with %s: %s", url, err)
            if as_json:
                r_json = dict(url=url, auth=auth, status_code=-1, status=self.ERROR,
                              message=str(err), when=self.get_now_string(True))
                return -1, r_json
            return -1, str(err)

        if not as_json:
            if r.status_code == 200:
                # Keep the debug log short for long bodies
                _text = r.text[:100] + '......' + r.text[-100:] if len(r.text) > 200 else r.text
                self.logger.debug("<<<<< (%s) %s\n%s", r.status_code, url, repr(_text))
            else:
                self.logger.error("!!!!! (%s) %s\n%s", r.status_code, url, r.text)
            return r.status_code, r.text

        try:
            # listprojects would get 502 html when Scrapyd server reboots
            r_json = r.json()  # PY3: json.decoder.JSONDecodeError  PY2: exceptions.ValueError
        except ValueError as err:  # issubclass(JSONDecodeError, ValueError)
            self.logger.error("Fail to decode json from %s: %s", url, err)
            r_json = dict(status=self.ERROR, message=r.text)
        else:
            if not isinstance(r_json, dict):
                # Valid JSON that is not an object (e.g. a list or a string) cannot carry
                # the status/message keys handled below, so report it as an error.
                self.logger.error("Fail to get a json object from %s: got %s", url, type(r_json).__name__)
                r_json = dict(status=self.ERROR, message=r.text)

        # Scrapyd in Python2: Traceback (most recent call last):\\n
        # Scrapyd in Python3: Traceback (most recent call last):\r\n
        message = r_json.get('message', '')
        if message and not isinstance(message, dict):
            r_json['message'] = re.sub(r'\\n', '\n', message)
        r_json.update(dict(url=url, auth=auth, status_code=r.status_code, when=self.get_now_string(True)))
        status = r_json.setdefault('status', self.NA)
        if r.status_code != 200 or (check_status and status != self.OK):
            self.logger.error("!!!!! (%s) %s: %s", r.status_code, status, url)
        else:
            self.logger.debug("<<<<< (%s) %s: %s", r.status_code, status, url)
        if dumps_json:
            self.logger.debug("Got json from %s: %s", url, self.json_dumps(r_json))
        else:
            self.logger.debug("Got keys from (%s) %s %s: %s",
                              r_json.get('status_code'), r_json.get('status'), url, r_json.keys())

        return r.status_code, r_json
347

348
    def update_g(self):
        """Set the variables that this view exposes on ``flask.g``.

        ``IS_MOBILE``, ``url_jobs_list`` and ``multinode`` are always set. The
        menu URLs and the scheduler state flags are only set when the mobile UI
        is not in use.
        """
        # g lifetime: every single request
        # Note that use inject_variable() in View class would cause memory leak, issue #14
        g.IS_MOBILE = self.IS_MOBILE
        g.url_jobs_list = [url_for('jobs', node=n, ui=self.UI)
                           for n in range(1, self.SCRAPYD_SERVERS_AMOUNT + 1)]
        g.multinode = ('<label title="multinode">'
                       '<svg class="icon" aria-hidden="true"><use xlink:href="#icon-servers"></use></svg>'
                       '</label>')
        if self.USE_MOBILEUI:
            return

        # For base.html
        g.url_daemonstatus = url_for('api', node=self.node, opt='daemonstatus')
        # (suffix of the g.url_menu_* attribute, endpoint name)
        menu_endpoints = (
            ('servers', 'servers'),
            ('jobs', 'jobs'),
            ('nodereports', 'nodereports'),
            ('clusterreports', 'clusterreports'),
            ('tasks', 'tasks'),
            ('deploy', 'deploy'),
            ('schedule', 'schedule'),
            ('projects', 'projects'),
            ('logs', 'logs'),
            ('items', 'items'),
            ('sendtext', 'sendtext'),
            ('parse', 'parse.upload'),
            ('settings', 'settings'),
        )
        for suffix, endpoint in menu_endpoints:
            setattr(g, 'url_menu_%s' % suffix, url_for(endpoint, node=self.node))
        g.url_menu_mobileui = url_for('index', node=self.node, ui='mobile')

        is_paused = self.scheduler.state == STATE_PAUSED
        g.scheduler_state_paused = is_paused and self.any_running_apscheduler_jobs
        is_running = self.scheduler.state == STATE_RUNNING
        g.scheduler_state_running = is_running and self.any_running_apscheduler_jobs
376

377
    # Issue#48 [PY2] UnicodeDecodeError raised when there are some files with illegal filenames in `SCRAPY_PROJECTS_DIR`
378
    # https://stackoverflow.com/questions/21772271/unicodedecodeerror-when-performing-os-walk
379
    # https://xuanwo.io/2018/04/01/python-os-walk/
380
    # Tested in Ubuntu:
381
    # touch $(echo -e "\x8b\x8bFile")
382
    # mkdir $(echo -e "\x8b\x8bFolder")
383
    def safe_walk(self, top, topdown=True, onerror=None, followlinks=False):
        """Walk a directory tree like ``os.walk`` while ignoring non-unicode names.

        Names returned by ``os.listdir`` that are not ``text_type`` are logged,
        flashed as a warning and left out of the result.

        :param top: directory to start from
        :param topdown: yield a directory before its subdirectories if True, after them otherwise
        :param onerror: None or a callable invoked with the OSError raised by ``os.listdir``
        :param followlinks: whether to descend into subdirectories that are symbolic links
        :return: a generator of (top, dirs, nondirs) tuples
        """
        # touch $(echo -e "\x8b\x8bThis is a bad filename")
        # ('top: ', u'/home/username/download/scrapydweb/scrapydweb/data/demo_projects/ScrapydWeb_demo')
        # ('names: ', ['\x8b\x8bThis', u'ScrapydWeb_demo', u'filename', u'scrapy.cfg', u'a', u'is', u'bad'])
        try:
            entries = os.listdir(top)
        except OSError as err:
            if onerror is not None:
                onerror(err)
            return

        dirs, nondirs = [], []
        for entry in entries:
            if not isinstance(entry, text_type):
                msg = "Ignore non-unicode filename %s in %s" % (repr(entry), top)
                self.logger.error(msg)
                flash(msg, self.WARN)
                continue
            if os.path.isdir(os.path.join(top, entry)):
                dirs.append(entry)
            else:
                nondirs.append(entry)

        if topdown:
            yield top, dirs, nondirs
        for subdir in dirs:
            subdir_path = os.path.join(top, subdir)
            if not followlinks and os.path.islink(subdir_path):
                continue
            for item in self.safe_walk(subdir_path, topdown, onerror, followlinks):
                yield item
        if not topdown:
            yield top, dirs, nondirs
422

423

424
class MetadataView(BaseView):
    """View that returns the result of ``handle_metadata()`` as a JSON response."""

    def __init__(self):
        super(MetadataView, self).__init__()

    def dispatch_request(self, **kwargs):
        """Serialize the metadata into a response; ``kwargs`` are not used."""
        metadata = handle_metadata()
        return self.json_dumps(metadata, as_response=True)
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc