• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

apertium / apertium-apy / 4241413648

pending completion
4241413648

push

github

Kevin Brubeck Unhammer
pipenv install --dev

361 of 913 branches covered (39.54%)

Branch coverage included in aggregate %.

1251 of 2281 relevant lines covered (54.84%)

0.55 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

75.59
/apertium_apy/handlers/translate.py
1
import heapq
1✔
2
import logging
1✔
3
import re
1✔
4
import time
1✔
5
from datetime import datetime
1✔
6

7
from tornado import gen
1✔
8
import tornado.iostream
1✔
9
import asyncio
1✔
10

11
from apertium_apy import missing_freqs_db  # noqa: F401
1✔
12
from apertium_apy.handlers.base import BaseHandler
1✔
13
from apertium_apy.keys import ApiKeys
1✔
14
from apertium_apy.utils import to_alpha3_code, scale_mt_log
1✔
15
from apertium_apy.utils.translation import parse_mode_file, make_pipeline
1✔
16
# Typing imports that flake8 doesn't understand:
17
from apertium_apy.utils.translation import FlushingPipeline, SimplePipeline  # noqa: F401
1✔
18
from typing import Union        # noqa: F401
1✔
19

20

21
class TranslationInfo:
    """Request details captured from a handler for translation logging.

    Attributes:
        langpair: value of the ``langpair`` request argument.
        key: value of the ``key`` request argument, ``'null'`` when absent.
        ip: the ``X-Real-IP`` header, falling back to the request's remote IP.
        referer: the ``Referer`` header, ``'null'`` when absent.
    """

    def __init__(self, handler):
        # Request arguments are read before the headers are touched.
        self.langpair = handler.get_argument('langpair')
        self.key = handler.get_argument('key', default='null')

        request = handler.request
        headers = request.headers
        self.ip = headers.get('X-Real-IP', request.remote_ip)
        self.referer = headers.get('Referer', 'null')
×
27

28

29
class TranslateHandler(BaseHandler):
1✔
30
    # Matches an asterisk-marked token: a literal '*' followed by one or more
    # characters other than '.', ',', ';', ':', tab, '*' or space. Group 1 is
    # the token without its leading '*'.
    unknown_mark_re = re.compile(r'[*]([^.,;:\t\* ]+)')
1✔
31
    # Class-level key store; stays None until get_api_key() builds it from
    # cls.api_keys_conf on first use.
    api_keys = None
1✔
32

33
    def __init__(self, application, request, **kwargs):
        """Initialise the handler by delegating entirely to the parent class."""
        super(TranslateHandler, self).__init__(application, request, **kwargs)
1✔
35

36
    @property
1✔
37
    def mark_unknown(self):
1✔
38
        return self.get_argument('markUnknown', default='yes').lower() in ['yes', 'true', '1']
1✔
39

40
    def note_pair_usage(self, pair):
1✔
41
        self.stats.usecount[pair] = 1 + self.stats.usecount.get(pair, 0)
1✔
42

43
    def maybe_strip_marks(self, mark_unknown, pair, translated):
1✔
44
        self.note_unknown_tokens('%s-%s' % pair, translated)
1✔
45
        if mark_unknown:
1✔
46
            return translated
1✔
47
        else:
48
            return re.sub(self.unknown_mark_re, r'\1', translated)
1✔
49

50
    def note_unknown_tokens(self, pair, text):
        """Report every token matched by ``unknown_mark_re`` in ``text``.

        Each token is passed, together with ``pair``, to
        ``missing_freqs_db.note_unknown``. Nothing happens while the
        module-level ``missing_freqs_db`` is ``None``.
        """
        # NOTE(review): the name is bound by a from-import at module load time;
        # confirm that whoever sets up the database rebinds this module's name.
        global missing_freqs_db
        if missing_freqs_db is None:
            return
        for unknown in self.unknown_mark_re.findall(text):
            missing_freqs_db.note_unknown(unknown, pair)
×
55

56
    def cleanable(self, i, pair, pipe):
1✔
57
        if pipe.stuck:
1!
58
            logging.info('A pipe for pair %s-%s seems stuck, scheduling restart',
×
59
                         pair[0], pair[1])
60
            return True
×
61
        if pipe.use_count > self.restart_pipe_after:
1!
62
            # Not affected by min_pipes_per_pair
63
            logging.info('A pipe for pair %s-%s has handled %d requests, scheduling restart',
×
64
                         pair[0], pair[1], self.restart_pipe_after)
65
            return True
×
66
        elif (i >= self.min_pipes_per_pair and
1!
67
                self.max_idle_secs != 0 and
68
                time.time() - pipe.last_usage > self.max_idle_secs):
69
            logging.info("A pipe for pair %s-%s hasn't been used in %d secs, scheduling shutdown",
×
70
                         pair[0], pair[1], self.max_idle_secs)
71
            return True
×
72
        else:
73
            return False
1✔
74

75
    def clean_pairs(self):
1✔
76
        for pair in self.pipelines:
1✔
77
            pipes = self.pipelines[pair]
1✔
78
            to_clean = set(p for i, p in enumerate(pipes)
1✔
79
                           if self.cleanable(i, pair, p))
80
            self.pipelines_holding += to_clean
1✔
81
            pipes[:] = [p for p in pipes if p not in to_clean]
1✔
82
            heapq.heapify(pipes)
1✔
83
        # The holding area lets us restart pipes after n usages next
84
        # time round, since with lots of traffic an active pipe may
85
        # never reach 0 users
86
        self.pipelines_holding[:] = [p for p in self.pipelines_holding
1✔
87
                                     if p.users > 0]
88
        if self.pipelines_holding:
1!
89
            logging.info('%d pipelines still scheduled for shutdown', len(self.pipelines_holding))
×
90

91
    def get_pipe_cmds(self, l1, l2):
1✔
92
        if (l1, l2) not in self.pipeline_cmds:
1!
93
            mode_path = self.pairs['%s-%s' % (l1, l2)]
1✔
94
            self.pipeline_cmds[(l1, l2)] = parse_mode_file(mode_path)
1✔
95
        return self.pipeline_cmds[(l1, l2)]
1✔
96

97
    def should_start_pipe(self, l1, l2):
1✔
98
        pipes = self.pipelines.get((l1, l2), [])
1✔
99
        if pipes == []:
1✔
100
            logging.info('%s-%s not in pipelines of this process',
1✔
101
                         l1, l2)
102
            return True
1✔
103
        else:
104
            min_p = pipes[0]
1✔
105
            if len(pipes) < self.max_pipes_per_pair and min_p.users > self.max_users_per_pipe:
1!
106
                logging.info('%s-%s has ≥%d users per pipe but only %d pipes',
×
107
                             l1, l2, min_p.users, len(pipes))
108
                return True
×
109
            else:
110
                return False
1✔
111

112
    def get_pipeline(self, pair):
1✔
113
        (l1, l2) = pair
1✔
114
        if self.should_start_pipe(l1, l2):
1✔
115
            logging.info('Starting up a new pipeline for %s-%s …', l1, l2)
1✔
116
            if pair not in self.pipelines:
1!
117
                self.pipelines[pair] = []
1✔
118
            p = make_pipeline(self.get_pipe_cmds(l1, l2), self.timeout)
1✔
119
            heapq.heappush(self.pipelines[pair], p)
1✔
120
        return self.pipelines[pair][0]
1✔
121

122
    def log_before_translation(self):
1✔
123
        return datetime.now()
1✔
124

125
    def log_after_translation(self, before, length):
1✔
126
        after = datetime.now()
1✔
127
        if self.scale_mt_logs:
1!
128
            t_info = TranslationInfo(self)
×
129
            key = self.get_api_key(t_info.key)
×
130
            scale_mt_log(self.get_status(), after - before, t_info, key, length)
×
131

132
        if self.get_status() == 200:
1✔
133
            timings = self.stats.timing
1✔
134
            oldest = timings[0][0] if timings else datetime.now()
1✔
135
            if datetime.now() - oldest > self.stat_period_max_age:
1!
136
                self.stats.timing.pop(0)
×
137
            self.stats.timing.append(
1✔
138
                (before, after, length))
139

140
    def get_pair_or_error(self, langpair, text_length):
        """Resolve a ``src|trg`` request string to an installed language pair.

        Args:
            langpair: the raw pair string, two codes separated by ``|``.
            text_length: length of the submitted text, used only for logging
                when the request is rejected.

        Returns:
            The tuple obtained by splitting the resolved mode name on ``-``,
            or None after a 400 error has been sent because the string could
            not be parsed or the resolved mode is not in ``self.pairs``.
        """
        def reject(explanation):
            # Send the 400 and log the request as a zero-duration translation.
            self.send_error(400, explanation=explanation)
            self.log_after_translation(self.log_before_translation(), text_length)

        try:
            l1, l2 = map(to_alpha3_code, langpair.split('|'))
            in_mode = '%s-%s' % (l1, l2)
        except ValueError:
            reject('That pair is invalid, use e.g. eng|spa')
            return None

        in_mode = self.find_fallback_mode(in_mode, self.pairs)
        if in_mode in self.pairs:
            return tuple(in_mode.split('-'))

        reject('That pair is not installed')
        return None
1✔
155

156
    def get_format(self):
1✔
157
        dereformat = self.get_argument('format', default=None)
1✔
158
        deformat = ''
1✔
159
        reformat = ''
1✔
160
        if dereformat:
1!
161
            deformat = 'apertium-des' + dereformat
×
162
            reformat = 'apertium-re' + dereformat
×
163
        else:
164
            deformat = self.get_argument('deformat', default='html')
1✔
165
            if 'apertium-des' not in deformat:
1!
166
                deformat = 'apertium-des' + deformat
1✔
167
            reformat = self.get_argument('reformat', default='html-noent')
1✔
168
            if 'apertium-re' not in reformat:
1!
169
                reformat = 'apertium-re' + reformat
1✔
170

171
        return deformat, reformat
1✔
172

173
    @gen.coroutine
    def translate_and_respond(self, pair, pipeline, to_translate, mark_unknown, nosplit=False, deformat=True, reformat=True, prefs=''):
        """Translate ``to_translate`` through ``pipeline`` and send the reply.

        Args:
            pair: two-item language pair tuple, used for stats and logging.
            pipeline: object whose ``translate(...)`` result is yielded to
                obtain the translated text.
            to_translate: the text to translate.
            mark_unknown: ``'yes'``, ``'true'`` or ``'1'`` (any letter case)
                to keep unknown-word marks; other strings strip them.
                Non-string values are interpreted by their truthiness.
            nosplit, deformat, reformat, prefs: passed through unchanged to
                ``pipeline.translate``.

        On success a 200 JSON-style response carrying ``translatedText`` is
        sent. On a timeout or a closed stream the pipeline is flagged as
        stuck and a 503 is sent. In both cases ``clean_pairs`` runs afterwards.
        """
        # Parse the flag the same way the mark_unknown property does
        # (case-insensitively), and accept booleans from callers that
        # already hold a parsed value.
        if isinstance(mark_unknown, str):
            mark_unknown = mark_unknown.lower() in ('yes', 'true', '1')
        else:
            mark_unknown = bool(mark_unknown)
        self.note_pair_usage(pair)
        before = self.log_before_translation()
        try:
            translated = yield pipeline.translate(to_translate, nosplit, deformat, reformat, prefs)
            self.log_after_translation(before, len(to_translate))
            self.send_response({
                'responseData': {
                    'translatedText': self.maybe_strip_marks(mark_unknown, pair, translated),
                },
                'responseDetails': None,
                'responseStatus': 200,
            })
        except (asyncio.TimeoutError, tornado.iostream.StreamClosedError) as e:
            logging.warning('Translation error in pair %s-%s: %s', pair[0], pair[1], e)
            # Flag the pipe so that cleanable() schedules it for restart.
            pipeline.stuck = True
            self.send_error(503, explanation='internal error')
        self.clean_pairs()
1✔
197

198
    @gen.coroutine
    def get(self):
        """Handle a GET request: translate ``q`` for the requested ``langpair``.

        Nothing further happens when ``get_pair_or_error`` returns None (it
        has already sent the error response).
        """
        langpair = self.get_argument('langpair')
        text = self.get_argument('q')
        pair = self.get_pair_or_error(langpair, len(text))
        if pair is None:
            return

        pipeline = self.get_pipeline(pair)  # type: Union[FlushingPipeline, SimplePipeline]
        deformat, reformat = self.get_format()
        yield self.translate_and_respond(
            pair,
            pipeline,
            text,
            self.get_argument('markUnknown', default='yes'),
            nosplit=False,
            deformat=deformat,
            reformat=reformat,
            prefs=self.get_argument('prefs', default=''),
        )
214

215
    @classmethod
1✔
216
    def get_api_key(cls, key):
1✔
217
        if not cls.api_keys:
×
218
            cls.api_keys = ApiKeys(cls.api_keys_conf)
×
219

220
        return cls.api_keys.get_key(key)
×
221

222

223
class PairPrefsHandler(BaseHandler):
    """Handler that replies with the handler's ``pairprefs`` attribute."""

    @gen.coroutine
    def get(self):
        """Send ``self.pairprefs`` as the response body."""
        prefs = self.pairprefs
        self.send_response(prefs)
×
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc