• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

apertium / apertium-apy / 4241413648

pending completion
4241413648

push

github

Kevin Brubeck Unhammer
pipenv install --dev

361 of 913 branches covered (39.54%)

Branch coverage included in aggregate %.

1251 of 2281 relevant lines covered (54.84%)

0.55 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

55.62
/apertium_apy/apy.py
1
#!/usr/bin/env python3
2
# coding=utf-8
3
# -*- indent-tabs-mode: nil -*-
4

5
__author__ = 'Kevin Brubeck Unhammer, Sushain K. Cherivirala'
1✔
6
__copyright__ = 'Copyright 2013--2020, Kevin Brubeck Unhammer, Sushain K. Cherivirala'
1✔
7
__credits__ = ['Kevin Brubeck Unhammer', 'Sushain K. Cherivirala', 'Jonathan North Washington', 'Xavi Ivars', 'Shardul Chiplunkar']
1✔
8
__license__ = 'GPLv3'
1✔
9
__status__ = 'Beta'
1✔
10
__version__ = '0.12.0'
1✔
11

12
import argparse
1✔
13
import configparser
1✔
14
import logging
1✔
15
import os
1✔
16
import re
1✔
17
import signal
1✔
18
import sys
1✔
19
from importlib import util as importlib_util
1✔
20
from datetime import timedelta
1✔
21
from logging import handlers as logging_handlers  # type: ignore
1✔
22

23
import tornado
1✔
24
import tornado.httpserver
1✔
25
import tornado.httputil
1✔
26
import tornado.iostream
1✔
27
import tornado.process
1✔
28
import tornado.web
1✔
29
from tornado.locks import Semaphore
1✔
30
from tornado.log import enable_pretty_logging
1✔
31

32
from typing import Sequence, Iterable, Type, List, Tuple, Any  # noqa: F401
1✔
33

34
from apertium_apy import BYPASS_TOKEN, missing_freqs_db  # noqa: F401
1✔
35
from apertium_apy import missingdb
1✔
36
from apertium_apy import systemd
1✔
37
from apertium_apy.mode_search import search_path, search_prefs
1✔
38
from apertium_apy.utils.wiki import wiki_login, wiki_get_token
1✔
39

40
from apertium_apy.handlers import (
1✔
41
    AnalyzeHandler,
42
    BaseHandler,
43
    CoverageHandler,
44
    GenerateHandler,
45
    IdentifyLangHandler,
46
    ListHandler,
47
    ListLanguageNamesHandler,
48
    PerWordHandler,
49
    PipeDebugHandler,
50
    SpellerHandler,
51
    StatsHandler,
52
    SuggestionHandler,
53
    TranslateChainHandler,
54
    TranslateDocHandler,
55
    TranslateHandler,
56
    PairPrefsHandler,
57
    TranslateRawHandler,
58
    TranslateWebpageHandler,
59
)
60

61

62
def sig_handler(sig, frame):
    """Flush the missing-word database (if one is open), log the signal, exit.

    :param sig: number of the signal that was caught
    :param frame: stack frame that was executing when the signal arrived;
        only its ``f_locals`` are inspected, and only when a database is open
    :raises SystemExit: always, after the cleanup and the log line
    """
    if missing_freqs_db is not None:
        # A frame holding a local named 'children' is treated as the parent
        # process: pass SIGTERM on to every child listed there.  Without that
        # local we are one of the children and there is nothing to forward.
        for child in frame.f_locals.get('children', ()):
            os.kill(child, signal.SIGTERM)
        missing_freqs_db.commit()
        missing_freqs_db.close_db()
    logging.warning('Caught signal: %s', sig)
    # sys.exit() instead of the bare exit() helper: the latter is injected by
    # the ``site`` module for interactive use and is not guaranteed to exist.
    sys.exit()
75

76

77
class RootHandler(BaseHandler):
    """Request handler that serves the index page."""

    def get(self):
        """Answer a GET request by rendering the '../index.html' template."""
        index_template = '../index.html'
        self.render(index_template)
80

81

82
class GetLocaleHandler(BaseHandler):
    """Report the locales named in the request's Accept-Language header."""

    def get(self):
        """Send the requested locales, or a 400 error if the header is absent.

        The header value is split on ',' and each entry is reduced to its
        language tag: everything from the first ';' on (for example a
        ';q=0.8' weight) is dropped and surrounding whitespace is stripped,
        so 'en-US, en;q=0.9' yields ['en-US', 'en'] rather than
        ['en-US', ' en'].  Entries that end up empty are skipped.
        """
        if 'Accept-Language' not in self.request.headers:
            self.send_error(400, explanation='Accept-Language missing from request headers')
            return

        entries = self.request.headers['Accept-Language'].split(',')
        tags = (entry.split(';')[0].strip() for entry in entries)
        self.send_response([tag for tag in tags if tag])
89

90

91
def setup_handler(
    pairs_path, nonpairs_path, lang_names, missing_freqs_path, timeout,
    max_pipes_per_pair, min_pipes_per_pair, max_users_per_pipe, max_idle_secs,
    restart_pipe_after, max_doc_pipes, verbosity=0, scale_mt_logs=False,
    memory=1000, apy_keys=None,
):
    """Store the server settings on BaseHandler and register the modes found.

    :param pairs_path: passed to search_path() and search_prefs()
    :param nonpairs_path: if truthy, searched as well (with
        ``include_pairs=False``) and its results appended to those of
        *pairs_path*
    :param missing_freqs_path: if truthy, a ``missingdb.MissingDb`` is opened
        on it and stored in the module-level ``missing_freqs_db``
    :param memory: second argument given to ``MissingDb``
    :param max_doc_pipes: initial value of the Semaphore stored as
        ``BaseHandler.doc_pipe_sem``
    :param apy_keys: stored as ``BaseHandler.api_keys_conf``

    The remaining parameters are copied onto BaseHandler as class attributes
    of the same name.
    """
    global missing_freqs_db
    if missing_freqs_path:
        missing_freqs_db = missingdb.MissingDb(missing_freqs_path, memory)

    handler = BaseHandler

    # Plain pass-through settings, assigned in a fixed order.
    passthrough = (
        ('lang_names', lang_names),
        ('timeout', timeout),
        ('max_pipes_per_pair', max_pipes_per_pair),
        ('min_pipes_per_pair', min_pipes_per_pair),
        ('max_users_per_pipe', max_users_per_pipe),
        ('max_idle_secs', max_idle_secs),
        ('restart_pipe_after', restart_pipe_after),
        ('scale_mt_logs', scale_mt_logs),
        ('verbosity', verbosity),
    )
    for attr_name, setting in passthrough:
        setattr(handler, attr_name, setting)
    handler.doc_pipe_sem = Semaphore(max_doc_pipes)
    handler.api_keys_conf = apy_keys

    modes = search_path(pairs_path, verbosity=verbosity)
    if nonpairs_path:
        nonpair_modes = search_path(nonpairs_path, include_pairs=False, verbosity=verbosity)
        for mode_type in modes:
            modes[mode_type] += nonpair_modes[mode_type]
    handler.pairprefs = search_prefs(pairs_path)

    for mode_type in modes:
        found = modes[mode_type]
        logging.info('%d %s modes found', len(found), mode_type)

    # Translation pairs are keyed by 'src-trg'.
    for path, lang_src, lang_trg in modes['pair']:
        pair_key = '%s-%s' % (lang_src, lang_trg)
        handler.pairs[pair_key] = path

    # Analysers, generators and taggers share one layout:
    # language (pair) -> (directory, mode name).
    single_mode_registries = (
        ('analyzer', 'analyzers'),
        ('generator', 'generators'),
        ('tagger', 'taggers'),
    )
    for mode_type, registry_name in single_mode_registries:
        for dirpath, modename, lang_pair in modes[mode_type]:
            getattr(handler, registry_name)[lang_pair] = (dirpath, modename)

    # A speller is registered only when a 'tokenise' mode exists whose third
    # element equals the speller's language.
    for dirpath, modename, lang_src in modes['spell']:
        has_tokeniser = any(lang_src == entry[2] for entry in modes['tokenise'])
        if has_tokeniser:
            handler.spellers[lang_src] = (dirpath, modename)

    handler.init_pairs_graph()
    handler.init_paths()
139

140

141
def check_utf8():
    """Exit with status 1 unless the environment names a UTF-8 locale.

    The check passes when the value of at least one of LC_ALL, LC_CTYPE or
    LANG contains 'UTF-8' or 'UTF8' (case-insensitive).  LC_CTYPE is accepted
    alongside the other two because it is the locale category that selects
    the character encoding, so a system configured only through it is also
    running a UTF-8 locale.

    :raises SystemExit: with code 1 when none of the variables matches
    """
    locale_vars = ('LC_ALL', 'LC_CTYPE', 'LANG')
    u8 = re.compile('UTF-?8', re.IGNORECASE)
    if any(u8.search(os.environ.get(key, '')) for key in locale_vars):
        return
    logging.critical('apy.py: APy needs a UTF-8 locale, please set LANG or LC_ALL')
    sys.exit(1)
147

148

149
def _cast_config_value(raw, current):
    """Convert the config-file string *raw* to the type of *current*.

    :raises ValueError, TypeError: when ``type(current)(raw)`` rejects *raw*
    """
    if current is None:
        # No type to cast to: keep the string, except that the literal
        # 'None' stands for "leave unset".
        return None if raw == 'None' else raw
    if isinstance(current, bool):
        # Recognise configparser's boolean spellings (true/false, yes/no,
        # on/off, 1/0) in any letter case; anything else keeps the plain
        # truthiness of the string.
        states = configparser.ConfigParser.BOOLEAN_STATES
        return states.get(raw.strip().lower(), bool(raw))
    return type(current)(raw)


def apply_config(args, parser, apy_section):
    """Fill *args* from *apy_section* for options still at their default.

    For every attribute of *args* that also appears as a key in
    *apy_section*, the configured string is cast to the type of the
    attribute's current value and written back, but only if the attribute
    still equals the parser's default, so values given on the command line
    win over the configuration file.

    :param args: ``argparse.Namespace`` to update in place
    :param parser: the ``ArgumentParser`` that produced *args*; used to look
        up each option's default
    :param apy_section: mapping from option name (the argparse ``dest``) to
        its configured string value
    """
    for name, value in vars(args).items():
        if name not in apy_section:
            continue

        raw = apy_section[name]
        try:
            res = _cast_config_value(raw, value)
        except (TypeError, ValueError):
            print('Warning: Unable to cast {} to expected type'.format(raw))
            continue

        # only override if value (argument) is default
        if res is not None and value == parser.get_default(name):
            setattr(args, name, res)
182

183

184
def parse_args(cli_args=None):
    """Parse the command line and, if requested, merge in a config file.

    :param cli_args: list of argument strings; when None (the default) the
        arguments are taken from ``sys.argv[1:]`` at call time rather than
        from a snapshot made when this module was imported
    :return: the populated ``argparse.Namespace``

    When ``--config`` is given and the file has an ``APY`` section, that
    section is handed to apply_config() together with the parsed arguments;
    a missing file or missing section only produces a warning.
    """
    if cli_args is None:
        cli_args = sys.argv[1:]

    parser = argparse.ArgumentParser(description='Apertium APY -- API server for machine translation and language analysis')
    parser.add_argument('pairs_path', help='path to Apertium installed pairs (all modes files in this path are included)')
    parser.add_argument('-s', '--nonpairs-path', help='path to Apertium tree (only non-translator debug modes are included from this path)')
    parser.add_argument('-l', '--lang-names',
                        help='path to localised language names sqlite database (default = langNames.db)', default='langNames.db')
    parser.add_argument('-F', '--fasttext-model',
                        help='path to fastText language identification model (e.g. lid.release.ftz)')
    parser.add_argument('-f', '--missing-freqs', help='path to missing word frequency sqlite database (default = None)', default=None)
    parser.add_argument('-p', '--port', help='port to run server on (default = 2737)', type=int, default=2737)
    parser.add_argument('-c', '--ssl-cert', help='path to SSL Certificate', default=None)
    parser.add_argument('-k', '--ssl-key', help='path to SSL Key File', default=None)
    parser.add_argument('-t', '--timeout', help='timeout for requests (default = 10)', type=int, default=10)
    parser.add_argument('-j', '--num-processes',
                        help='number of processes to run (default = 1; use 0 to run one http server per core, '
                             'where each http server runs all available language pairs)',
                        nargs='?', type=int, default=1)
    parser.add_argument('-d', '--daemon',
                        help='daemon mode: redirects stdout and stderr to files apertium-apy.log and apertium-apy.err; use with --log-path',
                        action='store_true')
    parser.add_argument('-P', '--log-path', help='path to log output files to in daemon mode; defaults to local directory', default='./')
    parser.add_argument('-i', '--max-pipes-per-pair',
                        help='how many pipelines we can spin up per language pair (default = 1)', type=int, default=1)
    parser.add_argument('-n', '--min-pipes-per-pair',
                        help='when shutting down pipelines, keep at least this many open per language pair (default = 0)',
                        type=int, default=0)
    parser.add_argument('-u', '--max-users-per-pipe',
                        help='how many concurrent requests per pipeline before we consider spinning up a new one (default = 5)',
                        type=int, default=5)
    parser.add_argument('-m', '--max-idle-secs',
                        help='if specified, shut down pipelines that have not been used in this many seconds', type=int, default=0)
    parser.add_argument('-r', '--restart-pipe-after',
                        help='restart a pipeline if it has had this many requests (default = 1000)', type=int, default=1000)
    parser.add_argument('-v', '--verbosity', help='logging verbosity', type=int, default=0)
    parser.add_argument('-V', '--version', help='show APY version', action='version', version='%(prog)s version ' + __version__)
    # The help text used to end in a dangling "disables"; what gets disabled
    # is the tornado.access logger (see setup_logging, daemon mode only).
    parser.add_argument('-S', '--scalemt-logs',
                        help='generates ScaleMT-like logs; use with --log-path; disables tornado.access logs in daemon mode',
                        action='store_true')
    parser.add_argument('-M', '--unknown-memory-limit',
                        help='keeps unknown words in memory until a limit is reached; use with --missing-freqs (default = 1000)',
                        type=int, default=1000)
    parser.add_argument('-T', '--stat-period-max-age',
                        help='How many seconds back to keep track request timing stats (default = 3600)', type=int, default=3600)
    parser.add_argument('-wp', '--wiki-password', help='Apertium Wiki account password for SuggestionHandler', default=None)
    parser.add_argument('-wu', '--wiki-username', help='Apertium Wiki account username for SuggestionHandler', default=None)
    parser.add_argument('-b', '--bypass-token', help='ReCAPTCHA bypass token', action='store_true')
    parser.add_argument('-rs', '--recaptcha-secret', help='ReCAPTCHA secret for suggestion validation', default=None)
    parser.add_argument('-md', '--max-doc-pipes',
                        help='how many concurrent document translation pipelines we allow (default = 3)', type=int, default=3)
    parser.add_argument('-C', '--config', help='Configuration file to load options from', default=None)
    parser.add_argument('-ak', '--api-keys', help='Configuration file to load API keys', default=None)

    args = parser.parse_args(cli_args)

    if args.config:
        conf = configparser.ConfigParser()
        # ConfigParser.read() silently skips files it cannot open, so the
        # existence check below is what reports a missing file.
        conf.read(args.config)

        if not os.path.isfile(args.config):
            logging.warning('Configuration file does not exist,'
                            ' please see https://wiki.apertium.org/'
                            'wiki/Apy#Configuration for more information')
        elif 'APY' not in conf:
            logging.warning('Configuration file does not have APY section,'
                            ' please see https://wiki.apertium.org/'
                            'wiki/Apy#Configuration for more information')
        else:
            logging.info('Using configuration file %s', args.config)
            apply_config(args, parser, conf['APY'])

    return args
254

255

256
def setup_application(args):
    """Configure the handlers from *args* and build the Tornado application.

    :param args: parsed command-line namespace (see parse_args)
    :return: a ``tornado.web.Application`` with the fixed routes below plus
        '/speller' when the ``streamparser`` module can be found and
        '/suggest' when wiki credentials were given and ``requests`` can be
        found
    """
    max_age_secs = args.stat_period_max_age
    if max_age_secs:
        BaseHandler.stat_period_max_age = timedelta(seconds=max_age_secs)

    setup_handler(
        pairs_path=args.pairs_path,
        nonpairs_path=args.nonpairs_path,
        lang_names=args.lang_names,
        missing_freqs_path=args.missing_freqs,
        timeout=args.timeout,
        max_pipes_per_pair=args.max_pipes_per_pair,
        min_pipes_per_pair=args.min_pipes_per_pair,
        max_users_per_pipe=args.max_users_per_pipe,
        max_idle_secs=args.max_idle_secs,
        restart_pipe_after=args.restart_pipe_after,
        max_doc_pipes=args.max_doc_pipes,
        verbosity=args.verbosity,
        scale_mt_logs=args.scalemt_logs,
        memory=args.unknown_memory_limit,
        apy_keys=args.api_keys,
    )

    handlers = [
        (r'/', RootHandler),
        (r'/list', ListHandler),
        (r'/listPairs', ListHandler),
        (r'/stats', StatsHandler),
        (r'/pairprefs', PairPrefsHandler),
        (r'/translate', TranslateHandler),
        (r'/translateChain', TranslateChainHandler),
        (r'/translateDoc', TranslateDocHandler),
        (r'/translatePage', TranslateWebpageHandler),
        (r'/translateRaw', TranslateRawHandler),
        (r'/analy[sz]e', AnalyzeHandler),
        (r'/generate', GenerateHandler),
        (r'/listLanguageNames', ListLanguageNamesHandler),
        (r'/perWord', PerWordHandler),
        (r'/calcCoverage', CoverageHandler),
        (r'/identifyLang', IdentifyLangHandler),
        (r'/getLocale', GetLocaleHandler),
        (r'/pipedebug', PipeDebugHandler),
    ]  # type: List[Tuple[str, Type[tornado.web.RequestHandler]]]

    # Optional route: spelling needs the streamparser module.
    if importlib_util.find_spec('streamparser'):
        handlers.append((r'/speller', SpellerHandler))

    # Optional route: suggestions need both wiki credentials and requests.
    have_wiki_credentials = all([args.wiki_username, args.wiki_password])
    if have_wiki_credentials and importlib_util.find_spec('requests'):
        import requests
        logging.info('Logging into Apertium Wiki with username %s', args.wiki_username)

        suggest = SuggestionHandler
        suggest.SUGGEST_URL = 'User:' + args.wiki_username
        suggest.recaptcha_secret = args.recaptcha_secret
        suggest.wiki_session = requests.Session()
        suggest.auth_token = wiki_login(suggest.wiki_session, args.wiki_username, args.wiki_password)
        suggest.wiki_edit_token = wiki_get_token(suggest.wiki_session, 'edit', 'info|revisions')

        handlers.append((r'/suggest', suggest))

    # The fastText model is loaded only if a path was given and the library
    # can be found.
    if args.fasttext_model and importlib_util.find_spec('fasttext') is not None:
        import fasttext
        IdentifyLangHandler.fasttext = fasttext.FastText.load_model(args.fasttext_model)

    # TODO: fix mypy. Application expects List but List is invariant and we use subclasses
    return tornado.web.Application(handlers)  # type:ignore
311

312

313
def setup_logging(args):
    """Set up log destinations according to ``--daemon`` / ``--scalemt-logs``.

    In daemon mode ``sys.stderr`` is redirected to apertium-apy.log and
    ``sys.stdout`` to apertium-apy.err (both under ``args.log_path``, opened
    for appending), the root logger writes to apertium-apy.log and its level
    is set to INFO.  With ScaleMT logs enabled, the 'scale-mt' logger gets a
    file handler on ScaleMTRequests.log that rotates at midnight.
    Tornado's pretty logging is enabled in every case.

    :param args: parsed command-line namespace (see parse_args)
    """
    if args.daemon:
        # regular content logs are output stderr
        # python messages are mostly output to stdout
        # hence swapping the filenames?
        log_target = os.path.join(args.log_path, 'apertium-apy.log')
        err_target = os.path.join(args.log_path, 'apertium-apy.err')
        sys.stderr = open(log_target, 'a+')
        sys.stdout = open(err_target, 'a+')
        # NB. Needs to happen *before* we use logs for anything
        logging.basicConfig(filename=log_target, filemode='a')
        root_logger = logging.getLogger()
        root_logger.setLevel(logging.INFO)

    if args.scalemt_logs:
        scalemt_logger = logging.getLogger('scale-mt')
        scalemt_logger.propagate = False
        requests_log = os.path.join(args.log_path, 'ScaleMTRequests.log')
        rotating = logging_handlers.TimedRotatingFileHandler(requests_log, when='midnight', interval=0)
        # internal attribute, should not use
        rotating.suffix = '%Y-%m-%d'  # type: ignore
        scalemt_logger.addHandler(rotating)
        # if scalemt_logs is enabled, disable tornado.access logs
        if args.daemon:
            access_logger = logging.getLogger('tornado.access')
            access_logger.propagate = False

    enable_pretty_logging()
336

337

338
def main():
    """Entry point: parse arguments, build the application and serve forever.

    Steps, in order: verify the locale, parse the command line, set up
    logging, warn about optional modules that cannot be found, build the
    application, create an HTTP(S) server, install the SIGTERM/SIGINT
    handlers, bind and start the server, optionally start the systemd
    watchdog pings, and run the IO loop.
    """
    # Imported explicitly instead of relying on another tornado submodule
    # having loaded tornado.ioloop as a side effect.
    from tornado.ioloop import IOLoop, PeriodicCallback

    check_utf8()
    args = parse_args()
    setup_logging(args)  # before we start logging anything!

    if importlib_util.find_spec('fasttext') is None:
        logging.warning('Unable to import fastText, trying CLD2')
        if importlib_util.find_spec('cld2full') is None:
            logging.warning('Unable to import CLD2, continuing using naive method of language identification')
    elif not args.fasttext_model:
        logging.warning('Have fasttext lib, but started without --fasttext-model, not using fastText for language identification')

    optional_modules = (
        ('chardet', 'Unable to import chardet, assuming utf-8 encoding for all websites'),
        ('streamparser', 'Apertium streamparser not installed, spelling handler disabled'),
        ('requests', 'requests not installed, suggestions disabled'),
    )
    for module_name, warning in optional_modules:
        if importlib_util.find_spec(module_name) is None:
            logging.warning(warning)

    if args.bypass_token:
        logging.info('reCaptcha bypass for testing: %s', BYPASS_TOKEN)

    application = setup_application(args)

    # TLS is used only when both the certificate and the key were given.
    if args.ssl_cert and args.ssl_key:
        http_server = tornado.httpserver.HTTPServer(application, ssl_options={
            'certfile': args.ssl_cert,
            'keyfile': args.ssl_key,
        })
        logging.info('Serving on all interfaces/families, e.g. https://localhost:%s', args.port)
    else:
        http_server = tornado.httpserver.HTTPServer(application)
        logging.info('Serving on all interfaces/families, e.g. http://localhost:%s', args.port)

    signal.signal(signal.SIGTERM, sig_handler)
    signal.signal(signal.SIGINT, sig_handler)

    http_server.bind(args.port)
    http_server.start(args.num_processes)

    # IOLoop.instance() is a deprecated alias of IOLoop.current().
    loop = IOLoop.current()
    wd = systemd.setup_watchdog()
    if wd is not None:
        wd.systemd_ready()
        # PeriodicCallback takes its interval in milliseconds, and it is
        # given 1000 * wd.period, so wd.period is presumably in seconds.
        # Log that value: the old message printed the millisecond figure
        # followed by an 's' unit.
        logging.info('Initialised systemd watchdog, pinging every %ss', wd.period)
        PeriodicCallback(wd.watchdog_ping, 1000 * wd.period).start()
    loop.start()
387

388

389
# Start the server only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc