• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

apertium / apertium-apy / 15767027851

19 Jun 2025 10:07PM UTC coverage: 49.365%. First build
15767027851

Pull #247

github

Pull Request #247: bilsearch mode

261 of 756 branches covered (34.52%)

Branch coverage included in aggregate %.

19 of 69 new or added lines in 6 files covered. (27.54%)

1294 of 2394 relevant lines covered (54.05%)

0.54 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

56.16
/apertium_apy/apy.py
1
#!/usr/bin/env python3
2
# coding=utf-8
3
# -*- indent-tabs-mode: nil -*-
4

5
__author__ = 'Kevin Brubeck Unhammer, Sushain K. Cherivirala'
1✔
6
__copyright__ = 'Copyright 2013--2025, Kevin Brubeck Unhammer, Sushain K. Cherivirala'
1✔
7
__credits__ = ['Kevin Brubeck Unhammer', 'Sushain K. Cherivirala', 'Jonathan North Washington', 'Xavi Ivars', 'Shardul Chiplunkar']
1✔
8
__license__ = 'GPLv3'
1✔
9
__status__ = 'Beta'
1✔
10
__version__ = '0.13.0'
1✔
11

12
import argparse
1✔
13
import configparser
1✔
14
import logging
1✔
15
import os
1✔
16
import re
1✔
17
import signal
1✔
18
import sys
1✔
19
from importlib import util as importlib_util
1✔
20
from datetime import timedelta
1✔
21
from logging import handlers as logging_handlers  # type: ignore
1✔
22

23
import tornado
1✔
24
import tornado.httpserver
1✔
25
import tornado.httputil
1✔
26
import tornado.iostream
1✔
27
import tornado.process
1✔
28
import tornado.web
1✔
29
from tornado.locks import Semaphore
1✔
30
from tornado.log import enable_pretty_logging
1✔
31

32
from typing import Sequence, Iterable, Type, List, Tuple, Any  # noqa: F401
1✔
33

34
from apertium_apy import BYPASS_TOKEN, missing_freqs_db  # noqa: F401
1✔
35
from apertium_apy import missingdb
1✔
36
from apertium_apy import systemd
1✔
37
from apertium_apy.mode_search import search_path, search_prefs
1✔
38
from apertium_apy.utils.wiki import wiki_login, wiki_get_token
1✔
39

40
from apertium_apy.handlers import (
1✔
41
    AnalyzeHandler,
42
    BaseHandler,
43
    CoverageHandler,
44
    GenerateHandler,
45
    GuesserHandler,
46
    IdentifyLangHandler,
47
    ListHandler,
48
    ListLanguageNamesHandler,
49
    PerWordHandler,
50
    PipeDebugHandler,
51
    SpellerHandler,
52
    StatsHandler,
53
    SuggestionHandler,
54
    TranslateChainHandler,
55
    TranslateDocHandler,
56
    TranslateHandler,
57
    PairPrefsHandler,
58
    TranslateRawHandler,
59
    TranslateWebpageHandler,
60
)
61

62

63
def sig_handler(sig, frame):
    """Flush the missing-word database (if any) and terminate the process.

    Installed for SIGTERM and SIGINT.

    Args:
        sig: number of the signal that was caught (only logged).
        frame: the interrupted stack frame, or None. If it has a local
            named 'children', each entry is sent SIGTERM before the
            database is committed.
            NOTE(review): 'children' is presumably the local of tornado's
            process-forking loop in the parent process -- confirm.

    Raises:
        SystemExit: always.
    """
    global missing_freqs_db
    if missing_freqs_db is not None:
        # A handler may be invoked with frame=None, so guard the lookup.
        frame_locals = frame.f_locals if frame is not None else {}
        if 'children' in frame_locals:
            for child in frame_locals['children']:
                os.kill(child, signal.SIGTERM)
        # Both the parent and the children commit before closing.
        missing_freqs_db.commit()
        missing_freqs_db.close_db()
    logging.warning('Caught signal: %s', sig)
    # sys.exit() rather than the site-provided exit(), which is meant for
    # the interactive shell and is missing when site is not loaded.
    sys.exit()
1✔
76

1✔
77

78
class RootHandler(BaseHandler):
    """Handler that answers GET by rendering the index page template."""

    def get(self):
        index_template = '../index.html'
        self.render(index_template)
1✔
81

1✔
82

83
class GetLocaleHandler(BaseHandler):
    """Handler that reports the locales named in the Accept-Language header."""

    def get(self):
        # Without the header there is nothing to report: answer 400.
        if 'Accept-Language' not in self.request.headers:
            self.send_error(400, explanation='Accept-Language missing from request headers')
            return

        # Keep only the part before ';' (e.g. drops a ';q=0.8' weight)
        # from every comma-separated entry.
        locales = []
        for entry in self.request.headers['Accept-Language'].split(','):
            locales.append(entry.split(';')[0])
        self.send_response(locales)
90

×
91

92
def setup_handler(
    pairs_path, nonpairs_path, lang_names, missing_freqs_path, timeout,
    max_pipes_per_pair, min_pipes_per_pair, max_users_per_pipe, max_idle_secs,
    restart_pipe_after, max_doc_pipes, verbosity=0, scale_mt_logs=False,
    memory=1000, apy_keys=None,
):
    """Store the server settings on BaseHandler and register the modes found.

    Most arguments are copied verbatim onto class attributes of BaseHandler.
    The others are used as follows:

    Args:
        pairs_path: searched for modes and for pair preferences.
        nonpairs_path: if truthy, searched with include_pairs=False and its
            modes are appended to those found under pairs_path.
        missing_freqs_path: if truthy, a MissingDb is opened on it and kept
            in the module-level missing_freqs_db.
        max_doc_pipes: initial value of the doc_pipe_sem Semaphore.
        memory: second argument handed to MissingDb.
        apy_keys: stored as BaseHandler.api_keys_conf.
    """
    global missing_freqs_db
    if missing_freqs_path:
        missing_freqs_db = missingdb.MissingDb(missing_freqs_path, memory)

    handler = BaseHandler

    # Class-level settings shared by every request handler.
    shared_settings = (
        ('lang_names', lang_names),
        ('timeout', timeout),
        ('max_pipes_per_pair', max_pipes_per_pair),
        ('min_pipes_per_pair', min_pipes_per_pair),
        ('max_users_per_pipe', max_users_per_pipe),
        ('max_idle_secs', max_idle_secs),
        ('restart_pipe_after', restart_pipe_after),
        ('scale_mt_logs', scale_mt_logs),
        ('verbosity', verbosity),
        ('doc_pipe_sem', Semaphore(max_doc_pipes)),
        ('api_keys_conf', apy_keys),
    )
    for attribute, setting in shared_settings:
        setattr(handler, attribute, setting)

    modes = search_path(pairs_path, verbosity=verbosity)
    if nonpairs_path:
        src_modes = search_path(nonpairs_path, include_pairs=False, verbosity=verbosity)
        for mtype in modes:
            modes[mtype] += src_modes[mtype]
    handler.pairprefs = search_prefs(pairs_path)

    for mtype in modes:
        found = modes[mtype]
        logging.info('%d %s modes found', len(found), mtype)

    # Translation pairs are keyed by 'src-trg'.
    for path, lang_src, lang_trg in modes['pair']:
        pair_key = '%s-%s' % (lang_src, lang_trg)
        handler.pairs[pair_key] = path

    # These three registries share the (dirpath, modename) value layout.
    for dirpath, modename, lang_pair in modes['analyzer']:
        handler.analyzers[lang_pair] = (dirpath, modename)
    for dirpath, modename, lang_pair in modes['generator']:
        handler.generators[lang_pair] = (dirpath, modename)
    for dirpath, modename, lang_pair in modes['tagger']:
        handler.taggers[lang_pair] = (dirpath, modename)

    # A speller is only registered when a 'tokenise' mode exists for the
    # same language.
    for dirpath, modename, lang_src in modes['spell']:
        has_tokeniser = any(lang_src == elem[2] for elem in modes['tokenise'])
        if has_tokeniser:
            handler.spellers[lang_src] = (dirpath, modename)

    for dirpath, modename, lang_pair in modes['guesser']:
        handler.guessers[lang_pair] = (dirpath, modename)

    handler.init_pairs_graph()
    handler.init_paths()
×
142

143

1✔
144
def check_utf8():
    """Exit with status 1 unless LANG or LC_ALL mentions a UTF-8 locale.

    The match is case-insensitive and accepts both 'UTF-8' and 'UTF8'.
    """
    utf8_pattern = re.compile('UTF-?8', re.IGNORECASE)
    for variable in ('LANG', 'LC_ALL'):
        if utf8_pattern.search(os.environ.get(variable, '')):
            # One UTF-8 locale variable is enough.
            return
    logging.fatal('apy.py: APy needs a UTF-8 locale, please set LANG or LC_ALL')
    sys.exit(1)
1✔
150

1!
151

×
152
def apply_config(args, parser, apy_section):
    """Override still-default command-line options with config-file values.

    For every option in ``args`` that also appears in ``apy_section``, the
    config string is converted to the type of the option's current value
    and written back to ``args`` -- but only if the current value equals
    the parser default, so explicit command-line arguments always win.

    Args:
        args: namespace returned by ``parser.parse_args``; modified in place.
        parser: the ArgumentParser that produced ``args`` (source of the
            defaults).
        apy_section: mapping from option name to string value.

    Conversion rules:
        * current value is None: the string is used as-is, except that the
          literal 'None' means "leave unset".
        * current value is a bool: the configparser spellings
          (true/false, yes/no, on/off, 1/0, case-insensitive) are
          recognised; anything else falls back to ``bool(string)``.
        * otherwise: ``type(current_value)(string)``; on failure a warning
          is printed and the option is left untouched.
    """
    # Snapshot the items: setattr() below writes into the same namespace.
    for name, value in list(vars(args).items()):
        if name not in apy_section:
            continue

        raw = apy_section[name]
        default = parser.get_default(name)

        res = None
        try:
            if value is None:
                # No type to cast to; keep the string unless it spells None.
                res = None if raw == 'None' else raw
            elif isinstance(value, bool):
                states = configparser.ConfigParser.BOOLEAN_STATES
                key = raw.strip().lower()
                res = states[key] if key in states else bool(raw)
            else:
                res = type(value)(raw)
        except (ValueError, TypeError):
            print('Warning: Unable to cast {} to expected type'.format(raw))

        # Only override if the value (argument) is still the default.
        if res is not None and value == default:
            setattr(args, name, res)
185

186

×
187
def parse_args(cli_args=None):
    """Parse the APy command line and merge in an optional config file.

    Args:
        cli_args: list of argument strings. When None (the default),
            argparse reads ``sys.argv[1:]`` at call time rather than a copy
            frozen when this module was imported.

    Returns:
        The argparse namespace. If ``--config`` names an existing file with
        an ``APY`` section, options still at their default are overridden
        by that section; otherwise a warning is logged and the command-line
        values are returned unchanged.
    """
    parser = argparse.ArgumentParser(description='Apertium APY -- API server for machine translation and language analysis')
    parser.add_argument('pairs_path', help='path to Apertium installed pairs (all modes files in this path are included)')
    parser.add_argument('-s', '--nonpairs-path', help='path to Apertium tree (only non-translator debug modes are included from this path)')
    parser.add_argument('-l', '--lang-names',
                        help='path to localised language names sqlite database (default = langNames.db)', default='langNames.db')
    parser.add_argument('-F', '--fasttext-model',
                        help='path to fastText language identification model (e.g. lid.release.ftz)')
    parser.add_argument('-f', '--missing-freqs', help='path to missing word frequency sqlite database (default = None)', default=None)
    parser.add_argument('-p', '--port', help='port to run server on (default = 2737)', type=int, default=2737)
    parser.add_argument('-c', '--ssl-cert', help='path to SSL Certificate', default=None)
    parser.add_argument('-k', '--ssl-key', help='path to SSL Key File', default=None)
    parser.add_argument('-t', '--timeout', help='timeout for requests (default = 10)', type=int, default=10)
    parser.add_argument('-j', '--num-processes',
                        help='number of processes to run (default = 1; use 0 to run one http server per core, '
                             'where each http server runs all available language pairs)',
                        nargs='?', type=int, default=1)
    parser.add_argument('-d', '--daemon',
                        help='daemon mode: redirects stdout and stderr to files apertium-apy.log and apertium-apy.err; use with --log-path',
                        action='store_true')
    parser.add_argument('-P', '--log-path', help='path to log output files to in daemon mode; defaults to local directory', default='./')
    parser.add_argument('-i', '--max-pipes-per-pair',
                        help='how many pipelines we can spin up per language pair (default = 1)', type=int, default=1)
    parser.add_argument('-n', '--min-pipes-per-pair',
                        help='when shutting down pipelines, keep at least this many open per language pair (default = 0)',
                        type=int, default=0)
    parser.add_argument('-u', '--max-users-per-pipe',
                        help='how many concurrent requests per pipeline before we consider spinning up a new one (default = 5)',
                        type=int, default=5)
    parser.add_argument('-m', '--max-idle-secs',
                        help='if specified, shut down pipelines that have not been used in this many seconds', type=int, default=0)
    parser.add_argument('-r', '--restart-pipe-after',
                        help='restart a pipeline if it has had this many requests (default = 1000)', type=int, default=1000)
    parser.add_argument('-v', '--verbosity', help='logging verbosity', type=int, default=0)
    parser.add_argument('-V', '--version', help='show APY version', action='version', version='%(prog)s version ' + __version__)
    parser.add_argument('-S', '--scalemt-logs', help='generates ScaleMT-like logs; use with --log-path; disables', action='store_true')
    parser.add_argument('-M', '--unknown-memory-limit',
                        help='keeps unknown words in memory until a limit is reached; use with --missing-freqs (default = 1000)',
                        type=int, default=1000)
    parser.add_argument('-T', '--stat-period-max-age',
                        help='How many seconds back to keep track request timing stats (default = 3600)', type=int, default=3600)
    parser.add_argument('-wp', '--wiki-password', help='Apertium Wiki account password for SuggestionHandler', default=None)
    parser.add_argument('-wu', '--wiki-username', help='Apertium Wiki account username for SuggestionHandler', default=None)
    parser.add_argument('-b', '--bypass-token', help='ReCAPTCHA bypass token', action='store_true')
    parser.add_argument('-rs', '--recaptcha-secret', help='ReCAPTCHA secret for suggestion validation', default=None)
    parser.add_argument('-md', '--max-doc-pipes',
                        help='how many concurrent document translation pipelines we allow (default = 3)', type=int, default=3)
    parser.add_argument('-C', '--config', help='Configuration file to load options from', default=None)
    parser.add_argument('-ak', '--api-keys', help='Configuration file to load API keys', default=None)

    # parse_args(None) makes argparse read sys.argv[1:] itself.
    args = parser.parse_args(cli_args)

    if args.config:
        conf = configparser.ConfigParser()
        # read() silently skips files it cannot open; existence is checked
        # separately below so that a helpful warning can be logged.
        conf.read(args.config)

        if not os.path.isfile(args.config):
            logging.warning('Configuration file does not exist,'
                            ' please see https://wiki.apertium.org/'
                            'wiki/Apy#Configuration for more information')
        elif 'APY' not in conf:
            logging.warning('Configuration file does not have APY section,'
                            ' please see https://wiki.apertium.org/'
                            'wiki/Apy#Configuration for more information')
        else:
            logging.info('Using configuration file %s', args.config)
            apply_config(args, parser, conf['APY'])

    return args
×
257

×
258

259
def setup_application(args):
    """Create the tornado Application that serves the APy endpoints.

    Configures the handler classes from ``args`` and builds the URL routing
    table. Three parts are optional:

    * '/speller' is only routed when the 'streamparser' module can be found.
    * '/suggest' is only routed when both wiki credentials are given and
      the 'requests' module can be found; a wiki session is opened then.
    * a fastText model is only loaded when a model path is given and the
      'fasttext' module can be found.

    Args:
        args: the namespace produced by parse_args().

    Returns:
        A tornado.web.Application with the routing table.
    """
    max_age = args.stat_period_max_age
    if max_age:
        BaseHandler.stat_period_max_age = timedelta(seconds=max_age)

    setup_handler(
        pairs_path=args.pairs_path,
        nonpairs_path=args.nonpairs_path,
        lang_names=args.lang_names,
        missing_freqs_path=args.missing_freqs,
        timeout=args.timeout,
        max_pipes_per_pair=args.max_pipes_per_pair,
        min_pipes_per_pair=args.min_pipes_per_pair,
        max_users_per_pipe=args.max_users_per_pipe,
        max_idle_secs=args.max_idle_secs,
        restart_pipe_after=args.restart_pipe_after,
        max_doc_pipes=args.max_doc_pipes,
        verbosity=args.verbosity,
        scale_mt_logs=args.scalemt_logs,
        memory=args.unknown_memory_limit,
        apy_keys=args.api_keys,
    )

    handlers = [
        (r'/', RootHandler),
        (r'/list', ListHandler),
        (r'/listPairs', ListHandler),
        (r'/stats', StatsHandler),
        (r'/pairprefs', PairPrefsHandler),
        (r'/translate', TranslateHandler),
        (r'/translateChain', TranslateChainHandler),
        (r'/translateDoc', TranslateDocHandler),
        (r'/translatePage', TranslateWebpageHandler),
        (r'/translateRaw', TranslateRawHandler),
        (r'/analy[sz]e', AnalyzeHandler),
        (r'/guesser', GuesserHandler),
        (r'/generate', GenerateHandler),
        (r'/listLanguageNames', ListLanguageNamesHandler),
        (r'/perWord', PerWordHandler),
        (r'/calcCoverage', CoverageHandler),
        (r'/identifyLang', IdentifyLangHandler),
        (r'/getLocale', GetLocaleHandler),
        (r'/pipedebug', PipeDebugHandler),
    ]  # type: List[Tuple[str, Type[tornado.web.RequestHandler]]]

    if importlib_util.find_spec('streamparser'):
        handlers.append((r'/speller', SpellerHandler))

    have_wiki_credentials = args.wiki_username and args.wiki_password
    if have_wiki_credentials and importlib_util.find_spec('requests'):
        import requests
        logging.info('Logging into Apertium Wiki with username %s', args.wiki_username)

        session = requests.Session()
        SuggestionHandler.SUGGEST_URL = 'User:' + args.wiki_username
        SuggestionHandler.recaptcha_secret = args.recaptcha_secret
        SuggestionHandler.wiki_session = session
        SuggestionHandler.auth_token = wiki_login(session, args.wiki_username, args.wiki_password)
        SuggestionHandler.wiki_edit_token = wiki_get_token(session, 'edit', 'info|revisions')

        handlers.append((r'/suggest', SuggestionHandler))

    if args.fasttext_model and importlib_util.find_spec('fasttext') is not None:
        import fasttext
        IdentifyLangHandler.fasttext = fasttext.FastText.load_model(args.fasttext_model)

    # TODO: fix mypy. Application expects List but List is invariant and we use subclasses
    return tornado.web.Application(handlers)  # type:ignore
×
315

×
316

317
def setup_logging(args):
    """Configure logging destinations from the parsed command line.

    * With ``args.daemon``: stderr is appended to apertium-apy.log, stdout
      to apertium-apy.err (both under ``args.log_path``), and the root
      logger writes to apertium-apy.log at INFO level.
    * With ``args.scalemt_logs``: the 'scale-mt' logger writes to
      ScaleMTRequests.log, rotated at midnight, and stops propagating; in
      daemon mode the 'tornado.access' logger stops propagating as well.

    Tornado's pretty logging is enabled in every case.
    """
    daemon_mode = args.daemon

    if daemon_mode:
        # regular content logs are output stderr
        # python messages are mostly output to stdout
        # hence swapping the filenames?
        log_target = os.path.join(args.log_path, 'apertium-apy.log')
        err_target = os.path.join(args.log_path, 'apertium-apy.err')
        sys.stderr = open(log_target, 'a+')
        sys.stdout = open(err_target, 'a+')
        # NB. Needs to happen *before* we use logs for anything
        logging.basicConfig(filename=log_target, filemode='a')
        logging.getLogger().setLevel(logging.INFO)

    if args.scalemt_logs:
        scalemt_logger = logging.getLogger('scale-mt')
        scalemt_logger.propagate = False
        requests_log = os.path.join(args.log_path, 'ScaleMTRequests.log')
        rotating = logging_handlers.TimedRotatingFileHandler(requests_log, 'midnight', 0)
        # internal attribute, should not use
        rotating.suffix = '%Y-%m-%d'  # type: ignore
        scalemt_logger.addHandler(rotating)
        # if scalemt_logs is enabled, disable tornado.access logs
        if daemon_mode:
            logging.getLogger('tornado.access').propagate = False

    enable_pretty_logging()
×
340

341

×
342
def main():
    """Entry point: parse options, start the HTTP(S) server and run the loop.

    Steps, in order: verify a UTF-8 locale, parse the command line, set up
    logging, warn about missing optional modules, build the application,
    install the SIGTERM/SIGINT handlers, bind and start the server, set up
    the optional systemd watchdog, then run the IO loop.
    """
    check_utf8()
    args = parse_args()
    setup_logging(args)  # before we start logging anything!

    if importlib_util.find_spec('fasttext') is None:
        logging.warning('Unable to import fastText, trying CLD2')
        if importlib_util.find_spec('cld2full') is None:
            logging.warning('Unable to import CLD2, continuing using naive method of language identification')
    elif not args.fasttext_model:
        logging.warning('Have fasttext lib, but started without --fasttext-model, not using fastText for language identification')

    if importlib_util.find_spec('chardet') is None:
        logging.warning('Unable to import chardet, assuming utf-8 encoding for all websites')

    if importlib_util.find_spec('streamparser') is None:
        logging.warning('Apertium streamparser not installed, spelling handler disabled')

    if importlib_util.find_spec('requests') is None:
        logging.warning('requests not installed, suggestions disabled')

    if args.bypass_token:
        logging.info('reCaptcha bypass for testing: %s', BYPASS_TOKEN)

    application = setup_application(args)

    # TLS is only enabled when both the certificate and the key are given.
    if args.ssl_cert and args.ssl_key:
        http_server = tornado.httpserver.HTTPServer(application, ssl_options={
            'certfile': args.ssl_cert,
            'keyfile': args.ssl_key,
        })
        logging.info('Serving on all interfaces/families, e.g. https://localhost:%s', args.port)
    else:
        http_server = tornado.httpserver.HTTPServer(application)
        logging.info('Serving on all interfaces/families, e.g. http://localhost:%s', args.port)

    signal.signal(signal.SIGTERM, sig_handler)
    signal.signal(signal.SIGINT, sig_handler)

    http_server.bind(args.port)
    http_server.start(args.num_processes)

    loop = tornado.ioloop.IOLoop.instance()
    wd = systemd.setup_watchdog()
    if wd is not None:
        wd.systemd_ready()
        # PeriodicCallback takes milliseconds, hence 1000 * wd.period below;
        # the log message reports the same interval in seconds.
        logging.info('Initialised systemd watchdog, pinging every %ss', wd.period)
        tornado.ioloop.PeriodicCallback(wd.watchdog_ping, 1000 * wd.period).start()
    loop.start()
1!
391

×
392

×
393
if __name__ == '__main__':
×
394
    main()
1✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc