• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

apertium / apertium-apy / 3856155878

pending completion
3856155878

Pull #207

github

GitHub
Merge b3a03d427 into 784ebd9d4
Pull Request #207: Option to use fasttext for language identification

363 of 913 branches covered (39.76%)

Branch coverage included in aggregate %.

33 of 33 new or added lines in 2 files covered. (100.0%)

1250 of 2274 relevant lines covered (54.97%)

0.55 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

46.55
/apertium_apy/handlers/identify_lang.py
1
from datetime import timedelta
1✔
2

3
from tornado import gen
1✔
4

5
try:
1✔
6
    import fasttext
1✔
7
except ImportError:
×
8
    fasttext = None
×
9
try:
1✔
10
    import cld2full as cld2  # type: ignore
1✔
11
except ImportError:
1✔
12
    cld2 = None
1✔
13

14
from apertium_apy.handlers.base import BaseHandler
1✔
15
from apertium_apy.utils import get_coverages, to_alpha3_code
1✔
16

17

18
def fasttext_strip_prefix(s):
    """Remove the initial ``__label__`` prefix from a fastText label.

    The prefix is only removed when it is actually present; a label that
    does not start with ``__label__`` is returned unchanged rather than
    having its first nine characters chopped off.

    :param s: label string, e.g. ``'__label__nob'``
    :return: the label without the leading ``__label__``
    """
    prefix = '__label__'
    if s.startswith(prefix):
        return s[len(prefix):]
    return s
×
21

22

23
def fasttext_identify(model, text):
    """Identify the likely languages of ``text`` using a fastText-style model.

    :param model: object exposing ``predict(text, k=..., threshold=...)`` that
        returns a pair ``(labels, scores)`` (presumably a loaded fastText
        model -- confirm against the code that sets it up)
    :param text: the text to identify
    :return: dict mapping ``to_alpha3_code(label)`` to its score, or
        ``{'nob': 1.0}`` when the model returns no labels
    """
    # fastText's predict() works on a single line and raises ValueError when
    # the input contains a newline, so flatten multi-line queries first.
    single_line = text.replace('\n', ' ')
    # grab a bunch since currently the model might predict stuff outside possible_langs – it's still fast:
    labels, scores = model.predict(single_line, k=200, threshold=0.001)
    if not labels:
        return {'nob': 1.0}  # TODO: better default
    # float() turns numpy scalar scores into plain Python floats so the
    # resulting dict can be serialised whatever dtype the model hands back.
    return {to_alpha3_code(fasttext_strip_prefix(label)): float(score)
            for label, score in zip(labels, scores)}
×
33

34

35
def cld_identify(text):
    """Identify the likely languages of ``text`` using the ``cld2`` module.

    The first element of the ``cld2.detect`` result gates the answer
    (presumably the "is reliable" flag -- confirm against the cld2 binding);
    when it is falsy a fixed default is returned. Otherwise each entry of the
    third element is mapped from ``to_alpha3_code(entry[1])`` to ``entry[2]``,
    skipping entries whose code is ``'un'``.

    :param text: the text to identify
    :return: dict of language code to the value reported by cld2, or
        ``{'nob': 1.0}`` when the detection is not usable
    """
    detection = cld2.detect(text)
    if not detection[0]:
        return {'nob': 1.0}  # TODO: better default
    return {to_alpha3_code(entry[1]): entry[2]
            for entry in detection[2]
            if entry[1] != 'un'}
×
43

44

45
class IdentifyLangHandler(BaseHandler):
    """Request handler that answers with language guesses for the ``q`` argument.

    Backends are tried in a fixed order: the fastText model stored on the
    class (when not None), then the ``cld2`` module (when it was importable),
    and finally analyser coverage via ``get_coverages``.
    """

    # fastText model used for identification; None disables that backend.
    # Presumably assigned elsewhere at start-up -- not visible in this file.
    fasttext = None

    @gen.coroutine
    def get(self):
        """Handle a GET request: identify the language of ``q`` and respond.

        Sends a 400 error when ``q`` is empty and a 408 error when the
        coverage-based fallback exceeds ``self.timeout`` seconds.
        """
        query = self.get_argument('q')
        if not query:
            return self.send_error(400, explanation='Missing q argument')

        model = self.fasttext
        if model is not None:
            self.send_response(fasttext_identify(model, query))
            return

        if cld2:
            self.send_response(cld_identify(query))
            return

        # Neither library backend is available: fall back to analyser
        # coverage, bounded by the handler's timeout.
        try:
            limit = timedelta(seconds=self.timeout)
            pending = get_coverages(query, self.analyzers, penalize=True)
            coverages = yield gen.with_timeout(limit, pending)
            self.send_response(coverages)
        except gen.TimeoutError:
            self.send_error(408, explanation='Request timed out')
×
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc