• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

PyThaiNLP / pythainlp / 11625814262

01 Nov 2024 07:14AM UTC coverage: 20.782% (+20.8%) from 0.0%
11625814262

Pull #952

github

web-flow
Merge c8385dcae into 515fe7ced
Pull Request #952: Specify a limited test suite

45 of 80 new or added lines in 48 files covered. (56.25%)

1537 of 7396 relevant lines covered (20.78%)

0.21 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/pythainlp/spell/symspellpy.py
1
# -*- coding: utf-8 -*-
2
# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
3
# SPDX-License-Identifier: Apache-2.0
4
"""
5
symspellpy
6

7
symspellpy is a Python port of SymSpell v6.5.
8
We used unigram & bigram from Thai National Corpus (TNC).
9

10
:See Also:
11
    * \
12
        https://github.com/mammothb/symspellpy
13
"""
14
from typing import List
×
15

16
from symspellpy import SymSpell, Verbosity
×
17

NEW
18
from pythainlp.corpus import get_corpus_path, path_pythainlp_corpus
×
19

20
# Corpus names of the unigram and bigram frequency lists.
_UNIGRAM = "tnc_freq.txt"
_BIGRAM = "tnc_bigram_word_freqs"

# Shared SymSpell instance used by every function in this module.
# Both dictionaries are loaded once, at import time.
sym_spell = SymSpell()

# Unigram file: tab-separated, term in column 0, count in column 1.
sym_spell.load_dictionary(
    path_pythainlp_corpus(_UNIGRAM),
    term_index=0,
    count_index=1,
    separator="\t",
    encoding="utf-8-sig",
)

# Bigram file: tab-separated, term starts at column 0, count in column 2.
sym_spell.load_bigram_dictionary(
    get_corpus_path(_BIGRAM),
    term_index=0,
    count_index=2,
    separator="\t",
    encoding="utf-8-sig",
)
30

31

32
def spell(text: str, max_edit_distance: int = 2) -> List[str]:
    """
    Suggest spellings for a single word.

    Looks ``text`` up in the module-level ``sym_spell`` instance using
    ``Verbosity.CLOSEST`` and returns the term of every suggestion, in the
    order the lookup yields them.

    :param str text: word to look up
    :param int max_edit_distance: maximum edit distance handed to the lookup
    :return: suggested terms
    :rtype: List[str]
    """
    suggestions = sym_spell.lookup(
        text, Verbosity.CLOSEST, max_edit_distance=max_edit_distance
    )
    # Read the term attribute directly rather than cutting the suggestion's
    # string form at its first comma, which would truncate any term that
    # itself contains ",".
    return [suggestion.term for suggestion in suggestions]
41

42

43
def correct(text: str, max_edit_distance: int = 1) -> str:
    """
    Return the first spelling suggestion for a single word.

    :param str text: word to correct
    :param int max_edit_distance: maximum edit distance handed to ``spell``
    :return: the first term returned by ``spell``, or ``text`` unchanged
        when ``spell`` returns no suggestion
    :rtype: str
    """
    suggestions = spell(text, max_edit_distance=max_edit_distance)
    # Indexing an empty result would raise IndexError; fall back to the
    # input so callers always get a string back.
    if not suggestions:
        return text
    return suggestions[0]
×
45

46

47
def spell_sent(
    list_words: List[str], max_edit_distance: int = 2
) -> List[List[str]]:
    """
    Suggest spellings for a whole sentence given as a list of words.

    The words are joined with single spaces and passed to
    ``sym_spell.lookup_compound`` with ``split_by_space=True``. Each
    suggestion's term is split on spaces back into a list of words.

    :param List[str] list_words: words of the sentence
    :param int max_edit_distance: maximum edit distance handed to the lookup
    :return: one list of words per suggestion
    :rtype: List[List[str]]
    """
    suggestions = sym_spell.lookup_compound(
        " ".join(list_words),
        split_by_space=True,
        max_edit_distance=max_edit_distance,
    )
    # Use the term attribute instead of cutting the suggestion's string form
    # at its first comma, so a comma inside the sentence cannot drop the
    # words that follow it.
    return [suggestion.term.split(" ") for suggestion in suggestions]
×
63

64

65
def correct_sent(
    list_words: List[str], max_edit_distance: int = 1
) -> List[str]:
    """
    Return the first suggested correction for a sentence.

    :param List[str] list_words: words of the sentence
    :param int max_edit_distance: maximum edit distance handed to
        ``spell_sent``
    :return: the words of the first suggestion from ``spell_sent``, or a
        copy of ``list_words`` when there is no suggestion
    :rtype: List[str]
    """
    suggestions = spell_sent(list_words, max_edit_distance=max_edit_distance)
    if not suggestions:
        return list(list_words)
    # ``spell_sent`` yields one word list per suggestion. Return the whole
    # first suggestion; taking element 0 of each suggestion would keep only
    # the first word of the sentence.
    return suggestions[0]
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc