• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

PyThaiNLP / pythainlp / 4699361508

pending completion
4699361508

push

github

GitHub
Merge pull request #789 from PyThaiNLP/4.0

22 of 22 new or added lines in 6 files covered. (100.0%)

5749 of 6246 relevant lines covered (92.04%)

0.92 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

93.33
/pythainlp/tokenize/tltk.py
1
# -*- coding: utf-8 -*-
2
# Copyright (C) 2016-2023 PyThaiNLP Project
3
#
4
# Licensed under the Apache License, Version 2.0 (the "License");
5
# you may not use this file except in compliance with the License.
6
# You may obtain a copy of the License at
7
#
8
#     http://www.apache.org/licenses/LICENSE-2.0
9
#
10
# Unless required by applicable law or agreed to in writing, software
11
# distributed under the License is distributed on an "AS IS" BASIS,
12
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
# See the License for the specific language governing permissions and
14
# limitations under the License.
15
from typing import List
1✔
16
try:
1✔
17
    from tltk.nlp import word_segment as tltk_segment
1✔
18
    from tltk.nlp import syl_segment
1✔
19
except ImportError:
×
20
    raise ImportError("Not found tltk! Please install tltk by pip install tltk")
×
21

22

23
def segment(text: str) -> List[str]:
    """
    Tokenize text into words with the TLTK word segmenter.

    :param str text: text to be tokenized
    :return: list of tokens; an empty list when *text* is empty
             or is not a ``str``
    :rtype: List[str]
    """
    if not (text and isinstance(text, str)):
        return []

    # Spaces are swapped for the "<u/>" marker before segmentation and
    # restored afterwards, so they come back as literal spaces in the output.
    marked = text.replace(" ", "<u/>")
    raw = tltk_segment(marked)
    raw = raw.replace("<u/>", " ")
    # "<s/>" markers are dropped entirely (presumably TLTK's sentence
    # boundary tag -- confirm against the TLTK documentation).
    raw = raw.replace("<s/>", "")

    words = raw.split("|")
    # A trailing "|" leaves an empty last element after the split.
    if words[-1] == "":
        words.pop()
    return words
1✔
32

33

34
def syllable_tokenize(text: str) -> List[str]:
    """
    Tokenize text into syllables with the TLTK syllable segmenter.

    :param str text: text to be tokenized
    :return: list of syllables; an empty list when *text* is empty
             or is not a ``str``
    :rtype: List[str]
    """
    if not (text and isinstance(text, str)):
        return []

    # The segmenter output is split on "~" to obtain the pieces.
    syllables = syl_segment(text).split("~")
    # Drop a trailing "<s/>" marker when it is the final piece.
    if syllables[-1] == "<s/>":
        syllables.pop()
    return syllables
1✔
42

43

44
def sent_tokenize(text: str) -> List[str]:
    """
    Split text into sentences with the TLTK word segmenter.

    :param str text: text to be split into sentences
    :return: list of sentences; an empty list when *text* is empty
             or is not a ``str``
    :rtype: List[str]
    """
    # Same guard as the word and syllable tokenizers in this module:
    # without it, None or other non-string input fails on ``text.replace``.
    if not text or not isinstance(text, str):
        return []

    # Spaces are swapped for the "<u/>" marker before segmentation and
    # restored afterwards, so they come back as literal spaces in the output.
    text = text.replace(" ", "<u/>")
    # "|" separators are removed so each sentence is one continuous string.
    _temp = tltk_segment(text).replace("<u/>", " ").replace("|", "")
    _temp = _temp.split("<s/>")
    # A trailing "<s/>" leaves an empty last element after the split.
    if _temp[-1] == "":
        del _temp[-1]
    return _temp
1✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc