22 of 24 new or added lines in 1 file covered. (91.67%)
51 existing lines in 8 files now uncovered.
6221 of 7330 relevant lines covered (84.87%)
0.85 hits per line
Per-line coverage for the file (the module-level statements are hit; the body of the Parse class is reported uncovered):

# -*- coding: utf-8 -*-
"""
esupar: Tokenizer, POS tagger and dependency parser with BERT/RoBERTa/DeBERTa models for Japanese and other languages

GitHub: https://github.com/KoichiYasuoka/esupar
"""
from typing import List, Union

try:
    import esupar
except ImportError:
    raise ImportError("Import Error; Install esupar by pip install esupar")


class Parse:
    def __init__(self, model: str = "th") -> None:
        if model is None:
            model = "th"
        self.nlp = esupar.load(model)

    def __call__(
        self, text: str, tag: str = "str"
    ) -> Union[List[List[str]], str]:
        _data = str(self.nlp(text))
        if tag == "list":
            _temp = _data.splitlines()
            _tag_data = []
            for i in _temp:
                _tag_data.append(i.split())
            return _tag_data
        return _data
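For reference, a minimal usage sketch of the Parse wrapper above. It assumes esupar is installed and the default "th" model can be loaded; the sample sentence and variable names are illustrative only.

# Minimal usage sketch of the Parse wrapper defined above.
# Assumes esupar is installed and the default "th" model can be loaded;
# the sample sentence and variable names are illustrative only.
parser = Parse(model="th")

# Default tag="str": the parse result stringified by str(self.nlp(text)).
parsed_str = parser("ภาษาไทยง่ายนิดเดียว")
print(parsed_str)

# tag="list": each line of that string split into whitespace-separated fields.
parsed_rows = parser("ภาษาไทยง่ายนิดเดียว", tag="list")
for row in parsed_rows:
    print(row)

Note that the tag="list" branch simply re-splits the stringified output line by line, so the per-token fields mirror whatever column layout esupar prints.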