• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

PyThaiNLP / pythainlp / 4497445699

pending completion
4497445699

push

github

Wannaphong Phatthiyaphaibun
Add Thai NER 1.5

3 of 3 new or added lines in 1 file covered. (100.0%)

41 of 6145 relevant lines covered (0.67%)

0.01 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/pythainlp/cli/data.py
1
"""
2
thainlp dataset/corpus management command line.
3
"""
4
import argparse
×
5

6
from pythainlp import cli, corpus
×
7
from pythainlp.tools import get_pythainlp_data_path
×
8

9

10
class App:
    """Command-line handler for the ``thainlp data`` subcommands.

    Instantiating the class parses the subcommand found in ``argv[2]`` and
    immediately dispatches to the method that carries the same name
    (``catalog``, ``info``, ``get``, ``rm`` or ``path``).
    """

    def __init__(self, argv):
        """Parse the subcommand from ``argv`` and run it.

        :param argv: full command-line argument list. Only ``argv[2]`` is
            parsed here; the whole list is then handed to the subcommand
            method, which parses ``argv[3:]`` itself when it needs to.
        """
        parser = argparse.ArgumentParser(
            prog="data",
            description="Manage dataset/corpus.",
            usage=(
                "thainlp data <subcommand>\n\n"
                "subcommands:\n\n"
                "catalog                show list of available datasets\n"
                "info <dataset_name>    show information about the dataset\n"
                "get <dataset_name>     download the dataset\n"
                "rm <dataset_name>      remove the dataset\n"
                "path                   show full path to data directory\n\n"
                "Example:\n\n"
                "thainlp data get thai2fit_wv\n\n"
                "Current data path:\n\n"
                f"{get_pythainlp_data_path()}\n\n"
                "To change PyThaiNLP data path, set the operating system's\n"
                "PYTHAINLP_DATA_DIR environment variable.\n\n"
                "For more information about corpora that PyThaiNLP use, see:\n"
                "https://github.com/PyThaiNLP/pythainlp-corpus/\n\n"
                "--"
            ),
        )
        parser.add_argument(
            "subcommand",
            type=str,
            choices=["catalog", "info", "get", "rm", "path"],
            help="action on dataset/corpus",
        )
        # Parse only the subcommand token; anything after it belongs to the
        # subcommand's own parser.
        args = parser.parse_args(argv[2:3])
        # ``choices`` above limits the value to names of methods on this
        # class, so the attribute lookup cannot reach anything else.
        getattr(self, args.subcommand)(argv)

    @staticmethod
    def _parse_dataset_name(argv, description, usage):
        """Return the ``dataset_name`` positional parsed from ``argv[3:]``.

        Shared by ``get``, ``rm`` and ``info``, which all take exactly one
        dataset name. argparse prints the usage text and exits when the
        name is missing.

        :param argv: full command-line argument list
        :param description: parser description shown in the help text
        :param usage: usage line shown in help and error output
        :return: the dataset name given on the command line
        """
        parser = argparse.ArgumentParser(description=description, usage=usage)
        parser.add_argument(
            "dataset_name",
            type=str,
            help="dataset/corpus's name",
        )
        return parser.parse_args(argv[3:]).dataset_name

    def get(self, argv):
        """Download the dataset named in ``argv[3]`` and report the result."""
        dataset_name = self._parse_dataset_name(
            argv,
            description="Download a dataset",
            usage="thainlp data get <dataset_name>",
        )
        if corpus.download(dataset_name):
            print("Downloaded successfully.")
        else:
            print("Not found.")

    def rm(self, argv):
        """Remove the dataset named in ``argv[3]`` and report the result."""
        dataset_name = self._parse_dataset_name(
            argv,
            description="Remove a dataset",
            usage="thainlp data rm <dataset_name>",
        )
        if corpus.remove(dataset_name):
            print("Removed successfully.")
        else:
            print("Not found.")

    def info(self, argv):
        """Print the stored details of the dataset named in ``argv[3]``."""
        dataset_name = self._parse_dataset_name(
            argv,
            description="Print information about a dataset",
            usage="thainlp data info <dataset_name>",
        )
        details = corpus.get_corpus_db_detail(dataset_name)
        if details:
            print(details)
        else:
            print("Not found.")

    def catalog(self, argv):
        """Print dataset/corpus available for download.

        ``argv`` is accepted only so that every subcommand can be
        dispatched with the same call signature; it is not read.
        """
        response = corpus.get_corpus_db(corpus.corpus_db_url())
        # NOTE(review): get_corpus_db appears to return None when the
        # request fails -- confirm against pythainlp.corpus. Compare with
        # ``is None`` rather than truthiness, because a response object
        # may define its own boolean value.
        if response is None:
            print("Cannot retrieve the dataset/corpus catalog.")
            return

        corpus_db = response.json()
        print("Dataset/corpus available for download:")
        for name in sorted(corpus_db):
            line = f"- {name} {corpus_db[name]['latest_version']}"
            local_info = corpus.get_corpus_db_detail(name)
            if local_info:
                # Show the locally installed version next to the latest one.
                line += f"  (Local: {local_info['version']})"
            print(line)

        print(
            "\nUse subcommand 'get' to download a dataset.\n\n"
            "Example: thainlp data get crfcut\n"
        )

    def path(self, argv):
        """Print path for local dataset.

        ``argv`` is accepted only so that every subcommand can be
        dispatched with the same call signature; it is not read.
        """
        print(get_pythainlp_data_path())
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc