• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

INGEOTEC / IngeoDash / 5615006502

pending completion
5615006502

push

github

mgraffg
Tests

1 of 1 new or added line in 1 file covered. (100.0%)

326 of 338 relevant lines covered (96.45%)

3.86 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

97.98
/IngeoDash/annotate.py
1
# Copyright 2023 Mario Graff Guerrero
2

3
# Licensed under the Apache License, Version 2.0 (the "License");
4
# you may not use this file except in compliance with the License.
5
# You may obtain a copy of the License at
6

7
#     http://www.apache.org/licenses/LICENSE-2.0
8

9
# Unless required by applicable law or agreed to in writing, software
10
# distributed under the License is distributed on an "AS IS" BASIS,
11
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
# See the License for the specific language governing permissions and
13
# limitations under the License.
14
from EvoMSA import BoW, DenseBoW, StackGeneralization
4✔
15
from typing import Union
4✔
16
from IngeoDash.config import Config
4✔
17
from IngeoDash.config import CONFIG
4✔
18
from sklearn.svm import LinearSVC
4✔
19
import numpy as np
4✔
20

21

22
def has_label(mem: Config, x):
    """Tell whether the record `x` already carries a non-empty label.

    :param mem: Configuration object; only `mem.label_header` (the key
                under which a label is stored) is read.
    :param x: Record supporting `in` and item access, e.g., a dict.
    :return: `True` when the label key is present and its value is neither
             `None` nor a value whose string form is empty; `False` otherwise.
    """
    key = mem.label_header
    if key not in x:
        return False
    value = x[key]
    # The value is rendered as text so that falsy labels such as 0 still
    # count as a label; only None and the empty string do not.
    return value is not None and len(f'{value}') > 0
28

29

30
def model(mem: Config, data: dict, select: bool=True):
    """Train a text classifier on the labeled records in `data`.

    The dense text representations of each language are loaded once and
    kept in `CONFIG.denseBoW`, so later calls reuse them.

    :param mem: Configuration; provides the language, the text and label
                keys, the vocabulary parameters, the number of jobs and
                the stacking parameters.
    :param data: Iterable of labeled records; each one is indexed with
                 `mem.label_header`.
    :param select: When true, `dense.select(D=data)` is invoked before
                   fitting.
    :return: The fitted `DenseBoW` when some class has fewer than five
             examples; otherwise a fitted `StackGeneralization` built on
             a `BoW` and the `DenseBoW`.
    """
    lang = mem[mem.lang]
    vocabulary = dict(voc_size_exponent=mem.voc_size_exponent,
                      voc_selection=mem.voc_selection)
    if lang not in CONFIG.denseBoW:
        # First use of this language: keep its text representations in
        # the module-wide cache.
        loader = DenseBoW(lang=lang, n_jobs=mem.n_jobs,
                          dataset=False, **vocabulary)
        CONFIG.denseBoW[lang] = loader.text_representations
    dense = DenseBoW(lang=lang, key=mem.text,
                     label_key=mem.label_header,
                     n_jobs=mem.n_jobs,
                     dataset=False, emoji=False, keyword=False,
                     **vocabulary)
    dense.text_representations_extend(CONFIG.denseBoW[lang])
    if select:
        dense.select(D=data)
    _, counts = np.unique([record[mem.label_header] for record in data],
                          return_counts=True)
    if np.any(counts < 5):
        # Some class has fewer than five examples: the stacked model is
        # skipped and only the dense model is fitted.
        return dense.fit(data)
    bow = BoW(lang=lang, key=mem.text,
              label_key=mem.label_header,
              **vocabulary)
    stack = StackGeneralization(decision_function_models=[bow, dense],
                                decision_function_name=mem.decision_function_name,
                                estimator_class=mem.estimator_class)
    return stack.fit(data)
58
    
59

60

61
def active_learning_selection(mem: Config):
    """Pick the records the current model is least certain about.

    A model is trained on the user's permanent (already labeled) records
    and its decision function is evaluated on the candidate pool, i.e.,
    the records in `db[mem.data]` followed by those in `db[mem.original]`
    (if any).  The `mem.n_value` least certain candidates are tagged with
    the predicted label and stored in `db[mem.data]`; the remaining ones
    are stored in `db[mem.original]`.  Both keep the pool's order.

    Uncertainty is measured as

    * more than two labels: the gap between the two largest decision
      values of the record (smaller gap, more uncertain);
    * otherwise: the absolute value of the first decision-function column
      (closer to zero, more uncertain).

    The suggested labels are written as native Python values (not NumPy
    scalars), matching what `label_column_predict` stores.

    :param mem: Configuration; provides the user name, the db keys, the
                known labels (`mem[mem.labels]`) and `mem.n_value`.
    :return: `None`; the user's db entry is updated in place.
    """
    db = CONFIG.db[mem[mem.username]]
    classifier = model(mem, db[mem.permanent])
    # The concatenation creates a new list, so the lists held by the db
    # are not modified while the pool is split.
    pool = db[mem.data] + db.get(mem.original, list())
    hy = classifier.decision_function(pool)
    labels = np.array(mem[mem.labels])
    if len(mem[mem.labels]) > 2:
        rows = np.arange(hy.shape[0])
        order = np.argsort(hy, axis=1)
        margin = hy[rows, order[:, -1]] - hy[rows, order[:, -2]]
        index = np.argsort(margin)[:mem.n_value]
        index.sort()
        klasses = labels[hy[index].argmax(axis=1)]
    else:
        index = np.argsort(np.fabs(hy[:, 0]))[:mem.n_value]
        index.sort()
        klasses = labels[np.where(hy[:, 0][index] > 0, 1, 0)]
    selected = index.tolist()
    data = []
    # tolist() turns the NumPy scalars into plain Python objects.
    for position, klass in zip(selected, klasses.tolist()):
        ele = pool[position]
        ele[mem.label_header] = klass
        data.append(ele)
    taken = set(selected)
    db[mem.original] = [ele for position, ele in enumerate(pool)
                        if position not in taken]
    db[mem.data] = data
86

87

88
def label_column_predict(mem: Config, model=None):
    """Fill the label of the records in `db[mem.data]` with predictions.

    Nothing is done when there are no records or every record already has
    a label (see `has_label`).  When active learning is enabled in `mem`,
    the work is delegated to `active_learning_selection`.  Otherwise a
    model is trained on the permanent records and its prediction is
    stored in every record lacking the label key.

    :param mem: Configuration; provides the user name and the db keys.
    :param model: Callable `model(mem, D)` returning a fitted object with
                  a `predict` method.  `None` (the default) selects the
                  module-level `model` function.
    :return: `None`; the records are updated in place.
    """
    db = CONFIG.db[mem[mem.username]]
    data = db[mem.data]
    if len(data) == 0 or all(has_label(mem, x) for x in data):
        return
    if mem.active_learning in mem and mem[mem.active_learning]:
        return active_learning_selection(mem)
    if model is None:
        # The parameter shadows the module-level trainer; without this
        # fallback the default value would fail with "NoneType is not
        # callable".
        model = globals()['model']
    classifier = model(mem, db[mem.permanent])
    predictions = classifier.predict(data).tolist()
    for ele, hy in zip(data, predictions):
        # A label key already present is preserved.
        ele[mem.label_header] = ele.get(mem.label_header, hy)
100

101

102
def label_column(mem: Config, model=model):
    """Make sure the records in `db[mem.data]` have a label.

    When the permanent records contain more than one distinct label,
    those labels are saved in `mem[mem.labels]` and the labels are
    predicted through `label_column_predict`.  Otherwise every record
    lacking the label key receives the first known label (0 when
    `mem` has no labels).

    :param mem: Configuration; provides the user name and the db keys.
    :param model: Callable used to train the model; forwarded to
                  `label_column_predict`.
    :return: Whatever `label_column_predict` returns on the prediction
             path; `None` otherwise.
    """
    db = CONFIG.db[mem[mem.username]]
    if mem.permanent in db:
        observed = np.unique([record[mem.label_header]
                              for record in db[mem.permanent]])
        if observed.shape[0] > 1:
            mem[mem.labels] = tuple(observed.tolist())
            return label_column_predict(mem, model=model)
    # Not enough distinct labels to train a model: use a constant label.
    fallback = mem.get(mem.labels, (0, ))[0]
    for record in db[mem.data]:
        record[mem.label_header] = record.get(mem.label_header, fallback)
114

115

116
def flip_label(mem: Config, k: int):
    """Replace the label of the k-th record with the next known label.

    The labels are cycled in the order given by `mem[mem.labels]`
    (`(0, 1)` when `mem` has no labels); after the last label comes the
    first one.

    :param mem: Configuration; provides the user name and the db keys.
    :param k: Position of the record in `db[mem.data]`.
    :return: The modified record.
    """
    records = CONFIG.db[mem[mem.username]][mem.data]
    assert k < len(records)
    options = mem.get(mem.labels, (0, 1))
    record = records[k]
    current = options.index(record[mem.label_header])
    # The modulo wraps the position back to the first label.
    record[mem.label_header] = options[(current + 1) % len(options)]
    return record
125

126

127
def store(mem: Config):
    """Move the records in `db[mem.data]` to the permanent records.

    The `mem.data` entry is removed from the user's db and its records
    are appended to `db[mem.permanent]`, which is created when missing.

    :param mem: Configuration; provides the user name and the db keys.
    :return: `None`; the user's db entry is updated in place.
    """
    db = CONFIG.db[mem[mem.username]]
    if mem.data in db:
        pending = db.pop(mem.data)
    else:
        pending = []
    try:
        archive = db[mem.permanent]
    except KeyError:
        # Nothing has been stored yet for this user.
        archive = []
    archive.extend(pending)
    db[mem.permanent] = archive
136

137

138
def similarity(query: Union[list, str],
               dataset: list, key: str='text',
               lang: str='es'):
    """Compute the dot product between the dataset and the queries.

    Both inputs are transformed with the same `BoW(lang=lang, key=key)`.

    :param query: A single text or a list of queries; a string is
                  wrapped in a one-element list.
    :param dataset: Elements to be compared against the queries.
    :param key: Key forwarded to `BoW`.
    :param lang: Language forwarded to `BoW`.
    :return: Dense array obtained from `dataset.dot(query.T)`;
             presumably one row per dataset element and one column per
             query -- verify against `BoW.transform`.
    """
    queries = [query] if isinstance(query, str) else query
    vectorize = BoW(lang=lang, key=key).transform
    query_matrix = vectorize(queries)
    dataset_matrix = vectorize(dataset)
    return dataset_matrix.dot(query_matrix.T).toarray()
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc