• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

cnr-ibba / SMARTER-database / 9189630163

22 May 2024 10:26AM CUT coverage: 94.311%. Remained the same
9189630163

Pull #121

github

web-flow
Merge 389cf0ae7 into fcdb3ce6a
Pull Request #121: ⬆️ Bump jinja2 from 3.1.2 to 3.1.4

2984 of 3164 relevant lines covered (94.31%)

0.94 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

87.67
/src/features/utils.py
1
#!/usr/bin/env python3
2
# -*- coding: utf-8 -*-
3
"""
1✔
4
Created on Mon Mar 15 14:13:51 2021
5

6
@author: Paolo Cozzi <paolo.cozzi@ibba.cnr.it>
7
"""
8

9
import io
1✔
10
import re
1✔
11
import gzip
1✔
12
import logging
1✔
13
import pathlib
1✔
14
import collections
1✔
15

16
# Get an instance of a logger
17
logger = logging.getLogger(__name__)  # logger named after this module
1✔
18

19

20
def sanitize(
        word: str,
        chars=('.', ",", "-", "/", "#"),
        check_mongoengine=True) -> str:
    """Sanitize a word by removing unwanted characters and lowercase it.

    Every character listed in ``chars`` is replaced by a space, then each
    run of whitespace is collapsed into a single underscore and the result
    is lowercased.

    Args:
        word (str): the word to sanitize
        chars (list): a list of characters to remove
        check_mongoengine (bool): true to add '_' after a mongoengine reserved
            word

    Returns:
        str: the sanitized word
    """

    tmp = word

    # escape every character: left as they are, '^', ']' or a '-' placed
    # between two other characters would change the meaning of the regex
    # character class (or make it invalid)
    pattern = "".join(re.escape(char) for char in chars)

    # remove unwanted characters from word by putting spaces. An empty
    # character class is not a valid regex, so skip it when chars is empty
    if pattern:
        tmp = re.sub(r'[%s]' % (pattern), ' ', tmp)

    # remove spaces from column name and lowercase all
    sanitized = re.sub(r"\s+", "_", tmp).lower()

    if check_mongoengine and sanitized in ('size', 'type'):
        sanitized += "_"

    # remove starting sanitized char (can't be used with namedtuple)
    if sanitized.startswith("_"):
        sanitized = sanitized[1:]

    return sanitized
1✔
52

53

54
def camelCase(string: str) -> str:
    """Convert a string into camel case

    Runs of '_', '-' and '.' are treated as word separators. Each word is
    title-cased (so letters after the first one of a word are lowercased),
    the words are joined together and the very first letter is lowercased.

    Args:
        string (str): the string to convert

    Returns:
        str: the camel case version of the string (an empty string when
            nothing is left after removing separators)
    """

    # turn separators into spaces so that title() capitalizes every word,
    # then glue the words together
    string = re.sub(r"(_|-|\.)+", " ", string).title().replace(" ", "")

    # slice instead of indexing: string[0] raises IndexError on empty input
    return string[:1].lower() + string[1:]
×
66

67

68
class TqdmToLogger(io.StringIO):
    """
        Output stream for TQDM which will output to logger module instead of
        the StdOut.

        Only the most recently written text is kept: write() stores it and
        flush() sends it to the logger.
    """
    # class-level defaults, replaced per instance by __init__ / write
    logger = None
    level = None
    buf = ''

    def __init__(self, logger, level=None):
        """Store the target logger and the level used to emit messages.

        Args:
            logger: the object whose ``log(level, message)`` method is called
                by flush()
            level: the logging level to use; logging.INFO when None
        """
        super().__init__()
        self.logger = logger

        # compare against None: 'level or logging.INFO' would silently
        # replace an explicit level 0 (logging.NOTSET) with INFO
        self.level = logging.INFO if level is None else level

    def write(self, buf):
        """Remember buf (without surrounding blanks) for the next flush."""
        self.buf = buf.strip('\r\n\t ')

    def flush(self):
        """Send the last written text to the logger at the chosen level."""
        self.logger.log(self.level, self.buf)
1✔
87

88

89
def get_project_dir() -> pathlib.Path:
    """Return smarter project dir (the third ancestor of the path of the
    module in which this function is stored)

    Returns:
        pathlib.Path: the smarter project base dir
    """
    # parents[0] is the directory holding this module, parents[2] is two
    # levels above it. pathlib.Path() builds the flavour matching the
    # running platform, hence the generic pathlib.Path annotation
    return pathlib.Path(__file__).parents[2]
1✔
97

98

99
def get_raw_dir() -> pathlib.Path:
    """Return smarter data raw dir

    Returns:
        pathlib.Path: the smarter data raw directory ("data/raw" under the
            project dir)
    """

    # the return type follows get_project_dir(), which builds its result
    # with pathlib.Path (platform dependent flavour)
    return get_project_dir() / "data/raw"
×
107

108

109
def get_interim_dir() -> pathlib.Path:
    """Return smarter data temporary dir

    Returns:
        pathlib.Path: the smarter data temporary dir ("data/interim" under
            the project dir)
    """

    # the return type follows get_project_dir(), which builds its result
    # with pathlib.Path (platform dependent flavour)
    return get_project_dir() / "data/interim"
×
117

118

119
def get_processed_dir() -> pathlib.Path:
    """Return smarter data processed dir (final processed data)

    Returns:
        pathlib.Path: the smarter data final processed dir ("data/processed"
            under the project dir)
    """

    # the return type follows get_project_dir(), which builds its result
    # with pathlib.Path (platform dependent flavour)
    return get_project_dir() / "data/processed"
×
127

128

129
def text_or_gzip_open(path: str, mode: str = None) -> io.TextIOWrapper:
    """Open a file which can be compressed or not. Returns file handle

    A path whose suffix is '.gz' is opened with gzip.open(), any other path
    with the builtin open(). Closing the returned handle is up to the caller.

    Args:
        path (str): the path of the file to open
        mode (str): the open mode. When missing, the file is opened for
            reading text. A mode with neither 'b' nor 't' means text for
            both plain and gzipped files

    Returns:
        io.TextIOWrapper: the open file handle (a binary handle when a mode
            containing 'b' is requested)
    """

    if pathlib.Path(path).suffix == '.gz':
        if not mode:
            mode = 'rt'

        elif 'b' not in mode and 't' not in mode:
            # gzip.open() reads a bare 'r'/'w'/'a'/'x' as binary, while the
            # builtin open() reads it as text: ask for text explicitly so
            # the same mode gives the same kind of handle in both branches
            mode += 't'

        logger.debug(f"Gzip detected for {path}")
        return gzip.open(path, mode=mode)

    if not mode:
        mode = 'r'

    return open(path, mode=mode)
1✔
144

145

146
def find_duplicates(header: list) -> list:
    """Find duplicate columns in list. Returns index to remove after the first
    occurrence

    Indexes are grouped by column, following the order in which each
    duplicated column first appears in header.

    Args:
        header (list): a list like the header read from a CSV file

    Returns:
        list: a list of index (numeric)
    """

    # collect in a single pass the positions of every column: keys keep the
    # order of first appearance
    positions = collections.defaultdict(list)

    for index, column in enumerate(header):
        positions[column].append(index)

    to_remove = []

    for indexes in positions.values():
        # track only from the 2nd occurrence (nothing for unique columns)
        to_remove += indexes[1:]

    return to_remove
1✔
172

173

174
def skip_comments(handle: io.TextIOWrapper, comment_char="#") -> (int, list):
    """
    Ignore comments lines from a open file handle. Return the stream position
    immediately after the comments and all the comment lines in a list.

    Reading stops at the first line which, once stripped, is empty or does
    not start with the comment character. That line has already been read
    from the handle when this function returns, so seek to the returned
    position to read it again.

    Parameters
    ----------
    handle : io.TextIOWrapper
        An open file handle.
    comment_char : str, optional
        The comment character used in file. The default is "#".

    Returns
    -------
    (int, list)
        The stream position after the comments and the ignored lines as a list
        (stripped of surrounding whitespace). When there are no leading
        comments, the position is the one the handle had on entry.
    """

    # track skipped lines
    skipped = list()

    # with no leading comments, the position to return is the current one
    position = handle.tell()

    # read first line
    line = handle.readline().strip()

    # search for comments in file. The emptiness check stops the loop on
    # blank lines and at the end of file
    while line and line.startswith(comment_char):
        logger.debug(f"Skipping: {line}")
        skipped.append(line)

        # the stream is now just after the comment line read so far
        position = handle.tell()

        # read another line
        line = handle.readline().strip()

    return position, skipped
1✔
209

210

211
class UnknownCountry:
    """Stand-in object describing a country which is not known.

    Each instance carries four attributes: ``name`` ("Unknown"),
    ``alpha_2`` ("UN"), ``alpha_3`` ("UNK") and ``numeric`` (None).
    """

    def __init__(self):
        # textual identifiers of the unknown country
        self.name, self.alpha_2, self.alpha_3 = "Unknown", "UN", "UNK"

        # no numeric code is available
        self.numeric = None
1✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc