Coveralls logob
Coveralls logo
  • Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

INM-6 / python-dicthash / 96

24 Aug 2019 - 14:29 coverage: 97.849%. Remained the same
96

Pull #39

travis-ci

9181eb84f9c35729a3bad740fb7f9d93?size=18&default=identiconweb-flow
Fix travis
Pull Request #39: README: Improve python badges and add docs badge

273 of 279 relevant lines covered (97.85%)

0.98 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

94.74
/dicthash/dicthash.py
1
"""
2
dicthash.dicthash
3
=============
4

5
A module implementing an md5 hash function for (nested) dictionaries.
6

7
Functions
8
---------
9

10
generate_hash_from_dict - generate an md5 hash from a (nested)
11
dictionary
12

13
"""
14

15
from future.builtins import str
1×
16
import hashlib
1×
17
import warnings
1×
18
# Factor by which floats are multiplied before being truncated to an
# integer in _save_convert_float_to_int.
FLOAT_FACTOR = 1e15
# Controls how _save_convert_float_to_int treats non-zero floats whose
# magnitude is below 1 / FLOAT_FACTOR: if False a ValueError is raised,
# if True the value is replaced by zero and a UserWarning is issued.
FLOOR_SMALL_FLOATS = False

# Install the 'default' filter action for UserWarning.
# NOTE(review): the original comment here said "user warnings are printed
# to sys.stdout"; the warnings module normally writes to sys.stderr --
# confirm which stream is intended.
warnings.simplefilter('default', category=UserWarning)

# Provide a module-level name `basestring` on interpreters where the
# builtin does not exist (looking it up raises NameError there).
try:
    basestring  # attempt to evaluate basestring
except NameError:
    basestring = str
!
28

29

30
def _save_convert_float_to_int(x):
    """
    Convert a float x to an integer. Avoid rounding errors on different
    platforms by shifting the floating point behind the last relevant
    digit, i.e., x is multiplied by FLOAT_FACTOR before it is passed
    to int().

    Parameters
    ----------
    x : float
        Float to be converted.

    Returns
    -------
    : int
      int(x * FLOAT_FACTOR), or 0 if x was floored (see below).

    Raises
    ------
    ValueError
        If 0 < abs(x) < 1 / FLOAT_FACTOR and FLOOR_SMALL_FLOATS is
        False.

    Warns
    -----
    UserWarning
        If 0 < abs(x) < 1 / FLOAT_FACTOR and FLOOR_SMALL_FLOATS is
        True; x is then treated as zero.
    """
    if 0. < abs(x) < 1. / FLOAT_FACTOR:
        if not FLOOR_SMALL_FLOATS:
            raise ValueError('Float too small for safe conversion to '
                             'integer.')
        x = 0.
        # The two literals are concatenated by the parser, so the first
        # one must end with a space to keep "to integer" separated.
        warnings.warn('Float too small for safe conversion to '
                      'integer. Rounding down to zero.', UserWarning)
    return int(x * FLOAT_FACTOR)
1×
50

51

52
def _unpack_value(value, prefix='', whitelist=None, blacklist=None):
    """
    Unpack a value from a data structure and convert it to a string.

    The value is first handed to _generate_string_from_dict. If that
    raises AttributeError, it is handed to
    _generate_string_from_iterable. If that raises TypeError, the value
    is treated as a scalar: floats are converted with
    _save_convert_float_to_int, everything else with str().

    Parameters
    ----------
    value : dict, iterable, scalar variable
        Value to be unpacked.
    prefix : str, optional
        Prefix to prepend to the resulting string. Defaults to empty
        string.
    whitelist : list, optional
        Forwarded to _generate_string_from_dict; not used on the
        iterable or scalar paths. Defaults to None.
    blacklist : list, optional
        Forwarded to _generate_string_from_dict; not used on the
        iterable or scalar paths. Defaults to None.

    Returns
    -------
    : string
      The string representation of value, including the prefix.
    """

    try:
        # dict path: the prefix is extended by the marker 'd'
        return _generate_string_from_dict(value,
                                          blacklist=blacklist,
                                          whitelist=whitelist,
                                          prefix=prefix + 'd')
    except AttributeError:
        # not a dict
        try:
            # iterable path: the 'i' marker passed here is only applied
            # to strings by _generate_string_from_iterable
            return prefix + _generate_string_from_iterable(value, prefix='i')
        except TypeError:
            # not an iterable
            if isinstance(value, float):
                return prefix + str(_save_convert_float_to_int(value))
            else:
                return prefix + str(value)
1×
82

83

84
def _generate_string_from_iterable(l, prefix=''):
    """
    Build the string representation of an iterable.

    Strings are returned as a whole, preceded by the prefix. For any
    other iterable every element is unpacked with _unpack_value (which
    takes care of floats) and the results are concatenated; the prefix
    is not used in that case.

    Parameters
    ----------
    l : iterable
        Iterable to be converted.
    prefix : str, optional
        Prefix put in front of a string value. Defaults to empty
        string.
    """

    # Strings are iterable themselves; treating them like other
    # iterables would recurse forever, so they are handled first.
    if isinstance(l, basestring):
        return ''.join([prefix, str(l)])

    pieces = []
    for item in l:
        pieces.append(_unpack_value(item, prefix=''))
    return ''.join(pieces)
1×
101

102

103
def _generate_string_from_dict(d, blacklist, whitelist, prefix=''):
    """
    Build the string representation of a dictionary from its key value
    pairs. Values are unpacked with _unpack_value, which handles
    floats, iterables and nested dictionaries.

    Parameters
    ----------
    d : dict
        Dictionary to be converted
    blacklist : list
        List of keys to exclude from conversion. Blacklist overrules
        whitelist, i.e., keys appearing in the blacklist will
        definitely not be used. None disables the blacklist.
    whitelist: list
        List of keys to include in conversion. If None, all keys of d
        are used.
    prefix : str, optional
        String put in front of every key. Defaults to empty string.
    """
    if whitelist is None:
        whitelist = list(d.keys())
    if blacklist is not None:
        whitelist = set(whitelist).difference(blacklist)

    if not len(whitelist) > 0:
        return ''

    # Keys of this level, ordered by their string representation
    ordered_keys = sorted(filter_blackwhitelist(whitelist, None), key=str)

    pieces = []
    for key in ordered_keys:
        value = d[key]
        # Entries of the lists that refer to the dictionary below `key`
        sub_whitelist = filter_blackwhitelist(whitelist, key)
        sub_blacklist = filter_blackwhitelist(blacklist, key)
        pieces.append(_unpack_value(value,
                                    whitelist=sub_whitelist,
                                    blacklist=sub_blacklist,
                                    prefix=prefix + str(key)))
    return ''.join(pieces)
1×
133

134

135
def generate_hash_from_dict(d, blacklist=None, whitelist=None,
                            raw=False):
    """
    Compute an md5 hash for a (nested) dictionary.

    Nested dictionaries and iterables are unpacked, floats are
    converted such that rounding errors are avoided, and keys are
    processed in a unique order. Keys listed in the blacklist are
    excluded from the hash. If a whitelist is given, only the keys it
    lists contribute to the hash. All strings are converted to unicode,
    i.e., the hash does not distinguish between strings provided in
    ascii or unicode format. Lists, np.ndarrays and tuples are treated
    equally, i.e., an array-like item [1,2,3], np.array([1,2,3]) or
    (1,2,3) will lead to the same hash if they are of the same type.

    Parameters
    ----------
    d : dict
        Dictionary to compute the hash from.
    blacklist : list, optional
        List of keys which *are not* used for generating the hash.
        Keys of sub-dictionaries can be provided by specifying
        the full path of keys in a tuple.
        If None, no keys will be ignored.
    whitelist : list, optional
        List of keys which *are* used for generating the hash.
        Keys of sub-dictionaries can be provided by specifying
        the full path of keys in a tuple.
        If None, all keys will be used.
        Blacklist overrules whitelist, i.e., keys appearing in the
        blacklist will definitely not be used.
    raw : bool, optional
          if True, return the unhashed string.

    Returns
    -------
    : string
      The hash generated from the dictionary, or the unhashed string if
      raw is True.

    Raises
    ------
    TypeError
        If d is not a dictionary.

    Example
    -------
    >>> from dicthash import generate_hash_from_dict
    >>> d = {'a': 'asd', 'b': 0.12, 3: {'c': [3, 4, 5]}}
    >>> generate_hash_from_dict(d)
    'd748bbf148db514911ed0bf215729d01'

    """
    if not isinstance(d, dict):
        raise TypeError('Please provide a dictionary.')

    # The blacklist is validated before the whitelist.
    for key_list in (blacklist, whitelist):
        if key_list is not None:
            validate_blackwhitelist(d, key_list)

    unhashed = _generate_string_from_dict(d, blacklist, whitelist, prefix='d')
    if raw:
        return unhashed
    digest = hashlib.md5(unhashed.encode('utf-8'))
    return digest.hexdigest()
1×
196

197

198
def validate_blackwhitelist(d, l):
    """
    Validate that all entries in black/whitelist l, appear in the
    dictionary d.

    An entry is either a plain key or a tuple giving a path of keys
    into nested dictionaries; for a tuple every key along the path is
    checked.

    Parameters
    ----------
    d : dict
        Dictionary to use for validation.
    l : list
        Blacklist or whitelist to validate.

    Raises
    ------
    KeyError
        If an entry (or a key along a tuple path) is not found in the
        dictionary. This includes paths that descend into a value which
        does not support membership tests at all, e.g., a number.
    """
    for key in l:
        is_path = isinstance(key, tuple)
        k = key[0] if is_path else key
        try:
            missing = k not in d
        except TypeError:
            # d does not support `in` for k (e.g. the path walked into
            # a scalar value): the entry cannot be valid.
            missing = True
        if missing:
            raise KeyError('Key "{key}" not found in dictionary. '
                           'Invalid black/whitelist.'.format(key=key))
        if is_path and len(key) > 1:
            # check the remainder of the path against the nested value
            validate_blackwhitelist(d[k], [key[1:]])
1×
220

221

222
def filter_blackwhitelist(l, key):
    """
    Filter a black/whitelist for the entries that belong to the
    sub-dictionary which is embedded into the nested dictionary
    structure under the given key.

    Three different cases:
    - if l is None, then return None
    - if key is None, then we are at the top-level dictionary, thus
      include all scalar keys and the first element of tuples.
    - if key is not None, then return only the remainder of those
      tuples whose first element matches the given key. A remainder
      of length one is returned as a scalar key, a longer one as a
      tuple.

    Parameters
    ----------
    l : list
        Black- or whitelist to filter
    key : scalar variable or None
        Key to filter for. See above for the behavior if key is None

    Returns
    -------
    : list or None
      The filtered list, or None if l is None or no entry is left.
    """
    if l is None:
        return None

    filtered = []
    for entry in l:
        if not isinstance(entry, tuple):
            # scalar keys only belong to the top level
            if key is None:
                filtered.append(entry)
        elif key is None:
            filtered.append(entry[0])
        elif entry[0] == key:
            filtered.append(entry[1] if len(entry) == 2 else entry[1:])
    # an empty result is reported as None
    return filtered or None
1×
Troubleshooting · Open an Issue · Sales · Support · ENTERPRISE · CAREERS · STATUS
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2023 Coveralls, Inc