• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

klahnakoski / mo-json / 13620216787

02 Mar 2025 11:08PM UTC coverage: 64.19%. First build
13620216787

push

github

klahnakoski
all tests pass

9 of 10 new or added lines in 4 files covered. (90.0%)

1002 of 1561 relevant lines covered (64.19%)

0.64 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

62.73
/mo_json/typed_encoder.py
1
# encoding: utf-8
2
#
3
#
4
# This Source Code Form is subject to the terms of the Mozilla Public
5
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
6
# You can obtain one at https://www.mozilla.org/en-US/MPL/2.0/.
7
#
8
# Contact: Kyle Lahnakoski (kyle@lahnakoski.com)
9
#
10
from datetime import date, datetime, timedelta
1✔
11
from decimal import Decimal
1✔
12

13
from mo_dots import (
1✔
14
    CLASS,
15
    Data,
16
    DataObject,
17
    FlatList,
18
    NullType,
19
    SLOT,
20
    _get,
21
    join_field,
22
    split_field,
23
    concat_field, is_missing,
24
)
25
from mo_future import (
1✔
26
    binary_type,
27
    generator_types,
28
    integer_types,
29
    is_binary,
30
    is_text,
31
    sort_using_key,
32
    text,
33
)
34
from mo_logs import Log
1✔
35
from mo_times import Date, Duration
1✔
36

37
from mo_json.encoder import COLON, COMMA, UnicodeBuilder, json_encoder
1✔
38
from mo_json.scrubber import datetime2unix
1✔
39
from mo_json.typed_object import TypedObject
1✔
40
from mo_json.types import (
1✔
41
    BOOLEAN,
42
    EXISTS,
43
    INTEGER,
44
    ARRAY,
45
    NUMBER,
46
    STRING,
47
    python_type_to_jx_type,
48
    python_type_to_jx_type_key,
49
)
50
from mo_json.types import (
1✔
51
    BOOLEAN_KEY,
52
    NUMBER_KEY,
53
    INTEGER_KEY,
54
    STRING_KEY,
55
    ARRAY_KEY,
56
    EXISTS_KEY,
57
    IS_TYPE_KEY,
58
)
59
from mo_json.utils import quote, float2json
1✔
60

61

62
def encode_property(name):
    """
    Escape a property name so it can be used as a key in typed JSON.

    Every literal comma becomes backslash-comma and every dot becomes a
    comma; both substitutions are applied in a single pass over the name.

    :param name: the plain property name
    :return: the escaped property name
    """
    substitutions = str.maketrans({",": "\\,", ".": ","})
    return name.translate(substitutions)
64

65

66
def decode_property(encoded):
    """
    Reverse the escaping applied to property names in typed JSON.

    A backslash-comma pair decodes to a literal comma, and any other comma
    decodes to a dot.

    The name is split on the escaped pair instead of going through a
    temporary placeholder character, so a name that already contains the
    BEL character ("\\a") is returned unchanged rather than having it
    turned into a comma.

    :param encoded: the escaped property name
    :return: the plain property name
    """
    # each piece sits between two escaped commas; any comma left in it is an encoded dot
    return ",".join(piece.replace(",", ".") for piece in encoded.split("\\,"))
68

69

70
def untype_path(encoded):
    """
    Remove every type key from a typed path, including a trailing one.

    :param encoded: the typed (and property-encoded) path
    :return: the untyped path; steps matching IS_TYPE_KEY are dropped and
        the remaining steps are decoded with decode_property
    """

    def _strip_types(typed):
        # keep only the steps that are not type keys, decoding each one
        return join_field(decode_property(step) for step in split_field(typed) if not IS_TYPE_KEY.match(step))

    if not encoded.startswith(".."):
        return _strip_types(encoded)

    # relative path: re-emit one dot fewer than the number of leading dots
    remainder = encoded.lstrip(".")
    leading = len(encoded) - len(remainder) - 1
    return ("." * leading) + _strip_types(remainder)
83

84

85
def unnest_path(encoded):
    """
    Remove the type keys from a typed path, but keep the last step as it is.

    :param encoded: the typed (and property-encoded) path
    :return: the path with type keys removed from all but the last step;
        every kept step is decoded with decode_property
    """
    if encoded.startswith(".."):
        remainder = encoded.lstrip(".")
        back = len(encoded) - len(remainder)
        # NOTE(review): this branch delegates to untype_path, which also drops a
        # trailing type key, unlike the branch below - confirm this is intended
        return ("." * back) + untype_path(remainder)

    path = split_field(encoded)
    if not path:
        # no steps at all: path[-1] below would raise IndexError
        return join_field([])
    return join_field([
        *(decode_property(c) for c in path[:-1] if not IS_TYPE_KEY.match(c)),
        decode_property(path[-1]),
    ])
101

102

103
def get_nested_path(typed_path):
    """
    Construct the nested_path tuple for a typed path.

    :param typed_path: the typed path to inspect
    :return: tuple of paths, one for each ARRAY_KEY step found before the
        last step, deepest first, and always ending with "."
    """
    steps = split_field(typed_path)
    root = "."
    # every ARRAY_KEY step (the final step is not considered) marks a nesting level
    levels = [
        concat_field(root, join_field(steps[: depth + 1]))
        for depth, step in enumerate(steps[:-1])
        if step == ARRAY_KEY
    ]
    levels.reverse()
    return tuple(levels) + (root,)
113

114

115
def detype(value):
    """
    Public entry point for removing the type annotations from a typed value.

    :param value: the typed value
    :return: whatever _detype_value returns for `value`
    """
    untyped = _detype_value(value)
    return untyped
117

118

119
def _detype_list(value):
    """
    Remove the type annotations from every element of an iterable.

    :param value: iterable of typed values
    :return: new list holding the _detype_value result for each element, in order
    """
    return list(map(_detype_value, value))
121

122

123
def _detype_dict(value):
    """
    Remove the type annotations from a typed dict.

    :param value: dict whose keys are property names and/or type keys
    :return: the payload of the first type key met (other than EXISTS_KEY),
        or else a new dict of decoded property names to untyped values;
        properties whose untyped value is None are left out
    """
    plain = {}

    for key, typed in value.items():
        if not IS_TYPE_KEY.match(key):
            # ordinary property: recurse, and drop it when nothing is left
            inner = _detype_value(typed)
            if inner is not None:
                plain[decode_property(key)] = inner
            continue

        if key == EXISTS_KEY:
            # existence marker carries no data
            continue
        if key == ARRAY_KEY:
            return _detype_list(typed)
        # any other type key holds the value itself
        return typed

    return plain
139

140

141
def _detype_value(value):
    """
    Remove the type annotations from a single value, dispatching on its class.

    :param value: the value to untype
    :return: the untyped value; values of a class not listed below are
        returned unchanged
    """
    kind = _get(value, CLASS)

    if kind is TypedObject:
        return value._boxed_value
    if kind is Data:
        # untype the object held in the wrapper's SLOT
        return _detype_dict(_get(value, SLOT))
    if kind is dict:
        return _detype_dict(value)
    if kind is FlatList:
        return _detype_list(value.list)
    if kind is list:
        return _detype_list(value)
    if kind is NullType:
        return None
    if kind is DataObject:
        return _detype_value(_get(value, SLOT))
    if kind in generator_types:
        return _detype_list(value)
    return value
161

162

163
def encode(value):
    """
    Encode a value as typed JSON.

    Encoding starts with an empty schema, an empty path and a throw-away
    list for newly discovered properties.

    :param value: the data structure to encode
    :return: the result of building the UnicodeBuilder that received the output
    """
    out = UnicodeBuilder(1024)
    typed_encode(value, {}, [], [], out)
    return out.build()
167

168

169
def typed_encode(value, sub_schema, path, net_new_properties, buffer):
    """
    Append the typed-JSON encoding of `value` to `buffer`.

    Scalars are written as a one-property object keyed by their type key
    (BOOLEAN_KEY, NUMBER_KEY or STRING_KEY); objects carry an EXISTS_KEY
    marker and nested arrays are written under ARRAY_KEY. `sub_schema` is
    updated in place whenever a new type key or property name is met, and
    each addition is also recorded in `net_new_properties` as a path list.

    :param value: the data structure to encode
    :param sub_schema: dict from path to Column describing the type; an object
        whose class is named "Column" is also accepted and is checked against
        the type of `value`
    :param path: list of the current path
    :param net_new_properties: list for adding new properties not found in sub_schema
    :param buffer: UnicodeBuilder object that receives the output
    :return: None, the output is written to `buffer`

    Every failure, including an unsupported value type, is reported through
    Log.error as "... is not JSON serializable" with the original exception
    attached as the cause.
    """
    try:
        if sub_schema.__class__.__name__ == "Column":
            value_json_type = python_type_to_jx_type[value.__class__]
            column_json_type = es_type_to_json_type[sub_schema.es_type]

            if value_json_type == column_json_type:
                pass  # ok
            elif value_json_type == ARRAY and all(
                python_type_to_jx_type[v.__class__] == column_json_type for v in value if v != None
            ):
                pass  # empty arrays can be anything
            else:
                Log.error(
                    "Can not store {{value}} in {{column|quote}}", value=value, column=sub_schema.name,
                )

            # treat the column as a schema holding just that one typed entry
            sub_schema = {json_type_to_inserter_type[value_json_type]: sub_schema}

        if value == None and path:
            Log.error("can not encode null (missing) values")
        elif value is True:
            if BOOLEAN_KEY not in sub_schema:
                sub_schema[BOOLEAN_KEY] = {}
                net_new_properties.append(path + [BOOLEAN_KEY])
            append(buffer, "{")
            append(buffer, QUOTED_BOOLEAN_KEY)
            append(buffer, "true}")
            return
        elif value is False:
            if BOOLEAN_KEY not in sub_schema:
                sub_schema[BOOLEAN_KEY] = {}
                net_new_properties.append(path + [BOOLEAN_KEY])
            append(buffer, "{")
            append(buffer, QUOTED_BOOLEAN_KEY)
            append(buffer, "false}")
            return

        _type = value.__class__
        if _type in (dict, Data):
            if sub_schema.__class__.__name__ == "Column":
                Log.error("Can not handle {column|json}", column=sub_schema)

            if ARRAY_KEY in sub_schema:
                # PREFER NESTED, WHEN SEEN BEFORE
                if value:
                    append(buffer, "{")
                    append(buffer, QUOTED_ARRAY_KEY)
                    append(buffer, "[")
                    _dict2json(
                        value, sub_schema[ARRAY_KEY], path + [ARRAY_KEY], net_new_properties, buffer,
                    )
                    append(buffer, "]" + COMMA)
                    append(buffer, QUOTED_EXISTS_KEY)
                    append(buffer, str(len(value)))
                    append(buffer, "}")
                else:
                    # SINGLETON LIST
                    append(buffer, "{")
                    append(buffer, QUOTED_ARRAY_KEY)
                    append(buffer, "[{")
                    append(buffer, QUOTED_EXISTS_KEY)
                    append(buffer, "1}]")
                    append(buffer, COMMA)
                    append(buffer, QUOTED_EXISTS_KEY)
                    append(buffer, "1}")
            else:
                if EXISTS_KEY not in sub_schema:
                    sub_schema[EXISTS_KEY] = {}
                    net_new_properties.append(path + [EXISTS_KEY])

                if value:
                    _dict2json(value, sub_schema, path, net_new_properties, buffer)
                else:
                    append(buffer, "{")
                    append(buffer, QUOTED_EXISTS_KEY)
                    append(buffer, "1}")
        elif _type is binary_type:
            if STRING_KEY not in sub_schema:
                sub_schema[STRING_KEY] = True
                net_new_properties.append(path + [STRING_KEY])
            append(buffer, "{")
            append(buffer, QUOTED_STRING_KEY)
            append(buffer, quote(value.decode("utf8")))
            append(buffer, "}")
        elif _type is text:
            if STRING_KEY not in sub_schema:
                sub_schema[STRING_KEY] = True
                net_new_properties.append(path + [STRING_KEY])
            append(buffer, "{")
            append(buffer, QUOTED_STRING_KEY)
            append(buffer, quote(value))
            append(buffer, "}")
        elif _type in integer_types:
            if NUMBER_KEY not in sub_schema:
                sub_schema[NUMBER_KEY] = True
                net_new_properties.append(path + [NUMBER_KEY])

            append(buffer, "{")
            append(buffer, QUOTED_NUMBER_KEY)
            append(buffer, str(value))
            append(buffer, "}")
        elif _type in (float, Decimal):
            if NUMBER_KEY not in sub_schema:
                sub_schema[NUMBER_KEY] = True
                net_new_properties.append(path + [NUMBER_KEY])
            append(buffer, "{")
            append(buffer, QUOTED_NUMBER_KEY)
            append(buffer, float2json(value))
            append(buffer, "}")
        elif _type in (set, list, tuple, FlatList):
            if len(value) == 0:
                append(buffer, "{")
                append(buffer, QUOTED_EXISTS_KEY)
                append(buffer, "0}")
            elif any(v.__class__ in (Data, dict, set, list, tuple, FlatList) for v in value):
                if len(value) == 1:
                    if ARRAY_KEY in sub_schema:
                        append(buffer, "{")
                        append(buffer, QUOTED_ARRAY_KEY)
                        _list2json(
                            value, sub_schema[ARRAY_KEY], path + [ARRAY_KEY], net_new_properties, buffer,
                        )
                        append(buffer, "}")
                    else:
                        # NO NEED TO NEST, SO DO NOT DO IT
                        # a set can not be indexed, so take the only element by iteration
                        only_item = next(iter(value))
                        typed_encode(only_item, sub_schema, path, net_new_properties, buffer)
                else:
                    if ARRAY_KEY not in sub_schema:
                        sub_schema[ARRAY_KEY] = {}
                        net_new_properties.append(path + [ARRAY_KEY])
                    append(buffer, "{")
                    append(buffer, QUOTED_ARRAY_KEY)
                    _list2json(
                        value, sub_schema[ARRAY_KEY], path + [ARRAY_KEY], net_new_properties, buffer,
                    )
                    append(buffer, "}")
            else:
                # ALLOW PRIMITIVE MULTIVALUES
                value = [v for v in value if v != None]
                types = list(set(python_type_to_jx_type_key[v.__class__] for v in value))
                if len(types) == 0:  # HANDLE LISTS WITH Nones IN THEM
                    append(buffer, "{")
                    append(buffer, QUOTED_ARRAY_KEY)
                    append(buffer, "[]}")
                elif len(types) > 1:
                    # mixed primitive types: every element is encoded with its own type key
                    _list2json(
                        value, sub_schema, path + [ARRAY_KEY], net_new_properties, buffer,
                    )
                else:
                    # all elements share one type key: write them under that key
                    element_type = types[0]
                    if element_type not in sub_schema:
                        sub_schema[element_type] = True
                        net_new_properties.append(path + [element_type])
                    append(buffer, "{")
                    append(buffer, quote(element_type))
                    append(buffer, COLON)
                    _multivalue2json(
                        value, sub_schema[element_type], path + [element_type], net_new_properties, buffer,
                    )
                    append(buffer, "}")
        elif _type is date:
            if NUMBER_KEY not in sub_schema:
                sub_schema[NUMBER_KEY] = True
                net_new_properties.append(path + [NUMBER_KEY])
            append(buffer, "{")
            append(buffer, QUOTED_NUMBER_KEY)
            append(buffer, float2json(datetime2unix(value)))
            append(buffer, "}")
        elif _type is datetime:
            if NUMBER_KEY not in sub_schema:
                sub_schema[NUMBER_KEY] = True
                net_new_properties.append(path + [NUMBER_KEY])
            append(buffer, "{")
            append(buffer, QUOTED_NUMBER_KEY)
            append(buffer, float2json(datetime2unix(value)))
            append(buffer, "}")
        elif _type is Date:
            if NUMBER_KEY not in sub_schema:
                sub_schema[NUMBER_KEY] = True
                net_new_properties.append(path + [NUMBER_KEY])
            append(buffer, "{")
            append(buffer, QUOTED_NUMBER_KEY)
            append(buffer, float2json(value.unix))
            append(buffer, "}")
        elif _type is timedelta:
            if NUMBER_KEY not in sub_schema:
                sub_schema[NUMBER_KEY] = True
                net_new_properties.append(path + [NUMBER_KEY])
            append(buffer, "{")
            append(buffer, QUOTED_NUMBER_KEY)
            append(buffer, float2json(value.total_seconds()))
            append(buffer, "}")
        elif _type is Duration:
            if NUMBER_KEY not in sub_schema:
                sub_schema[NUMBER_KEY] = True
                net_new_properties.append(path + [NUMBER_KEY])
            append(buffer, "{")
            append(buffer, QUOTED_NUMBER_KEY)
            append(buffer, float2json(value.seconds))
            append(buffer, "}")
        elif _type is NullType:
            append(buffer, "null")
        elif hasattr(value, "__data__"):
            # the object supplies its own data representation: encode that instead
            typed_encode(value.__data__(), sub_schema, path, net_new_properties, buffer)
        elif hasattr(value, "__iter__"):
            if ARRAY_KEY not in sub_schema:
                sub_schema[ARRAY_KEY] = {}
                net_new_properties.append(path + [ARRAY_KEY])

            append(buffer, "{")
            append(buffer, QUOTED_ARRAY_KEY)
            _iter2json(
                value, sub_schema[ARRAY_KEY], path + [ARRAY_KEY], net_new_properties, buffer,
            )
            append(buffer, "}")
        else:
            Log.error(str(repr(value)) + " is not JSON serializable")
    except Exception as e:
        Log.error(str(repr(value)) + " is not JSON serializable", cause=e)
407

408

409
def _list2json(value, sub_schema, path, net_new_properties, buffer):
    """
    Append a JSON array to `buffer`, encoding each element with typed_encode.

    :param value: the collection of elements to encode
    :param sub_schema: schema passed to typed_encode for every element
    :param path: list of the current path, passed to typed_encode for every element
    :param net_new_properties: list collecting newly found properties
    :param buffer: UnicodeBuilder object that receives the output
    """
    if not value:
        append(buffer, "[]")
        return

    for position, item in enumerate(value):
        # "[" opens the array before the first element, COMMA separates the rest
        append(buffer, COMMA if position else "[")
        typed_encode(item, sub_schema, path, net_new_properties, buffer)
    append(buffer, "]")
    # NOTE: no EXISTS_KEY element count is written after the array here
422

423

424
def _multivalue2json(value, sub_schema, path, net_new_properties, buffer):
    """
    Append primitive values to `buffer` using the plain json_encoder.

    A single value is written bare; two or more are written as a JSON array.

    :param value: sequence of values to write
    :param sub_schema: not used by this function
    :param path: not used by this function
    :param net_new_properties: not used by this function
    :param buffer: UnicodeBuilder object that receives the output
    """
    if not value:
        append(buffer, "[]")
        return

    if len(value) == 1:
        # a lone value is not wrapped in an array
        append(buffer, json_encoder(value[0]))
        return

    for position, item in enumerate(value):
        append(buffer, COMMA if position else "[")
        append(buffer, json_encoder(item))
    append(buffer, "]")
436

437

438
def _iter2json(value, sub_schema, path, net_new_properties, buffer):
    """
    Append the elements of an iterable to `buffer` as a JSON array, followed
    by COMMA, the quoted EXISTS_KEY and the number of elements written.

    :param value: the iterable to encode; it is consumed once
    :param sub_schema: schema passed to typed_encode for every element
    :param path: list of the current path, passed to typed_encode for every element
    :param net_new_properties: list collecting newly found properties
    :param buffer: UnicodeBuilder object that receives the output
    """
    append(buffer, "[")
    total = 0
    for item in value:
        # nothing precedes the first element, COMMA precedes each later one
        append(buffer, COMMA if total else "")
        typed_encode(item, sub_schema, path, net_new_properties, buffer)
        total += 1
    append(buffer, "]")
    append(buffer, COMMA)
    append(buffer, QUOTED_EXISTS_KEY)
    append(buffer, str(total))
451

452

453
def _dict2json(value, sub_schema, path, net_new_properties, buffer):
    """
    Append an object to `buffer` as typed JSON, properties sorted by name.

    Properties whose value equals None or the empty string are skipped. The
    object always ends with the quoted EXISTS_KEY and the value 1.

    :param value: the mapping to encode
    :param sub_schema: schema for this object; property names not yet known
        are added to it in place
    :param path: list of the current path
    :param net_new_properties: list collecting the paths of newly added properties
    :param buffer: UnicodeBuilder object that receives the output
    """
    emitted = False
    for name, child in sort_using_key(value.items(), lambda pair: pair[0]):
        if child == None or child == "":
            continue
        # "{" opens the object before the first property, COMMA separates the rest
        append(buffer, COMMA if emitted else "{")
        emitted = True
        if is_binary(name):
            name = name.decode("utf8")
        if not is_text(name):
            Log.error("Expecting property name to be a string")
        if name not in sub_schema:
            sub_schema[name] = {}
            net_new_properties.append(path + [name])
        append(buffer, quote(encode_property(name)))
        append(buffer, COLON)
        typed_encode(child, sub_schema[name], path + [name], net_new_properties, buffer)

    # close with the existence marker; open the object first when nothing was written
    append(buffer, COMMA if emitted else "{")
    append(buffer, QUOTED_EXISTS_KEY)
    append(buffer, "1}")
478

479

480
# Module-level alias, called by the encoders as append(buffer, text)
append = UnicodeBuilder.append


def _quoted_key(key):
    """Return the quoted form of a type key followed by COLON."""
    return quote(key) + COLON


# Pre-built `"key":` fragments, one for each type key
QUOTED_BOOLEAN_KEY = _quoted_key(BOOLEAN_KEY)
QUOTED_NUMBER_KEY = _quoted_key(NUMBER_KEY)
QUOTED_INTEGER_KEY = _quoted_key(INTEGER_KEY)
QUOTED_STRING_KEY = _quoted_key(STRING_KEY)
QUOTED_ARRAY_KEY = _quoted_key(ARRAY_KEY)
QUOTED_EXISTS_KEY = _quoted_key(EXISTS_KEY)

# Type key -> JSON type, for the primitive type keys
inserter_type_to_json_type = {
    BOOLEAN_KEY: BOOLEAN,
    NUMBER_KEY: NUMBER,
    INTEGER_KEY: INTEGER,
    STRING_KEY: STRING,
}

# JSON type -> type key; INTEGER and NUMBER both map to NUMBER_KEY
json_type_to_inserter_type = {
    BOOLEAN: BOOLEAN_KEY,
    INTEGER: NUMBER_KEY,
    NUMBER: NUMBER_KEY,
    STRING: STRING_KEY,
    ARRAY: ARRAY_KEY,
    EXISTS: EXISTS_KEY,
}

# es_type name -> JSON type name, grouped by the JSON type they share
es_type_to_json_type = {
    **dict.fromkeys(("text", "string", "keyword"), "string"),
    **dict.fromkeys(("float", "double", "integer"), "number"),
    "object": "object",
    "nested": "nested",
    "source": "json",
    "boolean": "boolean",
    "exists": "exists",
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc