• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

OISF / suricata / 23374838686

21 Mar 2026 07:29AM UTC coverage: 59.341% (-20.0%) from 79.315%
23374838686

Pull #15075

github

web-flow
Merge 90b4e834f into 6587e363a
Pull Request #15075: Stack 8001 v16.4

38 of 70 new or added lines in 10 files covered. (54.29%)

34165 existing lines in 563 files now uncovered.

119621 of 201584 relevant lines covered (59.34%)

650666.92 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

29.46
/src/datasets-context-json.c
1
/* Copyright (C) 2025 Open Information Security Foundation
2
 *
3
 * You can copy, redistribute or modify this Program under the terms of
4
 * the GNU General Public License version 2 as published by the Free
5
 * Software Foundation.
6
 *
7
 * This program is distributed in the hope that it will be useful,
8
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10
 * GNU General Public License for more details.
11
 *
12
 * You should have received a copy of the GNU General Public License
13
 * version 2 along with this program; if not, write to the Free Software
14
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
15
 * 02110-1301, USA.
16
 */
17

18
/**
19
 * \file
20
 *
21
 * \author Eric Leblond <el@stamus-networks.com>
22
 */
23

24
#include "suricata-common.h"
25
#include "suricata.h"
26
#include "rust.h"
27
#include "datasets.h"
28
#include "datasets-context-json.h"
29
#include "datasets-ipv4.h"
30
#include "datasets-ipv6.h"
31
#include "datasets-md5.h"
32
#include "datasets-sha256.h"
33
#include "datasets-string.h"
34
#include "util-byte.h"
35
#include "util-ip.h"
36
#include "util-debug.h"
37

38
static int DatajsonAdd(
39
        Dataset *set, const uint8_t *data, const uint32_t data_len, DataJsonType *json);
40

41
/**
 * \brief Release a hash entry: drop one use count, then unlock it.
 *
 * \param d hash entry to release
 */
static inline void DatajsonUnlockData(THashData *d)
{
    /* the result of the decrement is intentionally discarded */
    (void)THashDecrUsecnt(d);
    THashDataUnlock(d);
}
×
46

47
/**
 * \brief Release the hash entry referenced by a lookup result, if any.
 *
 * \param r lookup result; nothing happens when it carries no hash entry
 */
void DatajsonUnlockElt(DataJsonResultType *r)
{
    if (r->hashdata == NULL) {
        return;
    }
    DatajsonUnlockData(r->hashdata);
}
×
53

54
/**
 * \brief Duplicate the JSON payload of \a src into \a dst.
 *
 * A buffer of src->len + 1 bytes is allocated so the copy is always
 * NUL terminated.
 *
 * \param dst destination; its len and value fields are overwritten
 * \param src source payload
 *
 * \retval 0 copy done
 * \retval -1 allocation failed (dst->len has already been set at that point)
 */
int DatajsonCopyJson(DataJsonType *dst, DataJsonType *src)
{
    dst->len = src->len;

    dst->value = SCMalloc(dst->len + 1);
    if (!dst->value) {
        return -1;
    }

    memcpy(dst->value, src->value, dst->len);
    /* terminate explicitly: only dst->len bytes were copied */
    dst->value[dst->len] = '\0';
    return 0;
}
×
64

65
/**
 * \brief Check whether a buffer holds exactly one float or integer literal.
 *
 * The number is parsed with strtof(). Everything in the first \a ins bytes
 * that follows the parsed number must be whitespace (space, tab, CR, LF),
 * so input with trailing garbage such as "12abc" is rejected.
 *
 * \param in NUL terminated input
 * \param ins number of bytes of \a in to consider
 *
 * \retval true the input is a float or an integer
 * \retval false nothing could be parsed or extra characters follow the number
 */
static bool IsFloat(const char *in, size_t ins)
{
    if (in == NULL || ins == 0) {
        return false;
    }

    char *endptr = NULL;
    (void)strtof(in, &endptr);
    if (endptr == in) {
        /* strtof() consumed nothing: not a number at all */
        return false;
    }

    /* whatever is left inside the window must be whitespace only */
    const char *const limit = in + ins;
    for (const char *p = endptr; p < limit; p++) {
        if (*p != ' ' && *p != '\t' && *p != '\r' && *p != '\n') {
            return false;
        }
    }
    return true;
}
×
80

81
/**
 * \brief Validate a serialized JSON value and store a copy of it.
 *
 * The input is accepted when json_loads() parses it, or, failing that,
 * when it looks like a bare number (IsFloat()) or like a string enclosed
 * in double quotes.
 *
 * \param in NUL terminated input
 * \param ins length of the input in bytes
 * \param rep_out receives the length and an allocated copy of the input
 *
 * \retval 0 input accepted and copied
 * \retval -1 input too long, not valid, or allocation failure
 */
static int ParseJsonLine(const char *in, size_t ins, DataJsonType *rep_out)
{
    if (ins > DATAJSON_JSON_LENGTH) {
        SCLogError("dataset: json string too long: %s", in);
        return -1;
    }

    json_error_t jerror;
    json_t *msg = json_loads(in, 0, &jerror);
    if (msg != NULL) {
        json_decref(msg);
    } else {
        /* JANSSON does not see an integer, float or a string as valid JSON.
           So we need to exclude them from failure. A quoted string needs at
           least the two quote characters, which also keeps in[ins - 1] inside
           the buffer. */
        const bool quoted = (ins >= 2) && (in[0] == '"') && (in[ins - 1] == '"');
        if (!quoted && !IsFloat(in, ins)) {
            SCLogError("dataset: Invalid json: %s: '%s'", jerror.text, in);
            return -1;
        }
    }

    rep_out->len = (uint16_t)ins;
    rep_out->value = SCStrndup(in, ins);
    if (rep_out->value == NULL) {
        return -1;
    }
    return 0;
}
×
107

108
/**
 * \brief Walk a JSON object along a dot separated key path.
 *
 * \param json object to start from
 * \param key path such as "a.b.c"; each segment names a member of the
 *            object reached so far
 *
 * \retval the value found at the end of the path
 * \retval NULL bad arguments, key longer than SIG_JSON_CONTENT_KEY_LEN,
 *              a non-object met on the way, or a missing member
 */
static json_t *GetSubObjectByKey(json_t *json, const char *key)
{
    if (json == NULL || key == NULL || !json_is_object(json)) {
        return NULL;
    }
    if (strlen(key) > SIG_JSON_CONTENT_KEY_LEN) {
        DEBUG_VALIDATE_BUG_ON(strlen(key) > SIG_JSON_CONTENT_KEY_LEN);
        return NULL;
    }

    json_t *node = json;
    for (const char *segment = key; segment != NULL;) {
        const char *sep = strchr(segment, '.');
        const size_t seg_len = (sep != NULL) ? (size_t)(sep - segment) : strlen(segment);

        /* bounded by the key length check above */
        char name[seg_len + 1];
        strlcpy(name, segment, seg_len + 1);

        if (!json_is_object(node)) {
            return NULL;
        }
        node = json_object_get(node, name);
        if (node == NULL) {
            return NULL;
        }

        segment = (sep != NULL) ? sep + 1 : NULL;
    }
    return node;
}
×
138

139
/**
 * \brief Load a JSON file and hand back the array to iterate over.
 *
 * \param file path of the JSON file, expected to hold one JSON document
 * \param array receives a reference to the array; the caller releases it
 *              with json_decref()
 * \param key optional dot separated path to the array inside the document;
 *            NULL or "" means the document itself is the array
 *
 * \retval 0 success
 * \retval -1 file not loadable, key not found, or target is not an array
 */
static int ParseJsonFile(const char *file, json_t **array, char *key)
{
    json_error_t error;
    /* assume we have one single JSON element in FILE */
    json_t *root = json_load_file(file, 0, &error);
    if (root == NULL) {
        FatalErrorOnInit("can't load JSON, error on line %d: %s", error.line, error.text);
        return -1;
    }

    if (key != NULL && strlen(key) != 0) {
        *array = GetSubObjectByKey(root, key);
        if (*array == NULL) {
            SCLogError("dataset: %s failed to get key '%s'", file, key);
            json_decref(root);
            return -1;
        }
        /* keep the sub-object alive, then let go of the enclosing document */
        json_incref(*array);
        json_decref(root);
    } else {
        *array = root;
    }

    if (!json_is_array(*array)) {
        FatalErrorOnInit("not an array");
        json_decref(*array);
        return -1;
    }
    return 0;
}
×
169

170
static int DatajsonSetValue(
171
        Dataset *set, const uint8_t *val, uint16_t val_len, json_t *value, const char *json_key)
UNCOV
172
{
×
UNCOV
173
    DataJsonType elt = { .value = NULL, .len = 0 };
×
UNCOV
174
    if (set->remove_key) {
×
UNCOV
175
        json_object_del(value, json_key);
×
UNCOV
176
    }
×
177

UNCOV
178
    elt.value = json_dumps(value, JSON_COMPACT);
×
UNCOV
179
    if (elt.value == NULL) {
×
180
        FatalErrorOnInit("json_dumps failed for %s/%s", set->name, set->load);
×
181
        return 0;
×
182
    }
×
UNCOV
183
    if (strlen(elt.value) > DATAJSON_JSON_LENGTH) {
×
184
        SCLogError("dataset: json string too long: %s/%s", set->name, set->load);
×
185
        SCFree(elt.value);
×
186
        elt.value = NULL;
×
187
        return 0;
×
188
    }
×
UNCOV
189
    elt.len = (uint16_t)strlen(elt.value);
×
190

UNCOV
191
    int add_ret = DatajsonAdd(set, val, val_len, &elt);
×
UNCOV
192
    if (add_ret < 0) {
×
193
        FatalErrorOnInit("datajson data add failed %s/%s", set->name, set->load);
×
194
        return 0;
×
195
    }
×
UNCOV
196
    return add_ret;
×
UNCOV
197
}
×
198

199
/**
200
 *  \retval 1 data was added to the hash
201
 *  \retval 0 data was not added to the hash as it is already there
202
 *  \retval -1 failed to add data to the hash
203
 */
204
static int DatajsonAddString(
205
        Dataset *set, const uint8_t *data, const uint32_t data_len, const DataJsonType *json)
UNCOV
206
{
×
UNCOV
207
    StringType lookup = { .ptr = (uint8_t *)data, .len = data_len, .json = *json };
×
UNCOV
208
    struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
×
UNCOV
209
    if (res.data) {
×
UNCOV
210
        DatajsonUnlockData(res.data);
×
UNCOV
211
        return res.is_new ? 1 : 0;
×
UNCOV
212
    }
×
213
    return -1;
×
UNCOV
214
}
×
215

216
static int DatajsonAddMd5(
217
        Dataset *set, const uint8_t *data, const uint32_t data_len, const DataJsonType *json)
UNCOV
218
{
×
UNCOV
219
    if (data_len != SC_MD5_LEN)
×
220
        return -2;
×
221

UNCOV
222
    Md5Type lookup = { .json = *json };
×
UNCOV
223
    memcpy(lookup.md5, data, SC_MD5_LEN);
×
UNCOV
224
    struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
×
UNCOV
225
    if (res.data) {
×
UNCOV
226
        DatajsonUnlockData(res.data);
×
UNCOV
227
        return res.is_new ? 1 : 0;
×
UNCOV
228
    }
×
229
    return -1;
×
UNCOV
230
}
×
231

232
static int DatajsonAddSha256(
233
        Dataset *set, const uint8_t *data, const uint32_t data_len, const DataJsonType *json)
UNCOV
234
{
×
UNCOV
235
    if (data_len != SC_SHA256_LEN)
×
236
        return -2;
×
237

UNCOV
238
    Sha256Type lookup = { .json = *json };
×
UNCOV
239
    memcpy(lookup.sha256, data, SC_SHA256_LEN);
×
UNCOV
240
    struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
×
UNCOV
241
    if (res.data) {
×
UNCOV
242
        DatajsonUnlockData(res.data);
×
UNCOV
243
        return res.is_new ? 1 : 0;
×
UNCOV
244
    }
×
245
    return -1;
×
UNCOV
246
}
×
247

248
static int DatajsonAddIPv4(
249
        Dataset *set, const uint8_t *data, const uint32_t data_len, const DataJsonType *json)
UNCOV
250
{
×
UNCOV
251
    if (data_len < SC_IPV4_LEN)
×
252
        return -2;
×
253

UNCOV
254
    IPv4Type lookup = { .json = *json };
×
UNCOV
255
    memcpy(lookup.ipv4, data, SC_IPV4_LEN);
×
UNCOV
256
    struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
×
UNCOV
257
    if (res.data) {
×
UNCOV
258
        DatajsonUnlockData(res.data);
×
UNCOV
259
        return res.is_new ? 1 : 0;
×
UNCOV
260
    }
×
261
    return -1;
×
UNCOV
262
}
×
263

264
static int DatajsonAddIPv6(
265
        Dataset *set, const uint8_t *data, const uint32_t data_len, const DataJsonType *json)
UNCOV
266
{
×
UNCOV
267
    if (data_len != SC_IPV6_LEN)
×
268
        return -2;
×
269

UNCOV
270
    IPv6Type lookup = { .json = *json };
×
UNCOV
271
    memcpy(lookup.ipv6, data, SC_IPV6_LEN);
×
UNCOV
272
    struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
×
UNCOV
273
    if (res.data) {
×
UNCOV
274
        DatajsonUnlockData(res.data);
×
UNCOV
275
        return res.is_new ? 1 : 0;
×
UNCOV
276
    }
×
277
    return -1;
×
UNCOV
278
}
×
279

280
/*
281
 * \brief Add data to the dataset from a JSON object.
282
 *
283
 * \param set The dataset to add data to.
284
 * \param data The data to add.
285
 * \param data_len The length of the data.
286
 * \param json The JSON object containing additional information.
287
 *
288
 * Memory allocated for the `json` parameter will be freed if the data
289
 * is not added to the hash.
290
 *
291
 * \retval 1 Data was added to the hash.
292
 * \retval 0 Data was not added to the hash as it is already there.
293
 * \retval -1 Failed to add data to the hash.
294
 */
295
static int DatajsonAdd(
296
        Dataset *set, const uint8_t *data, const uint32_t data_len, DataJsonType *json)
UNCOV
297
{
×
UNCOV
298
    if (json == NULL)
×
299
        return -1;
×
UNCOV
300
    if (json->value == NULL)
×
301
        return -1;
×
302

UNCOV
303
    if (set == NULL) {
×
304
        if (json->value != NULL) {
×
305
            SCFree(json->value);
×
306
            json->value = NULL;
×
307
        }
×
308
        return -1;
×
309
    }
×
310

UNCOV
311
    int add_ret = 0;
×
UNCOV
312
    switch (set->type) {
×
UNCOV
313
        case DATASET_TYPE_STRING:
×
UNCOV
314
            add_ret = DatajsonAddString(set, data, data_len, json);
×
UNCOV
315
            break;
×
UNCOV
316
        case DATASET_TYPE_MD5:
×
UNCOV
317
            add_ret = DatajsonAddMd5(set, data, data_len, json);
×
UNCOV
318
            break;
×
UNCOV
319
        case DATASET_TYPE_SHA256:
×
UNCOV
320
            add_ret = DatajsonAddSha256(set, data, data_len, json);
×
UNCOV
321
            break;
×
UNCOV
322
        case DATASET_TYPE_IPV4:
×
UNCOV
323
            add_ret = DatajsonAddIPv4(set, data, data_len, json);
×
UNCOV
324
            break;
×
UNCOV
325
        case DATASET_TYPE_IPV6:
×
UNCOV
326
            add_ret = DatajsonAddIPv6(set, data, data_len, json);
×
UNCOV
327
            break;
×
328
        default:
×
329
            add_ret = -1;
×
330
            break;
×
UNCOV
331
    }
×
332

UNCOV
333
    SCFree(json->value);
×
UNCOV
334
    json->value = NULL;
×
335

UNCOV
336
    return add_ret;
×
UNCOV
337
}
×
338

339
static int DatajsonLoadTypeFromJSON(Dataset *set, char *json_key, char *array_key,
340
        uint32_t (*DatajsonAddTypeElement)(Dataset *, json_t *, char *, bool *))
341
{
25✔
342
    if (strlen(set->load) == 0)
25✔
343
        return 0;
×
344

345
    SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);
25✔
346

347
    uint32_t cnt = 0;
25✔
348
    json_t *json;
25✔
349
    bool found = false;
25✔
350
    SCLogDebug("dataset: array_key '%s' %p", array_key, array_key);
25✔
351
    if (ParseJsonFile(set->load, &json, array_key) == -1) {
25✔
352
        SCLogError("dataset: %s failed to parse from '%s'", set->name, set->load);
25✔
353
        return -1;
25✔
354
    }
25✔
355

UNCOV
356
    size_t index;
×
UNCOV
357
    json_t *value;
×
UNCOV
358
    json_array_foreach (json, index, value) {
×
UNCOV
359
        cnt += DatajsonAddTypeElement(set, value, json_key, &found);
×
UNCOV
360
    }
×
UNCOV
361
    json_decref(json);
×
362

UNCOV
363
    if (found == false) {
×
364
        FatalErrorOnInit(
×
365
                "No valid entries for key '%s' found in the file '%s'", json_key, set->load);
×
366
        return -1;
×
367
    }
×
UNCOV
368
    THashConsolidateMemcap(set->hash);
×
369

UNCOV
370
    SCLogConfig("dataset: %s loaded %u records", set->name, cnt);
×
UNCOV
371
    return 0;
×
UNCOV
372
}
×
373

374
static uint32_t DatajsonLoadTypeFromJsonline(Dataset *set, char *json_key,
375
        uint32_t (*DatajsonAddTypeElement)(Dataset *, json_t *, char *, bool *))
376
{
4✔
377
    uint32_t cnt = 0;
4✔
378
    FILE *fp = fopen(set->load, "r");
4✔
379
    bool found = false;
4✔
380

381
    if (fp == NULL) {
4✔
382
        SCLogError("dataset: %s failed to open file '%s'", set->name, set->load);
4✔
383
        return 0;
4✔
384
    }
4✔
385

UNCOV
386
    char line[DATAJSON_JSON_LENGTH];
×
UNCOV
387
    while (fgets(line, sizeof(line), fp) != NULL) {
×
UNCOV
388
        json_t *json = json_loads(line, 0, NULL);
×
UNCOV
389
        if (json == NULL) {
×
390
            SCLogError("dataset: %s failed to parse line '%s'", set->name, line);
×
391
            goto out_err;
×
392
        }
×
UNCOV
393
        cnt += DatajsonAddTypeElement(set, json, json_key, &found);
×
UNCOV
394
        json_decref(json);
×
UNCOV
395
    }
×
UNCOV
396
    int close_op = fclose(fp);
×
UNCOV
397
    if (close_op != 0) {
×
398
        SCLogError("dataset: %s failed to close file '%s'", set->name, set->load);
×
399
        return 0;
×
400
    }
×
401

UNCOV
402
    if (found == false) {
×
403
        FatalErrorOnInit(
×
404
                "No valid entries for key '%s' found in the file '%s'", json_key, set->load);
×
405
        return 0;
×
406
    }
×
UNCOV
407
    return cnt;
×
408
out_err:
×
409
    close_op = fclose(fp);
×
410
    if (close_op != 0) {
×
411
        SCLogError("dataset: %s failed to close file '%s'", set->name, set->load);
×
412
    }
×
413
    return 0;
×
UNCOV
414
}
×
415

416
/**
 * \brief Add one JSON record to a string dataset.
 *
 * \param set dataset to add to
 * \param value the JSON record
 * \param json_key path of the member of \a value holding the string key
 * \param found set to true as soon as \a json_key is present in \a value
 *
 * \retval result of DatajsonSetValue() when the record was processed
 * \retval 0 key missing, key value not a string, key value longer than
 *           UINT16_MAX bytes, or add failure
 */
static uint32_t DatajsonAddStringElement(Dataset *set, json_t *value, char *json_key, bool *found)
{
    json_t *key = GetSubObjectByKey(value, json_key);
    if (key == NULL) {
        /* ignore error as it can be a working mode where some entries
           are not in the same format */
        return 0;
    }

    *found = true;

    const char *val_key = json_string_value(key);
    if (val_key == NULL) {
        FatalErrorOnInit("dataset: %s failed to get value for key '%s'", set->name, json_key);
        return 0;
    }

    /* the length is handed on as uint16_t: reject what does not fit
       instead of silently truncating it into a different key */
    const size_t val_len = strlen(val_key);
    if (val_len > UINT16_MAX) {
        FatalErrorOnInit("dataset: %s value for key '%s' is too long", set->name, json_key);
        return 0;
    }

    /* DatajsonSetValue() may delete json_key from value (set->remove_key);
       hold a reference so val_key stays valid for the duration of the call */
    json_incref(key);
    int ret = DatajsonSetValue(set, (const uint8_t *)val_key, (uint16_t)val_len, value, json_key);
    json_decref(key);
    if (ret < 0) {
        FatalErrorOnInit("datajson data add failed %s/%s", set->name, set->load);
        return 0;
    }
    return ret;
}
×
443

444
/**
 * \brief Load a string dataset from the file named by set->load.
 *
 * \param set dataset to fill
 * \param json_key path of the key member inside each record
 * \param array_key optional path of the record array (JSON format only)
 * \param format DATASET_FORMAT_JSON or DATASET_FORMAT_NDJSON
 *
 * \retval 0 loaded, or nothing to load
 * \retval -1 loading a JSON format file failed
 */
static int DatajsonLoadString(Dataset *set, char *json_key, char *array_key, DatasetFormats format)
{
    if (strlen(set->load) == 0)
        return 0;

    SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);

    if (format == DATASET_FORMAT_JSON) {
        /* DatajsonLoadTypeFromJSON() returns a 0/-1 status, not a record
           count, and on success consolidates the memcap and logs the count
           itself: propagate its status instead of treating it as a count */
        return DatajsonLoadTypeFromJSON(set, json_key, array_key, DatajsonAddStringElement);
    }

    uint32_t cnt = 0;
    if (format == DATASET_FORMAT_NDJSON) {
        cnt = DatajsonLoadTypeFromJsonline(set, json_key, DatajsonAddStringElement);
    }
    THashConsolidateMemcap(set->hash);

    SCLogConfig("dataset: %s loaded %u records", set->name, cnt);
    return 0;
}
21✔
462

463
/**
 * \brief Add one JSON record to an MD5 dataset.
 *
 * \param set dataset to add to
 * \param value the JSON record
 * \param json_key path of the member of \a value holding the hex encoded MD5
 * \param found set to true as soon as \a json_key is present in \a value
 *
 * \retval result of DatajsonSetValue() when the record was processed
 * \retval 0 key missing, key value not a string, or not a valid MD5 hex string
 */
static uint32_t DatajsonAddMd5Element(Dataset *set, json_t *value, char *json_key, bool *found)
{
    json_t *key = GetSubObjectByKey(value, json_key);
    if (key == NULL) {
        /* ignore error as it can be a working mode where some entries
           are not in the same format */
        return 0;
    }

    *found = true;

    const char *hash_string = json_string_value(key);
    if (hash_string == NULL) {
        /* the member exists but is not a JSON string */
        FatalErrorOnInit("dataset: %s failed to get value for key '%s'", set->name, json_key);
        return 0;
    }
    if (strlen(hash_string) != SC_MD5_HEX_LEN) {
        FatalErrorOnInit("Not correct length for a hash");
        return 0;
    }

    uint8_t hash[SC_MD5_LEN];
    if (HexToRaw((const uint8_t *)hash_string, SC_MD5_HEX_LEN, hash, sizeof(hash)) < 0) {
        FatalErrorOnInit("bad hash for dataset %s/%s", set->name, set->load);
        return 0;
    }
    return DatajsonSetValue(set, hash, SC_MD5_LEN, value, json_key);
}
×
487

488
/**
 * \brief Load an MD5 dataset from the file named by set->load.
 *
 * \param set dataset to fill
 * \param json_key path of the key member inside each record
 * \param array_key optional path of the record array (JSON format only)
 * \param format DATASET_FORMAT_JSON or DATASET_FORMAT_NDJSON
 *
 * \retval 0 loaded, or nothing to load
 * \retval -1 loading a JSON format file failed
 */
static int DatajsonLoadMd5(Dataset *set, char *json_key, char *array_key, DatasetFormats format)
{
    if (strlen(set->load) == 0)
        return 0;

    SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);

    if (format == DATASET_FORMAT_JSON) {
        /* DatajsonLoadTypeFromJSON() returns a 0/-1 status, not a record
           count, and on success consolidates the memcap and logs the count
           itself: propagate its status instead of treating it as a count */
        return DatajsonLoadTypeFromJSON(set, json_key, array_key, DatajsonAddMd5Element);
    }

    uint32_t cnt = 0;
    if (format == DATASET_FORMAT_NDJSON) {
        cnt = DatajsonLoadTypeFromJsonline(set, json_key, DatajsonAddMd5Element);
    }
    THashConsolidateMemcap(set->hash);

    SCLogConfig("dataset: %s loaded %u records", set->name, cnt);
    return 0;
}
4✔
506

507
/**
 * \brief Add one JSON record to a SHA256 dataset.
 *
 * \param set dataset to add to
 * \param value the JSON record
 * \param json_key path of the member of \a value holding the hex encoded SHA256
 * \param found set to true as soon as \a json_key is present in \a value
 *
 * \retval result of DatajsonSetValue() when the record was processed
 * \retval 0 key missing, key value not a string, or not a valid SHA256 hex string
 */
static uint32_t DatajsonAddSha256Element(Dataset *set, json_t *value, char *json_key, bool *found)
{
    json_t *key = GetSubObjectByKey(value, json_key);
    if (key == NULL) {
        /* ignore error as it can be a working mode where some entries
           are not in the same format */
        return 0;
    }

    *found = true;

    const char *hash_string = json_string_value(key);
    if (hash_string == NULL) {
        /* the member exists but is not a JSON string */
        FatalErrorOnInit("dataset: %s failed to get value for key '%s'", set->name, json_key);
        return 0;
    }
    if (strlen(hash_string) != SC_SHA256_HEX_LEN) {
        FatalErrorOnInit("Not correct length for a hash");
        return 0;
    }

    uint8_t hash[SC_SHA256_LEN];
    if (HexToRaw((const uint8_t *)hash_string, SC_SHA256_HEX_LEN, hash, sizeof(hash)) < 0) {
        FatalErrorOnInit("bad hash for dataset %s/%s", set->name, set->load);
        return 0;
    }

    return DatajsonSetValue(set, hash, SC_SHA256_LEN, value, json_key);
}
×
532

533
/**
 * \brief Load a SHA256 dataset from the file named by set->load.
 *
 * \param set dataset to fill
 * \param json_key path of the key member inside each record
 * \param array_key optional path of the record array (JSON format only)
 * \param format DATASET_FORMAT_JSON or DATASET_FORMAT_NDJSON
 *
 * \retval 0 loaded, or nothing to load
 * \retval -1 loading a JSON format file failed
 */
static int DatajsonLoadSha256(Dataset *set, char *json_key, char *array_key, DatasetFormats format)
{
    if (strlen(set->load) == 0)
        return 0;

    SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);

    if (format == DATASET_FORMAT_JSON) {
        /* DatajsonLoadTypeFromJSON() returns a 0/-1 status, not a record
           count, and on success consolidates the memcap and logs the count
           itself: propagate its status instead of treating it as a count */
        return DatajsonLoadTypeFromJSON(set, json_key, array_key, DatajsonAddSha256Element);
    }

    uint32_t cnt = 0;
    if (format == DATASET_FORMAT_NDJSON) {
        cnt = DatajsonLoadTypeFromJsonline(set, json_key, DatajsonAddSha256Element);
    }
    THashConsolidateMemcap(set->hash);

    SCLogConfig("dataset: %s loaded %u records", set->name, cnt);
    return 0;
}
1✔
551

552
/**
 * \brief Add one JSON record to an IPv4 dataset.
 *
 * \param set dataset to add to
 * \param value the JSON record
 * \param json_key path of the member of \a value holding the IPv4 address string
 * \param found set to true as soon as \a json_key is present in \a value
 *
 * \retval result of DatajsonSetValue() when the record was processed
 * \retval 0 key missing, key value not a string, or not a valid IPv4 address
 */
static uint32_t DatajsonAddIpv4Element(Dataset *set, json_t *value, char *json_key, bool *found)
{
    json_t *key = GetSubObjectByKey(value, json_key);
    if (key == NULL) {
        /* ignore error as it can be a working mode where some entries
           are not in the same format */
        return 0;
    }

    *found = true;

    const char *ip_string = json_string_value(key);
    if (ip_string == NULL) {
        /* the member exists but is not a JSON string */
        FatalErrorOnInit("dataset: %s failed to get value for key '%s'", set->name, json_key);
        return 0;
    }

    struct in_addr in;
    if (inet_pton(AF_INET, ip_string, &in) != 1) {
        FatalErrorOnInit("datajson IPv4 parse failed %s/%s: %s", set->name, set->load, ip_string);
        return 0;
    }

    return DatajsonSetValue(set, (const uint8_t *)&in.s_addr, SC_IPV4_LEN, value, json_key);
}
×
572

573
/**
 * \brief Load an IPv4 dataset from the file named by set->load.
 *
 * \param set dataset to fill
 * \param json_key path of the key member inside each record
 * \param array_key optional path of the record array (JSON format only)
 * \param format DATASET_FORMAT_JSON or DATASET_FORMAT_NDJSON
 *
 * \retval 0 loaded, or nothing to load
 * \retval -1 loading a JSON format file failed
 */
static int DatajsonLoadIPv4(Dataset *set, char *json_key, char *array_key, DatasetFormats format)
{
    if (strlen(set->load) == 0)
        return 0;

    SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);

    if (format == DATASET_FORMAT_JSON) {
        /* DatajsonLoadTypeFromJSON() returns a 0/-1 status, not a record
           count, and on success consolidates the memcap and logs the count
           itself: propagate its status instead of treating it as a count */
        return DatajsonLoadTypeFromJSON(set, json_key, array_key, DatajsonAddIpv4Element);
    }

    uint32_t cnt = 0;
    if (format == DATASET_FORMAT_NDJSON) {
        cnt = DatajsonLoadTypeFromJsonline(set, json_key, DatajsonAddIpv4Element);
    }
    THashConsolidateMemcap(set->hash);

    SCLogConfig("dataset: %s loaded %u records", set->name, cnt);
    return 0;
}
3✔
591

592
/**
 * \brief Add one JSON record to an IPv6 dataset.
 *
 * \param set dataset to add to
 * \param value the JSON record
 * \param json_key path of the member of \a value holding the address string
 * \param found set to true as soon as \a json_key is present in \a value
 *
 * \retval result of DatajsonSetValue() when the record was processed
 * \retval 0 key missing, key value not a string, or address not parseable
 */
static uint32_t DatajsonAddIPv6Element(Dataset *set, json_t *value, char *json_key, bool *found)
{
    json_t *key = GetSubObjectByKey(value, json_key);
    if (key == NULL) {
        /* ignore error as it can be a working mode where some entries
           are not in the same format */
        return 0;
    }

    *found = true;

    const char *ip_string = json_string_value(key);
    if (ip_string == NULL) {
        /* the member exists but is not a JSON string */
        FatalErrorOnInit("dataset: %s failed to get value for key '%s'", set->name, json_key);
        return 0;
    }

    struct in6_addr in6;
    int ret = DatasetParseIpv6String(set, ip_string, &in6);
    if (ret < 0) {
        FatalErrorOnInit("unable to parse IP address");
        return 0;
    }

    return DatajsonSetValue(set, (const uint8_t *)&in6.s6_addr, SC_IPV6_LEN, value, json_key);
}
×
613

614
/**
 * \brief Load an IPv6 dataset from the file named by set->load.
 *
 * \param set dataset to fill
 * \param json_key path of the key member inside each record
 * \param array_key optional path of the record array (JSON format only)
 * \param format DATASET_FORMAT_JSON or DATASET_FORMAT_NDJSON
 *
 * \retval 0 loaded, or nothing to load
 * \retval -1 loading a JSON format file failed
 */
static int DatajsonLoadIPv6(Dataset *set, char *json_key, char *array_key, DatasetFormats format)
{
    if (strlen(set->load) == 0)
        return 0;

    SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);

    if (format == DATASET_FORMAT_JSON) {
        /* DatajsonLoadTypeFromJSON() returns a 0/-1 status, not a record
           count, and on success consolidates the memcap and logs the count
           itself: propagate its status instead of treating it as a count */
        return DatajsonLoadTypeFromJSON(set, json_key, array_key, DatajsonAddIPv6Element);
    }

    uint32_t cnt = 0;
    if (format == DATASET_FORMAT_NDJSON) {
        cnt = DatajsonLoadTypeFromJsonline(set, json_key, DatajsonAddIPv6Element);
    }

    THashConsolidateMemcap(set->hash);

    SCLogConfig("dataset: %s loaded %u records", set->name, cnt);
    return 0;
}
6✔
634

635
/**
 * \brief Get an existing dataset with JSON context, or create and load one.
 *
 * The dataset lock is held for the whole call. A dataset that already
 * exists is returned as is, provided its remove_key setting matches
 * \a remove_key. Otherwise the hash for the requested type is created,
 * filled from the load file and the set is appended to the dataset list.
 *
 * \param name dataset name
 * \param type dataset type
 * \param load file to load from
 * \param memcap memcap passed on to the hash
 * \param hashsize hash size passed on to the hash
 * \param json_key_value path of the key member inside each record
 * \param json_array_key optional path of the record array
 * \param format format of the load file
 * \param remove_key whether the key member is removed from stored records
 *
 * \retval the dataset on success
 * \retval NULL creation, remove_key check, hash setup, loading or append failed
 */
Dataset *DatajsonGet(const char *name, enum DatasetTypes type, const char *load, uint64_t memcap,
        uint32_t hashsize, char *json_key_value, char *json_array_key, DatasetFormats format,
        bool remove_key)
{
    Dataset *set = NULL;

    DatasetLock();
    const int ret = DatasetGetOrCreate(name, type, NULL, load, &memcap, &hashsize, &set);
    if (ret < 0) {
        SCLogError("dataset with JSON %s creation failed", name);
        DatasetUnlock();
        return NULL;
    }
    if (ret == 1) {
        SCLogDebug("dataset %s already exists", name);
        Dataset *existing = set;
        if (set->remove_key != remove_key) {
            SCLogError("dataset %s remove_key mismatch: %d != %d", set->name, set->remove_key,
                    remove_key);
            existing = NULL;
        }
        DatasetUnlock();
        return existing;
    }

    set->remove_key = remove_key;

    /* same flag value for every THashInit() call below */
    const int has_load = (load != NULL) ? 1 : 0;

    char cnf_name[128];
    snprintf(cnf_name, sizeof(cnf_name), "datasets.%s.hash", name);
    switch (type) {
        case DATASET_TYPE_MD5:
            set->hash = THashInit(cnf_name, sizeof(Md5Type), Md5StrJsonSet, Md5StrJsonFree,
                    Md5StrHash, Md5StrCompare, NULL, Md5StrJsonGetLength, has_load, memcap,
                    hashsize);
            if (set->hash == NULL)
                goto fail;
            if (DatajsonLoadMd5(set, json_key_value, json_array_key, format) < 0)
                goto fail;
            break;
        case DATASET_TYPE_STRING:
            set->hash = THashInit(cnf_name, sizeof(StringType), StringJsonSet, StringJsonFree,
                    StringHash, StringCompare, NULL, StringJsonGetLength, has_load, memcap,
                    hashsize);
            if (set->hash == NULL)
                goto fail;
            if (DatajsonLoadString(set, json_key_value, json_array_key, format) < 0) {
                SCLogError("dataset %s loading failed", name);
                goto fail;
            }
            break;
        case DATASET_TYPE_SHA256:
            set->hash = THashInit(cnf_name, sizeof(Sha256Type), Sha256StrJsonSet, Sha256StrJsonFree,
                    Sha256StrHash, Sha256StrCompare, NULL, Sha256StrJsonGetLength, has_load,
                    memcap, hashsize);
            if (set->hash == NULL)
                goto fail;
            if (DatajsonLoadSha256(set, json_key_value, json_array_key, format) < 0)
                goto fail;
            break;
        case DATASET_TYPE_IPV4:
            set->hash = THashInit(cnf_name, sizeof(IPv4Type), IPv4JsonSet, IPv4JsonFree, IPv4Hash,
                    IPv4Compare, NULL, IPv4JsonGetLength, has_load, memcap, hashsize);
            if (set->hash == NULL)
                goto fail;
            if (DatajsonLoadIPv4(set, json_key_value, json_array_key, format) < 0)
                goto fail;
            break;
        case DATASET_TYPE_IPV6:
            set->hash = THashInit(cnf_name, sizeof(IPv6Type), IPv6JsonSet, IPv6JsonFree, IPv6Hash,
                    IPv6Compare, NULL, IPv6JsonGetLength, has_load, memcap, hashsize);
            if (set->hash == NULL)
                goto fail;
            if (DatajsonLoadIPv6(set, json_key_value, json_array_key, format) < 0)
                goto fail;
            break;
    }

    SCLogDebug(
            "set %p/%s type %u save %s load %s", set, set->name, set->type, set->save, set->load);

    if (DatasetAppendSet(set) < 0) {
        SCLogError("dataset %s append failed", name);
        goto fail;
    }

    DatasetUnlock();
    return set;

fail:
    /* tear down the partially built set before releasing the lock */
    if (set->hash) {
        THashShutdown(set->hash);
    }
    SCFree(set);
    DatasetUnlock();
    return NULL;
}
35✔
730

731
static DataJsonResultType DatajsonLookupString(
732
        Dataset *set, const uint8_t *data, const uint32_t data_len)
UNCOV
733
{
×
UNCOV
734
    DataJsonResultType rrep = { .found = false, .json = { .value = NULL, .len = 0 } };
×
735

UNCOV
736
    if (set == NULL)
×
737
        return rrep;
×
738

UNCOV
739
    StringType lookup = {
×
UNCOV
740
        .ptr = (uint8_t *)data, .len = data_len, .json.value = NULL, .json.len = 0
×
UNCOV
741
    };
×
UNCOV
742
    THashData *rdata = THashLookupFromHash(set->hash, &lookup);
×
UNCOV
743
    if (rdata) {
×
UNCOV
744
        StringType *found = rdata->data;
×
UNCOV
745
        rrep.found = true;
×
UNCOV
746
        rrep.json = found->json;
×
UNCOV
747
        rrep.hashdata = rdata;
×
UNCOV
748
        return rrep;
×
UNCOV
749
    }
×
750
    return rrep;
×
UNCOV
751
}
×
752

753
static DataJsonResultType DatajsonLookupMd5(
754
        Dataset *set, const uint8_t *data, const uint32_t data_len)
755
{
1✔
756
    DataJsonResultType rrep = { .found = false, .json = { .value = NULL, .len = 0 } };
1✔
757

758
    if (set == NULL)
1✔
759
        return rrep;
×
760

761
    if (data_len != SC_MD5_LEN)
1✔
762
        return rrep;
×
763

764
    Md5Type lookup = { .json.value = NULL, .json.len = 0 };
1✔
765
    memcpy(lookup.md5, data, data_len);
1✔
766
    THashData *rdata = THashLookupFromHash(set->hash, &lookup);
1✔
767
    if (rdata) {
1✔
UNCOV
768
        Md5Type *found = rdata->data;
×
UNCOV
769
        rrep.found = true;
×
UNCOV
770
        rrep.json = found->json;
×
UNCOV
771
        rrep.hashdata = rdata;
×
UNCOV
772
        return rrep;
×
UNCOV
773
    }
×
774
    return rrep;
1✔
775
}
1✔
776

777
static DataJsonResultType DatajsonLookupSha256(
778
        Dataset *set, const uint8_t *data, const uint32_t data_len)
779
{
1✔
780
    DataJsonResultType rrep = { .found = false, .json = { .value = NULL, .len = 0 } };
1✔
781

782
    if (set == NULL)
1✔
783
        return rrep;
×
784

785
    if (data_len != SC_SHA256_LEN)
1✔
786
        return rrep;
×
787

788
    Sha256Type lookup = { .json.value = NULL, .json.len = 0 };
1✔
789
    memcpy(lookup.sha256, data, data_len);
1✔
790
    THashData *rdata = THashLookupFromHash(set->hash, &lookup);
1✔
791
    if (rdata) {
1✔
UNCOV
792
        Sha256Type *found = rdata->data;
×
UNCOV
793
        rrep.found = true;
×
UNCOV
794
        rrep.json = found->json;
×
UNCOV
795
        rrep.hashdata = rdata;
×
UNCOV
796
        return rrep;
×
UNCOV
797
    }
×
798
    return rrep;
1✔
799
}
1✔
800

801
static DataJsonResultType DatajsonLookupIPv4(
802
        Dataset *set, const uint8_t *data, const uint32_t data_len)
803
{
1✔
804
    DataJsonResultType rrep = { .found = false, .json = { .value = NULL, .len = 0 } };
1✔
805

806
    if (set == NULL)
1✔
807
        return rrep;
×
808

809
    if (data_len != SC_IPV4_LEN)
1✔
810
        return rrep;
×
811

812
    IPv4Type lookup = { .json.value = NULL, .json.len = 0 };
1✔
813
    memcpy(lookup.ipv4, data, data_len);
1✔
814
    THashData *rdata = THashLookupFromHash(set->hash, &lookup);
1✔
815
    if (rdata) {
1✔
UNCOV
816
        IPv4Type *found = rdata->data;
×
UNCOV
817
        rrep.found = true;
×
UNCOV
818
        rrep.json = found->json;
×
UNCOV
819
        rrep.hashdata = rdata;
×
UNCOV
820
        return rrep;
×
UNCOV
821
    }
×
822
    return rrep;
1✔
823
}
1✔
824

825
static DataJsonResultType DatajsonLookupIPv6(
826
        Dataset *set, const uint8_t *data, const uint32_t data_len)
827
{
9✔
828
    DataJsonResultType rrep = { .found = false, .json = { .value = NULL, .len = 0 } };
9✔
829

830
    if (set == NULL)
9✔
831
        return rrep;
×
832

833
    /* We can have IPv4 or IPV6 here due to ip.src and ip.dst implementation */
834
    if (data_len != SC_IPV6_LEN && data_len != SC_IPV4_LEN)
9✔
835
        return rrep;
×
836

837
    IPv6Type lookup = { .json.value = NULL, .json.len = 0 };
9✔
838
    memcpy(lookup.ipv6, data, data_len);
9✔
839
    THashData *rdata = THashLookupFromHash(set->hash, &lookup);
9✔
840
    if (rdata) {
9✔
UNCOV
841
        IPv6Type *found = rdata->data;
×
UNCOV
842
        rrep.found = true;
×
UNCOV
843
        rrep.json = found->json;
×
UNCOV
844
        rrep.hashdata = rdata;
×
UNCOV
845
        return rrep;
×
UNCOV
846
    }
×
847
    return rrep;
9✔
848
}
9✔
849

850
/** \brief look up data in a dataset carrying JSON context
 *
 *  Hands the request to the lookup routine matching the type of the set.
 *
 *  \param set dataset to search, may be NULL
 *  \param data raw bytes to look for
 *  \param data_len length of data
 *
 *  \retval result of the type specific lookup, or a result with found set
 *          to false if set is NULL or its type is not handled here
 */
DataJsonResultType DatajsonLookup(Dataset *set, const uint8_t *data, const uint32_t data_len)
{
    if (set != NULL) {
        switch (set->type) {
            case DATASET_TYPE_STRING:
                return DatajsonLookupString(set, data, data_len);
            case DATASET_TYPE_MD5:
                return DatajsonLookupMd5(set, data, data_len);
            case DATASET_TYPE_SHA256:
                return DatajsonLookupSha256(set, data, data_len);
            case DATASET_TYPE_IPV4:
                return DatajsonLookupIPv4(set, data, data_len);
            case DATASET_TYPE_IPV6:
                return DatajsonLookupIPv6(set, data, data_len);
            default:
                break;
        }
    }

    /* no set, or a set type without a lookup routine */
    DataJsonResultType not_found = { .found = false, .json = { .value = NULL, .len = 0 } };
    return not_found;
}
12✔
872

873
/** \brief add serialized data to json set
874
 *  \retval int 1 added
875
 *  \retval int 0 already in hash
876
 *  \retval int -1 API error (not added)
877
 *  \retval int -2 DATA error
878
 */
879
int DatajsonAddSerialized(Dataset *set, const char *value, const char *json)
880
{
×
881
    if (set == NULL)
×
882
        return -1;
×
883

884
    if (strlen(value) == 0)
×
885
        return -1;
×
886

887
    DataJsonType jvalue = { .value = NULL, .len = 0 };
×
888
    if (json) {
×
889
        if (ParseJsonLine(json, strlen(json), &jvalue) < 0) {
×
890
            SCLogNotice("bad json value for dataset %s/%s", set->name, set->load);
×
891
            return -1;
×
892
        }
×
893
    }
×
894

895
    int ret = -1;
×
896
    switch (set->type) {
×
897
        case DATASET_TYPE_STRING: {
×
898
            if (strlen(value) > UINT16_MAX) {
×
899
                // size check before stack allocation
900
                // should never happen as unix socket callers limits it to 4k
901
                SCFree(jvalue.value);
×
902
                return -1;
×
903
            }
×
904
            uint32_t decoded_size = SCBase64DecodeBufferSize((uint32_t)strlen(value));
×
905
            uint8_t decoded[decoded_size];
×
906
            uint32_t num_decoded = SCBase64Decode(
×
907
                    (const uint8_t *)value, strlen(value), SCBase64ModeStrict, decoded);
×
908
            if (num_decoded == 0)
×
909
                goto operror;
×
910
            ret = DatajsonAdd(set, decoded, num_decoded, &jvalue);
×
911
            break;
×
912
        }
×
913
        case DATASET_TYPE_MD5: {
×
914
            if (strlen(value) != SC_MD5_HEX_LEN)
×
915
                goto operror;
×
916
            uint8_t hash[SC_MD5_LEN];
×
917
            if (HexToRaw((const uint8_t *)value, SC_MD5_HEX_LEN, hash, sizeof(hash)) < 0)
×
918
                goto operror;
×
919
            ret = DatajsonAdd(set, hash, SC_MD5_LEN, &jvalue);
×
920
            break;
×
921
        }
×
922
        case DATASET_TYPE_SHA256: {
×
923
            if (strlen(value) != SC_SHA256_HEX_LEN)
×
924
                goto operror;
×
925
            uint8_t hash[SC_SHA256_LEN];
×
926
            if (HexToRaw((const uint8_t *)value, SC_SHA256_HEX_LEN, hash, sizeof(hash)) < 0)
×
927
                goto operror;
×
928
            ret = DatajsonAdd(set, hash, SC_SHA256_LEN, &jvalue);
×
929
            break;
×
930
        }
×
931
        case DATASET_TYPE_IPV4: {
×
932
            struct in_addr in;
×
933
            if (inet_pton(AF_INET, value, &in) != 1)
×
934
                goto operror;
×
935
            ret = DatajsonAdd(set, (uint8_t *)&in.s_addr, SC_IPV4_LEN, &jvalue);
×
936
            break;
×
937
        }
×
938
        case DATASET_TYPE_IPV6: {
×
939
            struct in6_addr in6;
×
940
            if (DatasetParseIpv6String(set, value, &in6) != 0) {
×
941
                SCLogError("Dataset failed to import %s as IPv6", value);
×
942
                goto operror;
×
943
            }
×
944
            ret = DatajsonAdd(set, (uint8_t *)&in6.s6_addr, SC_IPV6_LEN, &jvalue);
×
945
            break;
×
946
        }
×
947
    }
×
948
    SCFree(jvalue.value);
×
949
    return ret;
×
950
operror:
×
951
    SCFree(jvalue.value);
×
952
    return -2;
×
953
}
×
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc