• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

OISF / suricata / 22618661228

02 Mar 2026 09:33PM UTC coverage: 42.258% (-34.4%) from 76.611%
22618661228

push

github

victorjulien
github-actions: bump actions/download-artifact from 7.0.0 to 8.0.0

Bumps [actions/download-artifact](https://github.com/actions/download-artifact) from 7.0.0 to 8.0.0.
- [Release notes](https://github.com/actions/download-artifact/releases)
- [Commits](https://github.com/actions/download-artifact/compare/37930b1c2...70fc10c6e)

---
updated-dependencies:
- dependency-name: actions/download-artifact
  dependency-version: 8.0.0
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>

91511 of 216553 relevant lines covered (42.26%)

3416852.41 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/src/datasets-context-json.c
1
/* Copyright (C) 2025 Open Information Security Foundation
2
 *
3
 * You can copy, redistribute or modify this Program under the terms of
4
 * the GNU General Public License version 2 as published by the Free
5
 * Software Foundation.
6
 *
7
 * This program is distributed in the hope that it will be useful,
8
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10
 * GNU General Public License for more details.
11
 *
12
 * You should have received a copy of the GNU General Public License
13
 * version 2 along with this program; if not, write to the Free Software
14
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
15
 * 02110-1301, USA.
16
 */
17

18
/**
19
 * \file
20
 *
21
 * \author Eric Leblond <el@stamus-networks.com>
22
 */
23

24
#include "suricata-common.h"
25
#include "suricata.h"
26
#include "rust.h"
27
#include "datasets.h"
28
#include "datasets-context-json.h"
29
#include "datasets-ipv4.h"
30
#include "datasets-ipv6.h"
31
#include "datasets-md5.h"
32
#include "datasets-sha256.h"
33
#include "datasets-string.h"
34
#include "util-byte.h"
35
#include "util-ip.h"
36
#include "util-debug.h"
37

38
static int DatajsonAdd(
39
        Dataset *set, const uint8_t *data, const uint32_t data_len, DataJsonType *json);
40

41
/** \brief Drop one use count on a hash entry, then release its lock.
 *
 *  \param d hash entry to release
 */
static inline void DatajsonUnlockData(THashData *d)
{
    /* the value returned by the decrement is deliberately ignored */
    (void)THashDecrUsecnt(d);
    THashDataUnlock(d);
}
×
46

47
/** \brief Release the hash entry referenced by a lookup result, if any.
 *
 *  \param r lookup result; nothing is done when it carries no hash entry
 */
void DatajsonUnlockElt(DataJsonResultType *r)
{
    THashData *entry = r->hashdata;
    if (entry == NULL) {
        return;
    }
    DatajsonUnlockData(entry);
}
×
53

54
/** \brief Make a NUL-terminated heap copy of a JSON value.
 *
 *  \param dst destination; its previous content is overwritten, not freed
 *  \param src source value, src->len bytes are copied from src->value
 *
 *  \retval 0 copy done, dst->value holds dst->len bytes plus a terminator
 *  \retval -1 allocation failed, dst is left empty (NULL value, zero length)
 */
int DatajsonCopyJson(DataJsonType *dst, DataJsonType *src)
{
    dst->value = SCMalloc((size_t)src->len + 1);
    if (dst->value == NULL) {
        /* keep dst consistent: never advertise a length without a buffer */
        dst->len = 0;
        return -1;
    }
    memcpy(dst->value, src->value, src->len);
    dst->value[src->len] = '\0'; // Ensure null-termination
    dst->len = src->len;
    return 0;
}
×
64

65
/** \brief Check whether a string holds a number (integer or float).
 *
 *  The conversion is delegated to strtof(), so leading whitespace is skipped
 *  and every form strtof() understands is accepted.
 *
 *  \param in NUL-terminated string to check
 *  \param ins length of in, terminator excluded
 *
 *  \retval true the input is a number, optionally followed by JSON
 *          whitespace (space, tab, CR, LF)
 *  \retval false nothing could be converted, or other characters follow
 *          the number
 */
static bool IsFloat(const char *in, size_t ins)
{
    if (in == NULL || ins == 0) {
        return false;
    }

    char *endptr = NULL;
    (void)strtof(in, &endptr);
    if (endptr == in) {
        /* no conversion was performed at all */
        return false;
    }

    /* strtof() stops at the first character it cannot use. Anything left
     * that is not trailing whitespace means the input is not a number:
     * without this check "12abc" would be reported as valid. */
    const char *end = in + ins;
    for (const char *p = endptr; p < end; p++) {
        if (*p != ' ' && *p != '\t' && *p != '\n' && *p != '\r') {
            return false;
        }
    }
    return true;
}
×
80

81
/** \brief Validate a JSON value given as text and store a copy of it.
 *
 *  \param in NUL-terminated JSON text
 *  \param ins length of in, terminator excluded
 *  \param rep_out receives a heap copy of the text and its length; the
 *         caller owns rep_out->value on success
 *
 *  \retval 0 value accepted and copied
 *  \retval -1 text too long, not valid JSON, or allocation failure
 */
static int ParseJsonLine(const char *in, size_t ins, DataJsonType *rep_out)
{
    if (ins > DATAJSON_JSON_LENGTH) {
        SCLogError("dataset: json string too long: %s", in);
        return -1;
    }

    json_error_t jerror;
    json_t *msg = json_loads(in, 0, &jerror);
    if (msg != NULL) {
        json_decref(msg);
    } else {
        /* JANSSON does not see an integer, float or a string as valid JSON.
           So we need to exclude them from failure. */
        /* a quoted string needs both an opening and a closing quote, so a
         * lone '"' (ins == 1) must not qualify */
        const bool quoted = (ins >= 2) && (in[0] == '"') && (in[ins - 1] == '"');
        if (!quoted && !IsFloat(in, ins)) {
            SCLogError("dataset: Invalid json: %s: '%s'", jerror.text, in);
            return -1;
        }
    }

    rep_out->value = SCStrndup(in, ins);
    if (rep_out->value == NULL) {
        return -1;
    }
    /* NOTE(review): assumes DATAJSON_JSON_LENGTH fits in 16 bits - confirm */
    rep_out->len = (uint16_t)ins;
    return 0;
}
×
107

108
/** \brief Walk a JSON object along a dot separated key path.
 *
 *  Each component of key ("a.b.c") is looked up in the object found for the
 *  previous component.
 *
 *  \param json object to start from
 *  \param key dot separated path, at most SIG_JSON_CONTENT_KEY_LEN characters
 *
 *  \retval pointer to the value found at the end of the path, as returned by
 *          json_object_get()
 *  \retval NULL bad arguments, key too long, a component is missing or an
 *          intermediate value is not an object
 */
static json_t *GetSubObjectByKey(json_t *json, const char *key)
{
    if (json == NULL || key == NULL) {
        return NULL;
    }
    if (!json_is_object(json)) {
        return NULL;
    }
    if (strlen(key) > SIG_JSON_CONTENT_KEY_LEN) {
        DEBUG_VALIDATE_BUG_ON(strlen(key) > SIG_JSON_CONTENT_KEY_LEN);
        return NULL;
    }

    json_t *node = json;
    const char *segment = key;
    do {
        const char *sep = strchr(segment, '.');
        const size_t seg_len = (sep != NULL) ? (size_t)(sep - segment) : strlen(segment);

        /* isolate the current path component as its own C string */
        char name[seg_len + 1];
        strlcpy(name, segment, seg_len + 1);

        if (!json_is_object(node)) {
            return NULL;
        }
        node = json_object_get(node, name);
        if (node == NULL) {
            return NULL;
        }
        segment = (sep != NULL) ? sep + 1 : NULL;
    } while (segment != NULL);

    return node;
}
×
138

139
/** \brief Load a JSON file and return the array that holds the entries.
 *
 *  \param file path of the JSON file
 *  \param array on success receives the array, with a reference the caller
 *         must drop with json_decref()
 *  \param key dot separated path of the array inside the document; NULL or
 *         empty when the document itself is the array
 *
 *  \retval 0 array found
 *  \retval -1 file could not be loaded, key not found or value not an array
 */
static int ParseJsonFile(const char *file, json_t **array, char *key)
{
    json_error_t error;
    /* assume we have one single JSON element in FILE */
    json_t *root = json_load_file(file, 0, &error);
    if (root == NULL) {
        FatalErrorOnInit("can't load JSON, error on line %d: %s", error.line, error.text);
        return -1;
    }

    const bool use_root = (key == NULL) || (strlen(key) == 0);
    if (use_root) {
        *array = root;
    } else {
        json_t *sub = GetSubObjectByKey(root, key);
        *array = sub;
        if (sub == NULL) {
            SCLogError("dataset: %s failed to get key '%s'", file, key);
            json_decref(root);
            return -1;
        }
        /* keep the sub element alive while the rest of the document goes */
        json_incref(sub);
        json_decref(root);
    }

    if (json_is_array(*array)) {
        return 0;
    }
    FatalErrorOnInit("not an array");
    json_decref(*array);
    return -1;
}
×
169

170
static int DatajsonSetValue(
171
        Dataset *set, const uint8_t *val, uint16_t val_len, json_t *value, const char *json_key)
172
{
×
173
    DataJsonType elt = { .value = NULL, .len = 0 };
×
174
    if (set->remove_key) {
×
175
        json_object_del(value, json_key);
×
176
    }
×
177

178
    elt.value = json_dumps(value, JSON_COMPACT);
×
179
    if (elt.value == NULL) {
×
180
        FatalErrorOnInit("json_dumps failed for %s/%s", set->name, set->load);
×
181
        return 0;
×
182
    }
×
183
    if (strlen(elt.value) > DATAJSON_JSON_LENGTH) {
×
184
        SCLogError("dataset: json string too long: %s/%s", set->name, set->load);
×
185
        SCFree(elt.value);
×
186
        elt.value = NULL;
×
187
        return 0;
×
188
    }
×
189
    elt.len = (uint16_t)strlen(elt.value);
×
190

191
    int add_ret = DatajsonAdd(set, val, val_len, &elt);
×
192
    if (add_ret < 0) {
×
193
        FatalErrorOnInit("datajson data add failed %s/%s", set->name, set->load);
×
194
        return 0;
×
195
    }
×
196
    return add_ret;
×
197
}
×
198

199
/**
200
 *  \retval 1 data was added to the hash
201
 *  \retval 0 data was not added to the hash as it is already there
202
 *  \retval -1 failed to add data to the hash
203
 */
204
static int DatajsonAddString(
205
        Dataset *set, const uint8_t *data, const uint32_t data_len, const DataJsonType *json)
206
{
×
207
    StringType lookup = { .ptr = (uint8_t *)data, .len = data_len, .json = *json };
×
208
    struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
×
209
    if (res.data) {
×
210
        DatajsonUnlockData(res.data);
×
211
        return res.is_new ? 1 : 0;
×
212
    }
×
213
    return -1;
×
214
}
×
215

216
static int DatajsonAddMd5(
217
        Dataset *set, const uint8_t *data, const uint32_t data_len, const DataJsonType *json)
218
{
×
219
    if (data_len != SC_MD5_LEN)
×
220
        return -2;
×
221

222
    Md5Type lookup = { .json = *json };
×
223
    memcpy(lookup.md5, data, SC_MD5_LEN);
×
224
    struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
×
225
    if (res.data) {
×
226
        DatajsonUnlockData(res.data);
×
227
        return res.is_new ? 1 : 0;
×
228
    }
×
229
    return -1;
×
230
}
×
231

232
static int DatajsonAddSha256(
233
        Dataset *set, const uint8_t *data, const uint32_t data_len, const DataJsonType *json)
234
{
×
235
    if (data_len != SC_SHA256_LEN)
×
236
        return -2;
×
237

238
    Sha256Type lookup = { .json = *json };
×
239
    memcpy(lookup.sha256, data, SC_SHA256_LEN);
×
240
    struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
×
241
    if (res.data) {
×
242
        DatajsonUnlockData(res.data);
×
243
        return res.is_new ? 1 : 0;
×
244
    }
×
245
    return -1;
×
246
}
×
247

248
static int DatajsonAddIPv4(
249
        Dataset *set, const uint8_t *data, const uint32_t data_len, const DataJsonType *json)
250
{
×
251
    if (data_len < SC_IPV4_LEN)
×
252
        return -2;
×
253

254
    IPv4Type lookup = { .json = *json };
×
255
    memcpy(lookup.ipv4, data, SC_IPV4_LEN);
×
256
    struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
×
257
    if (res.data) {
×
258
        DatajsonUnlockData(res.data);
×
259
        return res.is_new ? 1 : 0;
×
260
    }
×
261
    return -1;
×
262
}
×
263

264
static int DatajsonAddIPv6(
265
        Dataset *set, const uint8_t *data, const uint32_t data_len, const DataJsonType *json)
266
{
×
267
    if (data_len != SC_IPV6_LEN)
×
268
        return -2;
×
269

270
    IPv6Type lookup = { .json = *json };
×
271
    memcpy(lookup.ipv6, data, SC_IPV6_LEN);
×
272
    struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
×
273
    if (res.data) {
×
274
        DatajsonUnlockData(res.data);
×
275
        return res.is_new ? 1 : 0;
×
276
    }
×
277
    return -1;
×
278
}
×
279

280
/*
281
 * \brief Add data to the dataset from a JSON object.
282
 *
283
 * \param set The dataset to add data to.
284
 * \param data The data to add.
285
 * \param data_len The length of the data.
286
 * \param json The JSON object containing additional information.
287
 *
288
 * Memory allocated for the `json` parameter will be freed if the data
289
 * is not added to the hash.
290
 *
291
 * \retval 1 Data was added to the hash.
292
 * \retval 0 Data was not added to the hash as it is already there.
293
 * \retval -1 Failed to add data to the hash.
294
 */
295
static int DatajsonAdd(
296
        Dataset *set, const uint8_t *data, const uint32_t data_len, DataJsonType *json)
297
{
×
298
    if (json == NULL)
×
299
        return -1;
×
300
    if (json->value == NULL)
×
301
        return -1;
×
302

303
    if (set == NULL) {
×
304
        if (json->value != NULL) {
×
305
            SCFree(json->value);
×
306
            json->value = NULL;
×
307
        }
×
308
        return -1;
×
309
    }
×
310

311
    int add_ret = 0;
×
312
    switch (set->type) {
×
313
        case DATASET_TYPE_STRING:
×
314
            add_ret = DatajsonAddString(set, data, data_len, json);
×
315
            break;
×
316
        case DATASET_TYPE_MD5:
×
317
            add_ret = DatajsonAddMd5(set, data, data_len, json);
×
318
            break;
×
319
        case DATASET_TYPE_SHA256:
×
320
            add_ret = DatajsonAddSha256(set, data, data_len, json);
×
321
            break;
×
322
        case DATASET_TYPE_IPV4:
×
323
            add_ret = DatajsonAddIPv4(set, data, data_len, json);
×
324
            break;
×
325
        case DATASET_TYPE_IPV6:
×
326
            add_ret = DatajsonAddIPv6(set, data, data_len, json);
×
327
            break;
×
328
        default:
×
329
            add_ret = -1;
×
330
            break;
×
331
    }
×
332

333
    SCFree(json->value);
×
334
    json->value = NULL;
×
335

336
    return add_ret;
×
337
}
×
338

339
static int DatajsonLoadTypeFromJSON(Dataset *set, char *json_key, char *array_key,
340
        uint32_t (*DatajsonAddTypeElement)(Dataset *, json_t *, char *, bool *))
341
{
×
342
    if (strlen(set->load) == 0)
×
343
        return 0;
×
344

345
    SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);
×
346

347
    uint32_t cnt = 0;
×
348
    json_t *json;
×
349
    bool found = false;
×
350
    SCLogDebug("dataset: array_key '%s' %p", array_key, array_key);
×
351
    if (ParseJsonFile(set->load, &json, array_key) == -1) {
×
352
        SCLogError("dataset: %s failed to parse from '%s'", set->name, set->load);
×
353
        return -1;
×
354
    }
×
355

356
    size_t index;
×
357
    json_t *value;
×
358
    json_array_foreach (json, index, value) {
×
359
        cnt += DatajsonAddTypeElement(set, value, json_key, &found);
×
360
    }
×
361
    json_decref(json);
×
362

363
    if (found == false) {
×
364
        FatalErrorOnInit(
×
365
                "No valid entries for key '%s' found in the file '%s'", json_key, set->load);
×
366
        return -1;
×
367
    }
×
368
    THashConsolidateMemcap(set->hash);
×
369

370
    SCLogConfig("dataset: %s loaded %u records", set->name, cnt);
×
371
    return 0;
×
372
}
×
373

374
static uint32_t DatajsonLoadTypeFromJsonline(Dataset *set, char *json_key,
375
        uint32_t (*DatajsonAddTypeElement)(Dataset *, json_t *, char *, bool *))
376
{
×
377
    uint32_t cnt = 0;
×
378
    FILE *fp = fopen(set->load, "r");
×
379
    bool found = false;
×
380

381
    if (fp == NULL) {
×
382
        SCLogError("dataset: %s failed to open file '%s'", set->name, set->load);
×
383
        return 0;
×
384
    }
×
385

386
    char line[DATAJSON_JSON_LENGTH];
×
387
    while (fgets(line, sizeof(line), fp) != NULL) {
×
388
        json_t *json = json_loads(line, 0, NULL);
×
389
        if (json == NULL) {
×
390
            SCLogError("dataset: %s failed to parse line '%s'", set->name, line);
×
391
            goto out_err;
×
392
        }
×
393
        cnt += DatajsonAddTypeElement(set, json, json_key, &found);
×
394
        json_decref(json);
×
395
    }
×
396
    int close_op = fclose(fp);
×
397
    if (close_op != 0) {
×
398
        SCLogError("dataset: %s failed to close file '%s'", set->name, set->load);
×
399
        return 0;
×
400
    }
×
401

402
    if (found == false) {
×
403
        FatalErrorOnInit(
×
404
                "No valid entries for key '%s' found in the file '%s'", json_key, set->load);
×
405
        return 0;
×
406
    }
×
407
    return cnt;
×
408
out_err:
×
409
    close_op = fclose(fp);
×
410
    if (close_op != 0) {
×
411
        SCLogError("dataset: %s failed to close file '%s'", set->name, set->load);
×
412
    }
×
413
    return 0;
×
414
}
×
415

416
/** \brief Add one JSON entry to a string dataset.
 *
 *  \param set dataset to add to
 *  \param value JSON entry
 *  \param json_key key (dot separated path) holding the string in value
 *  \param found set to true when json_key exists in value
 *
 *  \retval 1 entry added
 *  \retval 0 entry not added: key missing, value not a string or too long,
 *          already present, or add failure
 */
static uint32_t DatajsonAddStringElement(Dataset *set, json_t *value, char *json_key, bool *found)
{
    json_t *key = GetSubObjectByKey(value, json_key);
    if (key == NULL) {
        /* ignore error as it can be a working mode where some entries
           are not in the same format */
        return 0;
    }

    *found = true;

    const char *val_key = json_string_value(key);
    if (val_key == NULL) {
        FatalErrorOnInit("dataset: %s failed to get value for key '%s'", set->name, json_key);
        return 0;
    }
    const size_t val_len = strlen(val_key);
    if (val_len > UINT16_MAX) {
        /* DatajsonSetValue() takes a 16 bit length: refuse the entry rather
         * than silently storing a truncated key */
        FatalErrorOnInit("dataset: %s value for key '%s' is too long", set->name, json_key);
        return 0;
    }

    /* DatajsonSetValue() may delete json_key from value (remove_key); hold a
     * reference so the string behind val_key stays alive during the call */
    json_incref(key);
    int ret = DatajsonSetValue(set, (const uint8_t *)val_key, (uint16_t)val_len, value, json_key);
    json_decref(key);
    if (ret < 0) {
        FatalErrorOnInit("datajson data add failed %s/%s", set->name, set->load);
        return 0;
    }
    return (uint32_t)ret;
}
×
443

444
/** \brief Load a string dataset from the file configured in set->load.
 *
 *  \param set dataset to fill
 *  \param json_key key holding the string in each entry
 *  \param array_key path of the array of entries (JSON format only)
 *  \param format file format
 *
 *  \retval 0 loading done, or nothing to load. NDJSON errors are logged by
 *          the line loader but reported as zero records here.
 *  \retval -1 loading of a JSON format file failed
 */
static int DatajsonLoadString(Dataset *set, char *json_key, char *array_key, DatasetFormats format)
{
    if (strlen(set->load) == 0)
        return 0;

    if (format == DATASET_FORMAT_JSON) {
        /* DatajsonLoadTypeFromJSON() returns a status (0 / -1), not a record
         * count, and does its own logging and memcap consolidation: hand its
         * result to the caller instead of logging it as a count */
        return DatajsonLoadTypeFromJSON(set, json_key, array_key, DatajsonAddStringElement);
    }

    SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);

    uint32_t cnt = 0;
    if (format == DATASET_FORMAT_NDJSON) {
        cnt = DatajsonLoadTypeFromJsonline(set, json_key, DatajsonAddStringElement);
    }
    THashConsolidateMemcap(set->hash);

    SCLogConfig("dataset: %s loaded %u records", set->name, cnt);
    return 0;
}
×
462

463
/** \brief Add one JSON entry to an MD5 dataset.
 *
 *  \param set dataset to add to
 *  \param value JSON entry
 *  \param json_key key (dot separated path) holding the hex encoded hash
 *  \param found set to true when json_key exists in value
 *
 *  \retval 1 entry added
 *  \retval 0 entry not added: key missing, value not a valid hash, already
 *          present, or add failure
 */
static uint32_t DatajsonAddMd5Element(Dataset *set, json_t *value, char *json_key, bool *found)
{
    json_t *key = GetSubObjectByKey(value, json_key);
    if (key == NULL) {
        /* ignore error as it can be a working mode where some entries
           are not in the same format */
        return 0;
    }

    *found = true;

    const char *hash_string = json_string_value(key);
    if (hash_string == NULL) {
        /* the key exists but does not hold a string: strlen() must not see
         * a NULL pointer */
        FatalErrorOnInit("dataset: %s failed to get value for key '%s'", set->name, json_key);
        return 0;
    }
    if (strlen(hash_string) != SC_MD5_HEX_LEN) {
        FatalErrorOnInit("Not correct length for a hash");
        return 0;
    }

    uint8_t hash[SC_MD5_LEN];
    if (HexToRaw((const uint8_t *)hash_string, SC_MD5_HEX_LEN, hash, sizeof(hash)) < 0) {
        FatalErrorOnInit("bad hash for dataset %s/%s", set->name, set->load);
        return 0;
    }
    return DatajsonSetValue(set, hash, SC_MD5_LEN, value, json_key);
}
×
487

488
/** \brief Load an MD5 dataset from the file configured in set->load.
 *
 *  \param set dataset to fill
 *  \param json_key key holding the hex encoded hash in each entry
 *  \param array_key path of the array of entries (JSON format only)
 *  \param format file format
 *
 *  \retval 0 loading done, or nothing to load. NDJSON errors are logged by
 *          the line loader but reported as zero records here.
 *  \retval -1 loading of a JSON format file failed
 */
static int DatajsonLoadMd5(Dataset *set, char *json_key, char *array_key, DatasetFormats format)
{
    if (strlen(set->load) == 0)
        return 0;

    if (format == DATASET_FORMAT_JSON) {
        /* DatajsonLoadTypeFromJSON() returns a status (0 / -1), not a record
         * count, and does its own logging and memcap consolidation: hand its
         * result to the caller instead of logging it as a count */
        return DatajsonLoadTypeFromJSON(set, json_key, array_key, DatajsonAddMd5Element);
    }

    SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);

    uint32_t cnt = 0;
    if (format == DATASET_FORMAT_NDJSON) {
        cnt = DatajsonLoadTypeFromJsonline(set, json_key, DatajsonAddMd5Element);
    }
    THashConsolidateMemcap(set->hash);

    SCLogConfig("dataset: %s loaded %u records", set->name, cnt);
    return 0;
}
×
506

507
/** \brief Add one JSON entry to a SHA256 dataset.
 *
 *  \param set dataset to add to
 *  \param value JSON entry
 *  \param json_key key (dot separated path) holding the hex encoded hash
 *  \param found set to true when json_key exists in value
 *
 *  \retval 1 entry added
 *  \retval 0 entry not added: key missing, value not a valid hash, already
 *          present, or add failure
 */
static uint32_t DatajsonAddSha256Element(Dataset *set, json_t *value, char *json_key, bool *found)
{
    json_t *key = GetSubObjectByKey(value, json_key);
    if (key == NULL) {
        /* ignore error as it can be a working mode where some entries
           are not in the same format */
        return 0;
    }

    *found = true;

    const char *hash_string = json_string_value(key);
    if (hash_string == NULL) {
        /* the key exists but does not hold a string: strlen() must not see
         * a NULL pointer */
        FatalErrorOnInit("dataset: %s failed to get value for key '%s'", set->name, json_key);
        return 0;
    }
    if (strlen(hash_string) != SC_SHA256_HEX_LEN) {
        FatalErrorOnInit("Not correct length for a hash");
        return 0;
    }

    uint8_t hash[SC_SHA256_LEN];
    if (HexToRaw((const uint8_t *)hash_string, SC_SHA256_HEX_LEN, hash, sizeof(hash)) < 0) {
        FatalErrorOnInit("bad hash for dataset %s/%s", set->name, set->load);
        return 0;
    }

    return DatajsonSetValue(set, hash, SC_SHA256_LEN, value, json_key);
}
×
532

533
/** \brief Load a SHA256 dataset from the file configured in set->load.
 *
 *  \param set dataset to fill
 *  \param json_key key holding the hex encoded hash in each entry
 *  \param array_key path of the array of entries (JSON format only)
 *  \param format file format
 *
 *  \retval 0 loading done, or nothing to load. NDJSON errors are logged by
 *          the line loader but reported as zero records here.
 *  \retval -1 loading of a JSON format file failed
 */
static int DatajsonLoadSha256(Dataset *set, char *json_key, char *array_key, DatasetFormats format)
{
    if (strlen(set->load) == 0)
        return 0;

    if (format == DATASET_FORMAT_JSON) {
        /* DatajsonLoadTypeFromJSON() returns a status (0 / -1), not a record
         * count, and does its own logging and memcap consolidation: hand its
         * result to the caller instead of logging it as a count */
        return DatajsonLoadTypeFromJSON(set, json_key, array_key, DatajsonAddSha256Element);
    }

    SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);

    uint32_t cnt = 0;
    if (format == DATASET_FORMAT_NDJSON) {
        cnt = DatajsonLoadTypeFromJsonline(set, json_key, DatajsonAddSha256Element);
    }
    THashConsolidateMemcap(set->hash);

    SCLogConfig("dataset: %s loaded %u records", set->name, cnt);
    return 0;
}
×
551

552
/** \brief Add one JSON entry to an IPv4 dataset.
 *
 *  \param set dataset to add to
 *  \param value JSON entry
 *  \param json_key key (dot separated path) holding the address as text
 *  \param found set to true when json_key exists in value
 *
 *  \retval 1 entry added
 *  \retval 0 entry not added: key missing, value not a valid IPv4 address,
 *          already present, or add failure
 */
static uint32_t DatajsonAddIpv4Element(Dataset *set, json_t *value, char *json_key, bool *found)
{
    json_t *key = GetSubObjectByKey(value, json_key);
    if (key == NULL) {
        /* ignore error as it can be a working mode where some entries
           are not in the same format */
        return 0;
    }

    *found = true;

    const char *ip_string = json_string_value(key);
    if (ip_string == NULL) {
        /* the key exists but does not hold a string: neither inet_pton()
         * nor the "%s" below may be handed a NULL pointer */
        FatalErrorOnInit("dataset: %s failed to get value for key '%s'", set->name, json_key);
        return 0;
    }
    struct in_addr in;
    if (inet_pton(AF_INET, ip_string, &in) != 1) {
        FatalErrorOnInit("datajson IPv4 parse failed %s/%s: %s", set->name, set->load, ip_string);
        return 0;
    }

    return DatajsonSetValue(set, (const uint8_t *)&in.s_addr, SC_IPV4_LEN, value, json_key);
}
×
572

573
/** \brief Load an IPv4 dataset from the file configured in set->load.
 *
 *  \param set dataset to fill
 *  \param json_key key holding the address in each entry
 *  \param array_key path of the array of entries (JSON format only)
 *  \param format file format
 *
 *  \retval 0 loading done, or nothing to load. NDJSON errors are logged by
 *          the line loader but reported as zero records here.
 *  \retval -1 loading of a JSON format file failed
 */
static int DatajsonLoadIPv4(Dataset *set, char *json_key, char *array_key, DatasetFormats format)
{
    if (strlen(set->load) == 0)
        return 0;

    if (format == DATASET_FORMAT_JSON) {
        /* DatajsonLoadTypeFromJSON() returns a status (0 / -1), not a record
         * count, and does its own logging and memcap consolidation: hand its
         * result to the caller instead of logging it as a count */
        return DatajsonLoadTypeFromJSON(set, json_key, array_key, DatajsonAddIpv4Element);
    }

    SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);

    uint32_t cnt = 0;
    if (format == DATASET_FORMAT_NDJSON) {
        cnt = DatajsonLoadTypeFromJsonline(set, json_key, DatajsonAddIpv4Element);
    }
    THashConsolidateMemcap(set->hash);

    SCLogConfig("dataset: %s loaded %u records", set->name, cnt);
    return 0;
}
×
591

592
/** \brief Add one JSON entry to an IPv6 dataset.
 *
 *  \param set dataset to add to
 *  \param value JSON entry
 *  \param json_key key (dot separated path) holding the address as text
 *  \param found set to true when json_key exists in value
 *
 *  \retval 1 entry added
 *  \retval 0 entry not added: key missing, value not a parsable address,
 *          already present, or add failure
 */
static uint32_t DatajsonAddIPv6Element(Dataset *set, json_t *value, char *json_key, bool *found)
{
    json_t *key = GetSubObjectByKey(value, json_key);
    if (key == NULL) {
        /* ignore error as it can be a working mode where some entries
           are not in the same format */
        return 0;
    }

    *found = true;

    const char *ip_string = json_string_value(key);
    if (ip_string == NULL) {
        /* the key exists but does not hold a string: do not hand a NULL
         * pointer to the address parser */
        FatalErrorOnInit("dataset: %s failed to get value for key '%s'", set->name, json_key);
        return 0;
    }
    struct in6_addr in6;
    int ret = DatasetParseIpv6String(set, ip_string, &in6);
    if (ret < 0) {
        FatalErrorOnInit("unable to parse IP address");
        return 0;
    }

    return DatajsonSetValue(set, (const uint8_t *)&in6.s6_addr, SC_IPV6_LEN, value, json_key);
}
×
613

614
/** \brief Load an IPv6 dataset from the file configured in set->load.
 *
 *  \param set dataset to fill
 *  \param json_key key holding the address in each entry
 *  \param array_key path of the array of entries (JSON format only)
 *  \param format file format
 *
 *  \retval 0 loading done, or nothing to load. NDJSON errors are logged by
 *          the line loader but reported as zero records here.
 *  \retval -1 loading of a JSON format file failed
 */
static int DatajsonLoadIPv6(Dataset *set, char *json_key, char *array_key, DatasetFormats format)
{
    if (strlen(set->load) == 0)
        return 0;

    if (format == DATASET_FORMAT_JSON) {
        /* DatajsonLoadTypeFromJSON() returns a status (0 / -1), not a record
         * count, and does its own logging and memcap consolidation: hand its
         * result to the caller instead of logging it as a count */
        return DatajsonLoadTypeFromJSON(set, json_key, array_key, DatajsonAddIPv6Element);
    }

    SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);

    uint32_t cnt = 0;
    if (format == DATASET_FORMAT_NDJSON) {
        cnt = DatajsonLoadTypeFromJsonline(set, json_key, DatajsonAddIPv6Element);
    }
    THashConsolidateMemcap(set->hash);

    SCLogConfig("dataset: %s loaded %u records", set->name, cnt);
    return 0;
}
×
634

635
/** \brief Get a dataset with JSON context, creating and loading it if needed.
 *
 *  The whole operation runs between DatasetLock() and DatasetUnlock().
 *
 *  \param name dataset name
 *  \param type dataset type
 *  \param load file to load the data from
 *  \param memcap memcap passed to the hash
 *  \param hashsize hash size passed to the hash
 *  \param json_key_value key holding the dataset value in each JSON entry
 *  \param json_array_key path of the array of entries in the file
 *  \param format format of the file
 *  \param remove_key whether json_key_value is removed from the stored context
 *
 *  \retval the existing or newly created dataset
 *  \retval NULL creation, hash setup, loading or registration failed, or an
 *          existing dataset has a different remove_key setting
 */
Dataset *DatajsonGet(const char *name, enum DatasetTypes type, const char *load, uint64_t memcap,
        uint32_t hashsize, char *json_key_value, char *json_array_key, DatasetFormats format,
        bool remove_key)
{
    Dataset *set = NULL;

    DatasetLock();
    const int created = DatasetGetOrCreate(name, type, NULL, load, &memcap, &hashsize, &set);
    if (created < 0) {
        SCLogError("dataset with JSON %s creation failed", name);
        DatasetUnlock();
        return NULL;
    }
    if (created == 1) {
        SCLogDebug("dataset %s already exists", name);
        /* an existing set is only handed out when remove_key agrees */
        Dataset *existing = set;
        if (set->remove_key != remove_key) {
            SCLogError("dataset %s remove_key mismatch: %d != %d", set->name, set->remove_key,
                    remove_key);
            existing = NULL;
        }
        DatasetUnlock();
        return existing;
    }

    set->remove_key = remove_key;

    char cnf_name[128];
    snprintf(cnf_name, sizeof(cnf_name), "datasets.%s.hash", name);
    const int has_load = (load != NULL) ? 1 : 0;

    switch (type) {
        case DATASET_TYPE_STRING:
            set->hash = THashInit(cnf_name, sizeof(StringType), StringJsonSet, StringJsonFree,
                    StringHash, StringCompare, NULL, StringJsonGetLength, has_load, memcap,
                    hashsize);
            if (set->hash == NULL) {
                goto out_err;
            }
            if (DatajsonLoadString(set, json_key_value, json_array_key, format) < 0) {
                SCLogError("dataset %s loading failed", name);
                goto out_err;
            }
            break;
        case DATASET_TYPE_MD5:
            set->hash = THashInit(cnf_name, sizeof(Md5Type), Md5StrJsonSet, Md5StrJsonFree,
                    Md5StrHash, Md5StrCompare, NULL, Md5StrJsonGetLength, has_load, memcap,
                    hashsize);
            if (set->hash == NULL) {
                goto out_err;
            }
            if (DatajsonLoadMd5(set, json_key_value, json_array_key, format) < 0) {
                goto out_err;
            }
            break;
        case DATASET_TYPE_SHA256:
            set->hash = THashInit(cnf_name, sizeof(Sha256Type), Sha256StrJsonSet, Sha256StrJsonFree,
                    Sha256StrHash, Sha256StrCompare, NULL, Sha256StrJsonGetLength, has_load,
                    memcap, hashsize);
            if (set->hash == NULL) {
                goto out_err;
            }
            if (DatajsonLoadSha256(set, json_key_value, json_array_key, format) < 0) {
                goto out_err;
            }
            break;
        case DATASET_TYPE_IPV4:
            set->hash = THashInit(cnf_name, sizeof(IPv4Type), IPv4JsonSet, IPv4JsonFree, IPv4Hash,
                    IPv4Compare, NULL, IPv4JsonGetLength, has_load, memcap, hashsize);
            if (set->hash == NULL) {
                goto out_err;
            }
            if (DatajsonLoadIPv4(set, json_key_value, json_array_key, format) < 0) {
                goto out_err;
            }
            break;
        case DATASET_TYPE_IPV6:
            set->hash = THashInit(cnf_name, sizeof(IPv6Type), IPv6JsonSet, IPv6JsonFree, IPv6Hash,
                    IPv6Compare, NULL, IPv6JsonGetLength, has_load, memcap, hashsize);
            if (set->hash == NULL) {
                goto out_err;
            }
            if (DatajsonLoadIPv6(set, json_key_value, json_array_key, format) < 0) {
                goto out_err;
            }
            break;
    }

    SCLogDebug(
            "set %p/%s type %u save %s load %s", set, set->name, set->type, set->save, set->load);

    if (DatasetAppendSet(set) < 0) {
        SCLogError("dataset %s append failed", name);
        goto out_err;
    }

    DatasetUnlock();
    return set;

out_err:
    /* the set was never registered: tear it down here */
    if (set->hash) {
        THashShutdown(set->hash);
    }
    SCFree(set);
    DatasetUnlock();
    return NULL;
}
×
730

731
static DataJsonResultType DatajsonLookupString(
732
        Dataset *set, const uint8_t *data, const uint32_t data_len)
733
{
×
734
    DataJsonResultType rrep = { .found = false, .json = { .value = NULL, .len = 0 } };
×
735

736
    if (set == NULL)
×
737
        return rrep;
×
738

739
    StringType lookup = {
×
740
        .ptr = (uint8_t *)data, .len = data_len, .json.value = NULL, .json.len = 0
×
741
    };
×
742
    THashData *rdata = THashLookupFromHash(set->hash, &lookup);
×
743
    if (rdata) {
×
744
        StringType *found = rdata->data;
×
745
        rrep.found = true;
×
746
        rrep.json = found->json;
×
747
        rrep.hashdata = rdata;
×
748
        return rrep;
×
749
    }
×
750
    return rrep;
×
751
}
×
752

753
static DataJsonResultType DatajsonLookupMd5(
754
        Dataset *set, const uint8_t *data, const uint32_t data_len)
755
{
×
756
    DataJsonResultType rrep = { .found = false, .json = { .value = NULL, .len = 0 } };
×
757

758
    if (set == NULL)
×
759
        return rrep;
×
760

761
    if (data_len != SC_MD5_LEN)
×
762
        return rrep;
×
763

764
    Md5Type lookup = { .json.value = NULL, .json.len = 0 };
×
765
    memcpy(lookup.md5, data, data_len);
×
766
    THashData *rdata = THashLookupFromHash(set->hash, &lookup);
×
767
    if (rdata) {
×
768
        Md5Type *found = rdata->data;
×
769
        rrep.found = true;
×
770
        rrep.json = found->json;
×
771
        rrep.hashdata = rdata;
×
772
        return rrep;
×
773
    }
×
774
    return rrep;
×
775
}
×
776

777
static DataJsonResultType DatajsonLookupSha256(
778
        Dataset *set, const uint8_t *data, const uint32_t data_len)
779
{
×
780
    DataJsonResultType rrep = { .found = false, .json = { .value = NULL, .len = 0 } };
×
781

782
    if (set == NULL)
×
783
        return rrep;
×
784

785
    if (data_len != SC_SHA256_LEN)
×
786
        return rrep;
×
787

788
    Sha256Type lookup = { .json.value = NULL, .json.len = 0 };
×
789
    memcpy(lookup.sha256, data, data_len);
×
790
    THashData *rdata = THashLookupFromHash(set->hash, &lookup);
×
791
    if (rdata) {
×
792
        Sha256Type *found = rdata->data;
×
793
        rrep.found = true;
×
794
        rrep.json = found->json;
×
795
        rrep.hashdata = rdata;
×
796
        return rrep;
×
797
    }
×
798
    return rrep;
×
799
}
×
800

801
static DataJsonResultType DatajsonLookupIPv4(
802
        Dataset *set, const uint8_t *data, const uint32_t data_len)
803
{
×
804
    DataJsonResultType rrep = { .found = false, .json = { .value = NULL, .len = 0 } };
×
805

806
    if (set == NULL)
×
807
        return rrep;
×
808

809
    if (data_len != SC_IPV4_LEN)
×
810
        return rrep;
×
811

812
    IPv4Type lookup = { .json.value = NULL, .json.len = 0 };
×
813
    memcpy(lookup.ipv4, data, data_len);
×
814
    THashData *rdata = THashLookupFromHash(set->hash, &lookup);
×
815
    if (rdata) {
×
816
        IPv4Type *found = rdata->data;
×
817
        rrep.found = true;
×
818
        rrep.json = found->json;
×
819
        rrep.hashdata = rdata;
×
820
        return rrep;
×
821
    }
×
822
    return rrep;
×
823
}
×
824

825
static DataJsonResultType DatajsonLookupIPv6(
826
        Dataset *set, const uint8_t *data, const uint32_t data_len)
827
{
×
828
    DataJsonResultType rrep = { .found = false, .json = { .value = NULL, .len = 0 } };
×
829

830
    if (set == NULL)
×
831
        return rrep;
×
832

833
    /* We can have IPv4 or IPV6 here due to ip.src and ip.dst implementation */
834
    if (data_len != SC_IPV6_LEN && data_len != SC_IPV4_LEN)
×
835
        return rrep;
×
836

837
    IPv6Type lookup = { .json.value = NULL, .json.len = 0 };
×
838
    memcpy(lookup.ipv6, data, data_len);
×
839
    THashData *rdata = THashLookupFromHash(set->hash, &lookup);
×
840
    if (rdata) {
×
841
        IPv6Type *found = rdata->data;
×
842
        rrep.found = true;
×
843
        rrep.json = found->json;
×
844
        rrep.hashdata = rdata;
×
845
        return rrep;
×
846
    }
×
847
    return rrep;
×
848
}
×
849

850
/** \brief Look up data in a dataset with JSON context, whatever its type.
 *
 *  Dispatches to the lookup function matching set->type.
 *
 *  \param set dataset to search
 *  \param data raw data to look for
 *  \param data_len length of data
 *
 *  \retval lookup result; a not-found result is returned for a NULL set or
 *          an unhandled set type. A result carrying a hash entry is to be
 *          released with DatajsonUnlockElt().
 */
DataJsonResultType DatajsonLookup(Dataset *set, const uint8_t *data, const uint32_t data_len)
{
    if (set != NULL) {
        switch (set->type) {
            case DATASET_TYPE_STRING:
                return DatajsonLookupString(set, data, data_len);
            case DATASET_TYPE_MD5:
                return DatajsonLookupMd5(set, data, data_len);
            case DATASET_TYPE_SHA256:
                return DatajsonLookupSha256(set, data, data_len);
            case DATASET_TYPE_IPV4:
                return DatajsonLookupIPv4(set, data, data_len);
            case DATASET_TYPE_IPV6:
                return DatajsonLookupIPv6(set, data, data_len);
            default:
                break;
        }
    }

    DataJsonResultType rrep = { .found = false, .json = { .value = 0 } };
    return rrep;
}
×
872

873
/** \brief add serialized data to json set
874
 *  \retval int 1 added
875
 *  \retval int 0 already in hash
876
 *  \retval int -1 API error (not added)
877
 *  \retval int -2 DATA error
878
 */
879
int DatajsonAddSerialized(Dataset *set, const char *value, const char *json)
880
{
×
881
    if (set == NULL)
×
882
        return -1;
×
883

884
    if (strlen(value) == 0)
×
885
        return -1;
×
886

887
    DataJsonType jvalue = { .value = NULL, .len = 0 };
×
888
    if (json) {
×
889
        if (ParseJsonLine(json, strlen(json), &jvalue) < 0) {
×
890
            SCLogNotice("bad json value for dataset %s/%s", set->name, set->load);
×
891
            return -1;
×
892
        }
×
893
    }
×
894

895
    int ret = -1;
×
896
    switch (set->type) {
×
897
        case DATASET_TYPE_STRING: {
×
898
            if (strlen(value) > UINT16_MAX) {
×
899
                // size check before stack allocation
900
                // should never happen as unix socket callers limits it to 4k
901
                SCFree(jvalue.value);
×
902
                return -1;
×
903
            }
×
904
            uint32_t decoded_size = SCBase64DecodeBufferSize((uint32_t)strlen(value));
×
905
            uint8_t decoded[decoded_size];
×
906
            uint32_t num_decoded = SCBase64Decode(
×
907
                    (const uint8_t *)value, strlen(value), SCBase64ModeStrict, decoded);
×
908
            if (num_decoded == 0)
×
909
                goto operror;
×
910
            ret = DatajsonAdd(set, decoded, num_decoded, &jvalue);
×
911
            break;
×
912
        }
×
913
        case DATASET_TYPE_MD5: {
×
914
            if (strlen(value) != SC_MD5_HEX_LEN)
×
915
                goto operror;
×
916
            uint8_t hash[SC_MD5_LEN];
×
917
            if (HexToRaw((const uint8_t *)value, SC_MD5_HEX_LEN, hash, sizeof(hash)) < 0)
×
918
                goto operror;
×
919
            ret = DatajsonAdd(set, hash, SC_MD5_LEN, &jvalue);
×
920
            break;
×
921
        }
×
922
        case DATASET_TYPE_SHA256: {
×
923
            if (strlen(value) != SC_SHA256_HEX_LEN)
×
924
                goto operror;
×
925
            uint8_t hash[SC_SHA256_LEN];
×
926
            if (HexToRaw((const uint8_t *)value, SC_SHA256_HEX_LEN, hash, sizeof(hash)) < 0)
×
927
                goto operror;
×
928
            ret = DatajsonAdd(set, hash, SC_SHA256_LEN, &jvalue);
×
929
            break;
×
930
        }
×
931
        case DATASET_TYPE_IPV4: {
×
932
            struct in_addr in;
×
933
            if (inet_pton(AF_INET, value, &in) != 1)
×
934
                goto operror;
×
935
            ret = DatajsonAdd(set, (uint8_t *)&in.s_addr, SC_IPV4_LEN, &jvalue);
×
936
            break;
×
937
        }
×
938
        case DATASET_TYPE_IPV6: {
×
939
            struct in6_addr in6;
×
940
            if (DatasetParseIpv6String(set, value, &in6) != 0) {
×
941
                SCLogError("Dataset failed to import %s as IPv6", value);
×
942
                goto operror;
×
943
            }
×
944
            ret = DatajsonAdd(set, (uint8_t *)&in6.s6_addr, SC_IPV6_LEN, &jvalue);
×
945
            break;
×
946
        }
×
947
    }
×
948
    SCFree(jvalue.value);
×
949
    return ret;
×
950
operror:
×
951
    SCFree(jvalue.value);
×
952
    return -2;
×
953
}
×
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc