• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

jasonish / suricata / 23105300094

15 Mar 2026 06:48AM UTC coverage: 75.784% (-0.7%) from 76.495%
23105300094

push

github

jasonish
github-ci: ubuntu minimal build fixups

- Don't run on the GitHub provided VM, it contains a newer Rust than
  stock Ubuntu does.

252836 of 333628 relevant lines covered (75.78%)

1978514.46 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

52.99
/src/datasets.c
1
/* Copyright (C) 2017-2024 Open Information Security Foundation
2
 *
3
 * You can copy, redistribute or modify this Program under the terms of
4
 * the GNU General Public License version 2 as published by the Free
5
 * Software Foundation.
6
 *
7
 * This program is distributed in the hope that it will be useful,
8
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10
 * GNU General Public License for more details.
11
 *
12
 * You should have received a copy of the GNU General Public License
13
 * version 2 along with this program; if not, write to the Free Software
14
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
15
 * 02110-1301, USA.
16
 */
17

18
/**
19
 * \file
20
 *
21
 * \author Victor Julien <victor@inliniac.net>
22
 */
23

24
#include "suricata-common.h"
25
#include "suricata.h"
26
#include "rust.h"
27
#include "conf.h"
28
#include "datasets.h"
29
#include "datasets-string.h"
30
#include "datasets-ipv4.h"
31
#include "datasets-ipv6.h"
32
#include "datasets-md5.h"
33
#include "datasets-sha256.h"
34
#include "datasets-reputation.h"
35
#include "datasets-context-json.h"
36
#include "util-conf.h"
37
#include "util-mem.h"
38
#include "util-thash.h"
39
#include "util-print.h"
40
#include "util-byte.h"
41
#include "util-misc.h"
42
#include "util-path.h"
43
#include "util-debug.h"
44
#include "util-validate.h"
45

46
SCMutex sets_lock = SCMUTEX_INITIALIZER;
47
static Dataset *sets = NULL;
48
static uint32_t set_ids = 0;
49

50
uint32_t dataset_max_one_hashsize = 65536;
51
uint32_t dataset_max_total_hashsize = 16777216;
52
uint32_t dataset_used_hashsize = 0;
53

54
int DatasetAddwRep(Dataset *set, const uint8_t *data, const uint32_t data_len, DataRepType *rep);
55
static void DatasetUpdateHashsize(const char *name, uint32_t hash_size);
56

57
/** \brief release a hash data entry
 *
 *  Calls THashDecrUsecnt() and then THashDataUnlock() on \p d. The return
 *  value of THashDecrUsecnt() is deliberately ignored.
 */
static inline void DatasetUnlockData(THashData *d)
{
    (void)THashDecrUsecnt(d);
    THashDataUnlock(d);
}

static bool DatasetIsStatic(const char *save, const char *load);
63

64
enum DatasetTypes DatasetGetTypeFromString(const char *s)
65
{
×
66
    if (strcasecmp("md5", s) == 0)
×
67
        return DATASET_TYPE_MD5;
×
68
    if (strcasecmp("sha256", s) == 0)
×
69
        return DATASET_TYPE_SHA256;
×
70
    if (strcasecmp("string", s) == 0)
×
71
        return DATASET_TYPE_STRING;
×
72
    if (strcasecmp("ipv4", s) == 0)
×
73
        return DATASET_TYPE_IPV4;
×
74
    if (strcasecmp("ip", s) == 0)
×
75
        return DATASET_TYPE_IPV6;
×
76
    return DATASET_TYPE_NOTSET;
×
77
}
×
78

79
/** \brief add a fully set up dataset to the global list
 *
 *  The set is put at the head of the list and its hash size is added to the
 *  hash size accounting.
 *
 *  \retval 0 set was added
 *  \retval -1 set has no hash, or the hash has its memcap_reached flag set
 */
int DatasetAppendSet(Dataset *set)
{
    int rc = -1;

    if (set->hash != NULL) {
        if (SC_ATOMIC_GET(set->hash->memcap_reached)) {
            SCLogError("dataset too large for set memcap");
        } else {
            SCLogDebug("set %p/%s type %u save %s load %s", set, set->name, set->type, set->save,
                    set->load);

            /* push to the front of the list */
            set->next = sets;
            sets = set;

            /* hash size accounting */
            DatasetUpdateHashsize(set->name, set->hash->config.hash_size);
            rc = 0;
        }
    }
    return rc;
}
54✔
101

102
void DatasetLock(void)
103
{
4,314✔
104
    SCMutexLock(&sets_lock);
4,314✔
105
}
4,314✔
106

107
void DatasetUnlock(void)
108
{
4,306✔
109
    SCMutexUnlock(&sets_lock);
4,306✔
110
}
4,306✔
111

112
Dataset *DatasetAlloc(const char *name)
113
{
62✔
114
    Dataset *set = SCCalloc(1, sizeof(*set));
62✔
115
    if (set) {
62✔
116
        set->id = set_ids++;
62✔
117
    }
62✔
118
    return set;
62✔
119
}
62✔
120

121
Dataset *DatasetSearchByName(const char *name)
122
{
84✔
123
    Dataset *set = sets;
84✔
124
    while (set) {
114✔
125
        if (strcasecmp(name, set->name) == 0 && !set->hidden) {
52✔
126
            return set;
22✔
127
        }
22✔
128
        set = set->next;
30✔
129
    }
30✔
130
    return NULL;
62✔
131
}
84✔
132

133
/** \brief load the initial content of an IPv4 dataset from set->load
 *
 *  \retval 0 nothing to load, or loading finished
 *  \retval -1 ParseDatasets() returned -1
 *
 *  A ParseDatasets() result of -2 is reported through FatalErrorOnInit(); if
 *  that returns, the function continues as on success.
 */
static int DatasetLoadIPv4(Dataset *set)
{
    if (set->load[0] == '\0')
        return 0;

    SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);

    /* the file is opened with "a+" instead of "r" when it is also the
     * save file */
    const bool load_is_save = set->save[0] != '\0' && strcmp(set->save, set->load) == 0;
    const char *fopen_mode = load_is_save ? "a+" : "r";

    switch (ParseDatasets(set, set->name, set->load, fopen_mode, DSIpv4)) {
        case -2:
            FatalErrorOnInit("dataset %s could not be processed", set->name);
            break;
        case -1:
            return -1;
        default:
            break;
    }

    THashConsolidateMemcap(set->hash);
    return 0;
}
3✔
155

156
/** \brief parse a textual IPv4 or IPv6 address into dataset storage format
 *
 *  A \p line containing a ':' is parsed as IPv6, anything else as IPv4.
 *  IPv4 addresses, including IPv4-mapped IPv6 addresses (::ffff:a.b.c.d),
 *  are stored as the 4 address bytes followed by 12 zero bytes.
 *
 *  The IPv4-mapped prefix is checked byte by byte so the result does not
 *  depend on host byte order.
 *
 *  \param set dataset the line belongs to, used for error reporting only
 *  \param line NUL terminated address string
 *  \param in6 [out] parsed address
 *
 *  \retval 0 address stored in \p in6
 *  \retval -1 \p line is not a valid address (also reported through
 *             FatalErrorOnInit())
 */
int DatasetParseIpv6String(Dataset *set, const char *line, struct in6_addr *in6)
{
    if (strchr(line, ':') != NULL) {
        /* IPv6 case */
        if (inet_pton(AF_INET6, line, in6) != 1) {
            FatalErrorOnInit("dataset data parse failed %s/%s: %s", set->name, set->load, line);
            return -1;
        }

        /* IPv4 in IPv6 notation needs transformation to internal Suricata
         * storage: 80 zero bits followed by 16 one bits mark a mapped
         * address */
        static const uint8_t v4mapped_prefix[12] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff };
        if (memcmp(in6->s6_addr, v4mapped_prefix, sizeof(v4mapped_prefix)) == 0) {
            uint8_t v4[4];
            memcpy(v4, &in6->s6_addr[12], sizeof(v4));
            memset(in6, 0, sizeof(*in6));
            memcpy(in6->s6_addr, v4, sizeof(v4));
        }
    } else {
        /* IPv4 case */
        struct in_addr in;
        if (inet_pton(AF_INET, line, &in) != 1) {
            FatalErrorOnInit("dataset data parse failed %s/%s: %s", set->name, set->load, line);
            return -1;
        }
        memset(in6, 0, sizeof(struct in6_addr));
        memcpy(in6, &in, sizeof(struct in_addr));
    }
    return 0;
}
11✔
186

187
/** \brief load the initial content of an IP (IPv6 storage) dataset from
 *         set->load
 *
 *  \retval 0 nothing to load, or loading finished
 *  \retval -1 ParseDatasets() returned -1
 *
 *  A ParseDatasets() result of -2 is reported through FatalErrorOnInit(); if
 *  that returns, the function continues as on success.
 */
static int DatasetLoadIPv6(Dataset *set)
{
    if (set->load[0] == '\0')
        return 0;

    SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);

    /* the file is opened with "a+" instead of "r" when it is also the
     * save file */
    const bool load_is_save = set->save[0] != '\0' && strcmp(set->save, set->load) == 0;
    const char *fopen_mode = load_is_save ? "a+" : "r";

    switch (ParseDatasets(set, set->name, set->load, fopen_mode, DSIpv6)) {
        case -2:
            FatalErrorOnInit("dataset %s could not be processed", set->name);
            break;
        case -1:
            return -1;
        default:
            break;
    }

    THashConsolidateMemcap(set->hash);
    return 0;
}
4✔
209

210
/** \brief load the initial content of an MD5 dataset from set->load
 *
 *  \retval 0 nothing to load, or loading finished
 *  \retval -1 ParseDatasets() returned -1
 *
 *  A ParseDatasets() result of -2 is reported through FatalErrorOnInit(); if
 *  that returns, the function continues as on success.
 */
static int DatasetLoadMd5(Dataset *set)
{
    if (set->load[0] == '\0')
        return 0;

    SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);

    /* the file is opened with "a+" instead of "r" when it is also the
     * save file */
    const bool load_is_save = set->save[0] != '\0' && strcmp(set->save, set->load) == 0;
    const char *fopen_mode = load_is_save ? "a+" : "r";

    switch (ParseDatasets(set, set->name, set->load, fopen_mode, DSMd5)) {
        case -2:
            FatalErrorOnInit("dataset %s could not be processed", set->name);
            break;
        case -1:
            return -1;
        default:
            break;
    }

    THashConsolidateMemcap(set->hash);
    return 0;
}
3✔
232

233
/** \brief load the initial content of a SHA256 dataset from set->load
 *
 *  \retval 0 nothing to load, or loading finished
 *  \retval -1 ParseDatasets() returned -1
 *
 *  A ParseDatasets() result of -2 is reported through FatalErrorOnInit(); if
 *  that returns, the function continues as on success.
 */
static int DatasetLoadSha256(Dataset *set)
{
    if (set->load[0] == '\0')
        return 0;

    SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);

    /* the file is opened with "a+" instead of "r" when it is also the
     * save file */
    const bool load_is_save = set->save[0] != '\0' && strcmp(set->save, set->load) == 0;
    const char *fopen_mode = load_is_save ? "a+" : "r";

    switch (ParseDatasets(set, set->name, set->load, fopen_mode, DSSha256)) {
        case -2:
            FatalErrorOnInit("dataset %s could not be processed", set->name);
            break;
        case -1:
            return -1;
        default:
            break;
    }

    THashConsolidateMemcap(set->hash);
    return 0;
}
1✔
255

256
/** \brief load the initial content of a string dataset from set->load
 *
 *  \retval 0 nothing to load, or loading finished
 *  \retval -1 ParseDatasets() returned -1
 *
 *  A ParseDatasets() result of -2 is reported through FatalErrorOnInit(); if
 *  that returns, the function continues as on success.
 */
static int DatasetLoadString(Dataset *set)
{
    if (set->load[0] == '\0')
        return 0;

    SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);

    /* the file is opened with "a+" instead of "r" when it is also the
     * save file */
    const bool load_is_save = set->save[0] != '\0' && strcmp(set->save, set->load) == 0;
    const char *fopen_mode = load_is_save ? "a+" : "r";

    switch (ParseDatasets(set, set->name, set->load, fopen_mode, DSString)) {
        case -2:
            FatalErrorOnInit("dataset %s could not be processed", set->name);
            break;
        case -1:
            return -1;
        default:
            break;
    }

    THashConsolidateMemcap(set->hash);
    return 0;
}
16✔
279

280
/* defined outside this file; DatasetGetPath() consults it when a load path
 * does not exist below the data directory */
extern bool g_system;

/** what a path passed to DatasetGetPath() is used for */
enum DatasetGetPathType {
    TYPE_STATE, /**< path comes from a "state" setting */
    TYPE_LOAD,  /**< path comes from a "load" setting */
};
286

287
static void DatasetGetPath(
288
        const char *in_path, char *out_path, size_t out_size, enum DatasetGetPathType type)
289
{
×
290
    char path[PATH_MAX];
×
291
    struct stat st;
×
292

293
    if (PathIsAbsolute(in_path)) {
×
294
        strlcpy(path, in_path, sizeof(path));
×
295
        strlcpy(out_path, path, out_size);
×
296
        return;
×
297
    }
×
298

299
    const char *data_dir = ConfigGetDataDirectory();
×
300
    if (stat(data_dir, &st) != 0) {
×
301
        SCLogDebug("data-dir '%s': %s", data_dir, strerror(errno));
×
302
        return;
×
303
    }
×
304

305
    snprintf(path, sizeof(path), "%s/%s", data_dir, in_path); // TODO WINDOWS
×
306

307
    if (type == TYPE_LOAD) {
×
308
        if (stat(path, &st) != 0) {
×
309
            SCLogDebug("path %s: %s", path, strerror(errno));
×
310
            if (!g_system) {
×
311
                snprintf(path, sizeof(path), "%s", in_path);
×
312
            }
×
313
        }
×
314
    }
×
315
    strlcpy(out_path, path, out_size);
×
316
    SCLogDebug("in_path \'%s\' => \'%s\'", in_path, out_path);
×
317
}
×
318

319
/** \brief look for set by name without creating it */
320
Dataset *DatasetFind(const char *name, enum DatasetTypes type)
321
{
12✔
322
    DatasetLock();
12✔
323
    Dataset *set = DatasetSearchByName(name);
12✔
324
    if (set) {
12✔
325
        if (set->type != type) {
12✔
326
            DatasetUnlock();
×
327
            return NULL;
×
328
        }
×
329
    }
12✔
330
    DatasetUnlock();
12✔
331
    return set;
12✔
332
}
12✔
333

334
static bool DatasetCheckHashsize(const char *name, uint32_t hash_size)
335
{
62✔
336
    if (dataset_max_one_hashsize > 0 && hash_size > dataset_max_one_hashsize) {
62✔
337
        SCLogError("hashsize %u in dataset '%s' exceeds configured 'single-hashsize' limit (%u)",
×
338
                hash_size, name, dataset_max_one_hashsize);
×
339
        return false;
×
340
    }
×
341
    // we cannot underflow as we know from conf loading that
342
    // dataset_max_total_hashsize >= dataset_max_one_hashsize if dataset_max_total_hashsize > 0
343
    if (dataset_max_total_hashsize > 0 &&
62✔
344
            dataset_max_total_hashsize - hash_size < dataset_used_hashsize) {
62✔
345
        SCLogError("hashsize %u in dataset '%s' exceeds configured 'total-hashsizes' limit (%u, in "
×
346
                   "use %u)",
×
347
                hash_size, name, dataset_max_total_hashsize, dataset_used_hashsize);
×
348
        return false;
×
349
    }
×
350

351
    return true;
62✔
352
}
62✔
353

354
static void DatasetUpdateHashsize(const char *name, uint32_t hash_size)
355
{
53✔
356
    if (dataset_max_total_hashsize > 0) {
53✔
357
        dataset_used_hashsize += hash_size;
53✔
358
        SCLogDebug("set %s adding with hash_size %u", name, hash_size);
53✔
359
    }
53✔
360
}
53✔
361

362
/**
363
 * \return -1 on error
364
 * \return 0 on successful creation
365
 * \return 1 if the dataset already exists
366
 *
367
 * Calling function is responsible for locking via DatasetLock()
368
 */
369
int DatasetGetOrCreate(const char *name, enum DatasetTypes type, const char *save, const char *load,
370
        uint64_t *memcap, uint32_t *hashsize, Dataset **ret_set)
371
{
72✔
372
    uint64_t default_memcap = 0;
72✔
373
    uint32_t default_hashsize = 0;
72✔
374
    if (strlen(name) > DATASET_NAME_MAX_LEN) {
72✔
375
        return -1;
×
376
    }
×
377

378
    Dataset *set = DatasetSearchByName(name);
72✔
379
    if (set) {
72✔
380
        if (type != DATASET_TYPE_NOTSET && set->type != type) {
10✔
381
            SCLogError("dataset %s already "
×
382
                       "exists and is of type %u",
×
383
                    set->name, set->type);
×
384
            return -1;
×
385
        }
×
386

387
        if ((save == NULL || strlen(save) == 0) &&
10✔
388
            (load == NULL || strlen(load) == 0)) {
10✔
389
            // OK, rule keyword doesn't have to set state/load,
390
            // even when yaml set has set it.
391
        } else {
7✔
392
            if ((save == NULL && strlen(set->save) > 0) ||
7✔
393
                    (save != NULL && strcmp(set->save, save) != 0)) {
7✔
394
                SCLogError("dataset %s save mismatch: %s != %s", set->name, set->save, save);
×
395
                DatasetUnlock();
×
396
                return -1;
×
397
            }
×
398
            if ((load == NULL && strlen(set->load) > 0) ||
7✔
399
                    (load != NULL && strcmp(set->load, load) != 0)) {
7✔
400
                SCLogError("dataset %s load mismatch: %s != %s", set->name, set->load, load);
×
401
                return -1;
×
402
            }
×
403
        }
7✔
404

405
        *ret_set = set;
10✔
406
        return 1;
10✔
407
    }
10✔
408

409
    if (type == DATASET_TYPE_NOTSET) {
62✔
410
        SCLogError("dataset %s not defined", name);
×
411
        goto out_err;
×
412
    }
×
413

414
    DatasetGetDefaultMemcap(&default_memcap, &default_hashsize);
62✔
415
    if (*hashsize == 0) {
62✔
416
        *hashsize = default_hashsize;
52✔
417
    }
52✔
418
    if (*memcap == 0) {
62✔
419
        *memcap = default_memcap;
60✔
420
    }
60✔
421

422
    if (!DatasetCheckHashsize(name, *hashsize)) {
62✔
423
        goto out_err;
×
424
    }
×
425

426
    set = DatasetAlloc(name);
62✔
427
    if (set == NULL) {
62✔
428
        goto out_err;
×
429
    }
×
430

431
    strlcpy(set->name, name, sizeof(set->name));
62✔
432
    set->type = type;
62✔
433
    if (save && strlen(save)) {
62✔
434
        strlcpy(set->save, save, sizeof(set->save));
15✔
435
        SCLogDebug("name %s save '%s'", name, set->save);
15✔
436
    }
15✔
437
    if (load && strlen(load)) {
62✔
438
        strlcpy(set->load, load, sizeof(set->load));
47✔
439
        SCLogDebug("set \'%s\' loading \'%s\' from \'%s\'", set->name, load, set->load);
47✔
440
    }
47✔
441

442
    *ret_set = set;
62✔
443
    return 0;
62✔
444
out_err:
×
445
    if (set) {
×
446
        SCFree(set);
×
447
    }
×
448
    return -1;
×
449
}
62✔
450

451
Dataset *DatasetGet(const char *name, enum DatasetTypes type, const char *save, const char *load,
452
        uint64_t memcap, uint32_t hashsize)
453
{
49✔
454
    Dataset *set = NULL;
49✔
455

456
    DatasetLock();
49✔
457
    int ret = DatasetGetOrCreate(name, type, save, load, &memcap, &hashsize, &set);
49✔
458
    if (ret < 0) {
49✔
459
        SCLogError("dataset %s creation failed", name);
×
460
        DatasetUnlock();
×
461
        return NULL;
×
462
    }
×
463
    if (ret == 1) {
49✔
464
        SCLogDebug("dataset %s already exists", name);
7✔
465
        DatasetUnlock();
7✔
466
        return set;
7✔
467
    }
7✔
468

469
    char cnf_name[128];
42✔
470
    snprintf(cnf_name, sizeof(cnf_name), "datasets.%s.hash", name);
42✔
471
    switch (type) {
42✔
472
        case DATASET_TYPE_MD5:
3✔
473
            set->hash = THashInit(cnf_name, sizeof(Md5Type), Md5StrSet, Md5StrFree, Md5StrHash,
3✔
474
                    Md5StrCompare, NULL, NULL, load != NULL ? 1 : 0, memcap, hashsize);
3✔
475
            if (set->hash == NULL)
3✔
476
                goto out_err;
×
477
            if (DatasetLoadMd5(set) < 0)
3✔
478
                goto out_err;
×
479
            break;
3✔
480
        case DATASET_TYPE_STRING:
31✔
481
            set->hash = THashInit(cnf_name, sizeof(StringType), StringSet, StringFree, StringHash,
31✔
482
                    StringCompare, NULL, StringGetLength, load != NULL ? 1 : 0, memcap, hashsize);
31✔
483
            if (set->hash == NULL)
31✔
484
                goto out_err;
×
485
            if (DatasetLoadString(set) < 0)
31✔
486
                goto out_err;
×
487
            break;
31✔
488
        case DATASET_TYPE_SHA256:
31✔
489
            set->hash = THashInit(cnf_name, sizeof(Sha256Type), Sha256StrSet, Sha256StrFree,
1✔
490
                    Sha256StrHash, Sha256StrCompare, NULL, NULL, load != NULL ? 1 : 0, memcap,
1✔
491
                    hashsize);
1✔
492
            if (set->hash == NULL)
1✔
493
                goto out_err;
×
494
            if (DatasetLoadSha256(set) < 0)
1✔
495
                goto out_err;
×
496
            break;
1✔
497
        case DATASET_TYPE_IPV4:
3✔
498
            set->hash = THashInit(cnf_name, sizeof(IPv4Type), IPv4Set, IPv4Free, IPv4Hash,
3✔
499
                    IPv4Compare, NULL, NULL, load != NULL ? 1 : 0, memcap, hashsize);
3✔
500
            if (set->hash == NULL)
3✔
501
                goto out_err;
×
502
            if (DatasetLoadIPv4(set) < 0)
3✔
503
                goto out_err;
×
504
            break;
3✔
505
        case DATASET_TYPE_IPV6:
4✔
506
            set->hash = THashInit(cnf_name, sizeof(IPv6Type), IPv6Set, IPv6Free, IPv6Hash,
4✔
507
                    IPv6Compare, NULL, NULL, load != NULL ? 1 : 0, memcap, hashsize);
4✔
508
            if (set->hash == NULL)
4✔
509
                goto out_err;
×
510
            if (DatasetLoadIPv6(set) < 0)
4✔
511
                goto out_err;
×
512
            break;
4✔
513
    }
42✔
514

515
    if (DatasetAppendSet(set) < 0) {
35✔
516
        SCLogError("dataset %s append failed", name);
1✔
517
        goto out_err;
1✔
518
    }
1✔
519

520
    DatasetUnlock();
34✔
521
    return set;
34✔
522
out_err:
1✔
523
    if (set->hash) {
1✔
524
        THashShutdown(set->hash);
1✔
525
    }
1✔
526
    SCFree(set);
1✔
527
    DatasetUnlock();
1✔
528
    return NULL;
1✔
529
}
35✔
530

531
/** \brief check whether a set is static
 *
 *  A set is static if it has load defined but no dynamic properties like
 *  save and/or state. NULL and the empty string both count as "not
 *  defined".
 */
static bool DatasetIsStatic(const char *save, const char *load)
{
    const bool has_load = load != NULL && load[0] != '\0';
    const bool has_save = save != NULL && save[0] != '\0';

    return has_load && !has_save;
}
×
542

543
void DatasetReload(void)
544
{
×
545
    /* In order to reload the datasets, just mark the current sets as hidden
546
     * and clean them up later.
547
     * New datasets shall be created with the rule reload and do not require
548
     * any intervention.
549
     * */
550
    DatasetLock();
×
551
    Dataset *set = sets;
×
552
    while (set) {
×
553
        if (!DatasetIsStatic(set->save, set->load) || set->from_yaml) {
×
554
            SCLogDebug("Not a static set, skipping %s", set->name);
×
555
            set = set->next;
×
556
            continue;
×
557
        }
×
558
        set->hidden = true;
×
559
        if (dataset_max_total_hashsize > 0) {
×
560
            DEBUG_VALIDATE_BUG_ON(set->hash->config.hash_size > dataset_used_hashsize);
×
561
            dataset_used_hashsize -= set->hash->config.hash_size;
×
562
        }
×
563
        SCLogDebug("Set %s at %p hidden successfully", set->name, set);
×
564
        set = set->next;
×
565
    }
×
566
    DatasetUnlock();
×
567
}
×
568

569
void DatasetPostReloadCleanup(void)
570
{
×
571
    DatasetLock();
×
572
    SCLogDebug("Post Reload Cleanup starting.. Hidden sets will be removed");
×
573
    Dataset *cur = sets;
×
574
    Dataset *prev = NULL;
×
575
    while (cur) {
×
576
        Dataset *next = cur->next;
×
577
        if (!cur->hidden) {
×
578
            prev = cur;
×
579
            cur = next;
×
580
            continue;
×
581
        }
×
582
        // Delete the set in case it was hidden
583
        if (prev != NULL) {
×
584
            prev->next = next;
×
585
        } else {
×
586
            sets = next;
×
587
        }
×
588
        THashShutdown(cur->hash);
×
589
        SCFree(cur);
×
590
        cur = next;
×
591
    }
×
592
    DatasetUnlock();
×
593
}
×
594

595
/* Value reflects THASH_DEFAULT_HASHSIZE which is what the default was earlier,
596
 * despite 2048 commented out in the default yaml. */
597
#define DATASETS_HASHSIZE_DEFAULT 4096
2,232✔
598

599
void DatasetGetDefaultMemcap(uint64_t *memcap, uint32_t *hashsize)
600
{
2,232✔
601
    const char *str = NULL;
2,232✔
602
    if (SCConfGet("datasets.defaults.memcap", &str) == 1) {
2,232✔
603
        if (ParseSizeStringU64(str, memcap) < 0) {
×
604
            SCLogWarning("memcap value cannot be deduced: %s,"
×
605
                         " resetting to default",
×
606
                    str);
×
607
            *memcap = 0;
×
608
        }
×
609
    }
×
610

611
    *hashsize = (uint32_t)DATASETS_HASHSIZE_DEFAULT;
2,232✔
612
    if (SCConfGet("datasets.defaults.hashsize", &str) == 1) {
2,232✔
613
        if (ParseSizeStringU32(str, hashsize) < 0) {
×
614
            *hashsize = (uint32_t)DATASETS_HASHSIZE_DEFAULT;
×
615
            SCLogWarning("hashsize value cannot be deduced: %s,"
×
616
                         " resetting to default: %u",
×
617
                    str, *hashsize);
×
618
        }
×
619
    }
×
620
}
2,232✔
621

622
int DatasetsInit(void)
623
{
2,170✔
624
    SCLogDebug("datasets start");
2,170✔
625
    SCConfNode *datasets = SCConfGetNode("datasets");
2,170✔
626
    uint64_t default_memcap = 0;
2,170✔
627
    uint32_t default_hashsize = 0;
2,170✔
628
    DatasetGetDefaultMemcap(&default_memcap, &default_hashsize);
2,170✔
629
    if (datasets != NULL) {
2,170✔
630
        const char *str = NULL;
1,484✔
631
        if (SCConfGet("datasets.limits.total-hashsizes", &str) == 1) {
1,484✔
632
            if (ParseSizeStringU32(str, &dataset_max_total_hashsize) < 0) {
×
633
                FatalError("failed to parse datasets.limits.total-hashsizes value: %s", str);
×
634
            }
×
635
        }
×
636
        if (SCConfGet("datasets.limits.single-hashsize", &str) == 1) {
1,484✔
637
            if (ParseSizeStringU32(str, &dataset_max_one_hashsize) < 0) {
×
638
                FatalError("failed to parse datasets.limits.single-hashsize value: %s", str);
×
639
            }
×
640
        }
×
641
        if (dataset_max_total_hashsize > 0 &&
1,484✔
642
                dataset_max_total_hashsize < dataset_max_one_hashsize) {
1,484✔
643
            FatalError("total-hashsizes (%u) cannot be smaller than single-hashsize (%u)",
×
644
                    dataset_max_total_hashsize, dataset_max_one_hashsize);
×
645
        }
×
646
        if (dataset_max_total_hashsize > 0 && dataset_max_one_hashsize == 0) {
1,484✔
647
            // the total limit also applies for single limit
648
            dataset_max_one_hashsize = dataset_max_total_hashsize;
×
649
        }
×
650

651
        int list_pos = 0;
1,484✔
652
        SCConfNode *iter = NULL;
1,484✔
653
        TAILQ_FOREACH(iter, &datasets->head, next) {
4,461✔
654
            if (iter->name == NULL) {
4,461✔
655
                list_pos++;
×
656
                continue;
×
657
            }
×
658

659
            char save[PATH_MAX] = "";
4,461✔
660
            char load[PATH_MAX] = "";
4,461✔
661
            uint64_t memcap = 0;
4,461✔
662
            uint32_t hashsize = 0;
4,461✔
663

664
            const char *set_name = iter->name;
4,461✔
665
            if (strlen(set_name) > DATASET_NAME_MAX_LEN) {
4,461✔
666
                FatalErrorOnInit(
×
667
                        "set name '%s' too long, max %d chars", set_name, DATASET_NAME_MAX_LEN);
×
668
                continue;
×
669
            }
×
670

671
            SCConfNode *set_type = SCConfNodeLookupChild(iter, "type");
4,461✔
672
            if (set_type == NULL) {
4,461✔
673
                list_pos++;
4,453✔
674
                continue;
4,453✔
675
            }
4,453✔
676

677
            SCConfNode *set_save = SCConfNodeLookupChild(iter, "state");
8✔
678
            if (set_save) {
8✔
679
                DatasetGetPath(set_save->val, save, sizeof(save), TYPE_STATE);
×
680
                strlcpy(load, save, sizeof(load));
×
681
            } else {
8✔
682
                SCConfNode *set_load = SCConfNodeLookupChild(iter, "load");
8✔
683
                if (set_load) {
8✔
684
                    DatasetGetPath(set_load->val, load, sizeof(load), TYPE_LOAD);
×
685
                }
×
686
            }
8✔
687

688
            SCConfNode *set_memcap = SCConfNodeLookupChild(iter, "memcap");
8✔
689
            if (set_memcap) {
8✔
690
                if (ParseSizeStringU64(set_memcap->val, &memcap) < 0) {
×
691
                    SCLogWarning("memcap value cannot be"
×
692
                                 " deduced: %s, resetting to default",
×
693
                            set_memcap->val);
×
694
                    memcap = 0;
×
695
                }
×
696
            }
×
697
            SCConfNode *set_hashsize = SCConfNodeLookupChild(iter, "hashsize");
8✔
698
            if (set_hashsize) {
8✔
699
                if (ParseSizeStringU32(set_hashsize->val, &hashsize) < 0) {
×
700
                    SCLogWarning("hashsize value cannot be"
×
701
                                 " deduced: %s, resetting to default",
×
702
                            set_hashsize->val);
×
703
                    hashsize = 0;
×
704
                }
×
705
            }
×
706
            char conf_str[1024];
8✔
707
            snprintf(conf_str, sizeof(conf_str), "datasets.%d.%s", list_pos, set_name);
8✔
708

709
            SCLogDebug("set %s type %s. Conf %s", set_name, set_type->val, conf_str);
8✔
710

711
            if (strcmp(set_type->val, "md5") == 0) {
8✔
712
                Dataset *dset = DatasetGet(set_name, DATASET_TYPE_MD5, save, load,
×
713
                        memcap > 0 ? memcap : default_memcap,
×
714
                        hashsize > 0 ? hashsize : default_hashsize);
×
715
                if (dset == NULL) {
×
716
                    FatalErrorOnInit("failed to setup dataset for %s", set_name);
×
717
                    continue;
×
718
                }
×
719
                SCLogDebug("dataset %s: id %u type %s", set_name, dset->id, set_type->val);
×
720
                dset->from_yaml = true;
×
721

722
            } else if (strcmp(set_type->val, "sha256") == 0) {
8✔
723
                Dataset *dset = DatasetGet(set_name, DATASET_TYPE_SHA256, save, load,
×
724
                        memcap > 0 ? memcap : default_memcap,
×
725
                        hashsize > 0 ? hashsize : default_hashsize);
×
726
                if (dset == NULL) {
×
727
                    FatalErrorOnInit("failed to setup dataset for %s", set_name);
×
728
                    continue;
×
729
                }
×
730
                SCLogDebug("dataset %s: id %u type %s", set_name, dset->id, set_type->val);
×
731
                dset->from_yaml = true;
×
732

733
            } else if (strcmp(set_type->val, "string") == 0) {
8✔
734
                Dataset *dset = DatasetGet(set_name, DATASET_TYPE_STRING, save, load,
8✔
735
                        memcap > 0 ? memcap : default_memcap,
8✔
736
                        hashsize > 0 ? hashsize : default_hashsize);
8✔
737
                if (dset == NULL) {
8✔
738
                    FatalErrorOnInit("failed to setup dataset for %s", set_name);
×
739
                    continue;
×
740
                }
×
741
                SCLogDebug("dataset %s: id %u type %s", set_name, dset->id, set_type->val);
8✔
742
                dset->from_yaml = true;
8✔
743

744
            } else if (strcmp(set_type->val, "ipv4") == 0) {
8✔
745
                Dataset *dset = DatasetGet(set_name, DATASET_TYPE_IPV4, save, load,
×
746
                        memcap > 0 ? memcap : default_memcap,
×
747
                        hashsize > 0 ? hashsize : default_hashsize);
×
748
                if (dset == NULL) {
×
749
                    FatalErrorOnInit("failed to setup dataset for %s", set_name);
×
750
                    continue;
×
751
                }
×
752
                SCLogDebug("dataset %s: id %u type %s", set_name, dset->id, set_type->val);
×
753
                dset->from_yaml = true;
×
754

755
            } else if (strcmp(set_type->val, "ip") == 0) {
×
756
                Dataset *dset = DatasetGet(set_name, DATASET_TYPE_IPV6, save, load,
×
757
                        memcap > 0 ? memcap : default_memcap,
×
758
                        hashsize > 0 ? hashsize : default_hashsize);
×
759
                if (dset == NULL) {
×
760
                    FatalErrorOnInit("failed to setup dataset for %s", set_name);
×
761
                    continue;
×
762
                }
×
763
                SCLogDebug("dataset %s: id %u type %s", set_name, dset->id, set_type->val);
×
764
                dset->from_yaml = true;
×
765
            }
×
766

767
            list_pos++;
8✔
768
        }
8✔
769
    }
1,484✔
770
    SCLogDebug("datasets done: %p", datasets);
2,170✔
771
    return 0;
2,170✔
772
}
2,170✔
773

774
void DatasetsDestroy(void)
775
{
2,115✔
776
    DatasetLock();
2,115✔
777
    SCLogDebug("destroying datasets: %p", sets);
2,115✔
778
    Dataset *set = sets;
2,115✔
779
    while (set) {
2,168✔
780
        SCLogDebug("destroying set %s", set->name);
53✔
781
        Dataset *next = set->next;
53✔
782
        THashShutdown(set->hash);
53✔
783
        SCFree(set);
53✔
784
        set = next;
53✔
785
    }
53✔
786
    sets = NULL;
2,115✔
787
    DatasetUnlock();
2,115✔
788
    SCLogDebug("destroying datasets done: %p", sets);
2,115✔
789
}
2,115✔
790

791
/** \brief output callback writing one serialized record to a file
 *
 *  \param ctx FILE pointer to write to, may be NULL
 *  \param data bytes to write
 *  \param data_len number of bytes in \p data
 *
 *  \retval n result of fwrite() for a single item of \p data_len bytes
 *            (1 if the whole record was written), or 0 if \p ctx is NULL
 */
static int SaveCallback(void *ctx, const uint8_t *data, const uint32_t data_len)
{
    FILE *out = ctx;
    if (out == NULL)
        return 0;

    return (int)fwrite(data, data_len, 1, out);
}
1,217✔
800

801
static int Md5AsAscii(const void *s, char *out, size_t out_size)
802
{
1✔
803
    const Md5Type *md5 = s;
1✔
804
    char str[256];
1✔
805
    PrintHexString(str, sizeof(str), (uint8_t *)md5->md5, sizeof(md5->md5));
1✔
806
    strlcat(out, str, out_size);
1✔
807
    strlcat(out, "\n", out_size);
1✔
808
    return (int)strlen(out);
1✔
809
}
1✔
810

811
static int Sha256AsAscii(const void *s, char *out, size_t out_size)
812
{
×
813
    const Sha256Type *sha = s;
×
814
    char str[256];
×
815
    PrintHexString(str, sizeof(str), (uint8_t *)sha->sha256, sizeof(sha->sha256));
×
816
    strlcat(out, str, out_size);
×
817
    strlcat(out, "\n", out_size);
×
818
    return (int)strlen(out);
×
819
}
×
820

821
static int IPv4AsAscii(const void *s, char *out, size_t out_size)
822
{
3✔
823
    const IPv4Type *ip4 = s;
3✔
824
    char str[256];
3✔
825
    PrintInet(AF_INET, ip4->ipv4, str, sizeof(str));
3✔
826
    strlcat(out, str, out_size);
3✔
827
    strlcat(out, "\n", out_size);
3✔
828
    return (int)strlen(out);
3✔
829
}
3✔
830

831
static int IPv6AsAscii(const void *s, char *out, size_t out_size)
832
{
4✔
833
    const IPv6Type *ip6 = s;
4✔
834
    char str[256];
4✔
835
    bool is_ipv4 = true;
4✔
836
    for (int i = 4; i <= 15; i++) {
50✔
837
        if (ip6->ipv6[i] != 0) {
48✔
838
            is_ipv4 = false;
2✔
839
            break;
2✔
840
        }
2✔
841
    }
48✔
842
    if (is_ipv4) {
4✔
843
        PrintInet(AF_INET, ip6->ipv6, str, sizeof(str));
2✔
844
    } else {
2✔
845
        PrintInet(AF_INET6, ip6->ipv6, str, sizeof(str));
2✔
846
    }
2✔
847
    strlcat(out, str, out_size);
4✔
848
    strlcat(out, "\n", out_size);
4✔
849
    return (int)strlen(out);
4✔
850
}
4✔
851

852
void DatasetsSave(void)
853
{
2,115✔
854
    DatasetLock();
2,115✔
855
    SCLogDebug("saving datasets: %p", sets);
2,115✔
856
    Dataset *set = sets;
2,115✔
857
    while (set) {
2,168✔
858
        if (strlen(set->save) == 0)
53✔
859
            goto next;
38✔
860

861
        FILE *fp = fopen(set->save, "w");
15✔
862
        if (fp == NULL)
15✔
863
            goto next;
×
864

865
        SCLogDebug("dumping %s to %s", set->name, set->save);
15✔
866

867
        switch (set->type) {
15✔
868
            case DATASET_TYPE_STRING:
10✔
869
                THashWalk(set->hash, StringAsBase64, SaveCallback, fp);
10✔
870
                break;
10✔
871
            case DATASET_TYPE_MD5:
1✔
872
                THashWalk(set->hash, Md5AsAscii, SaveCallback, fp);
1✔
873
                break;
1✔
874
            case DATASET_TYPE_SHA256:
×
875
                THashWalk(set->hash, Sha256AsAscii, SaveCallback, fp);
×
876
                break;
×
877
            case DATASET_TYPE_IPV4:
2✔
878
                THashWalk(set->hash, IPv4AsAscii, SaveCallback, fp);
2✔
879
                break;
2✔
880
            case DATASET_TYPE_IPV6:
2✔
881
                THashWalk(set->hash, IPv6AsAscii, SaveCallback, fp);
2✔
882
                break;
2✔
883
        }
15✔
884

885
        fclose(fp);
15✔
886

887
    next:
53✔
888
        set = set->next;
53✔
889
    }
53✔
890
    DatasetUnlock();
2,115✔
891
}
2,115✔
892

893
/**
 *  \brief check whether a string is present in the set
 *  \retval -1 \a set is NULL
 *  \retval 0 not found
 *  \retval 1 found
 */
static int DatasetLookupString(Dataset *set, const uint8_t *data, const uint32_t data_len)
{
    if (set == NULL)
        return -1;

    StringType key = { .ptr = (uint8_t *)data, .len = data_len, .rep = 0 };
    THashData *hit = THashLookupFromHash(set->hash, &key);
    if (hit == NULL)
        return 0;

    /* only presence matters, hand the entry straight back */
    DatasetUnlockData(hit);
    return 1;
}
6✔
906

907
/**
 *  \brief look up a string and report the reputation stored with it
 *  \retval DataRepResultType .found is true and .rep holds the stored
 *          value on a hit; { false, 0 } when \a set is NULL or on a miss
 */
static DataRepResultType DatasetLookupStringwRep(Dataset *set,
        const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
{
    DataRepResultType result = { .found = false, .rep = 0 };

    if (set != NULL) {
        StringType key = { .ptr = (uint8_t *)data, .len = data_len, .rep = *rep };
        THashData *hit = THashLookupFromHash(set->hash, &key);
        if (hit != NULL) {
            const StringType *stored = hit->data;
            result.found = true;
            result.rep = stored->rep;
            DatasetUnlockData(hit);
        }
    }
    return result;
}
2✔
926

927
/**
 *  \brief check whether a 4 byte IPv4 address is present in the set
 *  \retval -1 \a set is NULL or \a data_len is not 4
 *  \retval 0 not found
 *  \retval 1 found
 */
static int DatasetLookupIPv4(Dataset *set, const uint8_t *data, const uint32_t data_len)
{
    if (set == NULL || data_len != 4)
        return -1;

    IPv4Type key = { .rep = 0 };
    memcpy(key.ipv4, data, 4);

    THashData *hit = THashLookupFromHash(set->hash, &key);
    if (hit == NULL)
        return 0;

    DatasetUnlockData(hit);
    return 1;
}
6✔
944

945
static DataRepResultType DatasetLookupIPv4wRep(
946
        Dataset *set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
947
{
×
948
    DataRepResultType rrep = { .found = false, .rep = 0 };
×
949

950
    if (set == NULL)
×
951
        return rrep;
×
952

953
    if (data_len != 4)
×
954
        return rrep;
×
955

956
    IPv4Type lookup = { .rep = 0 };
×
957
    memcpy(lookup.ipv4, data, data_len);
×
958
    THashData *rdata = THashLookupFromHash(set->hash, &lookup);
×
959
    if (rdata) {
×
960
        IPv4Type *found = rdata->data;
×
961
        rrep.found = true;
×
962
        rrep.rep = found->rep;
×
963
        DatasetUnlockData(rdata);
×
964
        return rrep;
×
965
    }
×
966
    return rrep;
×
967
}
×
968

969
/**
 *  \brief check whether an address is present in an IPv6 set
 *
 *  Accepts 16 byte and 4 byte input; the bytes are copied to the start
 *  of a zero-initialized key.
 *
 *  \retval -1 \a set is NULL or \a data_len is neither 16 nor 4
 *  \retval 0 not found
 *  \retval 1 found
 */
static int DatasetLookupIPv6(Dataset *set, const uint8_t *data, const uint32_t data_len)
{
    if (set == NULL)
        return -1;
    if (!(data_len == 16 || data_len == 4))
        return -1;

    IPv6Type key = { .rep = 0 };
    memcpy(key.ipv6, data, data_len);

    THashData *hit = THashLookupFromHash(set->hash, &key);
    if (hit == NULL)
        return 0;

    DatasetUnlockData(hit);
    return 1;
}
3✔
986

987
static DataRepResultType DatasetLookupIPv6wRep(
988
        Dataset *set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
989
{
×
990
    DataRepResultType rrep = { .found = false, .rep = 0 };
×
991

992
    if (set == NULL)
×
993
        return rrep;
×
994

995
    if (data_len != 16 && data_len != 4)
×
996
        return rrep;
×
997

998
    IPv6Type lookup = { .rep = 0 };
×
999
    memcpy(lookup.ipv6, data, data_len);
×
1000
    THashData *rdata = THashLookupFromHash(set->hash, &lookup);
×
1001
    if (rdata) {
×
1002
        IPv6Type *found = rdata->data;
×
1003
        rrep.found = true;
×
1004
        rrep.rep = found->rep;
×
1005
        DatasetUnlockData(rdata);
×
1006
        return rrep;
×
1007
    }
×
1008
    return rrep;
×
1009
}
×
1010

1011
/**
 *  \brief check whether a 16 byte MD5 digest is present in the set
 *  \retval -1 \a set is NULL or \a data_len is not 16
 *  \retval 0 not found
 *  \retval 1 found
 */
static int DatasetLookupMd5(Dataset *set, const uint8_t *data, const uint32_t data_len)
{
    if (set == NULL || data_len != 16)
        return -1;

    Md5Type key = { .rep = 0 };
    memcpy(key.md5, data, data_len);

    THashData *hit = THashLookupFromHash(set->hash, &key);
    if (hit == NULL)
        return 0;

    DatasetUnlockData(hit);
    return 1;
}
3✔
1028

1029
/**
 *  \brief look up a 16 byte MD5 digest and report the stored reputation
 *  \note \a rep is not used to build the lookup key here
 *  \retval DataRepResultType .found is true and .rep holds the stored
 *          value on a hit; { false, 0 } when \a set is NULL, \a data_len
 *          is not 16, or on a miss
 */
static DataRepResultType DatasetLookupMd5wRep(Dataset *set,
        const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
{
    DataRepResultType result = { .found = false, .rep = 0 };

    if (set == NULL || data_len != 16)
        return result;

    Md5Type key = { .rep = 0 };
    memcpy(key.md5, data, data_len);

    THashData *hit = THashLookupFromHash(set->hash, &key);
    if (hit != NULL) {
        const Md5Type *stored = hit->data;
        result.found = true;
        result.rep = stored->rep;
        DatasetUnlockData(hit);
    }
    return result;
}
1✔
1052

1053
/**
 *  \brief check whether a 32 byte SHA256 digest is present in the set
 *  \retval -1 \a set is NULL or \a data_len is not 32
 *  \retval 0 not found
 *  \retval 1 found
 */
static int DatasetLookupSha256(Dataset *set, const uint8_t *data, const uint32_t data_len)
{
    if (set == NULL || data_len != 32)
        return -1;

    Sha256Type key = { .rep = 0 };
    memcpy(key.sha256, data, data_len);

    THashData *hit = THashLookupFromHash(set->hash, &key);
    if (hit == NULL)
        return 0;

    DatasetUnlockData(hit);
    return 1;
}
×
1070

1071
/**
 *  \brief look up a 32 byte SHA256 digest and report the stored reputation
 *  \note \a rep is not used to build the lookup key here
 *  \retval DataRepResultType .found is true and .rep holds the stored
 *          value on a hit; { false, 0 } when \a set is NULL, \a data_len
 *          is not 32, or on a miss
 */
static DataRepResultType DatasetLookupSha256wRep(Dataset *set,
        const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
{
    DataRepResultType result = { .found = false, .rep = 0 };

    if (set == NULL || data_len != 32)
        return result;

    Sha256Type key = { .rep = 0 };
    memcpy(key.sha256, data, data_len);

    THashData *hit = THashLookupFromHash(set->hash, &key);
    if (hit != NULL) {
        const Sha256Type *stored = hit->data;
        result.found = true;
        result.rep = stored->rep;
        DatasetUnlockData(hit);
    }
    return result;
}
1✔
1094

1095
/**
1096
 *  \brief see if \a data is part of the set
1097
 *  \param set dataset
1098
 *  \param data data to look up
1099
 *  \param data_len length in bytes of \a data
1100
 *  \retval -1 error
1101
 *  \retval 0 not found
1102
 *  \retval 1 found
1103
 */
1104
int DatasetLookup(Dataset *set, const uint8_t *data, const uint32_t data_len)
1105
{
18✔
1106
    if (set == NULL)
18✔
1107
        return -1;
×
1108

1109
    switch (set->type) {
18✔
1110
        case DATASET_TYPE_STRING:
6✔
1111
            return DatasetLookupString(set, data, data_len);
6✔
1112
        case DATASET_TYPE_MD5:
3✔
1113
            return DatasetLookupMd5(set, data, data_len);
3✔
1114
        case DATASET_TYPE_SHA256:
×
1115
            return DatasetLookupSha256(set, data, data_len);
×
1116
        case DATASET_TYPE_IPV4:
6✔
1117
            return DatasetLookupIPv4(set, data, data_len);
6✔
1118
        case DATASET_TYPE_IPV6:
3✔
1119
            return DatasetLookupIPv6(set, data, data_len);
3✔
1120
    }
18✔
1121
    return -1;
×
1122
}
18✔
1123

1124
DataRepResultType DatasetLookupwRep(Dataset *set, const uint8_t *data, const uint32_t data_len,
1125
        const DataRepType *rep)
1126
{
4✔
1127
    DataRepResultType rrep = { .found = false, .rep = 0 };
4✔
1128
    if (set == NULL)
4✔
1129
        return rrep;
×
1130

1131
    switch (set->type) {
4✔
1132
        case DATASET_TYPE_STRING:
2✔
1133
            return DatasetLookupStringwRep(set, data, data_len, rep);
2✔
1134
        case DATASET_TYPE_MD5:
1✔
1135
            return DatasetLookupMd5wRep(set, data, data_len, rep);
1✔
1136
        case DATASET_TYPE_SHA256:
1✔
1137
            return DatasetLookupSha256wRep(set, data, data_len, rep);
1✔
1138
        case DATASET_TYPE_IPV4:
×
1139
            return DatasetLookupIPv4wRep(set, data, data_len, rep);
×
1140
        case DATASET_TYPE_IPV6:
×
1141
            return DatasetLookupIPv6wRep(set, data, data_len, rep);
×
1142
    }
4✔
1143
    return rrep;
×
1144
}
4✔
1145

1146
/**
1147
 *  \retval 1 data was added to the hash
1148
 *  \retval 0 data was not added to the hash as it is already there
1149
 *  \retval -1 failed to add data to the hash
1150
 */
1151
static int DatasetAddString(Dataset *set, const uint8_t *data, const uint32_t data_len)
1152
{
1,132,677✔
1153
    if (set == NULL)
1,132,677✔
1154
        return -1;
×
1155

1156
    StringType lookup = { .ptr = (uint8_t *)data, .len = data_len, .rep = 0 };
1,132,677✔
1157
    struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
1,132,677✔
1158
    if (res.data) {
1,132,690✔
1159
        DatasetUnlockData(res.data);
1,132,690✔
1160
        return res.is_new ? 1 : 0;
1,132,690✔
1161
    }
1,132,690✔
1162
    return -1;
2,147,483,647✔
1163
}
1,132,677✔
1164

1165
/**
1166
 *  \retval 1 data was added to the hash
1167
 *  \retval 0 data was not added to the hash as it is already there
1168
 *  \retval -1 failed to add data to the hash
1169
 */
1170
static int DatasetAddStringwRep(
1171
        Dataset *set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
1172
{
3✔
1173
    if (set == NULL)
3✔
1174
        return -1;
×
1175

1176
    StringType lookup = { .ptr = (uint8_t *)data, .len = data_len,
3✔
1177
        .rep = *rep };
3✔
1178
    struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
3✔
1179
    if (res.data) {
3✔
1180
        DatasetUnlockData(res.data);
3✔
1181
        return res.is_new ? 1 : 0;
3✔
1182
    }
3✔
1183
    return -1;
×
1184
}
3✔
1185

1186
/**
 *  \brief add an IPv4 address to the set; the first 4 bytes of \a data are used
 *  \retval 1 data was added to the hash
 *  \retval 0 data was already in the hash
 *  \retval -1 \a set is NULL or the hash did not return an entry
 *  \retval -2 \a data_len is smaller than 4
 */
static int DatasetAddIPv4(Dataset *set, const uint8_t *data, const uint32_t data_len)
{
    if (set == NULL)
        return -1;
    if (data_len < 4)
        return -2;

    IPv4Type key = { .rep = 0 };
    memcpy(key.ipv4, data, 4);

    struct THashDataGetResult got = THashGetFromHash(set->hash, &key);
    if (got.data == NULL)
        return -1;

    DatasetUnlockData(got.data);
    if (got.is_new)
        return 1;
    return 0;
}
12✔
1205

1206
/**
 *  \brief add an address to an IPv6 set
 *
 *  Accepts 16 byte and 4 byte input; the bytes are copied to the start
 *  of a zero-initialized key.
 *
 *  \retval 1 data was added to the hash
 *  \retval 0 data was already in the hash
 *  \retval -1 \a set is NULL or the hash did not return an entry
 *  \retval -2 \a data_len is neither 16 nor 4
 */
static int DatasetAddIPv6(Dataset *set, const uint8_t *data, const uint32_t data_len)
{
    if (set == NULL)
        return -1;
    if (!(data_len == 16 || data_len == 4))
        return -2;

    IPv6Type key = { .rep = 0 };
    memcpy(key.ipv6, data, data_len);

    struct THashDataGetResult got = THashGetFromHash(set->hash, &key);
    if (got.data == NULL)
        return -1;

    DatasetUnlockData(got.data);
    if (got.is_new)
        return 1;
    return 0;
}
12✔
1225

1226
static int DatasetAddIPv4wRep(
1227
        Dataset *set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
1228
{
×
1229
    if (set == NULL)
×
1230
        return -1;
×
1231

1232
    if (data_len < 4)
×
1233
        return -2;
×
1234

1235
    IPv4Type lookup = { .rep = *rep };
×
1236
    memcpy(lookup.ipv4, data, 4);
×
1237
    struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
×
1238
    if (res.data) {
×
1239
        DatasetUnlockData(res.data);
×
1240
        return res.is_new ? 1 : 0;
×
1241
    }
×
1242
    return -1;
×
1243
}
×
1244

1245
static int DatasetAddIPv6wRep(
1246
        Dataset *set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
1247
{
×
1248
    if (set == NULL)
×
1249
        return -1;
×
1250

1251
    if (data_len != 16)
×
1252
        return -2;
×
1253

1254
    IPv6Type lookup = { .rep = *rep };
×
1255
    memcpy(lookup.ipv6, data, 16);
×
1256
    struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
×
1257
    if (res.data) {
×
1258
        DatasetUnlockData(res.data);
×
1259
        return res.is_new ? 1 : 0;
×
1260
    }
×
1261
    return -1;
×
1262
}
×
1263

1264
/**
 *  \brief add a 16 byte MD5 digest to the set
 *  \retval 1 data was added to the hash
 *  \retval 0 data was already in the hash
 *  \retval -1 \a set is NULL or the hash did not return an entry
 *  \retval -2 \a data_len is not 16
 */
static int DatasetAddMd5(Dataset *set, const uint8_t *data, const uint32_t data_len)
{
    if (set == NULL)
        return -1;
    if (data_len != 16)
        return -2;

    Md5Type key = { .rep = 0 };
    memcpy(key.md5, data, 16);

    struct THashDataGetResult got = THashGetFromHash(set->hash, &key);
    if (got.data == NULL)
        return -1;

    DatasetUnlockData(got.data);
    if (got.is_new)
        return 1;
    return 0;
}
1✔
1281

1282
static int DatasetAddMd5wRep(
1283
        Dataset *set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
1284
{
2✔
1285
    if (set == NULL)
2✔
1286
        return -1;
×
1287

1288
    if (data_len != 16)
2✔
1289
        return -2;
×
1290

1291
    Md5Type lookup = { .rep = *rep };
2✔
1292
    memcpy(lookup.md5, data, 16);
2✔
1293
    struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
2✔
1294
    if (res.data) {
2✔
1295
        DatasetUnlockData(res.data);
2✔
1296
        return res.is_new ? 1 : 0;
2✔
1297
    }
2✔
1298
    return -1;
×
1299
}
2✔
1300

1301
static int DatasetAddSha256wRep(
1302
        Dataset *set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
1303
{
2✔
1304
    if (set == NULL)
2✔
1305
        return -1;
×
1306

1307
    if (data_len != 32)
2✔
1308
        return -2;
×
1309

1310
    Sha256Type lookup = { .rep = *rep };
2✔
1311
    memcpy(lookup.sha256, data, 32);
2✔
1312
    struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
2✔
1313
    if (res.data) {
2✔
1314
        DatasetUnlockData(res.data);
2✔
1315
        return res.is_new ? 1 : 0;
2✔
1316
    }
2✔
1317
    return -1;
×
1318
}
2✔
1319

1320
/**
 *  \brief add a 32 byte SHA256 digest to the set
 *  \retval 1 data was added to the hash
 *  \retval 0 data was already in the hash
 *  \retval -1 \a set is NULL or the hash did not return an entry
 *  \retval -2 \a data_len is not 32
 */
static int DatasetAddSha256(Dataset *set, const uint8_t *data, const uint32_t data_len)
{
    if (set == NULL)
        return -1;
    if (data_len != 32)
        return -2;

    Sha256Type key = { .rep = 0 };
    memcpy(key.sha256, data, 32);

    struct THashDataGetResult got = THashGetFromHash(set->hash, &key);
    if (got.data == NULL)
        return -1;

    DatasetUnlockData(got.data);
    if (got.is_new)
        return 1;
    return 0;
}
×
1337

1338
/**
 *  \brief add \a data to the set using the add routine for the set's type
 *  \retval int result of the type specific add routine, or -1 for a
 *              NULL \a set or an unhandled set type
 */
int SCDatasetAdd(Dataset *set, const uint8_t *data, const uint32_t data_len)
{
    int rc = -1;

    if (set != NULL) {
        switch (set->type) {
            case DATASET_TYPE_STRING:
                rc = DatasetAddString(set, data, data_len);
                break;
            case DATASET_TYPE_MD5:
                rc = DatasetAddMd5(set, data, data_len);
                break;
            case DATASET_TYPE_SHA256:
                rc = DatasetAddSha256(set, data, data_len);
                break;
            case DATASET_TYPE_IPV4:
                rc = DatasetAddIPv4(set, data, data_len);
                break;
            case DATASET_TYPE_IPV6:
                rc = DatasetAddIPv6(set, data, data_len);
                break;
        }
    }
    return rc;
}
1,132,709✔
1357

1358
int SCDatasetAddwRep(
1359
        Dataset *set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
1360
{
7✔
1361
    if (set == NULL)
7✔
1362
        return -1;
×
1363

1364
    switch (set->type) {
7✔
1365
        case DATASET_TYPE_STRING:
3✔
1366
            return DatasetAddStringwRep(set, data, data_len, rep);
3✔
1367
        case DATASET_TYPE_MD5:
2✔
1368
            return DatasetAddMd5wRep(set, data, data_len, rep);
2✔
1369
        case DATASET_TYPE_SHA256:
2✔
1370
            return DatasetAddSha256wRep(set, data, data_len, rep);
2✔
1371
        case DATASET_TYPE_IPV4:
×
1372
            return DatasetAddIPv4wRep(set, data, data_len, rep);
×
1373
        case DATASET_TYPE_IPV6:
×
1374
            return DatasetAddIPv6wRep(set, data, data_len, rep);
×
1375
    }
7✔
1376
    return -1;
×
1377
}
7✔
1378

1379
typedef int (*DatasetOpFunc)(Dataset *set, const uint8_t *data, const uint32_t data_len);
1380

1381
static int DatasetOpSerialized(Dataset *set, const char *string, DatasetOpFunc DatasetOpString,
1382
        DatasetOpFunc DatasetOpMd5, DatasetOpFunc DatasetOpSha256, DatasetOpFunc DatasetOpIPv4,
1383
        DatasetOpFunc DatasetOpIPv6)
1384
{
×
1385
    if (set == NULL)
×
1386
        return -1;
×
1387
    if (strlen(string) == 0)
×
1388
        return -1;
×
1389

1390
    switch (set->type) {
×
1391
        case DATASET_TYPE_STRING: {
×
1392
            if (strlen(string) > UINT16_MAX) {
×
1393
                // size check before cast and stack allocation
1394
                return -1;
×
1395
            }
×
1396
            uint32_t decoded_size = SCBase64DecodeBufferSize((uint32_t)strlen(string));
×
1397
            uint8_t decoded[decoded_size];
×
1398
            uint32_t num_decoded = SCBase64Decode(
×
1399
                    (const uint8_t *)string, strlen(string), SCBase64ModeStrict, decoded);
×
1400
            if (num_decoded == 0) {
×
1401
                return -2;
×
1402
            }
×
1403

1404
            return DatasetOpString(set, decoded, num_decoded);
×
1405
        }
×
1406
        case DATASET_TYPE_MD5: {
×
1407
            if (strlen(string) != 32)
×
1408
                return -2;
×
1409
            uint8_t hash[16];
×
1410
            if (HexToRaw((const uint8_t *)string, 32, hash, sizeof(hash)) < 0)
×
1411
                return -2;
×
1412
            return DatasetOpMd5(set, hash, 16);
×
1413
        }
×
1414
        case DATASET_TYPE_SHA256: {
×
1415
            if (strlen(string) != 64)
×
1416
                return -2;
×
1417
            uint8_t hash[32];
×
1418
            if (HexToRaw((const uint8_t *)string, 64, hash, sizeof(hash)) < 0)
×
1419
                return -2;
×
1420
            return DatasetOpSha256(set, hash, 32);
×
1421
        }
×
1422
        case DATASET_TYPE_IPV4: {
×
1423
            struct in_addr in;
×
1424
            if (inet_pton(AF_INET, string, &in) != 1)
×
1425
                return -2;
×
1426
            return DatasetOpIPv4(set, (uint8_t *)&in.s_addr, 4);
×
1427
        }
×
1428
        case DATASET_TYPE_IPV6: {
×
1429
            struct in6_addr in6;
×
1430
            if (DatasetParseIpv6String(set, string, &in6) != 0) {
×
1431
                SCLogError("Dataset failed to import %s as IPv6", string);
×
1432
                return -2;
×
1433
            }
×
1434
            return DatasetOpIPv6(set, (uint8_t *)&in6.s6_addr, 16);
×
1435
        }
×
1436
    }
×
1437
    return -1;
×
1438
}
×
1439

1440
/** \brief add serialized data to set
1441
 *  \retval int 1 added
1442
 *  \retval int 0 already in hash
1443
 *  \retval int -1 API error (not added)
1444
 *  \retval int -2 DATA error
1445
 */
1446
int DatasetAddSerialized(Dataset *set, const char *string)
1447
{
×
1448
    return DatasetOpSerialized(set, string, DatasetAddString, DatasetAddMd5, DatasetAddSha256,
×
1449
            DatasetAddIPv4, DatasetAddIPv6);
×
1450
}
×
1451

1452
/** \brief look up serialized data in set
1453
 *  \retval int 1 found
1454
 *  \retval int 0 not found
1455
 *  \retval int -1 API error (not added)
1456
 *  \retval int -2 DATA error
1457
 */
1458
int DatasetLookupSerialized(Dataset *set, const char *string)
{
    /* decoding is shared; only the per-type lookup routines differ */
    const int rc = DatasetOpSerialized(set, string, DatasetLookupString, DatasetLookupMd5,
            DatasetLookupSha256, DatasetLookupIPv4, DatasetLookupIPv6);
    return rc;
}
×
1463

1464
/**
1465
 *  \retval 1 data was removed from the hash
1466
 *  \retval 0 data not removed (busy)
1467
 *  \retval -1 data not found
1468
 */
1469
static int DatasetRemoveString(Dataset *set, const uint8_t *data, const uint32_t data_len)
1470
{
1✔
1471
    if (set == NULL)
1✔
1472
        return -1;
×
1473

1474
    StringType lookup = { .ptr = (uint8_t *)data, .len = data_len, .rep = 0 };
1✔
1475
    return THashRemoveFromHash(set->hash, &lookup);
1✔
1476
}
1✔
1477

1478
/**
 *  \brief remove a 4 byte IPv4 address from the set
 *  \retval int result of THashRemoveFromHash()
 *  \retval -1 \a set is NULL
 *  \retval -2 \a data_len is not 4
 */
static int DatasetRemoveIPv4(Dataset *set, const uint8_t *data, const uint32_t data_len)
{
    if (set == NULL)
        return -1;
    if (data_len != 4)
        return -2;

    IPv4Type key = { .rep = 0 };
    memcpy(key.ipv4, data, 4);

    const int rc = THashRemoveFromHash(set->hash, &key);
    return rc;
}
×
1490

1491
/**
 *  \brief remove an address from an IPv6 set
 *
 *  Accepts 16 byte and 4 byte input, matching DatasetAddIPv6() and
 *  DatasetLookupIPv6(): the bytes are copied to the start of a
 *  zero-initialized key, so an entry added from 4 bytes can be removed
 *  with the same 4 bytes.
 *
 *  \retval int result of THashRemoveFromHash()
 *  \retval -1 \a set is NULL
 *  \retval -2 \a data_len is neither 16 nor 4
 */
static int DatasetRemoveIPv6(Dataset *set, const uint8_t *data, const uint32_t data_len)
{
    if (set == NULL)
        return -1;

    if (data_len != 16 && data_len != 4)
        return -2;

    /* build the key exactly like the add/lookup paths do */
    IPv6Type lookup = { .rep = 0 };
    memcpy(lookup.ipv6, data, data_len);
    return THashRemoveFromHash(set->hash, &lookup);
}
×
1503

1504
/**
 *  \brief remove a 16 byte MD5 digest from the set
 *  \retval int result of THashRemoveFromHash()
 *  \retval -1 \a set is NULL
 *  \retval -2 \a data_len is not 16
 */
static int DatasetRemoveMd5(Dataset *set, const uint8_t *data, const uint32_t data_len)
{
    if (set == NULL)
        return -1;
    if (data_len != 16)
        return -2;

    Md5Type key = { .rep = 0 };
    memcpy(key.md5, data, 16);

    const int rc = THashRemoveFromHash(set->hash, &key);
    return rc;
}
×
1516

1517
/**
 *  \brief remove a 32 byte SHA256 digest from the set
 *  \retval int result of THashRemoveFromHash()
 *  \retval -1 \a set is NULL
 *  \retval -2 \a data_len is not 32
 */
static int DatasetRemoveSha256(Dataset *set, const uint8_t *data, const uint32_t data_len)
{
    if (set == NULL)
        return -1;
    if (data_len != 32)
        return -2;

    Sha256Type key = { .rep = 0 };
    memcpy(key.sha256, data, 32);

    const int rc = THashRemoveFromHash(set->hash, &key);
    return rc;
}
×
1529

1530
/** \brief remove serialized data from set
1531
 *  \retval int 1 removed
1532
 *  \retval int 0 found but busy (not removed)
1533
 *  \retval int -1 API error (not removed)
1534
 *  \retval int -2 DATA error */
1535
int DatasetRemoveSerialized(Dataset *set, const char *string)
1536
{
×
1537
    return DatasetOpSerialized(set, string, DatasetRemoveString, DatasetRemoveMd5,
×
1538
            DatasetRemoveSha256, DatasetRemoveIPv4, DatasetRemoveIPv6);
×
1539
}
×
1540

1541
/**
 *  \brief remove \a data from the set using the remove routine for the
 *         set's type
 *  \retval int result of the type specific remove routine, or -1 for a
 *              NULL \a set or an unhandled set type
 */
int DatasetRemove(Dataset *set, const uint8_t *data, const uint32_t data_len)
{
    int rc = -1;

    if (set != NULL) {
        switch (set->type) {
            case DATASET_TYPE_STRING:
                rc = DatasetRemoveString(set, data, data_len);
                break;
            case DATASET_TYPE_MD5:
                rc = DatasetRemoveMd5(set, data, data_len);
                break;
            case DATASET_TYPE_SHA256:
                rc = DatasetRemoveSha256(set, data, data_len);
                break;
            case DATASET_TYPE_IPV4:
                rc = DatasetRemoveIPv4(set, data, data_len);
                break;
            case DATASET_TYPE_IPV6:
                rc = DatasetRemoveIPv6(set, data, data_len);
                break;
        }
    }
    return rc;
}
1✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc