• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

OISF / suricata / 23374838686

21 Mar 2026 07:29AM UTC coverage: 59.341% (-20.0%) from 79.315%
23374838686

Pull #15075

github

web-flow
Merge 90b4e834f into 6587e363a
Pull Request #15075: Stack 8001 v16.4

38 of 70 new or added lines in 10 files covered. (54.29%)

34165 existing lines in 563 files now uncovered.

119621 of 201584 relevant lines covered (59.34%)

650666.92 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

42.25
/src/datasets.c
1
/* Copyright (C) 2017-2024 Open Information Security Foundation
2
 *
3
 * You can copy, redistribute or modify this Program under the terms of
4
 * the GNU General Public License version 2 as published by the Free
5
 * Software Foundation.
6
 *
7
 * This program is distributed in the hope that it will be useful,
8
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10
 * GNU General Public License for more details.
11
 *
12
 * You should have received a copy of the GNU General Public License
13
 * version 2 along with this program; if not, write to the Free Software
14
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
15
 * 02110-1301, USA.
16
 */
17

18
/**
19
 * \file
20
 *
21
 * \author Victor Julien <victor@inliniac.net>
22
 */
23

24
#include "suricata-common.h"
25
#include "suricata.h"
26
#include "rust.h"
27
#include "conf.h"
28
#include "datasets.h"
29
#include "datasets-string.h"
30
#include "datasets-ipv4.h"
31
#include "datasets-ipv6.h"
32
#include "datasets-md5.h"
33
#include "datasets-sha256.h"
34
#include "datasets-reputation.h"
35
#include "datasets-context-json.h"
36
#include "util-conf.h"
37
#include "util-mem.h"
38
#include "util-thash.h"
39
#include "util-print.h"
40
#include "util-byte.h"
41
#include "util-misc.h"
42
#include "util-path.h"
43
#include "util-debug.h"
44
#include "util-validate.h"
45

46
SCMutex sets_lock = SCMUTEX_INITIALIZER;
47
static Dataset *sets = NULL;
48
static uint32_t set_ids = 0;
49

50
uint32_t dataset_max_one_hashsize = 65536;
51
uint32_t dataset_max_total_hashsize = 16777216;
52
uint32_t dataset_used_hashsize = 0;
53

54
int DatasetAddwRep(Dataset *set, const uint8_t *data, const uint32_t data_len, DataRepType *rep);
55
static void DatasetUpdateHashsize(const char *name, uint32_t hash_size);
56

57
/** \brief release a hash entry: decrement its use count, then unlock it */
static inline void DatasetUnlockData(THashData *d)
{
    /* the value returned by the decrement is deliberately ignored */
    (void)THashDecrUsecnt(d);
    THashDataUnlock(d);
}
214✔
62
static bool DatasetIsStatic(const char *save, const char *load);
63

64
enum DatasetTypes DatasetGetTypeFromString(const char *s)
UNCOV
65
{
×
UNCOV
66
    if (strcasecmp("md5", s) == 0)
×
67
        return DATASET_TYPE_MD5;
×
UNCOV
68
    if (strcasecmp("sha256", s) == 0)
×
69
        return DATASET_TYPE_SHA256;
×
UNCOV
70
    if (strcasecmp("string", s) == 0)
×
71
        return DATASET_TYPE_STRING;
×
UNCOV
72
    if (strcasecmp("ipv4", s) == 0)
×
UNCOV
73
        return DATASET_TYPE_IPV4;
×
UNCOV
74
    if (strcasecmp("ip", s) == 0)
×
UNCOV
75
        return DATASET_TYPE_IPV6;
×
76
    return DATASET_TYPE_NOTSET;
×
UNCOV
77
}
×
78

79
/**
 * \brief register a fully set up dataset in the global list
 *
 * The set is pushed on the front of the list and its hash size is added to
 * the hash size accounting.
 *
 * \retval 0 on success
 * \retval -1 if the set has no hash table or its hash reports memcap reached
 */
int DatasetAppendSet(Dataset *set)
{
    if (set->hash == NULL)
        return -1;

    if (SC_ATOMIC_GET(set->hash->memcap_reached)) {
        SCLogError("dataset too large for set memcap");
        return -1;
    }

    SCLogDebug(
            "set %p/%s type %u save %s load %s", set, set->name, set->type, set->save, set->load);

    /* new sets become the list head */
    set->next = sets;
    sets = set;

    /* hash size accounting */
    DatasetUpdateHashsize(set->name, set->hash->config.hash_size);
    return 0;
}
703✔
101

102
void DatasetLock(void)
103
{
144,825✔
104
    SCMutexLock(&sets_lock);
144,825✔
105
}
144,825✔
106

107
void DatasetUnlock(void)
108
{
145,907✔
109
    SCMutexUnlock(&sets_lock);
145,907✔
110
}
145,907✔
111

112
Dataset *DatasetAlloc(const char *name)
113
{
11,181✔
114
    Dataset *set = SCCalloc(1, sizeof(*set));
11,181✔
115
    if (set) {
11,181✔
116
        set->id = set_ids++;
11,181✔
117
    }
11,181✔
118
    return set;
11,181✔
119
}
11,181✔
120

121
Dataset *DatasetSearchByName(const char *name)
122
{
73,951✔
123
    Dataset *set = sets;
73,951✔
124
    while (set) {
5,397,756✔
125
        if (strcasecmp(name, set->name) == 0 && !set->hidden) {
5,379,169✔
126
            return set;
55,364✔
127
        }
55,364✔
128
        set = set->next;
5,323,805✔
129
    }
5,323,805✔
130
    return NULL;
18,587✔
131
}
73,951✔
132

133
/**
 * \brief fill an IPv4 set from the file named in set->load
 *
 * \retval 0 if there is nothing to load or the load went through
 * \retval -1 if ParseDatasets() returned -1
 */
static int DatasetLoadIPv4(Dataset *set)
{
    if (set->load[0] == '\0')
        return 0;

    SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);

    /* when the load file is also the save file it is opened with "a+" */
    const bool shared_file = set->save[0] != '\0' && strcmp(set->save, set->load) == 0;
    const char *fopen_mode = shared_file ? "a+" : "r";

    const int rc = ParseDatasets(set, set->name, set->load, fopen_mode, DSIpv4);
    if (rc == -1)
        return -1;
    if (rc == -2) {
        /* if this returns, the function carries on and reports success */
        FatalErrorOnInit("dataset %s could not be processed", set->name);
    }

    THashConsolidateMemcap(set->hash);

    return 0;
}
66✔
155

156
/**
 * \brief parse a textual IPv4 or IPv6 address into the 16 byte storage format
 *
 * A line containing ':' is parsed as IPv6, anything else as IPv4. IPv4
 * addresses, whether given plainly or as IPv4-mapped IPv6 (::ffff:a.b.c.d),
 * are stored in the first 4 bytes with the remaining 12 bytes zeroed.
 *
 * The IPv4-mapped prefix is checked byte by byte, so detection does not
 * depend on host byte order.
 *
 * \param set set the line belongs to, used for error reporting only
 * \param line NUL terminated address string
 * \param in6 output address
 * \retval 0 on success
 * \retval -1 if the address cannot be parsed
 */
int DatasetParseIpv6String(Dataset *set, const char *line, struct in6_addr *in6)
{
    if (strchr(line, ':') != NULL) {
        /* IPv6 case */
        if (inet_pton(AF_INET6, line, in6) != 1) {
            FatalErrorOnInit("dataset data parse failed %s/%s: %s", set->name, set->load, line);
            return -1;
        }

        /* IPv4 in IPv6 notation needs transformation to internal Suricata storage */
        static const uint8_t v4mapped_prefix[12] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff };
        if (memcmp(in6->s6_addr, v4mapped_prefix, sizeof(v4mapped_prefix)) == 0) {
            uint8_t v4[4];
            memcpy(v4, &in6->s6_addr[12], sizeof(v4));
            memset(in6, 0, sizeof(struct in6_addr));
            memcpy(in6->s6_addr, v4, sizeof(v4));
        }
    } else {
        /* IPv4 case */
        struct in_addr in;
        if (inet_pton(AF_INET, line, &in) != 1) {
            FatalErrorOnInit("dataset data parse failed %s/%s: %s", set->name, set->load, line);
            return -1;
        }
        memset(in6, 0, sizeof(struct in6_addr));
        memcpy(in6, &in, sizeof(struct in_addr));
    }
    return 0;
}
×
186

187
/**
 * \brief fill an IPv6 ("ip") set from the file named in set->load
 *
 * \retval 0 if there is nothing to load or the load went through
 * \retval -1 if ParseDatasets() returned -1
 */
static int DatasetLoadIPv6(Dataset *set)
{
    if (set->load[0] == '\0')
        return 0;

    SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);

    /* when the load file is also the save file it is opened with "a+" */
    const bool shared_file = set->save[0] != '\0' && strcmp(set->save, set->load) == 0;
    const char *fopen_mode = shared_file ? "a+" : "r";

    const int rc = ParseDatasets(set, set->name, set->load, fopen_mode, DSIpv6);
    if (rc == -1)
        return -1;
    if (rc == -2) {
        /* if this returns, the function carries on and reports success */
        FatalErrorOnInit("dataset %s could not be processed", set->name);
    }

    THashConsolidateMemcap(set->hash);

    return 0;
}
52✔
209

210
/**
 * \brief fill an MD5 set from the file named in set->load
 *
 * \retval 0 if there is nothing to load or the load went through
 * \retval -1 if ParseDatasets() returned -1
 */
static int DatasetLoadMd5(Dataset *set)
{
    if (set->load[0] == '\0')
        return 0;

    SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);

    /* when the load file is also the save file it is opened with "a+" */
    const bool shared_file = set->save[0] != '\0' && strcmp(set->save, set->load) == 0;
    const char *fopen_mode = shared_file ? "a+" : "r";

    const int rc = ParseDatasets(set, set->name, set->load, fopen_mode, DSMd5);
    if (rc == -1)
        return -1;
    if (rc == -2) {
        /* if this returns, the function carries on and reports success */
        FatalErrorOnInit("dataset %s could not be processed", set->name);
    }

    THashConsolidateMemcap(set->hash);

    return 0;
}
5,738✔
232

233
/**
 * \brief fill a SHA256 set from the file named in set->load
 *
 * \retval 0 if there is nothing to load or the load went through
 * \retval -1 if ParseDatasets() returned -1
 */
static int DatasetLoadSha256(Dataset *set)
{
    if (set->load[0] == '\0')
        return 0;

    SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);

    /* when the load file is also the save file it is opened with "a+" */
    const bool shared_file = set->save[0] != '\0' && strcmp(set->save, set->load) == 0;
    const char *fopen_mode = shared_file ? "a+" : "r";

    const int rc = ParseDatasets(set, set->name, set->load, fopen_mode, DSSha256);
    if (rc == -1)
        return -1;
    if (rc == -2) {
        /* if this returns, the function carries on and reports success */
        FatalErrorOnInit("dataset %s could not be processed", set->name);
    }

    THashConsolidateMemcap(set->hash);

    return 0;
}
3,338✔
255

256
/**
 * \brief fill a string set from the file named in set->load
 *
 * \retval 0 if there is nothing to load or the load went through
 * \retval -1 if ParseDatasets() returned -1
 */
static int DatasetLoadString(Dataset *set)
{
    if (set->load[0] == '\0')
        return 0;

    SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);

    /* when the load file is also the save file it is opened with "a+" */
    const bool shared_file = set->save[0] != '\0' && strcmp(set->save, set->load) == 0;
    const char *fopen_mode = shared_file ? "a+" : "r";

    const int rc = ParseDatasets(set, set->name, set->load, fopen_mode, DSString);
    if (rc == -1)
        return -1;
    if (rc == -2) {
        /* if this returns, the function carries on and reports success */
        FatalErrorOnInit("dataset %s could not be processed", set->name);
    }

    THashConsolidateMemcap(set->hash);

    return 0;
}
1,181✔
279

280
extern bool g_system;
281

282
/** \brief what kind of path DatasetGetPath() is asked to resolve */
enum DatasetGetPathType {
    TYPE_STATE = 0, /**< resolved against the data directory only */
    TYPE_LOAD = 1,  /**< may fall back to the path as given, see DatasetGetPath() */
};
286

287
static void DatasetGetPath(
288
        const char *in_path, char *out_path, size_t out_size, enum DatasetGetPathType type)
289
{
×
290
    char path[PATH_MAX];
×
291
    struct stat st;
×
292

293
    if (PathIsAbsolute(in_path)) {
×
294
        strlcpy(path, in_path, sizeof(path));
×
295
        strlcpy(out_path, path, out_size);
×
296
        return;
×
297
    }
×
298

299
    const char *data_dir = ConfigGetDataDirectory();
×
300
    if (stat(data_dir, &st) != 0) {
×
301
        SCLogDebug("data-dir '%s': %s", data_dir, strerror(errno));
×
302
        return;
×
303
    }
×
304

305
    snprintf(path, sizeof(path), "%s/%s", data_dir, in_path); // TODO WINDOWS
×
306

307
    if (type == TYPE_LOAD) {
×
308
        if (stat(path, &st) != 0) {
×
309
            SCLogDebug("path %s: %s", path, strerror(errno));
×
310
            if (!g_system) {
×
311
                snprintf(path, sizeof(path), "%s", in_path);
×
312
            }
×
313
        }
×
314
    }
×
315
    strlcpy(out_path, path, out_size);
×
316
    SCLogDebug("in_path \'%s\' => \'%s\'", in_path, out_path);
×
317
}
×
318

319
/** \brief look for set by name without creating it */
320
Dataset *DatasetFind(const char *name, enum DatasetTypes type)
UNCOV
321
{
×
UNCOV
322
    DatasetLock();
×
UNCOV
323
    Dataset *set = DatasetSearchByName(name);
×
UNCOV
324
    if (set) {
×
UNCOV
325
        if (set->type != type) {
×
326
            DatasetUnlock();
×
327
            return NULL;
×
328
        }
×
UNCOV
329
    }
×
UNCOV
330
    DatasetUnlock();
×
UNCOV
331
    return set;
×
UNCOV
332
}
×
333

334
static bool DatasetCheckHashsize(const char *name, uint32_t hash_size)
335
{
13,536✔
336
    if (dataset_max_one_hashsize > 0 && hash_size > dataset_max_one_hashsize) {
13,536✔
337
        SCLogError("hashsize %u in dataset '%s' exceeds configured 'single-hashsize' limit (%u)",
2,355✔
338
                hash_size, name, dataset_max_one_hashsize);
2,355✔
339
        return false;
2,355✔
340
    }
2,355✔
341
    // we cannot underflow as we know from conf loading that
342
    // dataset_max_total_hashsize >= dataset_max_one_hashsize if dataset_max_total_hashsize > 0
343
    if (dataset_max_total_hashsize > 0 &&
11,181✔
344
            dataset_max_total_hashsize - hash_size < dataset_used_hashsize) {
11,181✔
345
        SCLogError("hashsize %u in dataset '%s' exceeds configured 'total-hashsizes' limit (%u, in "
×
346
                   "use %u)",
×
347
                hash_size, name, dataset_max_total_hashsize, dataset_used_hashsize);
×
348
        return false;
×
349
    }
×
350

351
    return true;
11,181✔
352
}
11,181✔
353

354
static void DatasetUpdateHashsize(const char *name, uint32_t hash_size)
355
{
703✔
356
    if (dataset_max_total_hashsize > 0) {
703✔
357
        dataset_used_hashsize += hash_size;
703✔
358
        SCLogDebug("set %s adding with hash_size %u", name, hash_size);
703✔
359
    }
703✔
360
}
703✔
361

362
/**
363
 * \return -1 on error
364
 * \return 0 on successful creation
365
 * \return 1 if the dataset already exists
366
 *
367
 * Calling function is responsible for locking via DatasetLock()
368
 */
369
int DatasetGetOrCreate(const char *name, enum DatasetTypes type, const char *save, const char *load,
370
        uint64_t *memcap, uint32_t *hashsize, Dataset **ret_set)
371
{
73,951✔
372
    uint64_t default_memcap = 0;
73,951✔
373
    uint32_t default_hashsize = 0;
73,951✔
374
    if (strlen(name) > DATASET_NAME_MAX_LEN) {
73,951✔
375
        return -1;
×
376
    }
×
377

378
    Dataset *set = DatasetSearchByName(name);
73,951✔
379
    if (set) {
73,951✔
380
        if (type != DATASET_TYPE_NOTSET && set->type != type) {
55,364✔
381
            SCLogError("dataset %s already "
1,723✔
382
                       "exists and is of type %u",
1,723✔
383
                    set->name, set->type);
1,723✔
384
            return -1;
1,723✔
385
        }
1,723✔
386

387
        if ((save == NULL || strlen(save) == 0) &&
53,641✔
388
            (load == NULL || strlen(load) == 0)) {
53,641✔
389
            // OK, rule keyword doesn't have to set state/load,
390
            // even when yaml set has set it.
391
        } else {
28,118✔
392
            if ((save == NULL && strlen(set->save) > 0) ||
25,523✔
393
                    (save != NULL && strcmp(set->save, save) != 0)) {
25,523✔
394
                SCLogError("dataset %s save mismatch: %s != %s", set->name, set->save, save);
1,082✔
395
                DatasetUnlock();
1,082✔
396
                return -1;
1,082✔
397
            }
1,082✔
398
            if ((load == NULL && strlen(set->load) > 0) ||
24,441✔
399
                    (load != NULL && strcmp(set->load, load) != 0)) {
24,441✔
400
                SCLogError("dataset %s load mismatch: %s != %s", set->name, set->load, load);
22,461✔
401
                return -1;
22,461✔
402
            }
22,461✔
403
        }
24,441✔
404

405
        *ret_set = set;
30,098✔
406
        return 1;
30,098✔
407
    }
53,641✔
408

409
    if (type == DATASET_TYPE_NOTSET) {
18,587✔
410
        SCLogError("dataset %s not defined", name);
5,051✔
411
        goto out_err;
5,051✔
412
    }
5,051✔
413

414
    DatasetGetDefaultMemcap(&default_memcap, &default_hashsize);
13,536✔
415
    if (*hashsize == 0) {
13,536✔
416
        *hashsize = default_hashsize;
7,784✔
417
    }
7,784✔
418
    if (*memcap == 0) {
13,536✔
419
        *memcap = default_memcap;
13,259✔
420
    }
13,259✔
421

422
    if (!DatasetCheckHashsize(name, *hashsize)) {
13,536✔
423
        goto out_err;
2,355✔
424
    }
2,355✔
425

426
    set = DatasetAlloc(name);
11,181✔
427
    if (set == NULL) {
11,181✔
428
        goto out_err;
×
429
    }
×
430

431
    strlcpy(set->name, name, sizeof(set->name));
11,181✔
432
    set->type = type;
11,181✔
433
    if (save && strlen(save)) {
11,181✔
434
        strlcpy(set->save, save, sizeof(set->save));
772✔
435
        SCLogDebug("name %s save '%s'", name, set->save);
772✔
436
    }
772✔
437
    if (load && strlen(load)) {
11,181✔
438
        strlcpy(set->load, load, sizeof(set->load));
10,640✔
439
        SCLogDebug("set \'%s\' loading \'%s\' from \'%s\'", set->name, load, set->load);
10,640✔
440
    }
10,640✔
441

442
    *ret_set = set;
11,181✔
443
    return 0;
11,181✔
444
out_err:
7,406✔
445
    if (set) {
7,406✔
446
        SCFree(set);
×
447
    }
×
448
    return -1;
7,406✔
449
}
11,181✔
450

451
Dataset *DatasetGet(const char *name, enum DatasetTypes type, const char *save, const char *load,
452
        uint64_t memcap, uint32_t hashsize)
453
{
73,649✔
454
    Dataset *set = NULL;
73,649✔
455

456
    DatasetLock();
73,649✔
457
    int ret = DatasetGetOrCreate(name, type, save, load, &memcap, &hashsize, &set);
73,649✔
458
    if (ret < 0) {
73,649✔
459
        SCLogError("dataset %s creation failed", name);
32,666✔
460
        DatasetUnlock();
32,666✔
461
        return NULL;
32,666✔
462
    }
32,666✔
463
    if (ret == 1) {
40,983✔
464
        SCLogDebug("dataset %s already exists", name);
29,837✔
465
        DatasetUnlock();
29,837✔
466
        return set;
29,837✔
467
    }
29,837✔
468

469
    char cnf_name[128];
11,146✔
470
    snprintf(cnf_name, sizeof(cnf_name), "datasets.%s.hash", name);
11,146✔
471
    switch (type) {
11,146✔
472
        case DATASET_TYPE_MD5:
5,988✔
473
            set->hash = THashInit(cnf_name, sizeof(Md5Type), Md5StrSet, Md5StrFree, Md5StrHash,
5,988✔
474
                    Md5StrCompare, NULL, NULL, load != NULL ? 1 : 0, memcap, hashsize);
5,988✔
475
            if (set->hash == NULL)
5,988✔
476
                goto out_err;
9✔
477
            if (DatasetLoadMd5(set) < 0)
5,979✔
478
                goto out_err;
5,613✔
479
            break;
366✔
480
        case DATASET_TYPE_STRING:
1,305✔
481
            set->hash = THashInit(cnf_name, sizeof(StringType), StringSet, StringFree, StringHash,
1,305✔
482
                    StringCompare, NULL, StringGetLength, load != NULL ? 1 : 0, memcap, hashsize);
1,305✔
483
            if (set->hash == NULL)
1,305✔
484
                goto out_err;
7✔
485
            if (DatasetLoadString(set) < 0)
1,298✔
486
                goto out_err;
1,179✔
487
            break;
119✔
488
        case DATASET_TYPE_SHA256:
3,604✔
489
            set->hash = THashInit(cnf_name, sizeof(Sha256Type), Sha256StrSet, Sha256StrFree,
3,604✔
490
                    Sha256StrHash, Sha256StrCompare, NULL, NULL, load != NULL ? 1 : 0, memcap,
3,604✔
491
                    hashsize);
3,604✔
492
            if (set->hash == NULL)
3,604✔
493
                goto out_err;
225✔
494
            if (DatasetLoadSha256(set) < 0)
3,379✔
495
                goto out_err;
3,331✔
496
            break;
48✔
497
        case DATASET_TYPE_IPV4:
117✔
498
            set->hash = THashInit(cnf_name, sizeof(IPv4Type), IPv4Set, IPv4Free, IPv4Hash,
117✔
499
                    IPv4Compare, NULL, NULL, load != NULL ? 1 : 0, memcap, hashsize);
117✔
500
            if (set->hash == NULL)
117✔
501
                goto out_err;
9✔
502
            if (DatasetLoadIPv4(set) < 0)
108✔
503
                goto out_err;
54✔
504
            break;
54✔
505
        case DATASET_TYPE_IPV6:
132✔
506
            set->hash = THashInit(cnf_name, sizeof(IPv6Type), IPv6Set, IPv6Free, IPv6Hash,
132✔
507
                    IPv6Compare, NULL, NULL, load != NULL ? 1 : 0, memcap, hashsize);
132✔
508
            if (set->hash == NULL)
132✔
509
                goto out_err;
3✔
510
            if (DatasetLoadIPv6(set) < 0)
129✔
511
                goto out_err;
48✔
512
            break;
81✔
513
    }
11,146✔
514

515
    if (DatasetAppendSet(set) < 0) {
668✔
UNCOV
516
        SCLogError("dataset %s append failed", name);
×
UNCOV
517
        goto out_err;
×
UNCOV
518
    }
×
519

520
    DatasetUnlock();
668✔
521
    return set;
668✔
522
out_err:
10,478✔
523
    if (set->hash) {
10,478✔
524
        THashShutdown(set->hash);
10,225✔
525
    }
10,225✔
526
    SCFree(set);
10,478✔
527
    DatasetUnlock();
10,478✔
528
    return NULL;
10,478✔
529
}
668✔
530

531
/**
 * \brief tell whether a set is static
 *
 * A set is static if it has a load path defined but no dynamic property
 * such as a save/state path.
 *
 * \param save save path, may be NULL or empty
 * \param load load path, may be NULL or empty
 */
static bool DatasetIsStatic(const char *save, const char *load)
{
    const bool has_load = (load != NULL) && (load[0] != '\0');
    const bool has_save = (save != NULL) && (save[0] != '\0');

    return has_load && !has_save;
}
1,794,477✔
542

543
void DatasetReload(void)
544
{
35,437✔
545
    /* In order to reload the datasets, just mark the current sets as hidden
546
     * and clean them up later.
547
     * New datasets shall be created with the rule reload and do not require
548
     * any intervention.
549
     * */
550
    DatasetLock();
35,437✔
551
    Dataset *set = sets;
35,437✔
552
    while (set) {
1,829,914✔
553
        if (!DatasetIsStatic(set->save, set->load) || set->from_yaml) {
1,794,477✔
554
            SCLogDebug("Not a static set, skipping %s", set->name);
1,794,389✔
555
            set = set->next;
1,794,389✔
556
            continue;
1,794,389✔
557
        }
1,794,389✔
558
        set->hidden = true;
88✔
559
        if (dataset_max_total_hashsize > 0) {
88✔
560
            DEBUG_VALIDATE_BUG_ON(set->hash->config.hash_size > dataset_used_hashsize);
88✔
561
            dataset_used_hashsize -= set->hash->config.hash_size;
88✔
562
        }
88✔
563
        SCLogDebug("Set %s at %p hidden successfully", set->name, set);
88✔
564
        set = set->next;
88✔
565
    }
88✔
566
    DatasetUnlock();
35,437✔
567
}
35,437✔
568

569
void DatasetPostReloadCleanup(void)
570
{
35,437✔
571
    DatasetLock();
35,437✔
572
    SCLogDebug("Post Reload Cleanup starting.. Hidden sets will be removed");
35,437✔
573
    Dataset *cur = sets;
35,437✔
574
    Dataset *prev = NULL;
35,437✔
575
    while (cur) {
1,830,070✔
576
        Dataset *next = cur->next;
1,794,633✔
577
        if (!cur->hidden) {
1,794,633✔
578
            prev = cur;
1,794,545✔
579
            cur = next;
1,794,545✔
580
            continue;
1,794,545✔
581
        }
1,794,545✔
582
        // Delete the set in case it was hidden
583
        if (prev != NULL) {
88✔
584
            prev->next = next;
×
585
        } else {
88✔
586
            sets = next;
88✔
587
        }
88✔
588
        THashShutdown(cur->hash);
88✔
589
        SCFree(cur);
88✔
590
        cur = next;
88✔
591
    }
88✔
592
    DatasetUnlock();
35,437✔
593
}
35,437✔
594

595
/* Value reflects THASH_DEFAULT_HASHSIZE which is what the default was earlier,
 * despite 2048 commented out in the default yaml. */
#define DATASETS_HASHSIZE_DEFAULT 4096

/**
 * \brief read the default memcap and hash size for datasets from the config
 *
 * \param memcap output; only written when datasets.defaults.memcap is set:
 *               the parsed value, or 0 if it cannot be parsed
 * \param hashsize output; datasets.defaults.hashsize if set and parsable,
 *                 DATASETS_HASHSIZE_DEFAULT otherwise
 */
void DatasetGetDefaultMemcap(uint64_t *memcap, uint32_t *hashsize)
{
    const char *conf_val = NULL;

    if (SCConfGet("datasets.defaults.memcap", &conf_val) == 1 &&
            ParseSizeStringU64(conf_val, memcap) < 0) {
        SCLogWarning("memcap value cannot be deduced: %s,"
                     " resetting to default",
                conf_val);
        *memcap = 0;
    }

    *hashsize = (uint32_t)DATASETS_HASHSIZE_DEFAULT;
    if (SCConfGet("datasets.defaults.hashsize", &conf_val) == 1 &&
            ParseSizeStringU32(conf_val, hashsize) < 0) {
        /* restore the default first, the warning prints it */
        *hashsize = (uint32_t)DATASETS_HASHSIZE_DEFAULT;
        SCLogWarning("hashsize value cannot be deduced: %s,"
                     " resetting to default: %u",
                conf_val, *hashsize);
    }
}
13,537✔
621

622
int DatasetsInit(void)
623
{
1✔
624
    SCLogDebug("datasets start");
1✔
625
    SCConfNode *datasets = SCConfGetNode("datasets");
1✔
626
    uint64_t default_memcap = 0;
1✔
627
    uint32_t default_hashsize = 0;
1✔
628
    DatasetGetDefaultMemcap(&default_memcap, &default_hashsize);
1✔
629
    if (datasets != NULL) {
1✔
630
        const char *str = NULL;
1✔
631
        if (SCConfGet("datasets.limits.total-hashsizes", &str) == 1) {
1✔
632
            if (ParseSizeStringU32(str, &dataset_max_total_hashsize) < 0) {
×
633
                FatalError("failed to parse datasets.limits.total-hashsizes value: %s", str);
×
634
            }
×
635
        }
×
636
        if (SCConfGet("datasets.limits.single-hashsize", &str) == 1) {
1✔
637
            if (ParseSizeStringU32(str, &dataset_max_one_hashsize) < 0) {
×
638
                FatalError("failed to parse datasets.limits.single-hashsize value: %s", str);
×
639
            }
×
640
        }
×
641
        if (dataset_max_total_hashsize > 0 &&
1✔
642
                dataset_max_total_hashsize < dataset_max_one_hashsize) {
1✔
643
            FatalError("total-hashsizes (%u) cannot be smaller than single-hashsize (%u)",
×
644
                    dataset_max_total_hashsize, dataset_max_one_hashsize);
×
645
        }
×
646
        if (dataset_max_total_hashsize > 0 && dataset_max_one_hashsize == 0) {
1✔
647
            // the total limit also applies for single limit
648
            dataset_max_one_hashsize = dataset_max_total_hashsize;
×
649
        }
×
650

651
        int list_pos = 0;
1✔
652
        SCConfNode *iter = NULL;
1✔
653
        TAILQ_FOREACH(iter, &datasets->head, next) {
1✔
654
            if (iter->name == NULL) {
1✔
655
                list_pos++;
×
656
                continue;
×
657
            }
×
658

659
            char save[PATH_MAX] = "";
1✔
660
            char load[PATH_MAX] = "";
1✔
661
            uint64_t memcap = 0;
1✔
662
            uint32_t hashsize = 0;
1✔
663

664
            const char *set_name = iter->name;
1✔
665
            if (strlen(set_name) > DATASET_NAME_MAX_LEN) {
1✔
666
                FatalErrorOnInit(
×
667
                        "set name '%s' too long, max %d chars", set_name, DATASET_NAME_MAX_LEN);
×
668
                continue;
×
669
            }
×
670

671
            SCConfNode *set_type = SCConfNodeLookupChild(iter, "type");
1✔
672
            if (set_type == NULL) {
1✔
673
                list_pos++;
1✔
674
                continue;
1✔
675
            }
1✔
676

UNCOV
677
            SCConfNode *set_save = SCConfNodeLookupChild(iter, "state");
×
UNCOV
678
            if (set_save) {
×
679
                DatasetGetPath(set_save->val, save, sizeof(save), TYPE_STATE);
×
680
                strlcpy(load, save, sizeof(load));
×
UNCOV
681
            } else {
×
UNCOV
682
                SCConfNode *set_load = SCConfNodeLookupChild(iter, "load");
×
UNCOV
683
                if (set_load) {
×
684
                    DatasetGetPath(set_load->val, load, sizeof(load), TYPE_LOAD);
×
685
                }
×
UNCOV
686
            }
×
687

UNCOV
688
            SCConfNode *set_memcap = SCConfNodeLookupChild(iter, "memcap");
×
UNCOV
689
            if (set_memcap) {
×
690
                if (ParseSizeStringU64(set_memcap->val, &memcap) < 0) {
×
691
                    SCLogWarning("memcap value cannot be"
×
692
                                 " deduced: %s, resetting to default",
×
693
                            set_memcap->val);
×
694
                    memcap = 0;
×
695
                }
×
696
            }
×
UNCOV
697
            SCConfNode *set_hashsize = SCConfNodeLookupChild(iter, "hashsize");
×
UNCOV
698
            if (set_hashsize) {
×
699
                if (ParseSizeStringU32(set_hashsize->val, &hashsize) < 0) {
×
700
                    SCLogWarning("hashsize value cannot be"
×
701
                                 " deduced: %s, resetting to default",
×
702
                            set_hashsize->val);
×
703
                    hashsize = 0;
×
704
                }
×
705
            }
×
UNCOV
706
            char conf_str[1024];
×
UNCOV
707
            snprintf(conf_str, sizeof(conf_str), "datasets.%d.%s", list_pos, set_name);
×
708

UNCOV
709
            SCLogDebug("set %s type %s. Conf %s", set_name, set_type->val, conf_str);
×
710

UNCOV
711
            if (strcmp(set_type->val, "md5") == 0) {
×
712
                Dataset *dset = DatasetGet(set_name, DATASET_TYPE_MD5, save, load,
×
713
                        memcap > 0 ? memcap : default_memcap,
×
714
                        hashsize > 0 ? hashsize : default_hashsize);
×
715
                if (dset == NULL) {
×
716
                    FatalErrorOnInit("failed to setup dataset for %s", set_name);
×
717
                    continue;
×
718
                }
×
719
                SCLogDebug("dataset %s: id %u type %s", set_name, dset->id, set_type->val);
×
720
                dset->from_yaml = true;
×
721

UNCOV
722
            } else if (strcmp(set_type->val, "sha256") == 0) {
×
723
                Dataset *dset = DatasetGet(set_name, DATASET_TYPE_SHA256, save, load,
×
724
                        memcap > 0 ? memcap : default_memcap,
×
725
                        hashsize > 0 ? hashsize : default_hashsize);
×
726
                if (dset == NULL) {
×
727
                    FatalErrorOnInit("failed to setup dataset for %s", set_name);
×
728
                    continue;
×
729
                }
×
730
                SCLogDebug("dataset %s: id %u type %s", set_name, dset->id, set_type->val);
×
731
                dset->from_yaml = true;
×
732

UNCOV
733
            } else if (strcmp(set_type->val, "string") == 0) {
×
UNCOV
734
                Dataset *dset = DatasetGet(set_name, DATASET_TYPE_STRING, save, load,
×
UNCOV
735
                        memcap > 0 ? memcap : default_memcap,
×
UNCOV
736
                        hashsize > 0 ? hashsize : default_hashsize);
×
UNCOV
737
                if (dset == NULL) {
×
738
                    FatalErrorOnInit("failed to setup dataset for %s", set_name);
×
739
                    continue;
×
740
                }
×
UNCOV
741
                SCLogDebug("dataset %s: id %u type %s", set_name, dset->id, set_type->val);
×
UNCOV
742
                dset->from_yaml = true;
×
743

UNCOV
744
            } else if (strcmp(set_type->val, "ipv4") == 0) {
×
745
                Dataset *dset = DatasetGet(set_name, DATASET_TYPE_IPV4, save, load,
×
746
                        memcap > 0 ? memcap : default_memcap,
×
747
                        hashsize > 0 ? hashsize : default_hashsize);
×
748
                if (dset == NULL) {
×
749
                    FatalErrorOnInit("failed to setup dataset for %s", set_name);
×
750
                    continue;
×
751
                }
×
752
                SCLogDebug("dataset %s: id %u type %s", set_name, dset->id, set_type->val);
×
753
                dset->from_yaml = true;
×
754

755
            } else if (strcmp(set_type->val, "ip") == 0) {
×
756
                Dataset *dset = DatasetGet(set_name, DATASET_TYPE_IPV6, save, load,
×
757
                        memcap > 0 ? memcap : default_memcap,
×
758
                        hashsize > 0 ? hashsize : default_hashsize);
×
759
                if (dset == NULL) {
×
760
                    FatalErrorOnInit("failed to setup dataset for %s", set_name);
×
761
                    continue;
×
762
                }
×
763
                SCLogDebug("dataset %s: id %u type %s", set_name, dset->id, set_type->val);
×
764
                dset->from_yaml = true;
×
765
            }
×
766

UNCOV
767
            list_pos++;
×
UNCOV
768
        }
×
769
    }
1✔
770
    SCLogDebug("datasets done: %p", datasets);
1✔
771
    return 0;
1✔
772
}
1✔
773

774
void DatasetsDestroy(void)
UNCOV
775
{
×
UNCOV
776
    DatasetLock();
×
UNCOV
777
    SCLogDebug("destroying datasets: %p", sets);
×
UNCOV
778
    Dataset *set = sets;
×
UNCOV
779
    while (set) {
×
UNCOV
780
        SCLogDebug("destroying set %s", set->name);
×
UNCOV
781
        Dataset *next = set->next;
×
UNCOV
782
        THashShutdown(set->hash);
×
UNCOV
783
        SCFree(set);
×
UNCOV
784
        set = next;
×
UNCOV
785
    }
×
UNCOV
786
    sets = NULL;
×
UNCOV
787
    DatasetUnlock();
×
UNCOV
788
    SCLogDebug("destroying datasets done: %p", sets);
×
UNCOV
789
}
×
790

791
/**
 *  \brief output callback that writes one serialized record to a FILE
 *  \param ctx FILE pointer to write to, may be NULL
 *  \param data bytes to write
 *  \param data_len number of bytes in \a data
 *  \retval 0 when \a ctx is NULL, otherwise the fwrite() result: the number
 *            of complete items written, where the whole buffer is one item
 */
static int SaveCallback(void *ctx, const uint8_t *data, const uint32_t data_len)
{
    FILE *out_fp = ctx;
    if (out_fp == NULL) {
        return 0;
    }
    /* the buffer is written as a single item of data_len bytes */
    const size_t items = fwrite(data, data_len, 1, out_fp);
    return (int)items;
}
×
800

801
static int Md5AsAscii(const void *s, char *out, size_t out_size)
UNCOV
802
{
×
UNCOV
803
    const Md5Type *md5 = s;
×
UNCOV
804
    char str[256];
×
UNCOV
805
    PrintHexString(str, sizeof(str), (uint8_t *)md5->md5, sizeof(md5->md5));
×
UNCOV
806
    strlcat(out, str, out_size);
×
UNCOV
807
    strlcat(out, "\n", out_size);
×
UNCOV
808
    return (int)strlen(out);
×
UNCOV
809
}
×
810

811
static int Sha256AsAscii(const void *s, char *out, size_t out_size)
812
{
×
813
    const Sha256Type *sha = s;
×
814
    char str[256];
×
815
    PrintHexString(str, sizeof(str), (uint8_t *)sha->sha256, sizeof(sha->sha256));
×
816
    strlcat(out, str, out_size);
×
817
    strlcat(out, "\n", out_size);
×
818
    return (int)strlen(out);
×
819
}
×
820

821
static int IPv4AsAscii(const void *s, char *out, size_t out_size)
UNCOV
822
{
×
UNCOV
823
    const IPv4Type *ip4 = s;
×
UNCOV
824
    char str[256];
×
UNCOV
825
    PrintInet(AF_INET, ip4->ipv4, str, sizeof(str));
×
UNCOV
826
    strlcat(out, str, out_size);
×
UNCOV
827
    strlcat(out, "\n", out_size);
×
UNCOV
828
    return (int)strlen(out);
×
UNCOV
829
}
×
830

831
static int IPv6AsAscii(const void *s, char *out, size_t out_size)
UNCOV
832
{
×
UNCOV
833
    const IPv6Type *ip6 = s;
×
UNCOV
834
    char str[256];
×
UNCOV
835
    bool is_ipv4 = true;
×
UNCOV
836
    for (int i = 4; i <= 15; i++) {
×
UNCOV
837
        if (ip6->ipv6[i] != 0) {
×
UNCOV
838
            is_ipv4 = false;
×
UNCOV
839
            break;
×
UNCOV
840
        }
×
UNCOV
841
    }
×
UNCOV
842
    if (is_ipv4) {
×
UNCOV
843
        PrintInet(AF_INET, ip6->ipv6, str, sizeof(str));
×
UNCOV
844
    } else {
×
UNCOV
845
        PrintInet(AF_INET6, ip6->ipv6, str, sizeof(str));
×
UNCOV
846
    }
×
UNCOV
847
    strlcat(out, str, out_size);
×
UNCOV
848
    strlcat(out, "\n", out_size);
×
UNCOV
849
    return (int)strlen(out);
×
UNCOV
850
}
×
851

852
void DatasetsSave(void)
UNCOV
853
{
×
UNCOV
854
    DatasetLock();
×
UNCOV
855
    SCLogDebug("saving datasets: %p", sets);
×
UNCOV
856
    Dataset *set = sets;
×
UNCOV
857
    while (set) {
×
UNCOV
858
        if (strlen(set->save) == 0)
×
UNCOV
859
            goto next;
×
860

UNCOV
861
        FILE *fp = fopen(set->save, "w");
×
UNCOV
862
        if (fp == NULL)
×
863
            goto next;
×
864

UNCOV
865
        SCLogDebug("dumping %s to %s", set->name, set->save);
×
866

UNCOV
867
        switch (set->type) {
×
UNCOV
868
            case DATASET_TYPE_STRING:
×
UNCOV
869
                THashWalk(set->hash, StringAsBase64, SaveCallback, fp);
×
UNCOV
870
                break;
×
UNCOV
871
            case DATASET_TYPE_MD5:
×
UNCOV
872
                THashWalk(set->hash, Md5AsAscii, SaveCallback, fp);
×
UNCOV
873
                break;
×
874
            case DATASET_TYPE_SHA256:
×
875
                THashWalk(set->hash, Sha256AsAscii, SaveCallback, fp);
×
876
                break;
×
UNCOV
877
            case DATASET_TYPE_IPV4:
×
UNCOV
878
                THashWalk(set->hash, IPv4AsAscii, SaveCallback, fp);
×
UNCOV
879
                break;
×
UNCOV
880
            case DATASET_TYPE_IPV6:
×
UNCOV
881
                THashWalk(set->hash, IPv6AsAscii, SaveCallback, fp);
×
UNCOV
882
                break;
×
UNCOV
883
        }
×
884

UNCOV
885
        fclose(fp);
×
886

UNCOV
887
    next:
×
UNCOV
888
        set = set->next;
×
UNCOV
889
    }
×
UNCOV
890
    DatasetUnlock();
×
UNCOV
891
}
×
892

893
/**
 *  \brief check whether a buffer is present in a string dataset
 *  \param set dataset to search
 *  \param data buffer to look up
 *  \param data_len length in bytes of \a data
 *  \retval -1 \a set is NULL
 *  \retval 0 not found
 *  \retval 1 found
 */
static int DatasetLookupString(Dataset *set, const uint8_t *data, const uint32_t data_len)
{
    if (set == NULL) {
        return -1;
    }

    StringType key = { .ptr = (uint8_t *)data, .len = data_len, .rep = 0 };
    THashData *hit = THashLookupFromHash(set->hash, &key);
    if (hit == NULL) {
        return 0;
    }
    /* release the returned entry before reporting the match */
    DatasetUnlockData(hit);
    return 1;
}
9✔
906

907
/**
 *  \brief look up a buffer in a string dataset and report its reputation
 *  \param set dataset to search
 *  \param data buffer to look up
 *  \param data_len length in bytes of \a data
 *  \param rep reputation value copied into the lookup key
 *  \retval result with `found` false when \a set is NULL or there is no
 *          match; otherwise `found` true and `rep` taken from the stored entry
 */
static DataRepResultType DatasetLookupStringwRep(Dataset *set,
        const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
{
    DataRepResultType result = { .found = false, .rep = 0 };

    if (set == NULL) {
        return result;
    }

    StringType key = { .ptr = (uint8_t *)data, .len = data_len, .rep = *rep };
    THashData *hit = THashLookupFromHash(set->hash, &key);
    if (hit != NULL) {
        StringType *entry = hit->data;
        result.found = true;
        result.rep = entry->rep;
        /* the stored value has been copied out, release the entry */
        DatasetUnlockData(hit);
    }
    return result;
}
148✔
926

927
/**
 *  \brief check whether a 4 byte address is present in an IPv4 dataset
 *  \param set dataset to search
 *  \param data address bytes
 *  \param data_len length of \a data, must be 4
 *  \retval -1 \a set is NULL or \a data_len is not 4
 *  \retval 0 not found
 *  \retval 1 found
 */
static int DatasetLookupIPv4(Dataset *set, const uint8_t *data, const uint32_t data_len)
{
    if (set == NULL) {
        return -1;
    }
    if (data_len != 4) {
        return -1;
    }

    IPv4Type key = { .rep = 0 };
    memcpy(key.ipv4, data, 4);

    THashData *hit = THashLookupFromHash(set->hash, &key);
    if (hit == NULL) {
        return 0;
    }
    /* release the returned entry before reporting the match */
    DatasetUnlockData(hit);
    return 1;
}
197✔
944

945
static DataRepResultType DatasetLookupIPv4wRep(
946
        Dataset *set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
947
{
×
948
    DataRepResultType rrep = { .found = false, .rep = 0 };
×
949

950
    if (set == NULL)
×
951
        return rrep;
×
952

953
    if (data_len != 4)
×
954
        return rrep;
×
955

956
    IPv4Type lookup = { .rep = 0 };
×
957
    memcpy(lookup.ipv4, data, data_len);
×
958
    THashData *rdata = THashLookupFromHash(set->hash, &lookup);
×
959
    if (rdata) {
×
960
        IPv4Type *found = rdata->data;
×
961
        rrep.found = true;
×
962
        rrep.rep = found->rep;
×
963
        DatasetUnlockData(rdata);
×
964
        return rrep;
×
965
    }
×
966
    return rrep;
×
967
}
×
968

969
/**
 *  \brief check whether an address is present in an IPv6 dataset
 *
 *  Accepts either a 16 byte or a 4 byte address. A 4 byte input fills only
 *  the first four bytes of the key; the rest stays zero from the initializer.
 *
 *  \param set dataset to search
 *  \param data address bytes
 *  \param data_len length of \a data, must be 16 or 4
 *  \retval -1 \a set is NULL or \a data_len is neither 16 nor 4
 *  \retval 0 not found
 *  \retval 1 found
 */
static int DatasetLookupIPv6(Dataset *set, const uint8_t *data, const uint32_t data_len)
{
    if (set == NULL) {
        return -1;
    }
    if (!(data_len == 16 || data_len == 4)) {
        return -1;
    }

    IPv6Type key = { .rep = 0 };
    memcpy(key.ipv6, data, data_len);

    THashData *hit = THashLookupFromHash(set->hash, &key);
    if (hit == NULL) {
        return 0;
    }
    /* release the returned entry before reporting the match */
    DatasetUnlockData(hit);
    return 1;
}
20✔
986

987
static DataRepResultType DatasetLookupIPv6wRep(
988
        Dataset *set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
989
{
×
990
    DataRepResultType rrep = { .found = false, .rep = 0 };
×
991

992
    if (set == NULL)
×
993
        return rrep;
×
994

995
    if (data_len != 16 && data_len != 4)
×
996
        return rrep;
×
997

998
    IPv6Type lookup = { .rep = 0 };
×
999
    memcpy(lookup.ipv6, data, data_len);
×
1000
    THashData *rdata = THashLookupFromHash(set->hash, &lookup);
×
1001
    if (rdata) {
×
1002
        IPv6Type *found = rdata->data;
×
1003
        rrep.found = true;
×
1004
        rrep.rep = found->rep;
×
1005
        DatasetUnlockData(rdata);
×
1006
        return rrep;
×
1007
    }
×
1008
    return rrep;
×
1009
}
×
1010

1011
/**
 *  \brief check whether a 16 byte digest is present in an MD5 dataset
 *  \param set dataset to search
 *  \param data raw digest bytes
 *  \param data_len length of \a data, must be 16
 *  \retval -1 \a set is NULL or \a data_len is not 16
 *  \retval 0 not found
 *  \retval 1 found
 */
static int DatasetLookupMd5(Dataset *set, const uint8_t *data, const uint32_t data_len)
{
    if (set == NULL) {
        return -1;
    }
    if (data_len != 16) {
        return -1;
    }

    Md5Type key = { .rep = 0 };
    memcpy(key.md5, data, data_len);

    THashData *hit = THashLookupFromHash(set->hash, &key);
    if (hit == NULL) {
        return 0;
    }
    /* release the returned entry before reporting the match */
    DatasetUnlockData(hit);
    return 1;
}
×
1028

1029
/**
 *  \brief look up a 16 byte digest in an MD5 dataset and report its reputation
 *  \param set dataset to search
 *  \param data raw digest bytes
 *  \param data_len length of \a data, must be 16
 *  \param rep not read by this function; the lookup key uses rep 0
 *  \retval result with `found` false when \a set is NULL, the length is
 *          wrong or there is no match; otherwise `found` true and `rep`
 *          taken from the stored entry
 */
static DataRepResultType DatasetLookupMd5wRep(Dataset *set,
        const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
{
    DataRepResultType result = { .found = false, .rep = 0 };

    if (set == NULL) {
        return result;
    }
    if (data_len != 16) {
        return result;
    }

    Md5Type key = { .rep = 0 };
    memcpy(key.md5, data, data_len);

    THashData *hit = THashLookupFromHash(set->hash, &key);
    if (hit != NULL) {
        Md5Type *entry = hit->data;
        result.found = true;
        result.rep = entry->rep;
        /* the stored value has been copied out, release the entry */
        DatasetUnlockData(hit);
    }
    return result;
}
5✔
1052

1053
/**
 *  \brief check whether a 32 byte digest is present in a SHA256 dataset
 *  \param set dataset to search
 *  \param data raw digest bytes
 *  \param data_len length of \a data, must be 32
 *  \retval -1 \a set is NULL or \a data_len is not 32
 *  \retval 0 not found
 *  \retval 1 found
 */
static int DatasetLookupSha256(Dataset *set, const uint8_t *data, const uint32_t data_len)
{
    if (set == NULL) {
        return -1;
    }
    if (data_len != 32) {
        return -1;
    }

    Sha256Type key = { .rep = 0 };
    memcpy(key.sha256, data, data_len);

    THashData *hit = THashLookupFromHash(set->hash, &key);
    if (hit == NULL) {
        return 0;
    }
    /* release the returned entry before reporting the match */
    DatasetUnlockData(hit);
    return 1;
}
×
1070

1071
/**
 *  \brief look up a 32 byte digest in a SHA256 dataset and report its reputation
 *  \param set dataset to search
 *  \param data raw digest bytes
 *  \param data_len length of \a data, must be 32
 *  \param rep not read by this function; the lookup key uses rep 0
 *  \retval result with `found` false when \a set is NULL, the length is
 *          wrong or there is no match; otherwise `found` true and `rep`
 *          taken from the stored entry
 */
static DataRepResultType DatasetLookupSha256wRep(Dataset *set,
        const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
{
    DataRepResultType result = { .found = false, .rep = 0 };

    if (set == NULL) {
        return result;
    }
    if (data_len != 32) {
        return result;
    }

    Sha256Type key = { .rep = 0 };
    memcpy(key.sha256, data, data_len);

    THashData *hit = THashLookupFromHash(set->hash, &key);
    if (hit != NULL) {
        Sha256Type *entry = hit->data;
        result.found = true;
        result.rep = entry->rep;
        /* the stored value has been copied out, release the entry */
        DatasetUnlockData(hit);
    }
    return result;
}
×
1094

1095
/**
1096
 *  \brief see if \a data is part of the set
1097
 *  \param set dataset
1098
 *  \param data data to look up
1099
 *  \param data_len length in bytes of \a data
1100
 *  \retval -1 error
1101
 *  \retval 0 not found
1102
 *  \retval 1 found
1103
 */
1104
int DatasetLookup(Dataset *set, const uint8_t *data, const uint32_t data_len)
1105
{
242✔
1106
    if (set == NULL)
242✔
1107
        return -1;
×
1108

1109
    switch (set->type) {
242✔
1110
        case DATASET_TYPE_STRING:
9✔
1111
            return DatasetLookupString(set, data, data_len);
9✔
1112
        case DATASET_TYPE_MD5:
4✔
1113
            return DatasetLookupMd5(set, data, data_len);
4✔
1114
        case DATASET_TYPE_SHA256:
×
1115
            return DatasetLookupSha256(set, data, data_len);
×
1116
        case DATASET_TYPE_IPV4:
209✔
1117
            return DatasetLookupIPv4(set, data, data_len);
209✔
1118
        case DATASET_TYPE_IPV6:
20✔
1119
            return DatasetLookupIPv6(set, data, data_len);
20✔
1120
    }
242✔
1121
    return -1;
×
1122
}
242✔
1123

1124
DataRepResultType DatasetLookupwRep(Dataset *set, const uint8_t *data, const uint32_t data_len,
1125
        const DataRepType *rep)
1126
{
188✔
1127
    DataRepResultType rrep = { .found = false, .rep = 0 };
188✔
1128
    if (set == NULL)
188✔
1129
        return rrep;
×
1130

1131
    switch (set->type) {
188✔
1132
        case DATASET_TYPE_STRING:
148✔
1133
            return DatasetLookupStringwRep(set, data, data_len, rep);
148✔
1134
        case DATASET_TYPE_MD5:
29✔
1135
            return DatasetLookupMd5wRep(set, data, data_len, rep);
29✔
1136
        case DATASET_TYPE_SHA256:
11✔
1137
            return DatasetLookupSha256wRep(set, data, data_len, rep);
11✔
1138
        case DATASET_TYPE_IPV4:
×
1139
            return DatasetLookupIPv4wRep(set, data, data_len, rep);
×
1140
        case DATASET_TYPE_IPV6:
×
1141
            return DatasetLookupIPv6wRep(set, data, data_len, rep);
×
1142
    }
188✔
1143
    return rrep;
×
1144
}
188✔
1145

1146
/**
1147
 *  \retval 1 data was added to the hash
1148
 *  \retval 0 data was not added to the hash as it is already there
1149
 *  \retval -1 failed to add data to the hash
1150
 */
1151
static int DatasetAddString(Dataset *set, const uint8_t *data, const uint32_t data_len)
1152
{
206✔
1153
    if (set == NULL)
206✔
1154
        return -1;
×
1155

1156
    StringType lookup = { .ptr = (uint8_t *)data, .len = data_len, .rep = 0 };
206✔
1157
    struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
206✔
1158
    if (res.data) {
206✔
1159
        DatasetUnlockData(res.data);
206✔
1160
        return res.is_new ? 1 : 0;
206✔
1161
    }
206✔
UNCOV
1162
    return -1;
×
1163
}
206✔
1164

1165
/**
1166
 *  \retval 1 data was added to the hash
1167
 *  \retval 0 data was not added to the hash as it is already there
1168
 *  \retval -1 failed to add data to the hash
1169
 */
1170
static int DatasetAddStringwRep(
1171
        Dataset *set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
UNCOV
1172
{
×
UNCOV
1173
    if (set == NULL)
×
1174
        return -1;
×
1175

UNCOV
1176
    StringType lookup = { .ptr = (uint8_t *)data, .len = data_len,
×
UNCOV
1177
        .rep = *rep };
×
UNCOV
1178
    struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
×
UNCOV
1179
    if (res.data) {
×
UNCOV
1180
        DatasetUnlockData(res.data);
×
UNCOV
1181
        return res.is_new ? 1 : 0;
×
UNCOV
1182
    }
×
1183
    return -1;
×
UNCOV
1184
}
×
1185

1186
/**
 *  \brief add an address to an IPv4 dataset
 *
 *  Only the first 4 bytes of \a data are used.
 *
 *  \param set dataset to add to
 *  \param data address bytes
 *  \param data_len length of \a data, must be at least 4
 *  \retval 1 data was added to the hash
 *  \retval 0 data was not added to the hash as it is already there
 *  \retval -1 failed to add data to the hash, or \a set is NULL
 *  \retval -2 \a data_len is smaller than 4
 */
static int DatasetAddIPv4(Dataset *set, const uint8_t *data, const uint32_t data_len)
{
    if (set == NULL)
        return -1;
    if (data_len < 4)
        return -2;

    IPv4Type key = { .rep = 0 };
    memcpy(key.ipv4, data, 4);

    struct THashDataGetResult got = THashGetFromHash(set->hash, &key);
    if (got.data == NULL)
        return -1;

    DatasetUnlockData(got.data);
    /* is_new tells a fresh insert apart from an existing entry */
    if (got.is_new)
        return 1;
    return 0;
}
×
1205

1206
/**
 *  \brief add an address to an IPv6 dataset
 *
 *  Accepts either a 16 byte or a 4 byte address. A 4 byte input fills only
 *  the first four bytes of the key; the rest stays zero from the initializer.
 *
 *  \param set dataset to add to
 *  \param data address bytes
 *  \param data_len length of \a data, must be 16 or 4
 *  \retval 1 data was added to the hash
 *  \retval 0 data was not added to the hash as it is already there
 *  \retval -1 failed to add data to the hash, or \a set is NULL
 *  \retval -2 \a data_len is neither 16 nor 4
 */
static int DatasetAddIPv6(Dataset *set, const uint8_t *data, const uint32_t data_len)
{
    if (set == NULL)
        return -1;
    if (!(data_len == 16 || data_len == 4))
        return -2;

    IPv6Type key = { .rep = 0 };
    memcpy(key.ipv6, data, data_len);

    struct THashDataGetResult got = THashGetFromHash(set->hash, &key);
    if (got.data == NULL)
        return -1;

    DatasetUnlockData(got.data);
    /* is_new tells a fresh insert apart from an existing entry */
    if (got.is_new)
        return 1;
    return 0;
}
8✔
1225

1226
static int DatasetAddIPv4wRep(
1227
        Dataset *set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
1228
{
×
1229
    if (set == NULL)
×
1230
        return -1;
×
1231

1232
    if (data_len < 4)
×
1233
        return -2;
×
1234

1235
    IPv4Type lookup = { .rep = *rep };
×
1236
    memcpy(lookup.ipv4, data, 4);
×
1237
    struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
×
1238
    if (res.data) {
×
1239
        DatasetUnlockData(res.data);
×
1240
        return res.is_new ? 1 : 0;
×
1241
    }
×
1242
    return -1;
×
1243
}
×
1244

1245
static int DatasetAddIPv6wRep(
1246
        Dataset *set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
1247
{
×
1248
    if (set == NULL)
×
1249
        return -1;
×
1250

1251
    if (data_len != 16)
×
1252
        return -2;
×
1253

1254
    IPv6Type lookup = { .rep = *rep };
×
1255
    memcpy(lookup.ipv6, data, 16);
×
1256
    struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
×
1257
    if (res.data) {
×
1258
        DatasetUnlockData(res.data);
×
1259
        return res.is_new ? 1 : 0;
×
1260
    }
×
1261
    return -1;
×
1262
}
×
1263

1264
/**
 *  \brief add a 16 byte digest to an MD5 dataset
 *  \param set dataset to add to
 *  \param data raw digest bytes
 *  \param data_len length of \a data, must be 16
 *  \retval 1 data was added to the hash
 *  \retval 0 data was not added to the hash as it is already there
 *  \retval -1 failed to add data to the hash, or \a set is NULL
 *  \retval -2 \a data_len is not 16
 */
static int DatasetAddMd5(Dataset *set, const uint8_t *data, const uint32_t data_len)
{
    if (set == NULL) {
        return -1;
    }
    if (data_len != 16) {
        return -2;
    }

    Md5Type key = { .rep = 0 };
    memcpy(key.md5, data, 16);

    struct THashDataGetResult got = THashGetFromHash(set->hash, &key);
    if (got.data == NULL) {
        return -1;
    }
    DatasetUnlockData(got.data);
    /* is_new tells a fresh insert apart from an existing entry */
    if (got.is_new) {
        return 1;
    }
    return 0;
}
×
1281

1282
static int DatasetAddMd5wRep(
1283
        Dataset *set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
UNCOV
1284
{
×
UNCOV
1285
    if (set == NULL)
×
1286
        return -1;
×
1287

UNCOV
1288
    if (data_len != 16)
×
1289
        return -2;
×
1290

UNCOV
1291
    Md5Type lookup = { .rep = *rep };
×
UNCOV
1292
    memcpy(lookup.md5, data, 16);
×
UNCOV
1293
    struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
×
UNCOV
1294
    if (res.data) {
×
UNCOV
1295
        DatasetUnlockData(res.data);
×
UNCOV
1296
        return res.is_new ? 1 : 0;
×
UNCOV
1297
    }
×
1298
    return -1;
×
UNCOV
1299
}
×
1300

1301
static int DatasetAddSha256wRep(
1302
        Dataset *set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
UNCOV
1303
{
×
UNCOV
1304
    if (set == NULL)
×
1305
        return -1;
×
1306

UNCOV
1307
    if (data_len != 32)
×
1308
        return -2;
×
1309

UNCOV
1310
    Sha256Type lookup = { .rep = *rep };
×
UNCOV
1311
    memcpy(lookup.sha256, data, 32);
×
UNCOV
1312
    struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
×
UNCOV
1313
    if (res.data) {
×
UNCOV
1314
        DatasetUnlockData(res.data);
×
UNCOV
1315
        return res.is_new ? 1 : 0;
×
UNCOV
1316
    }
×
1317
    return -1;
×
UNCOV
1318
}
×
1319

1320
/**
 *  \brief add a 32 byte digest to a SHA256 dataset
 *  \param set dataset to add to
 *  \param data raw digest bytes
 *  \param data_len length of \a data, must be 32
 *  \retval 1 data was added to the hash
 *  \retval 0 data was not added to the hash as it is already there
 *  \retval -1 failed to add data to the hash, or \a set is NULL
 *  \retval -2 \a data_len is not 32
 */
static int DatasetAddSha256(Dataset *set, const uint8_t *data, const uint32_t data_len)
{
    if (set == NULL) {
        return -1;
    }
    if (data_len != 32) {
        return -2;
    }

    Sha256Type key = { .rep = 0 };
    memcpy(key.sha256, data, 32);

    struct THashDataGetResult got = THashGetFromHash(set->hash, &key);
    if (got.data == NULL) {
        return -1;
    }
    DatasetUnlockData(got.data);
    /* is_new tells a fresh insert apart from an existing entry */
    if (got.is_new) {
        return 1;
    }
    return 0;
}
×
1337

1338
/**
 *  \brief add \a data to the set
 *
 *  Dispatches to the add helper that matches the set's type and returns
 *  that helper's result.
 *
 *  \param set dataset to add to
 *  \param data data to add
 *  \param data_len length in bytes of \a data
 *  \retval 1 data was added
 *  \retval 0 data was already present
 *  \retval -1 \a set is NULL, the type is not handled, or the add failed
 *  \retval -2 \a data_len is not valid for the set type
 */
int SCDatasetAdd(Dataset *set, const uint8_t *data, const uint32_t data_len)
{
    int rc = -1;

    if (set == NULL) {
        return rc;
    }

    switch (set->type) {
        case DATASET_TYPE_STRING:
            rc = DatasetAddString(set, data, data_len);
            break;
        case DATASET_TYPE_MD5:
            rc = DatasetAddMd5(set, data, data_len);
            break;
        case DATASET_TYPE_SHA256:
            rc = DatasetAddSha256(set, data, data_len);
            break;
        case DATASET_TYPE_IPV4:
            rc = DatasetAddIPv4(set, data, data_len);
            break;
        case DATASET_TYPE_IPV6:
            rc = DatasetAddIPv6(set, data, data_len);
            break;
    }
    return rc;
}
295✔
1357

1358
int SCDatasetAddwRep(
1359
        Dataset *set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
UNCOV
1360
{
×
UNCOV
1361
    if (set == NULL)
×
1362
        return -1;
×
1363

UNCOV
1364
    switch (set->type) {
×
UNCOV
1365
        case DATASET_TYPE_STRING:
×
UNCOV
1366
            return DatasetAddStringwRep(set, data, data_len, rep);
×
UNCOV
1367
        case DATASET_TYPE_MD5:
×
UNCOV
1368
            return DatasetAddMd5wRep(set, data, data_len, rep);
×
UNCOV
1369
        case DATASET_TYPE_SHA256:
×
UNCOV
1370
            return DatasetAddSha256wRep(set, data, data_len, rep);
×
1371
        case DATASET_TYPE_IPV4:
×
1372
            return DatasetAddIPv4wRep(set, data, data_len, rep);
×
1373
        case DATASET_TYPE_IPV6:
×
1374
            return DatasetAddIPv6wRep(set, data, data_len, rep);
×
UNCOV
1375
    }
×
1376
    return -1;
×
UNCOV
1377
}
×
1378

1379
typedef int (*DatasetOpFunc)(Dataset *set, const uint8_t *data, const uint32_t data_len);
1380

1381
/**
 *  \brief decode a serialized value and apply a type specific operation
 *
 *  The textual form depends on the set type: base64 for strings, 32 hex
 *  characters for MD5, 64 hex characters for SHA256, a dotted quad for
 *  IPv4, and whatever DatasetParseIpv6String() accepts for IPv6. The
 *  decoded bytes are handed to the callback matching the set type and that
 *  callback's return value is passed back unchanged.
 *
 *  \param set dataset to operate on
 *  \param string serialized value, must be non-NULL and non-empty
 *  \param DatasetOpString callback used for string sets
 *  \param DatasetOpMd5 callback used for MD5 sets
 *  \param DatasetOpSha256 callback used for SHA256 sets
 *  \param DatasetOpIPv4 callback used for IPv4 sets
 *  \param DatasetOpIPv6 callback used for IPv6 sets
 *  \retval -1 API error: NULL set, NULL or empty string, string longer than
 *             UINT16_MAX for a string set, or unhandled set type
 *  \retval -2 DATA error: \a string could not be decoded for the set type
 *  \retval other the return value of the selected callback
 */
static int DatasetOpSerialized(Dataset *set, const char *string, DatasetOpFunc DatasetOpString,
        DatasetOpFunc DatasetOpMd5, DatasetOpFunc DatasetOpSha256, DatasetOpFunc DatasetOpIPv4,
        DatasetOpFunc DatasetOpIPv6)
{
    /* a NULL string must be rejected before strlen() touches it */
    if (set == NULL || string == NULL)
        return -1;

    /* measured once; the string is not modified below */
    const size_t string_len = strlen(string);
    if (string_len == 0)
        return -1;

    switch (set->type) {
        case DATASET_TYPE_STRING: {
            if (string_len > UINT16_MAX) {
                // size check before cast and stack allocation
                return -1;
            }
            uint32_t decoded_size = SCBase64DecodeBufferSize((uint32_t)string_len);
            uint8_t decoded[decoded_size];
            uint32_t num_decoded = SCBase64Decode(
                    (const uint8_t *)string, string_len, SCBase64ModeStrict, decoded);
            if (num_decoded == 0) {
                return -2;
            }

            return DatasetOpString(set, decoded, num_decoded);
        }
        case DATASET_TYPE_MD5: {
            if (string_len != 32)
                return -2;
            uint8_t hash[16];
            if (HexToRaw((const uint8_t *)string, 32, hash, sizeof(hash)) < 0)
                return -2;
            return DatasetOpMd5(set, hash, 16);
        }
        case DATASET_TYPE_SHA256: {
            if (string_len != 64)
                return -2;
            uint8_t hash[32];
            if (HexToRaw((const uint8_t *)string, 64, hash, sizeof(hash)) < 0)
                return -2;
            return DatasetOpSha256(set, hash, 32);
        }
        case DATASET_TYPE_IPV4: {
            struct in_addr in;
            if (inet_pton(AF_INET, string, &in) != 1)
                return -2;
            return DatasetOpIPv4(set, (uint8_t *)&in.s_addr, 4);
        }
        case DATASET_TYPE_IPV6: {
            struct in6_addr in6;
            if (DatasetParseIpv6String(set, string, &in6) != 0) {
                SCLogError("Dataset failed to import %s as IPv6", string);
                return -2;
            }
            return DatasetOpIPv6(set, (uint8_t *)&in6.s6_addr, 16);
        }
    }
    return -1;
}
×
1439

1440
/** \brief add serialized data to set
1441
 *  \retval int 1 added
1442
 *  \retval int 0 already in hash
1443
 *  \retval int -1 API error (not added)
1444
 *  \retval int -2 DATA error
1445
 */
1446
int DatasetAddSerialized(Dataset *set, const char *string)
UNCOV
1447
{
×
UNCOV
1448
    return DatasetOpSerialized(set, string, DatasetAddString, DatasetAddMd5, DatasetAddSha256,
×
UNCOV
1449
            DatasetAddIPv4, DatasetAddIPv6);
×
UNCOV
1450
}
×
1451

1452
/** \brief add serialized data to set
1453
 *  \retval int 1 added
1454
 *  \retval int 0 already in hash
1455
 *  \retval int -1 API error (not added)
1456
 *  \retval int -2 DATA error
1457
 */
1458
int DatasetLookupSerialized(Dataset *set, const char *string)
UNCOV
1459
{
×
UNCOV
1460
    return DatasetOpSerialized(set, string, DatasetLookupString, DatasetLookupMd5,
×
UNCOV
1461
            DatasetLookupSha256, DatasetLookupIPv4, DatasetLookupIPv6);
×
UNCOV
1462
}
×
1463

1464
/**
1465
 *  \retval 1 data was removed from the hash
1466
 *  \retval 0 data not removed (busy)
1467
 *  \retval -1 data not found
1468
 */
1469
static int DatasetRemoveString(Dataset *set, const uint8_t *data, const uint32_t data_len)
1470
{
1✔
1471
    if (set == NULL)
1✔
1472
        return -1;
×
1473

1474
    StringType lookup = { .ptr = (uint8_t *)data, .len = data_len, .rep = 0 };
1✔
1475
    return THashRemoveFromHash(set->hash, &lookup);
1✔
1476
}
1✔
1477

1478
/**
 *  \brief remove a 4 byte address from an IPv4 dataset
 *  \param set dataset to remove from
 *  \param data address bytes
 *  \param data_len length of \a data, must be 4
 *  \retval -1 \a set is NULL
 *  \retval -2 \a data_len is not 4
 *  \retval other result of THashRemoveFromHash()
 */
static int DatasetRemoveIPv4(Dataset *set, const uint8_t *data, const uint32_t data_len)
{
    if (set == NULL) {
        return -1;
    }
    if (data_len != 4) {
        return -2;
    }

    IPv4Type key = { .rep = 0 };
    memcpy(key.ipv4, data, 4);
    const int rc = THashRemoveFromHash(set->hash, &key);
    return rc;
}
×
1490

1491
/**
 *  \brief remove a 16 byte address from an IPv6 dataset
 *  \param set dataset to remove from
 *  \param data address bytes
 *  \param data_len length of \a data, must be 16
 *  \retval -1 \a set is NULL
 *  \retval -2 \a data_len is not 16
 *  \retval other result of THashRemoveFromHash()
 */
static int DatasetRemoveIPv6(Dataset *set, const uint8_t *data, const uint32_t data_len)
{
    if (set == NULL) {
        return -1;
    }
    if (data_len != 16) {
        return -2;
    }

    IPv6Type key = { .rep = 0 };
    memcpy(key.ipv6, data, 16);
    const int rc = THashRemoveFromHash(set->hash, &key);
    return rc;
}
×
1503

1504
/**
 *  \brief remove a 16 byte digest from an MD5 dataset
 *  \param set dataset to remove from
 *  \param data raw digest bytes
 *  \param data_len length of \a data, must be 16
 *  \retval -1 \a set is NULL
 *  \retval -2 \a data_len is not 16
 *  \retval other result of THashRemoveFromHash()
 */
static int DatasetRemoveMd5(Dataset *set, const uint8_t *data, const uint32_t data_len)
{
    if (set == NULL) {
        return -1;
    }
    if (data_len != 16) {
        return -2;
    }

    Md5Type key = { .rep = 0 };
    memcpy(key.md5, data, 16);
    const int rc = THashRemoveFromHash(set->hash, &key);
    return rc;
}
×
1516

1517
/**
 *  \brief remove a 32 byte digest from a SHA256 dataset
 *  \param set dataset to remove from
 *  \param data raw digest bytes
 *  \param data_len length of \a data, must be 32
 *  \retval -1 \a set is NULL
 *  \retval -2 \a data_len is not 32
 *  \retval other result of THashRemoveFromHash()
 */
static int DatasetRemoveSha256(Dataset *set, const uint8_t *data, const uint32_t data_len)
{
    if (set == NULL) {
        return -1;
    }
    if (data_len != 32) {
        return -2;
    }

    Sha256Type key = { .rep = 0 };
    memcpy(key.sha256, data, 32);
    const int rc = THashRemoveFromHash(set->hash, &key);
    return rc;
}
×
1529

1530
/** \brief remove serialized data from set
1531
 *  \retval int 1 removed
1532
 *  \retval int 0 found but busy (not removed)
1533
 *  \retval int -1 API error (not removed)
1534
 *  \retval int -2 DATA error */
1535
int DatasetRemoveSerialized(Dataset *set, const char *string)
1536
{
×
1537
    return DatasetOpSerialized(set, string, DatasetRemoveString, DatasetRemoveMd5,
×
1538
            DatasetRemoveSha256, DatasetRemoveIPv4, DatasetRemoveIPv6);
×
1539
}
×
1540

1541
/**
 *  \brief remove \a data from the set
 *
 *  Dispatches to the remove helper that matches the set's type and returns
 *  that helper's result.
 *
 *  \param set dataset to remove from
 *  \param data data to remove
 *  \param data_len length in bytes of \a data
 *  \retval -1 \a set is NULL or the type is not handled
 *  \retval other result of the type specific remove helper
 */
int DatasetRemove(Dataset *set, const uint8_t *data, const uint32_t data_len)
{
    int rc = -1;

    if (set == NULL) {
        return rc;
    }

    switch (set->type) {
        case DATASET_TYPE_STRING:
            rc = DatasetRemoveString(set, data, data_len);
            break;
        case DATASET_TYPE_MD5:
            rc = DatasetRemoveMd5(set, data, data_len);
            break;
        case DATASET_TYPE_SHA256:
            rc = DatasetRemoveSha256(set, data, data_len);
            break;
        case DATASET_TYPE_IPV4:
            rc = DatasetRemoveIPv4(set, data, data_len);
            break;
        case DATASET_TYPE_IPV6:
            rc = DatasetRemoveIPv6(set, data, data_len);
            break;
    }
    return rc;
}
1✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc