• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

MtFmT-Lib / mtfmt / 5777411721

pending completion
5777411721

push

github

web-flow
Merge pull request #92 from MtFmT-Lib/str_replace

ADD: 字符串的retain

150 of 150 new or added lines in 4 files covered. (100.0%)

3261 of 3533 relevant lines covered (92.3%)

42.76 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

92.9
/src/mm_pattern.c
1
// SPDX-License-Identifier: LGPL-3.0
2
/**
3
 * @file    mm_pattern.c
4
 * @author  向阳 (hinata.hoshino@foxmail.com)
5
 * @brief   模式串匹配和替换
6
 * @version 1.0
7
 * @date    2023-07-23
8
 *
9
 * @copyright Copyright (c) 向阳, all rights reserved.
10
 *
11
 */
12

13
#define MSTR_IMP_SOURCES 1
14

15
#include "mm_heap.h"
16
#include "mm_string.h"
17
#include <stddef.h>
18
#include <string.h>
19

20
/**
21
 * @brief 指定使用BM算法的阈值
22
 *
23
 */
24
#define BM_THRESHOLD_CNT  6
25

26
/**
27
 * @brief 单个char的最大值(unsigned)
28
 *
29
 */
30
#define BM_CHAR_INDEX_MAX 256
31

32
//
33
// private:
34
//
35

36
static mstr_result_t mstr_find_simple(
37
    isize_t*, const char*, usize_t, usize_t, const char*, usize_t
38
);
39
static mstr_result_t patt_match_impl(
40
    isize_t*, const mstr_char_t*, usize_t, const mstr_char_t*, usize_t
41
);
42
static mstr_result_t mstr_retain_start_with_impl(
43
    MString*, const char*, usize_t
44
);
45
static mstr_result_t mstr_retain_end_with_impl(
46
    MString*, const char*, usize_t
47
);
48
static mstr_result_t mstr_retain_all_impl(
49
    MString*, const char*, usize_t
50
);
51
static mstr_result_t mstr_retain_all_impl_helper(
52
    MString*, const MString*, const char*, usize_t, usize_t
53
);
54
static void make_jump_table(uint8_t*, const mstr_char_t*, usize_t);
55
static mstr_result_t unicode_length_of(usize_t*, const char*, usize_t);
56
//
57
// public:
58
//
59
MSTR_EXPORT_API(mstr_result_t)
60
mstr_find(
52✔
61
    const MString* str,
62
    MStringMatchResult* f_res,
63
    usize_t begin_pos,
64
    const char* pattern,
65
    usize_t pattern_cnt
66
)
67
{
68
    usize_t offset;
69
    isize_t find_res;
70
    mstr_result_t ret_code;
71
    offset = mstr_char_offset_at(str, begin_pos);
52✔
72
    ret_code = mstr_find_simple(
52✔
73
        &find_res, str->buff, str->count, offset, pattern, pattern_cnt
52✔
74
    );
75
    if (MSTR_SUCC(ret_code)) {
52✔
76
        if (find_res == -1) {
52✔
77
            f_res->is_matched = False;
13✔
78
            f_res->begin_pos = 0;
13✔
79
            f_res->begin_offset = 0;
13✔
80
        }
81
        else {
82
            usize_t cur_cnt = 0;
39✔
83
            usize_t cur_len = 0;
39✔
84
            const char* it = str->buff + offset;
39✔
85
            while (cur_cnt != (usize_t)find_res) {
162✔
86
                usize_t cnt = mstr_char_length(*it);
123✔
87
                it += cnt;
123✔
88
                cur_len += 1;
123✔
89
                cur_cnt += cnt;
123✔
90
            }
91
            f_res->is_matched = True;
39✔
92
            f_res->begin_pos = cur_len;
39✔
93
            f_res->begin_offset = (usize_t)find_res;
39✔
94
        }
95
    }
96
    return ret_code;
52✔
97
}
98

99
MSTR_EXPORT_API(mstr_result_t)
100
mstr_replace(
×
101
    MString* str,
102
    MStringReplaceOption opt,
103
    const char* pattern,
104
    usize_t pattern_cnt,
105
    const char* replace_to,
106
    usize_t replace_to_cnt
107
)
108
{
109

110
    return MStr_Err_NoImplemention;
×
111
}
112

113
MSTR_EXPORT_API(mstr_result_t)
114
mstr_retain(
6✔
115
    MString* str,
116
    MStringReplaceOption opt,
117
    const char* patt,
118
    usize_t patt_cnt
119
)
120
{
121
    if (patt_cnt == 0) {
6✔
122
        return MStr_Ok;
×
123
    }
124
    else if (opt == MStringReplaceOption_StartWith) {
6✔
125
        return mstr_retain_start_with_impl(str, patt, patt_cnt);
2✔
126
    }
127
    else if (opt == MStringReplaceOption_EndWith) {
4✔
128
        return mstr_retain_end_with_impl(str, patt, patt_cnt);
2✔
129
    }
130
    else if (opt == MStringReplaceOption_All) {
2✔
131
        return mstr_retain_all_impl(str, patt, patt_cnt);
2✔
132
    }
133
    else {
134
        mstr_unreachable();
135
        return MStr_Err_NoImplemention;
×
136
    }
137
}
138

139
/**
140
 * @brief 字符串查找(仅返回字符的count)
141
 *
142
 * @param[out] result: 结果, -1表示没有找到
143
 * @param[in] main_str: 主串
144
 * @param[in] main_str_cnt: 主串的字符计数
145
 * @param[in] begin_pos: 开始查找的位置的字符计数偏移量
146
 * @param[in] pattern: 模式串
147
 * @param[in] pattern_cnt: 模式串的长度计数
148
 *
149
 */
150
static mstr_result_t mstr_find_simple(
52✔
151
    isize_t* result,
152
    const char* main_str,
153
    usize_t main_str_cnt,
154
    usize_t begin_pos,
155
    const char* pattern,
156
    usize_t pattern_cnt
157
)
158
{
159
    isize_t find_res = -1;
52✔
160
    mstr_result_t ret_code;
161
    if (begin_pos > main_str_cnt || pattern_cnt > main_str_cnt) {
52✔
162
        // 超过主串的长度了, 肯定是找不到的
163
        // 子串过长, 肯定是找不到的
164
        *result = -1;
3✔
165
        return MStr_Ok;
3✔
166
    }
167
    if (pattern_cnt == 0) {
49✔
168
        // 空的, 永远返回true
169
        *result = 0;
3✔
170
        return MStr_Ok;
3✔
171
    }
172
    ret_code = patt_match_impl(
46✔
173
        &find_res,
174
        main_str + begin_pos,
46✔
175
        main_str_cnt - begin_pos,
176
        pattern,
177
        pattern_cnt
178
    );
179
    if (MSTR_SUCC(ret_code)) {
46✔
180
        *result = find_res;
46✔
181
    }
182
    return ret_code;
46✔
183
}
184

185
/**
186
 * @brief 子串匹配
187
 *
188
 * @param[out] res: 匹配结果
189
 * @param[in] mstr: 主串
190
 * @param[in] mstr_cnt: 主串长度
191
 * @param[in] patt: 模式串
192
 * @param[in] patt_len: 模式串长度
193
 */
194
static mstr_result_t patt_match_impl(
46✔
195
    isize_t* res,
196
    const mstr_char_t* mstr,
197
    usize_t mstr_cnt,
198
    const mstr_char_t* patt,
199
    usize_t patt_cnt
200
)
201
{
202
    uint8_t* bad_char_arr = NULL;
46✔
203
    usize_t match_cnt = patt_cnt - 1;
46✔
204
    const mstr_char_t* m_b = mstr;
46✔
205
    const mstr_char_t* m_p = mstr;
46✔
206
    const mstr_char_t* m_end = mstr + mstr_cnt;
46✔
207
    mstr_assert(patt_cnt > 0);
208
    // 默认为没有找到
209
    *res = -1;
46✔
210
    // 分配bad char数组, 并构造跳转表
211
    if (patt_cnt >= BM_THRESHOLD_CNT) {
46✔
212
        bad_char_arr = (uint8_t*)mstr_heap_alloc(
24✔
213
            sizeof(*bad_char_arr) * BM_CHAR_INDEX_MAX
214
        );
215
        if (bad_char_arr == NULL) {
24✔
216
            return MStr_Err_HeapTooSmall;
×
217
        }
218
        make_jump_table(bad_char_arr, patt, patt_cnt);
24✔
219
    }
220
    // 匹配子串
221
    while (m_p < m_end - match_cnt) {
167✔
222
        isize_t i = (isize_t)match_cnt;
157✔
223
        // 倒着来匹配
224
        while (i >= 0 && patt[i] == m_p[i]) {
470✔
225
            i -= 1;
313✔
226
        }
227
        if (i < 0) {
157✔
228
            // 匹配成功
229
            *res = (isize_t)(m_p - m_b);
36✔
230
            break;
36✔
231
        }
232
        else {
233
            // substring的第i个字符匹配失败, m_p + i处失败
234
            // 移动next_pos到匹配失败的字符最后一次在sub
235
            // string中出现的位置处
236
            usize_t arr_idx = (usize_t)(uint8_t)m_p[i];
121✔
237
            mstr_assert(arr_idx >= 0 && arr_idx <= BM_CHAR_INDEX_MAX);
238
            m_p += bad_char_arr ? (usize_t)bad_char_arr[arr_idx] : 1;
121✔
239
        }
240
    }
241
    if (bad_char_arr != NULL) {
46✔
242
        mstr_heap_free(bad_char_arr);
24✔
243
    }
244
    return MStr_Ok;
46✔
245
}
246

247
/**
248
 * @brief mstr_retain (start_with) 的实现
249
 *
250
 * @param[inout] str: 需要进行剔除的字符串
251
 * @param[in] patt: 模式串
252
 * @param[in] patt_cnt: 模式串的字符计数
253
 */
254
static mstr_result_t mstr_retain_start_with_impl(
2✔
255
    MString* str, const char* patt, usize_t patt_cnt
256
)
257
{
258
    usize_t patt_len = 0;
2✔
259
    mstr_result_t res = MStr_Ok;
2✔
260
    if (mstr_start_with(str, patt, patt_cnt)) {
2✔
261
        // 计算unicode长度
262
        MSTR_AND_THEN(
2✔
263
            res, unicode_length_of(&patt_len, patt, patt_cnt)
264
        );
265
        // 把数据挪到前面覆盖掉
266
        if (MSTR_SUCC(res)) {
2✔
267
            memmove(
2✔
268
                str->buff, str->buff + patt_cnt, str->count - patt_cnt
2✔
269
            );
270
            // 减去长度
271
            str->count -= patt_cnt;
2✔
272
            str->length -= patt_len;
2✔
273
        }
274
    }
275
    return res;
2✔
276
}
277

278
/**
279
 * @brief mstr_retain (end_with) 的实现
280
 *
281
 * @param[inout] str: 需要进行剔除的字符串
282
 * @param[in] patt: 模式串
283
 * @param[in] patt_cnt: 模式串的字符计数
284
 */
285
static mstr_result_t mstr_retain_end_with_impl(
2✔
286
    MString* str, const char* patt, usize_t patt_cnt
287
)
288
{
289
    usize_t patt_len = 0;
2✔
290
    mstr_result_t res = MStr_Ok;
2✔
291
    if (mstr_end_with(str, patt, patt_cnt)) {
2✔
292
        // 计算unicode长度
293
        MSTR_AND_THEN(
2✔
294
            res, unicode_length_of(&patt_len, patt, patt_cnt)
295
        );
296
        if (MSTR_SUCC(res)) {
2✔
297
            // 减去长度, 把数据截断
298
            str->count -= patt_cnt;
2✔
299
            str->length -= patt_len;
2✔
300
        }
301
    }
302
    return res;
2✔
303
}
304

305
/**
306
 * @brief mstr_retain (all) 的实现
307
 *
308
 * @param[inout] str: 需要进行剔除的字符串
309
 * @param[in] patt: 模式串
310
 * @param[in] patt_cnt: 模式串的字符计数
311
 */
312
static mstr_result_t mstr_retain_all_impl(
2✔
313
    MString* str, const char* patt, usize_t patt_cnt
314
)
315
{
316
    MString tmp_str;
317
    usize_t patt_len = 0;
2✔
318
    mstr_result_t res = MStr_Ok;
2✔
319
    mstr_init(&tmp_str);
2✔
320
    mstr_assert(patt_cnt > 0);
321
    // 计算unicode长度
322
    MSTR_AND_THEN(res, unicode_length_of(&patt_len, patt, patt_cnt));
2✔
323
    // 保留足够的长度, retain 结果一定是 小于等于 源字符串 的
324
    MSTR_AND_THEN(res, mstr_reserve(&tmp_str, str->count));
2✔
325
    // 进行处理
326
    MSTR_AND_THEN(
2✔
327
        res,
328
        mstr_retain_all_impl_helper(
329
            &tmp_str, str, patt, patt_cnt, patt_len
330
        )
331
    );
332
    // 把tmp_str移动到str
333
    if (MSTR_SUCC(res)) {
2✔
334
        mstr_move_from(str, &tmp_str);
2✔
335
    }
336
    return res;
2✔
337
}
338

339
/**
340
 * @brief mstr_retain (all) 的实现 (helper)
341
 *
342
 * @param[in] out_str: 剔除结果, 外部已经init, 并保留了足够的内存区
343
 * @param[in] src_str: 需要进行剔除的字符串
344
 * @param[in] patt: 模式串
345
 * @param[in] patt_cnt: 模式串的字符计数
346
 * @param[in] patt_len: 模式串的长度
347
 */
348
static mstr_result_t mstr_retain_all_impl_helper(
2✔
349
    MString* out_str,
350
    const MString* src_str,
351
    const char* patt,
352
    usize_t patt_cnt,
353
    usize_t patt_len
354
)
355
{
356
    uint8_t* jmp_table = NULL;
2✔
357
    usize_t match_cnt = patt_cnt - 1;
2✔
358
    usize_t result_cnt = src_str->count;
2✔
359
    usize_t result_len = src_str->length;
2✔
360
    mstr_char_t* m_r = out_str->buff;
2✔
361
    const mstr_char_t* m_p = src_str->buff;
2✔
362
    const mstr_char_t* m_end = src_str->buff + src_str->count;
2✔
363
    mstr_assert(patt_cnt > 0);
364
    mstr_assert(patt_len > 0);
365
    // 构造表
366
    if (patt_cnt >= BM_THRESHOLD_CNT) {
2✔
367
        jmp_table = (uint8_t*)mstr_heap_alloc(
×
368
            sizeof(*jmp_table) * BM_CHAR_INDEX_MAX
369
        );
370
        if (jmp_table == NULL) {
×
371
            return MStr_Err_HeapTooSmall;
×
372
        }
373
        make_jump_table(jmp_table, patt, patt_cnt);
×
374
    }
375
    // 进行剔除
376
    while (m_p < m_end - match_cnt) {
31✔
377
        isize_t i = (isize_t)match_cnt;
29✔
378
        // 倒着来匹配
379
        while (i >= 0 && patt[i] == m_p[i]) {
41✔
380
            i -= 1;
12✔
381
        }
382
        if (i < 0) {
29✔
383
            // 匹配成功, 剔除掉这个东东
384
            m_p += patt_cnt;
4✔
385
            result_cnt -= patt_cnt;
4✔
386
            result_len -= patt_len;
4✔
387
        }
388
        else {
389
            // substring的第i个字符匹配失败, m_p + i处失败
390
            // 移动next_pos到匹配失败的字符最后一次在sub
391
            // string中出现的位置处
392
            usize_t arr_idx = (usize_t)(uint8_t)m_p[i];
25✔
393
            usize_t arr_add_len = 0;
25✔
394
            mstr_assert(arr_idx >= 0 && arr_idx <= BM_CHAR_INDEX_MAX);
395
            arr_add_len = jmp_table ? (usize_t)jmp_table[arr_idx] : 1;
25✔
396
            // 添加本内容
397
            mstr_assert(
398
                (usize_t)(m_r - out_str->buff) < out_str->cap_size
399
            );
400
            memcpy(m_r, m_p, arr_add_len);
25✔
401
            m_r += arr_add_len;
25✔
402
            m_p += arr_add_len;
25✔
403
        }
404
    }
405
    // 拼接剩下的部分
406
    mstr_assert(
407
        (usize_t)((m_r - out_str->buff) + (m_end - m_p)) <
408
        out_str->cap_size
409
    );
410
    memcpy(m_r, m_p, (usize_t)(m_end - m_p));
2✔
411
    // 填充结果
412
    out_str->count = result_cnt;
2✔
413
    out_str->length = result_len;
2✔
414
    // 释放跳转表
415
    if (jmp_table != NULL) {
2✔
416
        mstr_heap_free(jmp_table);
×
417
    }
418
    return MStr_Ok;
2✔
419
}
420

421
/**
422
 * @brief 按照模式串 patt 构造跳转表
423
 *
424
 * @param[out] table: 跳转表
425
 * @param[in] patt: 模式串
426
 * @param[in] patt_cnt: 模式串的字符数
427
 */
428
static void make_jump_table(
24✔
429
    uint8_t* table, const mstr_char_t* patt, usize_t patt_cnt
430
)
431
{
432
    const usize_t BAD_CHAR_MAX_OFFSET = 255;
24✔
433
    mstr_assert(patt_cnt > 0);
434
    // 默认情况, 没有出现在sub string的字符移动substring.length长度
435
    for (usize_t i = 0; i < BM_CHAR_INDEX_MAX; i += 1) {
6,168✔
436
        usize_t offset = patt_cnt > BAD_CHAR_MAX_OFFSET ?
6,144✔
437
                             BAD_CHAR_MAX_OFFSET :
438
                             patt_cnt;
439
        table[i] = (uint8_t)offset;
6,144✔
440
    }
441
    // 不然, bad_char[ch]就是ch在sub string中最后一次出现的位置
442
    for (usize_t i = 0; i < patt_cnt - 1; i += 1) {
348✔
443
        usize_t offset = patt_cnt - i - 1;
324✔
444
        usize_t li_off =
324✔
445
            offset > BAD_CHAR_MAX_OFFSET ? BAD_CHAR_MAX_OFFSET : offset;
446
        usize_t ch_idx = (usize_t)(uint8_t)patt[i];
324✔
447
        mstr_assert(ch_idx >= 0 && ch_idx <= BM_CHAR_INDEX_MAX);
448
        table[ch_idx] = (uint8_t)li_off;
324✔
449
    }
450
}
24✔
451

452
/**
453
 * @brief 计算unicode字符串的字符长度
454
 *
455
 */
456
static mstr_result_t unicode_length_of(
6✔
457
    usize_t* len, const char* str, usize_t cnt
458
)
459
{
460
    usize_t str_len = 0;
6✔
461
    const char* str_beg = str;
6✔
462
    while (*str && (usize_t)(str - str_beg) < cnt) {
27✔
463
        usize_t cnt = mstr_char_length(*str);
21✔
464
        if (cnt == 0) {
21✔
465
            return MStr_Err_UnicodeEncodingError;
×
466
        }
467
        str += cnt;
21✔
468
        str_len += 1;
21✔
469
    }
470
    *len = str_len;
6✔
471
    return MStr_Ok;
6✔
472
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc