• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

tarantool / sdvg / 16647465507

31 Jul 2025 11:15AM UTC coverage: 68.944% (+0.1%) from 68.803%
16647465507

Pull #10

github

web-flow
Merge 1e60b6354 into 30e6d4242
Pull Request #10: Optimize http writer

179 of 222 new or added lines in 12 files covered. (80.63%)

34 existing lines in 1 file now uncovered.

4955 of 7187 relevant lines covered (68.94%)

0.91 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

86.73
/internal/generator/usecase/general/generator/value/string.go
1
package value
2

3
import (
4
        "bytes"
5
        "math"
6
        "math/big"
7
        "slices"
8
        "strings"
9
        "sync"
10
        "text/template"
11

12
        "github.com/pkg/errors"
13
        "github.com/tarantool/sdvg/internal/generator/models"
14
        "github.com/tarantool/sdvg/internal/generator/usecase/general/locale"
15
        "github.com/tarantool/sdvg/internal/generator/usecase/general/locale/en"
16
        "github.com/tarantool/sdvg/internal/generator/usecase/general/locale/ru"
17
)
18

19
// Verify interface compliance in compile time.
20
var _ Generator = (*StringGenerator)(nil)
21

22
// StringGenerator type is used to describe generator for strings.
23
type StringGenerator struct {
24
        *models.ColumnStringParams
25
        totalValuesCount uint64
26
        template         *template.Template
27
        bufPool          *sync.Pool
28
        localeModule     locale.LocalModule
29
        charset          []rune
30
        countByPrefix    []float64
31
        sumByPrefix      []float64
32
        completions      []int64 // completions[i] stores the number of ways to form a text of length i
33
}
34

35
//nolint:cyclop
36
func (g *StringGenerator) Prepare() error {
2✔
37
        if g.Template != "" {
3✔
38
                tmpl, err := template.New("template").
1✔
39
                        Option("missingkey=error").
1✔
40
                        Funcs(template.FuncMap{
1✔
41
                                "upper": strings.ToUpper,
1✔
42
                                "lower": strings.ToLower,
1✔
43
                        }).
1✔
44
                        Parse(g.Template)
1✔
45
                if err != nil {
1✔
NEW
46
                        return errors.Errorf("failed to parse template: %s", err.Error())
×
NEW
47
                }
×
48

49
                g.template = tmpl
1✔
50
                g.bufPool = &sync.Pool{
1✔
51
                        New: func() any {
1✔
NEW
52
                                return new(bytes.Buffer)
×
NEW
53
                        },
×
54
                }
55
        }
56

57
        switch g.Locale {
2✔
58
        case "ru":
1✔
59
                g.localeModule = ru.NewLocaleModule(g.LogicalType, g.MinLength, g.MaxLength)
1✔
60
        case "en":
2✔
61
                g.localeModule = en.NewLocaleModule(g.LogicalType, g.MinLength, g.MaxLength)
2✔
62
        default:
×
63
                return errors.Errorf("unknown locale: %q", g.Locale)
×
64
        }
65

66
        switch g.LogicalType {
2✔
67
        case models.FirstNameType:
1✔
68
                if len(g.localeModule.GetFirstNames(locale.MaleGender)) == 0 {
1✔
69
                        return errors.Errorf("no male first names with length between %v and %v", g.MinLength, g.MaxLength)
×
70
                }
×
71

72
                if len(g.localeModule.GetFirstNames(locale.FemaleGender)) == 0 {
1✔
73
                        return errors.Errorf("no female first names with length between %v and %v", g.MinLength, g.MaxLength)
×
74
                }
×
75
        case models.LastNameType:
1✔
76
                if len(g.localeModule.GetLastNames(locale.MaleGender)) == 0 {
1✔
77
                        return errors.Errorf("no male last names with length between %v and %v", g.MinLength, g.MaxLength)
×
78
                }
×
79

80
                if len(g.localeModule.GetLastNames(locale.FemaleGender)) == 0 {
1✔
81
                        return errors.Errorf("no female last names with length between %v and %v", g.MinLength, g.MaxLength)
×
82
                }
×
83
        case models.PhoneType:
1✔
84
                if len(g.localeModule.GetPhonePatterns()) == 0 {
1✔
85
                        return errors.Errorf("no phone patterns with length between %v and %v", g.MinLength, g.MaxLength)
×
86
                }
×
87
        }
88

89
        g.charset = make([]rune, 0)
2✔
90

2✔
91
        if !g.WithoutLargeLetters {
4✔
92
                g.charset = append(g.charset, g.localeModule.LargeLetters()...)
2✔
93
        }
2✔
94

95
        if !g.WithoutSmallLetters {
4✔
96
                g.charset = append(g.charset, g.localeModule.SmallLetters()...)
2✔
97
        }
2✔
98

99
        if !g.WithoutNumbers {
4✔
100
                g.charset = append(g.charset, locale.Numbers...)
2✔
101
        }
2✔
102

103
        if !g.WithoutSpecialChars {
4✔
104
                g.charset = append(g.charset, locale.SpecialChars...)
2✔
105
        }
2✔
106

107
        slices.Sort(g.charset)
2✔
108

2✔
109
        if g.LogicalType == models.TextType {
3✔
110
                g.completions = g.calculateCompletions(g.MaxLength + 1)
1✔
111
        }
1✔
112

113
        return nil
2✔
114
}
115

116
func (g *StringGenerator) SetTotalCount(totalValuesCount uint64) error {
2✔
117
        g.totalValuesCount = totalValuesCount
2✔
118

2✔
119
        if g.LogicalType == "" && g.Template == "" {
4✔
120
                countByLength := make([]float64, g.MaxLength+1)
2✔
121
                avgRangeCount := math.Ceil(float64(totalValuesCount) / float64(g.MaxLength-g.MinLength+1))
2✔
122

2✔
123
                for length := g.MinLength; length <= g.MaxLength; length++ {
4✔
124
                        rangeCount := math.Pow(float64(len(g.charset)), float64(length))
2✔
125

2✔
126
                        var currentLenCount float64
2✔
127
                        if avgRangeCount > rangeCount {
2✔
128
                                currentLenCount = rangeCount
×
129
                                avgRangeCount += (avgRangeCount - rangeCount) / float64(g.MaxLength-length)
×
130
                        } else {
2✔
131
                                currentLenCount = math.Ceil(avgRangeCount)
2✔
132
                        }
2✔
133

134
                        countByLength[length] = currentLenCount
2✔
135
                }
136

137
                g.countByPrefix = make([]float64, g.MaxLength+1)
2✔
138
                g.sumByPrefix = make([]float64, g.MaxLength+1)
2✔
139

2✔
140
                for prefix := 0; prefix <= g.MaxLength; prefix++ {
4✔
141
                        prefixDivider := math.Pow(float64(len(g.charset)), float64(prefix))
2✔
142
                        g.countByPrefix[prefix] = countByLength[prefix] / prefixDivider
2✔
143

2✔
144
                        for length := 0; length <= g.MaxLength-prefix; length++ {
4✔
145
                                g.sumByPrefix[prefix] += countByLength[length+prefix] / prefixDivider
2✔
146
                        }
2✔
147
                }
148
        }
149

150
        return nil
2✔
151
}
152

153
// calculateCompletions precomputes completions.
154
func (g *StringGenerator) calculateCompletions(length int) []int64 {
1✔
155
        words := g.localeModule.GetWords()
1✔
156
        bytesPerChar := g.localeModule.GetBytesPerChar()
1✔
157
        delimiterLen := len(locale.WordsDelimiter)
1✔
158

1✔
159
        completionsBig := make([]*big.Int, length+1)
1✔
160
        for i := range completionsBig {
2✔
161
                completionsBig[i] = big.NewInt(0)
1✔
162
        }
1✔
163

164
        // Base case: one way to form a text of length 0 (the empty text).
165
        completionsBig[0].SetInt64(1)
1✔
166

1✔
167
        // Base case: all one-letter words.
1✔
168
        for _, w := range words {
2✔
169
                if len(w) == 1 {
2✔
170
                        completionsBig[1].Add(completionsBig[1], big.NewInt(1))
1✔
171
                }
1✔
172
        }
173

174
        // For every target length, add ways by choosing each word that fits.
175
        for l := 2; l <= length; l++ {
2✔
176
                for _, w := range words {
2✔
177
                        wLen := len(w)/bytesPerChar + delimiterLen
1✔
178
                        if wLen <= l {
2✔
179
                                completionsBig[l].Add(completionsBig[l], completionsBig[l-wLen])
1✔
180
                        }
1✔
181
                }
182
        }
183

184
        // convert from big.Int to int64
185
        completions := make([]int64, 0, length+1)
1✔
186

1✔
187
        for _, blockCount := range completionsBig {
2✔
188
                if !blockCount.IsInt64() {
2✔
189
                        break
1✔
190
                }
191

192
                completions = append(completions, blockCount.Int64())
1✔
193
        }
194

195
        return completions
1✔
196
}
197

198
// templateString returns n-th string by template.
199
//
200
//nolint:forcetypeassert
NEW
201
func (g *StringGenerator) templateString(rowValues map[string]any) (string, error) {
×
NEW
202
        buf := g.bufPool.Get().(*bytes.Buffer)
×
NEW
203
        buf.Reset()
×
NEW
204

×
NEW
205
        err := g.template.Execute(buf, rowValues)
×
NEW
206
        if err != nil {
×
NEW
207
                g.bufPool.Put(buf)
×
NEW
208

×
NEW
209
                return "", errors.New(err.Error())
×
NEW
210
        }
×
211

NEW
212
        val := buf.String()
×
NEW
213
        g.bufPool.Put(buf)
×
NEW
214

×
NEW
215
        return val, nil
×
216
}
217

218
// patternString returns n-th string by pattern.
219
func (g *StringGenerator) patternString(number float64) string {
1✔
220
        val := []rune(g.Pattern)
1✔
221
        index := number / float64(g.totalValuesCount)
1✔
222

1✔
223
        for i := range val {
2✔
224
                var letters []rune
1✔
225

1✔
226
                switch val[i] {
1✔
227
                case 'A':
1✔
228
                        letters = g.localeModule.LargeLetters()
1✔
229
                case 'a':
1✔
230
                        letters = g.localeModule.SmallLetters()
1✔
231
                case '0':
1✔
232
                        letters = locale.Numbers
1✔
233
                case '#':
1✔
234
                        letters = locale.SpecialChars
1✔
235
                default:
1✔
236
                        continue
1✔
237
                }
238

239
                var pos int
1✔
240
                pos, index = orderedPos(len(letters), index)
1✔
241
                val[i] = letters[pos]
1✔
242
        }
243

244
        return string(val)
1✔
245
}
246

247
// firstName returns n-th first name from range.
248
func (g *StringGenerator) firstName(number float64) string {
1✔
249
        firstNames := g.localeModule.GetFirstNames(locale.AnyGender)
1✔
250

1✔
251
        pos := orderedInt64(0, int64(len(firstNames)-1), number, g.totalValuesCount)
1✔
252

1✔
253
        return firstNames[pos]
1✔
254
}
1✔
255

256
// lastName returns n-th last name from range.
257
func (g *StringGenerator) lastName(number float64) string {
1✔
258
        lastNames := g.localeModule.GetLastNames(locale.AnyGender)
1✔
259

1✔
260
        pos := orderedInt64(0, int64(len(lastNames)-1), number, g.totalValuesCount)
1✔
261

1✔
262
        return lastNames[pos]
1✔
263
}
1✔
264

265
// phone returns n-th phone number from range.
266
func (g *StringGenerator) phone(number float64) string {
1✔
267
        patterns := g.localeModule.GetPhonePatterns()
1✔
268

1✔
269
        pos := orderedInt64(0, int64(len(patterns)-1), number, g.totalValuesCount)
1✔
270

1✔
271
        pattern := patterns[pos]
1✔
272
        maxPhone := int64(math.Pow(10, float64(strings.Count(pattern, "#")))) - 1 //nolint:mnd
1✔
273

1✔
274
        phone := orderedInt64(0, maxPhone, number, g.totalValuesCount)
1✔
275

1✔
276
        return replaceWithNumber(pattern, '#', phone)
1✔
277
}
1✔
278

279
// text sorts texts only within their respective length groups.
280
// Texts of the same length will be ordered, but ordering
281
// between texts of different lengths is not guaranteed.
282
//
283
//nolint:cyclop
284
func (g *StringGenerator) text(num float64) (string, error) {
1✔
285
        words := g.localeModule.GetWords()
1✔
286
        oneLetterWords := g.localeModule.GetOneLetterWords()
1✔
287
        oneLetterWordsLen := int64(len(oneLetterWords))
1✔
288

1✔
289
        delimiter := locale.WordsDelimiter
1✔
290
        delimiterLen := len(delimiter)
1✔
291

1✔
292
        bytesPerChar := g.localeModule.GetBytesPerChar()
1✔
293

1✔
294
        maxPreComputedLength := len(g.completions) - 1
1✔
295

1✔
296
        wantedLen := g.MinLength + delimiterLen + int(num)%(g.MaxLength-g.MinLength+1)
1✔
297

1✔
298
        number := int64(math.Floor(float64(g.completions[maxPreComputedLength]-1) * (num / float64(g.totalValuesCount))))
1✔
299

1✔
300
        result := make([]byte, 0, wantedLen*bytesPerChar)
1✔
301

1✔
302
        var textLen int
1✔
303

1✔
304
        remaining := maxPreComputedLength
1✔
305
        // Process until we've built the full text.
1✔
306
        for remaining > 0 {
2✔
307
                found := false
1✔
308
                // Iterate over words in lexicographical order.
1✔
309
                if remaining == 1 {
2✔
310
                        if number > oneLetterWordsLen-1 {
1✔
311
                                return "", errors.Errorf("remaining length is 1 but k: %v overflows: %v", number, oneLetterWordsLen)
×
312
                        }
×
313

314
                        result = append(result, oneLetterWords[number]...)
1✔
315

1✔
316
                        textLen++
1✔
317

1✔
318
                        break
1✔
319
                }
320

321
                for _, w := range words {
2✔
322
                        wLen := len(w)/bytesPerChar + delimiterLen
1✔
323
                        if wLen > remaining {
2✔
324
                                continue
1✔
325
                        }
326
                        // count = number of completions if we choose word w at this step.
327
                        count := g.completions[remaining-wLen]
1✔
328
                        // If k is within the block for word w, choose it.
1✔
329
                        if number < count {
2✔
330
                                result = append(result, w...)
1✔
331
                                result = append(result, delimiter...)
1✔
332

1✔
333
                                textLen += wLen
1✔
334

1✔
335
                                remaining -= wLen
1✔
336
                                found = true
1✔
337

1✔
338
                                break
1✔
339
                        }
340
                        // Otherwise, skip this block.
341
                        number -= count
1✔
342
                }
343

344
                if !found {
1✔
345
                        return "", errors.Errorf("index %v out of range for remaining length %d, %v", number, remaining, wantedLen)
×
346
                }
×
347
        }
348

349
        for textLen < wantedLen {
2✔
350
                w := words[number%int64(len(words)-1)]
1✔
351

1✔
352
                result = append(result, w...)
1✔
353
                result = append(result, delimiter...)
1✔
354

1✔
355
                textLen += len(w)/bytesPerChar + delimiterLen
1✔
356
        }
1✔
357

358
        text := string(result)
1✔
359

1✔
360
        if textLen > wantedLen {
2✔
361
                if bytesPerChar == 1 {
2✔
362
                        text = text[:wantedLen]
1✔
363
                } else {
2✔
364
                        text = string([]rune(text)[:wantedLen])
1✔
365
                }
1✔
366
        }
367

368
        return text, nil
1✔
369
}
370

371
// simpleString generates a lexicographically ordered string based on the given number.
372
// The function ensures that strings of different lengths are evenly distributed.
373
//
374
// Prepared variables (from Prepare method):
375
//   - countByLength - determines how many strings of each length should be generated; aims for an even distribution
376
//     but adjusts when the number of possible strings at a given length is limited;
377
//   - countByPrefix - determines how many times a given prefix should be repeated across generated strings;
378
//   - sumByPrefix - keeps total number of strings that should be generated with a specific prefix of a certain length.
379
//
380
// Each iteration of loop follows these steps:
381
//   - Subtracting the Current Prefix Group.
382
//     countByPrefix[prefixLen] represents how many times the current prefix is repeated.
383
//     We subtract this value from remain to determine if the target string falls within this group.
384
//     If remain is negative, it means the desired index falls within the current prefix group, so we stop.
385
//     If sumByPrefix[prefixLen+1] == 0, it means no further characters can be added, so we also stop.
386
//   - Determining the Next Character.
387
//     sumByPrefix[prefixLen+1] tells us how many strings exist for the next character choices.
388
//     remain / sumByPrefix[prefixLen+1] determines how many prefixes we need to skip before choosing next character.
389
//     We update remain according to reflect the choice. The selected character charset[i] is added to prefix.
390
//
391
// This approach ensures precision up to 217 characters in prefix length due to float64 limitations.
392
// Any additional characters required beyond the ordered prefix are filled in using a pattern based on `number`.
393
//
394
// Let's assume that:
395
//   - charset = ['a', 'b']
396
//   - min length = 2, max length = 3
397
//   - total strings = 10
398
//
399
// Generated strings and counts:
400
//   - a   → 0 times
401
//   - aa  → 1 time
402
//   - aaa → 0.75 times
403
//   - aab → 0.75 times
404
//   - ab  → 1 time
405
//   - ...
406
//
407
// Precomputed values:
408
//   - countByLength = [0, 4, 6]
409
//   - countByPrefix = [0, 0, 1, 0.75]
410
//   - sumByPrefix   = [10, 5, 2.5, 0.75]
411
//
412
// Suppose we want to generate simpleString(7), let's trace the loop:
413
//   - remain -= countByPrefix[0] = 7 - 0 = 7
414
//     i = remain / sumByPrefix[1] = 7 / 5 = 1 (selects 'b')
415
//     remain -= sumByPrefix[1] * i = 7 - (5 * 1) = 2
416
//     prefix = ['b']
417
//   - remain -= countByPrefix[1] = 2 - 0 = 2
418
//     i = remain / sumByPrefix[2] = 2 / 2.5 = 0 (selects 'a')
419
//     remain -= sumByPrefix[2] * i = 2 - (2.5 * 0) = 2
420
//     prefix = ['b', 'a']
421
//   - remain -= countByPrefix[2] = 2 - 1 = 1
422
//     i = remain / sumByPrefix[3] = 1 / 0.75 = 1 (selects 'b')
423
//     remain -= sumByPrefix[3] * i = 1 - (0.75 * 1) = 0.25
424
//     prefix = ['b', 'a', 'b']
425
//   - remain -= countByPrefix[3] = 0.25 - 0.75 = -0.5
426
//     remain < 0 → break with result "bab"
427
func (g *StringGenerator) simpleString(number float64) string {
2✔
428
        prefix := make([]rune, 0, g.MaxLength)
2✔
429

2✔
430
        var prefixLen int
2✔
431

2✔
432
        for remain := number; ; {
4✔
433
                prefixLen = len(prefix)
2✔
434

2✔
435
                remain -= g.countByPrefix[prefixLen]
2✔
436
                if remain < 0 || g.sumByPrefix[prefixLen+1] == 0 {
4✔
437
                        break
2✔
438
                }
439

440
                i := int(remain / g.sumByPrefix[prefixLen+1])
2✔
441
                remain -= g.sumByPrefix[prefixLen+1] * float64(i)
2✔
442
                prefix = append(prefix, g.charset[i])
2✔
443
        }
444

445
        // The precision of float64 allows us to generate only 217 prefix characters (which is enough for us).
446
        // Within the ordered prefix, we can supplement with random characters.
447
        if prefixLen < g.MinLength {
2✔
448
                destLen := g.MinLength + int(number)%(g.MaxLength-g.MinLength+1)
×
449
                for i := range destLen - prefixLen {
×
450
                        prefix = append(prefix, g.charset[(int(number)+i*i)%len(g.charset)])
×
451
                }
×
452
        }
453

454
        return string(prefix)
2✔
455
}
456

457
// Value returns n-th string from range.
458
func (g *StringGenerator) Value(number float64, rowValues map[string]any) (any, error) {
2✔
459
        if g.Template != "" {
2✔
NEW
460
                val, err := g.templateString(rowValues)
×
NEW
461
                if err != nil {
×
NEW
462
                        return nil, errors.WithMessage(err, "failed to render template string")
×
NEW
463
                }
×
464

NEW
465
                return val, nil
×
466
        }
467

468
        if g.Pattern != "" {
3✔
469
                return g.patternString(number), nil
1✔
470
        }
1✔
471

472
        switch g.LogicalType {
2✔
473
        case models.FirstNameType:
1✔
474
                return g.firstName(number), nil
1✔
475
        case models.LastNameType:
1✔
476
                return g.lastName(number), nil
1✔
477
        case models.PhoneType:
1✔
478
                return g.phone(number), nil
1✔
479
        case models.TextType:
1✔
480
                return g.text(number)
1✔
481
        }
482

483
        return g.simpleString(number), nil
2✔
484
}
485

486
//nolint:cyclop
487
func (g *StringGenerator) ValuesCount() float64 {
2✔
488
        if g.Template != "" {
3✔
489
                return 1.0
1✔
490
        }
1✔
491

492
        if g.Pattern != "" {
3✔
493
                total := 1.0
1✔
494

1✔
495
                if count := strings.Count(g.Pattern, "A"); count > 0 {
2✔
496
                        total *= math.Pow(float64(len(g.localeModule.LargeLetters())), float64(count))
1✔
497
                }
1✔
498

499
                if count := strings.Count(g.Pattern, "a"); count > 0 {
2✔
500
                        total *= math.Pow(float64(len(g.localeModule.SmallLetters())), float64(count))
1✔
501
                }
1✔
502

503
                if count := strings.Count(g.Pattern, "0"); count > 0 {
2✔
504
                        total *= math.Pow(float64(len(locale.Numbers)), float64(count))
1✔
505
                }
1✔
506

507
                if count := strings.Count(g.Pattern, "#"); count > 0 {
2✔
508
                        total *= math.Pow(float64(len(locale.SpecialChars)), float64(count))
1✔
509
                }
1✔
510

511
                return total
1✔
512
        }
513

514
        switch g.LogicalType {
2✔
515
        case models.FirstNameType:
1✔
516
                return float64(len(g.localeModule.GetFirstNames(locale.AnyGender)))
1✔
517

518
        case models.LastNameType:
1✔
519
                return float64(len(g.localeModule.GetLastNames(locale.AnyGender)))
1✔
520

521
        case models.PhoneType:
1✔
522
                totalCount := float64(0)
1✔
523
                for _, pattern := range g.localeModule.GetPhonePatterns() {
2✔
524
                        totalCount += math.Pow(float64(10), float64(strings.Count(pattern, "#"))) //nolint:mnd
1✔
525
                }
1✔
526

527
                return totalCount
1✔
528

529
        case models.TextType:
1✔
530
                if g.MinLength > len(g.completions) {
2✔
531
                        return math.Inf(1)
1✔
532
                }
1✔
533

534
                totalCount := float64(0)
1✔
535
                for length := g.MinLength; length <= g.MaxLength && length+1 < len(g.completions); length++ {
2✔
536
                        totalCount += float64(g.completions[length+1])
1✔
537
                }
1✔
538

539
                return totalCount
1✔
540
        }
541

542
        totalCount := float64(0)
2✔
543
        for length := g.MinLength; length <= g.MaxLength; length++ {
4✔
544
                totalCount += math.Pow(float64(len(g.charset)), float64(length))
2✔
545
        }
2✔
546

547
        return totalCount
2✔
548
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc