• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

tarantool / sdvg / 16540465582

26 Jul 2025 01:49PM UTC coverage: 69.305% (+0.5%) from 68.803%
16540465582

Pull #9

github

web-flow
Merge bd2244f8d into 061a8ec2d
Pull Request #9: Improve string template

127 of 139 new or added lines in 10 files covered. (91.37%)

120 existing lines in 6 files now uncovered.

4938 of 7125 relevant lines covered (69.31%)

0.91 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

91.1
/internal/generator/usecase/general/generator/value/string.go
1
package value
2

3
import (
4
        "math"
5
        "math/big"
6
        "regexp"
7
        "slices"
8
        "strings"
9

10
        "github.com/flosch/pongo2"
11
        "github.com/pkg/errors"
12
        "github.com/tarantool/sdvg/internal/generator/common"
13
        "github.com/tarantool/sdvg/internal/generator/models"
14
        "github.com/tarantool/sdvg/internal/generator/usecase/general/locale"
15
        "github.com/tarantool/sdvg/internal/generator/usecase/general/locale/en"
16
        "github.com/tarantool/sdvg/internal/generator/usecase/general/locale/ru"
17
)
18

19
// rePatternVal finds calls of the form pattern('...') or pattern("...") inside
// a template string. Capture group 1 holds a single-quoted argument and
// capture group 2 holds a double-quoted argument; the unused group is empty.
var rePatternVal = regexp.MustCompile(`pattern\((?:'([^']*)'|"([^"]*)")\)`)
22

23
// Verify interface compliance in compile time.
24
var _ Generator = (*StringGenerator)(nil)
25

26
// StringGenerator type is used to describe generator for strings.
27
type StringGenerator struct {
28
        *models.ColumnStringParams
29
        totalValuesCount uint64
30
        localeModule     locale.LocalModule
31
        template         *pongo2.Template
32
        charset          []rune
33
        countByPrefix    []float64
34
        sumByPrefix      []float64
35
        completions      []int64 // completions[i] stores the number of ways to form a text of length i
36
}
37

38
//nolint:cyclop
39
func (g *StringGenerator) Prepare() error {
2✔
40
        if g.Template != "" {
3✔
41
                template, err := pongo2.FromString(g.Template)
1✔
42
                if err != nil {
1✔
NEW
43
                        return errors.Errorf("failed to parse template: %s", err.Error())
×
NEW
44
                }
×
45

46
                g.template = template
1✔
47
        }
48

49
        switch g.Locale {
2✔
50
        case "ru":
1✔
51
                g.localeModule = ru.NewLocaleModule(g.LogicalType, g.MinLength, g.MaxLength)
1✔
52
        case "en":
2✔
53
                g.localeModule = en.NewLocaleModule(g.LogicalType, g.MinLength, g.MaxLength)
2✔
54
        default:
×
55
                return errors.Errorf("unknown locale: %q", g.Locale)
×
56
        }
57

58
        switch g.LogicalType {
2✔
59
        case models.FirstNameType:
1✔
60
                if len(g.localeModule.GetFirstNames(locale.MaleGender)) == 0 {
1✔
UNCOV
61
                        return errors.Errorf("no male first names with length between %v and %v", g.MinLength, g.MaxLength)
×
62
                }
×
63

64
                if len(g.localeModule.GetFirstNames(locale.FemaleGender)) == 0 {
1✔
UNCOV
65
                        return errors.Errorf("no female first names with length between %v and %v", g.MinLength, g.MaxLength)
×
UNCOV
66
                }
×
67
        case models.LastNameType:
1✔
68
                if len(g.localeModule.GetLastNames(locale.MaleGender)) == 0 {
1✔
69
                        return errors.Errorf("no male last names with length between %v and %v", g.MinLength, g.MaxLength)
×
70
                }
×
71

72
                if len(g.localeModule.GetLastNames(locale.FemaleGender)) == 0 {
1✔
73
                        return errors.Errorf("no female last names with length between %v and %v", g.MinLength, g.MaxLength)
×
74
                }
×
75
        case models.PhoneType:
1✔
76
                if len(g.localeModule.GetPhonePatterns()) == 0 {
1✔
77
                        return errors.Errorf("no phone patterns with length between %v and %v", g.MinLength, g.MaxLength)
×
78
                }
×
79
        }
80

81
        g.charset = make([]rune, 0)
2✔
82

2✔
83
        if !g.WithoutLargeLetters {
4✔
84
                g.charset = append(g.charset, g.localeModule.LargeLetters()...)
2✔
85
        }
2✔
86

87
        if !g.WithoutSmallLetters {
4✔
88
                g.charset = append(g.charset, g.localeModule.SmallLetters()...)
2✔
89
        }
2✔
90

91
        if !g.WithoutNumbers {
4✔
92
                g.charset = append(g.charset, locale.Numbers...)
2✔
93
        }
2✔
94

95
        if !g.WithoutSpecialChars {
4✔
96
                g.charset = append(g.charset, locale.SpecialChars...)
2✔
97
        }
2✔
98

99
        slices.Sort(g.charset)
2✔
100

2✔
101
        if g.LogicalType == models.TextType {
3✔
102
                g.completions = g.calculateCompletions(g.MaxLength + 1)
1✔
103
        }
1✔
104

105
        return nil
2✔
106
}
107

108
func (g *StringGenerator) SetTotalCount(totalValuesCount uint64) error {
2✔
109
        g.totalValuesCount = totalValuesCount
2✔
110

2✔
111
        if g.LogicalType == "" && g.Template == "" {
4✔
112
                countByLength := make([]float64, g.MaxLength+1)
2✔
113
                avgRangeCount := math.Ceil(float64(totalValuesCount) / float64(g.MaxLength-g.MinLength+1))
2✔
114

2✔
115
                for length := g.MinLength; length <= g.MaxLength; length++ {
4✔
116
                        rangeCount := math.Pow(float64(len(g.charset)), float64(length))
2✔
117

2✔
118
                        var currentLenCount float64
2✔
119
                        if avgRangeCount > rangeCount {
2✔
UNCOV
120
                                currentLenCount = rangeCount
×
UNCOV
121
                                avgRangeCount += (avgRangeCount - rangeCount) / float64(g.MaxLength-length)
×
122
                        } else {
2✔
123
                                currentLenCount = math.Ceil(avgRangeCount)
2✔
124
                        }
2✔
125

126
                        countByLength[length] = currentLenCount
2✔
127
                }
128

129
                g.countByPrefix = make([]float64, g.MaxLength+1)
2✔
130
                g.sumByPrefix = make([]float64, g.MaxLength+1)
2✔
131

2✔
132
                for prefix := 0; prefix <= g.MaxLength; prefix++ {
4✔
133
                        prefixDivider := math.Pow(float64(len(g.charset)), float64(prefix))
2✔
134
                        g.countByPrefix[prefix] = countByLength[prefix] / prefixDivider
2✔
135

2✔
136
                        for length := 0; length <= g.MaxLength-prefix; length++ {
4✔
137
                                g.sumByPrefix[prefix] += countByLength[length+prefix] / prefixDivider
2✔
138
                        }
2✔
139
                }
140
        }
141

142
        return nil
2✔
143
}
144

145
// calculateCompletions precomputes completions.
146
func (g *StringGenerator) calculateCompletions(length int) []int64 {
1✔
147
        words := g.localeModule.GetWords()
1✔
148
        bytesPerChar := g.localeModule.GetBytesPerChar()
1✔
149
        delimiterLen := len(locale.WordsDelimiter)
1✔
150

1✔
151
        completionsBig := make([]*big.Int, length+1)
1✔
152
        for i := range completionsBig {
2✔
153
                completionsBig[i] = big.NewInt(0)
1✔
154
        }
1✔
155

156
        // Base case: one way to form a text of length 0 (the empty text).
157
        completionsBig[0].SetInt64(1)
1✔
158

1✔
159
        // Base case: all one-letter words.
1✔
160
        for _, w := range words {
2✔
161
                if len(w) == 1 {
2✔
162
                        completionsBig[1].Add(completionsBig[1], big.NewInt(1))
1✔
163
                }
1✔
164
        }
165

166
        // For every target length, add ways by choosing each word that fits.
167
        for l := 2; l <= length; l++ {
2✔
168
                for _, w := range words {
2✔
169
                        wLen := len(w)/bytesPerChar + delimiterLen
1✔
170
                        if wLen <= l {
2✔
171
                                completionsBig[l].Add(completionsBig[l], completionsBig[l-wLen])
1✔
172
                        }
1✔
173
                }
174
        }
175

176
        // convert from big.Int to int64
177
        completions := make([]int64, 0, length+1)
1✔
178

1✔
179
        for _, blockCount := range completionsBig {
2✔
180
                if !blockCount.IsInt64() {
2✔
181
                        break
1✔
182
                }
183

184
                completions = append(completions, blockCount.Int64())
1✔
185
        }
186

187
        return completions
1✔
188
}
189

190
// templateString returns n-th string by template.
191
func (g *StringGenerator) templateString(number float64, generatedValues map[string]any) (string, error) {
1✔
192
        generatedValues["pattern"] = func(pattern string) *pongo2.Value {
2✔
193
                return pongo2.AsSafeValue(g.patternString(number, pattern))
1✔
194
        }
1✔
195

196
        val, err := g.template.Execute(generatedValues)
1✔
197
        if err != nil {
1✔
NEW
UNCOV
198
                return "", errors.New(err.Error())
×
NEW
199
        }
×
200

201
        return val, nil
1✔
202
}
203

204
// patternString returns n-th string by pattern.
205
func (g *StringGenerator) patternString(number float64, pattern string) string {
1✔
206
        val := []rune(pattern)
1✔
207
        index := number / float64(g.totalValuesCount)
1✔
208

1✔
209
        for i := range val {
2✔
210
                var letters []rune
1✔
211

1✔
212
                switch val[i] {
1✔
213
                case 'A':
1✔
214
                        letters = g.localeModule.LargeLetters()
1✔
215
                case 'a':
1✔
216
                        letters = g.localeModule.SmallLetters()
1✔
217
                case '0':
1✔
218
                        letters = locale.Numbers
1✔
219
                case '#':
1✔
220
                        letters = locale.SpecialChars
1✔
221
                default:
1✔
222
                        continue
1✔
223
                }
224

225
                var pos int
1✔
226
                pos, index = orderedPos(len(letters), index)
1✔
227
                val[i] = letters[pos]
1✔
228
        }
229

230
        return string(val)
1✔
231
}
232

233
// firstName returns n-th first name from range.
234
func (g *StringGenerator) firstName(number float64) string {
1✔
235
        firstNames := g.localeModule.GetFirstNames(locale.AnyGender)
1✔
236

1✔
237
        pos := orderedInt64(0, int64(len(firstNames)-1), number, g.totalValuesCount)
1✔
238

1✔
239
        return firstNames[pos]
1✔
240
}
1✔
241

242
// lastName returns n-th last name from range.
243
func (g *StringGenerator) lastName(number float64) string {
1✔
244
        lastNames := g.localeModule.GetLastNames(locale.AnyGender)
1✔
245

1✔
246
        pos := orderedInt64(0, int64(len(lastNames)-1), number, g.totalValuesCount)
1✔
247

1✔
248
        return lastNames[pos]
1✔
249
}
1✔
250

251
// phone returns n-th phone number from range.
252
func (g *StringGenerator) phone(number float64) string {
1✔
253
        patterns := g.localeModule.GetPhonePatterns()
1✔
254

1✔
255
        pos := orderedInt64(0, int64(len(patterns)-1), number, g.totalValuesCount)
1✔
256

1✔
257
        pattern := patterns[pos]
1✔
258
        maxPhone := int64(math.Pow(10, float64(strings.Count(pattern, "#")))) - 1 //nolint:mnd
1✔
259

1✔
260
        phone := orderedInt64(0, maxPhone, number, g.totalValuesCount)
1✔
261

1✔
262
        return replaceWithNumber(pattern, '#', phone)
1✔
263
}
1✔
264

265
// text sorts texts only within their respective length groups.
266
// Texts of the same length will be ordered, but ordering
267
// between texts of different lengths is not guaranteed.
268
//
269
//nolint:cyclop
270
func (g *StringGenerator) text(num float64) (string, error) {
1✔
271
        words := g.localeModule.GetWords()
1✔
272
        oneLetterWords := g.localeModule.GetOneLetterWords()
1✔
273
        oneLetterWordsLen := int64(len(oneLetterWords))
1✔
274

1✔
275
        delimiter := locale.WordsDelimiter
1✔
276
        delimiterLen := len(delimiter)
1✔
277

1✔
278
        bytesPerChar := g.localeModule.GetBytesPerChar()
1✔
279

1✔
280
        maxPreComputedLength := len(g.completions) - 1
1✔
281

1✔
282
        wantedLen := g.MinLength + delimiterLen + int(num)%(g.MaxLength-g.MinLength+1)
1✔
283

1✔
284
        number := int64(math.Floor(float64(g.completions[maxPreComputedLength]-1) * (num / float64(g.totalValuesCount))))
1✔
285

1✔
286
        result := make([]byte, 0, wantedLen*bytesPerChar)
1✔
287

1✔
288
        var textLen int
1✔
289

1✔
290
        remaining := maxPreComputedLength
1✔
291
        // Process until we've built the full text.
1✔
292
        for remaining > 0 {
2✔
293
                found := false
1✔
294
                // Iterate over words in lexicographical order.
1✔
295
                if remaining == 1 {
2✔
296
                        if number > oneLetterWordsLen-1 {
1✔
UNCOV
297
                                return "", errors.Errorf("remaining length is 1 but k: %v overflows: %v", number, oneLetterWordsLen)
×
UNCOV
298
                        }
×
299

300
                        result = append(result, oneLetterWords[number]...)
1✔
301

1✔
302
                        textLen++
1✔
303

1✔
304
                        break
1✔
305
                }
306

307
                for _, w := range words {
2✔
308
                        wLen := len(w)/bytesPerChar + delimiterLen
1✔
309
                        if wLen > remaining {
2✔
310
                                continue
1✔
311
                        }
312
                        // count = number of completions if we choose word w at this step.
313
                        count := g.completions[remaining-wLen]
1✔
314
                        // If k is within the block for word w, choose it.
1✔
315
                        if number < count {
2✔
316
                                result = append(result, w...)
1✔
317
                                result = append(result, delimiter...)
1✔
318

1✔
319
                                textLen += wLen
1✔
320

1✔
321
                                remaining -= wLen
1✔
322
                                found = true
1✔
323

1✔
324
                                break
1✔
325
                        }
326
                        // Otherwise, skip this block.
327
                        number -= count
1✔
328
                }
329

330
                if !found {
1✔
UNCOV
331
                        return "", errors.Errorf("index %v out of range for remaining length %d, %v", number, remaining, wantedLen)
×
UNCOV
332
                }
×
333
        }
334

335
        for textLen < wantedLen {
2✔
336
                w := words[number%int64(len(words)-1)]
1✔
337

1✔
338
                result = append(result, w...)
1✔
339
                result = append(result, delimiter...)
1✔
340

1✔
341
                textLen += len(w)/bytesPerChar + delimiterLen
1✔
342
        }
1✔
343

344
        text := string(result)
1✔
345

1✔
346
        if textLen > wantedLen {
2✔
347
                if bytesPerChar == 1 {
2✔
348
                        text = text[:wantedLen]
1✔
349
                } else {
2✔
350
                        text = string([]rune(text)[:wantedLen])
1✔
351
                }
1✔
352
        }
353

354
        return text, nil
1✔
355
}
356

357
// simpleString generates a lexicographically ordered string based on the given number.
358
// The function ensures that strings of different lengths are evenly distributed.
359
//
360
// Prepared variables (from Prepare method):
361
//   - countByLength - determines how many strings of each length should be generated; aims for an even distribution
362
//     but adjusts when the number of possible strings at a given length is limited;
363
//   - countByPrefix - determines how many times a given prefix should be repeated across generated strings;
364
//   - sumByPrefix - keeps total number of strings that should be generated with a specific prefix of a certain length.
365
//
366
// Each iteration of loop follows these steps:
367
//   - Subtracting the Current Prefix Group.
368
//     countByPrefix[prefixLen] represents how many times the current prefix is repeated.
369
//     We subtract this value from remain to determine if the target string falls within this group.
370
//     If remain is negative, it means the desired index falls within the current prefix group, so we stop.
371
//     If sumByPrefix[prefixLen+1] == 0, it means no further characters can be added, so we also stop.
372
//   - Determining the Next Character.
373
//     sumByPrefix[prefixLen+1] tells us how many strings exist for the next character choices.
374
//     remain / sumByPrefix[prefixLen+1] determines how many prefixes we need to skip before choosing next character.
375
//     We update remain according to reflect the choice. The selected character charset[i] is added to prefix.
376
//
377
// This approach ensures precision up to 217 characters in prefix length due to float64 limitations.
378
// Any additional characters required beyond the ordered prefix are filled in using a pattern based on `number`.
379
//
380
// Let's assume that:
381
//   - charset = ['a', 'b']
382
//   - min length = 2, max length = 3
383
//   - total strings = 10
384
//
385
// Generated strings and counts:
386
//   - a   → 0 times
387
//   - aa  → 1 time
388
//   - aaa → 0.75 times
389
//   - aab → 0.75 times
390
//   - ab  → 1 time
391
//   - ...
392
//
393
// Precomputed values:
394
//   - countByLength = [0, 4, 6]
395
//   - countByPrefix = [0, 0, 1, 0.75]
396
//   - sumByPrefix   = [10, 5, 2.5, 0.75]
397
//
398
// Suppose we want to generate simpleString(7), let's trace the loop:
399
//   - remain -= countByPrefix[0] = 7 - 0 = 7
400
//     i = remain / sumByPrefix[1] = 7 / 5 = 1 (selects 'b')
401
//     remain -= sumByPrefix[1] * i = 7 - (5 * 1) = 2
402
//     prefix = ['b']
403
//   - remain -= countByPrefix[1] = 2 - 0 = 2
404
//     i = remain / sumByPrefix[2] = 2 / 2.5 = 0 (selects 'a')
405
//     remain -= sumByPrefix[2] * i = 2 - (2.5 * 0) = 2
406
//     prefix = ['b', 'a']
407
//   - remain -= countByPrefix[2] = 2 - 1 = 1
408
//     i = remain / sumByPrefix[3] = 1 / 0.75 = 1 (selects 'b')
409
//     remain -= sumByPrefix[3] * i = 1 - (0.75 * 1) = 0.25
410
//     prefix = ['b', 'a', 'b']
411
//   - remain -= countByPrefix[3] = 0.25 - 0.75 = -0.5
412
//     remain < 0 → break with result "bab"
413
func (g *StringGenerator) simpleString(number float64) string {
2✔
414
        prefix := make([]rune, 0, g.MaxLength)
2✔
415

2✔
416
        var prefixLen int
2✔
417

2✔
418
        for remain := number; ; {
4✔
419
                prefixLen = len(prefix)
2✔
420

2✔
421
                remain -= g.countByPrefix[prefixLen]
2✔
422
                if remain < 0 || g.sumByPrefix[prefixLen+1] == 0 {
4✔
423
                        break
2✔
424
                }
425

426
                i := int(remain / g.sumByPrefix[prefixLen+1])
2✔
427
                remain -= g.sumByPrefix[prefixLen+1] * float64(i)
2✔
428
                prefix = append(prefix, g.charset[i])
2✔
429
        }
430

431
        // The precision of float64 allows us to generate only 217 prefix characters (which is enough for us).
432
        // Within the ordered prefix, we can supplement with random characters.
433
        if prefixLen < g.MinLength {
2✔
UNCOV
434
                destLen := g.MinLength + int(number)%(g.MaxLength-g.MinLength+1)
×
UNCOV
435
                for i := range destLen - prefixLen {
×
UNCOV
436
                        prefix = append(prefix, g.charset[(int(number)+i*i)%len(g.charset)])
×
UNCOV
437
                }
×
438
        }
439

440
        return string(prefix)
2✔
441
}
442

443
// Value returns n-th string from range.
444
func (g *StringGenerator) Value(number float64, row map[string]any) (any, error) {
2✔
445
        if g.Template != "" {
3✔
446
                val, err := g.templateString(number, row)
1✔
447
                if err != nil {
1✔
NEW
448
                        return nil, errors.WithMessage(err, "failed to template string")
×
NEW
449
                }
×
450

451
                return val, nil
1✔
452
        }
453

454
        switch g.LogicalType {
2✔
455
        case models.FirstNameType:
1✔
456
                return g.firstName(number), nil
1✔
457
        case models.LastNameType:
1✔
458
                return g.lastName(number), nil
1✔
459
        case models.PhoneType:
1✔
460
                return g.phone(number), nil
1✔
461
        case models.TextType:
1✔
462
                return g.text(number)
1✔
463
        }
464

465
        return g.simpleString(number), nil
2✔
466
}
467

468
//nolint:cyclop
469
func (g *StringGenerator) ValuesCount(distinctValuesCountByColumn map[string]uint64) float64 {
2✔
470
        if g.Template != "" {
3✔
471
                return g.templateCardinality(distinctValuesCountByColumn)
1✔
472
        }
1✔
473

474
        switch g.LogicalType {
2✔
475
        case models.FirstNameType:
1✔
476
                return float64(len(g.localeModule.GetFirstNames(locale.AnyGender)))
1✔
477

478
        case models.LastNameType:
1✔
479
                return float64(len(g.localeModule.GetLastNames(locale.AnyGender)))
1✔
480

481
        case models.PhoneType:
1✔
482
                totalCount := float64(0)
1✔
483
                for _, pattern := range g.localeModule.GetPhonePatterns() {
2✔
484
                        totalCount += math.Pow(float64(10), float64(strings.Count(pattern, "#"))) //nolint:mnd
1✔
485
                }
1✔
486

487
                return totalCount
1✔
488

489
        case models.TextType:
1✔
490
                if g.MinLength > len(g.completions) {
2✔
491
                        return math.Inf(1)
1✔
492
                }
1✔
493

494
                totalCount := float64(0)
1✔
495
                for length := g.MinLength; length <= g.MaxLength && length+1 < len(g.completions); length++ {
2✔
496
                        totalCount += float64(g.completions[length+1])
1✔
497
                }
1✔
498

499
                return totalCount
1✔
500
        }
501

502
        totalCount := float64(0)
2✔
503
        for length := g.MinLength; length <= g.MaxLength; length++ {
4✔
504
                totalCount += math.Pow(float64(len(g.charset)), float64(length))
2✔
505
        }
2✔
506

507
        return totalCount
2✔
508
}
509

510
func (g *StringGenerator) templateCardinality(distinctValuesCountByColumn map[string]uint64) float64 {
1✔
511
        total := 1.0
1✔
512

1✔
513
        patternValMatches := rePatternVal.FindAllStringSubmatch(g.Template, -1)
1✔
514
        for _, match := range patternValMatches {
2✔
515
                pattern := match[1]
1✔
516
                if pattern == "" {
1✔
NEW
UNCOV
517
                        pattern = match[2]
×
NEW
UNCOV
518
                }
×
519

520
                total *= g.patternCardinality(pattern)
1✔
521
        }
522

523
        columns := common.ExtractValuesFromTemplate(g.Template)
1✔
524
        for _, column := range columns {
2✔
525
                if count, ok := distinctValuesCountByColumn[column]; ok && count > 0 {
2✔
526
                        total *= float64(count)
1✔
527
                }
1✔
528
        }
529

530
        return total
1✔
531
}
532

533
func (g *StringGenerator) patternCardinality(pattern string) float64 {
1✔
534
        total := 1.0
1✔
535

1✔
536
        if count := strings.Count(pattern, "A"); count > 0 {
2✔
537
                total *= math.Pow(float64(len(g.localeModule.LargeLetters())), float64(count))
1✔
538
        }
1✔
539

540
        if count := strings.Count(pattern, "a"); count > 0 {
2✔
541
                total *= math.Pow(float64(len(g.localeModule.SmallLetters())), float64(count))
1✔
542
        }
1✔
543

544
        if count := strings.Count(pattern, "0"); count > 0 {
2✔
545
                total *= math.Pow(float64(len(locale.Numbers)), float64(count))
1✔
546
        }
1✔
547

548
        if count := strings.Count(pattern, "#"); count > 0 {
2✔
549
                total *= math.Pow(float64(len(locale.SpecialChars)), float64(count))
1✔
550
        }
1✔
551

552
        return total
1✔
553
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc